From 23a210284109f397905c7e7763573ac1d6ebcdd3 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Sat, 13 Nov 2021 18:16:36 +0000 Subject: [PATCH 1/4] Add method to swap the ordr of reads for dual-guide --- pycroquet/classes.py | 14 +- pycroquet/dualguide.py | 254 ++++++++++++++++++------------- pycroquet/libparser.py | 6 + pycroquet/readparser.py | 4 +- pycroquet/readwriter.py | 48 +++--- pycroquet/resources/library.yaml | 2 +- 6 files changed, 205 insertions(+), 123 deletions(-) diff --git a/pycroquet/classes.py b/pycroquet/classes.py index b4d4394..510c42b 100644 --- a/pycroquet/classes.py +++ b/pycroquet/classes.py @@ -83,6 +83,7 @@ class LibraryHeader: config: dict info_items: dict = None is_single: bool = False + reverse_read_order: bool = False @dataclass @@ -96,6 +97,12 @@ class Guide: other: dict[str, str] = field(default_factory=dict) unique: bool = True count: int = 0 + _composite_sgrna_seq = None + + def composite_sgrna_seq(self) -> str: + if self._composite_sgrna_seq is None: + self._composite_sgrna_seq = "|".join(self.sgrna_seqs) + return self._composite_sgrna_seq @dataclass @@ -137,12 +144,15 @@ def sgrna_ids_by_seq(self, target_seq): self._sgrna_ids_by_seq = sgrna_ids_by_seq return self._sgrna_ids_by_seq[target_seq] - def guide_by_sgrna_set(self, seq_l, seq_r): + def guide_by_sgrna_set(self, seq_l, seq_r) -> List[int]: match = f"{seq_l}|{seq_r}" if self._guide_by_sgrna_set is None: data = {} for i, g in enumerate(self.guides): - data["|".join(g.sgrna_seqs)] = i + composite_guide = g.composite_sgrna_seq() + if composite_guide not in data: + data[composite_guide] = [] + data[composite_guide].append(i) self._guide_by_sgrna_set = data return self._guide_by_sgrna_set.get(match) diff --git a/pycroquet/dualguide.py b/pycroquet/dualguide.py index 3c799cd..7a9fd8e 100644 --- a/pycroquet/dualguide.py +++ b/pycroquet/dualguide.py @@ -31,14 +31,17 @@ import logging import os import shutil +import sys from time import time from typing import Dict from typing import Final from typing import List +from typing import Optional from typing import Tuple import pysam from pygas.alignercpu import AlignerCpu +from pygas.classes import AlignmentBatch from pygas.classes import Backtrack import pycroquet.tools as ctools @@ -54,11 +57,63 @@ from pycroquet.readwriter import guide_header from pycroquet.readwriter import read_iter from pycroquet.readwriter import to_alignment -from pycroquet.readwriter import to_mapped_read +from pycroquet.readwriter import to_mapped_reads CLASSIFICATION: Final = Classification() +def best_unique_l_mm_r( + library: Library, bt_set_l: List[Backtrack], bt_set_r: List[Backtrack] +) -> Tuple[int, Backtrack, Backtrack]: + """ + find the pairing that is most likely to be the real item + """ + guides_f_r = {} + bt_l = bt_set_l[0] + guide_idxs_l = set(library.target_to_guides[bt_l.sm.target]) + gidx = None + for bt_r in bt_set_r: + guide_idxs_r = set(library.target_to_guides[bt_r.sm.target]) + guide_intersect = guide_idxs_l.intersection(guide_idxs_r) + if guide_intersect: + if len(guide_intersect) > 1: + raise ValueError("Multiple guides via intersect has never been seen, extra logic may be required") + if bt_l.sm.reversed is False and bt_r.sm.reversed is True: + gidx = guide_intersect.pop() + guides_f_r[gidx] = (gidx, bt_l, bt_r) + if guides_f_r: + if len(guides_f_r) > 1: + raise ValueError("Multiple guides has never been seen, extra logic may be required") + return guides_f_r[gidx] + return None + + +def best_mm_l_unique_r( + library: Library, bt_set_l: List[Backtrack], 
bt_set_r: List[Backtrack] +) -> Tuple[int, Backtrack, Backtrack]: + """ + find the pairing that is most likely to be the real item + """ + guides_f_r = {} + bt_r = bt_set_r[0] + guide_idxs_r = set(library.target_to_guides[bt_r.sm.target]) + gidx = None + for bt_l in bt_set_l: + guide_idxs_l = set(library.target_to_guides[bt_l.sm.target]) + guide_intersect = guide_idxs_r.intersection(guide_idxs_l) + if guide_intersect: + if len(guide_intersect) > 1: + raise ValueError("Multiple guides via intersect has never been seen, extra logic may be required") + if bt_l.sm.reversed is False and bt_r.sm.reversed is True: + gidx = guide_intersect.pop() + guides_f_r[gidx] = (gidx, bt_l, bt_r) + if guides_f_r: + if len(guides_f_r) > 1: + raise ValueError("Multiple guides has never been seen, extra logic may be required") + return guides_f_r[gidx] + return None + + def best_multimatch_set(library: Library, bt_set_l: List[Backtrack], bt_set_r: List[Backtrack]): """ try to identify a single guide pair when one or both ends are multi-map @@ -92,106 +147,96 @@ def classify_read_pair( map_l: Tuple[str, List[Backtrack]], map_r: Tuple[str, List[Backtrack]], library: Library, -) -> Tuple[str, int, Backtrack, Backtrack, str, str]: # noqa R701 +) -> Tuple[str, Optional[List[int]], Backtrack, Backtrack, str, str]: # noqa R701 """ Returns: classification string (see classes.Classification) + None|list of gidx R1 backtrack R2 backtrack - R1 guide index - R2 guide index + R1 map type + R2 map type """ (mtype_l, mdata_l) = map_l (mtype_r, mdata_r) = map_r if mtype_l == "unique" and mtype_l == mtype_r: (bt_l, bt_r) = (mdata_l[0], mdata_r[0]) - (tseq_l, tseq_r) = (bt_l.sm.target, bt_r.sm.target) - gidx = library.guide_by_sgrna_set(tseq_l, tseq_r) - if gidx is not None and bt_l.sm.reversed is False and bt_r.sm.reversed is True: - return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) - - # gives the common guides - guide_intersect = intersect_guide_targets(library, tseq_l, tseq_r) - # but now need to know if any give the appropriate orientations - g_size = len(guide_intersect) - if g_size == 0: - return (CLASSIFICATION.swap, -1, bt_l, bt_r, mtype_l, mtype_r) - if g_size == 1: - gidx = guide_intersect.pop() + gidx = library.guide_by_sgrna_set(bt_l.sm.target, bt_r.sm.target) + if gidx is not None: if bt_l.sm.reversed is False and bt_r.sm.reversed is True: return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) - else: - return (CLASSIFICATION.aberrant_match, -1, bt_l, bt_l, mtype_l, mtype_r) - # g_size > 1 - if bt_l.sm.reversed is False and bt_r.sm.reversed is True: - for gidx in guide_intersect: - guide = library.guides[gidx] - if guide.sgrna_seqs[0] == tseq_l and guide.sgrna_seqs[1] == tseq_r: - return ( - CLASSIFICATION.match, - gidx, - bt_l, - bt_r, - mtype_l, - mtype_r, - ) - return (CLASSIFICATION.aberrant_match, -1, bt_l, bt_l, mtype_l, mtype_r) - - if mtype_l == "unique": + # other orientations will be an abberant match as we are in unique/unique + return (CLASSIFICATION.aberrant_match, None, bt_l, bt_r, mtype_l, mtype_r) + # what about r1/r2 order swap + gidx = library.guide_by_sgrna_set(bt_r.sm.target, bt_l.sm.target) + if gidx is not None: + return (CLASSIFICATION.aberrant_match, None, bt_l, bt_r, mtype_l, mtype_r) + # which leaves swap (as both are unique but to different guide sets) + return (CLASSIFICATION.swap, None, bt_l, bt_r, mtype_l, mtype_r) + elif mtype_l == "unmapped" and mtype_l == mtype_r: + return (CLASSIFICATION.no_match, None, None, None, mtype_l, mtype_r) + elif mtype_l == 
"multimap" and mtype_l == mtype_r: + best_pair = best_multimatch_set(library, mdata_l, mdata_r) + if best_pair: + # this will impact how we write out alignments + (gidx, bt_l, bt_r) = best_pair + gidx = library.guide_by_sgrna_set(bt_l.sm.target, bt_r.sm.target) + if gidx is not None: + if bt_l.sm.reversed is False and bt_r.sm.reversed is True: + return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) + # other orientations will be an abberant match as we are in unique/unique + return (CLASSIFICATION.aberrant_match, None, bt_l, bt_r, mtype_l, mtype_r) + # what about r1/r2 order swap + gidx = library.guide_by_sgrna_set(bt_r.sm.target, bt_l.sm.target) + if gidx is not None: + return (CLASSIFICATION.aberrant_match, None, bt_l, bt_r, mtype_l, mtype_r) + # which leaves ambiguous (as both map to "things") + return (CLASSIFICATION.ambiguous, None, None, None, mtype_l, mtype_r) + elif mtype_l == "unique": + bt_l = mdata_l[0] if mtype_r == "unmapped": - bt_l = mdata_l[0] if bt_l.sm.reversed: - return (CLASSIFICATION.r_open_3p, -1, bt_l, None, mtype_l, mtype_r) + return (CLASSIFICATION.r_open_3p, None, bt_l, None, mtype_l, mtype_r) else: - return (CLASSIFICATION.f_open_3p, -1, bt_l, None, mtype_l, mtype_r) - # then multimap - best_pair = best_multimatch_set(library, mdata_l, mdata_r) + return (CLASSIFICATION.f_open_3p, None, bt_l, None, mtype_l, mtype_r) + # then one end multimap + best_pair = best_unique_l_mm_r(library, mdata_l, mdata_r) + # above function checks orientation, if not as expected this goes to *_multi_3p if best_pair: - ( - gidx, - bt_l, - bt_r, - ) = best_pair # this will impact how we write out alignments - return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) - bt_l = mdata_l[0] + (gidx, bt_l, bt_r) = best_pair + gidx = library.guide_by_sgrna_set(bt_l.sm.target, bt_r.sm.target) + if gidx: + return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) + # what about r1/r2 order swap + gidx = library.guide_by_sgrna_set(bt_r.sm.target, bt_l.sm.target) + if gidx: + return (CLASSIFICATION.aberrant_match, gidx, bt_l, bt_r, mtype_l, mtype_r) if bt_l.sm.reversed: - return (CLASSIFICATION.r_multi_3p, -1, bt_l, None, mtype_l, mtype_r) - else: - return (CLASSIFICATION.f_multi_3p, -1, bt_l, None, mtype_l, mtype_r) - if mtype_r == "unique": + return (CLASSIFICATION.r_multi_3p, None, bt_l, None, mtype_l, mtype_r) + return (CLASSIFICATION.f_multi_3p, None, bt_l, None, mtype_l, mtype_r) + elif mtype_r == "unique": + bt_r = mdata_r[0] if mtype_l == "unmapped": - bt_r = mdata_r[0] if bt_r.sm.reversed: - return (CLASSIFICATION.r_open_5p, -1, None, bt_r, mtype_l, mtype_r) + return (CLASSIFICATION.r_open_5p, None, None, bt_r, mtype_l, mtype_r) else: - return (CLASSIFICATION.f_open_5p, -1, None, bt_r, mtype_l, mtype_r) - # then multi - best_pair = best_multimatch_set(library, mdata_l, mdata_r) + return (CLASSIFICATION.f_open_5p, None, None, bt_r, mtype_l, mtype_r) + # then one end multimap + best_pair = best_mm_l_unique_r(library, mdata_l, mdata_r) + # above function checks orientation, if not as expected this goes to *_multi_5p if best_pair: - ( - gidx, - bt_l, - bt_r, - ) = best_pair # this will impact how we write out alignments - return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) - bt_r = mdata_r[0] + (gidx, bt_l, bt_r) = best_pair + gidx = library.guide_by_sgrna_set(bt_l.sm.target, bt_r.sm.target) + if gidx: + return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) + # what about r1/r2 order swap + gidx = library.guide_by_sgrna_set(bt_r.sm.target, 
bt_l.sm.target) + if gidx: + return (CLASSIFICATION.aberrant_match, gidx, bt_l, bt_r, mtype_l, mtype_r) if bt_r.sm.reversed: - return (CLASSIFICATION.r_multi_5p, -1, None, bt_r, mtype_l, mtype_r) - else: - return (CLASSIFICATION.f_multi_5p, -1, None, bt_r, mtype_l, mtype_r) - if mtype_l == "multimap" and mtype_l == mtype_r: - best_pair = best_multimatch_set(library, mdata_l, mdata_r) - if best_pair: - ( - gidx, - bt_l, - bt_r, - ) = best_pair # this will impact how we write out alignments - return (CLASSIFICATION.match, gidx, bt_l, bt_r, mtype_l, mtype_r) - return (CLASSIFICATION.no_match, -1, None, None, mtype_l, mtype_r) - - # all that remains are the combination of unmapped/multimap on both ends - return (CLASSIFICATION.no_match, -1, None, None, mtype_l, mtype_r) + return (CLASSIFICATION.r_multi_5p, None, None, bt_r, mtype_l, mtype_r) + return (CLASSIFICATION.f_multi_5p, None, None, bt_r, mtype_l, mtype_r) + return (CLASSIFICATION.no_match, None, None, None, mtype_l, mtype_r) def order_hits(hits: List[Backtrack], first_bt: Backtrack): @@ -230,7 +275,7 @@ def read_pairs_to_guides( class_cache = {} # initialise counts counts = _init_class_counts() - align_file = os.path.join(workspace, "tmp.cram") + align_file = os.path.join(workspace, "tmp.bam") with pysam.AlignmentFile(align_file, "wb", header=header, reference_filename=guide_fa, threads=cpus) as af: iter = read_iter(seq_file, default_rgid=default_rgid, cpus=cpus, trim_len=trim_len) @@ -244,16 +289,12 @@ def read_pairs_to_guides( a_l, a_r = None, None - (class_type, guide_idx, hits_l, hits_r, orig_l, orig_r) = ( - None, - -1, - [], - [], - None, - None, - ) + class_type, hits_l, hits_r, orig_l, orig_r = None, None, None, None, None + guide_idx = None - if pair_lookup not in class_cache: + if pair_lookup in class_cache: + (class_type, guide_idx, hits_l, hits_r, orig_l, orig_r) = class_cache[pair_lookup] + else: (map_l, map_r) = (aligned_results[read_l], aligned_results[read_r]) # try to order from most likely to least ( @@ -265,26 +306,31 @@ def read_pairs_to_guides( orig_r, ) = classify_read_pair(map_l, map_r, library) + hits_l = order_hits(map_l[1], bt_l) + hits_r = order_hits(map_r[1], bt_r) + class_cache[pair_lookup] = ( class_type, guide_idx, - order_hits(map_l[1], bt_l), - order_hits(map_r[1], bt_r), + hits_l, + hits_r, orig_l, orig_r, ) - (class_type, guide_idx, hits_l, hits_r, orig_l, orig_r) = class_cache[pair_lookup] - if guide_idx != -1: - library.guides[guide_idx].count += 1 + if guide_idx is not None: + for gidx in guide_idx: + library.guides[gidx].count += 1 if hits_l: - (a_l, multi) = to_mapped_read(seqread_l, ref_ids, library, hits_l, guide_idx=guide_idx) + (a_lst, multi) = to_mapped_reads(seqread_l, ref_ids, library, hits_l, guide_idx=guide_idx, dual=True) + for a in a_lst: + af.write(a) if multi: stats.multimap_reads += 1 stats.mapped_to_guide_reads += 1 else: - a_l = to_alignment(seqread_l, False, [], unmapped=True) + a = to_alignment(seqread_l, False, [], unmapped=True) if class_type == CLASSIFICATION.r_multi_5p: stats.multimap_reads += 1 else: @@ -292,13 +338,17 @@ def read_pairs_to_guides( stats.multimap_reads += 1 else: stats.unmapped_reads += 1 + af.write(a) if hits_r: - (a_r, multi) = to_mapped_read(seqread_r, ref_ids, library, hits_r, guide_idx=guide_idx) + (a_lst, multi) = to_mapped_reads(seqread_r, ref_ids, library, hits_r, guide_idx=guide_idx, dual=True) + for a in a_lst: + af.write(a) if multi: stats.multimap_reads += 1 stats.mapped_to_guide_reads += 1 else: - a_r = to_alignment(seqread_r, False, [], 
unmapped=True) + a = to_alignment(seqread_r, False, [], unmapped=True) + af.write(a) if class_type == CLASSIFICATION.r_multi_3p: stats.multimap_reads += 1 else: @@ -307,10 +357,10 @@ def read_pairs_to_guides( else: stats.unmapped_reads += 1 - af.write(a_l) - af.write(a_r) - - counts[class_type] += 1 + if class_type == CLASSIFICATION.match: + counts[class_type] += len(guide_idx) + else: + counts[class_type] += 1 logging.info(f"Alignment grouping took: {int(time() - start)}s") return (counts, align_file) diff --git a/pycroquet/libparser.py b/pycroquet/libparser.py index df2494f..24e1201 100644 --- a/pycroquet/libparser.py +++ b/pycroquet/libparser.py @@ -71,6 +71,7 @@ def parse_header(ifh: TextIO, line: str) -> LibraryHeader: info_items = {} lib_type = None + reverse_read_order = False while line and line.startswith("##"): # these are the info lines line = line.rstrip() @@ -80,6 +81,8 @@ def parse_header(ifh: TextIO, line: str) -> LibraryHeader: (tag, value) = m_res.group(1, 2) if tag in info_items: raise ValueError(f"Duplicate key '{tag}' found for '##' header line") + if tag == "dual-orientation" and value == "R2_R1": + reverse_read_order = True if tag == "library-type": if value not in LIBRARY_TYPES: raise ValueError(f"Value for 'library-type' ({value}) is not valid, choose from: " + str(LIBRARY_TYPES)) @@ -96,6 +99,7 @@ def parse_header(ifh: TextIO, line: str) -> LibraryHeader: required_cols=config["columns"]["required"], config=config, column_separators=col_sep, + reverse_read_order=reverse_read_order, ) @@ -219,6 +223,8 @@ def parse_data_rows(lh: LibraryHeader, ifh: TextIO) -> List[Guide]: f"All columns that can be split should have the same number of elements. Column {col} ({split_len}), differs from {initial_col} ({expect_len})" ) + if lh.reverse_read_order: + guide.sgrna_seqs = list(reversed(guide.sgrna_seqs)) for seq in guide.sgrna_seqs: if ACGT_ONLY.fullmatch(seq) is None: print(guide) diff --git a/pycroquet/readparser.py b/pycroquet/readparser.py index 1dd15ed..f01415f 100644 --- a/pycroquet/readparser.py +++ b/pycroquet/readparser.py @@ -318,6 +318,8 @@ def collate(seq_file, workspace, cpus): return seq_file tmp_hts = os.path.join(workspace, "collated.reads.bam") logging.info(f"Collating pairs from {seq_file} to {tmp_hts}") - pysam.collate("-f", "-l", "1", "--no-PG", "-@", str(hts_cpus), "-o", tmp_hts, seq_file) + collate_opts = ["-f", "-l", "1", "--no-PG", "-@", str(hts_cpus), "-o", tmp_hts, seq_file] + logging.debug("samtools collate " + " ".join(collate_opts)) + pysam.collate(*collate_opts) logging.info(f"Collation of reads took: {int(time() - start)}s") return tmp_hts diff --git a/pycroquet/readwriter.py b/pycroquet/readwriter.py index 212cd3a..e9cf1fc 100644 --- a/pycroquet/readwriter.py +++ b/pycroquet/readwriter.py @@ -27,6 +27,7 @@ # statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being # identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008, # 2009, 2010, 2011, 2012’. 
+import copy import gzip import hashlib import logging @@ -273,15 +274,16 @@ def sm_from_guide_hit(library: Library, hit: Backtrack, strand: str, mapq: int = return sa_set -def to_mapped_read( +def to_mapped_reads( seqread, ref_ids, library: Library, hits: List[Backtrack], reverse_in=None, strand_in=None, - guide_idx=-1, -) -> Tuple[pysam.AlignedSegment, bool]: + guide_idx=None, + dual=False, +) -> Tuple[List[pysam.AlignedSegment], bool]: mapq = 60 if len(hits) > 1: mapq = 0 @@ -292,35 +294,45 @@ def to_mapped_read( if strand_in is None: strand = "-" if hit.sm.reversed else "+" this_sa_set = sm_from_guide_hit(library, hit, strand, mapq) - sa_set = sa_set | this_sa_set + sa_set = sa_set | this_sa_set # merge 2 dicts requires >=3.9 is_secondary = False + alignments = [] + sa_set_len = len(sa_set) for hit in hits: ref_names = library.sgrna_ids_by_seq(hit.sm.target) tags = [("NM", hit.nm), ("MD", hit.md), ("AS", hit.sm.score)] - if guide_idx >= 0 and is_secondary is False: + if guide_idx is not None and is_secondary is False: # only apply to the primary mapping - tags.append(("YG", library.guides[guide_idx].id)) + tags.append(("YG", ";".join([library.guides[i].id for i in guide_idx]))) for ref_name in ref_names: - if len(sa_set) > 1: - tags.append( - ( - "SA", - ";".join([v for k, v in sa_set.items() if k != ref_name]), + local_tags = copy.copy(tags) + if sa_set_len > 1: + local_tags.append(("NH", sa_set_len)) + if sa_set_len <= 10: + local_tags.append( + ( + "SA", + ";".join([v for k, v in sa_set.items() if k != ref_name]), + ) ) - ) - tags.append(("NH", len(sa_set))) + if reverse_in is None: reverse = hit.sm.reversed - a = to_alignment(seqread, reverse, tags) + a = to_alignment(seqread, reverse, local_tags) # complete mapped info a.is_secondary = is_secondary a.reference_id = ref_ids[ref_name] a.reference_start = hit.t_pos - 1 # zero based a.cigarstring = hit.cigar a.mapping_quality = mapq + alignments.append(a) + if dual: + break is_secondary = True - return (a, len(sa_set) > 1) + if dual: + break + return (alignments, sa_set_len > 1) def write_alignments( @@ -374,10 +386,12 @@ def write_alignments( a = None if hit_type in ("unique", "multimap"): - (a, _) = to_mapped_read(seqread, ref_ids, library, hits, reverse, strand_in=strand) + (a_lst, _) = to_mapped_reads(seqread, ref_ids, library, hits, reverse, strand_in=strand) + for a in a_lst: + af.write(a) elif hit_type == "unmapped": a = to_alignment(seqread, reverse, [], unmapped=True) - af.write(a) + af.write(a) def reads_to_hts( diff --git a/pycroquet/resources/library.yaml b/pycroquet/resources/library.yaml index 1e3fa43..6311aee 100644 --- a/pycroquet/resources/library.yaml +++ b/pycroquet/resources/library.yaml @@ -28,4 +28,4 @@ columns: custom_annotation: headers: required: [] - optional: [] + optional: [dual-orientation] From 2b2c37bb4d0a499f14de714766b4369d3b954d45 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Mon, 15 Nov 2021 08:45:52 +0000 Subject: [PATCH 2/4] cleanup unused imports --- pycroquet/dualguide.py | 1 - pycroquet/readwriter.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/pycroquet/dualguide.py b/pycroquet/dualguide.py index 7a9fd8e..c6de76b 100644 --- a/pycroquet/dualguide.py +++ b/pycroquet/dualguide.py @@ -31,7 +31,6 @@ import logging import os import shutil -import sys from time import time from typing import Dict from typing import Final diff --git a/pycroquet/readwriter.py b/pycroquet/readwriter.py index e9cf1fc..fdf809c 100644 --- a/pycroquet/readwriter.py +++ b/pycroquet/readwriter.py @@ -33,7 +33,6 @@ 
import logging import os from array import array -from tempfile import TemporaryDirectory from typing import Dict from typing import Iterator from typing import List @@ -43,7 +42,6 @@ from pygas.classes import Backtrack from pygas.matrix import revcomp -from pycroquet.classes import Guide from pycroquet.classes import Library from pycroquet.classes import Seqread from pycroquet.classes import Stats From 2df5bae2691569b2b086eac871449de7e492e2f5 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 16 Nov 2021 16:09:45 +0000 Subject: [PATCH 3/4] Detail primary changes and version update --- CHANGES.md | 5 +++++ setup.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index e47121a..a6c5c55 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,10 @@ # CHANGES +## 1.3.0 + +- dual-guide: added library header item to deal with reversed read order when comparing against sgRNA. +- dual-guide: handled data multiplication issue in CRAM outputs, quicker. + ## 1.2.1 Handle change to how bgzip data is reported by magic decode. diff --git a/setup.py b/setup.py index 8dde923..a42a3ce 100755 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ "author": "Keiran M Raine", "url": "https://github.com/cancerit/pycroquet", "author_email": "cgphelp@sanger.ac.uk", - "version": "1.2.1", + "version": "1.3.0", "license": "AGPL-3.0", "python_requires": ">= 3.9", "install_requires": ["click", "click-option-group", "python-magic", "pysam", "pygas", "PyYAML"], From bb9b315e98ada9867843d4a60f59000a7ef50a77 Mon Sep 17 00:00:00 2001 From: Keiran Raine Date: Tue, 16 Nov 2021 16:10:52 +0000 Subject: [PATCH 4/4] Deal with odd failure of docker build on circleci (builds locally fine) --- Dockerfile | 2 ++ requirements-dev.txt | 2 +- requirements.txt | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index dddf8f5..793ba7c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,6 +45,8 @@ ENV VIRTUAL_ENV=$OPT/venv RUN python3.9 -m venv $VIRTUAL_ENV ENV PATH="$VIRTUAL_ENV/bin:$PATH" +RUN pip install --no-cache-dir --upgrade pip==21.3.1 && pip install --no-cache-dir Cython==0.29.24 + COPY pycroquet/ pycroquet/ COPY README.md setup.py requirements.txt requirements-dev.txt ./ RUN pip install --no-cache-dir -r requirements-dev.txt diff --git a/requirements-dev.txt b/requirements-dev.txt index 9094346..85a7253 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,7 +9,7 @@ pluggy==0.13.1 py==1.10.0 pygas==1.0.4 pyparsing==2.4.7 -pysam==0.16.0.1 +pysam==0.18.0 pytest==6.2.4 pytest-cov==2.12.1 python-magic==0.4.24 diff --git a/requirements.txt b/requirements.txt index 38cd9bb..766720d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ click==8.0.1 click-option-group==0.5.3 Cython==0.29.24 pygas==1.0.4 -pysam==0.16.0.1 +pysam==0.18.0 python-magic==0.4.24 PyYAML==5.4.1
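
For reference, the core of the dual-guide lookup introduced in patch 1 reduces to the sketch below: a composite "left|right" key per guide, an index that keeps every guide sharing that key (so duplicate guide pairs return a list of indices instead of a single last-wins index), and an optional reversal of the sgRNA order when the library header declares a dual-orientation of R2_R1. This is a minimal illustrative sketch only; the helper names (composite_key, build_guide_index, lookup) and the example sequences are not part of pycroquet.

    from typing import Dict, List, Optional

    def composite_key(sgrna_seqs: List[str], reverse_read_order: bool = False) -> str:
        # When the library header sets "dual-orientation" to "R2_R1", the patch flips
        # sgrna_seqs at parse time, so the composite key is built from the reversed
        # pair before any read pair is compared against it.
        seqs = list(reversed(sgrna_seqs)) if reverse_read_order else sgrna_seqs
        return "|".join(seqs)

    def build_guide_index(
        guide_seq_pairs: List[List[str]], reverse_read_order: bool = False
    ) -> Dict[str, List[int]]:
        # Mirrors the behaviour of Library.guide_by_sgrna_set(): every guide sharing
        # the same "left|right" pair is kept, rather than the last one silently winning.
        index: Dict[str, List[int]] = {}
        for i, pair in enumerate(guide_seq_pairs):
            index.setdefault(composite_key(pair, reverse_read_order), []).append(i)
        return index

    def lookup(index: Dict[str, List[int]], seq_l: str, seq_r: str) -> Optional[List[int]]:
        # None when the pair is unknown, otherwise all matching guide indices.
        return index.get(f"{seq_l}|{seq_r}")

    # Hypothetical example data, not taken from any real library.
    pairs = [["ACGTACGT", "TTAACCGG"], ["ACGTACGT", "TTAACCGG"], ["GGGGCCCC", "AATTGGCC"]]
    index = build_guide_index(pairs)
    print(lookup(index, "ACGTACGT", "TTAACCGG"))  # [0, 1] - both duplicate guides are counted
    print(lookup(index, "TTAACCGG", "ACGTACGT"))  # None  - candidate R1/R2 swap, handled upstream

In classify_read_pair above, a failed forward lookup followed by a successful lookup with the reads swapped is classified as an aberrant match, while failure of both lookups on two uniquely mapped reads is reported as a swap.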