Skip to content

Commit

Permalink
Merge pull request #75 from CGATOxford/{TS}-addSubsetToGroup
Browse files (browse the repository at this point in the history)
Add subset to group command and correct output
  • Loading branch information
TomSmithCGAT authored Feb 1, 2017
2 parents 70b5ce3 + 09e30a4 commit e1b9c4a
Show file tree
Hide file tree
Showing 11 changed files with 25,651 additions and 3,638 deletions.
3,268 changes: 1,634 additions & 1,634 deletions tests/group_cluster.tsv

Large diffs are not rendered by default.

3,268 changes: 1,634 additions & 1,634 deletions tests/group_cluster_py3.tsv

Large diffs are not rendered by default.

360 changes: 180 additions & 180 deletions tests/group_dir.tsv

Large diffs are not rendered by default.

360 changes: 180 additions & 180 deletions tests/group_dir_py3.tsv

Large diffs are not rendered by default.

5,498 changes: 5,498 additions & 0 deletions tests/group_dir_subset.sam

Large diffs are not rendered by default.

5,499 changes: 5,499 additions & 0 deletions tests/group_dir_subset.tsv

Large diffs are not rendered by default.

5,498 changes: 5,498 additions & 0 deletions tests/group_dir_subset_py3.sam

Large diffs are not rendered by default.

5,499 changes: 5,499 additions & 0 deletions tests/group_dir_subset_py3.tsv

Large diffs are not rendered by default.

28 changes: 22 additions & 6 deletions tests/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,27 @@ dedup_single_sep:
references: [single_sep.sam]
options: "dedup -L test.log --out-sam --random-seed=123456789 --method=directional --umi-separator=:"

group unique:
group_unique:
skip_python: 3
stdin: chr19.bam
outputs: [stdout, group_uniq.tsv]
references: [group_uniq.sam, group_uniq.tsv]
options: group -L test.log --out-sam --random-seed=123456789 --method=unique --output-bam --out-sam --group-out=group_uniq.tsv

group cluster:
group_cluster:
skip_python: 3
stdin: chr19.bam
outputs: [stdout, group_cluster.tsv]
references: [group_cluster.sam, group_cluster.tsv]
options: group -L test.log --out-sam --random-seed=123456789 --method=cluster --output-bam --out-sam --group-out=group_cluster.tsv

group_directional_subset:
skip_python: 3
stdin: chr19.bam
outputs: [stdout, group_dir_subset.tsv]
references: [group_dir_subset.sam, group_dir_subset.tsv]
options: group -L test.log --out-sam --random-seed=123456789 --method=directional --output-bam --out-sam --group-out=group_dir_subset.tsv --subset=0.1

# ## End of python 2 tests ##


Expand Down Expand Up @@ -172,34 +179,43 @@ dedup_single_sep_py3:
references: [single_sep_py3.sam]
options: "dedup -L test.log --out-sam --random-seed=123456789 --method=directional --umi-separator=:"

group unique_py3:
group_unique_py3:
skip_python: 2
stdin: chr19.bam
outputs: [stdout, group_uniq.tsv]
references: [group_uniq_py3.sam, group_uniq_py3.tsv]
options: group -L test.log --out-sam --random-seed=123456789 --method=unique --output-bam --out-sam --group-out=group_uniq.tsv

group cluster_py3:
group_cluster_py3:
skip_python: 2
stdin: chr19.bam
outputs: [stdout, group_cluster.tsv]
references: [group_cluster_py3.sam, group_cluster_py3.tsv]
options: group -L test.log --out-sam --random-seed=123456789 --method=cluster --output-bam --out-sam --group-out=group_cluster.tsv

# group adjacency_py3:
# group_adjacency_py3:
# skip_python: 2
# stdin: chr19.bam
# outputs: [stdout, group_adj.tsv]
# references: [group_adj_py3.sam, group_adj_py3.tsv]
# options: group -L test.log --out-sam --random-seed=123456789 --method=adjacency --output-bam --out-sam --group-out=group_adj.tsv

group directional_py3:
group_directional_py3:
skip_python: 2
stdin: chr19.bam
outputs: [stdout, group_dir.tsv]
references: [group_dir_py3.sam, group_dir_py3.tsv]
options: group -L test.log --out-sam --random-seed=123456789 --method=directional --output-bam --out-sam --group-out=group_dir.tsv


group_directional_subset_py3:
skip_python: 2
stdin: chr19.bam
outputs: [stdout, group_dir_subset.tsv]
references: [group_dir_subset_py3.sam, group_dir_subset_py3.tsv]
options: group -L test.log --out-sam --random-seed=123456789 --method=directional --output-bam --out-sam --group-out=group_dir_subset.tsv --subset=0.1


## End of python 3 tests ##


Expand Down
9 changes: 6 additions & 3 deletions umi_tools/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,9 @@ def main(argv=None):
parser.add_option("--umi-separator", dest="umi_sep",
type="string", help="separator between read id and UMI",
default="_")
parser.add_option("--subset", dest="subset", type="float",
help="Use only a fraction of reads, specified by subset",
default=None)
parser.add_option("--spliced-is-unique", dest="spliced",
action="store_true",
help="Treat a spliced read as different to an unspliced"
Expand Down Expand Up @@ -353,7 +356,7 @@ def main(argv=None):
infile,
read_events,
ignore_umi=False,
subset=False,
subset=options.subset,
quality_threshold=options.mapping_quality,
paired=options.paired,
chrom=options.chrom,
Expand Down Expand Up @@ -407,8 +410,8 @@ def main(argv=None):
mapping_outfile.write("%s\n" % "\t".join(map(str, (
read.query_name, read.reference_name,
umi_methods.get_read_position(read, options.soft)[1],
umi_methods.get_umi(read, options.umi_sep).decode(),
counts[top_umi],
umi.decode(),
counts[umi],
top_umi.decode(),
group_count,
unique_id))))
Expand Down
2 changes: 1 addition & 1 deletion umi_tools/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.5"
__version__ = "0.3.6"

0 comments on commit e1b9c4a

Please sign in to comment.