-
Notifications
You must be signed in to change notification settings - Fork 2
/
uncat_reads
82 lines (63 loc) · 1.96 KB
/
uncat_reads
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/perl -w
=head1 NAME
uncat_reads
=head1 SYNOPSIS
uncat_reads [--help] [--verbose] [--spacer=STRING] [--length=INT] --in=<fasta> [--out1=FILENAME] [--out2=FILENAME]
Splits reads at a given length or separator.
--help: This info.
--spacer: sequence at which to split reads; alternatively, supply
--length: length of forward reads; to be used instead of spacer
--in: fasta file whose reads are to be split
--out1, out2: name of files with each half; if not stated, out.1.fa and out.2.fa are used
=head1 AUTHOR
=cut
use warnings;
use strict;
use Bio::SeqIO;
use Getopt::Long;
use Pod::Usage;
my $fastafile;
my $spacer;
my $length;
my $out1 = "out.1.fa";
my $out2 = "out.2.fa";
my $help = 0;
my $verbose = 0;
GetOptions(
"spacer=s" => \$spacer,
"length=i" => \$length,
"in=s" => \$fastafile,
"out1=s" => \ $out1,
"out2=s" => \ $out2,
"help!" => \$help,
"verbose!" => \$verbose,
);
pod2usage(0) if $help;
pod2usage(-msg => "Need an infile", -exitval => 1) unless $fastafile;
my $infile = Bio::SeqIO->new(-file=>"$fastafile", -format => "fasta") or die "Can't open $fastafile";
pod2usage(-msg => "Need a spacer or length", -exitval => 1) unless ($spacer or $length);
open OUT1, ">$out1" or die "Can't open $out1";
open OUT2, ">$out2" or die "Can't open $out2";
while (my $seq = $infile->next_seq){
my $sequence = $seq -> seq;
my $id = $seq -> display_id;
if ($length>0){
my $seq_length=length($sequence);
my $read1=substr($sequence, 0, $length);
$sequence=~/^$read1(\w*)/;
my $read2=$1;
print OUT1 ">$id\n".$read1."\n";
print OUT2 ">$id\n".$read2."\n";
}
elsif ($sequence=~/$spacer/){
my @reads=split(/$spacer/,$sequence);
print OUT1 ">$id\n".$reads[0]."\n";
print OUT2 ">$id\n".$reads[1]."\n";
}
else{
print "Warning: sequence $id does not possess spacer $spacer\n";
}
}
close OUT1;
close OUT2;