A comprehensive annotation of non-coding RNAs in available bat genome assemblies. Here, we provide all major code written and used to annotate and merge various ncRNAs in available bat genomes.
Please note that the scripts need to be adjusted to your working environment. The calls of the scripts are documented below. The scripts are provided for illustrative purposes and are not meant to be run in their entirety on other data sets.
As input for the scripts, the annotation files (GTF) provided at our supplement page or the formatted bat genomes provided at the OSF repository are needed. For some scripts, make sure that BATLIST.csv is available in the current directory.
# Settings shared by the lncRNA steps below; adjust them to your environment.
# Python interpreter used to run the helper scripts.
python='python3.6'
# Directory that receives the BLAST results and everything derived from them.
# "~" is not expanded inside quotes, so the home directory is spelled via $HOME.
out_dir="${HOME}/lncrna"
# FASTA file used as the BLAST query.
lncipedia='lncipedia_5_2_hc.fasta'
##### blast and sort
# For every genome FASTA matching data/genomes/*.renamed.fa: run blastn with
# $lncipedia as query, sort the tabular hits and keep only the sorted file.
# NOTE(review): "-db" is given the FASTA path itself, so a BLAST database is
# presumably expected to exist under that same name - confirm.
for i in data/genomes/*.renamed.fa; do
  # An unmatched glob stays literal; do not hand that to blastn.
  [[ -e "$i" ]] || continue
  basename=$(basename "$i")
  # Species label: file name up to its first dot.
  species=${basename%%.*}
  echo "started $species"
  blast_out="$out_dir/$species.blast"
  if ! blastn -task blastn -num_threads 6 -query "$lncipedia" -db "$i" \
    -evalue 1e-10 \
    -outfmt "6 qseqid sseqid pident length mismatch gapopen qstart qend qlen sstart send evalue bitscore slen" \
    > "$blast_out"; then
    # Do not sort (and later consume) a truncated hit list.
    echo "ERROR: blastn failed for $species" >&2
    rm -f -- "$blast_out"
    continue
  fi
  # The unsorted file is only removed once the sorted copy has been written.
  if sort "$blast_out" -o "$blast_out.sorted"; then
    rm -- "$blast_out"
    echo "$species done"
  else
    echo "ERROR: sort failed for $species" >&2
  fi
done
##### find transcripts, generate gtf
# Run the two helper scripts on every sorted BLAST table in $out_dir.
# The "/" between $out_dir and the pattern is required; without it the glob
# looks for siblings of the directory instead of files inside it.
for i in "$out_dir"/*.blast.sorted; do
  # An unmatched glob stays literal; nothing to do in that case.
  [[ -e "$i" ]] || continue
  basename=$(basename "$i")
  # Species label: file name up to its first dot.
  species=${basename%%.*}
  echo "started $species"
  # $python is left unquoted on purpose so the setting may carry options.
  # NOTE(review): the first script presumably writes "$i.transcripts", which
  # the second one then converts - confirm against the Python sources.
  if $python 'find_transcripts_from_blast.py' "$i" \
    && $python 'blast_transcripts_to_gtf.py' "$i.transcripts"; then
    echo "$species done"
  else
    echo "ERROR: transcript extraction failed for $species" >&2
  fi
done
##### merge piles of transcripts
# Run merge_stacks.py once per transcript GTF found in $out_dir.
for i in "$out_dir"/*.blast.sorted.transcripts.gtf; do
  # An unmatched glob stays literal; skip instead of passing it on.
  [[ -e "$i" ]] || continue
  echo "started $i"
  # $python is left unquoted on purpose so the setting may carry options.
  if $python 'merge_stacks.py' "$i"; then
    echo "$i done"
  else
    # Report the failure instead of announcing the file as done.
    echo "ERROR: merge_stacks.py failed for $i" >&2
  fi
done
Used scripts:
python3 convert_mitos_output.py
Used script:
python3 make_ids_unique.py
Used script:
# Merge the per-source GTF files of every bat listed in BATLIST.csv with
# merge_gtf_global_ids.py. The merged GTF (stdout) goes to merged/gtf/, the
# script's messages (stderr) are shown and saved under merged/mergelog/.
# The list is split on whitespace exactly as the former `cat` expansion did,
# but without glob expansion. read returns non-zero at end of file even though
# the array has been filled, hence the "|| true".
bats=()
read -r -d '' -a bats < BATLIST.csv || true
for BAT in "${bats[@]}"; do
  echo "$BAT"
  # The three *_gorap inputs are looked up under the upper-cased ID.
  # Redirection order is deliberate: stderr is attached to the pipe first,
  # then stdout is pointed at the output file.
  merge_gtf_global_ids.py \
    "mito/gtf/${BAT}.gtf" "tRNAs/gtf/${BAT}.gtf" "rRNA/gtf/${BAT}.gtf" \
    "other_gorap/gtf/${BAT^^}.gtf" "snoRNA_gorap/gtf/${BAT^^}.gtf" \
    "miRNA_gorap/gtf/${BAT^^}.gtf" "miRNA_mirdeep/gtf/${BAT}.gtf" \
    2>&1 > "merged/gtf/${BAT}.gtf" | tee "merged/mergelog/${BAT}.log"
  # A pipeline reports only tee's status; inspect both stages explicitly so a
  # failing merge is actually noticed. Remaining bats are still processed.
  rc=("${PIPESTATUS[@]}")
  if (( rc[0] != 0 || rc[1] != 0 )); then
    echo "ERROR in $BAT" >&2
  fi
done
Used script:
# Convert every annotation file named with exactly three characters plus
# ".gff" in annotations/abbr/ using format_ncbi.py. The converter's stdout is
# stored as NCBI_converted/gtf/<ID>.gtf, its stderr as
# NCBI_converted/convertlog/<ID>.log.
for ANNO in annotations/abbr/???.gff; do
  # An unmatched glob stays literal and would create a bogus "???.gtf".
  [[ -e "$ANNO" ]] || continue
  # File name without directory, then cut at the first dot to get the ID.
  BATA=${ANNO##*/}
  BAT=${BATA%%.*}
  format_ncbi.py "$ANNO" > "NCBI_converted/gtf/${BAT}.gtf" \
    2> "NCBI_converted/convertlog/${BAT}.log" \
    || echo "ERROR in $BAT" >&2
done
Used script:
# Combine, for every bat listed in BATLIST.csv, the merged ncRNA GTF with the
# converted NCBI GTF using merge_gtf_ncbi.py. The result (stdout) goes to
# NCBI_merged/gtf/, the script's messages (stderr) are shown and saved under
# NCBI_merged/mergelog/. Processing stops at the first failing bat.
# The list is split on whitespace exactly as the former `cat` expansion did,
# but without glob expansion. read returns non-zero at end of file even though
# the array has been filled, hence the "|| true".
bats=()
read -r -d '' -a bats < BATLIST.csv || true
for BAT in "${bats[@]}"; do
  echo "$BAT"
  # Redirection order is deliberate: stderr is attached to the pipe first,
  # then stdout is pointed at the output file.
  merge_gtf_ncbi.py \
    "merged/gtf/${BAT}.gtf" "NCBI_converted/gtf/${BAT}.gtf" \
    2>&1 > "NCBI_merged/gtf/${BAT}.gtf" | tee "NCBI_merged/mergelog/${BAT}.log"
  # "pipeline || break" only ever saw tee's status, and break itself returns
  # 0, so the error message could never be printed. Check both stages instead.
  rc=("${PIPESTATUS[@]}")
  if (( rc[0] != 0 || rc[1] != 0 )); then
    echo "ERROR in $BAT" >&2
    break
  fi
done
Used script: