Skip to content

Commit

Permalink
Added ROSE workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
LeonHafner committed Jan 6, 2024
1 parent 8c6c89e commit ef90358
Show file tree
Hide file tree
Showing 14 changed files with 898 additions and 3 deletions.
43 changes: 43 additions & 0 deletions bin/bed_to_gff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python3

import argparse

def convert_bed_to_gff(bed_filename, gff_filename):
    """Convert a BED file into a minimal nine-column, tab-separated GFF file.

    For every BED record the first three whitespace-separated fields are used
    as seqid, start and end. The start is shifted by one to go from the
    0-based BED coordinate to the 1-based GFF coordinate; the end is copied
    through unchanged. All remaining GFF columns receive fixed placeholder
    values.

    Lines with fewer than three fields are skipped, as are BED header and
    comment lines (``track``, ``browser`` and lines starting with ``#``).

    Args:
        bed_filename: Path of the BED file to read.
        gff_filename: Path of the GFF file to write (overwritten if present).

    Raises:
        ValueError: If the start field of a data line is not an integer.
        OSError: If either file cannot be opened.
    """
    # Fixed values for the GFF columns BED does not provide (column names as
    # in https://en.wikipedia.org/wiki/General_feature_format)
    source = 'bed2gff'
    feature_type = 'region'
    score = '.'
    strand = '.'
    phase = '.'
    attributes = '.'

    gff_lines = []

    with open(bed_filename, 'r') as bed:
        for line in bed:
            parts = line.strip().split()
            if len(parts) < 3:
                # skip blank or otherwise invalid lines
                continue
            if parts[0].startswith('#') or parts[0] in ('track', 'browser'):
                # header/comment lines can have three or more fields and
                # would otherwise fail the integer conversion below
                continue

            seqid, start, end = parts[0], int(parts[1]), parts[2]
            # adjust for 0-based to 1-based coordinate
            start += 1

            gff_lines.append(
                f"{seqid}\t{source}\t{feature_type}\t{start}\t{end}\t{score}\t{strand}\t{phase}\t{attributes}\n"
            )

    # The output is written only after the whole input parsed cleanly, so a
    # malformed BED file never leaves a partial GFF behind. Every record,
    # including the last one, is newline-terminated.
    with open(gff_filename, 'w') as gff:
        gff.writelines(gff_lines)

def main(argv=None):
    """Parse the command line and run the BED-to-GFF conversion.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default) argparse reads them from ``sys.argv``.

    Both ``--bed``/``-i`` and ``--gff``/``-o`` are mandatory; argparse exits
    with a usage message if either is missing, instead of letting ``None``
    reach ``open()`` further down.
    """
    parser = argparse.ArgumentParser(description='Convert BED file to GFF format.')
    parser.add_argument('--bed', '-i', type=str, required=True, help='Input BED file')
    parser.add_argument('--gff', '-o', type=str, required=True, help='Output GFF file')

    args = parser.parse_args(argv)

    convert_bed_to_gff(args.bed, args.gff)


if __name__ == "__main__":
    main()
34 changes: 34 additions & 0 deletions bin/reformat_gff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env python3

import pandas as pd
import argparse


def _parse_args(argv=None):
    """Parse the command-line arguments of the GFF-to-ROSE-GFF converter."""
    parser = argparse.ArgumentParser(prog="GFF to ROSE-GFF",
                                     description="Takes a standard GFF as input and reformats it into a ROSE input GFF")

    parser.add_argument("-i", "--input", required=True)
    parser.add_argument("-o", "--output", required=True)

    return parser.parse_args(argv)


def reformat_gff(path_input, path_output):
    """Rewrite a standard nine-column GFF into the column layout written for ROSE.

    The output is a header-less, tab-separated file with the columns
    seqname, enhancer id, empty, start, end, empty, strand, empty,
    enhancer id. Enhancer ids are ``enhancer_<n>`` with ``n`` counting rows
    from zero. Sequence names that do not already start with ``chr`` get
    that prefix; names that already carry it are left untouched.

    Args:
        path_input: Path of the tab-separated GFF file to read (no header row).
        path_output: Path of the reformatted file to write.
    """
    gff = pd.read_csv(path_input,
                      sep="\t",
                      names=["seqname", "source", "feature1", "start", "end", "score", "strand", "dot", "feature2"],
                      index_col=False,
                      # Purely numeric chromosome names ("1", "2", ...) would
                      # otherwise be inferred as integers, on which the .str
                      # accessor below raises AttributeError.
                      dtype={"seqname": str}).drop(columns=["score", "feature1", "dot", "feature2"])

    # Prefix row by row so that a file mixing "chr1" and "2" does not end up
    # with "chrchr1".
    has_prefix = gff["seqname"].str.startswith("chr", na=False)
    gff["seqname"] = gff["seqname"].where(has_prefix, "chr" + gff["seqname"])

    # The same identifier is written to the second and the last column.
    gff["source"] = ["enhancer_" + str(num) for num in range(gff.shape[0])]
    gff["id2"] = gff["source"]
    gff["empty1"] = ""
    gff["empty2"] = ""
    gff["empty3"] = ""
    gff = gff[["seqname", "source", "empty1", "start", "end", "empty2", "strand", "empty3", "id2"]]

    gff.to_csv(path_output, sep="\t", header=False, index=False)


if __name__ == "__main__":
    args = _parse_args()
    reformat_gff(args.input, args.output)
Loading

0 comments on commit ef90358

Please sign in to comment.