Skip to content

Commit

Permalink
test site
Browse files Browse the repository at this point in the history
  • Loading branch information
YuWei-CH committed May 29, 2024
1 parent 2a464b7 commit 3bac5dc
Show file tree
Hide file tree
Showing 4 changed files with 254 additions and 22 deletions.
96 changes: 96 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,102 @@ def submit():
"You'll receive an e-mail when job is done with download link"), 'info')
return render_template('form.html', form=form)

# test site
@app.route('/test', methods=['GET', 'POST'])
def submit_test():
    """Handle the test-site submission form.

    A GET request, or a POST whose form does not validate, renders the form.
    A valid POST logs the submission, stores the user's files in a
    per-submission directory (falling back to the bundled defaults for
    anything not uploaded), enqueues ``redisjob`` on the Redis-backed queue
    and flashes the job id.

    Validation failures flash an error and redirect back to this test form.
    """
    # Default in_fasta and in_gff (plus references and up/down-stream
    # sequences) used when the user does not supply their own.
    DEFAULT_FILES = {
        'ref_fasta': './staticData/ref/Mus_musculus.GRCm38.dna.toplevel.fa',
        'ref_gff': './staticData/ref/Mus_musculus.GRCm38.88.gff3',
        'in_fasta': './staticData/inserted/test-in.fa',
        'in_gff': './staticData/inserted/test-in.gtf',
        'upstream_fasta': './staticData/up-down-seq/test-up.fa',
        'downstream_fasta': './staticData/up-down-seq/test-down.fa'
    }
    # Reference URLs that are replaced by the local copies in DEFAULT_FILES.
    DEFAULT_REF_URLS = {
        'ref_fasta': 'ftp://ftp.ensembl.org/pub/release-88/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.toplevel.fa.gz',
        'ref_gff': 'ftp://ftp.ensembl.org/pub/release-88/gff3/mus_musculus/Mus_musculus.GRCm38.88.gff3.gz',
    }

    form = Testjob(request.form)  # test job
    if request.method != 'POST' or not form.validate():
        return render_template('form.html', form=form)

    has_upstream = bool(request.files['upstream_fasta'].filename)
    has_downstream = bool(request.files['downstream_fasta'].filename)
    position = request.form['position']

    # Position and up/down-stream sequences are mutually exclusive.
    if (has_upstream or has_downstream) and position:
        flash("Error: You must provide either the position, or the upstream and downstream sequences.", 'error')
        return redirect(url_for('submit_test'))
    # Up/down-stream sequences must be uploaded as a pair.
    if has_upstream != has_downstream:
        flash("Error: Must enter both upstream and downstream", 'error')
        return redirect(url_for('submit_test'))
    # Supplying neither a position nor up/down-stream files is allowed here:
    # the default up/down-stream FASTA files are used in that case.

    # User Submits Job #
    # (1) Create unique ID for each submission
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f')
    target_dir = os.path.join(UPLOAD_FOLDER, timestamp)

    # (2) Log to Database
    if not os.path.isfile('database.db'):
        db_create()
    db_submit(request, timestamp)

    # (3) Upload files from user device to server
    # Verify all files before uploading; stop at the first invalid one
    # (verify_test_uploads flashes the error message itself).
    if not all(verify_test_uploads(file_key) for file_key in UPLOAD_FILES):
        return redirect(url_for('submit_test'))

    # Names of all files that will be passed to run.sh: either the uploaded
    # file saved under UPLOAD_DIR/timestamp/ or the bundled default.
    uploaded_files = {
        file_key: upload_test(target_dir, file_key, DEFAULT_FILES)
        for file_key in UPLOAD_FILES
    }

    # Up/down-stream sequences are only needed when no position was given.
    for file_key in ('upstream_fasta', 'downstream_fasta'):
        if position:
            uploaded_files[file_key] = None
        else:
            uploaded_files[file_key] = upload_test(target_dir, file_key, DEFAULT_FILES)

    # Replace the default reference URLs with the local file paths; any other
    # URL is passed through unchanged.
    for ref_key, default_url in DEFAULT_REF_URLS.items():
        submitted_url = request.form[ref_key]
        if submitted_url == default_url:
            uploaded_files[ref_key] = DEFAULT_FILES[ref_key]
        else:
            uploaded_files[ref_key] = submitted_url

    # Use same Redis for production site and test site
    redis_conn = Redis()  # connection to the default Redis server on localhost
    q = Queue(connection=redis_conn, default_timeout=3000)

    job = q.enqueue(redisjob, args=(target_dir,
                                    timestamp,
                                    request.form['email'],
                                    request.form['chrom'],
                                    uploaded_files['upstream_fasta'],
                                    uploaded_files['downstream_fasta'],
                                    position,
                                    uploaded_files['ref_fasta'],
                                    uploaded_files['ref_gff'],
                                    uploaded_files['in_fasta'],
                                    uploaded_files['in_gff']
                                    ),
                    result_ttl=-1,
                    job_timeout=3000
                    )
    db_update(timestamp, "jobID", job.get_id())
    flash(Markup('JOB ID: ' + job.get_id() + '<br>' +
                 "You'll receive an e-mail when job is done with download link"), 'info')
    return render_template('form.html', form=form)

@app.route('/download/<timestamp>')
def downloadFile(timestamp):
Expand Down
75 changes: 75 additions & 0 deletions forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,78 @@ class SubmitJob(Form):
URL(),
InputRequired()
])

# form for test job
class Testjob(Form):
    """Submission form for the test site.

    Mirrors the production job form but pre-fills the e-mail, chromosome and
    reference URLs, and makes every file upload optional so that the bundled
    test files can be used instead.
    """

    email = StringField('Email Address',
                        description="When job is complete this e-mail will receive the download links",
                        render_kw={
                            "autofocus": "",
                        },
                        validators=[
                            InputRequired(),
                            Email()
                        ],
                        default="[email protected]")  # hard code email

    chrom = StringField('Chromosome',
                        description="ID of the chromosome to modify. Must match ID in FASTA file.",
                        validators=[
                            InputRequired()
                        ],
                        default="1")  # Chromosome has been set to 1 default

    # POSITION
    position = StringField('Position',
                           description="Position in chromosome at which to insert <in_fasta>. Can use -1 to add to end "
                                       "of chromosome. Note: Position is 0-based",
                           validators=[Optional()])
    # OR
    upstream_fasta = FileField('Upstream Sequence',
                               description="FASTA file with upstream sequence. If no file is selected, the system will use 'test-up.fa' as a default.",
                               validators=[
                                   Optional()
                               ])
    downstream_fasta = FileField('Downstream Sequence',
                                 description="FASTA file with downstream sequence. If no file is selected, the system will use 'test-down.fa' as a default.",
                                 validators=[
                                     Optional()
                                 ])

    # Uploads (optional on the test site: a default file is used otherwise)
    # NOTE(review): `[ALLOWED_EXTENSIONS]` wraps the collection in another
    # list; confirm against ALLOWED_EXTENSIONS's definition that FileAllowed
    # receives the iterable of extensions it expects.
    in_fasta = FileField('Inserted Sequence (FASTA)',
                         description="Please upload the new sequence to be inserted into the reference genome. If no file is selected, the system will use 'test-in.fa' as a default.",
                         validators=[
                             Optional(),
                             FileAllowed([ALLOWED_EXTENSIONS], 'Invalid File Type'),
                         ])
    in_gff = FileField('Inserted Reference (gff3 or gtf)',
                       description="Please upload the GFF file describing the new FASTA sequence to be inserted. If no file is selected, the system will use 'test-in.gtf' as a default.",
                       validators=[
                           Optional(),
                           FileAllowed([ALLOWED_EXTENSIONS], 'Invalid File Type'),
                       ])
    # Downloads
    ref_fasta = StringField('Reference Sequence (FASTA)',
                            description="URL to reference FASTA file. e.g. ftp://ftp.ensembl.org/pub/release-88/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.toplevel.fa.gz",
                            render_kw={
                                "placeholder": "Enter Reference URL",
                            },
                            validators=[
                                URL(),
                                InputRequired()
                            ],
                            default="ftp://ftp.ensembl.org/pub/release-88/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna.toplevel.fa.gz")

    ref_gff = StringField('Reference Annotation (gff3 or gtf)',
                          description="URL to reference gff file. e.g. ftp://ftp.ensembl.org/pub/release-88/gff3/mus_musculus/Mus_musculus.GRCm38.88.gff3.gz",
                          render_kw={
                              "placeholder": "Enter Reference URL",
                          },
                          validators=[
                              URL(),
                              InputRequired()
                          ],
                          default="ftp://ftp.ensembl.org/pub/release-88/gff3/mus_musculus/Mus_musculus.GRCm38.88.gff3.gz")
34 changes: 34 additions & 0 deletions job.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,21 @@ def verify_uploads(file):
flash('Invalid File Type for ' + file, 'error')
return False

# verify upload files for test site
def verify_test_uploads(file):
    """Check the upload submitted under form key ``file`` for the test site.

    Returns True when no file was selected (the default file is used in that
    case) or when the selected file passes ``allowed_file``. Otherwise an
    'Invalid File Type' error is flashed and False is returned.
    """
    candidate = request.files[file]
    name = candidate.filename

    # Nothing selected: acceptable here, the default file is used instead.
    if name == '':
        return True

    if not (candidate and allowed_file(name)):
        flash('Invalid File Type for ' + file, 'error')
        return False
    return True



def upload(target_dir, file):
fileObj = request.files[file]
Expand All @@ -99,6 +114,25 @@ def upload(target_dir, file):
# save the file
fileObj.save(os.path.join(target_dir,
secure_filename(fileObj.filename)))
# upload file function for test site
def upload_test(target_dir, file_key, default_files):
    """Place one input file for a test job in ``target_dir``.

    If the request carries an upload under ``file_key`` it is saved into
    ``target_dir`` under its sanitized name. Otherwise a symlink to the
    default file ``default_files[file_key]`` is created in ``target_dir``.

    Args:
        target_dir: Directory for this submission; created if missing.
        file_key: Form field name of the upload.
        default_files: Mapping of form field name to default file path.

    Returns:
        The name of the file (or symlink) inside ``target_dir``.

    Raises:
        KeyError: If no file was uploaded and ``default_files`` has no entry
            for ``file_key``.
    """
    os.makedirs(target_dir, exist_ok=True)  # dirs for upload files

    # An absent or empty upload is falsy, which selects the default file.
    file_obj = request.files.get(file_key)
    if file_obj:
        # save the uploaded file
        filename = secure_filename(file_obj.filename)
        file_obj.save(os.path.join(target_dir, filename))
        # Return the name actually written to disk, not the raw client
        # filename, so later steps can locate the file.
        return filename

    # Use the default file if no file was uploaded, pass realpath
    src = os.path.abspath(default_files[file_key])
    link_name = os.path.basename(src)
    dst = os.path.join(target_dir, link_name)  # link name in target_dir
    # lexists also detects a dangling symlink, for which os.symlink would
    # otherwise raise FileExistsError.
    if not os.path.lexists(dst):
        os.symlink(src, dst)  # Create a soft link
    return link_name


def download(target_dir, URL):
Expand Down
71 changes: 49 additions & 22 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ target_dir="$1"
timestamp="$2"
email="$3"
chrom="$4"
ref_fastaURL="$5"
ref_gffURL="$6"
ref_fasta="$5"
ref_gff="$6"
in_fasta="$7"
in_gff="$8"

Expand All @@ -20,26 +20,53 @@ echo "########################################"
echo "[$(date "+%D %T")] START $timestamp"
echo "########################################"

# Download files from user provided URLs to server
ref_fasta=$(basename "$ref_fastaURL")
echo "wget --no-check-certificate -nv $ref_fastaURL -O $target_dir/$ref_fasta"
wget --no-check-certificate -nv $ref_fastaURL -O $target_dir/$ref_fasta
# Function to determine if a path is a URL
# Usage: is_url <string>
# Returns 0 when the string starts with http://, https:// or ftp://,
# and 1 otherwise (i.e. it is treated as a file path).
is_url() {
  case "$1" in
    http://* | https://* | ftp://*)
      return 0  # It's a URL
      ;;
    *)
      return 1  # It's a file path
      ;;
  esac
}

ref_gff=$(basename "$ref_gffURL")
echo "wget --no-check-certificate -nv $ref_gffURL -O $target_dir/$ref_gff"
wget --no-check-certificate -nv $ref_gffURL -O $target_dir/$ref_gff
# Function to process downloading and decompressing files
# Usage: download_and_decompress <url> <target_dir>
# Downloads <url> into <target_dir> under the URL's basename. If that name
# ends in ".gz" the file is decompressed in place with pigz, which leaves the
# file without the ".gz" suffix.
# Returns non-zero when the download or the decompression fails.
download_and_decompress() {
  local file_url=$1
  local target_path=$2
  local file_name
  # Assigned separately from `local` so a basename failure is not masked.
  file_name=$(basename "$file_url")

  echo "Downloading $file_url"
  wget --no-check-certificate -nv "$file_url" -O "$target_path/$file_name" || return 1

  if [[ $file_name == *.gz ]]; then
    echo "pigz -d $target_path/$file_name"
    pigz -d "$target_path/$file_name" || return 1
  fi
}

# Create the upload directories
mkdir -p "./$target_dir"

# Variables to hold the final paths to be used in the reform.py command
ref_fasta_path="$ref_fasta"
ref_gff_path="$ref_gff"

# Check and process the reference fasta file
if is_url "$ref_fasta"; then
  download_and_decompress "$ref_fasta" "./$target_dir"
  # A ".gz" download is decompressed by download_and_decompress, so the file
  # on disk no longer carries the ".gz" suffix.
  ref_fasta_name=$(basename "$ref_fasta")
  ref_fasta_path="./$target_dir/${ref_fasta_name%.gz}"
else
  echo "Using local file: $ref_fasta"
  ref_fasta_path="$ref_fasta"
fi

if [[ ${ref_gff: -3} == ".gz" ]]; then
echo "pigz -d $target_dir/$ref_gff"
pigz -d $target_dir/$ref_gff
ref_gff=${ref_gff:: -3}
# Check and process the reference gff file
if is_url "$ref_gff"; then
  download_and_decompress "$ref_gff" "./$target_dir"
  # Set the GFF path (not the FASTA path). A ".gz" download is decompressed
  # by download_and_decompress, so the suffix is stripped from the name.
  ref_gff_name=$(basename "$ref_gff")
  ref_gff_path="./$target_dir/${ref_gff_name%.gz}"
else
  echo "Using local file: $ref_gff"
  ref_gff_path="$ref_gff"
fi

# Run reform.py
Expand All @@ -48,21 +75,21 @@ mkdir -p ./results/$timestamp

# Build the reform.py command once so the logged command and the executed
# command cannot drift apart. Every expansion is quoted because the values
# originate from the submission form.
reform_cmd=(/home/reform/venv/bin/python reform.py --chrom "$chrom")

if [ -n "$position" ]; then
  # Insert at an explicit position
  reform_cmd+=(--position "$position")
else
  # Locate the insertion site from the upstream/downstream sequences
  reform_cmd+=(--upstream_fasta "./uploads/$timestamp/$upstream_fasta"
               --downstream_fasta "./uploads/$timestamp/$downstream_fasta")
fi

reform_cmd+=(--in_fasta "./uploads/$timestamp/$in_fasta"
             --in_gff "./uploads/$timestamp/$in_gff"
             --ref_fasta "$ref_fasta_path" --ref_gff "$ref_gff_path"
             --output_dir "./results/$timestamp/")

# Log the command, then run it
echo "${reform_cmd[@]}"
"${reform_cmd[@]}"

Expand Down

0 comments on commit 3bac5dc

Please sign in to comment.