phac-nml · kevinkle · Feb 2, 2018 · Feb 3, 2018 · Feb 3, 2018 · Feb 3, 2018
diff --git a/.gitignore b/.gitignore
@@ -110,3 +110,6 @@ output/
 validation/enterobase_90_50_with_blacklist.csv
 .coveragerc
 coverage_html_report/
+
+# Conda-build backup files
+*.bak
diff --git a/.travis.yml b/.travis.yml
@@ -1,6 +1,7 @@
 language: python
 python:
   # We don't actually use the Travis Python, but this keeps it organized.
+  - "2.7"
   - "3.6"
 install:
   - sudo apt-get update
@@ -18,10 +19,27 @@ install:
   - conda update -q conda
   # Useful for debugging any issues with conda
   - conda info -a
-  - conda config --add channels bioconda
-  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION samtools bowtie2 mash bcftools biopython nose blast pandas seqtk
+  - conda config --add channels bioconda --add channels conda-forge
+  - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION samtools bowtie2 mash bcftools biopython nose blast pandas seqtk future
   - source activate test-environment
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+      conda install backports.weakref && conda install -c kevinkle subprocess32;
+    else
+      true;
+    fi
   - python setup.py install
-
+  # Setup automatic conda uploading.
+  # - conda install anaconda-client
+  # - conda config --set anaconda_upload yes
+  # test the conda build
+  - conda install conda-build
+  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
+      conda build -c kevinkle -c bioconda recipe/ --python=2.7;
+    else
+      conda build -c bioconda recipe/ --python=3.6;
+    fi
+  -
 script:
-  - nosetests
+  - nosetests
+notifications:
+  email: false
diff --git a/README.md b/README.md
@@ -1,7 +1,9 @@
 # ECTyper (an easy typer)
-**ecyper** wraps a standalone serotyping module for _Escherichia coli_. 
+**ectyper** wraps a standalone serotyping module for _Escherichia coli_.
 Supports _fasta_ and _fastq_ file formats.
 
+[![Build Status](https://travis-ci.org/phac-nml/ecoli_serotyping.svg?branch=master)](https://travis-ci.org/phac-nml/ecoli_serotyping)
+
 # Dependencies:
 - python 3.6.3.*
 - pandas 0.21.0.*
@@ -19,8 +21,8 @@ Supports _fasta_ and _fastq_ file formats.
     1. `bash miniconda.sh -b -p $HOME/miniconda`
     1. `export PATH="$HOME/miniconda/bin:$PATH"`
 2. Install ectyper  
-    * Directly via `conda` 
-    	1. `conda install -c bioconda ectyper` 
+    * Directly via `conda`
+    	1. `conda install -c bioconda ectyper`
     * Through `github`
     	1. Install dependencies
           `conda install pandas samtools bowtie2 mash bcftools biopython nose blast seqtk tqdm python=3.6`
@@ -62,3 +64,13 @@ optional arguments:
                         Directory location of output files.
 ```
 * The first time species identification is enabled you will need to wait for **ectyper** to download the reference sequences.
+
+# Building the conda package
+Python 2.7
+(requires a custom version of subprocess32 from the channel kevinkle)
+
+`conda build -c kevinkle -c bioconda recipe/ --python=2.7`
+
+Python 3.6
+
+`conda build -c bioconda recipe/ --python=3.6`
diff --git a/ectyper.puml b/ectyper.puml
diff --git a/ectyper/blastFunctions.py b/ectyper/blastFunctions.py
@@ -3,6 +3,14 @@
 """
 Functions for setting up, running, and parsing blast
 """
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from builtins import open
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
 import logging
 import os
 

diff --git a/ectyper/commandLineOptions.py b/ectyper/commandLineOptions.py
@@ -1,5 +1,12 @@
 #!/usr/bin/env python
 
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
+from builtins import int
+from future import standard_library
+standard_library.install_aliases()
 import argparse
 
 

diff --git a/ectyper/definitions.py b/ectyper/definitions.py
@@ -3,12 +3,25 @@
 """
     Definitions for the ectyper project
 """
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
 
+from future import standard_library
+standard_library.install_aliases()
 import os
+import sys
 
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
 DATA_DIR = os.path.join(ROOT_DIR, 'Data')
-WORKPLACE_DIR = os.getcwd()
+# os.getcwdu() exists only on Python 2, where it returns the cwd as unicode.
+try:
+    # Python2
+    WORKPLACE_DIR = os.getcwdu()
+except AttributeError:
+    # Python3: getcwdu() was removed; os.getcwd() already returns str.
+    WORKPLACE_DIR = os.getcwd()
 
 SEROTYPE_FILE = os.path.join(DATA_DIR, 'ectyper_data.fasta')
 SEROTYPE_ALLELE_JSON = os.path.join(DATA_DIR, 'ectyper_dict.json')
@@ -18,3 +31,22 @@
 SAMTOOLS = 'samtools'
 REFSEQ_SUMMARY = os.path.join(DATA_DIR, 'assembly_summary_refseq.txt')
 REFSEQ_SKETCH = os.path.join(DATA_DIR, 'refseq.genomes.k21s1000.msh')
+
+if os.name == 'posix' and sys.version_info[0] < 3:
+    # Python2
+    from ectyper.tempfile import TemporaryDirectory
+    from tempfile import NamedTemporaryFile
+else:
+    # Python3
+    from tempfile import TemporaryDirectory, NamedTemporaryFile
+# Aliases
+TEMPDIR = TemporaryDirectory
+NAMEDTEMPFILE = NamedTemporaryFile
+
+# Python 2.7 Compatibility
+if sys.version_info[0] < 3:
+    # In Python 2.7, Pandas will need binary (not unicode) when using open().
+    read_flags = 'rb'
+else:
+    # Python 3.6 will read as unicode text when using open().
+    read_flags = 'r'
diff --git a/ectyper/ectyper.py b/ectyper/ectyper.py
@@ -2,10 +2,17 @@
 """
     Predictive serotyping for _E. coli_.
 """
+from __future__ import division
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import absolute_import
+from builtins import range
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
 import logging
 import os
 import sys
-import tempfile
 import datetime
 from urllib.request import urlretrieve
 
@@ -40,7 +47,7 @@ def run_program():
     LOG.debug(args)
 
     ## Initialize temporary directories for the scope of this program
-    with tempfile.TemporaryDirectory() as temp_dir:
+    with definitions.TEMPDIR() as temp_dir:
         temp_files = create_tmp_files(temp_dir, output_dir=args.output)
         LOG.debug(temp_files)
 
@@ -167,22 +174,22 @@ def create_tmp_files(temp_dir, output_dir=None):
 
 def run_prediction(genome_files, args, predictions_file):
     '''Core prediction functionality
-    
+
     Args:
         genome_files:
             list of genome files
         args:
             commandline arguments
         predictions_file:
             filename of prediction output
-    
+
     Returns:
         predictions_file with prediction written in it
     '''
     query_file = definitions.SEROTYPE_FILE
     ectyper_dict_file = definitions.SEROTYPE_ALLELE_JSON
     # create a temp dir for blastdb
-    with tempfile.TemporaryDirectory() as temp_dir:
+    with definitions.TEMPDIR() as temp_dir:
         # Divide genome files into chunks
         chunk_size = 50
         genome_chunks = [
@@ -191,6 +198,8 @@ def run_prediction(genome_files, args, predictions_file):
         ]
         for index, chunk in enumerate(genome_chunks):
             LOG.info("Start creating blast database #{0}".format(index + 1))
+            LOG.info("Using SEROTYPE_FILE: {0}".format(query_file))
+            LOG.info("Using SEROTYPE_ALLELE_JSON: {0}".format(ectyper_dict_file))
             blast_db = blastFunctions.create_blast_db(chunk, temp_dir)
 
             LOG.info("Start blast alignment on database #{0}".format(index + 1))
@@ -204,10 +213,10 @@ def run_prediction(genome_files, args, predictions_file):
 
 def get_raw_files(raw_files):
     """Take all the raw files, and filter not fasta / fastq
-    
+
     Args:
         raw_files(str): list of files from user input
-    
+
     Returns:
         A dictitionary collection of fasta and fastq files
         example:
@@ -235,7 +244,7 @@ def filter_for_ecoli_files(raw_dict, temp_files, verify=False, species=False):
     Assemble fastq files to fasta files,
     then filter all files by reference method if verify is enabled,
     if identified as non-ecoli, identify species by mash method if species is enabled.
-    
+
     Args:
         raw_dict{fasta:list_of_files, fastq:list_of_files}:
             dictionary collection of fasta and fastq files
@@ -266,7 +275,7 @@ def filter_file_by_species(genome_file, genome_format, temp_dir, verify=False, s
     Assemble fastq file to fasta file,
     then filter the file by reference method if verify is enabled,
     if identified as non-ecoli, identify species by mash method if species is enabled.
-    
+
     Args:
         genome_file: input genome file
         genome_format(str): fasta or fastq

diff --git a/ectyper/genomeFunctions.py b/ectyper/genomeFunctions.py
@@ -1,12 +1,18 @@
 '''
 Genome Utilities
 '''
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
 #!/usr/bin/env python
 
+from builtins import str
+from future import standard_library
+standard_library.install_aliases()
 import logging
 import os
 import re
-import tempfile
 from tarfile import is_tarfile
 
 from Bio import SeqIO
@@ -63,14 +69,14 @@ def get_valid_format(file):
     """
     for fm in ['fastq', 'fasta']:
         try:
-            with open(file, "r") as handle:
+            with open(file, definitions.read_flags) as handle:
                 data = SeqIO.parse(handle, fm)
                 if any(data):
                     if is_tarfile(file):
                         LOG.warning("Compressed file is not supported: {}".format(file))
                         return None
                     return fm
-        except FileNotFoundError as err:
+        except IOError as err:
             LOG.warning("{0} is not found".format(file))
             return None
         except UnicodeDecodeError as err:
@@ -113,14 +119,14 @@ def get_genome_names_from_files(files, temp_dir):
         n_name = file_path_name.replace(' ', '_')
 
         # create a new file for the updated fasta headers
-        new_file = tempfile.NamedTemporaryFile(dir=temp_dir, delete=False).name
+        new_file = definitions.NAMEDTEMPFILE(dir=temp_dir, delete=False).name
 
         # add the new name to the list of files and genomes
         list_of_files.append(new_file)
         list_of_genomes.append(n_name)
 
         with open(new_file, "w") as outfile:
-            with open(file) as infile:
+            with open(file, definitions.read_flags) as infile:
                 for record in SeqIO.parse(infile, "fasta"):
                     outfile.write(">lcl|" + n_name + "|" + record.description + "\n")
                     outfile.write(str(record.seq) + "\n")

diff --git a/ectyper/loggingFunctions.py b/ectyper/loggingFunctions.py
@@ -2,7 +2,13 @@
 """
     Set up the logging
 """
+from __future__ import unicode_literals
+from __future__ import print_function
+from __future__ import division
+from __future__ import absolute_import
 
+from future import standard_library
+standard_library.install_aliases()
 import logging
 import os