From 699be5bbdad0570c07769b10a41de307bd85cb79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Aleksander=20Cis=C5=82ak?= <alex.cis91@gmail.com>
Date: Sat, 30 Jun 2018 12:46:50 +0200
Subject: [PATCH] Slightly updated script parameter naming convention.

---
 README.md                 | 24 ++++++++++++------------
 run_all.sh                | 10 +++++-----
 scripts/ed_histogram.py   |  4 ++--
 scripts/generate_synth.py | 38 +++++++++++++++++++-------------------
 4 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/README.md b/README.md
index 1787cf9..2f7a90c 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ Testing on human genome and synthetic data.
 
 1. Generate synthetic data by running `python generate_synth.py` (requires Python 2.7) 4 times for the number of segments (parameter `nSegments`) set to 100, 500, 1000, and 1600 thousands. Rename files as `chr24.eds`, `chr25.eds`, `chr26.eds`, `chr27.eds`.
 
-1. Set parameter `inputDir` in `run_all.sh` to the folder containing `.eds` files and pattern files (all pattern files are located in the `sample/` folder as part of this package).
+1. Set parameter `inDir` in `run_all.sh` to the folder containing `.eds` files and pattern files (all pattern files are located in the `sample/` folder as part of this package).
 
 1. Compile SoPanG (see above).
 
@@ -80,7 +80,7 @@ Parameter name  | Parameter description
 
 Parameter name   | Parameter description
 ---------------- | ---------------------
-`dBufferSize`    | buffer size for processing segment variants, the size of the largest segment (i.e., the number of variants) from the input file cannot be larger than this value
+`dBufferSize`    | buffer size for processing segment variants, the size of the largest segment (i.e. the number of variants) from the input file cannot be larger than this value
 `maskBufferSize` | buffer size for Shift-Or masks for the input alphabet, must be larger than the largest input character ASCII code
 `wordSize`       | word size (in bits) used by the Shift-Or algorithm
 
@@ -90,23 +90,23 @@ Parameter name   | Parameter description
 
 Parameter name   | Parameter description
 ---------------- | ---------------------
-`inputDir`       | input directory path containing `.eds` ED text files and `.txt` input pattern files
+`inDir`          | input directory path containing `.eds` ED text files and `.txt` input pattern files
 `outFile`        | base name for output files
 
 #### ed_histogram.py (scripts folder)
 
 Parameter name         | Parameter description
 ---------------------- | ---------------------
-`inputDir`             | input directory path containing `.eds` ED text files
+`pInDir`               | input directory path containing `.eds` ED text files
 
 #### generate_synth.py (scripts folder)
 
-Parameter name         | Parameter description
----------------------- | ---------------------
-`nSegments`            | total number of segments
-`alphabet`             | alphabet for character sampling
-`nDegeneratePositions` | number of segments (must be smaller than or equal to `nSegments`) which are non-deterministic, i.e., contain multiple variants
-`nMaxSegmentVariants`  | maximum number of variants (`a`), the number of variants for each non-deterministic segment will be sampled from the interval `[2, a]`
-`nMaxVariantLength`    | maximum length of each segment variant (`b`), the length for each variant will be sampled from the interval `[0, b]` (segments might contain empty words)
-`outFile`              | output file path
+Parameter name          | Parameter description
+----------------------- | ---------------------
+`pNSegments`            | total number of segments
+`pAlphabet`             | alphabet for character sampling
+`pNDegeneratePositions` | number of segments (must be smaller than or equal to `pNSegments`) which are non-deterministic, i.e. contain multiple variants
+`pNMaxSegmentVariants`  | maximum number of variants (`a`), the number of variants for each non-deterministic segment will be sampled from the interval `[2, a]`
+`pNMaxVariantLength`    | maximum length of each segment variant (`b`), the length for each variant will be sampled from the interval `[0, b]` (segments might contain empty words)
+`pOutFile`              | output file path
 
diff --git a/run_all.sh b/run_all.sh
index 15857db..fc45666 100755
--- a/run_all.sh
+++ b/run_all.sh
@@ -1,12 +1,12 @@
 #!/bin/sh
 
-inputDir="data"
+inDir="data"
 outFile="out"
 
 for i in $(seq 1 27);
 do
-    ./sopang -d -o ${outFile}8.txt $inputDir/chr${i}.eds $inputDir/patterns8.txt;
-    ./sopang -d -o ${outFile}16.txt $inputDir/chr${i}.eds $inputDir/patterns16.txt;
-    ./sopang -d -o ${outFile}32.txt $inputDir/chr${i}.eds $inputDir/patterns32.txt;
-    ./sopang -d -o ${outFile}64.txt $inputDir/chr${i}.eds $inputDir/patterns64.txt;
+    ./sopang -d -o ${outFile}8.txt $inDir/chr${i}.eds $inDir/patterns8.txt;
+    ./sopang -d -o ${outFile}16.txt $inDir/chr${i}.eds $inDir/patterns16.txt;
+    ./sopang -d -o ${outFile}32.txt $inDir/chr${i}.eds $inDir/patterns32.txt;
+    ./sopang -d -o ${outFile}64.txt $inDir/chr${i}.eds $inDir/patterns64.txt;
 done
diff --git a/scripts/ed_histogram.py b/scripts/ed_histogram.py
index 17a967e..24ab7ec 100644
--- a/scripts/ed_histogram.py
+++ b/scripts/ed_histogram.py
@@ -6,11 +6,11 @@
 import os
 import re
 
-inputDir = "../sample" # Input directory path containing `.eds` ED text files.
+pInDir = "../sample" # Input directory path containing `.eds` ED text files.
 
 def main():
     res = []
-    edsFiles = [os.path.join(inputDir, f) for f in os.listdir(inputDir) if f.endswith(".eds")]
+    edsFiles = [os.path.join(pInDir, f) for f in os.listdir(pInDir) if f.endswith(".eds")]
 
     for edsFile in edsFiles:
         print "Reading file: {0}".format(edsFile)
diff --git a/scripts/generate_synth.py b/scripts/generate_synth.py
index 8d22d4b..35dfd72 100644
--- a/scripts/generate_synth.py
+++ b/scripts/generate_synth.py
@@ -15,49 +15,49 @@
 
 import random
 
-nSegments = 100 * 1000 # Total number of segments: 100, 500, 1000, 1600 thousands segments.
-alphabet = "ACGTN" # Alphabet for character sampling.
+pNSegments = 100 * 1000 # Total number of segments: 100, 500, 1000, or 1600 thousand.
+pAlphabet = "ACGTN" # Alphabet for character sampling.
 
-# Number of segments (must be smaller than or equal to nSegments) which are degenerate (indeterminate),
+# Number of segments (must be smaller than or equal to pNSegments) which are non-deterministic,
 # i.e. contain multiple variants.
-nDegeneratePositions = int(0.1 * nSegments) # 10% of the text as in Grossi et al.
+pNDegeneratePositions = int(0.1 * pNSegments) # 10% of the text as in Grossi et al.
 
-# Maximum number of variants (a), the number of variants for each degenerate segment
+# Maximum number of variants (a), the number of variants for each non-deterministic segment
 # will be sampled from the interval [2, a].
-nMaxSegmentVariants = 10
+pNMaxSegmentVariants = 10
 
 # Maximum length of each segment variant (b), the length for each variant 
 # will be sampled from the interval [0, b] (segments might contain empty words).
-nMaxVariantLength = 10
+pNMaxVariantLength = 10
 
-outFile = "text.eds" # Output file path.
+pOutFile = "text.eds" # Output file path.
 
 def main():
-    textSizeMB = round(nSegments / 1000.0 / 1000.0, 3)
-    print "Started, alph = \"{0}\", text size = {1}m".format(alphabet, textSizeMB)
+    textSizeMB = round(pNSegments / 1000.0 / 1000.0, 3)
+    print "Started, alph = \"{0}\", text size = {1}m".format(pAlphabet, textSizeMB)
 
-    text = randomString(alphabet, nSegments)
+    text = randomString(pAlphabet, pNSegments)
 
     # Randomly drawn degenerate positions.
-    degenPosList = random.sample(xrange(nSegments), nDegeneratePositions)
+    degenPosList = random.sample(xrange(pNSegments), pNDegeneratePositions)
     # Dictionary: position in text -> list of a few strings.
     degenStrings = {}
 
-    print "Generating degenerate strings for #positions = {0}k".format(nDegeneratePositions / 1000.0)
+    print "Generating degenerate strings for #positions = {0}k".format(pNDegeneratePositions / 1000.0)
 
     for curPos in degenPosList:
         # Degenerate letter is defined as a "non-empty set of strings".
-        howMany = random.randint(2, nMaxSegmentVariants)
+        howMany = random.randint(2, pNMaxSegmentVariants)
         curSet = set()
 
         while len(curSet) < howMany:
-            curLen = random.randint(0, nMaxVariantLength) # Includes empty strings.
-            curStr = randomString(alphabet, curLen)
+            curLen = random.randint(0, pNMaxVariantLength) # Includes empty strings.
+            curStr = randomString(pAlphabet, curLen)
             curSet.add(curStr)
 
         degenStrings[curPos] = curSet
 
-    assert len(degenPosList) == len(degenStrings) == nDegeneratePositions
+    assert len(degenPosList) == len(degenStrings) == pNDegeneratePositions
     dumpToFile(text, set(degenPosList), degenStrings)
 
 def randomString(alph, size):
@@ -89,10 +89,10 @@ def dumpToFile(text, degenPosSet, degenStrings):
 
                 outStr += curRun + "}"
 
-    with open(outFile, "w") as f:
+    with open(pOutFile, "w") as f:
         f.write(outStr)
 
-    print "Dumped to file: {0}".format(outFile)
+    print "Dumped to file: {0}".format(pOutFile)
 
 if __name__ == "__main__":
     main()