From 48775a865613d55255b0524f19729bb51b8e63bd Mon Sep 17 00:00:00 2001
From: Xiaming Chen <chenxm35@gmail.com>
Date: Sun, 13 Dec 2015 20:19:07 +0800
Subject: [PATCH] Fix bug of wifi session merging; Fix bugs of http pushing;
 Add tcp porter

---
 crontab                                       |   2 +
 .../omnilab/odh/spark/MergeWifiSession.scala  |  20 +-
 global_config.sh                              |   0
 porters/check_syslog.sh                       |  31 ++
 porters/check_syslog_session.sh               |  32 ++
 porters/wifi_syslog.sh                        |  30 +-
 porters/wifi_syslog_session.sh                |  14 +-
 porters/wifi_traffic_http.sh                  |  20 +-
 porters/wifi_traffic_tcp.sh                   |  45 ++
 porters/wifi_traffic_tcp/gzip2.py             | 502 ++++++++++++++++++
 porters/wifi_traffic_tcp/unzip_tcp.py         |  44 ++
 workflow.sh                                   |  11 +-
 12 files changed, 721 insertions(+), 30 deletions(-)
 create mode 100644 crontab
 mode change 100644 => 100755 global_config.sh
 create mode 100644 porters/check_syslog.sh
 create mode 100644 porters/check_syslog_session.sh
 mode change 100644 => 100755 porters/wifi_syslog.sh
 mode change 100644 => 100755 porters/wifi_syslog_session.sh
 mode change 100644 => 100755 porters/wifi_traffic_http.sh
 create mode 100755 porters/wifi_traffic_tcp.sh
 create mode 100644 porters/wifi_traffic_tcp/gzip2.py
 create mode 100644 porters/wifi_traffic_tcp/unzip_tcp.py
 mode change 100644 => 100755 workflow.sh

diff --git a/crontab b/crontab
new file mode 100644
index 0000000..eab2d30
--- /dev/null
+++ b/crontab
@@ -0,0 +1,2 @@
+## Start workflow daily at 03:00 (absolute paths: cron does not run from the repo directory)
+00 03 * * * chmod +x /home/omnilab/OmniDataHouse/workflow.sh && nohup /home/omnilab/OmniDataHouse/workflow.sh
\ No newline at end of file
diff --git a/etlers/WifiToolkit/src/main/scala/cn/edu/sjtu/omnilab/odh/spark/MergeWifiSession.scala b/etlers/WifiToolkit/src/main/scala/cn/edu/sjtu/omnilab/odh/spark/MergeWifiSession.scala
index 67045c8..53bab81 100644
--- a/etlers/WifiToolkit/src/main/scala/cn/edu/sjtu/omnilab/odh/spark/MergeWifiSession.scala
+++ b/etlers/WifiToolkit/src/main/scala/cn/edu/sjtu/omnilab/odh/spark/MergeWifiSession.scala
@@ -48,16 +48,20 @@ object MergeWifiSession {
       .map{ filtered => {
         val parts = filtered.split(',')
 
-        CleanWIFILog(
-          MAC = parts(0),
-          time = parts(1).toLong,
-          code = parts(2).toInt,
-          payload = parts(3)
-        )
+        if (parts.length < 4) {
+          null
+        } else {
 
-    }}
+          CleanWIFILog(
+            MAC = parts(0),
+            time = parts(1).toLong,
+            code = parts(2).toInt,
+            payload = parts(3)
+          )
 
-      .filter(m => validSessionCodes.contains(m.code))
+        }}}
+
+      .filter(m => m != null && validSessionCodes.contains(m.code))
 
       .groupBy(_.MAC)
 
diff --git a/global_config.sh b/global_config.sh
old mode 100644
new mode 100755
diff --git a/porters/check_syslog.sh b/porters/check_syslog.sh
new file mode 100644
index 0000000..d8d5e68
--- /dev/null
+++ b/porters/check_syslog.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# List the dates (YYYY-MM-DD) of raw wifi syslog files whose cleansed
+# output in HDFS has no _SUCCESS marker yet.
+
+function clean_trash () {
+  hadoop fs -rm -r .Trash/Current > /dev/null
+}
+
+function die () {
+    echo "${@}"
+    exit 1
+}
+
+# Check permission
+if [ `whoami` != 'omnilab' ]; then
+    die "Need permission of OMNILAB to run. Try user omnilab."
+fi
+
+# Global vars
+BASEDIR=$(dirname $0)/..
+source $BASEDIR/global_config.sh
+
+for rawfile in `ls $WIFI_SYSLOG_PATH`; do
+    # Raw file names are expected to look like wifilogYYYY-MM-DD.<ext>
+    rfname=${rawfile%.*}
+    year=`echo $rfname | cut -d "-" -f1 | sed -e 's/wifilog\([0-9]*\)/\1/g'`
+    month=`echo $rfname | cut -d "-" -f2`
+    day=`echo $rfname | cut -d "-" -f3`
+    if ! hadoop fs -test -e $HDFS_WIFI_SYSLOG/$rfname/_SUCCESS; then
+        # Strip one leading zero: printf %d rejects "08"/"09" as invalid octal.
+        echo `printf "%4d-%02d-%02d" $year ${month#0} ${day#0}`
+    fi
+done
+
+exit 0;
diff --git a/porters/check_syslog_session.sh b/porters/check_syslog_session.sh
new file mode 100644
index 0000000..606ffda
--- /dev/null
+++ b/porters/check_syslog_session.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Check the dates without successful processing
+
+function clean_trash () {
+  hadoop fs -rm -r .Trash/Current > /dev/null
+}
+
+function die () {
+    echo "${@}"
+    exit 1
+}
+
+# Check permission
+if [ `whoami` != 'omnilab' ]; then
+    die "Need permission of OMNILAB to run. Try user omnilab."
+fi
+
+# Global vars
+BASEDIR=$(dirname $0)/..
+source $BASEDIR/global_config.sh
+
+for rawfile in `ls $WIFI_SYSLOG_PATH`; do
+    rfname=${rawfile%.*}
+    year=`echo $rfname | cut -d "-" -f1 | sed -e 's/wifilog\([0-9]*\)/\1/g'`
+    month=`echo $rfname | cut -d "-" -f2`
+    day=`echo $rfname | cut -d "-" -f3`
+    if ! hadoop fs -test -e $HDFS_WIFI_SYSLOG_SESSION/$rfname/_SUCCESS; then
+        echo `printf "%4d-%02d-%02d" $year ${month#0} ${day#0}`
+    fi
+done
+
+exit 0;
diff --git a/porters/wifi_syslog.sh b/porters/wifi_syslog.sh
old mode 100644
new mode 100755
index 9dfb090..5edf1e0
--- a/porters/wifi_syslog.sh
+++ b/porters/wifi_syslog.sh
@@ -1,4 +1,7 @@
 #!/bin/bash
+#
+# Usage:
+#  wifi_syslog.sh [2013-04-25]
 
 function clean_trash () {
   hadoop fs -rm -r .Trash/Current > /dev/null
@@ -37,6 +40,11 @@ TEMPWP=$HDFS_WIFI_SYSLOG/_temp
 hadoop fs -rm -r $TEMPWP
 hadoop fs -mkdir -p $TEMPWP
 
+# Target date (YYYY-MM-DD): first CLI argument when given and non-empty,
+# otherwise yesterday. ${1:-...} replaces an unquoted `[ $1 != "" ]` test,
+# which errors out ("unary operator expected") when no argument is passed.
+TARGET=${1:-$(date -d "yesterday" '+%Y-%m-%d')}
+
 # Process yesterday's file
 for rawfile in `ls $WIFI_SYSLOG_PATH`; do
 
@@ -46,20 +54,20 @@ for rawfile in `ls $WIFI_SYSLOG_PATH`; do
     month=`echo $rfname | cut -d "-" -f2`
     day=`echo $rfname | cut -d "-" -f3`
 
-    if [ $year$month$day == $(date -d "yesterday" '+%Y%m%d') ]; then
+    if [ "$year-$month-$day" == $TARGET ]; then
 
-	if ! hadoop fs -test -e $HDFS_WIFI_SYSLOG/$rfname/_SUCCESS; then
+    	if ! hadoop fs -test -e $HDFS_WIFI_SYSLOG/$rfname/_SUCCESS; then
 
-	    # Decompress file
-	    if ! hadoop fs -test -e $TEMPWP/$rfname; then
-		gunzip -c $WIFI_SYSLOG_PATH/$rawfile | hadoop fs -put - $TEMPWP/$rfname
-	    fi
+    	    # Decompress file
+    	    if ! hadoop fs -test -e $TEMPWP/$rfname; then
+    		gunzip -c $WIFI_SYSLOG_PATH/$rawfile | hadoop fs -put - $TEMPWP/$rfname
+    	    fi
 
-	    # Cleanse wifilog
-	    hadoop fs -rm -r $HDFS_WIFI_SYSLOG/$rfname
-	    spark-submit2 --class $CLSNAME $BINJAR $TEMPWP/$rfname $HDFS_WIFI_SYSLOG/$rfname
-	    hadoop fs -rm -r $TEMPWP/$rfname
-	fi
+    	    # Cleanse wifilog
+    	    hadoop fs -rm -r $HDFS_WIFI_SYSLOG/$rfname
+    	    spark-submit2 --class $CLSNAME $BINJAR $TEMPWP/$rfname $HDFS_WIFI_SYSLOG/$rfname
+    	    hadoop fs -rm -r $TEMPWP/$rfname
+    	fi
 
     fi
 
diff --git a/porters/wifi_syslog_session.sh b/porters/wifi_syslog_session.sh
old mode 100644
new mode 100755
index 0c8f0b9..e481d95
--- a/porters/wifi_syslog_session.sh
+++ b/porters/wifi_syslog_session.sh
@@ -1,4 +1,7 @@
 #!/bin/bash
+#
+# Usage:
+#  wifi_syslog_session.sh [2013-04-25]
 
 function clean_trash () {
   hadoop fs -rm -r .Trash/Current > /dev/null
@@ -32,12 +35,13 @@ if ! hadoop fs -test -d $HDFS_WIFI_SYSLOG_SESSION; then
     hadoop fs -mkdir -p $HDFS_WIFI_SYSLOG_SESSION
 fi
 
-year=`date -d "yesterday" '+%Y'`
-month=`date -d "yesterday" '+%m'`
-day=`date -d "yesterday" '+%d'`
+# Target date (YYYY-MM-DD): first CLI argument when given and non-empty,
+# otherwise yesterday. ${1:-...} replaces an unquoted `[ $1 != "" ]` test,
+# which errors out ("unary operator expected") when no argument is passed.
+TARGET=${1:-$(date -d "yesterday" '+%Y-%m-%d')}
 
-INPUT=$HDFS_WIFI_SYSLOG/wifilog$year-$month-$day
-OUTPUT=$HDFS_WIFI_SYSLOG_SESSION/wifilog$year-$month-$day
+INPUT=$HDFS_WIFI_SYSLOG/wifilog$TARGET
+OUTPUT=$HDFS_WIFI_SYSLOG_SESSION/wifilog$TARGET
 
 if ! hadoop fs -test -e $OUTPUT/_SUCCESS; then
     hadoop fs -rm -r $OUTPUT
diff --git a/porters/wifi_traffic_http.sh b/porters/wifi_traffic_http.sh
old mode 100644
new mode 100755
index 0504ec1..64fd1b7
--- a/porters/wifi_traffic_http.sh
+++ b/porters/wifi_traffic_http.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# Usage: wifi_traffic_http.sh [2015-01-01]
 
 function clean_trash () {
   hadoop fs -rm -r .Trash/Current > /dev/null
@@ -31,17 +32,32 @@ year=`date -d "yesterday" "+%Y"`
 month=`date -d "yesterday" "+%m"`
 day=`date -d "yesterday" "+%d"`
 
-INPUT=$WIFI_TRAFFIC_PATH/$year$month/http/$year$month$day-*.gz
+if [ -n "${1:-}" ]; then  # quoted: a bare $1 breaks the test when no date is given
+    year=$(echo "$1" | cut -d'-' -f1)
+    month=$(echo "$1" | cut -d'-' -f2)
+    day=$(echo "$1" | cut -d'-' -f3)
+fi
+
+INPUT=$WIFI_TRAFFIC_PATH/$year$month/http/$year$month$day-*.jn.*
 INPUT_TEMP=$HDFS_WIFI_TRAFFIC/HTTP/_temp
 OUTPUT=$HDFS_WIFI_TRAFFIC/HTTP/$year$month$day
 
+if ! hadoop fs -test -d $INPUT_TEMP; then
+    hadoop fs -mkdir -p $INPUT_TEMP
+fi
+
 # Decompress files
 for rawfile in `ls $INPUT`; do
     echo $rawfile
+    rfext=${rawfile##*.}
     rfname=${rawfile%.*}
 
     if ! hadoop fs -test -e $INPUT_TEMP/`basename $rfname`; then
-        gunzip -c $rawfile | hadoop fs -put - $INPUT_TEMP/`basename $rfname`
+        if [ $rfext == "gz" ]; then
+            gunzip -c $rawfile | hadoop fs -put - $INPUT_TEMP/`basename $rfname`
+        else
+            hadoop fs -put $rawfile $INPUT_TEMP/`basename $rawfile`
+        fi
     fi
 done
 
diff --git a/porters/wifi_traffic_tcp.sh b/porters/wifi_traffic_tcp.sh
new file mode 100755
index 0000000..f510d0f
--- /dev/null
+++ b/porters/wifi_traffic_tcp.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# Decompress yesterday's raw wifi TCP traffic logs into HDFS without further
+# processing.
+
+function clean_trash () {
+  hadoop fs -rm -r .Trash/Current > /dev/null
+}
+
+function die () {
+    echo "${@}"
+    exit 1
+}
+
+# Check permission
+if [ `whoami` != 'omnilab' ]; then
+    die "Need permission of OMNILAB to run. Try user omnilab."
+fi
+
+# Global vars
+BASEDIR=$(dirname $0)/..
+source $BASEDIR/global_config.sh
+
+# Check root path for raw data
+if [ ! -d $WIFI_TRAFFIC_PATH ]; then
+    die "Cann't find path for archived traffic data: $WIFI_TRAFFIC_PATH"
+fi
+
+year=`date -d "yesterday" "+%Y"`
+month=`date -d "yesterday" "+%b"`     # abbreviated month name
+month2=`date -d "yesterday" "+%m"`    # two-digit month number
+day=`date -d "yesterday" "+%d"`
+
+# Braces are required here: "$day_" and "$month_" would be read as the
+# (unset) variables "day_" and "month_" and expand to nothing.
+INPUT_PATH=$WIFI_TRAFFIC_PATH/$year$month/tcp/*_${day}_${month}_${year}.out
+
+OUTPUT_TCP=$HDFS_WIFI_TRAFFIC/TCP/$year$month2$day
+OUTPUT_TCP_NOCOMPLETE=$HDFS_WIFI_TRAFFIC/TCP_NOCOMPLETE/$year$month2$day
+OUTPUT_UDP=$HDFS_WIFI_TRAFFIC/UDP/$year$month2$day
+
+# HDFS staging directory for the decompressed files.
+# NOTE(review): the original never set INPUT_TEMP; this default mirrors the
+# HTTP porter's layout and is an assumption -- confirm the intended path.
+INPUT_TEMP=${INPUT_TEMP:-$HDFS_WIFI_TRAFFIC/TCP/_temp}
+if ! hadoop fs -test -d $INPUT_TEMP; then
+    hadoop fs -mkdir -p $INPUT_TEMP
+fi
+
+# Decompress files WITHOUT further processing
+for file in `ls $INPUT_PATH`; do
+    echo $file
+    rfname=${file%.*}
+
+    if ! hadoop fs -test -e $INPUT_TEMP/`basename $rfname`; then
+        gunzip -c $file | hadoop fs -put - $INPUT_TEMP/`basename $rfname`
+    fi
+done
+
+clean_trash
+
+exit 0;
diff --git a/porters/wifi_traffic_tcp/gzip2.py b/porters/wifi_traffic_tcp/gzip2.py
new file mode 100644
index 0000000..7984863
--- /dev/null
+++ b/porters/wifi_traffic_tcp/gzip2.py
@@ -0,0 +1,502 @@
+"""Functions that read and write gzipped files.
+
+The user of the file doesn't have to worry about the compression,
+but random access is not allowed."""
+
+# based on Andrew Kuchling's minigzip.py distributed with the zlib module
+
+import struct, sys, time, os
+import zlib
+import io
+import __builtin__
+
+__all__ = ["GzipFile","open"]
+
+FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
+
+READ, WRITE = 1, 2
+
+def write32u(output, value):
+    # The L format writes the bit pattern correctly whether signed
+    # or unsigned.
+    output.write(struct.pack("<L", value))
+
+def read32(input):
+    return struct.unpack("<I", input.read(4))[0]
+
+def open(filename, mode="rb", compresslevel=9):
+    """Shorthand for GzipFile(filename, mode, compresslevel).
+
+    The filename argument is required; mode defaults to 'rb'
+    and compresslevel defaults to 9.
+
+    """
+    return GzipFile(filename, mode, compresslevel)
+
+class GzipFile(io.BufferedIOBase):
+    """The GzipFile class simulates most of the methods of a file object with
+    the exception of the readinto() and truncate() methods.
+
+    """
+
+    myfileobj = None
+    max_read_chunk = 10 * 1024 * 1024   # 10Mb
+
+    def __init__(self, filename=None, mode=None,
+                 compresslevel=9, fileobj=None, mtime=None):
+        """Constructor for the GzipFile class.
+
+        At least one of fileobj and filename must be given a
+        non-trivial value.
+
+        The new class instance is based on fileobj, which can be a regular
+        file, a StringIO object, or any other object which simulates a file.
+        It defaults to None, in which case filename is opened to provide
+        a file object.
+
+        When fileobj is not None, the filename argument is only used to be
+        included in the gzip file header, which may includes the original
+        filename of the uncompressed file.  It defaults to the filename of
+        fileobj, if discernible; otherwise, it defaults to the empty string,
+        and in this case the original filename is not included in the header.
+
+        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', or 'wb',
+        depending on whether the file will be read or written.  The default
+        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
+        Be aware that only the 'rb', 'ab', and 'wb' values should be used
+        for cross-platform portability.
+
+        The compresslevel argument is an integer from 1 to 9 controlling the
+        level of compression; 1 is fastest and produces the least compression,
+        and 9 is slowest and produces the most compression.  The default is 9.
+
+        The mtime argument is an optional numeric timestamp to be written
+        to the stream when compressing.  All gzip compressed streams
+        are required to contain a timestamp.  If omitted or None, the
+        current time is used.  This module ignores the timestamp when
+        decompressing; however, some programs, such as gunzip, make use
+        of it.  The format of the timestamp is the same as that of the
+        return value of time.time() and of the st_mtime member of the
+        object returned by os.stat().
+
+        """
+
+        # guarantee the file is opened in binary mode on platforms
+        # that care about that sort of thing
+        if mode and 'b' not in mode:
+            mode += 'b'
+        if fileobj is None:
+            fileobj = self.myfileobj = __builtin__.open(filename, mode or 'rb')
+        if filename is None:
+            if hasattr(fileobj, 'name'): filename = fileobj.name
+            else: filename = ''
+        if mode is None:
+            if hasattr(fileobj, 'mode'): mode = fileobj.mode
+            else: mode = 'rb'
+
+        if mode[0:1] == 'r':
+            self.mode = READ
+            # Set flag indicating start of a new member
+            self._new_member = True
+            # Buffer data read from gzip file. extrastart is offset in
+            # stream where buffer starts. extrasize is number of
+            # bytes remaining in buffer from current stream position.
+            self.extrabuf = ""
+            self.extrasize = 0
+            self.extrastart = 0
+            self.name = filename
+            # Starts small, scales exponentially
+            self.min_readsize = 100
+
+        elif mode[0:1] == 'w' or mode[0:1] == 'a':
+            self.mode = WRITE
+            self._init_write(filename)
+            self.compress = zlib.compressobj(compresslevel,
+                                             zlib.DEFLATED,
+                                             -zlib.MAX_WBITS,
+                                             zlib.DEF_MEM_LEVEL,
+                                             0)
+        else:
+            raise IOError, "Mode " + mode + " not supported"
+
+        self.fileobj = fileobj
+        self.offset = 0
+        self.mtime = mtime
+
+        if self.mode == WRITE:
+            self._write_gzip_header()
+
+    @property
+    def filename(self):
+        import warnings
+        warnings.warn("use the name attribute", DeprecationWarning, 2)
+        if self.mode == WRITE and self.name[-3:] != ".gz":
+            return self.name + ".gz"
+        return self.name
+
+    def __repr__(self):
+        s = repr(self.fileobj)
+        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'
+
+    def _init_write(self, filename):
+        self.name = filename
+        self.crc = zlib.crc32("") & 0xffffffffL
+        self.size = 0
+        self.writebuf = []
+        self.bufsize = 0
+
+    def _write_gzip_header(self):
+        self.fileobj.write('\037\213')             # magic header
+        self.fileobj.write('\010')                 # compression method
+        fname = os.path.basename(self.name)
+        if fname.endswith(".gz"):
+            fname = fname[:-3]
+        flags = 0
+        if fname:
+            flags = FNAME
+        self.fileobj.write(chr(flags))
+        mtime = self.mtime
+        if mtime is None:
+            mtime = time.time()
+        write32u(self.fileobj, long(mtime))
+        self.fileobj.write('\002')
+        self.fileobj.write('\377')
+        if fname:
+            self.fileobj.write(fname + '\000')
+
+    def _init_read(self):
+        self.crc = zlib.crc32("") & 0xffffffffL
+        self.size = 0
+
+    def _read_gzip_header(self):
+        magic = self.fileobj.read(2)
+        if magic != '\037\213':
+            raise IOError, 'Not a gzipped file'
+        method = ord( self.fileobj.read(1) )
+        if method != 8:
+            raise IOError, 'Unknown compression method'
+        flag = ord( self.fileobj.read(1) )
+        self.mtime = read32(self.fileobj)
+        # extraflag = self.fileobj.read(1)
+        # os = self.fileobj.read(1)
+        self.fileobj.read(2)
+
+        if flag & FEXTRA:
+            # Read & discard the extra field, if present
+            xlen = ord(self.fileobj.read(1))
+            xlen = xlen + 256*ord(self.fileobj.read(1))
+            self.fileobj.read(xlen)
+        if flag & FNAME:
+            # Read and discard a null-terminated string containing the filename
+            while True:
+                s = self.fileobj.read(1)
+                if not s or s=='\000':
+                    break
+        if flag & FCOMMENT:
+            # Read and discard a null-terminated string containing a comment
+            while True:
+                s = self.fileobj.read(1)
+                if not s or s=='\000':
+                    break
+        if flag & FHCRC:
+            self.fileobj.read(2)     # Read & discard the 16-bit header CRC
+
+    def write(self,data):
+        if self.mode != WRITE:
+            import errno
+            raise IOError(errno.EBADF, "write() on read-only GzipFile object")
+
+        if self.fileobj is None:
+            raise ValueError, "write() on closed GzipFile object"
+
+        # Convert data type if called by io.BufferedWriter.
+        if isinstance(data, memoryview):
+            data = data.tobytes()
+
+        if len(data) > 0:
+            self.size = self.size + len(data)
+            self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
+            self.fileobj.write( self.compress.compress(data) )
+            self.offset += len(data)
+
+        return len(data)
+
+    def read(self, size=-1):
+        if self.mode != READ:
+            import errno
+            raise IOError(errno.EBADF, "read() on write-only GzipFile object")
+
+        if self.extrasize <= 0 and self.fileobj is None:
+            return ''
+
+        readsize = 1024
+        if size < 0:        # get the whole thing
+            try:
+                while True:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                size = self.extrasize
+        else:               # just get some more of it
+            try:
+                while size > self.extrasize:
+                    self._read(readsize)
+                    readsize = min(self.max_read_chunk, readsize * 2)
+            except EOFError:
+                if size > self.extrasize:
+                    size = self.extrasize
+
+        offset = self.offset - self.extrastart
+        chunk = self.extrabuf[offset: offset + size]
+        self.extrasize = self.extrasize - size
+
+        self.offset += size
+        return chunk
+
+    def _unread(self, buf):
+        self.extrasize = len(buf) + self.extrasize
+        self.offset -= len(buf)
+
+    def _read(self, size=1024):
+        if self.fileobj is None:
+            raise EOFError, "Reached EOF"
+
+        if self._new_member:
+            # If the _new_member flag is set, we have to
+            # jump to the next member, if there is one.
+            #
+            # First, check if we're at the end of the file;
+            # if so, it's time to stop; no more members to read.
+            pos = self.fileobj.tell()   # Save current position
+            self.fileobj.seek(0, 2)     # Seek to end of file
+            if pos == self.fileobj.tell():
+                raise EOFError, "Reached EOF"
+            else:
+                self.fileobj.seek( pos ) # Return to original position
+
+            self._init_read()
+            self._read_gzip_header()
+            self.decompress = zlib.decompressobj(-zlib.MAX_WBITS)
+            self._new_member = False
+
+        # Read a chunk of data from the file
+        buf = self.fileobj.read(size)
+
+        # If the EOF has been reached, flush the decompression object
+        # and mark this object as finished.
+
+        if buf == "":
+            uncompress = self.decompress.flush()
+            self._read_eof()
+            self._add_read_data( uncompress )
+            raise EOFError, 'Reached EOF'
+
+        uncompress = self.decompress.decompress(buf)
+        self._add_read_data( uncompress )
+
+        if self.decompress.unused_data != "":
+            # Ending case: we've come to the end of a member in the file,
+            # so seek back to the start of the unused data, finish up
+            # this member, and read a new gzip header.
+            # (The number of bytes to seek back is the length of the unused
+            # data, minus 8 because _read_eof() will rewind a further 8 bytes)
+            self.fileobj.seek( -len(self.decompress.unused_data)+8, 1)
+
+            # Check the CRC and file size, and set the flag so we read
+            # a new member on the next call
+            self._read_eof()
+            self._new_member = True
+
+    def _add_read_data(self, data):
+        self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
+        offset = self.offset - self.extrastart
+        self.extrabuf = self.extrabuf[offset:] + data
+        self.extrasize = self.extrasize + len(data)
+        self.extrastart = self.offset
+        self.size = self.size + len(data)
+
+    def _read_eof(self):
+        # NOTE: in this copy the trailer handling below (rewind 8 bytes, verify
+        # the stored CRC32 and size mod 2**32, skip zero padding) is wrapped in
+        # a string literal, so none of it executes and this method is a no-op.
+        # Presumably the checks were disabled to tolerate archives whose
+        # trailer is missing or wrong -- TODO confirm before re-enabling.
+	"""
+        self.fileobj.seek(-8, 1)
+        crc32 = read32(self.fileobj)
+        isize = read32(self.fileobj)  # may exceed 2GB
+        if crc32 != self.crc:
+            raise IOError("CRC check failed %s != %s" % (hex(crc32),
+                                                         hex(self.crc)))
+        elif isize != (self.size & 0xffffffffL):
+            raise IOError, "Incorrect length of data produced"
+
+        # Gzip files can be padded with zeroes and still have archives.
+        # Consume all zero bytes and set the file position to the first
+        # non-zero byte. See http://www.gzip.org/#faq8
+        c = "\x00"
+        while c == "\x00":
+            c = self.fileobj.read(1)
+        if c:
+            self.fileobj.seek(-1, 1)
+	"""
+	pass
+
+    @property
+    def closed(self):
+        return self.fileobj is None
+
+    def close(self):
+        if self.fileobj is None:
+            return
+        if self.mode == WRITE:
+            self.fileobj.write(self.compress.flush())
+            write32u(self.fileobj, self.crc)
+            # self.size may exceed 2GB, or even 4GB
+            write32u(self.fileobj, self.size & 0xffffffffL)
+            self.fileobj = None
+        elif self.mode == READ:
+            self.fileobj = None
+        if self.myfileobj:
+            self.myfileobj.close()
+            self.myfileobj = None
+
+    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
+        if self.mode == WRITE:
+            # Ensure the compressor's buffer is flushed
+            self.fileobj.write(self.compress.flush(zlib_mode))
+            self.fileobj.flush()
+
+    def fileno(self):
+        """Invoke the underlying file object's fileno() method.
+
+        This will raise AttributeError if the underlying file object
+        doesn't support fileno().
+        """
+        return self.fileobj.fileno()
+
+    def rewind(self):
+        '''Return the uncompressed stream file position indicator to the
+        beginning of the file'''
+        if self.mode != READ:
+            raise IOError("Can't rewind in write mode")
+        self.fileobj.seek(0)
+        self._new_member = True
+        self.extrabuf = ""
+        self.extrasize = 0
+        self.extrastart = 0
+        self.offset = 0
+
+    def readable(self):
+        return self.mode == READ
+
+    def writable(self):
+        return self.mode == WRITE
+
+    def seekable(self):
+        return True
+
+    def seek(self, offset, whence=0):
+        if whence:
+            if whence == 1:
+                offset = self.offset + offset
+            else:
+                raise ValueError('Seek from end not supported')
+        if self.mode == WRITE:
+            if offset < self.offset:
+                raise IOError('Negative seek in write mode')
+            count = offset - self.offset
+            for i in range(count // 1024):
+                self.write(1024 * '\0')
+            self.write((count % 1024) * '\0')
+        elif self.mode == READ:
+            if offset < self.offset:
+                # for negative seek, rewind and do positive seek
+                self.rewind()
+            count = offset - self.offset
+            for i in range(count // 1024):
+                self.read(1024)
+            self.read(count % 1024)
+
+        return self.offset
+
+    def readline(self, size=-1):
+        if size < 0:
+            # Shortcut common case - newline found in buffer.
+            offset = self.offset - self.extrastart
+            i = self.extrabuf.find('\n', offset) + 1
+            if i > 0:
+                self.extrasize -= i - offset
+                self.offset += i - offset
+                return self.extrabuf[offset: i]
+
+            size = sys.maxint
+            readsize = self.min_readsize
+        else:
+            readsize = size
+        bufs = []
+        while size != 0:
+            c = self.read(readsize)
+            i = c.find('\n')
+
+            # We set i=size to break out of the loop under two
+            # conditions: 1) there's no newline, and the chunk is
+            # larger than size, or 2) there is a newline, but the
+            # resulting line would be longer than 'size'.
+            if (size <= i) or (i == -1 and len(c) > size):
+                i = size - 1
+
+            if i >= 0 or c == '':
+                bufs.append(c[:i + 1])    # Add portion of last chunk
+                self._unread(c[i + 1:])   # Push back rest of chunk
+                break
+
+            # Append chunk to list, decrease 'size',
+            bufs.append(c)
+            size = size - len(c)
+            readsize = min(size, readsize * 2)
+        if readsize > self.min_readsize:
+            self.min_readsize = min(readsize, self.min_readsize * 2, 512)
+        return ''.join(bufs) # Return resulting line
+
+
+def _test():
+    # Act like gzip; with -d, act like gunzip.
+    # The input file is not deleted, however, nor are any other gzip
+    # options or features supported.
+    args = sys.argv[1:]
+    decompress = args and args[0] == "-d"
+    if decompress:
+        args = args[1:]
+    if not args:
+        args = ["-"]
+    for arg in args:
+        if decompress:
+            if arg == "-":
+                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin)
+                g = sys.stdout
+            else:
+                if arg[-3:] != ".gz":
+                    print "filename doesn't end in .gz:", repr(arg)
+                    continue
+                f = open(arg, "rb")
+                g = __builtin__.open(arg[:-3], "wb")
+        else:
+            if arg == "-":
+                f = sys.stdin
+                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout)
+            else:
+                f = __builtin__.open(arg, "rb")
+                g = open(arg + ".gz", "wb")
+        while True:
+            chunk = f.read(1024)
+            if not chunk:
+                break
+            g.write(chunk)
+        if g is not sys.stdout:
+            g.close()
+        if f is not sys.stdin:
+            f.close()
+
+if __name__ == '__main__':
+    _test()
diff --git a/porters/wifi_traffic_tcp/unzip_tcp.py b/porters/wifi_traffic_tcp/unzip_tcp.py
new file mode 100644
index 0000000..12a142d
--- /dev/null
+++ b/porters/wifi_traffic_tcp/unzip_tcp.py
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+"""Print each line of a plain or gzip-compressed file, stripped of CR/LF/space."""
+import os
+import re
+import sys
+
+from gzip2 import GzipFile
+
+class FileReader(object):
+
+    @staticmethod
+    def open_file(filename, mode='rb'):
+        """ open plain or compressed file (chosen by a trailing '.gz')
+        @return file handler
+        """
+        parts = os.path.basename(filename).split('.')
+        if parts[-1] == 'gz':
+            try:
+                return GzipFile(mode=mode, filename=filename)
+            except Exception:
+                # Best-effort: fall back to opening the file as plain data.
+                pass
+        return open(filename, mode)
+
+    @staticmethod
+    def list_files(folder, regex_str=r'.', match=True):
+        """ find all files under 'folder' with names matching
+        some regular expression (re.match if 'match', else re.search)
+        """
+        assert os.path.isdir(folder)
+        all_files_path = []
+        for root, dirs, files in os.walk(folder):
+            for filename in files:
+                if match and re.match(regex_str, filename, re.IGNORECASE):
+                    all_files_path.append(os.path.join(root, filename))
+                elif not match and re.search(regex_str, filename, re.IGNORECASE):
+                    all_files_path.append(os.path.join(root, filename))
+        return all_files_path
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        print('Usage: unzip_tcp.py file.gz')
+        sys.exit(-1)
+
+    for line in FileReader.open_file(sys.argv[1]):
+        print(line.strip('\r\n '))
diff --git a/workflow.sh b/workflow.sh
old mode 100644
new mode 100755
index aa094a7..733103c
--- a/workflow.sh
+++ b/workflow.sh
@@ -11,14 +11,20 @@ if [ `whoami` != 'omnilab' ]; then
 fi
 
 # Global vars
-BASEDIR=$(dirname $0)/..
+BASEDIR=$(dirname $0)
 source $BASEDIR/global_config.sh
 
+# Each porter is run as a child process rather than sourced: a sourced
+# porter sees this script's $0 (porters appear to derive BASEDIR from it) and
+# any `exit` in it would end the whole workflow. "$@" forwards our arguments.
 ## Run WifiSyslog cleansing
-exec '$BASEDIR/porters/wifi_syslog.sh'
+chmod +x $BASEDIR/porters/wifi_syslog.sh
+$BASEDIR/porters/wifi_syslog.sh "$@"
 
 ## Run WifiSyslogSession extraction
-exec '$BASEDIR/porters/wifi_syslog_session.sh'
+chmod +x $BASEDIR/porters/wifi_syslog_session.sh
+$BASEDIR/porters/wifi_syslog_session.sh "$@"
 
 ## Run WifiTrafficHttp cleansing
-exec '$BASEDIR/porters/wifi_traffic_http.sh'
+chmod +x $BASEDIR/porters/wifi_traffic_http.sh
+$BASEDIR/porters/wifi_traffic_http.sh "$@"