Skip to content

Commit

Permalink
Fix bug of wifi session merging;
Browse files Browse the repository at this point in the history
Fix bugs of http pushing;
Add tcp porter
  • Loading branch information
caesar0301 committed Dec 13, 2015
1 parent e5ee29f commit 48775a8
Show file tree
Hide file tree
Showing 12 changed files with 721 additions and 30 deletions.
2 changes: 2 additions & 0 deletions crontab
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
## Start workflow
# Runs every day at 03:00. The script is addressed by absolute path in both
# commands: cron does not start jobs inside the OmniDataHouse directory, so a
# relative "workflow.sh" would make chmod fail and "&&" would skip the run.
00 03 * * * chmod +x /home/omnilab/OmniDataHouse/workflow.sh && nohup /home/omnilab/OmniDataHouse/workflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,20 @@ object MergeWifiSession {
.map{ filtered => {
val parts = filtered.split(',')

CleanWIFILog(
MAC = parts(0),
time = parts(1).toLong,
code = parts(2).toInt,
payload = parts(3)
)
if (parts.length < 4) {
null
} else {

}}
CleanWIFILog(
MAC = parts(0),
time = parts(1).toLong,
code = parts(2).toInt,
payload = parts(3)
)

.filter(m => validSessionCodes.contains(m.code))
}}}

.filter(m => m != null && validSessionCodes.contains(m.code))

.groupBy(_.MAC)

Expand Down
Empty file modified global_config.sh
100644 → 100755
Empty file.
31 changes: 31 additions & 0 deletions porters/check_syslog.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/bin/bash

# Recursively remove the relative HDFS path .Trash/Current.
# Standard output of the hadoop command is discarded; its standard error and
# exit status are passed through to the caller.
clean_trash() {
  local trash_dir=".Trash/Current"
  hadoop fs -rm -r "$trash_dir" >/dev/null
}

# Print an error message and terminate the script with exit status 1.
# Arguments: the message; several arguments are joined with single spaces.
# Outputs:   the message on stderr, so that stdout (where this script prints
#            its date list) is not polluted by diagnostics.
die() {
  # printf instead of echo: a message such as "-n" must not be read as an option.
  printf '%s\n' "$*" >&2
  exit 1
}

# Check permission
if [[ "$(whoami)" != 'omnilab' ]]; then
  die "Need permission of OMNILAB to run. Try user omnilab."
fi

# Global vars
BASEDIR="$(dirname "$0")/.."
source "$BASEDIR/global_config.sh"

# For every raw file under $WIFI_SYSLOG_PATH, print its date as YYYY-MM-DD
# when $HDFS_WIFI_SYSLOG/<name>/_SUCCESS does not exist in HDFS.
# File names are presumably of the form wifilog2013-04-25.<ext>; the date is
# taken from the "-"-separated fields after dropping the "wifilog" prefix.
for rawpath in "$WIFI_SYSLOG_PATH"/*; do
  # An unmatched glob stays literal; skip it so an empty directory prints nothing.
  [[ -e "$rawpath" ]] || continue
  rawfile=${rawpath##*/}
  rfname=${rawfile%.*}
  IFS=- read -r year month day _ <<<"$rfname"
  year=${year//wifilog/}
  if ! hadoop fs -test -e "$HDFS_WIFI_SYSLOG/$rfname/_SUCCESS"; then
    # Strip one leading zero first: printf's %d treats "08" and "09" as
    # invalid octal numbers and would print an error instead of the date.
    printf '%4d-%02d-%02d\n' "$year" "${month#0}" "${day#0}"
  fi
done

exit 0
32 changes: 32 additions & 0 deletions porters/check_syslog_session.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
# Check the dates without successful processing

# Delete the relative HDFS path .Trash/Current (recursively), silencing the
# command's standard output. Errors from hadoop remain visible on stderr and
# its exit status becomes the function's status.
clean_trash() {
  hadoop fs -rm -r '.Trash/Current' 1>/dev/null
}

# Abort the script: write the given message to stderr and exit with status 1.
# Arguments: message words, joined with single spaces.
# Diagnostics go to stderr because stdout carries this script's date list.
die() {
  # printf avoids echo's option parsing for messages that start with "-".
  printf '%s\n' "$*" >&2
  exit 1
}

# Check permission
if [[ "$(whoami)" != 'omnilab' ]]; then
  die "Need permission of OMNILAB to run. Try user omnilab."
fi

# Global vars
BASEDIR="$(dirname "$0")/.."
source "$BASEDIR/global_config.sh"

# For every raw file under $WIFI_SYSLOG_PATH, print its date as YYYY-MM-DD
# when $HDFS_WIFI_SYSLOG_SESSION/<name>/_SUCCESS does not exist in HDFS.
# The directory is globbed instead of parsing `ls`, and every expansion is
# quoted, so names containing whitespace or glob characters are handled.
for rawpath in "$WIFI_SYSLOG_PATH"/*; do
  # An unmatched glob stays literal; skip it so an empty directory prints nothing.
  [[ -e "$rawpath" ]] || continue
  rawfile=${rawpath##*/}
  rfname=${rawfile%.*}
  # Presumably rfname looks like wifilog2013-04-25: split on "-" and drop
  # the "wifilog" prefix from the first field.
  IFS=- read -r year month day _ <<<"$rfname"
  year=${year//wifilog/}
  if ! hadoop fs -test -e "$HDFS_WIFI_SYSLOG_SESSION/$rfname/_SUCCESS"; then
    # One leading zero is stripped so "08"/"09" are not parsed as octal by %d.
    printf '%4d-%02d-%02d\n' "$year" "${month#0}" "${day#0}"
  fi
done

exit 0
30 changes: 19 additions & 11 deletions porters/wifi_syslog.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#!/bin/bash
#
# Usage:
# wifi_syslog.sh [2013-04-25]

function clean_trash () {
hadoop fs -rm -r .Trash/Current > /dev/null
Expand Down Expand Up @@ -37,6 +40,11 @@ TEMPWP=$HDFS_WIFI_SYSLOG/_temp
hadoop fs -rm -r $TEMPWP
hadoop fs -mkdir -p $TEMPWP

# Date to process (YYYY-MM-DD): the first argument when given, otherwise
# yesterday. ${1:-...} stays valid when the script runs without arguments;
# the unquoted test `[ $1 != "" ]` collapsed to `[ != "" ]` in that case and
# printed "unary operator expected".
TARGET=${1:-$(date -d "yesterday" '+%Y-%m-%d')}

# Process yesterday's file
for rawfile in `ls $WIFI_SYSLOG_PATH`; do

Expand All @@ -46,20 +54,20 @@ for rawfile in `ls $WIFI_SYSLOG_PATH`; do
month=`echo $rfname | cut -d "-" -f2`
day=`echo $rfname | cut -d "-" -f3`

if [ $year$month$day == $(date -d "yesterday" '+%Y%m%d') ]; then
if [ "$year-$month-$day" == $TARGET ]; then

if ! hadoop fs -test -e $HDFS_WIFI_SYSLOG/$rfname/_SUCCESS; then
if ! hadoop fs -test -e $HDFS_WIFI_SYSLOG/$rfname/_SUCCESS; then

# Decompress file
if ! hadoop fs -test -e $TEMPWP/$rfname; then
gunzip -c $WIFI_SYSLOG_PATH/$rawfile | hadoop fs -put - $TEMPWP/$rfname
fi
# Decompress file
if ! hadoop fs -test -e $TEMPWP/$rfname; then
gunzip -c $WIFI_SYSLOG_PATH/$rawfile | hadoop fs -put - $TEMPWP/$rfname
fi

# Cleanse wifilog
hadoop fs -rm -r $HDFS_WIFI_SYSLOG/$rfname
spark-submit2 --class $CLSNAME $BINJAR $TEMPWP/$rfname $HDFS_WIFI_SYSLOG/$rfname
hadoop fs -rm -r $TEMPWP/$rfname
fi
# Cleanse wifilog
hadoop fs -rm -r $HDFS_WIFI_SYSLOG/$rfname
spark-submit2 --class $CLSNAME $BINJAR $TEMPWP/$rfname $HDFS_WIFI_SYSLOG/$rfname
hadoop fs -rm -r $TEMPWP/$rfname
fi

fi

Expand Down
14 changes: 9 additions & 5 deletions porters/wifi_syslog_session.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
#!/bin/bash
#
# Usage:
# wifi_syslog_session.sh [2013-04-25]

function clean_trash () {
hadoop fs -rm -r .Trash/Current > /dev/null
Expand Down Expand Up @@ -32,12 +35,13 @@ if ! hadoop fs -test -d $HDFS_WIFI_SYSLOG_SESSION; then
hadoop fs -mkdir -p $HDFS_WIFI_SYSLOG_SESSION
fi

year=`date -d "yesterday" '+%Y'`
month=`date -d "yesterday" '+%m'`
day=`date -d "yesterday" '+%d'`
# Pick the day to process (YYYY-MM-DD). An explicit first argument wins;
# with no (or an empty) argument the script falls back to yesterday.
# The default-value expansion replaces `[ $1 != "" ]`, which is a malformed
# test ("unary operator expected") whenever $1 is empty.
TARGET=${1:-$(date -d "yesterday" '+%Y-%m-%d')}

INPUT=$HDFS_WIFI_SYSLOG/wifilog$year-$month-$day
OUTPUT=$HDFS_WIFI_SYSLOG_SESSION/wifilog$year-$month-$day
INPUT=$HDFS_WIFI_SYSLOG/wifilog$TARGET
OUTPUT=$HDFS_WIFI_SYSLOG_SESSION/wifilog$TARGET

if ! hadoop fs -test -e $OUTPUT/_SUCCESS; then
hadoop fs -rm -r $OUTPUT
Expand Down
20 changes: 18 additions & 2 deletions porters/wifi_traffic_http.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/bash
# Usage: wifi_traffic_http.sh [2015-01-01]

function clean_trash () {
hadoop fs -rm -r .Trash/Current > /dev/null
Expand Down Expand Up @@ -31,17 +32,32 @@ year=`date -d "yesterday" "+%Y"`
month=`date -d "yesterday" "+%m"`
day=`date -d "yesterday" "+%d"`

INPUT=$WIFI_TRAFFIC_PATH/$year$month/http/$year$month$day-*.gz
# An optional first argument (expected form YYYY-MM-DD) overrides the date
# parts. "${1:-}" is quoted so that the test is well-formed when no argument
# is passed; the unquoted `[ $1 != "" ]` degenerated to `[ != "" ]`.
if [[ -n "${1:-}" ]]; then
  year=$(printf '%s\n' "$1" | cut -d'-' -f1)
  month=$(printf '%s\n' "$1" | cut -d'-' -f2)
  day=$(printf '%s\n' "$1" | cut -d'-' -f3)
fi

INPUT=$WIFI_TRAFFIC_PATH/$year$month/http/$year$month$day-*.jn.*
INPUT_TEMP=$HDFS_WIFI_TRAFFIC/HTTP/_temp
OUTPUT=$HDFS_WIFI_TRAFFIC/HTTP/$year$month$day

if ! hadoop fs -test -d $INPUT_TEMP; then
hadoop fs -mkdir -p $INPUT_TEMP
fi

# Decompress files
for rawfile in `ls $INPUT`; do
echo $rawfile
rfext=${rawfile##*.}
rfname=${rawfile%.*}

if ! hadoop fs -test -e $INPUT_TEMP/`basename $rfname`; then
gunzip -c $rawfile | hadoop fs -put - $INPUT_TEMP/`basename $rfname`
if [ $rfext == "gz" ]; then
gunzip -c $rawfile | hadoop fs -put - $INPUT_TEMP/`basename $rfname`
else
hadoop fs -put $rawfile $INPUT_TEMP/`basename $rawfile`
fi
fi
done

Expand Down
45 changes: 45 additions & 0 deletions porters/wifi_traffic_tcp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash

# Report a fatal error and stop the script.
# Arguments: the message; multiple arguments are joined with single spaces.
# Outputs:   the message on stderr (diagnostics do not belong on stdout).
# Exits with status 1.
die() {
  # printf rather than echo, so a message starting with "-" is printed verbatim.
  printf '%s\n' "$*" >&2
  exit 1
}

# Check permission
if [[ "$(whoami)" != 'omnilab' ]]; then
  die "Need permission of OMNILAB to run. Try user omnilab."
fi

# Global vars
BASEDIR="$(dirname "$0")/.."
source "$BASEDIR/global_config.sh"

# clean_trash is called at the end of this script but was never defined in
# it. Define the same helper the sibling porters use, unless a definition is
# already in scope (e.g. provided by global_config.sh).
if ! declare -F clean_trash >/dev/null; then
  clean_trash() {
    hadoop fs -rm -r .Trash/Current >/dev/null
  }
fi

# Check root path for raw data
if [[ ! -d "$WIFI_TRAFFIC_PATH" ]]; then
  die "Cann't find path for archived traffic data: $WIFI_TRAFFIC_PATH"
fi

year=$(date -d "yesterday" "+%Y")
month=$(date -d "yesterday" "+%b")   # abbreviated month name (locale dependent)
month2=$(date -d "yesterday" "+%m")  # two-digit month number
day=$(date -d "yesterday" "+%d")

# NOTE(review): the directory component uses the abbreviated month name
# ($month), whereas the HTTP porter builds it from the numeric month — confirm.
INPUT_DIR="$WIFI_TRAFFIC_PATH/$year$month/tcp"
# Braces are required: "$day_$month_$year" expanded the unset variables
# "day_" and "month_", so the pattern degraded to "*<year>.out".
INPUT_SUFFIX="_${day}_${month}_${year}.out"

# Not used below; kept as in the original script.
OUTPUT_TCP=$HDFS_WIFI_TRAFFIC/TCP/$year$month2$day
OUTPUT_TCP_NOCOMPLETE=$HDFS_WIFI_TRAFFIC/TCP_NOCOMPLETE/$year$month2$day
OUTPUT_UDP=$HDFS_WIFI_TRAFFIC/UDP/$year$month2$day

# INPUT_TEMP was never assigned in this script, so uploads targeted "/<name>".
# NOTE(review): the default below mirrors the HTTP porter's "_temp" layout —
# confirm the intended staging directory.
INPUT_TEMP=${INPUT_TEMP:-$HDFS_WIFI_TRAFFIC/TCP/_temp}
if ! hadoop fs -test -d "$INPUT_TEMP"; then
  hadoop fs -mkdir -p "$INPUT_TEMP"
fi

# Decompress files WITHOUT further processing
# NOTE(review): the pattern matches names ending in ".out", yet the files are
# piped through gunzip — confirm they really are gzip-compressed.
for file in "$INPUT_DIR"/*"$INPUT_SUFFIX"; do
  # An unmatched glob stays literal; skip it when there is nothing to upload.
  [[ -e "$file" ]] || continue
  echo "$file"
  rfname=$(basename "${file%.*}")

  if ! hadoop fs -test -e "$INPUT_TEMP/$rfname"; then
    gunzip -c "$file" | hadoop fs -put - "$INPUT_TEMP/$rfname"
  fi
done

clean_trash

exit 0
Loading

0 comments on commit 48775a8

Please sign in to comment.