-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
d32ebae
commit 19cc497
Showing
22 changed files
with
323 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This folder contains binary jars in deployment mode. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
name := "ArubaSyslog" | ||
name := "WifiToolkit" | ||
|
||
version := "1.0" | ||
|
||
|
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<module type="JAVA_MODULE" version="4"> | ||
<component name="NewModuleRootManager" inherit-compiler-output="true"> | ||
<exclude-output /> | ||
<content url="file://$MODULE_DIR$"> | ||
<sourceFolder url="file://$MODULE_DIR$/java" isTestSource="false" /> | ||
<sourceFolder url="file://$MODULE_DIR$/scala" isTestSource="false" /> | ||
</content> | ||
<orderEntry type="inheritedJdk" /> | ||
<orderEntry type="sourceFolder" forTests="false" /> | ||
</component> | ||
</module> |
File renamed without changes.
File renamed without changes.
137 changes: 137 additions & 0 deletions
137
etlers/WifiToolkit/src/main/scala/cn/edu/sjtu/omnilab/odh/spark/CleanseHttp.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
package cn.edu.sjtu.omnilab.odh.spark | ||
|
||
|
||
import cn.edu.sjtu.omnilab.odh.utils.{HttpSchema} | ||
import org.apache.spark.{SparkContext, SparkConf} | ||
|
||
|
||
object CleanseHttp { | ||
|
||
def main(args: Array[String]): Unit = { | ||
|
||
if (args.length < 2) { | ||
println("Usage: CleanseHttp <in> <out>") | ||
sys.exit(-1) | ||
} | ||
|
||
val input = args(0) | ||
val output = args(1) | ||
|
||
val conf = new SparkConf() | ||
conf.setAppName("Cleanse HTTP logs") | ||
val spark = new SparkContext(conf) | ||
|
||
spark.textFile(input) | ||
|
||
.map( m => { | ||
val fields = splitLog(m) | ||
|
||
if ( fields == null ) { | ||
null | ||
} else { | ||
// Select required fields | ||
"%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s".format( | ||
|
||
// connection info | ||
fields(HttpSchema.source_ip), | ||
formatInt(fields(HttpSchema.source_port)), | ||
fields(HttpSchema.dest_ip), | ||
formatInt(fields(HttpSchema.dest_port)), | ||
fields(HttpSchema.conn), | ||
|
||
// connection timing metrics | ||
formatDouble(fields(HttpSchema.conn_ts)), | ||
formatDouble(fields(HttpSchema.close_ts)), | ||
formatDouble(fields(HttpSchema.conn_dur)), | ||
formatDouble(fields(HttpSchema.idle_time0)), | ||
formatDouble(fields(HttpSchema.request_ts)), | ||
formatDouble(fields(HttpSchema.request_dur)), | ||
formatDouble(fields(HttpSchema.response_ts)), | ||
formatDouble(fields(HttpSchema.response_dur_b)), | ||
formatDouble(fields(HttpSchema.response_dur_e)), | ||
formatDouble(fields(HttpSchema.idle_time1)), | ||
|
||
// request/response sizes | ||
formatInt(fields(HttpSchema.request_size)), | ||
formatInt(fields(HttpSchema.response_size)), | ||
|
||
// request keywords | ||
fields(HttpSchema.request_method), | ||
formatString(fields(HttpSchema.request_url)), | ||
formatString(fields(HttpSchema.request_host)), | ||
formatString(fields(HttpSchema.request_user_agent)), | ||
formatString(fields(HttpSchema.request_referrer)), | ||
|
||
// response keywords | ||
formatInt(fields(HttpSchema.response_code)), | ||
formatString(fields(HttpSchema.response_server)), | ||
formatString(fields(HttpSchema.response_ctype)) | ||
) | ||
} | ||
}) | ||
|
||
.filter(m => m != null) | ||
|
||
.saveAsTextFile(output) | ||
|
||
} | ||
|
||
/** | ||
* Split each http log entry into fields. | ||
* Null fields will be empty string. | ||
* | ||
* @param line a single entry of HTTP log | ||
* @return an array of separated parts | ||
*/ | ||
def splitLog(line: String): Array[String] = { | ||
// get HTTP header fields | ||
val chops = line.split("""\"\s\""""); | ||
if ( chops.length != 21 ) | ||
return null | ||
|
||
// get timestamps | ||
val timestamps = chops(0).split(" "); | ||
if (timestamps.length != 18 ) | ||
return null | ||
|
||
val results = timestamps ++ chops.slice(1, 21) | ||
|
||
// remove N/A values and extrat quote | ||
results.transform( field => { | ||
var new_field = field.replaceAll("\"", "") | ||
if (new_field == "N/A") | ||
new_field = "" | ||
new_field | ||
}) | ||
|
||
results | ||
} | ||
|
||
|
||
def formatDouble(field: String): String = { | ||
try { | ||
"%.3f".format(field.toDouble) | ||
} catch { | ||
case e: Exception => "" | ||
} | ||
} | ||
|
||
|
||
def formatInt(field: String): String = { | ||
try { | ||
"%d".format(field.toInt) | ||
} catch { | ||
case e: Exception => "" | ||
} | ||
} | ||
|
||
|
||
def formatString(field: String): String = { | ||
try { | ||
field.replaceAll("|", "") | ||
} catch { | ||
case e: Exception => field | ||
} | ||
} | ||
|
||
} |
1 change: 1 addition & 0 deletions
1
...u/omnilab/odh/spark/CleanseWifiLogs.scala → ...u/omnilab/odh/spark/CleanseWifiLogs.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
47 changes: 47 additions & 0 deletions
47
etlers/WifiToolkit/src/main/scala/cn/edu/sjtu/omnilab/odh/utils/HttpSchema.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
package cn.edu.sjtu.omnilab.odh.utils | ||
|
||
/** | ||
* Data Schema of SJTU HTTP logs | ||
*/ | ||
object HttpSchema { | ||
|
||
val source_ip = 0 | ||
val source_port = 1 | ||
val dest_ip = 2 | ||
val dest_port = 3 | ||
val conn = 4 | ||
val conn_ts = 5 | ||
val close_ts = 6 | ||
val conn_dur = 7 | ||
val idle_time0 = 8 | ||
val request_ts = 9 | ||
val request_dur = 10 | ||
val response_ts = 11 | ||
val response_dur_b = 12 | ||
val response_dur_e = 13 | ||
val idle_time1 = 14 | ||
val request_size = 15 | ||
val response_size = 16 | ||
val request_method = 17 | ||
val request_url = 18 | ||
val request_protocol = 19 | ||
val request_host = 20 | ||
val request_user_agent = 21 | ||
val request_referrer = 22 | ||
val request_conn = 23 | ||
val request_keep_alive = 24 | ||
val response_protocol = 25 | ||
val response_code = 26 | ||
val response_server = 27 | ||
val response_clen = 28 | ||
val response_ctype = 29 | ||
val response_cenc = 30 | ||
val response_etag = 31 | ||
val response_cache_ctl = 32 | ||
val response_last_mod = 33 | ||
val response_age = 34 | ||
val response_expire = 35 | ||
val response_connval = 36 | ||
val response_keep_alive = 37 | ||
|
||
} |
2 changes: 1 addition & 1 deletion
2
...cn/edu/sjtu/omnilab/odh/spark/Utils.scala → ...cn/edu/sjtu/omnilab/odh/utils/Utils.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
#!/bin/bash | ||
|
||
function clean_trash () { | ||
hadoop fs -rm -r .Trash/Current > /dev/null | ||
} | ||
|
||
function die () { | ||
echo "${@}" | ||
exit 1 | ||
} | ||
|
||
# Check permission | ||
if [ `whoami` != 'omnilab' ]; then | ||
die "Need permission of OMNILAB to run. Try user omnilab." | ||
fi | ||
|
||
# Global vars | ||
BASEDIR=$(dirname $0)/.. | ||
source $BASEDIR/global_config.sh | ||
|
||
# Cleansing tools | ||
BINJAR=$BASEDIR/deploy/WifiToolkit-assembly-1.0.jar | ||
CLSNAME="cn.edu.sjtu.omnilab.odh.spark.CleanseHttp" | ||
|
||
# Check root path for raw data | ||
if [ ! -d $WIFI_TRAFFIC_PATH ]; then | ||
die "Cann't find path for archived traffic data: $WIFI_TRAFFIC_PATH" | ||
fi | ||
|
||
year=`date -d "yesterday" "+%Y"` | ||
month=`date -d "yesterday" "+%m"` | ||
day=`date -d "yesterday" "+%d"` | ||
|
||
INPUT=$WIFI_TRAFFIC_PATH/$year$month/http/$year$month$day-*.gz | ||
INPUT_TEMP=$HDFS_WIFI_TRAFFIC/HTTP/_temp | ||
OUTPUT=$HDFS_WIFI_TRAFFIC/HTTP/$year$month$day | ||
|
||
# Decompress files | ||
for rawfile in `ls $INPUT`; do | ||
echo $rawfile | ||
rfname=${rawfile%.*} | ||
|
||
if ! hadoop fs -test -e $INPUT_TEMP/`basename $rfname`; then | ||
gunzip -c $rawfile | hadoop fs -put - $INPUT_TEMP/`basename $rfname` | ||
fi | ||
done | ||
|
||
if ! hadoop fs -test -e $OUTPUT/_SUCCESS; then | ||
hadoop fs -rm -r $OUTPUT | ||
spark-submit2 --class $CLSNAME $BINJAR $INPUT_TEMP/$year$month$day* $OUTPUT | ||
hadoop fs -rm -r $INPUT_TEMP | ||
fi | ||
|
||
clean_trash | ||
|
||
exit 0; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# WifiSyslog | ||
|
||
This repo contains updated Wifi HTTP traffic logs. | ||
|
||
|
||
## Data path | ||
|
||
hdfs://user/omnilab/warehouse/WifiTraffic/HTTP | ||
|
||
|
||
## Data format | ||
|
||
There are 25 fields recorded and each line represents a request-response (RR) pair. | ||
|
||
**Connection info.** | ||
|
||
* [0] source_ip: string, the source IP address | ||
* [1] source_port: int, the source port number of a connection | ||
* [2] dest_ip: string, the destination IP address | ||
* [3] dest_port: int, the destination port number of a connection | ||
* [4] connection: string, position indicator of an RR pair in a connection | ||
|
||
**Connection timings** | ||
|
||
* [5] connect_ts: double, the timestamp to start the TCP connection | ||
* [6] close_ts: double, the timestamp to terminate the TCP connection | ||
* [7] connection_dur: double, the duration in seconds of the whole TCP connection | ||
* [8] idle_time0: double, the idle interval between connection success and first byte of request | ||
* [9] request_ts: double, the timestamp to send the first byte of request | ||
* [10] request_dur: double, the duration to sending out request data | ||
* [11] response_ts: double, the timestsamp to receive the first byte of response | ||
* [12] response_dur_b: double, the idle interval between last byte of request and first byte of response | ||
* [13] response_dur_e: double, the idle interval between first byte of response and last byte of response | ||
* [14] idle_time1: double, the idle interval between response end and next new request or TCP connection end. | ||
|
||
**RR volumes** | ||
|
||
* [15] request_size: int, the size of request (including request header length) | ||
* [16] response_size: int, the size of response (including response header length) | ||
|
||
**Request keywords** | ||
|
||
* [17] request_method: string, request method, such as GET, POST. | ||
* [18] request_url: string, requested URL | ||
* [19] request_host: string, remote server name | ||
* [20] request_user_agent: string, indicator of user agent | ||
* [21] request_referrer: string, request referrer | ||
|
||
**Response keywords** | ||
|
||
* [22] response_code: int, response statues | ||
* [23] response_server: string, the remote server type | ||
* [24] response_ctype: string, the content type | ||
|
||
|
||
## Data sample | ||
|
||
10.186.227.204|63851|114.66.198.57|80|start|1446307201.983||0.030|0.000|1446307202.129|0.000|1446307202.413|0.028|0.000|0.004|589|291|GET|/tools/FsPlatformAction?rprotocol=3*_*action=161.Foamii*_*actionresult=13888*_*actionobjectver=0*_*channelid=*_*mac=64D954A9C582*_*guid=83fced5c-ac31-4e78-880e-1a201ee9cff6*_*name=Aptupd*_*version=3.0.7.5*_*actiontime=|1|other*_*pullupname=FunWorks64*_*pullupversion=*_*cid=1799*_*aptid=1|3|stat.funshion.net|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.3; WOW64; Trident/7.0; .NET4.0E; .NET4.0C; .NET CLR 3.5.30729; .NET CLR 2.0.50727; .NET CLR 3.0.30729; Shuame; GWX:RESERVED) Funshion/1.0.0.1||200|nginx/1.2.2|text/plain | ||
10.186.227.204|63851|114.66.198.57|80|continue|||||1446307202.449|0.000|1446307202.729|0.028|0.000|0.006|589|291|GET|/tools/FsPlatformAction?rprotocol=3*_*action=161.Foamii*_*actionresult=13825*_*actionobjectver=0*_*channelid=*_*mac=64D954A9C582*_*guid=83fced5c-ac31-4e78-880e-1a201ee9cff6*_*name=Aptupd*_*version=3.0.7.5*_*actiontime=|1|other*_*pullupname=FunWorks64*_*pullupversion=*_*cid=1799*_*aptid=1|3|stat.funshion.net|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.3; WOW64; Trident/7.0; .NET4.0E; .NET4.0C; .NET CLR 3.5.30729; .NET CLR 2.0.50727; .NET CLR 3.0.30729; Shuame; GWX:RESERVED) Funshion/1.0.0.1||200|nginx/1.2.2|text/plain | ||
10.187.252.52|62241|121.195.187.54|80|unique|1446307202.270|1446307202.382|0.036|0.009|1446307202.315|0.000|1446307202.349|0.034|0.000|0.033|780|1349|POST|/sugg?ifc=4&em=4|s.wisdom.www.sogou.com|SogouPSI||200|nginx| | ||
10.186.227.204|63851|114.66.198.57|80|last||1446307202.400|||1446307202.787|0.000|1446307202.107|0.028|0.000|0.293|579|291|GET|/tools/FsPlatformAction?rprotocol=3*_*action=161.Foamii*_*actionresult=13829*_*actionobjectver=0*_*channelid=*_*mac=64D954A9C582*_*guid=83fced5c-ac31-4e78-880e-1a201ee9cff6*_*name=Aptupd*_*version=3.0.7.5*_*actiontime=*_*pullupname=FunWorks64*_*pullupversion=*_*cid=1799*_*aptid=3|stat.funshion.net|Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.3; WOW64; Trident/7.0; .NET4.0E; .NET4.0C; .NET CLR 3.5.30729; .NET CLR 2.0.50727; .NET CLR 3.0.30729; Shuame; GWX:RESERVED) Funshion/1.0.0.1||200|nginx/1.2.2|text/plain |