Skip to content

Commit

Permalink
Remove log bug about year loss
Browse files Browse the repository at this point in the history
  • Loading branch information
caesar0301 committed Dec 9, 2015
1 parent 8b6e7b8 commit a3e091a
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@
* Local4: 1001 111.186.48.1/20, New: 10.187.0.0/16
* Local5: 1001 111.186.0.1/20, New: 10.184.0.0/16
*
* The year is also missing from the timestamp for most months of 2013.
* The pattern matching must therefore treat the year as optional.
*
*
* @Author chenxm, gwj
*/
Expand All @@ -56,7 +59,7 @@ public static String filterData(String rawLogEntry) throws IOException {
final int[] CODE_USRSTATUS = {522005, 522006, 522026}; // User Entry added, deleted, and user miss
final int[] CODE_USERROAM = {500010};

final String regPrefix = "(\\w+\\s+\\d+\\s+(?:\\d{1,2}:){2}\\d{1,2}\\s+\\d{4})";
final String regPrefix = "(\\w+\\s+\\d+\\s+(?:\\d{1,2}:){2}\\d{1,2}(?:\\s+\\d{4})?)";
final String regUserMac = "((?:[0-9a-f]{2}:){5}[0-9a-f]{2})";
final String regApInfo = "((?:\\d{1,3}\\.){3}\\d{1,3})-((?:[0-9a-f]{2}:){5}[0-9a-f]{2})-([\\w-]+)";

Expand Down Expand Up @@ -87,6 +90,8 @@ public static String filterData(String rawLogEntry) throws IOException {
return cleanLog;

int messageCode = Integer.valueOf(chops[2].split(">", 2)[0]);
System.out.println(messageCode);

if (hasCodes(messageCode, CODE_AUTHREQ)) { // Auth request
Matcher matcher = REG_AUTHREQ.matcher(rawLogEntry);
if (matcher.find()) {
Expand All @@ -97,6 +102,7 @@ public static String filterData(String rawLogEntry) throws IOException {
}
} else if (hasCodes(messageCode, CODE_DEAUTH)) { // Deauth from and to
Matcher matcher = REG_DEAUTH.matcher(rawLogEntry);
System.out.println(matcher.find());
if (matcher.find()) {
String time = formattrans(matcher.group(1));
String usermac = matcher.group(2).replaceAll(":", "");
Expand Down Expand Up @@ -190,6 +196,7 @@ private static int putRecordMap(String userMac, String record, Map<String, List<

//This function is used to change the date format from "May 4" to "2013-05-04"
private static String formattrans(String date_string){

//Prepare for the month name for date changing
TreeMap<String, String> month_tmap = new TreeMap<String, String>();
month_tmap.put("Jan", "01");
Expand All @@ -207,13 +214,18 @@ private static String formattrans(String date_string){

//change the date from "May 4" to "2013-05-04"
// month: group(1), day: group(2), time: group(3), year: group(4)
String date_reg = "(\\w+)\\s+(\\d+)\\s+((?:\\d{1,2}:){2}\\d{1,2})\\s+(\\d{4})";
String date_reg = "(\\w+)\\s+(\\d+)\\s+((?:\\d{1,2}:){2}\\d{1,2})(?:\\s+(\\d{4}))?";
Pattern date_pattern = Pattern.compile(date_reg);
Matcher date_matcher = date_pattern.matcher(date_string);
if(! date_matcher.find())
return null;

String year_string=date_matcher.group(4);
String year_string = date_matcher.group(4);
if ( year_string == null ) {
// We may lose year before 2013
year_string = "2013";
}

//change the month format
String month_string = date_matcher.group(1);
if(month_tmap.containsKey(month_string)){
Expand Down
26 changes: 26 additions & 0 deletions global_config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash

####################################
## Local data repos at NFS
####################################

DATA_FTP='/mnt/omnidata'

WIFI_TRAFFIC_PATH=$DATA_FTP/SJTU/wifi-archive

WIFI_SYSLOG_PATH=$DATA_FTP/SJTU/wifi-syslog

HZ_MOBILE_PATH=$DATA_FTP/NetworkTraffic/mobilelogs-hz2012/Original


####################################
## Data repos on HDFS
####################################

HDFS_WIFI_TRAFFIC='/user/omnilab/warehouse/WifiTraffic'

HDFS_WIFI_SYSLOG='/user/omnilab/warehouse/WifiSyslog'

HDFS_HZ_MOBILE='/user/omnilab/warehouse/HzMobile'

HDFS_D4D_SENEGAL='/user/omnilab/warehouse/Senegal'
55 changes: 55 additions & 0 deletions porters/wifi_syslog.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

function clean_trash () {
hadoop fs -rm -r .Trash/Current > /dev/null
}

function die () {
echo "${@}"
exit 1
}

# Global vars
BASEDIR=$(dirname $0)/..
source $BASEDIR/global_config.sh

# Cleansing tools
BINJAR=$BASEDIR/etlers/ArubaSyslog/target/scala-2.10/ArubaSyslog-assembly-1.0.jar
CLSNAME="cn.edu.sjtu.omnilab.odh.spark.CleanseWifiLogs"

TEMPWP=$HDFS_WIFI_SYSLOG/_temp
hadoop fs -rm -r $TEMPWP
hadoop fs -mkdir -p $TEMPWP

# Process yesterday's file
for rawfile in `ls $WIFI_SYSLOG_PATH`; do

rfname=${rawfile%.*}

year=`echo $rfname | cut -d "-" -f1 | sed -e 's/wifilog\([0-9]*\)/\1/g'`
month=`echo $rfname | cut -d "-" -f2`
day=`echo $rfname | cut -d "-" -f3`

if [ $year$month$day == $(date -d "yesterday" '+%Y%m%d') ]; then

if ! hadoop fs -test -e $HDFS_WIFI_SYSLOG/$rfname/_SUCCESS; then

# Decompress file
if ! hadoop fs -test -e $TEMPWP/$rfname; then
gunzip -c $WIFI_SYSLOG_PATH/$rawfile | hadoop fs -put - $TEMPWP/$rfname
fi

# Cleanse wifilog
hadoop fs -rm -r $HDFS_WIFI_SYSLOG/$rfname
spark-submit2 --class $CLSNAME $BINJAR $TEMPWP/$rfname $HDFS_WIFI_SYSLOG/$rfname

fi
fi

done

hadoop fs -rm -r $TEMPWP

clean_trash

exit 0;

0 comments on commit a3e091a

Please sign in to comment.