Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

busybox ash #2

Open
wants to merge 8 commits into
base: 1_23_stable-cernvm
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
183 changes: 183 additions & 0 deletions collect_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
#!/usr/bin/python

'''
Python file that loads the data containing time stamps of all commands
executing during booting time of a virtual machine and analyzes them to
get duration of each so it can provide the best part to be optimized
and reduced in a sense of time.
'''

import os
import numpy as numpy
import fnmatch
from operator import itemgetter


if ("ASH_SCORES" in os.environ):
directoryScores = os.environ["ASH_SCORES"]
else:
print("Chosen directory with data from BusyBox ash does not exist.")
exit()

def loadScores():
'''
Method that loads all time stamps related to the specific commands,
removes duplicates and time stamps that were used just as an
orientation (e.g. -1).
Result is written in a text file named "all".
'''

mergedFiles = directoryScores + "/all"
fAll = open(mergedFiles, "w")
fAll.write("command!start!pid!end\n")
allWrittenLines = []
for scorePid in fnmatch.filter(os.listdir(directoryScores), 'scores.csv.*'):

scoreFile = directoryScores + "/" + scorePid
f = open(scoreFile, "r")
lines = f.readlines()
f.close()

allShareLines = []
sharePidLines = []
listWritten = []
for line in lines:
flag = False
if (line != "command!start!pid!end\n") :
#set the flag if line contains -1 meaning it takes more time to execute complex commands
for i in range ( len(line.split('!'))):
item = line.split('!')[i]
if ("-1" in item and (len(item) == 2 or item.endswith('\n'))):
flag = True

#list of repeating pids
if (flag == False and findByPid(line, listWritten) == False and line not in allWrittenLines):
fAll.write(line)
listWritten.append(line)

for i in range(len(listWritten)):
allWrittenLines.append(listWritten[i])



fAll.close()


def findByPid(line, writtenLines):
'''
Method that returns true statement if there already exists a time stamp
related to the specific pid, i.e. if that command is already written
in a text file with results.
'''

for i in range(len(writtenLines)):
if (len(line.split('!')) == len(writtenLines[i].split('!'))):
countTheSame = 0
for j in range(len(writtenLines[i].split('!'))):
if (writtenLines[i].split('!')[j] == line.split('!')[j]):
countTheSame += 1
if (countTheSame == len(writtenLines[i].split('!'))-1):
return True
return False



def hasSamePid(first, second):
'''
Method that returns true statement if two lines containing the
command, start time stamp and pid, i.e. end time stamp and pid have
the same pid so that duration can be calculated for that specific pid.
'''

first = first.split('!')[2]
if first.endswith('\n'):
first = first[:-1]

if (first == second.split('!')[2]):
return True
return False



def calculateDuration():
'''
Given all scores from the file "all", this method needs to group lines
two by two based on its pid to calculate duraion of each process that
is executing. When duration is calculated, results are sorted based
on it value and saved to a new file named "sortedAll".
'''

mergedFiles = directoryScores + "/all"
newSorted = directoryScores + "/sortedAll"
fAll = open(mergedFiles, "r")
fSort = open(newSorted, "w")
fSort.write("command ! total ! nbOfTimes ! avgDuration\n")
lines = fAll.readlines()
minStart = float("inf")
maxEnd = 0

for i in range(len(lines)):

if (lines[i] != "command!start!pid!end\n"):

if (len(lines[i].split("!")) == 3):
firstTime = lines[i].split('!')[1]
firstLine = [long(s) for s in firstTime.split() if s.isdigit()]
if (firstLine[0] < minStart):

minStart = firstLine[0]
else:
firstTime = lines[i].split('!')[3]
firstLine = [long(s) for s in firstTime.split() if s.isdigit()]
if (firstLine[0] > maxEnd):

maxEnd = firstLine[0]


duration = maxEnd- minStart
writeLine = "total duration: " + "%ld" % duration + "\n"
fSort.write(writeLine)

usedCommands=[]
hashAll = {}
for i in range(1, len(lines) - 1):
if (lines[i] != "command!start!pid!end\n" and len(lines[i].split('!')) == 3):
j = i+1
while (j < len(lines)):
secondLine = lines[j]
if (len(lines[i].split('!')) < len(secondLine.split('!'))):
if (hasSamePid(lines[i],secondLine)):
a = lines[i].split('!')[2]
if a.endswith('\n'):
a = a[:-1]
firstTime = lines[i].split('!')[1]
firstLine = [long(s) for s in firstTime.split() if s.isdigit()]
secondTime= secondLine.split('!')[3]
secondLine = [long(s) for s in secondTime.split() if s.isdigit()]

duration = secondLine[0] - firstLine[0]
comLine = lines[i].split('!')[0]
if (comLine not in usedCommands):
usedCommands.append(comLine)
hashAll[comLine] = [duration, 1, duration]
else:
hashAll[comLine][0] += duration
hashAll[comLine][1] += 1
hashAll[comLine][2] = hashAll[comLine][0]/hashAll[comLine][1]
break
else:
j += 1
continue
else:
j += 1
continue

hashAll=sorted(hashAll.items(), key=lambda e: e[1][2])
for i in range(len(hashAll)):
writeLine = "\n" + hashAll[i][0] + "!" + " %ld " % hashAll[i][1][0] + "!" + " %ld " % hashAll[i][1][1] + "!" + " %ld " % hashAll[i][1][2]
fSort.write(writeLine)
fAll.close()
fSort.close()



13 changes: 13 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/python

'''
python file to run all functions needed to analyze
data from busybox ash
'''

import collect_data as cd

cd.loadScores()

cd.calculateDuration()

Loading