From 8bbd45d3b904695d4afe3a9ca0e6b6852d93e86d Mon Sep 17 00:00:00 2001
From: Konstantinos Kallas
Date: Mon, 21 Aug 2023 17:42:45 +0300
Subject: [PATCH 01/13] New rewrites of dgsh scripts, cleaner and more meaningful

---
 evaluation/benchmarks/dgsh/sequential/1.sh |  21 ++++
 evaluation/benchmarks/dgsh/sequential/2.sh |  24 ++++
 evaluation/benchmarks/dgsh/sequential/3.sh | 132 +++++++++++++++++++++
 3 files changed, 177 insertions(+)
 create mode 100755 evaluation/benchmarks/dgsh/sequential/1.sh
 create mode 100755 evaluation/benchmarks/dgsh/sequential/2.sh
 create mode 100755 evaluation/benchmarks/dgsh/sequential/3.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/1.sh b/evaluation/benchmarks/dgsh/sequential/1.sh
new file mode 100755
index 000000000..5f15105a1
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/1.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+## Buffer stdin in a temporary file so every tool below can re-read it
+file1=$(mktemp)
+
+cat >"$file1"
+
+printf 'File type:\t'
+file - <"$file1"
+
+printf 'Original size:\t'
+wc -c <"$file1"
+
+printf 'xz:\t\t'
+xz -c <"$file1" | wc -c
+
+printf 'bzip2:\t\t'
+bzip2 -c <"$file1" | wc -c
+
+printf 'gzip:\t\t'
+gzip -c <"$file1" | wc -c
diff --git a/evaluation/benchmarks/dgsh/sequential/2.sh b/evaluation/benchmarks/dgsh/sequential/2.sh
new file mode 100755
index 000000000..64eb30cf2
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/2.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+## Note: Needs to be run on a big git repository to make sense (maybe linux)
+
+## Temporary file holding one "author:date" line per commit
+file1=$(mktemp)
+# Rank the lines of stdin by descending number of occurrences
+forder()
+{
+  sort |
+  uniq -c |
+  sort -rn
+}
+
+
+git log --format="%an:%ad" --date=default "$@" >"$file1"
+
+echo "Authors ordered by number of commits"
+# Field 1 is the author name
+awk -F: '{print $1}' <"$file1" | forder
+
+echo "Days ordered by number of commits"
+# Field 2 starts with the three-letter weekday of the default date format
+awk -F: '{print substr($2, 1, 3)}' <"$file1" | forder
diff --git a/evaluation/benchmarks/dgsh/sequential/3.sh b/evaluation/benchmarks/dgsh/sequential/3.sh
new file mode 100755
index 000000000..5c9611f79
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/3.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+## Note: Needs to be run on a big git repository to make sense (maybe linux)
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+
+find "$@" \( -name \*.c -or -name \*.h \) -type f -print0 >"$file1"
+
+echo -n 'FNAMELEN: '
+
+tr \\0 \\n <"$file1" |
+# Remove path
+sed 's|^.*/||' |
+# Maintain average
+awk '{s += length($1); n++} END {
+  if (n>0)
+    print s / n;
+  else
+    print 0; }'
+
+xargs -0 /bin/cat <"$file1" >"$file2"
+
+sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file2" |
+  cpp -P >"$file3"
+
+# Structure definitions
+echo -n 'NSTRUCT: '
+
+grep -E -c 'struct[[:blank:]]*{|struct[[:blank:]]*[a-zA-Z_][a-zA-Z0-9_]*[[:blank:]]*{' <"$file3"
+#}} (match preceding openings)
+
+# Type definitions
+echo -n 'NTYPEDEF: '
+grep -cw typedef <"$file3"
+
+# Use of void
+echo -n 'NVOID: '
+grep -cw void <"$file3"
+
+# Use of gets
+echo -n 'NGETS: '
+grep -cw gets <"$file3"
+
+# Average identifier length
+echo -n 'IDLEN: '
+
+tr -cs 'A-Za-z0-9_' '\n' <"$file3" |
+sort -u |
+awk '/^[A-Za-z]/ { len += length($1); n++ } END {
+  if (n>0)
+    print len / n;
+  else
+    print 0; }'
+
+echo -n 'CHLINESCHAR: '
+wc -lc <"$file2" |
+  awk '{OFS=":"; print $1, $2}'
+
+echo -n 'NCCHAR: '
+sed 's/#/@/g' <"$file2" |
+cpp -traditional -P |
+wc -c |
+awk '{OFMT = "%.0f"; print $1/1000}'
+
+# Number of comments
+echo -n 'NCOMMENT: '
+grep -E -c '/\*|//' <"$file2"
+
+# Occurrences of the word Copyright
+echo -n 'NCOPYRIGHT: '
+grep -ci copyright <"$file2"
+
+# C files (file2 and file3 are reused from here on)
+find "$@" -name \*.c -type f -print0 >"$file2"
+
+# Convert to newline separation for counting
+tr \\0 \\n <"$file2" >"$file3"
+
+# Number of C files
+echo -n 'NCFILE: '
+wc -l <"$file3"
+
+# Number of directories containing C files
+echo -n 'NCDIR: '
+sed 's,/[^/]*$,,;s,^.*/,,' <"$file3" |
+sort -u |
+wc -l
+
+# C code
+xargs -0 /bin/cat <"$file2" >"$file3"
+
+# Lines and characters
+echo -n 'CLINESCHAR: '
+wc -lc <"$file3" |
+awk '{OFS=":"; print $1, $2}'
+
+# C code without comments and strings
+sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file3" |
+cpp -P >"$file4"
+
+# Number of functions
+echo -n 'NFUNCTION: '
+grep -c '^{' <"$file4"
+
+# Number of gotos
+echo -n 'NGOTO: '
+grep -cw goto <"$file4"
+
+# Occurrences of the register keyword
+echo -n 'NREGISTER: '
+grep -cw register <"$file4"
+
+# Number of macro definitions
+echo -n 'NMACRO: '
+grep -c '@[[:blank:]]*define[[:blank:]][[:blank:]]*[a-zA-Z_][a-zA-Z0-9_]*(' <"$file4"
+# Number of include directives
+echo -n 'NINCLUDE: '
+grep -c '@[[:blank:]]*include' <"$file4"
+
+# Number of constants
+echo -n 'NCONST: '
+grep -ohw '[0-9][x0-9][0-9a-f]*' <"$file4" | wc -l
+
+
+# Header files
+echo -n 'NHFILE: '
+find "$@" -name \*.h -type f |
+wc -l
\ No newline at end of file
From 3fb753799bf589e950da796acbe67853ab01fd26 Mon Sep 17 00:00:00 2001
From: Konstantinos Kallas
Date: Thu, 31 Aug 2023 14:22:28 -0400
Subject: [PATCH 02/13] Add 4.sh

---
 evaluation/benchmarks/dgsh/sequential/4.sh | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100755 evaluation/benchmarks/dgsh/sequential/4.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/4.sh b/evaluation/benchmarks/dgsh/sequential/4.sh
new file mode 100755
index 000000000..36fa22fd8
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/4.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+
+# Create list of files (NUL-separated so odd file names survive xargs)
+find "$@" -type f -print0 |
+
+# Produce lines of the form
+# MD5(filename)= 811bfd4b5974f39e986ddc037e1899e7
+xargs -0 openssl md5 |
+
+# Convert each line into a "filename md5sum" pair
+sed 's/^MD5(//;s/)= / /' |
+
+# Sort by MD5 sum
+sort -k2 > "$file1"
+
+# Print an MD5 sum for each file that appears more than once
+awk '{print $2}' < "$file1" | uniq -d > "$file2"
+
+
+# Join the repeated MD5 sums with the corresponding file names
+# file2 holds the repeated sums (field 1), file1 the "name sum" pairs
+# (field 2); both are ordered by MD5 sum, as join requires
+join -2 2 "$file2" "$file1" |
+
+# Output same files on a single line
+awk '
+BEGIN {ORS=""}
+$1 != prev && prev {print "\n"}
+END {if (prev) print "\n"}
+{if (prev) print " "; prev = $1; print $2}'
From 5120cf0e1d98cfaf4f6fec1a180d980870a248d5 Mon Sep 17 00:00:00 2001
From: Konstantinos Kallas
Date: Thu, 31 Aug 2023 14:41:09 -0400
Subject: [PATCH 03/13] Two more dgsh scripts

---
 evaluation/benchmarks/dgsh/sequential/5.sh | 28 ++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100755 evaluation/benchmarks/dgsh/sequential/5.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/5.sh b/evaluation/benchmarks/dgsh/sequential/5.sh
new file mode 100755
index 000000000..5bfe5cf89
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/5.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+
+export LC_ALL=C
+
+cat > "$file1"
+
+# Find errors
+
+# Obtain list of words in text
+cat "$file1" |
+tr -cs A-Za-z \\n |
+tr A-Z a-z |
+sort -u > "$file2"
+
+# Ensure dictionary is compatibly sorted
+# (sort reads the dictionary file itself; it needs nothing from stdin)
+sort /usr/share/dict/words > "$file3"
+
+# List errors as a set difference
+comm -23 "$file2" "$file3" > "$file4"
+
+grep -F -f "$file4" -i --color -w -C 2 "$file1"
\ No newline at end of file
From bc71b6c0dd4fe7bfc54be7baa7de91a6b2b536a8 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 09:11:14 +0300
Subject: [PATCH 04/13] Rewrite 6.sh script

---
 evaluation/benchmarks/dgsh/sequential/6.sh | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/6.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/6.sh b/evaluation/benchmarks/dgsh/sequential/6.sh
new file mode 100644
index 000000000..f2ad97e7f
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/6.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+file5=$(mktemp)
+
+cat > "$file1"
+
+# Consistent sorting across machines
+export LC_ALL=C
+
+# Stream input from file and split input one word per line
+tr -cs a-zA-Z '\n' < "$file1" |
+# Create list of unique words
+sort -u > "$file2"
+
+# List two-letter palindromes (non-matching words become empty lines)
+sed 's/.*\(.\)\(.\)\2\1.*/p: \1\2-\2\1/;t
+  g' "$file2" > "$file3"
+
+# List four consecutive consonants (non-matching words become empty lines)
+sed -E 's/.*([^aeiouyAEIOUY]{4}).*/c: \1/;t
+  g' "$file2" > "$file4"
+
+# List length of words longer than 12 characters
+awk '{if (length($1) > 12) print "l:", length($1);
+  else print ""}' "$file2" > "$file5"
+
+# Paste the four streams side-by-side
+paste "$file2" "$file3" "$file4" "$file5" |
+# List only words satisfying one or more properties
+grep -F :
From 4b3d3f1f581d3da0262add71bb0b3967d0ef19e5 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 11:33:42 +0300
Subject: [PATCH 05/13] Rewrite 7.sh dgsh script

---
 evaluation/benchmarks/dgsh/sequential/7.sh | 62 ++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/7.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/7.sh b/evaluation/benchmarks/dgsh/sequential/7.sh
new file mode 100644
index 000000000..36eae12b5
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/7.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Consistent sorting across machines
+export LC_ALL=C
+
+# Convert input into a ranked frequency list ("count item", highest first)
+ranked_frequency()
+{
+  awk '{count[$1]++} END {for (i in count) print count[i], i}' |
+  # We want the standard sort here
+  sort -rn
+}
+
+# Convert standard input to a ranked frequency list of specified n-grams
+ngram()
+{
+  local N=$1
+
+  perl -ne 'for ($i = 0; $i < length($_) - '"$N"'; $i++) {
+    print substr($_, $i, '"$N"'), "\n";
+  }' |
+  ranked_frequency
+}
+
+# Temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+
+cat > "$file1"
+
+# Split input one word per line
+tr -cs a-zA-Z '\n' < "$file1" > "$file2"
+
+# Digram frequency
+echo "Digram frequency"
+ngram 2 < "$file2"
+
+# Trigram frequency
+echo "Trigram frequency"
+ngram 3 < "$file2"
+
+# Word frequency
+echo "Word frequency"
+ranked_frequency < "$file2"
+
+# Store number of characters to use in awk below
+nchars=$(wc -c < "$file1")
+
+# Character frequency
+echo "Character frequency"
+sed 's/./&\
+/g' < "$file1" |
+# Print absolute
+ranked_frequency | tee "$file3"
+
+# Print relative
+echo "Relative character frequency"
+awk -v NCHARS="$nchars" 'BEGIN {
+  OFMT = "%.2g%%"}
+  {print $1, $2, $1 / NCHARS * 100}' "$file3"
+
From 5fddc71eef01b24e2e0d8e80ce0a9223bce569be Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 22:18:28 +0300
Subject: [PATCH 06/13] Rewrite 11.sh dgsh script

---
 evaluation/benchmarks/dgsh/sequential/11.sh | 112 ++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/11.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/11.sh b/evaluation/benchmarks/dgsh/sequential/11.sh
new file mode 100644
index 000000000..1ef24687c
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/11.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+file5=$(mktemp)
+file6=$(mktemp)
+file7=$(mktemp)
+
+export LC_ALL=C
+
+# Commit history in the form of ascending Unix timestamps, emails
+git log --pretty=tformat:'%at %ae' |
+awk -v now="$(date +%s)" 'NF == 2 && $1 > 100000 && $1 < now' |
+sort -n > "$file1"
+
+# Calculate number of committers
+awk '{print $2}' "$file1" |
+sort -u |
+wc -l > "$file2"
+cp "$file2" "$file3"
+cp "$file2" "$file4"
+
+# Calculate last commit timestamp in seconds
+tail -n 1 "$file1" |
+awk '{print $1}' > "$file5"
+
+# Calculate first commit timestamp in seconds
+head -n 1 "$file1" |
+awk '{print $1}' >> "$file5"
+
+# Days between last and first commit; computed before file5 is overwritten
+days=$(tr '\n' ' ' < "$file5" |
+  awk '{print int(($1 - $2) / 60 / 60 / 24)}')
+echo "$days" > "$file5"
+
+sort -k2 "$file1" > "$file6"
+
+# Place committers left/right of the median according to the number of their commits
+awk '{print $2}' "$file1" |
+sort |
+uniq -c |
+sort -n |
+awk -v committers1="$file2" '
+BEGIN {
+  while ((getline NCOMMITTERS < committers1) > 0) {}
+  l = 0; r = NCOMMITTERS;
+}
+{print NR % 2 ? l++ : --r, $2}' |
+sort -k2 > "$file7"
+
+# Join positions with timestamps by email; sort -o reads all input first
+join -j 2 "$file6" "$file7" |
+sort -k 2n -o "$file6"
+
+# Create portable bitmap
+{
+  echo 'P1'
+  {
+    cat "$file3"
+    cat "$file5"
+  } |
+  tr '\n' ' ' |
+  awk '{print $1, $2}'
+
+  perl -na -e '
+  BEGIN {
+    open(my $ncf, "<", "'"$file4"'");
+    $ncommitters = <$ncf>;
+    @empty[$ncommitters - 1] = 0; @committers = @empty;
+  }
+  sub out {
+    print join("", map($_ ? "1" : "0", @committers)), "\n";
+  }
+
+  $day = int($F[1] / 60 / 60 / 24);
+  $pday = $day if (!defined($pday));
+
+  while ($day != $pday) {
+    out();
+    @committers = @empty;
+    $pday++;
+  }
+
+  $committers[$F[2]] = 1;
+
+  END { out(); }
+  ' "$file6"
+} |
+pgmmorphconv -erode <(
+cat <<EOF
+P1
+7 7
+1 1 1 0 1 1 1
+1 1 0 0 0 1 1
+1 0 0 0 0 0 1
+0 0 0 0 0 0 0
+1 0 0 0 0 0 1
+1 1 0 0 0 1 1
+1 1 1 0 1 1 1
+EOF
+) > "$file7"
+# NOTE(review): heredoc kernel and staging in file7 are reconstructed - confirm
+{
+  # Full-scale image
+  pnmtopng < "$file7" >large.png
+  # A smaller image
+  pamscale -width 640 < "$file7" |
+  pnmtopng >small.png
+}
From 0e9d1c87b080d464c369e1574fa26240214d5da8 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 22:19:52 +0300
Subject: [PATCH 07/13] Rename 7.sh to 8.sh

---
 evaluation/benchmarks/dgsh/sequential/{7.sh => 8.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename evaluation/benchmarks/dgsh/sequential/{7.sh => 8.sh} (100%)

diff --git a/evaluation/benchmarks/dgsh/sequential/7.sh b/evaluation/benchmarks/dgsh/sequential/8.sh
similarity index 100%
rename from evaluation/benchmarks/dgsh/sequential/7.sh
rename to evaluation/benchmarks/dgsh/sequential/8.sh
From 8fb9b630d4879416302f168920b7e1dcb2ef90ca Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 18:21:34 +0300
Subject: [PATCH 08/13] Create 16.sh

---
 evaluation/benchmarks/dgsh/sequential/16.sh | 25 +++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/16.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/16.sh b/evaluation/benchmarks/dgsh/sequential/16.sh
new file mode 100644
index 000000000..718935c2f
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/16.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+
+# Save the ls output to a temporary file
+ls -n > "$file1"
+
+# Reorder fields in DIR-like way
+awk '!/^total/ {print $6, $7, $8, $1, sprintf("%8d", $5), $9}' "$file1" > "$file2"
+
+# Count number of files (read via stdin so wc prints no file name)
+wc -l < "$file1" | tr -d \\n > "$file3"
+echo -n ' File(s) ' >> "$file3"
+awk '{s += $5} END {printf("%d bytes\n", s)}' "$file1" >> "$file3"
+
+# Count number of directories and print label for number of dirs and calculate free bytes
+grep -c '^d' "$file1" | tr -d \\n > "$file4"
+df -h . | awk '!/Use%/{print " Dir(s) " $4 " bytes free"}' >> "$file4"
+
+# Display the results
+cat "$file2" "$file3" "$file4"
From aea3e6804472729f76c566c2bd594816282e0e70 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 18:22:23 +0300
Subject: [PATCH 09/13] Create 17.sh

---
 evaluation/benchmarks/dgsh/sequential/17.sh | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/17.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/17.sh b/evaluation/benchmarks/dgsh/sequential/17.sh
new file mode 100644
index 000000000..effa236fc
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/17.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+
+# Save INPUT_FILE (or stdin when it is unset or empty) to a temporary file
+cat ${INPUT_FILE:+"$INPUT_FILE"} > "$file1"
+
+# Process the input in two different ways
+cut -d , -f 5-6 "$file1" > "$file2"
+cut -d , -f 2-4 "$file1" > "$file3"
+
+# Merge the processed results
+paste -d , "$file2" "$file3"
From 8d97bb642f254fc133609a607067002b3362bd9d Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:03:33 +0300
Subject: [PATCH 10/13] Rewrite 9.sh

---
 evaluation/benchmarks/dgsh/sequential/9.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/9.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/9.sh b/evaluation/benchmarks/dgsh/sequential/9.sh
new file mode 100644
index 000000000..e1b721102
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/9.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+
+# Find object files under $1 and dump their symbol tables (NUL-safe paths)
+find "$1" -name "*.o" -print0 | xargs -0 nm > "$file1"
+
+# List all defined (exported) symbols
+awk 'NF == 3 && $2 ~ /[A-Z]/ {print $3}' "$file1" | sort > "$file2"
+
+# List all undefined (imported) symbols
+awk '$1 == "U" {print $2}' "$file1" | sort > "$file3"
+
+# Print exports that are not imported
+comm -23 "$file2" "$file3"
From 118a0bd834e6a42391dbd6b399dc6eca827a8105 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:03:53 +0300
Subject: [PATCH 11/13] Rename 17.sh to 18.sh

---
 evaluation/benchmarks/dgsh/sequential/{17.sh => 18.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename evaluation/benchmarks/dgsh/sequential/{17.sh => 18.sh} (100%)

diff --git a/evaluation/benchmarks/dgsh/sequential/17.sh b/evaluation/benchmarks/dgsh/sequential/18.sh
similarity index 100%
rename from evaluation/benchmarks/dgsh/sequential/17.sh
rename to evaluation/benchmarks/dgsh/sequential/18.sh
From 7b0234f5ab42ee05a97f24312ac7fc4bff1b4c7e Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:04:05 +0300
Subject: [PATCH 12/13] Rename 16.sh to 17.sh

---
 evaluation/benchmarks/dgsh/sequential/{16.sh => 17.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename evaluation/benchmarks/dgsh/sequential/{16.sh => 17.sh} (100%)

diff --git a/evaluation/benchmarks/dgsh/sequential/16.sh b/evaluation/benchmarks/dgsh/sequential/17.sh
similarity index 100%
rename from evaluation/benchmarks/dgsh/sequential/16.sh
rename to evaluation/benchmarks/dgsh/sequential/17.sh
From e89af5b49fbdd0ab2582fabf6b553d6cbd52c5ba Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:25:58 +0300
Subject: [PATCH 13/13] Add 7.sh sequential dgsh script

---
 evaluation/benchmarks/dgsh/sequential/7.sh | 149 +++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/7.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/7.sh b/evaluation/benchmarks/dgsh/sequential/7.sh
new file mode 100644
index 000000000..90c4ff07e
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/7.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+# Consistent sorting
+export LC_ALL=C
+# Print the $1 most frequent lines of (sorted) stdin, then a blank line
+toplist()
+{
+  uniq -c | sort -rn | head -n "$1"
+  echo
+}
+
+# Output the argument as a section header
+header()
+{
+  echo
+  echo "$1"
+  echo "$1" | sed 's/./-/g'
+}
+
+# Print initial header only if DGSH_DRAW_EXIT is not set
+if [ -z "${DGSH_DRAW_EXIT}" ]
+then
+  cat <<EOF
+			WWW server statistics
+			=====================
+
+Summary
+=======
+EOF
+fi
+
+# NOTE(review): lines 23-45 were lost in extraction; reconstructed - confirm
+file_initial=$(mktemp)
+file_bytes=$(mktemp)
+file_hosts=$(mktemp)
+file_sorted_hosts=$(mktemp)
+file_unique_hosts=$(mktemp)
+file_domains=$(mktemp)
+file_requests=$(mktemp)
+file_times=$(mktemp)
+file_dates=$(mktemp)
+file_day_count=$(mktemp)
+
+# Buffer the access log from stdin so each report below can re-read it
+cat > "$file_initial"
+
+# Number of accesses (read via stdin so wc prints no file name)
+echo -n 'Number of accesses: '
+wc -l < "$file_initial"
+
+# Total transferred bytes
+awk '{s += $NF} END {print s}' "$file_initial" > "$file_bytes"
+echo -n 'Number of Gbytes transferred: '
+awk '{print $1 / 1024 / 1024 / 1024}' "$file_bytes"
+
+# Process Host names
+awk '{print $1}' "$file_initial" > "$file_hosts"
+
+# Number of accesses
+echo -n 'Number of accesses: '
+wc -l < "$file_hosts"
+
+# Sorted hosts
+sort "$file_hosts" > "$file_sorted_hosts"
+
+# Unique hosts
+uniq "$file_sorted_hosts" > "$file_unique_hosts"
+echo -n 'Number of hosts: '
+wc -l < "$file_unique_hosts"
+
+# Number of TLDs
+echo -n 'Number of top level domains: '
+awk -F. '$NF !~ /[0-9]/ {print $NF}' "$file_unique_hosts" | sort -u | wc -l
+
+
+# Top 10 hosts
+header "Top 10 Hosts"
+
+toplist 10 < "$file_sorted_hosts"
+echo
+
+# Top 20 TLDs
+header "Top 20 Top Level Domains"
+
+awk -F. '$NF !~ /^[0-9]/ {print $NF}' "$file_sorted_hosts" | sort | toplist 20
+echo
+
+# Domains
+awk -F. 'BEGIN {OFS = "."} $NF !~ /^[0-9]/ {$1 = ""; print}' "$file_sorted_hosts" | sort > "$file_domains"
+
+# Number of domains
+echo -n 'Number of domains: '
+uniq "$file_domains" | wc -l
+
+# Top 10 domains
+header "Top 10 domains"
+toplist 10 < "$file_domains"
+
+# Hosts by volume
+header "Top 10 Hosts by Transfer"
+awk ' {bytes[$1] += $NF}
+END {for (h in bytes) print bytes[h], h}' "$file_initial" | sort -rn | head -n 10
+
+# Sorted page name requests
+awk '{print $7}' "$file_initial" | sort > "$file_requests"
+
+# Top 20 area requests (input is already sorted)
+header "Top 20 area requests"
+awk -F/ '{print $2}' "$file_requests" | toplist 20
+# Number of different pages
+echo -n 'Number of different pages: '
+uniq "$file_requests" | wc -l
+
+# Top 20 requests
+header "Top 20 requests"
+toplist 20 < "$file_requests"
+
+# Access time: dd/mmm/yyyy:hh:mm:ss
+awk '{print substr($4, 2)}' "$file_initial" > "$file_times"
+
+# Just dates
+awk -F: '{print $1}' "$file_times" > "$file_dates"
+
+# Number of days; NACCESS below is the line count of the buffered log
+echo -n 'Accesses per day: '
+uniq "$file_dates" | wc -l > "$file_day_count"
+awk '
+BEGIN {
+  while ((getline line < "'"$file_initial"'") > 0) NACCESS++
+}
+{print NACCESS / $1}' "$file_day_count"
+
+echo -n 'MBytes per day: '
+awk '
+BEGIN {
+  getline NXBYTES < "'"$file_bytes"'"
+}
+{print NXBYTES / $1 / 1024 / 1024}' "$file_day_count"
+
+header "Accesses by Date"
+uniq -c < "$file_dates"
+
+# Accesses by day of week
+header "Accesses by Day of Week"
+sed 's|/|-|g' "$file_dates" | date -f - +%a 2>/dev/null | sort | uniq -c | sort -rn
+
+# Accesses by Local Hour
+header "Accesses by Local Hour"
+awk -F: '{print $2}' "$file_times" | sort | uniq -c