From 8bbd45d3b904695d4afe3a9ca0e6b6852d93e86d Mon Sep 17 00:00:00 2001
From: Konstantinos Kallas <konstantinos.kallas@hotmail.com>
Date: Mon, 21 Aug 2023 17:42:45 +0300
Subject: [PATCH 01/13] New rewrites of dgsh scripts, cleaner and more
 meaningful

---
 evaluation/benchmarks/dgsh/sequential/1.sh |  21 ++++
 evaluation/benchmarks/dgsh/sequential/2.sh |  24 ++++
 evaluation/benchmarks/dgsh/sequential/3.sh | 132 +++++++++++++++++++++
 3 files changed, 177 insertions(+)
 create mode 100755 evaluation/benchmarks/dgsh/sequential/1.sh
 create mode 100755 evaluation/benchmarks/dgsh/sequential/2.sh
 create mode 100755 evaluation/benchmarks/dgsh/sequential/3.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/1.sh b/evaluation/benchmarks/dgsh/sequential/1.sh
new file mode 100755
index 000000000..5f15105a1
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/1.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Report the type of the data on stdin and its size: raw, and compressed by xz, bzip2 and gzip.
+## Initialize the temporary file that buffers stdin for the repeated reads below
+file1=$(mktemp)
+trap 'rm -f "$file1"' EXIT # do not leave the buffered input behind
+cat >"$file1"
+
+printf 'File type:\t'
+file - <"$file1"
+
+printf 'Original size:\t'
+wc -c <"$file1"
+
+printf 'xz:\t\t'
+xz -c <"$file1" | wc -c
+
+printf 'bzip2:\t\t'
+bzip2 -c <"$file1" | wc -c
+
+printf 'gzip:\t\t'
+gzip -c <"$file1" | wc -c
diff --git a/evaluation/benchmarks/dgsh/sequential/2.sh b/evaluation/benchmarks/dgsh/sequential/2.sh
new file mode 100755
index 000000000..64eb30cf2
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/2.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Rank the authors and the commit weekdays of a git history by number of commits.
+## Note: Needs to be run on a big git repository to make sense (maybe linux)
+## Initialize the necessary temporary files
+file1=$(mktemp)
+trap 'rm -f "$file1"' EXIT
+
+# Rank the lines of stdin by frequency, most frequent first ("count line")
+forder()
+{
+	sort |
+	uniq -c |
+	sort -rn
+}
+
+# One "author<TAB>date" record per commit: unlike ':', a tab occurs neither in names nor in dates
+git log --format="%an%x09%ad" --date=default "$@" >"$file1"
+
+echo "Authors ordered by number of commits"
+# Order by frequency
+awk -F'\t' '{print $1}' <"$file1" | forder
+
+echo "Days ordered by number of commits"
+# Order by frequency (the default date format starts with the three-letter weekday)
+awk -F'\t' '{print substr($2, 1, 3)}' <"$file1" | forder
diff --git a/evaluation/benchmarks/dgsh/sequential/3.sh b/evaluation/benchmarks/dgsh/sequential/3.sh
new file mode 100755
index 000000000..5c9611f79
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/3.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# Compute size and style metrics for the C sources and headers found under the given paths.
+## Note: Needs to be run on a big git repository to make sense (maybe linux)
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+trap 'rm -f "$file1" "$file2" "$file3" "$file4"' EXIT
+find "$@" \( -name \*.c -or -name \*.h \) -type f -print0 >"$file1"
+
+echo -n 'FNAMELEN: '
+
+tr \\0 \\n <"$file1" |
+# Remove path
+sed 's|^.*/||' |
+# Maintain average
+awk '{s += length($1); n++} END {
+    if (n>0)
+        print s / n;
+    else
+        print 0; }'
+
+xargs -0 /bin/cat <"$file1" >"$file2"
+# Turn '#' and escaped quotes into '@' and empty all string/char literals before running cpp
+sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file2" |
+    cpp -P >"$file3"
+
+# Structure definitions
+echo -n 'NSTRUCT: '
+# [[:blank:]] matches a space or a tab, so tab-separated declarations are counted as well
+grep -E -c 'struct[[:blank:]]*{|struct[[:blank:]]*[a-zA-Z_][a-zA-Z0-9_]*[[:blank:]]*{' <"$file3"
+#}} (match preceding openings)
+
+# Type definitions
+echo -n 'NTYPEDEF: '
+grep -cw typedef <"$file3"
+
+# Use of void
+echo -n 'NVOID: '
+grep -cw void <"$file3"
+
+# Use of gets
+echo -n 'NGETS: '
+grep -cw gets <"$file3"
+
+# Average identifier length
+echo -n 'IDLEN: '
+
+tr -cs 'A-Za-z0-9_' '\n' <"$file3" |
+sort -u |
+awk '/^[A-Za-z]/ { len += length($1); n++ } END {
+    if (n>0)
+        print len / n;
+    else
+        print 0; }'
+
+echo -n 'CHLINESCHAR: '
+wc -lc <"$file2" |
+    awk '{OFS=":"; print $1, $2}'
+
+echo -n 'NCCHAR: '
+sed 's/#/@/g' <"$file2" |
+cpp -traditional -P |
+wc -c |
+awk '{OFMT = "%.0f"; print $1/1000}'
+
+# Number of comments
+echo -n 'NCOMMENT: '
+grep -E -c '/\*|//' <"$file2"
+
+# Occurrences of the word Copyright
+echo -n 'NCOPYRIGHT: '
+grep -ci copyright <"$file2"
+
+# C files
+find "$@" -name \*.c -type f -print0 >"$file2"
+
+# Convert to newline separation for counting
+tr \\0 \\n <"$file2" >"$file3"
+
+# Number of C files
+echo -n 'NCFILE: '
+wc -l <"$file3"
+
+# Number of directories containing C files
+echo -n 'NCDIR: '
+sed 's,/[^/]*$,,;s,^.*/,,' <"$file3" |
+sort -u |
+wc -l
+
+# C code
+xargs -0 /bin/cat <"$file2" >"$file3"
+
+# Lines and characters
+echo -n 'CLINESCHAR: '
+wc -lc <"$file3" |
+awk '{OFS=":"; print $1, $2}'
+
+# C code without comments and strings
+sed 's/#/@/g;s/\\[\\"'\'']/@/g;s/"[^"]*"/""/g;'"s/'[^']*'/''/g" <"$file3" |
+cpp -P >"$file4"
+
+# Number of functions
+echo -n 'NFUNCTION: '
+grep -c '^{' <"$file4"
+
+# Number of gotos
+echo -n 'NGOTO: '
+grep -cw goto <"$file4"
+
+# Occurrences of the register keyword
+echo -n 'NREGISTER: '
+grep -cw register <"$file4"
+
+# Number of macro definitions
+echo -n 'NMACRO: '
+grep -c '@[[:blank:]]*define[[:blank:]][[:blank:]]*[a-zA-Z_][a-zA-Z0-9_]*(' <"$file4"
+# Number of include directives
+echo -n 'NINCLUDE: '
+grep -c '@[[:blank:]]*include' <"$file4"
+
+# Number of constants
+echo -n 'NCONST: '
+grep -ohw '[0-9][x0-9][0-9a-f]*' <"$file4" | wc -l
+
+
+# Header files
+echo -n 'NHFILE: '
+find "$@" -name \*.h -type f |
+wc -l
\ No newline at end of file

From 3fb753799bf589e950da796acbe67853ab01fd26 Mon Sep 17 00:00:00 2001
From: Konstantinos Kallas <konstantinos.kallas@hotmail.com>
Date: Thu, 31 Aug 2023 14:22:28 -0400
Subject: [PATCH 02/13] Add 4.sh

---
 evaluation/benchmarks/dgsh/sequential/4.sh | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100755 evaluation/benchmarks/dgsh/sequential/4.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/4.sh b/evaluation/benchmarks/dgsh/sequential/4.sh
new file mode 100755
index 000000000..36fa22fd8
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/4.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# Print the names of files with identical content (same MD5 sum), one group per line.
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+trap 'rm -f "$file1" "$file2"' EXIT
+
+# Create list of files
+find "$@" -type f |
+
+# Produce lines of the form
+# MD5(filename)= 811bfd4b5974f39e986ddc037e1899e7
+xargs openssl md5 |
+
+# Convert each line into a "filename md5sum" pair
+sed 's/^MD5(//;s/)= / /' |
+
+# Sort by MD5 sum
+sort -k2 > "$file1"
+
+# Print an MD5 sum for each file that appears more than once
+awk '{print $2}' < "$file1" | uniq -d > "$file2"
+
+
+# Join the repeated MD5 sums with the corresponding file names
+# (join field: column 1 of the sum list, column 2 of the "filename md5sum" pairs;
+# both files are already ordered by MD5 sum, as join requires)
+join -2 2 "$file2" "$file1" |
+
+# Output same files on a single line
+awk '
+BEGIN {ORS=""}
+$1 != prev && prev {print "\n"}
+END {if (prev) print "\n"}
+{if (prev) print " "; prev = $1; print $2}'

From 5120cf0e1d98cfaf4f6fec1a180d980870a248d5 Mon Sep 17 00:00:00 2001
From: Konstantinos Kallas <konstantinos.kallas@hotmail.com>
Date: Thu, 31 Aug 2023 14:41:09 -0400
Subject: [PATCH 03/13] Two more dgsh scripts

---
 evaluation/benchmarks/dgsh/sequential/5.sh | 28 ++++++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100755 evaluation/benchmarks/dgsh/sequential/5.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/5.sh b/evaluation/benchmarks/dgsh/sequential/5.sh
new file mode 100755
index 000000000..5bfe5cf89
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/5.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Highlight, with context, the words of the text on stdin that are missing from the dictionary.
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+trap 'rm -f "$file1" "$file2" "$file3" "$file4"' EXIT
+export LC_ALL=C
+
+cat > "$file1"
+
+# Find errors
+
+# Obtain list of words in text
+tr -cs A-Za-z \\n < "$file1" |
+tr A-Z a-z |
+sort -u > "$file2"
+
+# Ensure dictionary is compatibly sorted: LC_ALL=C above applies to both sorts
+# and to comm. sort reads the word list by name, so the text is not piped
+# into it.
+sort /usr/share/dict/words > "$file3"
+
+# List errors as a set difference
+comm -23 "$file2" "$file3" > "$file4"
+
+grep -F -f "$file4" -i --color -w -C 2 "$file1"
\ No newline at end of file

From bc71b6c0dd4fe7bfc54be7baa7de91a6b2b536a8 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 09:11:14 +0300
Subject: [PATCH 04/13] Rewrite 6.sh script

---
 evaluation/benchmarks/dgsh/sequential/6.sh | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/6.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/6.sh b/evaluation/benchmarks/dgsh/sequential/6.sh
new file mode 100644
index 000000000..f2ad97e7f
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/6.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+file5=$(mktemp)
+
+cat > $file1
+
+# Consistent sorting across machines
+export LC_ALL=C
+
+# Stream input from file and split input one word per line
+tr -cs a-zA-Z '\n' < "$file1" |
+# Create list of unique words
+sort -u > "$file2"
+
+# List two-letter palindromes
+sed 's/.*\(.\)\(.\)\2\1.*/p: \1\2-\2\1/;t
+	g' "$file2" > "$file3"
+
+# List four consecutive consonants
+sed -E 's/.*([^aeiouyAEIOUY]{4}).*/c: \1/;t
+	g' "$file2" > "$file4"
+
+# List length of words longer than 12 characters
+awk '{if (length($1) > 12) print "l:", length($1);
+	else print ""}' "$file2" > "$file5"
+
+# Paste the four streams side-by-side
+paste "$file2" "$file3" "$file4" "$file5" | 
+# List only words satisfying one or more properties
+fgrep :

From 4b3d3f1f581d3da0262add71bb0b3967d0ef19e5 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 11:33:42 +0300
Subject: [PATCH 05/13] Rewrite 7.sh dgsh script

---
 evaluation/benchmarks/dgsh/sequential/7.sh | 62 ++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/7.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/7.sh b/evaluation/benchmarks/dgsh/sequential/7.sh
new file mode 100644
index 000000000..36eae12b5
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/7.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# Print digram, trigram, word and character frequency tables for the text on stdin.
+# Consistent sorting across machines
+export LC_ALL=C
+
+# Convert input into a ranked frequency list
+ranked_frequency()
+{
+	awk '{count[$1]++} END {for (i in count) print count[i], i}' |
+	# We want the standard sort here
+	sort -rn
+}
+
+# Convert standard input to a ranked frequency list of specified n-grams ($1: n-gram length)
+ngram()
+{
+	local N=$1
+
+	perl -ne 'for ($i = 0; $i < length($_) - '"$N"'; $i++) {
+		print substr($_, $i, '"$N"'), "\n";
+	}' |
+	ranked_frequency
+}
+
+# Temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+trap 'rm -f "$file1" "$file2" "$file3"' EXIT
+cat > "$file1"
+
+# Split input one word per line
+tr -cs a-zA-Z '\n' < "$file1" > "$file2"
+
+# Digram frequency
+echo "Digram frequency"
+ngram 2 < "$file2"
+
+# Trigram frequency
+echo "Trigram frequency"
+ngram 3 < "$file2"
+
+# Word frequency
+echo "Word frequency"
+ranked_frequency < "$file2"
+
+# Store number of characters to use in awk below
+nchars=$(wc -c < "$file1")
+
+# Character frequency
+echo "Character frequency"
+sed 's/./&\
+/g' < "$file1" |
+# Print absolute
+ranked_frequency | tee "$file3"
+
+# Print relative
+echo "Relative character frequency"
+awk -v NCHARS="$nchars" 'BEGIN {
+		OFMT = "%.2g%%"}
+		{print $1, $2, $1 / NCHARS * 100}' "$file3"
+

From 5fddc71eef01b24e2e0d8e80ce0a9223bce569be Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 22:18:28 +0300
Subject: [PATCH 06/13] Rewrite 11.sh dgsh script

---
 evaluation/benchmarks/dgsh/sequential/11.sh | 112 ++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/11.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/11.sh b/evaluation/benchmarks/dgsh/sequential/11.sh
new file mode 100644
index 000000000..1ef24687c
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/11.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# Plot which committers of the current git repository were active on which day, as PNG bitmaps.
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+file5=$(mktemp)
+file6=$(mktemp)
+file7=$(mktemp)
+trap 'rm -f "$file1" "$file2" "$file3" "$file4" "$file5" "$file6" "$file7"' EXIT
+export LC_ALL=C
+
+# Commit history in the form of ascending Unix timestamps, emails
+git log --pretty=tformat:'%at %ae' |
+awk -v now="$(date +%s)" 'NF == 2 && $1 > 100000 && $1 < now' |
+sort -n > "$file1"
+
+# Calculate number of committers
+awk '{print $2}' "$file1" |
+sort -u |
+wc -l > "$file2"
+cp "$file2" "$file3"
+cp "$file2" "$file4"
+
+# Calculate last commit timestamp in seconds
+tail -1 "$file1" |
+awk '{print $1}' > "$file5"
+
+# Calculate first commit timestamp in seconds
+head -1 "$file1" |
+awk '{print $1}' >> "$file5"
+
+# Gather last and first commit timestamp and compute the difference in days
+timestamps=$(tr '\n' ' ' < "$file5") # read completely before "$file5" is truncated below
+printf '%s\n' "$timestamps" |
+awk '{print int(($1 - $2) / 60 / 60 / 24)}' > "$file5"
+
+sort -k2 "$file1" > "$file6"
+
+# Place committers left/right of the median according to the number of their commits
+awk '{print $2}' "$file1" |
+sort |
+uniq -c |
+sort -n |
+awk -v committers1="$file2" '
+BEGIN {
+    while ((getline NCOMMITTERS < committers1) > 0) {}
+    l = 0; r = NCOMMITTERS;
+}
+{print NR % 2 ? l++ : --r, $2}' |
+sort -k2 > "$file7"
+
+# Join committer positions with commit timestamps based on committer email
+join -j 2 "$file6" "$file7" |
+sort -k 2n -o "$file6" # -o opens "$file6" for writing only after join has finished reading it
+
+# Create portable bitmap
+{
+    echo 'P1'
+    {
+        cat "$file3"
+        cat "$file5"
+    } |
+    tr '\n' ' ' |
+    awk '{print $1, $2}'
+
+    perl -na -e '
+    BEGIN {
+        open(my $ncf, "<", "'"$file4"'");
+        $ncommitters = <$ncf>;
+        @empty[$ncommitters - 1] = 0; @committers = @empty;
+    }
+    sub out {
+        print join("", map($_ ? "1" : "0", @committers)), "\n";
+    }
+
+    $day = int($F[1] / 60 / 60 / 24);
+    $pday = $day if (!defined($pday));
+
+    while ($day != $pday) {
+        out();
+        @committers = @empty;
+        $pday++;
+    }
+
+    $committers[$F[2]] = 1;
+
+    END { out(); }
+    ' "$file6"
+} |
+pgmmorphconv -erode <(
+cat <<EOF
+P1
+7 7
+1 1 1 0 1 1 1
+1 1 0 0 0 1 1
+1 0 0 0 0 0 1
+0 0 0 0 0 0 0
+1 0 0 0 0 0 1
+1 1 0 0 0 1 1
+1 1 1 0 1 1 1
+EOF
+) > "$file2"
+# "$file2" is free for reuse here; both images are rendered from the saved bitmap, not a pipe
+{
+    # Full-scale image
+    pnmtopng <"$file2" >large.png
+    # A smaller image
+    pamscale -width 640 <"$file2" |
+    pnmtopng >small.png
+}

From 0e9d1c87b080d464c369e1574fa26240214d5da8 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Thu, 7 Sep 2023 22:19:52 +0300
Subject: [PATCH 07/13] Rename 7.sh to 8.sh

---
 evaluation/benchmarks/dgsh/sequential/{7.sh => 8.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename evaluation/benchmarks/dgsh/sequential/{7.sh => 8.sh} (100%)

diff --git a/evaluation/benchmarks/dgsh/sequential/7.sh b/evaluation/benchmarks/dgsh/sequential/8.sh
similarity index 100%
rename from evaluation/benchmarks/dgsh/sequential/7.sh
rename to evaluation/benchmarks/dgsh/sequential/8.sh

From 8fb9b630d4879416302f168920b7e1dcb2ef90ca Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 18:21:34 +0300
Subject: [PATCH 08/13] Create 16.sh

---
 evaluation/benchmarks/dgsh/sequential/16.sh | 25 +++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/16.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/16.sh b/evaluation/benchmarks/dgsh/sequential/16.sh
new file mode 100644
index 000000000..718935c2f
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/16.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Print a DIR-like listing of the current directory followed by file, size and free-space totals.
+# Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+file4=$(mktemp)
+trap 'rm -f "$file1" "$file2" "$file3" "$file4"' EXIT
+# Save the ls output to a temporary file
+ls -n > "$file1"
+
+# Reorder fields in DIR-like way
+awk '!/^total/ {print $6, $7, $8, $1, sprintf("%8d", $5), $9}' "$file1" > "$file2"
+
+# Count number of files: every listed entry, but not the "total" header line of ls
+grep -vc '^total' "$file1" | tr -d \\n > "$file3"
+echo -n ' File(s) ' >> "$file3"
+awk '{s += $5} END {printf("%d bytes\n", s)}' "$file1" >> "$file3"
+
+# Count number of directories and print label for number of dirs and calculate free bytes
+grep -c '^d' "$file1" | tr -d \\n > "$file4"
+df -h . | awk '!/Use%/{print " Dir(s) " $4 " bytes free"}' >> "$file4"
+
+# Display the results
+cat "$file2" "$file3" "$file4"

From aea3e6804472729f76c566c2bd594816282e0e70 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 18:22:23 +0300
Subject: [PATCH 09/13] Create 17.sh

---
 evaluation/benchmarks/dgsh/sequential/17.sh | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/17.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/17.sh b/evaluation/benchmarks/dgsh/sequential/17.sh
new file mode 100644
index 000000000..effa236fc
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/17.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Emit columns 5-6 followed by columns 2-4 of comma-separated input.
+# Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+trap 'rm -f "$file1" "$file2" "$file3"' EXIT
+# Save the input: the file named by INPUT_FILE, or standard input when it is unset or empty
+cat ${INPUT_FILE:+"$INPUT_FILE"} > "$file1"
+
+# Process the input in two different ways
+cut -d , -f 5-6 "$file1" > "$file2"
+cut -d , -f 2-4 "$file1" > "$file3"
+
+# Merge the processed results
+paste -d , "$file2" "$file3"

From 8d97bb642f254fc133609a607067002b3362bd9d Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:03:33 +0300
Subject: [PATCH 10/13] Rewrite 9.sh

---
 evaluation/benchmarks/dgsh/sequential/9.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/9.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/9.sh b/evaluation/benchmarks/dgsh/sequential/9.sh
new file mode 100644
index 000000000..e1b721102
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/9.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# List the symbols that the object files under "$1" define but that none of them imports.
+## Initialize the necessary temporary files
+file1=$(mktemp)
+file2=$(mktemp)
+file3=$(mktemp)
+trap 'rm -f "$file1" "$file2" "$file3"' EXIT
+# Find object files and print their symbols (NUL-separated names survive whitespace in paths)
+find "$1" -name "*.o" -print0 | xargs -0 nm > "$file1"
+
+# List all defined (exported) symbols
+awk 'NF == 3 && $2 ~ /[A-Z]/ {print $3}' "$file1" | sort > "$file2"
+
+# List all undefined (imported) symbols
+awk '$1 == "U" {print $2}' "$file1" | sort > "$file3"
+
+# Print exports that are not imported
+comm -23 "$file2" "$file3"

From 118a0bd834e6a42391dbd6b399dc6eca827a8105 Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:03:53 +0300
Subject: [PATCH 11/13] Rename 17.sh to 18.sh

---
 evaluation/benchmarks/dgsh/sequential/{17.sh => 18.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename evaluation/benchmarks/dgsh/sequential/{17.sh => 18.sh} (100%)

diff --git a/evaluation/benchmarks/dgsh/sequential/17.sh b/evaluation/benchmarks/dgsh/sequential/18.sh
similarity index 100%
rename from evaluation/benchmarks/dgsh/sequential/17.sh
rename to evaluation/benchmarks/dgsh/sequential/18.sh

From 7b0234f5ab42ee05a97f24312ac7fc4bff1b4c7e Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:04:05 +0300
Subject: [PATCH 12/13] Rename 16.sh to 17.sh

---
 evaluation/benchmarks/dgsh/sequential/{16.sh => 17.sh} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename evaluation/benchmarks/dgsh/sequential/{16.sh => 17.sh} (100%)

diff --git a/evaluation/benchmarks/dgsh/sequential/16.sh b/evaluation/benchmarks/dgsh/sequential/17.sh
similarity index 100%
rename from evaluation/benchmarks/dgsh/sequential/16.sh
rename to evaluation/benchmarks/dgsh/sequential/17.sh

From e89af5b49fbdd0ab2582fabf6b553d6cbd52c5ba Mon Sep 17 00:00:00 2001
From: Georgios Liargkovas <56384743+gliargovas@users.noreply.github.com>
Date: Fri, 8 Sep 2023 22:25:58 +0300
Subject: [PATCH 13/13] Add 7.sh sequential dgsh script

---
 evaluation/benchmarks/dgsh/sequential/7.sh | 149 +++++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 evaluation/benchmarks/dgsh/sequential/7.sh

diff --git a/evaluation/benchmarks/dgsh/sequential/7.sh b/evaluation/benchmarks/dgsh/sequential/7.sh
new file mode 100644
index 000000000..90c4ff07e
--- /dev/null
+++ b/evaluation/benchmarks/dgsh/sequential/7.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+# Consistent sorting
+export LC_ALL=C
+
+toplist()
+{
+	uniq -c | sort -rn | head -$1
+	echo
+}
+
+# Output the argument as a section header
+header()
+{
+	echo
+	echo "$1"
+	echo "$1" | sed 's/./-/g'
+}
+
+# Print initial header only if DGSH_DRAW_EXIT is not set
+if [ -z "${DGSH_DRAW_EXIT}" ]
+then
+    cat <<EOF
+			WWW server statistics
+			=====================
+
+Summary
+-------
+EOF
+fi
+
+## Initialize temporary files
+file_initial=$(mktemp)
+file_bytes=$(mktemp)
+file_hosts=$(mktemp)
+file_requests=$(mktemp)
+file_sorted_hosts=$(mktemp)
+file_unique_hosts=$(mktemp)
+file_domains=$(mktemp)
+file_day_count=$(mktemp)
+file_dates=$(mktemp)
+file_times=$(mktemp)
+
+# This file will capture a large portion of the processed data to be reused in subsequent parts
+cat > "$file_initial"
+
+# Number of accesses
+echo -n 'Number of accesses: '
+wc -l "$file_initial"
+
+# Total transferred bytes
+awk '{s += $NF} END {print s}' "$file_initial" > "$file_bytes"
+echo -n 'Number of Gbytes transferred: '
+awk '{print $1 / 1024 / 1024 / 1024}' "$file_bytes"
+
+# Process Host names
+awk '{print $1}' "$file_initial" > "$file_hosts"
+
+# Number of accesses
+echo -n 'Number of accesses: '
+wc -l < "$file_hosts"
+
+# Sorted hosts
+sort "$file_hosts" > "$file_sorted_hosts"
+
+# Unique hosts
+uniq "$file_sorted_hosts" > "$file_unique_hosts"
+echo -n 'Number of hosts: '
+wc -l < "$file_unique_hosts"
+
+# Number of TLDs
+echo -n 'Number of top level domains: '
+awk -F. '$NF !~ /[0-9]/ {print $NF}' "$file_unique_hosts" | sort -u | wc -l
+
+
+# Top 10 hosts
+toplist 10 < "$file_sorted_hosts"
+
+uniq -c "$file_sorted_hosts" | sort -rn | head -10
+echo
+
+# Top 20 TLDs
+header "Top 20 Top Level Domains"
+
+awk -F. '$NF !~ /^[0-9]/ {print $NF}' "$file_sorted_hosts" | sort | toplist 20
+echo
+
+# Domains
+awk -F. 'BEGIN {OFS = "."} $NF !~ /^[0-9]/ {$1 = ""; print}' "$file_sorted_hosts" | sort > "$file_domains"
+
+# Number of domains
+echo -n 'Number of domains: '
+uniq "$file_domains" | wc -l
+
+# Top 10 domains
+header "Top 10 domains"
+toplist 10 < "$file_domains"
+
+# Hosts by volume
+header Top 10 Hosts by Transfer
+awk '    {bytes[$1] += $NF}
+END {for (h in bytes) print bytes[h], h}' "$file_initial" | sort -rn | head -10
+
+# Sorted page name requests
+awk '{print $7}' "$file_initial" | sort > "$file_requests"
+
+# Top 20 area requests (input is already sorted)
+header "Top 20 area requests"
+awk -F/ '{print $2}' "$file_requests" | toplist 20
+# Number of different pages
+echo -n 'Number of different pages: '
+cat "$file_requests" | uniq | wc -l
+
+# Top 20 requests
+header "Top 20 requests"
+toplist 20 < "$file_requests"
+
+# Access time: dd/mmm/yyyy:hh:mm:ss
+awk '{print substr($4, 2)}' "$file_initial" > "$file_times"
+
+# Just dates
+awk -F: '{print $1}' "$file_times" > "$file_dates"
+
+# Number of days
+echo -n 'Accesses per day: '
+uniq "$file_dates" | wc -l > "$file_day_count"
+awk '
+BEGIN {
+    getline NACCESS < "'"$file_initial"'"
+}
+{print NACCESS / $1}' "$file_day_count"
+
+echo -n 'MBytes per day: '
+awk '
+BEGIN {
+    getline NXBYTES < "'"$file_bytes"'"
+}
+{print NXBYTES / $1 / 1024 / 1024}' "$file_day_count"
+
+header "Accesses by Date"
+uniq -c < "$file_dates"
+
+# Accesses by day of week
+header "Accesses by Day of Week"
+sed 's|/|-|g' "$file_dates" | date -f - +%a 2>/dev/null | sort | uniq -c | sort -rn
+
+# Accesses by Local Hour
+header "Accesses by Local Hour"
+awk -F: '{print $2}' "$file_times" | sort | uniq -c