Merge branch 'main' of github.com:verus-lang/paper-sosp24-artifact

verus-lang · Aug 20, 2024 · c74e7e8 · c74e7e8
2 parents 5d94832 + eede04f
commit c74e7e8
Show file tree

Hide file tree

Showing 3 changed files with 249 additions and 31 deletions.
diff --git a/ironkv/compare.py b/ironkv/compare.py
@@ -0,0 +1,88 @@
+import os
+import re
+import subprocess
+import sys
+import time
+
+VERUS_PATH = "C:/Apps/verus-systems-code/ironfleet-comparison/ironsht/bin/"
+DAFNY_PATH = "C:/Apps/ironclad-microsoft/ironfleet/bin/"
+RAW_DATA_PATH = "raw-data.txt"
+NUM_THREADS = 10
+SECONDS = 30
+NUM_KEYS = 1000
+NUM_EXPERIMENT_ITERATIONS = 100
+CONFIDENCE_QUANTILE = 0.95
+VALUE_SIZES = [ 128, 256, 512 ]
+
+raw_data_out = open(RAW_DATA_PATH, "w")
+raw_data_out.write("Language\tThreads\tSeconds\tWorkload\tNumber of keys\tValue size\tRequests completed\n")
+
+def launch_server(verus, which_server):
+    server_exe = os.path.join(VERUS_PATH if verus else DAFNY_PATH, "IronSHTServer.dll")
+    cmd = [
+              "dotnet",
+              server_exe,
+              "certs/MySHT.IronSHT.service.txt",
+              f"certs/MySHT.IronSHT.server{which_server}.private.txt"
+          ]
+    server = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    while True:
+        line = server.stdout.readline().decode('utf-8').strip()
+        if line == "[[READY]]":
+            return server
+
+def measure_client(verus, workload, value_size):
+    server1 = launch_server(verus, 1)
+    server2 = launch_server(verus, 2)
+    server3 = launch_server(verus, 3)
+
+    client_exe = os.path.join(VERUS_PATH if verus else DAFNY_PATH, "IronSHTClient.dll")
+    cmd = [
+              "dotnet",
+              client_exe,
+              "certs/MySHT.IronSHT.service.txt",
+              f"nthreads={NUM_THREADS}",
+              f"duration={SECONDS}",
+              f"workload={workload}",
+              f"numkeys={NUM_KEYS}",
+              f"valuesize={value_size}"
+          ]
+    client = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    num_requests_completed = 0
+    while True:
+        line = client.stdout.readline()
+        if not line:
+            break
+        line = line.decode('utf-8').strip()
+        if re.search('^#req', line):
+            num_requests_completed = num_requests_completed + 1
+    kilo_requests_per_second = num_requests_completed * 0.001 / SECONDS
+    raw_data_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
+                           "verus" if verus else "dafny",
+                           NUM_THREADS,
+                           SECONDS,
+                           workload,
+                           NUM_KEYS,
+                           value_size,
+                           num_requests_completed
+                      ))
+    raw_data_out.flush()
+
+    server1.kill()
+    server2.kill()
+    server3.kill()
+
+    return kilo_requests_per_second
+
+def do_experiments():
+    for i in range(NUM_EXPERIMENT_ITERATIONS):
+        for verus in [True, False]:
+             language = ("Verus" if verus else "Dafny")
+             for workload in [ 'g', 's' ]:
+                 for value_size in VALUE_SIZES:
+                      print(f"Performing experiment #{i} for {language} with workload {workload} and value size {value_size}")
+                      kops = measure_client(verus, workload, value_size)
+
+do_experiments()
+
+raw_data_out.close()
diff --git a/ironkv/gengraph.py b/ironkv/gengraph.py
@@ -0,0 +1,93 @@
+import collections
+import numpy
+import os
+import re
+import scipy.stats
+import sys
+
+RAW_DATA_FILE = "raw-data.txt"
+VALUE_SIZES = [ 128, 256, 512 ]
+CONFIDENCE_QUANTILE = 0.95
+
+def get_header_map(header_line):
+    column_headings = header_line.strip().split('\t')
+    num_columns = len(column_headings)
+    which_column = {}
+    for col, heading in enumerate(column_headings):
+        which_column[heading] = col
+    return (which_column, num_columns)
+
+data = collections.defaultdict(list)
+raw_data_in = open(RAW_DATA_FILE, "r")
+(header_map, num_columns) = get_header_map(raw_data_in.readline())
+
+for line in raw_data_in.readlines():
+    values = line.strip().split('\t')
+    if len(values) < num_columns:
+        continue
+    language = values[header_map['Language']]
+    seconds = int(values[header_map['Seconds']])
+    workload = values[header_map['Workload']]
+    value_size = int(values[header_map['Value size']])
+    num_requests_completed = int(values[header_map['Requests completed']])
+    kops_per_sec = num_requests_completed * 0.001 / seconds
+    key = f"{language}_{workload}_{value_size}"
+    data[key].append(kops_per_sec)
+
+print(r"""
+\begin{figure}
+  \begin{tikzpicture}
+  \centering
+  \begin{axis}[
+        ybar,
+        ymin = 0,
+        ymax = 6,
+        height=4.5cm, width=\columnwidth,
+        legend image code/.code={ \draw [#1] (0cm,-0.1cm) rectangle (0.1cm,0.2cm); },
+        legend style={at={(0.275, 0.95)}},
+        bar width=0.4cm,
+        ymajorgrids, tick align=inside,
+        enlarge y limits={value=.1,upper},
+        axis x line*=bottom,
+        axis y line*=left,
+        x tick label style={rotate=10,anchor=east,xshift=16pt,yshift=-8pt,font=\scriptsize},
+        tickwidth=0pt,
+        enlarge x limits=true,
+        xlabel={Workload type, bytes per value},
+        ylabel={Throughput (kop/s)},
+        symbolic x coords={
+           Get 128,Get 256,Get 512,Set 128,Set 256,Set 512
+        },
+       xtick=data
+    ]
+""")
+
+for language in ['dafny', 'verus']:
+    printable_language = "Verus" if language == "verus" else "IronFleet"
+    printable_color = "teal" if language == "verus" else "red"
+    printable_pattern = "" if verus else ",postaction={pattern=north east lines}"
+    print(r"\addplot [draw=none, fill=%s!100%s,error bars/.cd, y dir=both, y explicit] coordinates {" % (printable_color, printable_pattern))
+    for workload in ['g', 's']:
+        printable_workload = 'Get' if workload == 'g' else 'Set'
+        for value_size in VALUE_SIZES:
+            printable_value_size = value_size
+            key = f"{language}_{workload}_{value_size}"
+            if len(data[key]) == 0:
+                print(f"Could not find data for key {key}")
+                sys.exit(-1)
+            a = numpy.array(data[key])
+            mean = numpy.mean(a)
+            std_err = scipy.stats.sem(a)
+            (conf95_l, conf95_r) = scipy.stats.t.interval(confidence=CONFIDENCE_QUANTILE, df=len(a) - 1, loc=mean, scale=std_err)
+            conf95_diff = conf95_r - mean
+            xlabel=f"{printable_workload} {printable_value_size}"
+            print(f"({xlabel},{mean}) += (0, {conf95_diff}) -= (0, {conf95_diff})")
+    print("};")
+
+print(r"""
+    \legend{IronFleet,Verus}
+  \end{axis}
+  \end{tikzpicture}
+\caption{Throughput comparison of IronFleet and Verus versions of IronSHT. Workload varies between Get/Set and in how many bytes are in each value. Each bar shows the mean of 100 trials; error bars show 95\% confidence intervals.\label{fig:ironsht-throughput-comparison}}
+\end{figure}
+""")
diff --git a/site/guide.md b/site/guide.md
@@ -7,7 +7,7 @@ title: "Verus: A Practical Foundation for Systems Verification<br/>Artifact Guid
 
 There are three sets of experiments with different technical requirements.
 
-### Set 1: verification statistics for macrobenchmarks and millibenchmarks, page table performance, mimalloc performance comparison, persistent memory log performance (emulated) — Figures 6, 7, 8, 11, 12, 13.
+### Set 1: verification statistics for macrobenchmarks and millibenchmarks, page table performance, mimalloc benchmark suite, persistent memory log performance (emulated) — Figures 6, 7, 8, 11, 12, 13.
 
 Set 1 requires Linux x86_64 (Ubuntu 22.04) with at least 8 physical cores on one CPU, although more cores may reduce scheduling noise (we recommend at least 10). Set 1 requires the Docker runtime (Docker-CE). We recommend CloudLab d6515, or if they are in short supply, CloudLab c220g2.
 
@@ -80,63 +80,100 @@ scp '<username>@<node>.cloudlab.us:/mydata/verus-sosp24-artifact/macro-stats/res
 scp '<username>@<node>.cloudlab.us:/mydata/verus-sosp24-artifact/macro-stats/results/macro-table.pdf' .
 ```
 
+**Step 3. Run the mimalloc benchmark suite**
+
+Navigate to the directory **(TODO where?)**
+
+Run:
+
+```
+./build-benchmarks-and-allocators.sh
+./compare-benchmarks.sh
+```
+
+This should only take a couple of minutes.
+
+Note many benchmarks are expected to fail, and you'll probably see indications of it
+in the intermediate output. The end will summarize the results in tabular form.
+The last table, formatted in LaTeX, only contains the benchmarks that succeeded.
+The output should resemble Figure 12.
 
 ## Set 2
 
 ### Claims
 
-**TODO.**
+Our claim is that our Verus version of IronKV has similar performance to the
+Dafny version of IronKV (also called IronSHT in some contexts) from IronFleet.
 
-### Instructions
+The methodology is that we benchmark the Dafny and Verus versions of IronKV
+using the test harness from IronFleet's repository. The experiments for the
+paper reflect a run on Windows 11 Enterprise on a 2.4 GHz Intel Core i9-10885H
+CPU 8-core laptop with 64 GB of RAM. The resulting figure (Figure 9 in the
+paper) confirms our claim.
 
-**TODO. Code from https://github.com/verus-lang/verified-ironkv?**
+These instructions describe how to generate your own version of Figure 9 on
+the Windows machine of your choice. The figure you generate will be in LaTeX
+format, in a file named `ironfleet-port-plot.tex`. Unless you use exactly the
+same type of machine we use, your results may be quantitatively different from
+ours. For example, you may find higher or lower absolute throughput. But your
+results should still (hopefully) confirm our claim of similar performance.
+
+### Instructions
 
 To run this experiment, take the following steps:
 
-* Build the IronFleet version of IronSHT.
+* Build the IronFleet version of IronKV.
     * Install `dotnet`.
     * Install `scons` with `pip install scons`.
     * Download the Dafny 3.4.0 release, including its executable, from
       `https://github.com/dafny-lang/dafny/releases/download/v3.4.0/dafny-3.4.0-x64-win.zip`.
-    * Sync to commit `2fe4dcdc323b92e93f759cc3e373521366b7f691` of the Ironclad repository at `[email protected]:microsoft/Ironclad.git`.
-	* From the `ironfleet` directory in that repository, run `scons
-      --dafny-path=<path>` where `<path>` is the path to the directory
+    * Sync to commit `2fe4dcdc323b92e93f759cc3e373521366b7f691` of the
+      Ironclad repository at `https://github.com/microsoft/Ironclad.git`.
+    * From the `ironfleet` directory in that repository, run
+      `scons --dafny-path=<path>` where `<path>` is the path to the directory
       containing the Dafny 3.4.0 executable.
-* Build the Verus version of IronSHT.
-    * Download the Verus source code from commit `96957b633471e4d5a6bc267f9bf0e31555e888db`
-      of the repo at `[email protected]:secure-foundations/verus.git`.
+* Build the Verus version of IronKV.
+    * Download the Verus source code from commit
+      `96957b633471e4d5a6bc267f9bf0e31555e888db`
+      of the repo at `https://github.com/verus-lang/verus`.
     * Build the Verus source code as the repo describes, making sure to use
       `--release` on the `vargo build`.
-    * Download the Verus version of IronSHT from commit
-      `ea501b56ef92290329ba434fb8b675a5f467de65` of the Verus systems code
-      repository at `git@github.com:verus-lang/verus-systems-code.git`.
-    * Make a small local update to that repository to make it operate on
+    * Download the Verus version of IronKV from commit
+      `ea501b56ef92290329ba434fb8b675a5f467de65` of the
+      repository at `https://github.com/verus-lang/verified-ironkv.git`.
+    * Make a small local update to that repository's code to make it operate on
       Windows, as follows:  In the file
-      `ironfleet-comparison/ironsht/csharp/IronSHTClient/Client.cs`, change
+      `ironsht/csharp/IronSHTClient/Client.cs`, change
       all references to `../liblib.so` to `lib.dll`.
-    * From the `ironfleet-comparison` directory, run `scons
-         --verus-path=<path>` where `<path>` is the path to the root directory
-      for the Verus repository.
-    * Copy the resulting `lib.dll` from the `ironfleet-comparison` directory
-      to the directory containing this `README.md` (`sys/eval/ironsht`).
+    * From the top-level directory of that repository, run
+      `scons --verus-path=<path>` where `<path>` is the path to the root
+      directory for the Verus repository. This will create a `lib.dll`
+      file in the top-level directory of that repository.
+    * Copy that `lib.dll` file to the `ironkv` subdirectory of the repository
+      for this artifact. For instance, if this file you're reading now is
+      `<path>/site/guide.md`, copy it to `<path>/ironkv/lib.dll`.
 * Prepare to run the experiment.
-    * Change directory to the directory containing this `README.md`
-      (`sys/eval/ironsht`).
-    * Generate certificates with `dotnet <path>/ironsht/bin/CreateIronServiceCerts.dll
-      outputdir=certs name=MySHT type=IronSHT addr1=127.0.0.1 port1=4001
-      addr2=127.0.0.1 port2=4002 addr3=127.0.0.1 port3=4003` where `<path>` is
-      the path to either the Dafny or Verus IronSHT code.
+    * Change directory to the `ironkv` subdirectory of the repository for
+      this artifact. For instance, if this file you're reading now is
+      `<path>/site/guide.md`, change directory to `<path>/ironkv/`.
+    * Generate certificates by running
+      `dotnet <path>/ironsht/bin/CreateIronServiceCerts.dll
+     outputdir=certs name=MySHT type=IronSHT addr1=127.0.0.1 port1=4001
+     addr2=127.0.0.1 port2=4002 addr3=127.0.0.1 port3=4003`
+      where `<path>` is the path to either the Dafny or Verus IronKV code.
     * With `pip`, install `numpy` and `scipy`.
     * Update the hardcoded paths `VERUS_PATH` and `DAFNY_PATH` in the script
       `compare.py` to match where those directories are on your machine.
     * Prepare your machine for the experiment by telling Windows to never
       sleep, by telling it to use the "best performance" power mode (to
       disable SpeedStep), and by plugging it into a real charging outlet (not
       just a USB-C connector to a monitor).
-* Run the experiment by running `python compare.py` from this directory. This will
-  overwrite the file `raw-data.txt` with its output.
-* Generate the graph by running `python gengraph.py > ..\..\paper\ironfleet-port-plot.tex`.
-  This uses the data stored in `raw-data.txt`.
+* Run the experiment by running `python compare.py` from the `ironkv`
+  subdirectory of the repository for this artifact. This will overwrite the
+  file `raw-data.txt` with its output.
+* Generate the graph by running `python gengraph.py > ironfleet-port-plot.tex`.
+  This uses the data stored in `raw-data.txt` to generate a graph, in LaTeX
+  format, in the file `ironfleet-port-plot.tex`.
 
 
 ## Set 3 - Node Replication