Skip to content

Commit

Permalink
Merge branch 'main' of github.com:verus-lang/paper-sosp24-artifact
Browse files Browse the repository at this point in the history
  • Loading branch information
utaal committed Aug 20, 2024
2 parents 5d94832 + eede04f commit c74e7e8
Show file tree
Hide file tree
Showing 3 changed files with 249 additions and 31 deletions.
88 changes: 88 additions & 0 deletions ironkv/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
import os
import re
import subprocess
import sys
import time

VERUS_PATH = "C:/Apps/verus-systems-code/ironfleet-comparison/ironsht/bin/"
DAFNY_PATH = "C:/Apps/ironclad-microsoft/ironfleet/bin/"
RAW_DATA_PATH = "raw-data.txt"
NUM_THREADS = 10
SECONDS = 30
NUM_KEYS = 1000
NUM_EXPERIMENT_ITERATIONS = 100
CONFIDENCE_QUANTILE = 0.95
VALUE_SIZES = [ 128, 256, 512 ]

raw_data_out = open(RAW_DATA_PATH, "w")
raw_data_out.write("Language\tThreads\tSeconds\tWorkload\tNumber of keys\tValue size\tRequests completed\n")

def launch_server(verus, which_server):
server_exe = os.path.join(VERUS_PATH if verus else DAFNY_PATH, "IronSHTServer.dll")
cmd = [
"dotnet",
server_exe,
"certs/MySHT.IronSHT.service.txt",
f"certs/MySHT.IronSHT.server{which_server}.private.txt"
]
server = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
while True:
line = server.stdout.readline().decode('utf-8').strip()
if line == "[[READY]]":
return server

def measure_client(verus, workload, value_size):
server1 = launch_server(verus, 1)
server2 = launch_server(verus, 2)
server3 = launch_server(verus, 3)

client_exe = os.path.join(VERUS_PATH if verus else DAFNY_PATH, "IronSHTClient.dll")
cmd = [
"dotnet",
client_exe,
"certs/MySHT.IronSHT.service.txt",
f"nthreads={NUM_THREADS}",
f"duration={SECONDS}",
f"workload={workload}",
f"numkeys={NUM_KEYS}",
f"valuesize={value_size}"
]
client = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
num_requests_completed = 0
while True:
line = client.stdout.readline()
if not line:
break
line = line.decode('utf-8').strip()
if re.search('^#req', line):
num_requests_completed = num_requests_completed + 1
kilo_requests_per_second = num_requests_completed * 0.001 / SECONDS
raw_data_out.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
"verus" if verus else "dafny",
NUM_THREADS,
SECONDS,
workload,
NUM_KEYS,
value_size,
num_requests_completed
))
raw_data_out.flush()

server1.kill()
server2.kill()
server3.kill()

return kilo_requests_per_second

def do_experiments():
for i in range(NUM_EXPERIMENT_ITERATIONS):
for verus in [True, False]:
language = ("Verus" if verus else "Dafny")
for workload in [ 'g', 's' ]:
for value_size in VALUE_SIZES:
print(f"Performing experiment #{i} for {language} with workload {workload} and value size {value_size}")
kops = measure_client(verus, workload, value_size)

do_experiments()

raw_data_out.close()
93 changes: 93 additions & 0 deletions ironkv/gengraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import collections
import numpy
import os
import re
import scipy.stats
import sys

RAW_DATA_FILE = "raw-data.txt"
VALUE_SIZES = [ 128, 256, 512 ]
CONFIDENCE_QUANTILE = 0.95

def get_header_map(header_line):
column_headings = header_line.strip().split('\t')
num_columns = len(column_headings)
which_column = {}
for col, heading in enumerate(column_headings):
which_column[heading] = col
return (which_column, num_columns)

data = collections.defaultdict(list)
raw_data_in = open(RAW_DATA_FILE, "r")
(header_map, num_columns) = get_header_map(raw_data_in.readline())

for line in raw_data_in.readlines():
values = line.strip().split('\t')
if len(values) < num_columns:
continue
language = values[header_map['Language']]
seconds = int(values[header_map['Seconds']])
workload = values[header_map['Workload']]
value_size = int(values[header_map['Value size']])
num_requests_completed = int(values[header_map['Requests completed']])
kops_per_sec = num_requests_completed * 0.001 / seconds
key = f"{language}_{workload}_{value_size}"
data[key].append(kops_per_sec)

print(r"""
\begin{figure}
\begin{tikzpicture}
\centering
\begin{axis}[
ybar,
ymin = 0,
ymax = 6,
height=4.5cm, width=\columnwidth,
legend image code/.code={ \draw [#1] (0cm,-0.1cm) rectangle (0.1cm,0.2cm); },
legend style={at={(0.275, 0.95)}},
bar width=0.4cm,
ymajorgrids, tick align=inside,
enlarge y limits={value=.1,upper},
axis x line*=bottom,
axis y line*=left,
x tick label style={rotate=10,anchor=east,xshift=16pt,yshift=-8pt,font=\scriptsize},
tickwidth=0pt,
enlarge x limits=true,
xlabel={Workload type, bytes per value},
ylabel={Throughput (kop/s)},
symbolic x coords={
Get 128,Get 256,Get 512,Set 128,Set 256,Set 512
},
xtick=data
]
""")

for language in ['dafny', 'verus']:
printable_language = "Verus" if language == "verus" else "IronFleet"
printable_color = "teal" if language == "verus" else "red"
printable_pattern = "" if verus else ",postaction={pattern=north east lines}"
print(r"\addplot [draw=none, fill=%s!100%s,error bars/.cd, y dir=both, y explicit] coordinates {" % (printable_color, printable_pattern))
for workload in ['g', 's']:
printable_workload = 'Get' if workload == 'g' else 'Set'
for value_size in VALUE_SIZES:
printable_value_size = value_size
key = f"{language}_{workload}_{value_size}"
if len(data[key]) == 0:
print(f"Could not find data for key {key}")
sys.exit(-1)
a = numpy.array(data[key])
mean = numpy.mean(a)
std_err = scipy.stats.sem(a)
(conf95_l, conf95_r) = scipy.stats.t.interval(confidence=CONFIDENCE_QUANTILE, df=len(a) - 1, loc=mean, scale=std_err)
conf95_diff = conf95_r - mean
xlabel=f"{printable_workload} {printable_value_size}"
print(f"({xlabel},{mean}) += (0, {conf95_diff}) -= (0, {conf95_diff})")
print("};")

print(r"""
\legend{IronFleet,Verus}
\end{axis}
\end{tikzpicture}
\caption{Throughput comparison of IronFleet and Verus versions of IronSHT. Workload varies between Get/Set and in how many bytes are in each value. Each bar shows the mean of 100 trials; error bars show 95\% confidence intervals.\label{fig:ironsht-throughput-comparison}}
\end{figure}
""")
99 changes: 68 additions & 31 deletions site/guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ title: "Verus: A Practical Foundation for Systems Verification<br/>Artifact Guid

There are three sets of experiments with different technical requirements.

### Set 1: verification statistics for macrobenchmarks and millibenchmarks, page table performance, mimalloc performance comparison, persistent memory log performance (emulated) — Figures 6, 7, 8, 11, 12, 13.
### Set 1: verification statistics for macrobenchmarks and millibenchmarks, page table performance, mimalloc benchmark suite, persistent memory log performance (emulated) — Figures 6, 7, 8, 11, 12, 13.

Set 1 requires Linux x86_64 (Ubuntu 22.04) with at least 8 physical cores on one CPU, although more cores may reduce scheduling noise (we recommend at least 10). Set 1 requires the Docker runtime (Docker-CE). We recommend CloudLab d6515, or if they are in short supply, CloudLab c220g2.

Expand Down Expand Up @@ -80,63 +80,100 @@ scp '<username>@<node>.cloudlab.us:/mydata/verus-sosp24-artifact/macro-stats/res
scp '<username>@<node>.cloudlab.us:/mydata/verus-sosp24-artifact/macro-stats/results/macro-table.pdf' .
```

**Step 3. Run the mimalloc benchmark suite**

Navigate to the directory **(TODO where?)**

Run:

```
./build-benchmarks-and-allocators.sh
./compare-benchmarks.sh
```

This should only take a couple of minutes.

Note many benchmarks are expected to fail, and you'll probably see indications of it
in the intermediate output. The end will summarize the results in tabular form.
The last table, formatted in LaTeX, only contains the benchmarks that succeeded.
The output should resemble Figure 12.

## Set 2

### Claims

**TODO.**
Our claim is that our Verus version of IronKV has similar performance to the
Dafny version of IronKV (also called IronSHT in some contexts) from IronFleet.

### Instructions
The methodology is that we benchmark the Dafny and Verus versions of IronKV
using the test harness from IronFleet's repository. The experiments for the
paper reflect a run on Windows 11 Enterprise on a 2.4 GHz Intel Core i9-10885H
CPU 8-core laptop with 64 GB of RAM. The resulting figure (Figure 9 in the
paper) confirms our claim.

**TODO. Code from https://github.com/verus-lang/verified-ironkv?**
These instructions describe how to generate your own version of Figure 9 on
the Windows machine of your choice. The figure you generate will be in LaTeX
format, in a file named `ironfleet-port-plot.tex`. Unless you use exactly the
same type of machine we use, your results may be quantitatively different from
ours. For example, you may find higher or lower absolute throughput. But your
results should still (hopefully) confirm our claim of similar performance.

### Instructions

To run this experiment, take the following steps:

* Build the IronFleet version of IronSHT.
* Build the IronFleet version of IronKV.
* Install `dotnet`.
* Install `scons` with `pip install scons`.
* Download the Dafny 3.4.0 release, including its executable, from
`https://github.com/dafny-lang/dafny/releases/download/v3.4.0/dafny-3.4.0-x64-win.zip`.
* Sync to commit `2fe4dcdc323b92e93f759cc3e373521366b7f691` of the Ironclad repository at `[email protected]:microsoft/Ironclad.git`.
* From the `ironfleet` directory in that repository, run `scons
--dafny-path=<path>` where `<path>` is the path to the directory
* Sync to commit `2fe4dcdc323b92e93f759cc3e373521366b7f691` of the
Ironclad repository at `https://github.com/microsoft/Ironclad.git`.
* From the `ironfleet` directory in that repository, run
`scons --dafny-path=<path>` where `<path>` is the path to the directory
containing the Dafny 3.4.0 executable.
* Build the Verus version of IronSHT.
* Download the Verus source code from commit `96957b633471e4d5a6bc267f9bf0e31555e888db`
of the repo at `[email protected]:secure-foundations/verus.git`.
* Build the Verus version of IronKV.
* Download the Verus source code from commit
`96957b633471e4d5a6bc267f9bf0e31555e888db`
of the repo at `https://github.com/verus-lang/verus`.
* Build the Verus source code as the repo describes, making sure to use
`--release` on the `vargo build`.
* Download the Verus version of IronSHT from commit
`ea501b56ef92290329ba434fb8b675a5f467de65` of the Verus systems code
repository at `git@github.com:verus-lang/verus-systems-code.git`.
* Make a small local update to that repository to make it operate on
* Download the Verus version of IronKV from commit
`ea501b56ef92290329ba434fb8b675a5f467de65` of the
repository at `https://github.com/verus-lang/verified-ironkv.git`.
* Make a small local update to that repository's code to make it operate on
Windows, as follows: In the file
`ironfleet-comparison/ironsht/csharp/IronSHTClient/Client.cs`, change
`ironsht/csharp/IronSHTClient/Client.cs`, change
all references to `../liblib.so` to `lib.dll`.
* From the `ironfleet-comparison` directory, run `scons
--verus-path=<path>` where `<path>` is the path to the root directory
for the Verus repository.
* Copy the resulting `lib.dll` from the `ironfleet-comparison` directory
to the directory containing this `README.md` (`sys/eval/ironsht`).
* From the top-level directory of that repository, run
`scons --verus-path=<path>` where `<path>` is the path to the root
directory for the Verus repository. This will create a `lib.dll`
file in the top-level directory of that repository.
* Copy that `lib.dll` file to the `ironkv` subdirectory of the repository
for this artifact. For instance, if this file you're reading now is
`<path>/site/guide.md`, copy it to `<path>/ironkv/lib.dll`.
* Prepare to run the experiment.
* Change directory to the directory containing this `README.md`
(`sys/eval/ironsht`).
* Generate certificates with `dotnet <path>/ironsht/bin/CreateIronServiceCerts.dll
outputdir=certs name=MySHT type=IronSHT addr1=127.0.0.1 port1=4001
addr2=127.0.0.1 port2=4002 addr3=127.0.0.1 port3=4003` where `<path>` is
the path to either the Dafny or Verus IronSHT code.
* Change directory to the `ironkv` subdirectory of the repository for
this artifact. For instance, if this file you're reading now is
`<path>/site/guide.md`, change directory to `<path>/ironkv/`.
* Generate certificates by running
`dotnet <path>/ironsht/bin/CreateIronServiceCerts.dll
outputdir=certs name=MySHT type=IronSHT addr1=127.0.0.1 port1=4001
addr2=127.0.0.1 port2=4002 addr3=127.0.0.1 port3=4003`
where `<path>` is the path to either the Dafny or Verus IronKV code.
* With `pip`, install `numpy` and `scipy`.
* Update the hardcoded paths `VERUS_PATH` and `DAFNY_PATH` in the script
`compare.py` to match where those directories are on your machine.
* Prepare your machine for the experiment by telling Windows to never
sleep, by telling it to use the "best performance" power mode (to
disable SpeedStep), and by plugging it into a real charging outlet (not
just a USB-C connector to a monitor).
* Run the experiment by running `python compare.py` from this directory. This will
overwrite the file `raw-data.txt` with its output.
* Generate the graph by running `python gengraph.py > ..\..\paper\ironfleet-port-plot.tex`.
This uses the data stored in `raw-data.txt`.
* Run the experiment by running `python compare.py` from the `ironkv`
subdirectory of the repository for this artifact. This will overwrite the
file `raw-data.txt` with its output.
* Generate the graph by running `python gengraph.py > ironfleet-port-plot.tex`.
This uses the data stored in `raw-data.txt` to generate a graph, in LaTeX
format, in the file `ironfleet-port-plot.tex`.


## Set 3 - Node Replication
Expand Down

0 comments on commit c74e7e8

Please sign in to comment.