From 6b8fcffd00ad220439f892d72af8bfc325d54950 Mon Sep 17 00:00:00 2001 From: abbycin Date: Wed, 4 Mar 2026 23:47:40 +0800 Subject: [PATCH] docs: clarify comparison semantics and add baseline compare script --- README.md | 38 ++++++++++------ docs/repro.md | 8 ++-- scripts/compare_baseline.py | 90 +++++++++++++++++++++++++++++++++++++ 3 files changed, 119 insertions(+), 17 deletions(-) create mode 100644 scripts/compare_baseline.py diff --git a/README.md b/README.md index 96bbb86..16266a2 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # kv_bench (Mace vs RocksDB) -Quick start for reproducible comparison. Full guide: [docs/repro.md](./docs/repro.md). +Quick start for reproducible Mace vs RocksDB comparison. Full guide: [docs/repro.md](./docs/repro.md). ## 5-Minute Quickstart 1. Set your storage root (any mount path, not hardcoded to `/nvme`): @@ -13,13 +13,13 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}" 2. Initialize Python env once: ```bash -cd /home/abby/kv_bench/scripts +cd "$HOME/kv_bench/scripts" ./init.sh source ./bin/activate -cd /home/abby/kv_bench +cd "$HOME/kv_bench" ``` -3. Run baseline comparison (both engines write to the same CSV): +3. Run baseline comparison (both engines append to the same CSV): ```bash rm -rf "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_rocks" @@ -29,20 +29,32 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_r ./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" ./scripts/benchmark_results.csv ``` -4. View and plot results: +4. Plot results: ```bash ./scripts/bin/python ./scripts/plot.py ./scripts/benchmark_results.csv ./scripts ``` -## Fast Result Reading -- Raw input CSV: `./scripts/benchmark_results.csv` -- Key columns: - - `engine` (`mace` / `rocksdb`) - - `workload_id` (`W1..W6`) - - `ops_per_sec` (higher is better) - - `p99_us` (lower is better) - - `error_ops` (must be 0 before drawing conclusions) +5. 
Print a direct comparison table from the CSV: + +```bash +./scripts/bin/python ./scripts/compare_baseline.py ./scripts/benchmark_results.csv +``` + +## What Is Compared +- Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path` +- Throughput metric: workload-level `ops_per_sec` (higher is better) + - `W1/W2/W3/W4`: mixed read+update throughput + - `W5`: mixed read+update+scan throughput + - `W6`: scan throughput (counted by scan requests, not scanned key count) +- Tail latency metric: workload-level `p99_us` (lower is better) + - This is the mixed p99 of all operations executed in that workload row, not per-op-type p99 + - `W1/W2/W3/W4`: mixed read+update p99 + - `W5`: mixed read+update+scan p99 + - `W6`: scan p99 +- Reliability gate: if `error_ops > 0`, debug that case before drawing conclusions + +Raw CSV path: `./scripts/benchmark_results.csv` ## Phase Reports - Phase 1 (stability CV): diff --git a/docs/repro.md b/docs/repro.md index c9c4cc6..ca596d6 100644 --- a/docs/repro.md +++ b/docs/repro.md @@ -22,8 +22,8 @@ For the local profile, assume approximately: Before running, set paths and verify filesystem type/capacity: ```bash -export KV_BENCH_STORAGE_ROOT=/home/abby/kv_bench/target/repro_storage -export KV_BENCH_RESULT_ROOT=/home/abby/kv_bench/target/repro_results +export KV_BENCH_STORAGE_ROOT="$HOME/kv_bench/target/repro_storage" +export KV_BENCH_RESULT_ROOT="$HOME/kv_bench/target/repro_results" mkdir -p "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}" df -hT "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}" @@ -36,10 +36,10 @@ Requirements: ## 3. Initialization ```bash -cd /home/abby/kv_bench/scripts +cd "$HOME/kv_bench/scripts" ./init.sh source ./bin/activate -cd /home/abby/kv_bench +cd "$HOME/kv_bench" ``` ## 4. 
#!/usr/bin/env python3
"""Print a mace-vs-rocksdb comparison table from benchmark_results.csv.

Rows are grouped by the workload configuration key (workload_id, threads,
key_size, value_size, durability_mode, read_path); repeated runs of the
same configuration are reduced with the median, the two engines are
pivoted side by side, and throughput / p99 ratios are derived.
"""

import argparse
import sys

import pandas as pd

# Columns that identify one comparable benchmark configuration.
_GROUP_KEYS = [
    "workload_id",
    "threads",
    "key_size",
    "value_size",
    "durability_mode",
    "read_path",
]

# Every column the CSV must provide before a comparison makes sense.
_REQUIRED_COLUMNS = frozenset(
    _GROUP_KEYS + ["engine", "ops_per_sec", "p99_us", "error_ops"]
)


def main() -> int:
    """Load the CSV, build the side-by-side table, and print it.

    Returns:
        Process exit code: 0 on success, 1 when the CSV cannot be read
        or contains no error-free rows to compare.

    Raises:
        ValueError: if the CSV is readable but lacks required columns
            (a malformed-input bug worth a loud failure, not a message).
    """
    parser = argparse.ArgumentParser(
        description="Compare mace vs rocksdb from benchmark_results.csv"
    )
    parser.add_argument(
        "csv_path",
        nargs="?",
        default="./scripts/benchmark_results.csv",
        help="Path to benchmark CSV (default: ./scripts/benchmark_results.csv)",
    )
    args = parser.parse_args()

    # A missing or empty CSV is an expected operator error for a CLI
    # tool: report it clearly and exit nonzero instead of dumping a
    # traceback.
    try:
        df = pd.read_csv(args.csv_path)
    except FileNotFoundError:
        print(f"CSV not found: {args.csv_path}", file=sys.stderr)
        return 1
    except pd.errors.EmptyDataError:
        print(f"CSV is empty: {args.csv_path}", file=sys.stderr)
        return 1

    missing = _REQUIRED_COLUMNS - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns in csv: {sorted(missing)}")

    # Reliability gate: rows that recorded errors must not feed the
    # comparison at all.
    ok = df[df["error_ops"] == 0].copy()
    if ok.empty:
        # Nonzero exit so calling scripts do not mistake "nothing to
        # compare" for a successful comparison.
        print("No rows with error_ops == 0, cannot compare.")
        return 1

    # Median over repeated runs of the same (configuration, engine) pair.
    agg = ok.groupby(_GROUP_KEYS + ["engine"], as_index=False).agg(
        ops_per_sec=("ops_per_sec", "median"),
        p99_us=("p99_us", "median"),
    )

    # Pivot the engines side by side; "first" is exact because the
    # groupby above already reduced each pair to a single row.
    piv = agg.pivot_table(
        index=_GROUP_KEYS,
        columns="engine",
        values=["ops_per_sec", "p99_us"],
        aggfunc="first",
    )
    piv.columns = [f"{metric}_{engine}" for metric, engine in piv.columns]
    out = piv.reset_index()

    # Keep the ratio columns numeric even when one engine is absent
    # from the CSV: a float NaN column divides cleanly, while an
    # object-dtype pd.NA fill can break arithmetic on older pandas.
    for col in (
        "ops_per_sec_mace",
        "ops_per_sec_rocksdb",
        "p99_us_mace",
        "p99_us_rocksdb",
    ):
        if col not in out.columns:
            out[col] = float("nan")

    out["qps_ratio_mace_over_rocksdb"] = (
        out["ops_per_sec_mace"] / out["ops_per_sec_rocksdb"]
    )
    out["p99_ratio_mace_over_rocksdb"] = out["p99_us_mace"] / out["p99_us_rocksdb"]
    out = out.sort_values(_GROUP_KEYS)

    print(out.to_string(index=False))
    print("\nInterpretation:")
    print("- qps_ratio_mace_over_rocksdb > 1: mace has higher throughput")
    print("- p99_ratio_mace_over_rocksdb < 1: mace has lower p99 latency")

    return 0


if __name__ == "__main__":
    sys.exit(main())