From f0044d1d62588bc0cfd83ca0dd6f7c6c6d83d268 Mon Sep 17 00:00:00 2001 From: abbycin Date: Mon, 9 Mar 2026 12:26:34 +0800 Subject: [PATCH] Clarify workloads and comparison filtering --- README.md | 16 +++++++++------- scripts/compare_baseline.py | 25 ++++++++++++++++++++----- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index c3573ca..b40518c 100644 --- a/README.md +++ b/README.md @@ -47,14 +47,16 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_r - Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path` - Fairness rule for read-heavy workloads: `get`, `scan`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so RocksDB is not compared with GC artificially disabled while reads may have to touch multiple SSTs - Throughput metric: workload-level `ops_per_sec` (higher is better) - - `W1/W2/W3/W4`: mixed read+update throughput - - `W5`: mixed read+update+scan throughput - - `W6`: scan throughput (counted by scan requests, not scanned key count) - Tail latency metric: workload-level `p99_us` (lower is better) - - This is the mixed p99 of all operations executed in that workload row, not per-op-type p99 - - `W1/W2/W3/W4`: mixed read+update p99 - - `W5`: mixed read+update+scan p99 - - `W6`: scan p99 + - This is the workload-level p99 of all operations executed in that row, not per-op-type p99 + +## Workloads +- `W1`: `95%` read + `5%` update, uniform distribution +- `W2`: `95%` read + `5%` update, Zipf distribution +- `W3`: `50%` read + `50%` update, uniform distribution +- `W4`: `5%` read + `95%` update, uniform distribution +- `W5`: `70%` read + `25%` update + `5%` scan, uniform distribution +- `W6`: `100%` scan, uniform distribution; throughput is counted by scan requests, not scanned key count Raw CSV path: `./scripts/benchmark_results.csv` diff --git a/scripts/compare_baseline.py b/scripts/compare_baseline.py index 69a2838..de10864 100644 --- a/scripts/compare_baseline.py +++ b/scripts/compare_baseline.py @@ -15,6 +15,11 @@ def main() -> int: default="./scripts/benchmark_results.csv", help="Path to benchmark CSV (default: ./scripts/benchmark_results.csv)", ) + parser.add_argument( + "--filter-errors", + action="store_true", + help="Only compare rows with error_ops == 0 (default: include all rows)", + ) args = parser.parse_args() df = pd.read_csv(args.csv_path) @@ -44,20 +49,28 @@ def main() -> int: "read_path", ] - ok = df[df["error_ops"] == 0].copy() - if ok.empty: - print("No rows with error_ops == 0, cannot compare.") + if args.filter_errors: + base = df[df["error_ops"] == 0].copy() + else: + base = df.copy() + + if base.empty: + if args.filter_errors: + print("No rows with error_ops == 0, cannot compare.") + else: + print("No rows found in csv, cannot compare.") return 0 - agg = ok.groupby(keys + ["engine"], as_index=False).agg( + agg = base.groupby(keys + ["engine"], as_index=False).agg( ops_per_sec=("ops_per_sec", "median"), p99_us=("p99_us", "median"), + error_ops=("error_ops", "median"), ) piv = agg.pivot_table( index=keys, columns="engine", - values=["ops_per_sec", "p99_us"], + values=["ops_per_sec", "p99_us", "error_ops"], aggfunc="first", ) piv.columns = [f"{metric}_{engine}" for metric, engine in piv.columns] @@ -68,6 +81,8 @@ def main() -> int: "ops_per_sec_rocksdb", "p99_us_mace", "p99_us_rocksdb", + "error_ops_mace", + "error_ops_rocksdb", ]: if col not in out.columns: out[col] = pd.NA