From cbc1e24b084560da16a093142b826d672715b3ca Mon Sep 17 00:00:00 2001 From: abbycin Date: Thu, 12 Mar 2026 08:36:22 +0800 Subject: [PATCH] Unify benchmark op counters --- docs/repro.md | 4 +-- rocksdb/main.cpp | 44 +++++++++++++++---------------- scripts/compare_baseline.py | 29 ++++++++++++++------- src/main.rs | 52 ++++++++++++++++++------------------- 4 files changed, 69 insertions(+), 60 deletions(-) diff --git a/docs/repro.md b/docs/repro.md index 89f9312..0d85f86 100644 --- a/docs/repro.md +++ b/docs/repro.md @@ -178,7 +178,7 @@ Unified schema columns include: - `durability_mode` (`relaxed` / `durable`) - `threads,key_size,value_size,prefill_keys` - `ops` -- `total_ops,ok_ops,err_ops` +- `total_op,ok_op,err_op` - `p50_us,p95_us,p99_us,p999_us` - `read_path` @@ -207,4 +207,4 @@ Only compare rows with identical: - `read_path` - read-heavy workload rows are expected to include the pre-run GC/compaction pass described above -If `err_ops > 0`, investigate that case before drawing conclusions. +If `err_op > 0`, investigate that case before drawing conclusions. 
diff --git a/rocksdb/main.cpp b/rocksdb/main.cpp index ffa784c..4665022 100644 --- a/rocksdb/main.cpp +++ b/rocksdb/main.cpp @@ -136,8 +136,8 @@ struct Quantiles { }; struct ThreadStats { - uint64_t total_ops = 0; - uint64_t err_ops = 0; + uint64_t total_op = 0; + uint64_t err_op = 0; std::array<uint64_t, LAT_BUCKETS> hist{}; }; @@ -160,9 +160,9 @@ struct ResultRow { ReadPath read_path; uint64_t warmup_secs; uint64_t measure_secs; - uint64_t total_ops; - uint64_t ok_ops; - uint64_t err_ops; + uint64_t total_op; + uint64_t ok_op; + uint64_t err_op; double ops; Quantiles quantiles; uint64_t elapsed_us; @@ -440,7 +440,7 @@ static std::string csv_escape(const std::string &v) { static const char *result_header() { return "schema_version,ts_epoch_ms,engine,workload_id,mode,durability_mode,threads,key_size,value_size,prefill_" "keys,shared_keyspace,distribution,zipf_theta,read_pct,update_pct,scan_pct,scan_len,read_path,warmup_secs," - "measure_secs,total_ops,ok_ops,err_ops,ops,p50_us,p95_us,p99_us,p999_us,elapsed_us,host,os,arch,kernel,cpu_" + "measure_secs,total_op,ok_op,err_op,ops,p50_us,p95_us,p99_us,p999_us,elapsed_us,host,os,arch,kernel,cpu_" "cores,mem_total_kb,mem_available_kb"; } @@ -450,8 +450,8 @@ static std::string result_row_csv(const ResultRow &r) { r.ts_epoch_ms, "rocksdb", csv_escape(r.workload_id), csv_escape(r.mode), durability_str(r.durability_mode), r.threads, r.key_size, r.value_size, r.prefill_keys, r.shared_keyspace, distribution_str(r.distribution), r.zipf_theta, r.read_pct, r.update_pct, - r.scan_pct, r.scan_len, read_path_str(r.read_path), r.warmup_secs, r.measure_secs, r.total_ops, - r.ok_ops, r.err_ops, static_cast<uint64_t>(r.ops), r.quantiles.p50_us, r.quantiles.p95_us, + r.scan_pct, r.scan_len, read_path_str(r.read_path), r.warmup_secs, r.measure_secs, r.total_op, + r.ok_op, r.err_op, static_cast<uint64_t>(r.ops), r.quantiles.p50_us, r.quantiles.p95_us, r.quantiles.p99_us, r.quantiles.p999_us, r.elapsed_us, csv_escape(r.meta.host), csv_escape(r.meta.os), csv_escape(r.meta.arch),
csv_escape(r.meta.kernel), r.meta.cpu_cores, r.meta.mem_total_kb, r.meta.mem_available_kb); @@ -897,9 +897,9 @@ int main(int argc, char *argv[]) { mark_measure_start(); auto record = [&](bool ok, uint64_t us) { - stats.total_ops += 1; + stats.total_op += 1; if (!ok) { - stats.err_ops += 1; + stats.err_op += 1; } auto b = latency_bucket(us); stats.hist[b] += 1; @@ -954,21 +954,21 @@ int main(int argc, char *argv[]) { measure_begin_ns = measure_end_ns; } uint64_t elapsed_us = (measure_end_ns - measure_begin_ns) / 1000; - uint64_t total_ops = 0; - uint64_t err_ops = 0; + uint64_t total_op = 0; + uint64_t err_op = 0; std::array<uint64_t, LAT_BUCKETS> merged_hist{}; for (const auto &s: thread_stats) { - total_ops += s.total_ops; - err_ops += s.err_ops; + total_op += s.total_op; + err_op += s.err_op; for (size_t i = 0; i < merged_hist.size(); ++i) { merged_hist[i] += s.hist[i]; } } - auto ops = elapsed_us == 0 ? 0.0 : (static_cast<double>(total_ops) * 1'000'000.0 / static_cast<double>(elapsed_us)); + auto ops = elapsed_us == 0 ? 0.0 : (static_cast<double>(total_op) * 1'000'000.0 / static_cast<double>(elapsed_us)); - uint64_t ok_ops = total_ops >= err_ops ? + uint64_t ok_op = total_op >= err_op ?
(total_op - err_op) : 0; auto row = ResultRow{ .ts_epoch_ms = now_epoch_ms(), @@ -989,9 +989,9 @@ int main(int argc, char *argv[]) { .read_path = read_path.value(), .warmup_secs = effective_warmup_secs, .measure_secs = effective_measure_secs, - .total_ops = total_ops, - .ok_ops = ok_ops, - .err_ops = err_ops, + .total_op = total_op, + .ok_op = ok_op, + .err_op = err_op, .ops = ops, .quantiles = Quantiles{ @@ -1010,9 +1010,9 @@ int main(int argc, char *argv[]) { } fmt::println( - "engine=rocksdb workload={} mode={} durability={} threads={} ops={} err={} qps={} p99_us={} result_file={}", - row.workload_id, row.mode, durability_str(row.durability_mode), row.threads, row.total_ops, row.err_ops, - static_cast<uint64_t>(row.ops), row.quantiles.p99_us, args.result_file); + "engine=rocksdb workload={} mode={} durability={} threads={} total_op={} ok_op={} err_op={} ops={} p99_us={} result_file={}", + row.workload_id, row.mode, durability_str(row.durability_mode), row.threads, row.total_op, row.ok_op, + row.err_op, static_cast<uint64_t>(row.ops), row.quantiles.p99_us, args.result_file); delete handle; delete db; diff --git a/scripts/compare_baseline.py b/scripts/compare_baseline.py index 7277cc3..05ad7c0 100644 --- a/scripts/compare_baseline.py +++ b/scripts/compare_baseline.py @@ -18,12 +18,21 @@ def main() -> int: parser.add_argument( "--filter-errors", action="store_true", - help="Only compare rows with err_ops == 0 (default: include all rows)", + help="Only compare rows with err_op == 0 (default: include all rows)", ) args = parser.parse_args() df = pd.read_csv(args.csv_path) + legacy_columns = { + "total_ops": "total_op", + "ok_ops": "ok_op", + "err_ops": "err_op", + } + for legacy, current in legacy_columns.items(): + if legacy in df.columns and current not in df.columns: + df = df.rename(columns={legacy: current}) + required = { "engine", "workload_id", @@ -34,7 +43,7 @@ def main() -> int: "read_path", "ops", "p99_us", - "err_ops", + "err_op", } missing = required - set(df.columns) if
missing: @@ -50,13 +59,13 @@ def main() -> int: ] if args.filter_errors: - base = df[df["err_ops"] == 0].copy() + base = df[df["err_op"] == 0].copy() else: base = df.copy() if base.empty: if args.filter_errors: - print("No rows with err_ops == 0, cannot compare.") + print("No rows with err_op == 0, cannot compare.") else: print("No rows found in csv, cannot compare.") return 0 @@ -64,13 +73,13 @@ def main() -> int: agg = base.groupby(keys + ["engine"], as_index=False).agg( ops=("ops", "median"), p99_us=("p99_us", "median"), - err_ops=("err_ops", "median"), + err_op=("err_op", "median"), ) piv = agg.pivot_table( index=keys, columns="engine", - values=["ops", "p99_us", "err_ops"], + values=["ops", "p99_us", "err_op"], aggfunc="first", ) piv.columns = [f"{metric}_{engine}" for metric, engine in piv.columns] @@ -81,13 +90,13 @@ def main() -> int: "ops_rocksdb", "p99_us_mace", "p99_us_rocksdb", - "err_ops_mace", - "err_ops_rocksdb", + "err_op_mace", + "err_op_rocksdb", ]: if col not in out.columns: out[col] = pd.NA - out["qps_ratio_mace_over_rocksdb"] = ( + out["ops_ratio_mace_over_rocksdb"] = ( out["ops_mace"] / out["ops_rocksdb"] ) out["p99_ratio_mace_over_rocksdb"] = out["p99_us_mace"] / out["p99_us_rocksdb"] @@ -95,7 +104,7 @@ def main() -> int: print(out.to_string(index=False)) print("\nInterpretation:") - print("- qps_ratio_mace_over_rocksdb > 1: mace has higher throughput") + print("- ops_ratio_mace_over_rocksdb > 1: mace has higher throughput") print("- p99_ratio_mace_over_rocksdb < 1: mace has lower p99 latency") return 0 diff --git a/src/main.rs b/src/main.rs index 972f7fc..e20ac06 100644 --- a/src/main.rs +++ b/src/main.rs @@ -217,9 +217,9 @@ struct ResultRow { read_path: ReadPath, warmup_secs: u64, measure_secs: u64, - total_ops: u64, - ok_ops: u64, - err_ops: u64, + total_op: u64, + ok_op: u64, + err_op: u64, ops: f64, quantiles: Quantiles, elapsed_us: u64, @@ -228,16 +228,16 @@ struct ResultRow { #[derive(Clone, Debug)] struct ThreadStats { - total_ops: 
u64, - err_ops: u64, + total_op: u64, + err_op: u64, hist: [u64; LAT_BUCKETS], } impl Default for ThreadStats { fn default() -> Self { Self { - total_ops: 0, - err_ops: 0, + total_op: 0, + err_op: 0, hist: [0; LAT_BUCKETS], } } @@ -488,7 +488,7 @@ fn csv_escape(raw: &str) -> String { } fn result_header() -> &'static str { - "schema_version,ts_epoch_ms,engine,workload_id,mode,durability_mode,threads,key_size,value_size,prefill_keys,shared_keyspace,distribution,zipf_theta,read_pct,update_pct,scan_pct,scan_len,read_path,warmup_secs,measure_secs,total_ops,ok_ops,err_ops,ops,p50_us,p95_us,p99_us,p999_us,elapsed_us,host,os,arch,kernel,cpu_cores,mem_total_kb,mem_available_kb" + "schema_version,ts_epoch_ms,engine,workload_id,mode,durability_mode,threads,key_size,value_size,prefill_keys,shared_keyspace,distribution,zipf_theta,read_pct,update_pct,scan_pct,scan_len,read_path,warmup_secs,measure_secs,total_op,ok_op,err_op,ops,p50_us,p95_us,p99_us,p999_us,elapsed_us,host,os,arch,kernel,cpu_cores,mem_total_kb,mem_available_kb" } fn result_row_csv(row: &ResultRow) -> String { @@ -513,9 +513,9 @@ fn result_row_csv(row: &ResultRow) -> String { row.read_path.as_str(), row.warmup_secs, row.measure_secs, - row.total_ops, - row.ok_ops, - row.err_ops, + row.total_op, + row.ok_op, + row.err_op, row.ops, row.quantiles.p50_us, row.quantiles.p95_us, @@ -932,13 +932,13 @@ fn main() { } let mut merged_hist = [0u64; LAT_BUCKETS]; - let mut total_ops = 0u64; - let mut err_ops = 0u64; + let mut total_op = 0u64; + let mut err_op = 0u64; for h in handles { let s = h.join().unwrap(); - total_ops += s.total_ops; - err_ops += s.err_ops; + total_op += s.total_op; + err_op += s.err_op; for (i, v) in s.hist.iter().enumerate() { merged_hist[i] += *v; } @@ -950,10 +950,10 @@ fn main() { let ops = if elapsed_us == 0 { 0.0 } else { - (total_ops as f64) * 1_000_000.0 / (elapsed_us as f64) + (total_op as f64) * 1_000_000.0 / (elapsed_us as f64) }; - let ok_ops = total_ops.saturating_sub(err_ops); + let ok_op 
= total_op.saturating_sub(err_op); let quantiles = Quantiles { p50_us: histogram_quantile_us(&merged_hist, 0.50), @@ -982,9 +982,9 @@ fn main() { read_path, warmup_secs: effective_warmup_secs, measure_secs: effective_measure_secs, - total_ops, - ok_ops, - err_ops, + total_op, + ok_op, + err_op, ops, quantiles, elapsed_us, @@ -997,14 +997,14 @@ fn main() { } println!( - "engine=mace workload={} mode={} durability={} threads={} total_ops={} ok_ops={} err_ops={} ops={:.2} p99_us={} result_file={}", + "engine=mace workload={} mode={} durability={} threads={} total_op={} ok_op={} err_op={} ops={:.2} p99_us={} result_file={}", row.workload_id, row.mode, row.durability_mode.as_str(), row.threads, - row.total_ops, - row.ok_ops, - row.err_ops, + row.total_op, + row.ok_op, + row.err_op, row.ops, row.quantiles.p99_us, args.result_file @@ -1172,9 +1172,9 @@ fn run_one_op( }; if let Some(stats) = stats { - stats.total_ops += 1; + stats.total_op += 1; if !ok { - stats.err_ops += 1; + stats.err_op += 1; } if let Some(start) = start { let us = start.elapsed().as_micros() as u64;