diff --git a/README.md b/README.md index 4a50d25..c3573ca 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_r ## What Is Compared - Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path` -- Fairness rule for read-heavy workloads: `get`, `scan`, `mixed`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so RocksDB is not compared with GC artificially disabled while reads may have to touch multiple SSTs +- Fairness rule for read-heavy workloads: `get`, `scan`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so RocksDB is not compared with GC artificially disabled while reads may have to touch multiple SSTs - Throughput metric: workload-level `ops_per_sec` (higher is better) - `W1/W2/W3/W4`: mixed read+update throughput - `W5`: mixed read+update+scan throughput diff --git a/docs/repro.md b/docs/repro.md index 26f02e4..4d44bc9 100644 --- a/docs/repro.md +++ b/docs/repro.md @@ -64,7 +64,7 @@ WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \ ``` Fairness note: -- For read-heavy workloads (`get`, `scan`, `mixed`, and `W1`-`W6`), both harnesses run one GC/compaction pass after prefill and before warmup/measurement. +- For read-heavy workloads (`get`, `scan`, and `W1`-`W6`), both harnesses run one GC/compaction pass after prefill and before warmup/measurement. - The purpose is to make the RocksDB vs Mace comparison fairer, since RocksDB reads may need to touch multiple SSTs and should not be benchmarked with GC/compaction artificially disabled. Generate plots: diff --git a/rocksdb/main.cpp b/rocksdb/main.cpp index 1310bc0..0b47574 100644 --- a/rocksdb/main.cpp +++ b/rocksdb/main.cpp @@ -68,7 +68,6 @@ struct Args { size_t key_size = 16; size_t value_size = 1024; size_t blob_size = 8192; - size_t insert_ratio = 30; bool random = false; std::string mode = "insert"; std::optional workload; @@ -277,15 +276,10 @@ static std::optional parse_workload(const Args &args, std::string if (mode == "get") { return WorkloadSpec{"LEGACY_GET", "get", Distribution::Uniform, 100, 0, 0, args.scan_len, true, false}; } - if (mode == "mixed") { - return WorkloadSpec{"LEGACY_MIXED", "mixed", Distribution::Uniform, - static_cast(100 - args.insert_ratio), - static_cast(args.insert_ratio), 0, args.scan_len, true, false}; - } if (mode == "scan") { return WorkloadSpec{"LEGACY_SCAN", "scan", Distribution::Uniform, 0, 0, 100, args.scan_len, true, false}; } - err = fmt::format("invalid mode `{}` (supported: insert/get/mixed/scan)", args.mode); + err = fmt::format("invalid mode `{}` (supported: insert/get/scan)", args.mode); return std::nullopt; } @@ -718,14 +712,13 @@ int main(int argc, char *argv[]) { bool disable_cleanup = false; std::string workload; - app.add_option("-m,--mode", args.mode, "Mode: insert, get, mixed, scan"); + app.add_option("-m,--mode", args.mode, "Mode: insert, get, scan"); app.add_option("--workload", workload, "Workload preset: W1..W6"); app.add_option("-t,--threads", args.threads, "Threads"); app.add_option("-k,--key-size", args.key_size, "Key Size"); app.add_option("-v,--value-size", args.value_size, "Value Size"); app.add_option("-b,--blob-size", args.blob_size, "Blob Size"); app.add_option("-i,--iterations", args.iterations, "Iterations"); - app.add_option("-r,--insert-ratio", args.insert_ratio, "Update ratio for legacy mixed mode"); app.add_option("-p,--path", args.path, "Database path"); app.add_option("--prefill-keys", args.prefill_keys, "Prefill key count"); app.add_option("--warmup-secs", args.warmup_secs, "Warmup duration seconds"); @@ -765,10 +758,6 @@ int main(int argc, char *argv[]) { fmt::println(stderr, "key_size and value_size must be >= 16"); return 1; } - if (args.insert_ratio > 100) { - fmt::println(stderr, "insert_ratio must be in [0,100]"); - return 1; - } if (!(args.zipf_theta > 0.0 && args.zipf_theta < 1.0)) { fmt::println(stderr, "zipf_theta must be in (0,1)"); return 1; diff --git a/scripts/fast_test.py b/scripts/fast_test.py index d922610..6cf71a6 100644 --- a/scripts/fast_test.py +++ b/scripts/fast_test.py @@ -38,7 +38,6 @@ ENGINE_ORDER = ("mace", "rocksdb") MODE_PLAN = ( ("put", "insert"), ("get", "get"), - ("mixed", "mixed"), ("scan", "scan"), ) KV_PROFILES = ( @@ -98,7 +97,6 @@ def run_engine_cases( prefill_keys: int, read_path: str, durability: str, - insert_ratio: int, ) -> None: for mode_display, mode_cli in MODE_PLAN: for threads in thread_points: @@ -135,8 +133,6 @@ def run_engine_cases( if mode_cli != "insert": args.extend(["--prefill-keys", str(prefill_keys)]) - if mode_cli == "mixed": - args.extend(["--insert-ratio", str(insert_ratio)]) if engine == "mace": args.append("--shared-keyspace") @@ -173,9 +169,7 @@ def annotate_points(x_values: Sequence[int], y_values: Sequence[float], y_max: f ) -def mode_title(mode_display: str, insert_ratio: int) -> str: - if mode_display == "mixed": - return f"Mixed ({100 - insert_ratio}% Get, {insert_ratio}% Put)" +def mode_title(mode_display: str) -> str: return mode_display.capitalize() @@ -184,7 +178,6 @@ def plot_results( result_csv: Path, output_dir: Path, thread_points: Sequence[int], - insert_ratio: int, ) -> list[Path]: df = pd.read_csv(result_csv) required = {"engine", "mode", "threads", "key_size", "value_size", "ops_per_sec"} @@ -246,7 +239,7 @@ def plot_results( ) annotate_points(x, y, y_max, line_color) - plt.title(mode_title(mode_display, insert_ratio), fontsize=16) + plt.title(mode_title(mode_display), fontsize=16) plt.xlabel("Threads", fontsize=14) plt.ylabel("OPS/s", fontsize=14) plt.xticks(list(thread_points), fontsize=12) @@ -272,7 +265,6 @@ def parse_args(argv: Sequence[str]) -> argparse.Namespace: parser.add_argument("--measure-secs", type=int, default=10) parser.add_argument("--iterations", type=int, default=500_000) parser.add_argument("--prefill-keys", type=int, default=500_000) - parser.add_argument("--insert-ratio", type=int, default=30) parser.add_argument("--read-path", choices=("snapshot", "rw_txn"), default="snapshot") parser.add_argument("--durability", choices=("relaxed", "durable"), default="relaxed") parser.add_argument("--csv-name", default="fast_test_results.csv") @@ -328,7 +320,6 @@ def main(argv: Sequence[str]) -> int: prefill_keys=args.prefill_keys, read_path=args.read_path, durability=args.durability, - insert_ratio=args.insert_ratio, ) run_engine_cases( engine="rocksdb", @@ -342,14 +333,12 @@ def main(argv: Sequence[str]) -> int: prefill_keys=args.prefill_keys, read_path=args.read_path, durability=args.durability, - insert_ratio=args.insert_ratio, ) outputs = plot_results( result_csv=result_csv, output_dir=script_dir, thread_points=thread_points, - insert_ratio=args.insert_ratio, ) print("[done] generated charts:") for p in outputs: diff --git a/src/main.rs b/src/main.rs index e93a06c..e7d76b0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -48,9 +48,6 @@ struct Args { #[arg(short = 'i', long, default_value_t = 10000)] iterations: usize, - #[arg(short = 'r', long, default_value_t = 30)] - insert_ratio: u8, - #[arg(long, default_value_t = false)] random: bool, @@ -349,17 +346,6 @@ fn parse_workload(args: &Args) -> Result { requires_prefill: true, insert_only: false, }), - "mixed" => Ok(WorkloadSpec { - id: "LEGACY_MIXED".into(), - mode_label: "mixed".into(), - distribution: Distribution::Uniform, - read_pct: 100u8.saturating_sub(args.insert_ratio), - update_pct: args.insert_ratio, - scan_pct: 0, - scan_len: args.scan_len, - requires_prefill: true, - insert_only: false, - }), "scan" => Ok(WorkloadSpec { id: "LEGACY_SCAN".into(), mode_label: "scan".into(), @@ -372,7 +358,7 @@ fn parse_workload(args: &Args) -> Result { insert_only: false, }), _ => Err(format!( - "invalid mode `{}` (supported: insert, get, mixed, scan)", + "invalid mode `{}` (supported: insert, get, scan)", args.mode )), } @@ -659,10 +645,6 @@ fn main() { eprintln!("key_size and value_size must be >= 16"); exit(1); } - if args.insert_ratio > 100 { - eprintln!("insert ratio must be between 0 and 100"); - exit(1); - } if !(0.0..1.0).contains(&args.zipf_theta) { eprintln!("zipf_theta must be in range (0, 1)"); exit(1);