diff --git a/README.md b/README.md index cabf4ad..f1dc6de 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_r ## What Is Compared - Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path` +- Fairness rule for read-heavy workloads: `get`, `scan`, `mixed`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so RocksDB is not compared with GC artificially disabled while reads may have to touch multiple SSTs - Throughput metric: workload-level `ops_per_sec` (higher is better) - `W1/W2/W3/W4`: mixed read+update throughput - `W5`: mixed read+update+scan throughput diff --git a/docs/repro.md b/docs/repro.md index 31ffeee..073288b 100644 --- a/docs/repro.md +++ b/docs/repro.md @@ -63,6 +63,10 @@ WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \ ./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" ``` +Fairness note: +- For read-heavy workloads (`get`, `scan`, `mixed`, and `W1`-`W6`), both harnesses run one GC/compaction pass after prefill and before warmup/measurement. +- The purpose is to make the RocksDB vs Mace comparison fairer, since RocksDB reads may need to touch multiple SSTs and should not be benchmarked with GC/compaction artificially disabled. + Generate plots: ```bash @@ -202,5 +206,6 @@ Only compare rows with identical: - `threads` - `durability_mode` - `read_path` +- Read-heavy workload rows are expected to include the pre-run GC/compaction pass described above If `error_ops > 0`, investigate that case before drawing conclusions. 
diff --git a/rocksdb/main.cpp b/rocksdb/main.cpp index 0df0aed..1310bc0 100644 --- a/rocksdb/main.cpp +++ b/rocksdb/main.cpp @@ -289,6 +289,22 @@ static std::optional parse_workload(const Args &args, std::string return std::nullopt; } +static bool workload_runs_gc(const WorkloadSpec &spec) { + return spec.requires_prefill; +} + +static void run_prefill_gc(rocksdb::OptimisticTransactionDB *db, + rocksdb::ColumnFamilyHandle *handle) { + require_ok(db->EnableAutoCompaction({handle}), "enable auto compaction"); + + rocksdb::FlushOptions flush_options; + flush_options.wait = true; + require_ok(db->Flush(flush_options, handle), "prefill flush"); + + rocksdb::CompactRangeOptions compact_options; + require_ok(db->CompactRange(compact_options, handle, nullptr, nullptr), "prefill compaction"); +} + static std::vector split_ranges(size_t total, size_t n) { std::vector out; out.reserve(n); @@ -801,10 +817,6 @@ int main(int argc, char *argv[]) { cfo.enable_blob_files = true; cfo.min_blob_size = args.blob_size; cfo.disable_auto_compactions = true; - cfo.max_compaction_bytes = (1ULL << 60); - cfo.level0_stop_writes_trigger = 1000000; - cfo.level0_slowdown_writes_trigger = 1000000; - cfo.level0_file_num_compaction_trigger = 1000000; cfo.write_buffer_size = 64 << 20; cfo.max_write_buffer_number = 128; @@ -822,6 +834,7 @@ int main(int argc, char *argv[]) { options.enable_pipelined_write = true; options.max_background_flushes = 8; options.env->SetBackgroundThreads(8, rocksdb::Env::Priority::HIGH); + options.env->SetBackgroundThreads(8, rocksdb::Env::Priority::LOW); auto wopt = rocksdb::WriteOptions(); wopt.no_slowdown = true; @@ -871,6 +884,10 @@ int main(int argc, char *argv[]) { } } + if (workload_runs_gc(workload_spec)) { + run_prefill_gc(db, handle); + } + std::barrier ready_barrier(static_cast(args.threads + 1)); std::barrier measure_barrier(static_cast(args.threads + 1)); diff --git a/scripts/fast_test.py b/scripts/fast_test.py index 942b8a2..d922610 100644 --- 
a/scripts/fast_test.py +++ b/scripts/fast_test.py @@ -144,7 +144,7 @@ def run_engine_cases( f"[run] engine={engine} mode={mode_display} " f"threads={threads} key={key_size} value={value_size}" ) - print(f"{' '.join(args)}") + # print(f"{' '.join(args)}") subprocess.run(args, check=True) diff --git a/src/main.rs b/src/main.rs index e91a005..e93a06c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -378,6 +378,10 @@ fn parse_workload(args: &Args) -> Result { } } +fn workload_runs_gc(spec: &WorkloadSpec) -> bool { + spec.requires_prefill +} + fn split_ranges(total: usize, n: usize) -> Vec { let mut ranges = Vec::with_capacity(n); if n == 0 { @@ -775,6 +779,11 @@ fn main() { } } + if workload_runs_gc(&workload) { + db.enable_gc(); + db.start_gc(); + } + let op_counts = split_ranges(args.iterations, args.threads); let ready_barrier = Arc::new(Barrier::new(args.threads + 1)); let measure_barrier = Arc::new(Barrier::new(args.threads + 1));