Align read workload GC handling across engines

This commit is contained in:
abbycin 2026-03-08 21:57:09 +08:00
parent ddc3f8af7e
commit 2c33c45d2c
5 changed files with 37 additions and 5 deletions

View File

@ -43,6 +43,7 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_r
## What Is Compared ## What Is Compared
- Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path` - Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path`
- Fairness rule for read-heavy workloads: `get`, `scan`, `mixed`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so RocksDB is not compared with GC artificially disabled while reads may have to touch multiple SSTs
- Throughput metric: workload-level `ops_per_sec` (higher is better) - Throughput metric: workload-level `ops_per_sec` (higher is better)
- `W1/W2/W3/W4`: mixed read+update throughput - `W1/W2/W3/W4`: mixed read+update throughput
- `W5`: mixed read+update+scan throughput - `W5`: mixed read+update+scan throughput

View File

@ -63,6 +63,10 @@ WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \
./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" ./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv"
``` ```
Fairness note:
- For read-heavy workloads (`get`, `scan`, `mixed`, and `W1`-`W6`), both harnesses run one GC/compaction pass after prefill and before warmup/measurement.
- This keeps the RocksDB vs Mace comparison fair: RocksDB reads may have to touch multiple SSTs, so RocksDB should not be benchmarked with GC/compaction artificially disabled.
Generate plots: Generate plots:
```bash ```bash
@ -202,5 +206,6 @@ Only compare rows with identical:
- `threads` - `threads`
- `durability_mode` - `durability_mode`
- `read_path` - `read_path`
- read-heavy workload rows are expected to include the pre-run GC/compaction pass described above
If `error_ops > 0`, investigate that case before drawing conclusions. If `error_ops > 0`, investigate that case before drawing conclusions.

View File

@ -289,6 +289,22 @@ static std::optional<WorkloadSpec> parse_workload(const Args &args, std::string
return std::nullopt; return std::nullopt;
} }
// Gate for the one-shot pre-measurement GC/compaction pass: any workload
// that prefills data gets one, so read-heavy runs measure against a
// compacted store (see the fairness note in the benchmark docs).
static bool workload_runs_gc(const WorkloadSpec &spec) {
return spec.requires_prefill;
}
// Run a single GC/compaction pass over the prefilled data, after prefill and
// before warmup/measurement. Order matters: re-enable auto compaction, flush
// memtables, then compact the full key range.
static void run_prefill_gc(rocksdb::OptimisticTransactionDB *db,
rocksdb::ColumnFamilyHandle *handle) {
// Auto compaction is disabled during prefill (disable_auto_compactions);
// turn it back on for the measured phase.
require_ok(db->EnableAutoCompaction({handle}), "enable auto compaction");
rocksdb::FlushOptions flush_options;
// Block until the flush completes so CompactRange sees all prefilled data.
flush_options.wait = true;
require_ok(db->Flush(flush_options, handle), "prefill flush");
rocksdb::CompactRangeOptions compact_options;
// nullptr begin/end keys => compact the entire column family.
require_ok(db->CompactRange(compact_options, handle, nullptr, nullptr), "prefill compaction");
}
static std::vector<ThreadRange> split_ranges(size_t total, size_t n) { static std::vector<ThreadRange> split_ranges(size_t total, size_t n) {
std::vector<ThreadRange> out; std::vector<ThreadRange> out;
out.reserve(n); out.reserve(n);
@ -801,10 +817,6 @@ int main(int argc, char *argv[]) {
cfo.enable_blob_files = true; cfo.enable_blob_files = true;
cfo.min_blob_size = args.blob_size; cfo.min_blob_size = args.blob_size;
cfo.disable_auto_compactions = true; cfo.disable_auto_compactions = true;
cfo.max_compaction_bytes = (1ULL << 60);
cfo.level0_stop_writes_trigger = 1000000;
cfo.level0_slowdown_writes_trigger = 1000000;
cfo.level0_file_num_compaction_trigger = 1000000;
cfo.write_buffer_size = 64 << 20; cfo.write_buffer_size = 64 << 20;
cfo.max_write_buffer_number = 128; cfo.max_write_buffer_number = 128;
@ -822,6 +834,7 @@ int main(int argc, char *argv[]) {
options.enable_pipelined_write = true; options.enable_pipelined_write = true;
options.max_background_flushes = 8; options.max_background_flushes = 8;
options.env->SetBackgroundThreads(8, rocksdb::Env::Priority::HIGH); options.env->SetBackgroundThreads(8, rocksdb::Env::Priority::HIGH);
options.env->SetBackgroundThreads(8, rocksdb::Env::Priority::LOW);
auto wopt = rocksdb::WriteOptions(); auto wopt = rocksdb::WriteOptions();
wopt.no_slowdown = true; wopt.no_slowdown = true;
@ -871,6 +884,10 @@ int main(int argc, char *argv[]) {
} }
} }
if (workload_runs_gc(workload_spec)) {
run_prefill_gc(db, handle);
}
std::barrier ready_barrier(static_cast<ptrdiff_t>(args.threads + 1)); std::barrier ready_barrier(static_cast<ptrdiff_t>(args.threads + 1));
std::barrier measure_barrier(static_cast<ptrdiff_t>(args.threads + 1)); std::barrier measure_barrier(static_cast<ptrdiff_t>(args.threads + 1));

View File

@ -144,7 +144,7 @@ def run_engine_cases(
f"[run] engine={engine} mode={mode_display} " f"[run] engine={engine} mode={mode_display} "
f"threads={threads} key={key_size} value={value_size}" f"threads={threads} key={key_size} value={value_size}"
) )
print(f"{' '.join(args)}") # print(f"{' '.join(args)}")
subprocess.run(args, check=True) subprocess.run(args, check=True)

View File

@ -378,6 +378,10 @@ fn parse_workload(args: &Args) -> Result<WorkloadSpec, String> {
} }
} }
/// Whether this workload should trigger a GC pass after prefill.
/// Mirrors the C++ RocksDB harness: any workload that requires prefill
/// runs GC, keeping read-heavy comparisons fair across engines.
fn workload_runs_gc(spec: &WorkloadSpec) -> bool {
spec.requires_prefill
}
fn split_ranges(total: usize, n: usize) -> Vec<ThreadRange> { fn split_ranges(total: usize, n: usize) -> Vec<ThreadRange> {
let mut ranges = Vec::with_capacity(n); let mut ranges = Vec::with_capacity(n);
if n == 0 { if n == 0 {
@ -775,6 +779,11 @@ fn main() {
} }
} }
if workload_runs_gc(&workload) {
db.enable_gc();
db.start_gc();
}
let op_counts = split_ranges(args.iterations, args.threads); let op_counts = split_ranges(args.iterations, args.threads);
let ready_barrier = Arc::new(Barrier::new(args.threads + 1)); let ready_barrier = Arc::new(Barrier::new(args.threads + 1));
let measure_barrier = Arc::new(Barrier::new(args.threads + 1)); let measure_barrier = Arc::new(Barrier::new(args.threads + 1));