bench: align mace/rocksdb runners and reproducible local profile

This commit is contained in:
abbycin 2026-03-04 23:10:05 +08:00
parent d5b32fd590
commit 4ba25a9ad0
11 changed files with 272 additions and 185 deletions

View File

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2024"
[dependencies]
mace-kv = "0.0.27"
mace-kv = { path = "/home/workspace/gits/github/mace" }
clap = { version = "4.5.48", features = ["derive"] }
rand = "0.9.2"
log = "0.4.22"

View File

@ -1,25 +1,38 @@
# kv_bench Reproduction Guide (Mace vs RocksDB)
This repository is used to reproduce and compare `mace` and `rocksdb` benchmark results across phase0~phase4.
This document defines a reproducible workflow for `mace` and `rocksdb` across phases 0–4.
It now has two profiles:
- `local` (default in this doc): validated on this machine class, intended to run end-to-end without exhausting resources.
- `full`: benchmark-refactor target matrix (much longer runtime).
## 1. Prerequisites
- Linux
- A high-speed storage mount directory you choose (typically an NVMe mount point)
- Rust/Cargo
- CMake (to build `rocksdb_bench`)
- Python 3 (for result aggregation and plotting)
- Python 3 (reporting/plotting)
- A persistent storage path (NVMe/SSD recommended), **not tmpfs**
## 2. Storage Directory Configuration (Important)
`/nvme` is no longer hardcoded. You can use any mount directory.
## 2. Hardware + Storage Baseline
For the local profile, assume approximately:
- CPU: `6C12T` (6 cores / 12 threads)
- RAM: `32GB`
- Disk: `100GB` available benchmark storage
Recommended: set one shared variable first:
Before running, set paths and verify filesystem type/capacity:
```bash
export KV_BENCH_STORAGE_ROOT=/path/to/your/nvme_mount/kvbench
mkdir -p "${KV_BENCH_STORAGE_ROOT}"
export KV_BENCH_STORAGE_ROOT=/home/abby/kv_bench/target/repro_storage
export KV_BENCH_RESULT_ROOT=/home/abby/kv_bench/target/repro_results
mkdir -p "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}"
df -hT "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}"
free -h
```
All scripts below take this directory (or one of its subdirectories) as the first argument.
Requirements:
- `KV_BENCH_STORAGE_ROOT` and `KV_BENCH_RESULT_ROOT` must not be on `tmpfs`.
- Keep at least `25GB` free under the storage root before starting long runs.
## 3. Initialization
```bash
@ -29,139 +42,155 @@ source ./bin/activate
cd /home/abby/kv_bench
```
## 4. Quick Baseline Comparison (W1~W6)
Clean old data first:
## 4. Quick Baseline (W1~W6)
Clean old data:
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_rocks"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_rocks"
rm -f "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv"
```
Run both engines:
Run both engines (`local` profile parameters):
```bash
./scripts/mace.sh "${KV_BENCH_STORAGE_ROOT}/basic_mace" ./scripts/benchmark_results.csv
./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" ./scripts/benchmark_results.csv
WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \
./scripts/mace.sh "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv"
WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \
./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv"
```
Generate plots:
```bash
./scripts/bin/python ./scripts/plot.py ./scripts/benchmark_results.csv ./scripts
./scripts/bin/python ./scripts/plot.py "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" "${KV_BENCH_RESULT_ROOT}"
```
## 5. Phase Reproduction Commands
## 5. Phase Reproduction
### Phase 1
### 5.1 Phase 1
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase1"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase1"
./scripts/phase1.sh "${KV_BENCH_STORAGE_ROOT}/phase1" ./scripts/phase1_results.csv
rm -f "${KV_BENCH_RESULT_ROOT}/phase1_results.csv"
WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \
PHASE1_WORKLOADS="W1 W3 W6" \
PHASE1_THREADS="1 6" \
PHASE1_PROFILES="P2" \
PHASE1_PREFILL_TIER_S_P2=200000 \
./scripts/phase1.sh "${KV_BENCH_STORAGE_ROOT}/phase1" "${KV_BENCH_RESULT_ROOT}/phase1_results.csv"
```
### Phase 2
### 5.2 Phase 2
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase2"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase2"
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" ./scripts/phase2_results.csv
rm -f "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \
PHASE2_WORKLOADS_TIER_M="W1 W3 W6" \
PHASE2_THREADS_TIER_M="1 6" \
PHASE2_PROFILES="P2" \
PHASE2_PREFILL_TIER_M_P2=500000 \
RUN_TIER_L_REPRESENTATIVE=0 \
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
```
Optional: enable tier-l representative subset:
Optional (`full` profile tier-l representative subset):
```bash
RUN_TIER_L_REPRESENTATIVE=1 TIER_L_REPEATS=1 \
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" ./scripts/phase2_results.csv
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
```
### Phase 3
### 5.3 Phase 3
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase3"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase3"
./scripts/phase3.sh "${KV_BENCH_STORAGE_ROOT}/phase3" ./scripts/phase3_results.csv
rm -f "${KV_BENCH_RESULT_ROOT}/phase3_results.csv"
WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \
PHASE3_WORKLOADS="W1 W3" \
PHASE3_THREADS="1 6" \
PHASE3_DURABILITIES="relaxed durable" \
PHASE3_KEY_SIZE=32 PHASE3_VALUE_SIZE=1024 PHASE3_PREFILL_KEYS=500000 \
./scripts/phase3.sh "${KV_BENCH_STORAGE_ROOT}/phase3" "${KV_BENCH_RESULT_ROOT}/phase3_results.csv"
```
### Phase 4 (run one engine at a time)
Mace:
### 5.4 Phase 4 (run one engine at a time)
`local` profile (memory-safe on 32GB machines, validated on 2026-03-04):
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase4_mace"
rm -f "${KV_BENCH_RESULT_ROOT}/phase4_results_mace.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv"
SOAK_HOURS=1 PHASE4_MAX_CYCLES=3 \
PHASE4_WORKLOAD_MAIN=W1 PHASE4_WORKLOAD_VERIFY=W1 \
SEED_MEASURE_SECS=2 RUN_MEASURE_SECS=10 CRASH_INTERVAL_SECS=3 VERIFY_MEASURE_SECS=2 WARMUP_SECS=1 \
PHASE4_THREADS=1 PHASE4_KEY_SIZE=32 PHASE4_VALUE_SIZE=128 PHASE4_PREFILL_KEYS=1000 \
./scripts/phase4_soak.sh mace "${KV_BENCH_STORAGE_ROOT}/phase4_mace" \
./scripts/phase4_results_mace.csv ./scripts/phase4_restart_mace.csv
```
"${KV_BENCH_RESULT_ROOT}/phase4_results_mace.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv"
RocksDB:
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase4_rocks"
rm -f "${KV_BENCH_RESULT_ROOT}/phase4_results_rocks.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv"
SOAK_HOURS=1 PHASE4_MAX_CYCLES=3 \
PHASE4_WORKLOAD_MAIN=W1 PHASE4_WORKLOAD_VERIFY=W1 \
SEED_MEASURE_SECS=2 RUN_MEASURE_SECS=10 CRASH_INTERVAL_SECS=3 VERIFY_MEASURE_SECS=2 WARMUP_SECS=1 \
PHASE4_THREADS=1 PHASE4_KEY_SIZE=32 PHASE4_VALUE_SIZE=128 PHASE4_PREFILL_KEYS=1000 \
./scripts/phase4_soak.sh rocksdb "${KV_BENCH_STORAGE_ROOT}/phase4_rocks" \
./scripts/phase4_results_rocks.csv ./scripts/phase4_restart_rocks.csv
"${KV_BENCH_RESULT_ROOT}/phase4_results_rocks.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv"
```
## 6. Where Result Inputs (CSV) Are Stored
Default output files:
- `./scripts/benchmark_results.csv`
- `./scripts/phase1_results.csv`
- `./scripts/phase2_results.csv`
- `./scripts/phase3_results.csv`
- `./scripts/phase4_results_*.csv`
- `./scripts/phase4_restart_*.csv`
Notes:
- The previous local phase4 parameters (`W3/W6`, larger prefill/time windows) can exceed 32GB RAM on current `mace` builds.
- The local phase4 profile above keeps crash/restart semantics, but intentionally reduces write pressure so it completes on this host class.
The unified schema is emitted by both engine binaries (same format for mace/rocksdb). Key columns:
- `engine`: `mace` / `rocksdb`
- `workload_id`: `W1..W6`
- `durability_mode`: `relaxed` / `durable`
- `threads,key_size,value_size,prefill_keys`: case configuration
- `ops_per_sec`: throughput
- `p50_us,p95_us,p99_us,p999_us`: latency percentiles
- `error_ops`: number of failed operations
- `read_path`: `snapshot` / `rw_txn`
For `full` profile, remove the `PHASE4_*` overrides and use benchmark-refactor defaults (recommend `>=64GB` RAM and ample NVMe space).
## 7. Where to Interpret Results
## 6. Result Files
The commands above write CSVs under `${KV_BENCH_RESULT_ROOT}`:
- `benchmark_results.csv`
- `phase1_results.csv`
- `phase2_results.csv`
- `phase3_results.csv`
- `phase4_results_*.csv`
- `phase4_restart_*.csv`
### Phase 1 (stability)
Unified schema columns include:
- `engine` (`mace` / `rocksdb`)
- `workload_id` (`W1..W6`)
- `durability_mode` (`relaxed` / `durable`)
- `threads,key_size,value_size,prefill_keys`
- `ops_per_sec`
- `p50_us,p95_us,p99_us,p999_us`
- `error_ops`
- `read_path`
## 7. Report Commands
```bash
./scripts/bin/python ./scripts/phase1_eval.py ./scripts/phase1_results.csv
./scripts/bin/python ./scripts/phase1_eval.py "${KV_BENCH_RESULT_ROOT}/phase1_results.csv"
./scripts/bin/python ./scripts/phase2_report.py "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
./scripts/bin/python ./scripts/phase3_report.py "${KV_BENCH_RESULT_ROOT}/phase3_results.csv"
./scripts/bin/python ./scripts/phase4_report.py "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv"
./scripts/bin/python ./scripts/phase4_report.py "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv"
```
Check:
- `throughput_cv` (<=10%)
- `p99_cv` (<=15%)
- `stable` and overall pass ratio
### Phase 2 (core report)
```bash
./scripts/bin/python ./scripts/phase2_report.py ./scripts/phase2_results.csv
```
Check:
- `throughput_median`
- `p95_median`, `p99_median`
- `slower_engine`, `slower_ratio`
## 8. Full-Profile Toggle (Benchmark Refactor Matrix)
If you want the full benchmark-refactor matrix:
- Use default phase script matrices (no `PHASE*_*` narrowing).
- Increase `WARMUP_SECS/MEASURE_SECS/REPEATS` to target values.
- Enable `RUN_TIER_L_REPRESENTATIVE=1` as needed.
- Keep large runs on persistent NVMe storage with enough free disk.
### Phase 3 (durability cost)
```bash
./scripts/bin/python ./scripts/phase3_report.py ./scripts/phase3_results.csv
```
Check:
- `throughput_drop_pct` (durable vs relaxed throughput drop)
- `p99_inflation_pct` (durable vs relaxed p99 inflation)
### Phase 4 (recovery capability)
```bash
./scripts/bin/python ./scripts/phase4_report.py ./scripts/phase4_restart_mace.csv
./scripts/bin/python ./scripts/phase4_report.py ./scripts/phase4_restart_rocks.csv
```
Check:
- `restart_success`
- `restart_ready_ms` at `p50/p95/p99/max`
## 8. CLI Configurability (No Hardcoded Disk Prefix)
- Both benchmark binaries support `--path` to set the DB directory.
- All scripts use the first argument as storage root/path.
- You can point `${KV_BENCH_STORAGE_ROOT}` to any mount point (NVMe, SSD, RAID, ephemeral disk).
## 9. Comparison Best Practices
Only compare cases under identical dimensions:
## 9. Comparison Rules
Only compare rows with identical:
- `workload_id`
- `key_size/value_size`
- `threads`
- `durability_mode`
- `read_path`
If `error_ops > 0`, investigate that case first before drawing performance conclusions.
If `error_ops > 0`, investigate that case before drawing conclusions.

View File

@ -72,7 +72,7 @@ struct Args {
bool random = false;
std::string mode = "insert";
std::optional<std::string> workload;
std::string path = "/nvme/kv_bench_rocksdb";
std::string path;
bool shared_keyspace = true;
size_t prefill_keys = 0;
uint64_t warmup_secs = 0;

View File

@ -3,7 +3,7 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
@ -19,11 +19,6 @@ measure_secs="${MEASURE_SECS:-20}"
prefill_keys="${PREFILL_KEYS:-200000}"
read_path="${READ_PATH:-snapshot}"
if [[ "${db_root}" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2
exit 1
fi
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"

View File

@ -3,16 +3,15 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
if [[ "$1" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "$1" >&2
exit 1
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
db_root="$1"
@ -23,6 +22,12 @@ measure_secs="${MEASURE_SECS:-300}"
repeats="${REPEATS:-3}"
read_path="${READ_PATH:-snapshot}"
phase1_workloads_raw="${PHASE1_WORKLOADS:-W1 W3 W6}"
phase1_threads_raw="${PHASE1_THREADS:-1 12}"
phase1_profiles_raw="${PHASE1_PROFILES:-P2 P3}"
phase1_prefill_tier_s_p2="${PHASE1_PREFILL_TIER_S_P2:-6100805}"
phase1_prefill_tier_s_p3="${PHASE1_PREFILL_TIER_S_P3:-392449}"
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"
@ -30,9 +35,14 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml"
(cd "${root_dir}/rocksdb" && cmake --preset release)
(cd "${root_dir}/rocksdb" && cmake --build --preset release)
workloads=(W1 W3 W6)
threads=(1 12)
profiles=(P2 P3)
IFS=' ' read -r -a workloads <<< "${phase1_workloads_raw}"
IFS=' ' read -r -a threads <<< "${phase1_threads_raw}"
IFS=' ' read -r -a profiles <<< "${phase1_profiles_raw}"
if [ "${#workloads[@]}" -eq 0 ] || [ "${#threads[@]}" -eq 0 ] || [ "${#profiles[@]}" -eq 0 ]; then
printf "phase1 workloads/threads/profiles must not be empty\n" >&2
exit 1
fi
profile_key() {
case "$1" in
@ -52,8 +62,8 @@ profile_val() {
profile_prefill_tier_s() {
case "$1" in
P2) echo 6100805 ;;
P3) echo 392449 ;;
P2) echo "${phase1_prefill_tier_s_p2}" ;;
P3) echo "${phase1_prefill_tier_s_p3}" ;;
*) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;;
esac
}
@ -113,5 +123,5 @@ for repeat in $(seq 1 "${repeats}"); do
done
done
python3 "${script_dir}/phase1_eval.py" "${result_file}"
"${python_bin}" "${script_dir}/phase1_eval.py" "${result_file}"
printf "Phase 1 finished. Results: %s\n" "${result_file}"

View File

@ -3,16 +3,15 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
if [[ "$1" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "$1" >&2
exit 1
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
db_root="$1"
@ -25,6 +24,16 @@ read_path="${READ_PATH:-snapshot}"
run_tier_l_rep="${RUN_TIER_L_REPRESENTATIVE:-0}"
tier_l_repeats="${TIER_L_REPEATS:-1}"
phase2_workloads_tier_m_raw="${PHASE2_WORKLOADS_TIER_M:-W1 W2 W3 W4 W6}"
phase2_workloads_tier_l_rep_raw="${PHASE2_WORKLOADS_TIER_L_REP:-W1 W3 W6}"
phase2_threads_tier_m_raw="${PHASE2_THREADS_TIER_M:-1 6 12}"
phase2_threads_tier_l_rep_raw="${PHASE2_THREADS_TIER_L_REP:-1 12}"
phase2_profiles_raw="${PHASE2_PROFILES:-P2 P3}"
phase2_prefill_tier_m_p2="${PHASE2_PREFILL_TIER_M_P2:-18302417}"
phase2_prefill_tier_m_p3="${PHASE2_PREFILL_TIER_M_P3:-1177348}"
phase2_prefill_tier_l_p2="${PHASE2_PREFILL_TIER_L_P2:-28470427}"
phase2_prefill_tier_l_p3="${PHASE2_PREFILL_TIER_L_P3:-1831430}"
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"
@ -32,11 +41,21 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml"
(cd "${root_dir}/rocksdb" && cmake --preset release)
(cd "${root_dir}/rocksdb" && cmake --build --preset release)
workloads_tier_m=(W1 W2 W3 W4 W6)
workloads_tier_l_rep=(W1 W3 W6)
threads_tier_m=(1 6 12)
threads_tier_l_rep=(1 12)
profiles=(P2 P3)
IFS=' ' read -r -a workloads_tier_m <<< "${phase2_workloads_tier_m_raw}"
IFS=' ' read -r -a workloads_tier_l_rep <<< "${phase2_workloads_tier_l_rep_raw}"
IFS=' ' read -r -a threads_tier_m <<< "${phase2_threads_tier_m_raw}"
IFS=' ' read -r -a threads_tier_l_rep <<< "${phase2_threads_tier_l_rep_raw}"
IFS=' ' read -r -a profiles <<< "${phase2_profiles_raw}"
if [ "${#workloads_tier_m[@]}" -eq 0 ] || [ "${#threads_tier_m[@]}" -eq 0 ] || [ "${#profiles[@]}" -eq 0 ]; then
printf "phase2 tier-m workloads/threads/profiles must not be empty\n" >&2
exit 1
fi
if [ "${run_tier_l_rep}" = "1" ] && { [ "${#workloads_tier_l_rep[@]}" -eq 0 ] || [ "${#threads_tier_l_rep[@]}" -eq 0 ]; }; then
printf "phase2 tier-l representative workloads/threads must not be empty when enabled\n" >&2
exit 1
fi
profile_key() {
case "$1" in
@ -59,14 +78,14 @@ prefill_for() {
local profile="$2"
if [ "${tier}" = "tier-m" ]; then
case "${profile}" in
P2) echo 18302417 ;;
P3) echo 1177348 ;;
P2) echo "${phase2_prefill_tier_m_p2}" ;;
P3) echo "${phase2_prefill_tier_m_p3}" ;;
*) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;;
esac
elif [ "${tier}" = "tier-l" ]; then
case "${profile}" in
P2) echo 28470427 ;;
P3) echo 1831430 ;;
P2) echo "${phase2_prefill_tier_l_p2}" ;;
P3) echo "${phase2_prefill_tier_l_p3}" ;;
*) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;;
esac
else
@ -146,5 +165,5 @@ if [ "${run_tier_l_rep}" = "1" ]; then
done
fi
python3 "${script_dir}/phase2_report.py" "${result_file}"
"${python_bin}" "${script_dir}/phase2_report.py" "${result_file}"
printf "Phase 2 finished. Results: %s\n" "${result_file}"

View File

@ -3,16 +3,15 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
if [[ "$1" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "$1" >&2
exit 1
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
db_root="$1"
@ -23,6 +22,13 @@ measure_secs="${MEASURE_SECS:-300}"
repeats="${REPEATS:-5}"
read_path="${READ_PATH:-snapshot}"
phase3_workloads_raw="${PHASE3_WORKLOADS:-W1 W3 W6}"
phase3_threads_raw="${PHASE3_THREADS:-1 12}"
phase3_durabilities_raw="${PHASE3_DURABILITIES:-relaxed durable}"
key_size="${PHASE3_KEY_SIZE:-32}"
value_size="${PHASE3_VALUE_SIZE:-1024}"
prefill_keys="${PHASE3_PREFILL_KEYS:-18302417}" # tier-m P2
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"
@ -30,12 +36,14 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml"
(cd "${root_dir}/rocksdb" && cmake --preset release)
(cd "${root_dir}/rocksdb" && cmake --build --preset release)
workloads=(W1 W3 W6)
threads=(1 12)
durabilities=(relaxed durable)
key_size=32
value_size=1024
prefill_keys=18302417 # tier-m P2
IFS=' ' read -r -a workloads <<< "${phase3_workloads_raw}"
IFS=' ' read -r -a threads <<< "${phase3_threads_raw}"
IFS=' ' read -r -a durabilities <<< "${phase3_durabilities_raw}"
if [ "${#workloads[@]}" -eq 0 ] || [ "${#threads[@]}" -eq 0 ] || [ "${#durabilities[@]}" -eq 0 ]; then
printf "phase3 workloads/threads/durabilities must not be empty\n" >&2
exit 1
fi
run_case() {
local engine="$1"
@ -91,5 +99,5 @@ for repeat in $(seq 1 "${repeats}"); do
done
done
python3 "${script_dir}/phase3_report.py" "${result_file}"
"${python_bin}" "${script_dir}/phase3_report.py" "${result_file}"
printf "Phase 3 finished. Results: %s\n" "${result_file}"

View File

@ -2,6 +2,7 @@
import sys
import pandas as pd
import os
def main() -> int:
@ -23,10 +24,12 @@ def main() -> int:
if missing:
raise ValueError(f"Missing columns: {sorted(missing)}")
target_key_size = int(os.getenv("PHASE3_REPORT_KEY_SIZE", "32"))
target_value_size = int(os.getenv("PHASE3_REPORT_VALUE_SIZE", "1024"))
sub = df[
(df["workload_id"].isin(["W1", "W3", "W6"]))
& (df["key_size"] == 32)
& (df["value_size"] == 1024)
& (df["key_size"] == target_key_size)
& (df["value_size"] == target_value_size)
].copy()
if sub.empty:

View File

@ -3,7 +3,7 @@
set -euo pipefail
if [ "$#" -lt 2 ] || [ "$#" -gt 4 ]; then
printf "Usage: %s <engine:mace|rocksdb> <db_path_under_/nvme> [result_csv] [restart_csv]\n" "$0"
printf "Usage: %s <engine:mace|rocksdb> <db_path> [result_csv] [restart_csv]\n" "$0"
exit 1
fi
@ -12,15 +12,14 @@ db_path="$2"
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
result_file="${3:-${script_dir}/phase4_results.csv}"
restart_file="${4:-${script_dir}/phase4_restart.csv}"
if [[ "${db_path}" != /nvme* ]]; then
printf "db_path must be under /nvme, got: %s\n" "${db_path}" >&2
exit 1
fi
if [[ "${engine}" != "mace" && "${engine}" != "rocksdb" ]]; then
printf "engine must be mace or rocksdb\n" >&2
exit 1
@ -34,15 +33,17 @@ soak_hours="${SOAK_HOURS:-12}"
crash_interval_secs="${CRASH_INTERVAL_SECS:-1800}"
verify_measure_secs="${VERIFY_MEASURE_SECS:-30}"
run_measure_secs="${RUN_MEASURE_SECS:-3600}"
seed_measure_secs="${SEED_MEASURE_SECS:-5}"
warmup_secs="${WARMUP_SECS:-30}"
max_cycles="${PHASE4_MAX_CYCLES:-0}"
# baseline: tier-m + W3 + P2 + 12 threads
workload_main="W3"
workload_verify="W6"
threads=12
key_size=32
value_size=1024
prefill_keys=18302417
workload_main="${PHASE4_WORKLOAD_MAIN:-W3}"
workload_verify="${PHASE4_WORKLOAD_VERIFY:-W6}"
threads="${PHASE4_THREADS:-12}"
key_size="${PHASE4_KEY_SIZE:-32}"
value_size="${PHASE4_VALUE_SIZE:-1024}"
prefill_keys="${PHASE4_PREFILL_KEYS:-18302417}"
read_path="${READ_PATH:-snapshot}"
durability="${DURABILITY:-relaxed}"
@ -66,6 +67,7 @@ run_cmd() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -82,6 +84,7 @@ run_cmd() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -140,6 +143,7 @@ start_run_bg() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -156,6 +160,7 @@ start_run_bg() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -204,10 +209,13 @@ fi
# seed dataset once (with prefill)
printf "[phase4][%s] seed dataset at %s\n" "${engine}" "${db_path}"
run_cmd "${workload_main}" 5 0
run_cmd "${workload_main}" "${seed_measure_secs}" 0
cycle=0
while [ "$(date +%s)" -lt "${end_epoch}" ]; do
if [ "${max_cycles}" -gt 0 ] && [ "${cycle}" -ge "${max_cycles}" ]; then
break
fi
cycle="$((cycle + 1))"
cycle_start="$(date +%s)"
printf "[phase4][%s] cycle=%s start=%s\n" "${engine}" "${cycle}" "${cycle_start}"
@ -241,5 +249,5 @@ while [ "$(date +%s)" -lt "${end_epoch}" ]; do
"${cycle}" "${cycle_start}" "${kill_sent}" "${worker_exit}" "${restart_status}" "${restart_ready_ms}" >> "${restart_file}"
done
python3 "${script_dir}/phase4_report.py" "${restart_file}"
"${python_bin}" "${script_dir}/phase4_report.py" "${restart_file}"
printf "Phase 4 soak finished. Results: %s | Restart log: %s\n" "${result_file}" "${restart_file}"

View File

@ -3,7 +3,7 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
@ -19,11 +19,6 @@ measure_secs="${MEASURE_SECS:-20}"
prefill_keys="${PREFILL_KEYS:-200000}"
read_path="${READ_PATH:-snapshot}"
if [[ "${db_root}" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2
exit 1
fi
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"

View File

@ -1,4 +1,4 @@
use clap::Parser;
use clap::{ArgAction, Parser};
#[cfg(target_os = "linux")]
use logger::Logger;
use mace::{Mace, Options};
@ -27,7 +27,7 @@ const PREFILL_BATCH: usize = 1024;
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None)]
struct Args {
#[arg(short = 'p', long, default_value = "/nvme/kv_bench_mace")]
#[arg(short = 'p', long)]
path: String,
#[arg(short = 'm', long, default_value = "insert")]
@ -60,6 +60,9 @@ struct Args {
#[arg(long, default_value_t = true)]
shared_keyspace: bool,
#[arg(long, action = ArgAction::SetTrue)]
no_shared_keyspace: bool,
#[arg(long, default_value_t = 0)]
prefill_keys: usize,
@ -87,6 +90,9 @@ struct Args {
#[arg(long, default_value_t = true)]
cleanup: bool,
#[arg(long, action = ArgAction::SetTrue)]
no_cleanup: bool,
#[arg(long, default_value_t = false)]
skip_prefill: bool,
@ -416,8 +422,7 @@ fn make_thread_prefix(tid: usize) -> Vec<u8> {
fn latency_bucket(us: u64) -> usize {
let v = us.max(1);
let idx = (63 - v.leading_zeros() as usize).min(LAT_BUCKETS - 1);
idx
(63 - v.leading_zeros() as usize).min(LAT_BUCKETS - 1)
}
fn histogram_quantile_us(hist: &[u64; LAT_BUCKETS], q: f64) -> u64 {
@ -614,12 +619,14 @@ fn pick_op_kind(rng: &mut StdRng, spec: &WorkloadSpec) -> OpKind {
fn main() {
#[cfg(target_os = "linux")]
{
Logger::init().add_file("/tmp/x.log", true);
log::set_max_level(log::LevelFilter::Info);
Logger::init().add_file("kv_bench.log", true);
log::set_max_level(log::LevelFilter::Error);
}
let args = Args::parse();
let path = Path::new(&args.path);
let shared_keyspace = args.shared_keyspace && !args.no_shared_keyspace;
let cleanup = args.cleanup && !args.no_cleanup;
if args.path.is_empty() {
eprintln!("path is empty");
@ -629,6 +636,17 @@ fn main() {
eprintln!("path {:?} already exists", args.path);
exit(1);
}
if args.skip_prefill && !args.reuse_path {
eprintln!("--skip-prefill requires --reuse-path");
exit(1);
}
if args.skip_prefill && !path.exists() {
eprintln!(
"--skip-prefill requires existing path, but `{}` does not exist",
args.path
);
exit(1);
}
if args.threads == 0 {
eprintln!("threads must be greater than 0");
exit(1);
@ -673,6 +691,12 @@ fn main() {
}
};
let mixed_workload = workload.read_pct > 0 && workload.update_pct > 0;
if mixed_workload && !shared_keyspace {
eprintln!("mixed workloads require shared keyspace");
exit(1);
}
let prefill_keys = if workload.requires_prefill {
if args.prefill_keys > 0 {
args.prefill_keys
@ -689,11 +713,6 @@ fn main() {
}
let thread_prefill_ranges = split_ranges(prefill_keys, args.threads);
let thread_op_ranges = if args.shared_keyspace {
thread_prefill_ranges.clone()
} else {
thread_prefill_ranges.clone()
};
let mut opt = Options::new(path);
opt.sync_on_write = durability_mode == DurabilityMode::Durable;
@ -702,7 +721,7 @@ fn main() {
opt.data_file_size = 64 << 20;
opt.max_log_size = 1 << 30;
opt.default_arenas = 128;
opt.tmp_store = args.cleanup;
opt.tmp_store = cleanup;
let db = Mace::new(opt.validate().unwrap()).unwrap();
db.disable_gc();
@ -716,13 +735,12 @@ fn main() {
let value = Arc::new(vec![b'0'; args.value_size]);
if workload.requires_prefill && !args.skip_prefill {
let mut fill_handles = Vec::with_capacity(args.threads);
for tid in 0..args.threads {
let mut fill_handles = Vec::with_capacity(thread_prefill_ranges.len());
for (tid, tr) in thread_prefill_ranges.iter().copied().enumerate() {
let bucket = bkt.clone();
let v = value.clone();
let key_size = args.key_size;
let shared = args.shared_keyspace;
let tr = thread_prefill_ranges[tid];
let shared = shared_keyspace;
fill_handles.push(std::thread::spawn(move || {
coreid::bind_core(tid);
let mut in_batch = 0usize;
@ -766,22 +784,24 @@ fn main() {
let ins_ctr = Arc::clone(&insert_counter);
let key_size = args.key_size;
let random_insert = args.random;
let read_path = read_path;
let read_path_mode = read_path;
let warmup_secs = args.warmup_secs;
let measure_secs = args.measure_secs;
let distribution = spec.distribution;
let zipf_theta = args.zipf_theta;
let scan_len = spec.scan_len;
let shared = args.shared_keyspace;
let prefill_keys = prefill_keys;
let local_key_len = thread_op_ranges[tid].len;
let shared = shared_keyspace;
let prefill_key_count = prefill_keys;
let local_key_len = thread_prefill_ranges[tid].len;
let local_op_count = op_counts[tid].len;
std::thread::spawn(move || {
coreid::bind_core(tid);
let seed = (now_epoch_ms() as u64)
^ ((tid as u64 + 1) * 0x9E37_79B9_7F4A_7C15)
^ ((prefill_keys as u64) << 7);
^ (tid as u64)
.wrapping_add(1)
.wrapping_mul(0x9E37_79B9_7F4A_7C15)
^ (prefill_key_count as u64).wrapping_shl(7);
let mut rng = StdRng::seed_from_u64(seed);
let mut stats = ThreadStats::default();
let mut local_insert_idx = 0usize;
@ -805,11 +825,11 @@ fn main() {
&spec,
distribution,
zipf_theta,
read_path,
read_path_mode,
key_size,
scan_len,
shared,
prefill_keys,
prefill_key_count,
local_key_len,
tid,
&ins_ctr,
@ -833,11 +853,11 @@ fn main() {
&spec,
distribution,
zipf_theta,
read_path,
read_path_mode,
key_size,
scan_len,
shared,
prefill_keys,
prefill_key_count,
local_key_len,
tid,
&ins_ctr,
@ -861,11 +881,11 @@ fn main() {
&spec,
distribution,
zipf_theta,
read_path,
read_path_mode,
key_size,
scan_len,
shared,
prefill_keys,
prefill_key_count,
local_key_len,
tid,
&ins_ctr,
@ -921,7 +941,7 @@ fn main() {
key_size: args.key_size,
value_size: args.value_size,
prefill_keys,
shared_keyspace: args.shared_keyspace,
shared_keyspace,
distribution: workload.distribution,
zipf_theta: args.zipf_theta,
read_pct: workload.read_pct,