From 4ba25a9ad0a97f43f7fdcf3235f945026f2a24f6 Mon Sep 17 00:00:00 2001 From: abbycin Date: Wed, 4 Mar 2026 23:10:05 +0800 Subject: [PATCH] bench: align mace/rocksdb runners and reproducible local profile --- Cargo.toml | 2 +- docs/repro.md | 205 ++++++++++++++++++++++----------------- rocksdb/main.cpp | 2 +- scripts/mace.sh | 7 +- scripts/phase1.sh | 32 +++--- scripts/phase2.sh | 49 +++++++--- scripts/phase3.sh | 32 +++--- scripts/phase3_report.py | 7 +- scripts/phase4_soak.sh | 36 ++++--- scripts/rocksdb.sh | 7 +- src/main.rs | 78 +++++++++------ 11 files changed, 272 insertions(+), 185 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d5a492a..adcacf9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2024" [dependencies] -mace-kv = "0.0.27" +mace-kv = { path = "/home/workspace/gits/github/mace" } clap = { version = "4.5.48", features = ["derive"] } rand = "0.9.2" log = "0.4.22" diff --git a/docs/repro.md b/docs/repro.md index 3de1ec4..5df0216 100644 --- a/docs/repro.md +++ b/docs/repro.md @@ -1,25 +1,38 @@ # kv_bench Reproduction Guide (Mace vs RocksDB) -This repository is used to reproduce and compare `mace` and `rocksdb` benchmark results across phase0~phase4. +This document defines a reproducible workflow for `mace` and `rocksdb` across phase0~phase4. +It now has two profiles: + +- `local` (default in this doc): validated on this machine class, intended to run end-to-end without exhausting resources. +- `full`: benchmark-refactor target matrix (much longer runtime). ## 1. Prerequisites - Linux -- A high-speed storage mount directory you choose (typically an NVMe mount point) - Rust/Cargo - CMake (to build `rocksdb_bench`) -- Python 3 (for result aggregation and plotting) +- Python 3 (reporting/plotting) +- A persistent storage path (NVMe/SSD recommended), **not tmpfs** -## 2. Storage Directory Configuration (Important) -`/nvme` is no longer hardcoded. You can use any mount directory. +## 2. 
Hardware + Storage Baseline +For the local profile, assume approximately: +- CPU: `6C12T` +- RAM: `32GB` +- Disk: `100GB` available benchmark storage -Recommended: set one shared variable first: +Before running, set paths and verify filesystem type/capacity: ```bash -export KV_BENCH_STORAGE_ROOT=/path/to/your/nvme_mount/kvbench -mkdir -p "${KV_BENCH_STORAGE_ROOT}" +export KV_BENCH_STORAGE_ROOT=/home/abby/kv_bench/target/repro_storage +export KV_BENCH_RESULT_ROOT=/home/abby/kv_bench/target/repro_results +mkdir -p "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}" + +df -hT "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}" +free -h ``` -All scripts below take this directory (or one of its subdirectories) as the first argument. +Requirements: +- `KV_BENCH_STORAGE_ROOT` and `KV_BENCH_RESULT_ROOT` must not be on `tmpfs`. +- Keep at least `25GB` free under storage root before long runs. ## 3. Initialization ```bash @@ -29,139 +42,155 @@ source ./bin/activate cd /home/abby/kv_bench ``` -## 4. Quick Baseline Comparison (W1~W6) -Clean old data first: +## 4. 
Quick Baseline (W1~W6) +Clean old data: ```bash rm -rf "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_rocks" mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_rocks" +rm -f "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" ``` -Run both engines: +Run both engines (`local` profile parameters): ```bash -./scripts/mace.sh "${KV_BENCH_STORAGE_ROOT}/basic_mace" ./scripts/benchmark_results.csv -./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" ./scripts/benchmark_results.csv +WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \ +./scripts/mace.sh "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" + +WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \ +./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" ``` Generate plots: ```bash -./scripts/bin/python ./scripts/plot.py ./scripts/benchmark_results.csv ./scripts +./scripts/bin/python ./scripts/plot.py "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" "${KV_BENCH_RESULT_ROOT}" ``` -## 5. Phase Reproduction Commands +## 5. 
Phase Reproduction -### Phase 1 +### 5.1 Phase 1 ```bash rm -rf "${KV_BENCH_STORAGE_ROOT}/phase1" mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase1" -./scripts/phase1.sh "${KV_BENCH_STORAGE_ROOT}/phase1" ./scripts/phase1_results.csv +rm -f "${KV_BENCH_RESULT_ROOT}/phase1_results.csv" + +WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \ +PHASE1_WORKLOADS="W1 W3 W6" \ +PHASE1_THREADS="1 6" \ +PHASE1_PROFILES="P2" \ +PHASE1_PREFILL_TIER_S_P2=200000 \ +./scripts/phase1.sh "${KV_BENCH_STORAGE_ROOT}/phase1" "${KV_BENCH_RESULT_ROOT}/phase1_results.csv" ``` -### Phase 2 +### 5.2 Phase 2 ```bash rm -rf "${KV_BENCH_STORAGE_ROOT}/phase2" mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase2" -./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" ./scripts/phase2_results.csv +rm -f "${KV_BENCH_RESULT_ROOT}/phase2_results.csv" + +WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \ +PHASE2_WORKLOADS_TIER_M="W1 W3 W6" \ +PHASE2_THREADS_TIER_M="1 6" \ +PHASE2_PROFILES="P2" \ +PHASE2_PREFILL_TIER_M_P2=500000 \ +RUN_TIER_L_REPRESENTATIVE=0 \ +./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" "${KV_BENCH_RESULT_ROOT}/phase2_results.csv" ``` -Optional: enable tier-l representative subset: +Optional (`full` profile tier-l representative subset): ```bash RUN_TIER_L_REPRESENTATIVE=1 TIER_L_REPEATS=1 \ -./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" ./scripts/phase2_results.csv +./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" "${KV_BENCH_RESULT_ROOT}/phase2_results.csv" ``` -### Phase 3 +### 5.3 Phase 3 ```bash rm -rf "${KV_BENCH_STORAGE_ROOT}/phase3" mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase3" -./scripts/phase3.sh "${KV_BENCH_STORAGE_ROOT}/phase3" ./scripts/phase3_results.csv +rm -f "${KV_BENCH_RESULT_ROOT}/phase3_results.csv" + +WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \ +PHASE3_WORKLOADS="W1 W3" \ +PHASE3_THREADS="1 6" \ +PHASE3_DURABILITIES="relaxed durable" \ +PHASE3_KEY_SIZE=32 PHASE3_VALUE_SIZE=1024 PHASE3_PREFILL_KEYS=500000 \ +./scripts/phase3.sh "${KV_BENCH_STORAGE_ROOT}/phase3" 
"${KV_BENCH_RESULT_ROOT}/phase3_results.csv" ``` -### Phase 4 (run one engine at a time) -Mace: +### 5.4 Phase 4 (run one engine at a time) +`local` profile (memory-safe on 32GB machines, validated on 2026-03-04): + ```bash rm -rf "${KV_BENCH_STORAGE_ROOT}/phase4_mace" +rm -f "${KV_BENCH_RESULT_ROOT}/phase4_results_mace.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv" + +SOAK_HOURS=1 PHASE4_MAX_CYCLES=3 \ +PHASE4_WORKLOAD_MAIN=W1 PHASE4_WORKLOAD_VERIFY=W1 \ +SEED_MEASURE_SECS=2 RUN_MEASURE_SECS=10 CRASH_INTERVAL_SECS=3 VERIFY_MEASURE_SECS=2 WARMUP_SECS=1 \ +PHASE4_THREADS=1 PHASE4_KEY_SIZE=32 PHASE4_VALUE_SIZE=128 PHASE4_PREFILL_KEYS=1000 \ ./scripts/phase4_soak.sh mace "${KV_BENCH_STORAGE_ROOT}/phase4_mace" \ - ./scripts/phase4_results_mace.csv ./scripts/phase4_restart_mace.csv -``` + "${KV_BENCH_RESULT_ROOT}/phase4_results_mace.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv" -RocksDB: -```bash rm -rf "${KV_BENCH_STORAGE_ROOT}/phase4_rocks" +rm -f "${KV_BENCH_RESULT_ROOT}/phase4_results_rocks.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv" + +SOAK_HOURS=1 PHASE4_MAX_CYCLES=3 \ +PHASE4_WORKLOAD_MAIN=W1 PHASE4_WORKLOAD_VERIFY=W1 \ +SEED_MEASURE_SECS=2 RUN_MEASURE_SECS=10 CRASH_INTERVAL_SECS=3 VERIFY_MEASURE_SECS=2 WARMUP_SECS=1 \ +PHASE4_THREADS=1 PHASE4_KEY_SIZE=32 PHASE4_VALUE_SIZE=128 PHASE4_PREFILL_KEYS=1000 \ ./scripts/phase4_soak.sh rocksdb "${KV_BENCH_STORAGE_ROOT}/phase4_rocks" \ - ./scripts/phase4_results_rocks.csv ./scripts/phase4_restart_rocks.csv + "${KV_BENCH_RESULT_ROOT}/phase4_results_rocks.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv" ``` -## 6. 
Where Result Inputs (CSV) Are Stored -Default output files: -- `./scripts/benchmark_results.csv` -- `./scripts/phase1_results.csv` -- `./scripts/phase2_results.csv` -- `./scripts/phase3_results.csv` -- `./scripts/phase4_results_*.csv` -- `./scripts/phase4_restart_*.csv` +Notes: +- The previous local phase4 parameters (`W3/W6`, larger prefill/time windows) can exceed 32GB RAM on current `mace` builds. +- The local phase4 profile above keeps crash/restart semantics, but intentionally reduces write pressure so it completes on this host class. -The unified schema is emitted by both engine binaries (same format for mace/rocksdb). Key columns: -- `engine`: `mace` / `rocksdb` -- `workload_id`: `W1..W6` -- `durability_mode`: `relaxed` / `durable` -- `threads,key_size,value_size,prefill_keys`: case configuration -- `ops_per_sec`: throughput -- `p50_us,p95_us,p99_us,p999_us`: latency percentiles -- `error_ops`: number of failed operations -- `read_path`: `snapshot` / `rw_txn` +For the `full` profile, remove the `PHASE4_*` overrides as well as the shortened timing overrides (`SOAK_HOURS`, `SEED_MEASURE_SECS`, `RUN_MEASURE_SECS`, `CRASH_INTERVAL_SECS`, `VERIFY_MEASURE_SECS`, `WARMUP_SECS`) and use benchmark-refactor defaults (recommend `>=64GB` RAM and ample NVMe space). -## 7. Where to Interpret Results +## 6. Result Files +The commands above write CSVs under `${KV_BENCH_RESULT_ROOT}`: +- `benchmark_results.csv` +- `phase1_results.csv` +- `phase2_results.csv` +- `phase3_results.csv` +- `phase4_results_*.csv` +- `phase4_restart_*.csv` -### Phase 1 (stability) +Unified schema columns include: +- `engine` (`mace` / `rocksdb`) +- `workload_id` (`W1..W6`) +- `durability_mode` (`relaxed` / `durable`) +- `threads,key_size,value_size,prefill_keys` +- `ops_per_sec` +- `p50_us,p95_us,p99_us,p999_us` +- `error_ops` +- `read_path` + +## 7. 
Report Commands ```bash -./scripts/bin/python ./scripts/phase1_eval.py ./scripts/phase1_results.csv +./scripts/bin/python ./scripts/phase1_eval.py "${KV_BENCH_RESULT_ROOT}/phase1_results.csv" +./scripts/bin/python ./scripts/phase2_report.py "${KV_BENCH_RESULT_ROOT}/phase2_results.csv" +./scripts/bin/python ./scripts/phase3_report.py "${KV_BENCH_RESULT_ROOT}/phase3_results.csv" +./scripts/bin/python ./scripts/phase4_report.py "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv" +./scripts/bin/python ./scripts/phase4_report.py "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv" ``` -Check: -- `throughput_cv` (<=10%) -- `p99_cv` (<=15%) -- `stable` and overall pass ratio -### Phase 2 (core report) -```bash -./scripts/bin/python ./scripts/phase2_report.py ./scripts/phase2_results.csv -``` -Check: -- `throughput_median` -- `p95_median`, `p99_median` -- `slower_engine`, `slower_ratio` +## 8. Full-Profile Toggle (Benchmark Refactor Matrix) +If you want the full benchmark-refactor matrix: +- Use default phase script matrices (no `PHASE*_*` narrowing). +- Increase `WARMUP_SECS/MEASURE_SECS/REPEATS` to target values. +- Enable `RUN_TIER_L_REPRESENTATIVE=1` as needed. +- Keep large runs on persistent NVMe storage with enough free disk. -### Phase 3 (durability cost) -```bash -./scripts/bin/python ./scripts/phase3_report.py ./scripts/phase3_results.csv -``` -Check: -- `throughput_drop_pct` (durable vs relaxed throughput drop) -- `p99_inflation_pct` (durable vs relaxed p99 inflation) - -### Phase 4 (recovery capability) -```bash -./scripts/bin/python ./scripts/phase4_report.py ./scripts/phase4_restart_mace.csv -./scripts/bin/python ./scripts/phase4_report.py ./scripts/phase4_restart_rocks.csv -``` -Check: -- `restart_success` -- `restart_ready_ms` at `p50/p95/p99/max` - -## 8. CLI Configurability (No Hardcoded Disk Prefix) -- Both benchmark binaries support `--path` to set the DB directory. -- All scripts use the first argument as storage root/path. 
-- You can point `${KV_BENCH_STORAGE_ROOT}` to any mount point (NVMe, SSD, RAID, ephemeral disk). - -## 9. Comparison Best Practices -Only compare cases under identical dimensions: +## 9. Comparison Rules +Only compare rows with identical: - `workload_id` - `key_size/value_size` - `threads` - `durability_mode` - `read_path` -If `error_ops > 0`, investigate that case first before drawing performance conclusions. +If `error_ops > 0`, investigate that case before drawing conclusions. diff --git a/rocksdb/main.cpp b/rocksdb/main.cpp index 0649669..7d873e2 100644 --- a/rocksdb/main.cpp +++ b/rocksdb/main.cpp @@ -72,7 +72,7 @@ struct Args { bool random = false; std::string mode = "insert"; std::optional workload; - std::string path = "/nvme/kv_bench_rocksdb"; + std::string path; bool shared_keyspace = true; size_t prefill_keys = 0; uint64_t warmup_secs = 0; diff --git a/scripts/mace.sh b/scripts/mace.sh index 1d6c0cc..3fff3b3 100755 --- a/scripts/mace.sh +++ b/scripts/mace.sh @@ -3,7 +3,7 @@ set -euo pipefail if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then - printf "Usage: %s [result_csv]\n" "$0" + printf "Usage: %s [result_csv]\n" "$0" exit 1 fi @@ -19,11 +19,6 @@ measure_secs="${MEASURE_SECS:-20}" prefill_keys="${PREFILL_KEYS:-200000}" read_path="${READ_PATH:-snapshot}" -if [[ "${db_root}" != /nvme* ]]; then - printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2 - exit 1 -fi - mkdir -p "${db_root}" mkdir -p "$(dirname -- "${result_file}")" diff --git a/scripts/phase1.sh b/scripts/phase1.sh index 2b98fbf..286c650 100755 --- a/scripts/phase1.sh +++ b/scripts/phase1.sh @@ -3,16 +3,15 @@ set -euo pipefail if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then - printf "Usage: %s [result_csv]\n" "$0" + printf "Usage: %s [result_csv]\n" "$0" exit 1 fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." 
&& pwd)" - -if [[ "$1" != /nvme* ]]; then - printf "db_root must be under /nvme, got: %s\n" "$1" >&2 - exit 1 +python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}" +if [ ! -x "${python_bin}" ]; then + python_bin="${PYTHON:-python3}" fi db_root="$1" @@ -23,6 +22,12 @@ measure_secs="${MEASURE_SECS:-300}" repeats="${REPEATS:-3}" read_path="${READ_PATH:-snapshot}" +phase1_workloads_raw="${PHASE1_WORKLOADS:-W1 W3 W6}" +phase1_threads_raw="${PHASE1_THREADS:-1 12}" +phase1_profiles_raw="${PHASE1_PROFILES:-P2 P3}" +phase1_prefill_tier_s_p2="${PHASE1_PREFILL_TIER_S_P2:-6100805}" +phase1_prefill_tier_s_p3="${PHASE1_PREFILL_TIER_S_P3:-392449}" + mkdir -p "${db_root}" mkdir -p "$(dirname -- "${result_file}")" @@ -30,9 +35,14 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml" (cd "${root_dir}/rocksdb" && cmake --preset release) (cd "${root_dir}/rocksdb" && cmake --build --preset release) -workloads=(W1 W3 W6) -threads=(1 12) -profiles=(P2 P3) +IFS=' ' read -r -a workloads <<< "${phase1_workloads_raw}" +IFS=' ' read -r -a threads <<< "${phase1_threads_raw}" +IFS=' ' read -r -a profiles <<< "${phase1_profiles_raw}" + +if [ "${#workloads[@]}" -eq 0 ] || [ "${#threads[@]}" -eq 0 ] || [ "${#profiles[@]}" -eq 0 ]; then + printf "phase1 workloads/threads/profiles must not be empty\n" >&2 + exit 1 +fi profile_key() { case "$1" in @@ -52,8 +62,8 @@ profile_val() { profile_prefill_tier_s() { case "$1" in - P2) echo 6100805 ;; - P3) echo 392449 ;; + P2) echo "${phase1_prefill_tier_s_p2}" ;; + P3) echo "${phase1_prefill_tier_s_p3}" ;; *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;; esac } @@ -113,5 +123,5 @@ for repeat in $(seq 1 "${repeats}"); do done done -python3 "${script_dir}/phase1_eval.py" "${result_file}" +"${python_bin}" "${script_dir}/phase1_eval.py" "${result_file}" printf "Phase 1 finished. 
Results: %s\n" "${result_file}" diff --git a/scripts/phase2.sh b/scripts/phase2.sh index 7a0de81..e7638f5 100755 --- a/scripts/phase2.sh +++ b/scripts/phase2.sh @@ -3,16 +3,15 @@ set -euo pipefail if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then - printf "Usage: %s [result_csv]\n" "$0" + printf "Usage: %s [result_csv]\n" "$0" exit 1 fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." && pwd)" - -if [[ "$1" != /nvme* ]]; then - printf "db_root must be under /nvme, got: %s\n" "$1" >&2 - exit 1 +python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}" +if [ ! -x "${python_bin}" ]; then + python_bin="${PYTHON:-python3}" fi db_root="$1" @@ -25,6 +24,16 @@ read_path="${READ_PATH:-snapshot}" run_tier_l_rep="${RUN_TIER_L_REPRESENTATIVE:-0}" tier_l_repeats="${TIER_L_REPEATS:-1}" +phase2_workloads_tier_m_raw="${PHASE2_WORKLOADS_TIER_M:-W1 W2 W3 W4 W6}" +phase2_workloads_tier_l_rep_raw="${PHASE2_WORKLOADS_TIER_L_REP:-W1 W3 W6}" +phase2_threads_tier_m_raw="${PHASE2_THREADS_TIER_M:-1 6 12}" +phase2_threads_tier_l_rep_raw="${PHASE2_THREADS_TIER_L_REP:-1 12}" +phase2_profiles_raw="${PHASE2_PROFILES:-P2 P3}" +phase2_prefill_tier_m_p2="${PHASE2_PREFILL_TIER_M_P2:-18302417}" +phase2_prefill_tier_m_p3="${PHASE2_PREFILL_TIER_M_P3:-1177348}" +phase2_prefill_tier_l_p2="${PHASE2_PREFILL_TIER_L_P2:-28470427}" +phase2_prefill_tier_l_p3="${PHASE2_PREFILL_TIER_L_P3:-1831430}" + mkdir -p "${db_root}" mkdir -p "$(dirname -- "${result_file}")" @@ -32,11 +41,21 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml" (cd "${root_dir}/rocksdb" && cmake --preset release) (cd "${root_dir}/rocksdb" && cmake --build --preset release) -workloads_tier_m=(W1 W2 W3 W4 W6) -workloads_tier_l_rep=(W1 W3 W6) -threads_tier_m=(1 6 12) -threads_tier_l_rep=(1 12) -profiles=(P2 P3) +IFS=' ' read -r -a workloads_tier_m <<< "${phase2_workloads_tier_m_raw}" +IFS=' ' read -r -a workloads_tier_l_rep <<< "${phase2_workloads_tier_l_rep_raw}" +IFS=' ' read -r -a 
threads_tier_m <<< "${phase2_threads_tier_m_raw}" +IFS=' ' read -r -a threads_tier_l_rep <<< "${phase2_threads_tier_l_rep_raw}" +IFS=' ' read -r -a profiles <<< "${phase2_profiles_raw}" + +if [ "${#workloads_tier_m[@]}" -eq 0 ] || [ "${#threads_tier_m[@]}" -eq 0 ] || [ "${#profiles[@]}" -eq 0 ]; then + printf "phase2 tier-m workloads/threads/profiles must not be empty\n" >&2 + exit 1 +fi + +if [ "${run_tier_l_rep}" = "1" ] && { [ "${#workloads_tier_l_rep[@]}" -eq 0 ] || [ "${#threads_tier_l_rep[@]}" -eq 0 ]; }; then + printf "phase2 tier-l representative workloads/threads must not be empty when enabled\n" >&2 + exit 1 +fi profile_key() { case "$1" in @@ -59,14 +78,14 @@ prefill_for() { local profile="$2" if [ "${tier}" = "tier-m" ]; then case "${profile}" in - P2) echo 18302417 ;; - P3) echo 1177348 ;; + P2) echo "${phase2_prefill_tier_m_p2}" ;; + P3) echo "${phase2_prefill_tier_m_p3}" ;; *) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;; esac elif [ "${tier}" = "tier-l" ]; then case "${profile}" in - P2) echo 28470427 ;; - P3) echo 1831430 ;; + P2) echo "${phase2_prefill_tier_l_p2}" ;; + P3) echo "${phase2_prefill_tier_l_p3}" ;; *) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;; esac else @@ -146,5 +165,5 @@ if [ "${run_tier_l_rep}" = "1" ]; then done fi -python3 "${script_dir}/phase2_report.py" "${result_file}" +"${python_bin}" "${script_dir}/phase2_report.py" "${result_file}" printf "Phase 2 finished. Results: %s\n" "${result_file}" diff --git a/scripts/phase3.sh b/scripts/phase3.sh index 31b297f..2365e4b 100755 --- a/scripts/phase3.sh +++ b/scripts/phase3.sh @@ -3,16 +3,15 @@ set -euo pipefail if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then - printf "Usage: %s [result_csv]\n" "$0" + printf "Usage: %s [result_csv]\n" "$0" exit 1 fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." 
&& pwd)" - -if [[ "$1" != /nvme* ]]; then - printf "db_root must be under /nvme, got: %s\n" "$1" >&2 - exit 1 +python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}" +if [ ! -x "${python_bin}" ]; then + python_bin="${PYTHON:-python3}" fi db_root="$1" @@ -23,6 +22,13 @@ measure_secs="${MEASURE_SECS:-300}" repeats="${REPEATS:-5}" read_path="${READ_PATH:-snapshot}" +phase3_workloads_raw="${PHASE3_WORKLOADS:-W1 W3 W6}" +phase3_threads_raw="${PHASE3_THREADS:-1 12}" +phase3_durabilities_raw="${PHASE3_DURABILITIES:-relaxed durable}" +key_size="${PHASE3_KEY_SIZE:-32}" +value_size="${PHASE3_VALUE_SIZE:-1024}" +prefill_keys="${PHASE3_PREFILL_KEYS:-18302417}" # tier-m P2 + mkdir -p "${db_root}" mkdir -p "$(dirname -- "${result_file}")" @@ -30,12 +36,14 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml" (cd "${root_dir}/rocksdb" && cmake --preset release) (cd "${root_dir}/rocksdb" && cmake --build --preset release) -workloads=(W1 W3 W6) -threads=(1 12) -durabilities=(relaxed durable) -key_size=32 -value_size=1024 -prefill_keys=18302417 # tier-m P2 +IFS=' ' read -r -a workloads <<< "${phase3_workloads_raw}" +IFS=' ' read -r -a threads <<< "${phase3_threads_raw}" +IFS=' ' read -r -a durabilities <<< "${phase3_durabilities_raw}" + +if [ "${#workloads[@]}" -eq 0 ] || [ "${#threads[@]}" -eq 0 ] || [ "${#durabilities[@]}" -eq 0 ]; then + printf "phase3 workloads/threads/durabilities must not be empty\n" >&2 + exit 1 +fi run_case() { local engine="$1" @@ -91,5 +99,5 @@ for repeat in $(seq 1 "${repeats}"); do done done -python3 "${script_dir}/phase3_report.py" "${result_file}" +"${python_bin}" "${script_dir}/phase3_report.py" "${result_file}" printf "Phase 3 finished. 
Results: %s\n" "${result_file}" diff --git a/scripts/phase3_report.py b/scripts/phase3_report.py index da77ec6..c918857 100755 --- a/scripts/phase3_report.py +++ b/scripts/phase3_report.py @@ -2,6 +2,7 @@ import sys import pandas as pd +import os def main() -> int: @@ -23,10 +24,12 @@ def main() -> int: if missing: raise ValueError(f"Missing columns: {sorted(missing)}") + target_key_size = int(os.getenv("PHASE3_REPORT_KEY_SIZE", "32")) + target_value_size = int(os.getenv("PHASE3_REPORT_VALUE_SIZE", "1024")) sub = df[ (df["workload_id"].isin(["W1", "W3", "W6"])) - & (df["key_size"] == 32) - & (df["value_size"] == 1024) + & (df["key_size"] == target_key_size) + & (df["value_size"] == target_value_size) ].copy() if sub.empty: diff --git a/scripts/phase4_soak.sh b/scripts/phase4_soak.sh index bdd1dd8..4739d0f 100755 --- a/scripts/phase4_soak.sh +++ b/scripts/phase4_soak.sh @@ -3,7 +3,7 @@ set -euo pipefail if [ "$#" -lt 2 ] || [ "$#" -gt 4 ]; then - printf "Usage: %s [result_csv] [restart_csv]\n" "$0" + printf "Usage: %s [result_csv] [restart_csv]\n" "$0" exit 1 fi @@ -12,15 +12,14 @@ db_path="$2" script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." && pwd)" +python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}" +if [ ! 
-x "${python_bin}" ]; then + python_bin="${PYTHON:-python3}" +fi result_file="${3:-${script_dir}/phase4_results.csv}" restart_file="${4:-${script_dir}/phase4_restart.csv}" -if [[ "${db_path}" != /nvme* ]]; then - printf "db_path must be under /nvme, got: %s\n" "${db_path}" >&2 - exit 1 -fi - if [[ "${engine}" != "mace" && "${engine}" != "rocksdb" ]]; then printf "engine must be mace or rocksdb\n" >&2 exit 1 @@ -34,15 +33,17 @@ soak_hours="${SOAK_HOURS:-12}" crash_interval_secs="${CRASH_INTERVAL_SECS:-1800}" verify_measure_secs="${VERIFY_MEASURE_SECS:-30}" run_measure_secs="${RUN_MEASURE_SECS:-3600}" +seed_measure_secs="${SEED_MEASURE_SECS:-5}" warmup_secs="${WARMUP_SECS:-30}" +max_cycles="${PHASE4_MAX_CYCLES:-0}" # baseline: tier-m + W3 + P2 + 12 threads -workload_main="W3" -workload_verify="W6" -threads=12 -key_size=32 -value_size=1024 -prefill_keys=18302417 +workload_main="${PHASE4_WORKLOAD_MAIN:-W3}" +workload_verify="${PHASE4_WORKLOAD_VERIFY:-W6}" +threads="${PHASE4_THREADS:-12}" +key_size="${PHASE4_KEY_SIZE:-32}" +value_size="${PHASE4_VALUE_SIZE:-1024}" +prefill_keys="${PHASE4_PREFILL_KEYS:-18302417}" read_path="${READ_PATH:-snapshot}" durability="${DURABILITY:-relaxed}" @@ -66,6 +67,7 @@ run_cmd() { --warmup-secs "${warmup_secs}" \ --measure-secs "${measure_secs}" \ --shared-keyspace \ + --no-cleanup \ --read-path "${read_path}" \ --durability "${durability}" \ --reuse-path \ @@ -82,6 +84,7 @@ run_cmd() { --warmup-secs "${warmup_secs}" \ --measure-secs "${measure_secs}" \ --shared-keyspace \ + --no-cleanup \ --read-path "${read_path}" \ --durability "${durability}" \ --reuse-path \ @@ -140,6 +143,7 @@ start_run_bg() { --warmup-secs "${warmup_secs}" \ --measure-secs "${measure_secs}" \ --shared-keyspace \ + --no-cleanup \ --read-path "${read_path}" \ --durability "${durability}" \ --reuse-path \ @@ -156,6 +160,7 @@ start_run_bg() { --warmup-secs "${warmup_secs}" \ --measure-secs "${measure_secs}" \ --shared-keyspace \ + --no-cleanup \ --read-path 
"${read_path}" \ --durability "${durability}" \ --reuse-path \ @@ -204,10 +209,13 @@ fi # seed dataset once (with prefill) printf "[phase4][%s] seed dataset at %s\n" "${engine}" "${db_path}" -run_cmd "${workload_main}" 5 0 +run_cmd "${workload_main}" "${seed_measure_secs}" 0 cycle=0 while [ "$(date +%s)" -lt "${end_epoch}" ]; do + if [ "${max_cycles}" -gt 0 ] && [ "${cycle}" -ge "${max_cycles}" ]; then + break + fi cycle="$((cycle + 1))" cycle_start="$(date +%s)" printf "[phase4][%s] cycle=%s start=%s\n" "${engine}" "${cycle}" "${cycle_start}" @@ -241,5 +249,5 @@ while [ "$(date +%s)" -lt "${end_epoch}" ]; do "${cycle}" "${cycle_start}" "${kill_sent}" "${worker_exit}" "${restart_status}" "${restart_ready_ms}" >> "${restart_file}" done -python3 "${script_dir}/phase4_report.py" "${restart_file}" +"${python_bin}" "${script_dir}/phase4_report.py" "${restart_file}" printf "Phase 4 soak finished. Results: %s | Restart log: %s\n" "${result_file}" "${restart_file}" diff --git a/scripts/rocksdb.sh b/scripts/rocksdb.sh index 7c85e92..df82a80 100755 --- a/scripts/rocksdb.sh +++ b/scripts/rocksdb.sh @@ -3,7 +3,7 @@ set -euo pipefail if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then - printf "Usage: %s [result_csv]\n" "$0" + printf "Usage: %s [result_csv]\n" "$0" exit 1 fi @@ -19,11 +19,6 @@ measure_secs="${MEASURE_SECS:-20}" prefill_keys="${PREFILL_KEYS:-200000}" read_path="${READ_PATH:-snapshot}" -if [[ "${db_root}" != /nvme* ]]; then - printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2 - exit 1 -fi - mkdir -p "${db_root}" mkdir -p "$(dirname -- "${result_file}")" diff --git a/src/main.rs b/src/main.rs index 0cdd9b2..f5a29a0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,4 @@ -use clap::Parser; +use clap::{ArgAction, Parser}; #[cfg(target_os = "linux")] use logger::Logger; use mace::{Mace, Options}; @@ -27,7 +27,7 @@ const PREFILL_BATCH: usize = 1024; #[derive(Parser, Debug, Clone)] #[command(author, version, about, long_about = None)] struct Args { - #[arg(short 
= 'p', long, default_value = "/nvme/kv_bench_mace")] + #[arg(short = 'p', long)] path: String, #[arg(short = 'm', long, default_value = "insert")] @@ -60,6 +60,9 @@ struct Args { #[arg(long, default_value_t = true)] shared_keyspace: bool, + #[arg(long, action = ArgAction::SetTrue)] + no_shared_keyspace: bool, + #[arg(long, default_value_t = 0)] prefill_keys: usize, @@ -87,6 +90,9 @@ struct Args { #[arg(long, default_value_t = true)] cleanup: bool, + #[arg(long, action = ArgAction::SetTrue)] + no_cleanup: bool, + #[arg(long, default_value_t = false)] skip_prefill: bool, @@ -416,8 +422,7 @@ fn make_thread_prefix(tid: usize) -> Vec { fn latency_bucket(us: u64) -> usize { let v = us.max(1); - let idx = (63 - v.leading_zeros() as usize).min(LAT_BUCKETS - 1); - idx + (63 - v.leading_zeros() as usize).min(LAT_BUCKETS - 1) } fn histogram_quantile_us(hist: &[u64; LAT_BUCKETS], q: f64) -> u64 { @@ -614,12 +619,14 @@ fn pick_op_kind(rng: &mut StdRng, spec: &WorkloadSpec) -> OpKind { fn main() { #[cfg(target_os = "linux")] { - Logger::init().add_file("/tmp/x.log", true); - log::set_max_level(log::LevelFilter::Info); + Logger::init().add_file("kv_bench.log", true); + log::set_max_level(log::LevelFilter::Error); } let args = Args::parse(); let path = Path::new(&args.path); + let shared_keyspace = args.shared_keyspace && !args.no_shared_keyspace; + let cleanup = args.cleanup && !args.no_cleanup; if args.path.is_empty() { eprintln!("path is empty"); @@ -629,6 +636,17 @@ fn main() { eprintln!("path {:?} already exists", args.path); exit(1); } + if args.skip_prefill && !args.reuse_path { + eprintln!("--skip-prefill requires --reuse-path"); + exit(1); + } + if args.skip_prefill && !path.exists() { + eprintln!( + "--skip-prefill requires existing path, but `{}` does not exist", + args.path + ); + exit(1); + } if args.threads == 0 { eprintln!("threads must be greater than 0"); exit(1); @@ -673,6 +691,12 @@ fn main() { } }; + let mixed_workload = workload.read_pct > 0 && 
workload.update_pct > 0; + if mixed_workload && !shared_keyspace { + eprintln!("mixed workloads require shared keyspace"); + exit(1); + } + let prefill_keys = if workload.requires_prefill { if args.prefill_keys > 0 { args.prefill_keys @@ -689,11 +713,6 @@ fn main() { } let thread_prefill_ranges = split_ranges(prefill_keys, args.threads); - let thread_op_ranges = if args.shared_keyspace { - thread_prefill_ranges.clone() - } else { - thread_prefill_ranges.clone() - }; let mut opt = Options::new(path); opt.sync_on_write = durability_mode == DurabilityMode::Durable; @@ -702,7 +721,7 @@ fn main() { opt.data_file_size = 64 << 20; opt.max_log_size = 1 << 30; opt.default_arenas = 128; - opt.tmp_store = args.cleanup; + opt.tmp_store = cleanup; let db = Mace::new(opt.validate().unwrap()).unwrap(); db.disable_gc(); @@ -716,13 +735,12 @@ fn main() { let value = Arc::new(vec![b'0'; args.value_size]); if workload.requires_prefill && !args.skip_prefill { - let mut fill_handles = Vec::with_capacity(args.threads); - for tid in 0..args.threads { + let mut fill_handles = Vec::with_capacity(thread_prefill_ranges.len()); + for (tid, tr) in thread_prefill_ranges.iter().copied().enumerate() { let bucket = bkt.clone(); let v = value.clone(); let key_size = args.key_size; - let shared = args.shared_keyspace; - let tr = thread_prefill_ranges[tid]; + let shared = shared_keyspace; fill_handles.push(std::thread::spawn(move || { coreid::bind_core(tid); let mut in_batch = 0usize; @@ -766,22 +784,24 @@ fn main() { let ins_ctr = Arc::clone(&insert_counter); let key_size = args.key_size; let random_insert = args.random; - let read_path = read_path; + let read_path_mode = read_path; let warmup_secs = args.warmup_secs; let measure_secs = args.measure_secs; let distribution = spec.distribution; let zipf_theta = args.zipf_theta; let scan_len = spec.scan_len; - let shared = args.shared_keyspace; - let prefill_keys = prefill_keys; - let local_key_len = thread_op_ranges[tid].len; + let shared = 
shared_keyspace; + let prefill_key_count = prefill_keys; + let local_key_len = thread_prefill_ranges[tid].len; let local_op_count = op_counts[tid].len; std::thread::spawn(move || { coreid::bind_core(tid); let seed = (now_epoch_ms() as u64) - ^ ((tid as u64 + 1) * 0x9E37_79B9_7F4A_7C15) - ^ ((prefill_keys as u64) << 7); + ^ (tid as u64) + .wrapping_add(1) + .wrapping_mul(0x9E37_79B9_7F4A_7C15) + ^ (prefill_key_count as u64).wrapping_shl(7); let mut rng = StdRng::seed_from_u64(seed); let mut stats = ThreadStats::default(); let mut local_insert_idx = 0usize; @@ -805,11 +825,11 @@ fn main() { &spec, distribution, zipf_theta, - read_path, + read_path_mode, key_size, scan_len, shared, - prefill_keys, + prefill_key_count, local_key_len, tid, &ins_ctr, @@ -833,11 +853,11 @@ fn main() { &spec, distribution, zipf_theta, - read_path, + read_path_mode, key_size, scan_len, shared, - prefill_keys, + prefill_key_count, local_key_len, tid, &ins_ctr, @@ -861,11 +881,11 @@ fn main() { &spec, distribution, zipf_theta, - read_path, + read_path_mode, key_size, scan_len, shared, - prefill_keys, + prefill_key_count, local_key_len, tid, &ins_ctr, @@ -921,7 +941,7 @@ fn main() { key_size: args.key_size, value_size: args.value_size, prefill_keys, - shared_keyspace: args.shared_keyspace, + shared_keyspace, distribution: workload.distribution, zipf_theta: args.zipf_theta, read_pct: workload.read_pct,