bench: align mace/rocksdb runners and reproducible local profile

This commit is contained in:
abbycin 2026-03-04 23:10:05 +08:00
parent d5b32fd590
commit 4ba25a9ad0
11 changed files with 272 additions and 185 deletions

View File

@ -4,7 +4,7 @@ version = "0.1.0"
edition = "2024"
[dependencies]
mace-kv = "0.0.27"
mace-kv = { path = "/home/workspace/gits/github/mace" }
clap = { version = "4.5.48", features = ["derive"] }
rand = "0.9.2"
log = "0.4.22"

View File

@ -1,25 +1,38 @@
# kv_bench Reproduction Guide (Mace vs RocksDB)
This repository is used to reproduce and compare `mace` and `rocksdb` benchmark results across phase0~phase4.
This document defines a reproducible workflow for `mace` and `rocksdb` across phases 0–4.
It now has two profiles:
- `local` (default in this doc): validated on this machine class, intended to run end-to-end without exhausting resources.
- `full`: benchmark-refactor target matrix (much longer runtime).
## 1. Prerequisites
- Linux
- A high-speed storage mount directory you choose (typically an NVMe mount point)
- Rust/Cargo
- CMake (to build `rocksdb_bench`)
- Python 3 (for result aggregation and plotting)
- Python 3 (reporting/plotting)
- A persistent storage path (NVMe/SSD recommended), **not tmpfs**
## 2. Storage Directory Configuration (Important)
`/nvme` is no longer hardcoded. You can use any mount directory.
## 2. Hardware + Storage Baseline
For the local profile, assume approximately:
- CPU: `6C12T` (6 cores / 12 threads)
- RAM: `32GB`
- Disk: `100GB` available benchmark storage
Recommended: set one shared variable first:
Before running, set paths and verify filesystem type/capacity:
```bash
export KV_BENCH_STORAGE_ROOT=/path/to/your/nvme_mount/kvbench
mkdir -p "${KV_BENCH_STORAGE_ROOT}"
export KV_BENCH_STORAGE_ROOT=/home/abby/kv_bench/target/repro_storage
export KV_BENCH_RESULT_ROOT=/home/abby/kv_bench/target/repro_results
mkdir -p "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}"
df -hT "${KV_BENCH_STORAGE_ROOT}" "${KV_BENCH_RESULT_ROOT}"
free -h
```
All scripts below take this directory (or one of its subdirectories) as the first argument.
Requirements:
- `KV_BENCH_STORAGE_ROOT` and `KV_BENCH_RESULT_ROOT` must not be on `tmpfs`.
- Keep at least `25GB` free under the storage root before starting long runs.
## 3. Initialization
```bash
@ -29,139 +42,155 @@ source ./bin/activate
cd /home/abby/kv_bench
```
## 4. Quick Baseline Comparison (W1~W6)
Clean old data first:
## 4. Quick Baseline (W1~W6)
Clean old data:
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_rocks"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_rocks"
rm -f "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv"
```
Run both engines:
Run both engines (`local` profile parameters):
```bash
./scripts/mace.sh "${KV_BENCH_STORAGE_ROOT}/basic_mace" ./scripts/benchmark_results.csv
./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" ./scripts/benchmark_results.csv
WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \
./scripts/mace.sh "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv"
WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \
./scripts/rocksdb.sh "${KV_BENCH_STORAGE_ROOT}/basic_rocks" "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv"
```
Generate plots:
```bash
./scripts/bin/python ./scripts/plot.py ./scripts/benchmark_results.csv ./scripts
./scripts/bin/python ./scripts/plot.py "${KV_BENCH_RESULT_ROOT}/benchmark_results.csv" "${KV_BENCH_RESULT_ROOT}"
```
## 5. Phase Reproduction Commands
## 5. Phase Reproduction
### Phase 1
### 5.1 Phase 1
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase1"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase1"
./scripts/phase1.sh "${KV_BENCH_STORAGE_ROOT}/phase1" ./scripts/phase1_results.csv
rm -f "${KV_BENCH_RESULT_ROOT}/phase1_results.csv"
WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \
PHASE1_WORKLOADS="W1 W3 W6" \
PHASE1_THREADS="1 6" \
PHASE1_PROFILES="P2" \
PHASE1_PREFILL_TIER_S_P2=200000 \
./scripts/phase1.sh "${KV_BENCH_STORAGE_ROOT}/phase1" "${KV_BENCH_RESULT_ROOT}/phase1_results.csv"
```
### Phase 2
### 5.2 Phase 2
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase2"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase2"
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" ./scripts/phase2_results.csv
rm -f "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \
PHASE2_WORKLOADS_TIER_M="W1 W3 W6" \
PHASE2_THREADS_TIER_M="1 6" \
PHASE2_PROFILES="P2" \
PHASE2_PREFILL_TIER_M_P2=500000 \
RUN_TIER_L_REPRESENTATIVE=0 \
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
```
Optional: enable tier-l representative subset:
Optional (`full` profile tier-l representative subset):
```bash
RUN_TIER_L_REPRESENTATIVE=1 TIER_L_REPEATS=1 \
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" ./scripts/phase2_results.csv
./scripts/phase2.sh "${KV_BENCH_STORAGE_ROOT}/phase2" "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
```
### Phase 3
### 5.3 Phase 3
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase3"
mkdir -p "${KV_BENCH_STORAGE_ROOT}/phase3"
./scripts/phase3.sh "${KV_BENCH_STORAGE_ROOT}/phase3" ./scripts/phase3_results.csv
rm -f "${KV_BENCH_RESULT_ROOT}/phase3_results.csv"
WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \
PHASE3_WORKLOADS="W1 W3" \
PHASE3_THREADS="1 6" \
PHASE3_DURABILITIES="relaxed durable" \
PHASE3_KEY_SIZE=32 PHASE3_VALUE_SIZE=1024 PHASE3_PREFILL_KEYS=500000 \
./scripts/phase3.sh "${KV_BENCH_STORAGE_ROOT}/phase3" "${KV_BENCH_RESULT_ROOT}/phase3_results.csv"
```
### Phase 4 (run one engine at a time)
Mace:
### 5.4 Phase 4 (run one engine at a time)
`local` profile (memory-safe on 32GB machines, validated on 2026-03-04):
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase4_mace"
rm -f "${KV_BENCH_RESULT_ROOT}/phase4_results_mace.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv"
SOAK_HOURS=1 PHASE4_MAX_CYCLES=3 \
PHASE4_WORKLOAD_MAIN=W1 PHASE4_WORKLOAD_VERIFY=W1 \
SEED_MEASURE_SECS=2 RUN_MEASURE_SECS=10 CRASH_INTERVAL_SECS=3 VERIFY_MEASURE_SECS=2 WARMUP_SECS=1 \
PHASE4_THREADS=1 PHASE4_KEY_SIZE=32 PHASE4_VALUE_SIZE=128 PHASE4_PREFILL_KEYS=1000 \
./scripts/phase4_soak.sh mace "${KV_BENCH_STORAGE_ROOT}/phase4_mace" \
./scripts/phase4_results_mace.csv ./scripts/phase4_restart_mace.csv
```
"${KV_BENCH_RESULT_ROOT}/phase4_results_mace.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv"
RocksDB:
```bash
rm -rf "${KV_BENCH_STORAGE_ROOT}/phase4_rocks"
rm -f "${KV_BENCH_RESULT_ROOT}/phase4_results_rocks.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv"
SOAK_HOURS=1 PHASE4_MAX_CYCLES=3 \
PHASE4_WORKLOAD_MAIN=W1 PHASE4_WORKLOAD_VERIFY=W1 \
SEED_MEASURE_SECS=2 RUN_MEASURE_SECS=10 CRASH_INTERVAL_SECS=3 VERIFY_MEASURE_SECS=2 WARMUP_SECS=1 \
PHASE4_THREADS=1 PHASE4_KEY_SIZE=32 PHASE4_VALUE_SIZE=128 PHASE4_PREFILL_KEYS=1000 \
./scripts/phase4_soak.sh rocksdb "${KV_BENCH_STORAGE_ROOT}/phase4_rocks" \
./scripts/phase4_results_rocks.csv ./scripts/phase4_restart_rocks.csv
"${KV_BENCH_RESULT_ROOT}/phase4_results_rocks.csv" "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv"
```
## 6. Where Result Inputs (CSV) Are Stored
Default output files:
- `./scripts/benchmark_results.csv`
- `./scripts/phase1_results.csv`
- `./scripts/phase2_results.csv`
- `./scripts/phase3_results.csv`
- `./scripts/phase4_results_*.csv`
- `./scripts/phase4_restart_*.csv`
Notes:
- The previous local phase4 parameters (`W3/W6`, larger prefill/time windows) can exceed 32GB RAM on current `mace` builds.
- The local phase4 profile above keeps crash/restart semantics, but intentionally reduces write pressure so it completes on this host class.
The unified schema is emitted by both engine binaries (same format for mace/rocksdb). Key columns:
- `engine`: `mace` / `rocksdb`
- `workload_id`: `W1..W6`
- `durability_mode`: `relaxed` / `durable`
- `threads,key_size,value_size,prefill_keys`: case configuration
- `ops_per_sec`: throughput
- `p50_us,p95_us,p99_us,p999_us`: latency percentiles
- `error_ops`: number of failed operations
- `read_path`: `snapshot` / `rw_txn`
For `full` profile, remove the `PHASE4_*` overrides and use benchmark-refactor defaults (recommend `>=64GB` RAM and ample NVMe space).
## 7. Where to Interpret Results
## 6. Result Files
The commands above write CSVs under `${KV_BENCH_RESULT_ROOT}`:
- `benchmark_results.csv`
- `phase1_results.csv`
- `phase2_results.csv`
- `phase3_results.csv`
- `phase4_results_*.csv`
- `phase4_restart_*.csv`
### Phase 1 (stability)
Unified schema columns include:
- `engine` (`mace` / `rocksdb`)
- `workload_id` (`W1..W6`)
- `durability_mode` (`relaxed` / `durable`)
- `threads,key_size,value_size,prefill_keys`
- `ops_per_sec`
- `p50_us,p95_us,p99_us,p999_us`
- `error_ops`
- `read_path`
## 7. Report Commands
```bash
./scripts/bin/python ./scripts/phase1_eval.py ./scripts/phase1_results.csv
./scripts/bin/python ./scripts/phase1_eval.py "${KV_BENCH_RESULT_ROOT}/phase1_results.csv"
./scripts/bin/python ./scripts/phase2_report.py "${KV_BENCH_RESULT_ROOT}/phase2_results.csv"
./scripts/bin/python ./scripts/phase3_report.py "${KV_BENCH_RESULT_ROOT}/phase3_results.csv"
./scripts/bin/python ./scripts/phase4_report.py "${KV_BENCH_RESULT_ROOT}/phase4_restart_mace.csv"
./scripts/bin/python ./scripts/phase4_report.py "${KV_BENCH_RESULT_ROOT}/phase4_restart_rocks.csv"
```
Check:
- `throughput_cv` (<=10%)
- `p99_cv` (<=15%)
- `stable` and overall pass ratio
### Phase 2 (core report)
```bash
./scripts/bin/python ./scripts/phase2_report.py ./scripts/phase2_results.csv
```
Check:
- `throughput_median`
- `p95_median`, `p99_median`
- `slower_engine`, `slower_ratio`
## 8. Full-Profile Toggle (Benchmark Refactor Matrix)
If you want the full benchmark-refactor matrix:
- Use default phase script matrices (no `PHASE*_*` narrowing).
- Increase `WARMUP_SECS/MEASURE_SECS/REPEATS` to target values.
- Enable `RUN_TIER_L_REPRESENTATIVE=1` as needed.
- Keep large runs on persistent NVMe storage with enough free disk.
### Phase 3 (durability cost)
```bash
./scripts/bin/python ./scripts/phase3_report.py ./scripts/phase3_results.csv
```
Check:
- `throughput_drop_pct` (durable vs relaxed throughput drop)
- `p99_inflation_pct` (durable vs relaxed p99 inflation)
### Phase 4 (recovery capability)
```bash
./scripts/bin/python ./scripts/phase4_report.py ./scripts/phase4_restart_mace.csv
./scripts/bin/python ./scripts/phase4_report.py ./scripts/phase4_restart_rocks.csv
```
Check:
- `restart_success`
- `restart_ready_ms` at `p50/p95/p99/max`
## 8. CLI Configurability (No Hardcoded Disk Prefix)
- Both benchmark binaries support `--path` to set the DB directory.
- All scripts use the first argument as storage root/path.
- You can point `${KV_BENCH_STORAGE_ROOT}` to any mount point (NVMe, SSD, RAID, ephemeral disk).
## 9. Comparison Best Practices
Only compare cases under identical dimensions:
## 9. Comparison Rules
Only compare rows with identical:
- `workload_id`
- `key_size/value_size`
- `threads`
- `durability_mode`
- `read_path`
If `error_ops > 0`, investigate that case first before drawing performance conclusions.
If `error_ops > 0`, investigate that case before drawing conclusions.

View File

@ -72,7 +72,7 @@ struct Args {
bool random = false;
std::string mode = "insert";
std::optional<std::string> workload;
std::string path = "/nvme/kv_bench_rocksdb";
std::string path;
bool shared_keyspace = true;
size_t prefill_keys = 0;
uint64_t warmup_secs = 0;

View File

@ -3,7 +3,7 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
@ -19,11 +19,6 @@ measure_secs="${MEASURE_SECS:-20}"
prefill_keys="${PREFILL_KEYS:-200000}"
read_path="${READ_PATH:-snapshot}"
if [[ "${db_root}" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2
exit 1
fi
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"

View File

@ -3,16 +3,15 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
if [[ "$1" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "$1" >&2
exit 1
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
db_root="$1"
@ -23,6 +22,12 @@ measure_secs="${MEASURE_SECS:-300}"
repeats="${REPEATS:-3}"
read_path="${READ_PATH:-snapshot}"
phase1_workloads_raw="${PHASE1_WORKLOADS:-W1 W3 W6}"
phase1_threads_raw="${PHASE1_THREADS:-1 12}"
phase1_profiles_raw="${PHASE1_PROFILES:-P2 P3}"
phase1_prefill_tier_s_p2="${PHASE1_PREFILL_TIER_S_P2:-6100805}"
phase1_prefill_tier_s_p3="${PHASE1_PREFILL_TIER_S_P3:-392449}"
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"
@ -30,9 +35,14 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml"
(cd "${root_dir}/rocksdb" && cmake --preset release)
(cd "${root_dir}/rocksdb" && cmake --build --preset release)
workloads=(W1 W3 W6)
threads=(1 12)
profiles=(P2 P3)
IFS=' ' read -r -a workloads <<< "${phase1_workloads_raw}"
IFS=' ' read -r -a threads <<< "${phase1_threads_raw}"
IFS=' ' read -r -a profiles <<< "${phase1_profiles_raw}"
if [ "${#workloads[@]}" -eq 0 ] || [ "${#threads[@]}" -eq 0 ] || [ "${#profiles[@]}" -eq 0 ]; then
printf "phase1 workloads/threads/profiles must not be empty\n" >&2
exit 1
fi
profile_key() {
case "$1" in
@ -52,8 +62,8 @@ profile_val() {
profile_prefill_tier_s() {
case "$1" in
P2) echo 6100805 ;;
P3) echo 392449 ;;
P2) echo "${phase1_prefill_tier_s_p2}" ;;
P3) echo "${phase1_prefill_tier_s_p3}" ;;
*) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;;
esac
}
@ -113,5 +123,5 @@ for repeat in $(seq 1 "${repeats}"); do
done
done
python3 "${script_dir}/phase1_eval.py" "${result_file}"
"${python_bin}" "${script_dir}/phase1_eval.py" "${result_file}"
printf "Phase 1 finished. Results: %s\n" "${result_file}"

View File

@ -3,16 +3,15 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
if [[ "$1" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "$1" >&2
exit 1
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
db_root="$1"
@ -25,6 +24,16 @@ read_path="${READ_PATH:-snapshot}"
run_tier_l_rep="${RUN_TIER_L_REPRESENTATIVE:-0}"
tier_l_repeats="${TIER_L_REPEATS:-1}"
phase2_workloads_tier_m_raw="${PHASE2_WORKLOADS_TIER_M:-W1 W2 W3 W4 W6}"
phase2_workloads_tier_l_rep_raw="${PHASE2_WORKLOADS_TIER_L_REP:-W1 W3 W6}"
phase2_threads_tier_m_raw="${PHASE2_THREADS_TIER_M:-1 6 12}"
phase2_threads_tier_l_rep_raw="${PHASE2_THREADS_TIER_L_REP:-1 12}"
phase2_profiles_raw="${PHASE2_PROFILES:-P2 P3}"
phase2_prefill_tier_m_p2="${PHASE2_PREFILL_TIER_M_P2:-18302417}"
phase2_prefill_tier_m_p3="${PHASE2_PREFILL_TIER_M_P3:-1177348}"
phase2_prefill_tier_l_p2="${PHASE2_PREFILL_TIER_L_P2:-28470427}"
phase2_prefill_tier_l_p3="${PHASE2_PREFILL_TIER_L_P3:-1831430}"
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"
@ -32,11 +41,21 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml"
(cd "${root_dir}/rocksdb" && cmake --preset release)
(cd "${root_dir}/rocksdb" && cmake --build --preset release)
workloads_tier_m=(W1 W2 W3 W4 W6)
workloads_tier_l_rep=(W1 W3 W6)
threads_tier_m=(1 6 12)
threads_tier_l_rep=(1 12)
profiles=(P2 P3)
IFS=' ' read -r -a workloads_tier_m <<< "${phase2_workloads_tier_m_raw}"
IFS=' ' read -r -a workloads_tier_l_rep <<< "${phase2_workloads_tier_l_rep_raw}"
IFS=' ' read -r -a threads_tier_m <<< "${phase2_threads_tier_m_raw}"
IFS=' ' read -r -a threads_tier_l_rep <<< "${phase2_threads_tier_l_rep_raw}"
IFS=' ' read -r -a profiles <<< "${phase2_profiles_raw}"
if [ "${#workloads_tier_m[@]}" -eq 0 ] || [ "${#threads_tier_m[@]}" -eq 0 ] || [ "${#profiles[@]}" -eq 0 ]; then
printf "phase2 tier-m workloads/threads/profiles must not be empty\n" >&2
exit 1
fi
if [ "${run_tier_l_rep}" = "1" ] && { [ "${#workloads_tier_l_rep[@]}" -eq 0 ] || [ "${#threads_tier_l_rep[@]}" -eq 0 ]; }; then
printf "phase2 tier-l representative workloads/threads must not be empty when enabled\n" >&2
exit 1
fi
profile_key() {
case "$1" in
@ -59,14 +78,14 @@ prefill_for() {
local profile="$2"
if [ "${tier}" = "tier-m" ]; then
case "${profile}" in
P2) echo 18302417 ;;
P3) echo 1177348 ;;
P2) echo "${phase2_prefill_tier_m_p2}" ;;
P3) echo "${phase2_prefill_tier_m_p3}" ;;
*) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;;
esac
elif [ "${tier}" = "tier-l" ]; then
case "${profile}" in
P2) echo 28470427 ;;
P3) echo 1831430 ;;
P2) echo "${phase2_prefill_tier_l_p2}" ;;
P3) echo "${phase2_prefill_tier_l_p3}" ;;
*) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;;
esac
else
@ -146,5 +165,5 @@ if [ "${run_tier_l_rep}" = "1" ]; then
done
fi
python3 "${script_dir}/phase2_report.py" "${result_file}"
"${python_bin}" "${script_dir}/phase2_report.py" "${result_file}"
printf "Phase 2 finished. Results: %s\n" "${result_file}"

View File

@ -3,16 +3,15 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
if [[ "$1" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "$1" >&2
exit 1
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
db_root="$1"
@ -23,6 +22,13 @@ measure_secs="${MEASURE_SECS:-300}"
repeats="${REPEATS:-5}"
read_path="${READ_PATH:-snapshot}"
phase3_workloads_raw="${PHASE3_WORKLOADS:-W1 W3 W6}"
phase3_threads_raw="${PHASE3_THREADS:-1 12}"
phase3_durabilities_raw="${PHASE3_DURABILITIES:-relaxed durable}"
key_size="${PHASE3_KEY_SIZE:-32}"
value_size="${PHASE3_VALUE_SIZE:-1024}"
prefill_keys="${PHASE3_PREFILL_KEYS:-18302417}" # tier-m P2
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"
@ -30,12 +36,14 @@ cargo build --release --manifest-path "${root_dir}/Cargo.toml"
(cd "${root_dir}/rocksdb" && cmake --preset release)
(cd "${root_dir}/rocksdb" && cmake --build --preset release)
workloads=(W1 W3 W6)
threads=(1 12)
durabilities=(relaxed durable)
key_size=32
value_size=1024
prefill_keys=18302417 # tier-m P2
IFS=' ' read -r -a workloads <<< "${phase3_workloads_raw}"
IFS=' ' read -r -a threads <<< "${phase3_threads_raw}"
IFS=' ' read -r -a durabilities <<< "${phase3_durabilities_raw}"
if [ "${#workloads[@]}" -eq 0 ] || [ "${#threads[@]}" -eq 0 ] || [ "${#durabilities[@]}" -eq 0 ]; then
printf "phase3 workloads/threads/durabilities must not be empty\n" >&2
exit 1
fi
run_case() {
local engine="$1"
@ -91,5 +99,5 @@ for repeat in $(seq 1 "${repeats}"); do
done
done
python3 "${script_dir}/phase3_report.py" "${result_file}"
"${python_bin}" "${script_dir}/phase3_report.py" "${result_file}"
printf "Phase 3 finished. Results: %s\n" "${result_file}"

View File

@ -2,6 +2,7 @@
import sys
import pandas as pd
import os
def main() -> int:
@ -23,10 +24,12 @@ def main() -> int:
if missing:
raise ValueError(f"Missing columns: {sorted(missing)}")
target_key_size = int(os.getenv("PHASE3_REPORT_KEY_SIZE", "32"))
target_value_size = int(os.getenv("PHASE3_REPORT_VALUE_SIZE", "1024"))
sub = df[
(df["workload_id"].isin(["W1", "W3", "W6"]))
& (df["key_size"] == 32)
& (df["value_size"] == 1024)
& (df["key_size"] == target_key_size)
& (df["value_size"] == target_value_size)
].copy()
if sub.empty:

View File

@ -3,7 +3,7 @@
set -euo pipefail
if [ "$#" -lt 2 ] || [ "$#" -gt 4 ]; then
printf "Usage: %s <engine:mace|rocksdb> <db_path_under_/nvme> [result_csv] [restart_csv]\n" "$0"
printf "Usage: %s <engine:mace|rocksdb> <db_path> [result_csv] [restart_csv]\n" "$0"
exit 1
fi
@ -12,15 +12,14 @@ db_path="$2"
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"
python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}"
if [ ! -x "${python_bin}" ]; then
python_bin="${PYTHON:-python3}"
fi
result_file="${3:-${script_dir}/phase4_results.csv}"
restart_file="${4:-${script_dir}/phase4_restart.csv}"
if [[ "${db_path}" != /nvme* ]]; then
printf "db_path must be under /nvme, got: %s\n" "${db_path}" >&2
exit 1
fi
if [[ "${engine}" != "mace" && "${engine}" != "rocksdb" ]]; then
printf "engine must be mace or rocksdb\n" >&2
exit 1
@ -34,15 +33,17 @@ soak_hours="${SOAK_HOURS:-12}"
crash_interval_secs="${CRASH_INTERVAL_SECS:-1800}"
verify_measure_secs="${VERIFY_MEASURE_SECS:-30}"
run_measure_secs="${RUN_MEASURE_SECS:-3600}"
seed_measure_secs="${SEED_MEASURE_SECS:-5}"
warmup_secs="${WARMUP_SECS:-30}"
max_cycles="${PHASE4_MAX_CYCLES:-0}"
# baseline: tier-m + W3 + P2 + 12 threads
workload_main="W3"
workload_verify="W6"
threads=12
key_size=32
value_size=1024
prefill_keys=18302417
workload_main="${PHASE4_WORKLOAD_MAIN:-W3}"
workload_verify="${PHASE4_WORKLOAD_VERIFY:-W6}"
threads="${PHASE4_THREADS:-12}"
key_size="${PHASE4_KEY_SIZE:-32}"
value_size="${PHASE4_VALUE_SIZE:-1024}"
prefill_keys="${PHASE4_PREFILL_KEYS:-18302417}"
read_path="${READ_PATH:-snapshot}"
durability="${DURABILITY:-relaxed}"
@ -66,6 +67,7 @@ run_cmd() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -82,6 +84,7 @@ run_cmd() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -140,6 +143,7 @@ start_run_bg() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -156,6 +160,7 @@ start_run_bg() {
--warmup-secs "${warmup_secs}" \
--measure-secs "${measure_secs}" \
--shared-keyspace \
--no-cleanup \
--read-path "${read_path}" \
--durability "${durability}" \
--reuse-path \
@ -204,10 +209,13 @@ fi
# seed dataset once (with prefill)
printf "[phase4][%s] seed dataset at %s\n" "${engine}" "${db_path}"
run_cmd "${workload_main}" 5 0
run_cmd "${workload_main}" "${seed_measure_secs}" 0
cycle=0
while [ "$(date +%s)" -lt "${end_epoch}" ]; do
if [ "${max_cycles}" -gt 0 ] && [ "${cycle}" -ge "${max_cycles}" ]; then
break
fi
cycle="$((cycle + 1))"
cycle_start="$(date +%s)"
printf "[phase4][%s] cycle=%s start=%s\n" "${engine}" "${cycle}" "${cycle_start}"
@ -241,5 +249,5 @@ while [ "$(date +%s)" -lt "${end_epoch}" ]; do
"${cycle}" "${cycle_start}" "${kill_sent}" "${worker_exit}" "${restart_status}" "${restart_ready_ms}" >> "${restart_file}"
done
python3 "${script_dir}/phase4_report.py" "${restart_file}"
"${python_bin}" "${script_dir}/phase4_report.py" "${restart_file}"
printf "Phase 4 soak finished. Results: %s | Restart log: %s\n" "${result_file}" "${restart_file}"

View File

@ -3,7 +3,7 @@
set -euo pipefail
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
printf "Usage: %s <storage_root> [result_csv]\n" "$0"
exit 1
fi
@ -19,11 +19,6 @@ measure_secs="${MEASURE_SECS:-20}"
prefill_keys="${PREFILL_KEYS:-200000}"
read_path="${READ_PATH:-snapshot}"
if [[ "${db_root}" != /nvme* ]]; then
printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2
exit 1
fi
mkdir -p "${db_root}"
mkdir -p "$(dirname -- "${result_file}")"

View File

@ -1,4 +1,4 @@
use clap::Parser;
use clap::{ArgAction, Parser};
#[cfg(target_os = "linux")]
use logger::Logger;
use mace::{Mace, Options};
@ -27,7 +27,7 @@ const PREFILL_BATCH: usize = 1024;
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None)]
struct Args {
#[arg(short = 'p', long, default_value = "/nvme/kv_bench_mace")]
#[arg(short = 'p', long)]
path: String,
#[arg(short = 'm', long, default_value = "insert")]
@ -60,6 +60,9 @@ struct Args {
#[arg(long, default_value_t = true)]
shared_keyspace: bool,
#[arg(long, action = ArgAction::SetTrue)]
no_shared_keyspace: bool,
#[arg(long, default_value_t = 0)]
prefill_keys: usize,
@ -87,6 +90,9 @@ struct Args {
#[arg(long, default_value_t = true)]
cleanup: bool,
#[arg(long, action = ArgAction::SetTrue)]
no_cleanup: bool,
#[arg(long, default_value_t = false)]
skip_prefill: bool,
@ -416,8 +422,7 @@ fn make_thread_prefix(tid: usize) -> Vec<u8> {
fn latency_bucket(us: u64) -> usize {
let v = us.max(1);
let idx = (63 - v.leading_zeros() as usize).min(LAT_BUCKETS - 1);
idx
(63 - v.leading_zeros() as usize).min(LAT_BUCKETS - 1)
}
fn histogram_quantile_us(hist: &[u64; LAT_BUCKETS], q: f64) -> u64 {
@ -614,12 +619,14 @@ fn pick_op_kind(rng: &mut StdRng, spec: &WorkloadSpec) -> OpKind {
fn main() {
#[cfg(target_os = "linux")]
{
Logger::init().add_file("/tmp/x.log", true);
log::set_max_level(log::LevelFilter::Info);
Logger::init().add_file("kv_bench.log", true);
log::set_max_level(log::LevelFilter::Error);
}
let args = Args::parse();
let path = Path::new(&args.path);
let shared_keyspace = args.shared_keyspace && !args.no_shared_keyspace;
let cleanup = args.cleanup && !args.no_cleanup;
if args.path.is_empty() {
eprintln!("path is empty");
@ -629,6 +636,17 @@ fn main() {
eprintln!("path {:?} already exists", args.path);
exit(1);
}
if args.skip_prefill && !args.reuse_path {
eprintln!("--skip-prefill requires --reuse-path");
exit(1);
}
if args.skip_prefill && !path.exists() {
eprintln!(
"--skip-prefill requires existing path, but `{}` does not exist",
args.path
);
exit(1);
}
if args.threads == 0 {
eprintln!("threads must be greater than 0");
exit(1);
@ -673,6 +691,12 @@ fn main() {
}
};
let mixed_workload = workload.read_pct > 0 && workload.update_pct > 0;
if mixed_workload && !shared_keyspace {
eprintln!("mixed workloads require shared keyspace");
exit(1);
}
let prefill_keys = if workload.requires_prefill {
if args.prefill_keys > 0 {
args.prefill_keys
@ -689,11 +713,6 @@ fn main() {
}
let thread_prefill_ranges = split_ranges(prefill_keys, args.threads);
let thread_op_ranges = if args.shared_keyspace {
thread_prefill_ranges.clone()
} else {
thread_prefill_ranges.clone()
};
let mut opt = Options::new(path);
opt.sync_on_write = durability_mode == DurabilityMode::Durable;
@ -702,7 +721,7 @@ fn main() {
opt.data_file_size = 64 << 20;
opt.max_log_size = 1 << 30;
opt.default_arenas = 128;
opt.tmp_store = args.cleanup;
opt.tmp_store = cleanup;
let db = Mace::new(opt.validate().unwrap()).unwrap();
db.disable_gc();
@ -716,13 +735,12 @@ fn main() {
let value = Arc::new(vec![b'0'; args.value_size]);
if workload.requires_prefill && !args.skip_prefill {
let mut fill_handles = Vec::with_capacity(args.threads);
for tid in 0..args.threads {
let mut fill_handles = Vec::with_capacity(thread_prefill_ranges.len());
for (tid, tr) in thread_prefill_ranges.iter().copied().enumerate() {
let bucket = bkt.clone();
let v = value.clone();
let key_size = args.key_size;
let shared = args.shared_keyspace;
let tr = thread_prefill_ranges[tid];
let shared = shared_keyspace;
fill_handles.push(std::thread::spawn(move || {
coreid::bind_core(tid);
let mut in_batch = 0usize;
@ -766,22 +784,24 @@ fn main() {
let ins_ctr = Arc::clone(&insert_counter);
let key_size = args.key_size;
let random_insert = args.random;
let read_path = read_path;
let read_path_mode = read_path;
let warmup_secs = args.warmup_secs;
let measure_secs = args.measure_secs;
let distribution = spec.distribution;
let zipf_theta = args.zipf_theta;
let scan_len = spec.scan_len;
let shared = args.shared_keyspace;
let prefill_keys = prefill_keys;
let local_key_len = thread_op_ranges[tid].len;
let shared = shared_keyspace;
let prefill_key_count = prefill_keys;
let local_key_len = thread_prefill_ranges[tid].len;
let local_op_count = op_counts[tid].len;
std::thread::spawn(move || {
coreid::bind_core(tid);
let seed = (now_epoch_ms() as u64)
^ ((tid as u64 + 1) * 0x9E37_79B9_7F4A_7C15)
^ ((prefill_keys as u64) << 7);
^ (tid as u64)
.wrapping_add(1)
.wrapping_mul(0x9E37_79B9_7F4A_7C15)
^ (prefill_key_count as u64).wrapping_shl(7);
let mut rng = StdRng::seed_from_u64(seed);
let mut stats = ThreadStats::default();
let mut local_insert_idx = 0usize;
@ -805,11 +825,11 @@ fn main() {
&spec,
distribution,
zipf_theta,
read_path,
read_path_mode,
key_size,
scan_len,
shared,
prefill_keys,
prefill_key_count,
local_key_len,
tid,
&ins_ctr,
@ -833,11 +853,11 @@ fn main() {
&spec,
distribution,
zipf_theta,
read_path,
read_path_mode,
key_size,
scan_len,
shared,
prefill_keys,
prefill_key_count,
local_key_len,
tid,
&ins_ctr,
@ -861,11 +881,11 @@ fn main() {
&spec,
distribution,
zipf_theta,
read_path,
read_path_mode,
key_size,
scan_len,
shared,
prefill_keys,
prefill_key_count,
local_key_len,
tid,
&ins_ctr,
@ -921,7 +941,7 @@ fn main() {
key_size: args.key_size,
value_size: args.value_size,
prefill_keys,
shared_keyspace: args.shared_keyspace,
shared_keyspace,
distribution: workload.distribution,
zipf_theta: args.zipf_theta,
read_pct: workload.read_pct,