phase0: align benchmark v2 workload protocol
This commit is contained in:
parent
abf82f735c
commit
0649db54e7
20
plan_exec.md
Normal file
20
plan_exec.md
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
# kv_bench 执行记录(benchmark_refactor)
|
||||||
|
|
||||||
|
## Phase 0(已完成)
|
||||||
|
- 日期:2026-03-03
|
||||||
|
- 范围:
|
||||||
|
- 重构 `src/main.rs` 与 `rocksdb/main.cpp`,完成 v2 方法学最小清单:
|
||||||
|
- workload preset:`W1..W6`
|
||||||
|
- mixed/read/scan 的 prefill + shared keyspace
|
||||||
|
- 时长模式:`--warmup-secs` / `--measure-secs`
|
||||||
|
- 显式 read path parity:`--read-path snapshot|rw_txn`
|
||||||
|
- 统一 schema 结果落盘(CSV)并自动附带机器/环境元数据
|
||||||
|
- 更新脚本:`scripts/mace.sh`、`scripts/rocksdb.sh`、`scripts/plot.py`、`scripts/init.sh`
|
||||||
|
- 默认数据目录切换为 `/nvme` 体系(脚本强制 db_root 在 `/nvme` 下)
|
||||||
|
- 编译验证:
|
||||||
|
- `cargo check -q` 通过
|
||||||
|
- `cargo build --release -q` 通过
|
||||||
|
- `cmake --build --preset release -j` 通过
|
||||||
|
- 运行烟测:
|
||||||
|
- `mace` 与 `rocksdb` 均可按新参数运行并写入统一 schema 结果文件
|
||||||
|
- 提交:待本阶段 commit
|
||||||
1061
rocksdb/main.cpp
1061
rocksdb/main.cpp
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
|||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
python3 -m venv .
|
python3 -m venv .
|
||||||
./bin/pip3 install pandas matplotlib adjustText
|
./bin/pip3 install pandas matplotlib
|
||||||
rm -f .gitignore
|
rm -f .gitignore
|
||||||
|
|||||||
@ -2,55 +2,61 @@
|
|||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
if [ "$#" -ne 1 ]
|
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
|
||||||
then
|
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
|
||||||
printf "\033[m$0 path\033[0m\n"
|
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
|
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
root_dir="$(cd -- "${script_dir}/.." && pwd)"
|
root_dir="$(cd -- "${script_dir}/.." && pwd)"
|
||||||
|
|
||||||
cargo build --release --manifest-path "${root_dir}/Cargo.toml" 1>/dev/null 2>/dev/null
|
# The runner creates per-case unique paths under this root; each path must not exist.
|
||||||
|
db_root="$1"
|
||||||
|
result_file="${2:-${script_dir}/benchmark_results.csv}"
|
||||||
|
|
||||||
function samples() {
|
warmup_secs="${WARMUP_SECS:-10}"
|
||||||
export RUST_BACKTRACE=full
|
measure_secs="${MEASURE_SECS:-20}"
|
||||||
kv_sz=(16 16 100 1024 1024 1024 16 10240)
|
prefill_keys="${PREFILL_KEYS:-200000}"
|
||||||
mode=(insert get mixed scan)
|
read_path="${READ_PATH:-snapshot}"
|
||||||
# set -x
|
|
||||||
db_root="$1"
|
|
||||||
|
|
||||||
cnt=100000
|
if [[ "${db_root}" != /nvme* ]]; then
|
||||||
for ((i = 1; i <= $(nproc); i *= 2))
|
printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2
|
||||||
do
|
exit 1
|
||||||
for ((j = 0; j < ${#kv_sz[@]}; j += 2))
|
|
||||||
do
|
|
||||||
for ((k = 0; k < ${#mode[@]}; k += 1))
|
|
||||||
do
|
|
||||||
if [ "${mode[k]}" == "insert" ]
|
|
||||||
then
|
|
||||||
"${root_dir}/target/release/kv_bench" --path "${db_root}" --threads "${i}" --iterations "${cnt}" --mode "${mode[k]}" --key-size "${kv_sz[j]}" --value-size "${kv_sz[j+1]}" --random
|
|
||||||
if test $? -ne 0
|
|
||||||
then
|
|
||||||
echo "${mode[k]} threads $i ksz ${kv_sz[j]} vsz ${kv_sz[j+1]} random fail"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
"${root_dir}/target/release/kv_bench" --path "${db_root}" --threads "${i}" --iterations "${cnt}" --mode "${mode[k]}" --key-size "${kv_sz[j]}" --value-size "${kv_sz[j+1]}"
|
|
||||||
if test $? -ne 0
|
|
||||||
then
|
|
||||||
echo "${mode[k]} threads $i ksz ${kv_sz[j]} vsz ${kv_sz[j+1]} fail"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
done
|
|
||||||
done
|
|
||||||
}
|
|
||||||
|
|
||||||
echo mode,threads,key_size,value_size,insert_ratio,ops,elapsed_us > "${script_dir}/mace.csv"
|
|
||||||
samples "$1" 1>> "${script_dir}/mace.csv"
|
|
||||||
if [ -x "${script_dir}/bin/python" ]; then
|
|
||||||
(cd "${script_dir}" && "${script_dir}/bin/python" plot.py mace.csv)
|
|
||||||
else
|
|
||||||
(cd "${script_dir}" && python3 plot.py mace.csv)
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
mkdir -p "${db_root}"
|
||||||
|
mkdir -p "$(dirname -- "${result_file}")"
|
||||||
|
|
||||||
|
cargo build --release --manifest-path "${root_dir}/Cargo.toml"
|
||||||
|
|
||||||
|
workloads=(W1 W2 W3 W4 W5 W6)
|
||||||
|
threads=(1 6 12)
|
||||||
|
profiles=(
|
||||||
|
"32 1024"
|
||||||
|
"32 16384"
|
||||||
|
)
|
||||||
|
|
||||||
|
for workload in "${workloads[@]}"; do
|
||||||
|
for t in "${threads[@]}"; do
|
||||||
|
for kv in "${profiles[@]}"; do
|
||||||
|
read -r key_size value_size <<< "${kv}"
|
||||||
|
run_path="$(mktemp -u -p "${db_root}" "mace_${workload}_${t}_${key_size}_${value_size}_XXXXXX")"
|
||||||
|
printf "[mace] workload=%s threads=%s key=%s value=%s path=%s\n" \
|
||||||
|
"${workload}" "${t}" "${key_size}" "${value_size}" "${run_path}"
|
||||||
|
"${root_dir}/target/release/kv_bench" \
|
||||||
|
--path "${run_path}" \
|
||||||
|
--workload "${workload}" \
|
||||||
|
--threads "${t}" \
|
||||||
|
--key-size "${key_size}" \
|
||||||
|
--value-size "${value_size}" \
|
||||||
|
--prefill-keys "${prefill_keys}" \
|
||||||
|
--warmup-secs "${warmup_secs}" \
|
||||||
|
--measure-secs "${measure_secs}" \
|
||||||
|
--shared-keyspace true \
|
||||||
|
--read-path "${read_path}" \
|
||||||
|
--result-file "${result_file}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
printf "Mace runs finished. Results appended to: %s\n" "${result_file}"
|
||||||
|
|||||||
136
scripts/plot.py
136
scripts/plot.py
@ -1,65 +1,81 @@
|
|||||||
import pandas as pd
|
#!/usr/bin/env python3
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
from adjustText import adjust_text
|
|
||||||
import sys
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
def real_mode(m):
|
def main() -> int:
|
||||||
if m == "mixed":
|
if len(sys.argv) not in (2, 3):
|
||||||
return "Mixed (70% Get, 30% Insert)"
|
print(f"Usage: {sys.argv[0]} <result_csv> [output_dir]")
|
||||||
elif m == "get":
|
return 1
|
||||||
return "Random Get"
|
|
||||||
elif m == "scan":
|
result_csv = Path(sys.argv[1])
|
||||||
return "Sequential Scan"
|
output_dir = Path(sys.argv[2]) if len(sys.argv) == 3 else result_csv.parent
|
||||||
return m.capitalize()
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
df = pd.read_csv(result_csv)
|
||||||
|
|
||||||
|
required = {
|
||||||
|
"engine",
|
||||||
|
"workload_id",
|
||||||
|
"threads",
|
||||||
|
"key_size",
|
||||||
|
"value_size",
|
||||||
|
"ops_per_sec",
|
||||||
|
"p99_us",
|
||||||
|
}
|
||||||
|
missing = required - set(df.columns)
|
||||||
|
if missing:
|
||||||
|
raise ValueError(f"Missing required columns: {sorted(missing)}")
|
||||||
|
|
||||||
|
for engine in sorted(df["engine"].unique()):
|
||||||
|
engine_df = df[df["engine"] == engine]
|
||||||
|
profiles = (
|
||||||
|
engine_df[["key_size", "value_size"]]
|
||||||
|
.drop_duplicates()
|
||||||
|
.sort_values(["key_size", "value_size"])
|
||||||
|
.itertuples(index=False)
|
||||||
|
)
|
||||||
|
|
||||||
|
for key_size, value_size in profiles:
|
||||||
|
sub = engine_df[
|
||||||
|
(engine_df["key_size"] == key_size)
|
||||||
|
& (engine_df["value_size"] == value_size)
|
||||||
|
]
|
||||||
|
if sub.empty:
|
||||||
|
continue
|
||||||
|
|
||||||
|
for metric, ylabel in (("ops_per_sec", "OPS/s"), ("p99_us", "P99 Latency (us)")):
|
||||||
|
plt.figure(figsize=(12, 7))
|
||||||
|
for workload in sorted(sub["workload_id"].unique()):
|
||||||
|
wdf = sub[sub["workload_id"] == workload].sort_values("threads")
|
||||||
|
plt.plot(
|
||||||
|
wdf["threads"],
|
||||||
|
wdf[metric],
|
||||||
|
marker="o",
|
||||||
|
linewidth=2,
|
||||||
|
label=workload,
|
||||||
|
)
|
||||||
|
|
||||||
|
plt.title(
|
||||||
|
f"{engine.upper()} {metric} (key={key_size}, value={value_size})",
|
||||||
|
fontsize=14,
|
||||||
|
)
|
||||||
|
plt.xlabel("Threads")
|
||||||
|
plt.ylabel(ylabel)
|
||||||
|
plt.grid(True, linestyle="--", alpha=0.5)
|
||||||
|
plt.legend()
|
||||||
|
plt.tight_layout()
|
||||||
|
out = output_dir / f"{engine}_{metric}_k{key_size}_v{value_size}.png"
|
||||||
|
plt.savefig(out)
|
||||||
|
plt.close()
|
||||||
|
|
||||||
|
print(f"Charts written to: {output_dir}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
name = sys.argv[1]
|
if __name__ == "__main__":
|
||||||
prefix = name.split(".")[0]
|
raise SystemExit(main())
|
||||||
|
|
||||||
# read benchmark data
|
|
||||||
# keep compatibility with older csv files that used elapsed/elasped
|
|
||||||
# and normalize to elapsed_us
|
|
||||||
|
|
||||||
df = pd.read_csv(f"./{name}")
|
|
||||||
if "elapsed_us" not in df.columns:
|
|
||||||
if "elapsed" in df.columns:
|
|
||||||
df = df.rename(columns={"elapsed": "elapsed_us"})
|
|
||||||
elif "elasped" in df.columns:
|
|
||||||
df = df.rename(columns={"elasped": "elapsed_us"})
|
|
||||||
|
|
||||||
# group by mode
|
|
||||||
modes = df["mode"].unique()
|
|
||||||
|
|
||||||
for mode in modes:
|
|
||||||
plt.figure(figsize=(16, 9))
|
|
||||||
subset = df[df["mode"] == mode]
|
|
||||||
|
|
||||||
# group by key/value size
|
|
||||||
key_value_combinations = subset.groupby(["key_size", "value_size"])
|
|
||||||
|
|
||||||
texts = []
|
|
||||||
for (key_size, value_size), group in key_value_combinations:
|
|
||||||
label = f"key={key_size}B, val={value_size}B"
|
|
||||||
x = group["threads"]
|
|
||||||
y = group["ops"]
|
|
||||||
|
|
||||||
# draw line
|
|
||||||
line, = plt.plot(x, y, marker="o", label=label)
|
|
||||||
|
|
||||||
# add labels
|
|
||||||
for xi, yi, ops in zip(x, y, group["ops"]):
|
|
||||||
texts.append(
|
|
||||||
plt.text(xi, yi, f"{int(ops)}", color=line.get_color(), fontsize=12)
|
|
||||||
)
|
|
||||||
|
|
||||||
adjust_text(texts, arrowprops=dict(arrowstyle="->", color="gray"))
|
|
||||||
|
|
||||||
plt.title(f"{prefix.upper()}: {real_mode(mode)}", fontsize=16)
|
|
||||||
plt.xlabel("Threads", fontsize=14)
|
|
||||||
plt.ylabel("OPS", fontsize=14)
|
|
||||||
plt.grid(True, linestyle="--", alpha=0.6)
|
|
||||||
plt.legend()
|
|
||||||
plt.tight_layout()
|
|
||||||
plt.savefig(f"{prefix}_{mode}.png")
|
|
||||||
plt.close()
|
|
||||||
|
|||||||
@ -2,55 +2,61 @@
|
|||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
if [ "$#" -ne 1 ]
|
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
|
||||||
then
|
printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0"
|
||||||
printf "\033[m$0 path\033[0m\n"
|
exit 1
|
||||||
exit 1
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
|
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
|
||||||
root_dir="$(cd -- "${script_dir}/.." && pwd)"
|
root_dir="$(cd -- "${script_dir}/.." && pwd)"
|
||||||
rocksdb_dir="${root_dir}/rocksdb"
|
rocksdb_dir="${root_dir}/rocksdb"
|
||||||
|
|
||||||
(cd "${rocksdb_dir}" && cmake --preset release 1>/dev/null 2>/dev/null)
|
db_root="$1"
|
||||||
(cd "${rocksdb_dir}" && cmake --build --preset release 1>/dev/null 2>/dev/null)
|
result_file="${2:-${script_dir}/benchmark_results.csv}"
|
||||||
|
|
||||||
function samples() {
|
warmup_secs="${WARMUP_SECS:-10}"
|
||||||
kv_sz=(16 16 100 1024 1024 1024 16 10240)
|
measure_secs="${MEASURE_SECS:-20}"
|
||||||
mode=(insert get mixed scan)
|
prefill_keys="${PREFILL_KEYS:-200000}"
|
||||||
# set -x
|
read_path="${READ_PATH:-snapshot}"
|
||||||
db_root="$1"
|
|
||||||
cnt=100000
|
|
||||||
for ((i = 1; i <= $(nproc); i *= 2))
|
|
||||||
do
|
|
||||||
for ((j = 0; j < ${#kv_sz[@]}; j += 2))
|
|
||||||
do
|
|
||||||
for ((k = 0; k < ${#mode[@]}; k += 1))
|
|
||||||
do
|
|
||||||
if [ "${mode[k]}" == "insert" ]
|
|
||||||
then
|
|
||||||
"${rocksdb_dir}/build/release/rocksdb_bench" --path "${db_root}" --threads "${i}" --iterations "${cnt}" --mode "${mode[k]}" --key-size "${kv_sz[j]}" --value-size "${kv_sz[j+1]}" --random
|
|
||||||
if test $? -ne 0
|
|
||||||
then
|
|
||||||
echo "${mode[k]} threads $i ksz ${kv_sz[j]} vsz ${kv_sz[j+1]} random fail"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
"${rocksdb_dir}/build/release/rocksdb_bench" --path "${db_root}" --threads "${i}" --iterations "${cnt}" --mode "${mode[k]}" --key-size "${kv_sz[j]}" --value-size "${kv_sz[j+1]}"
|
|
||||||
if test $? -ne 0
|
|
||||||
then
|
|
||||||
echo "${mode[k]} threads $i ksz ${kv_sz[j]} vsz ${kv_sz[j+1]} fail"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
done
|
|
||||||
done
|
|
||||||
}
|
|
||||||
|
|
||||||
echo mode,threads,key_size,value_size,insert_ratio,ops,elapsed_us > "${script_dir}/rocksdb.csv"
|
if [[ "${db_root}" != /nvme* ]]; then
|
||||||
samples "$1" 1>> "${script_dir}/rocksdb.csv"
|
printf "db_root must be under /nvme, got: %s\n" "${db_root}" >&2
|
||||||
if [ -x "${script_dir}/bin/python" ]; then
|
exit 1
|
||||||
(cd "${script_dir}" && "${script_dir}/bin/python" plot.py rocksdb.csv)
|
|
||||||
else
|
|
||||||
(cd "${script_dir}" && python3 plot.py rocksdb.csv)
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
mkdir -p "${db_root}"
|
||||||
|
mkdir -p "$(dirname -- "${result_file}")"
|
||||||
|
|
||||||
|
(cd "${rocksdb_dir}" && cmake --preset release)
|
||||||
|
(cd "${rocksdb_dir}" && cmake --build --preset release)
|
||||||
|
|
||||||
|
workloads=(W1 W2 W3 W4 W5 W6)
|
||||||
|
threads=(1 6 12)
|
||||||
|
profiles=(
|
||||||
|
"32 1024"
|
||||||
|
"32 16384"
|
||||||
|
)
|
||||||
|
|
||||||
|
for workload in "${workloads[@]}"; do
|
||||||
|
for t in "${threads[@]}"; do
|
||||||
|
for kv in "${profiles[@]}"; do
|
||||||
|
read -r key_size value_size <<< "${kv}"
|
||||||
|
run_path="$(mktemp -u -p "${db_root}" "rocksdb_${workload}_${t}_${key_size}_${value_size}_XXXXXX")"
|
||||||
|
printf "[rocksdb] workload=%s threads=%s key=%s value=%s path=%s\n" \
|
||||||
|
"${workload}" "${t}" "${key_size}" "${value_size}" "${run_path}"
|
||||||
|
"${rocksdb_dir}/build/release/rocksdb_bench" \
|
||||||
|
--path "${run_path}" \
|
||||||
|
--workload "${workload}" \
|
||||||
|
--threads "${t}" \
|
||||||
|
--key-size "${key_size}" \
|
||||||
|
--value-size "${value_size}" \
|
||||||
|
--prefill-keys "${prefill_keys}" \
|
||||||
|
--warmup-secs "${warmup_secs}" \
|
||||||
|
--measure-secs "${measure_secs}" \
|
||||||
|
--read-path "${read_path}" \
|
||||||
|
--result-file "${result_file}"
|
||||||
|
done
|
||||||
|
done
|
||||||
|
done
|
||||||
|
|
||||||
|
printf "RocksDB runs finished. Results appended to: %s\n" "${result_file}"
|
||||||
|
|||||||
1115
src/main.rs
1115
src/main.rs
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user