phase2: add steady-state matrix runner and report
This commit is contained in:
parent
436e81353f
commit
f0fd573d96
12
plan_exec.md
12
plan_exec.md
@ -36,3 +36,15 @@
|
||||
- `bash -n scripts/phase1.sh` 通过
|
||||
- `python3 -m py_compile scripts/phase1_eval.py` 通过
|
||||
- 提交:待本阶段 commit
|
||||
|
||||
## Phase 2(已完成)

- 日期:2026-03-03
- 范围:
  - 新增 `scripts/phase2.sh`:稳态核心报告矩阵执行器
    - `tier-m` 全量:`W1/W2/W3/W4/W6` × `P2/P3` × `threads(1/6/12)` × `repeats(默认5)`
    - 可选 `tier-l` 代表集:`RUN_TIER_L_REPRESENTATIVE=1` 启用,默认 `TIER_L_REPEATS=1`
  - 新增 `scripts/phase2_report.py`:输出按 case 的 `throughput/p95/p99 median`,并给出慢场景对比表
- 验证:
  - `bash -n scripts/phase2.sh` 通过
  - `python3 -m py_compile scripts/phase2_report.py` 通过
- 提交:待本阶段 commit
|
||||
|
||||
150
scripts/phase2.sh
Executable file
150
scripts/phase2.sh
Executable file
@ -0,0 +1,150 @@
|
||||
#!/usr/bin/env bash
#
# Phase 2 benchmark matrix runner.
#
# Usage: <script> <db_root_under_/nvme> [result_csv]
#   db_root     directory at or below /nvme used as the database root
#   result_csv  optional CSV path; defaults to phase2_results.csv next to
#               this script

set -euo pipefail

# Exactly one or two positional arguments are accepted.
if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
  printf "Usage: %s <db_root_under_/nvme> [result_csv]\n" "$0" >&2
  exit 1
fi

# Absolute directory of this script, and the directory one level above it.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"

# Accept /nvme itself or a path below it. A bare "/nvme*" glob would also let
# sibling paths such as /nvme-old or /nvmefoo through, which are not under
# /nvme at all.
case "$1" in
  /nvme | /nvme/*) ;;
  *)
    printf "db_root must be under /nvme, got: %s\n" "$1" >&2
    exit 1
    ;;
esac

db_root="$1"
result_file="${2:-${script_dir}/phase2_results.csv}"
|
||||
|
||||
# Run parameters. Each one takes its value from the environment variable named
# in the expansion and falls back to the default shown when that variable is
# unset or empty.
warmup_secs="${WARMUP_SECS:-120}"
measure_secs="${MEASURE_SECS:-300}"
repeats="${REPEATS:-5}"
read_path="${READ_PATH:-snapshot}"
run_tier_l_rep="${RUN_TIER_L_REPRESENTATIVE:-0}"
tier_l_repeats="${TIER_L_REPEATS:-1}"

# Make sure the database root and the result file's directory exist. The "--"
# keeps a directory name that starts with "-" from being read as an option.
mkdir -p -- "${db_root}"
mkdir -p -- "$(dirname -- "${result_file}")"

# Release builds: the Cargo manifest at the repository root, then the CMake
# "release" preset (configure + build) inside rocksdb/.
cargo build --release --manifest-path "${root_dir}/Cargo.toml"
(
  cd "${root_dir}/rocksdb" \
    && cmake --preset release \
    && cmake --build --preset release
)
|
||||
|
||||
# Axes of the benchmark matrix.
# tier-m iterates every workload and thread count listed here; the tier-l
# arrays hold the smaller subset used for the optional tier-l pass.
workloads_tier_m=(W1 W2 W3 W4 W6)
workloads_tier_l_rep=(W1 W3 W6)
threads_tier_m=(1 6 12)
threads_tier_l_rep=(1 12)
profiles=(P2 P3)
|
||||
|
||||
#######################################
# Print the key size associated with a profile.
# Arguments: $1 - profile name (P2 or P3)
# Outputs:   the key size on stdout; an error message on stderr for any
#            other profile name
# Returns:   exits with status 1 on an unknown profile (callers invoke this
#            inside $(...), so only that subshell terminates)
#######################################
profile_key() {
  case "$1" in
    # Both known profiles share the same key size.
    P2 | P3) printf '%s\n' 32 ;;
    *)
      printf "unknown profile: %s\n" "$1" >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
#######################################
# Print the value size associated with a profile.
# Arguments: $1 - profile name (P2 or P3)
# Outputs:   the value size on stdout; an error message on stderr for any
#            other profile name
# Returns:   exits with status 1 on an unknown profile (callers invoke this
#            inside $(...), so only that subshell terminates)
#######################################
profile_val() {
  case "$1" in
    P2) printf '%s\n' 1024 ;;
    P3) printf '%s\n' 16384 ;;
    *)
      printf "unknown profile: %s\n" "$1" >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
#######################################
# Print the number of keys to prefill for a tier/profile combination.
# Arguments: $1 - tier name (tier-m or tier-l)
#            $2 - profile name (P2 or P3)
# Outputs:   the prefill key count on stdout; an error message on stderr for
#            an unknown tier or profile
# Returns:   exits with status 1 on an unknown tier or profile
#######################################
prefill_for() {
  local tier="$1"
  local profile="$2"

  # The tier is checked first so that an unknown tier is reported as such even
  # when the profile is also invalid.
  case "${tier}" in
    tier-m)
      case "${profile}" in
        P2) printf '%s\n' 18302417 ;;
        P3) printf '%s\n' 1177348 ;;
        *)
          printf "unknown profile: %s\n" "${profile}" >&2
          exit 1
          ;;
      esac
      ;;
    tier-l)
      case "${profile}" in
        P2) printf '%s\n' 28470427 ;;
        P3) printf '%s\n' 1831430 ;;
        *)
          printf "unknown profile: %s\n" "${profile}" >&2
          exit 1
          ;;
      esac
      ;;
    *)
      printf "unknown tier: %s\n" "${tier}" >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
#######################################
# Run one benchmark case against one engine.
# Globals:   root_dir, db_root, warmup_secs, measure_secs, read_path,
#            result_file (all read)
# Arguments: $1 - engine (mace or rocksdb)
#            $2 - tier, forwarded to prefill_for
#            $3 - workload id, forwarded as --workload
#            $4 - profile, resolved through profile_key / profile_val
#            $5 - thread count, forwarded as --threads
#            $6 - repeat index, used in the log line and the run path name
# Outputs:   one "[phase2][engine] ..." log line on stdout, followed by
#            whatever the benchmark binary prints
# Returns:   the benchmark binary's exit status; exits with status 1 on an
#            unknown engine
#######################################
run_case() {
  local engine="$1"
  local tier="$2"
  local workload="$3"
  local profile="$4"
  local t="$5"
  local repeat="$6"

  # Resolve the binary first. Previously every engine other than "mace" fell
  # through to the RocksDB binary, so a misspelt engine name produced a
  # mislabelled RocksDB run instead of an error.
  local bench_bin
  case "${engine}" in
    mace) bench_bin="${root_dir}/target/release/kv_bench" ;;
    rocksdb) bench_bin="${root_dir}/rocksdb/build/release/rocksdb_bench" ;;
    *)
      printf "unknown engine: %s\n" "${engine}" >&2
      exit 1
      ;;
  esac

  local key_size value_size prefill_keys run_path
  key_size="$(profile_key "${profile}")"
  value_size="$(profile_val "${profile}")"
  prefill_keys="$(prefill_for "${tier}" "${profile}")"
  # -u only generates a unique name under db_root; nothing is created here.
  # NOTE(review): this function never removes run_path afterwards — confirm
  # the benchmark binaries clean up, or that db_root has room for every run.
  run_path="$(mktemp -u -p "${db_root}" "${engine}_phase2_${tier}_${workload}_${profile}_t${t}_r${repeat}_XXXXXX")"

  printf "[phase2][%s] tier=%s repeat=%s workload=%s profile=%s threads=%s path=%s\n" \
    "${engine}" "${tier}" "${repeat}" "${workload}" "${profile}" "${t}" "${run_path}"

  # Both binaries take the same options; only mace additionally receives
  # --shared-keyspace, kept in its original position before --read-path.
  local -a bench_args=(
    --path "${run_path}"
    --workload "${workload}"
    --threads "${t}"
    --key-size "${key_size}"
    --value-size "${value_size}"
    --prefill-keys "${prefill_keys}"
    --warmup-secs "${warmup_secs}"
    --measure-secs "${measure_secs}"
  )
  if [ "${engine}" = "mace" ]; then
    bench_args+=(--shared-keyspace true)
  fi
  bench_args+=(
    --read-path "${read_path}"
    --result-file "${result_file}"
  )

  "${bench_bin}" "${bench_args[@]}"
}
|
||||
|
||||
# Validate the repeat counts before any benchmark starts. With
# `for r in $(seq 1 "$n")`, a malformed count only makes seq fail inside the
# command substitution; `set -e` does not see that failure, the loop gets an
# empty word list, and the whole matrix is skipped without an error.
if ! [[ "${repeats}" =~ ^[0-9]+$ ]]; then
  printf "REPEATS must be a non-negative integer, got: %s\n" "${repeats}" >&2
  exit 1
fi
if [ "${run_tier_l_rep}" = "1" ] && ! [[ "${tier_l_repeats}" =~ ^[0-9]+$ ]]; then
  printf "TIER_L_REPEATS must be a non-negative integer, got: %s\n" "${tier_l_repeats}" >&2
  exit 1
fi

# tier-m full matrix: every workload x profile x thread count, both engines
# back to back for each case. "10#" forces base 10 so a value such as 08 is
# not parsed as octal.
for ((repeat = 1; repeat <= 10#${repeats}; repeat++)); do
  for workload in "${workloads_tier_m[@]}"; do
    for profile in "${profiles[@]}"; do
      for t in "${threads_tier_m[@]}"; do
        run_case mace tier-m "${workload}" "${profile}" "${t}" "${repeat}"
        run_case rocksdb tier-m "${workload}" "${profile}" "${t}" "${repeat}"
      done
    done
  done
done

# tier-l representative subset (optional, enabled when run_tier_l_rep is "1")
if [ "${run_tier_l_rep}" = "1" ]; then
  for ((repeat = 1; repeat <= 10#${tier_l_repeats}; repeat++)); do
    for workload in "${workloads_tier_l_rep[@]}"; do
      for profile in "${profiles[@]}"; do
        for t in "${threads_tier_l_rep[@]}"; do
          run_case mace tier-l "${workload}" "${profile}" "${t}" "${repeat}"
          run_case rocksdb tier-l "${workload}" "${profile}" "${t}" "${repeat}"
        done
      done
    done
  done
fi

# Summarise the collected CSV, then report where the raw results live.
python3 "${script_dir}/phase2_report.py" "${result_file}"
printf "Phase 2 finished. Results: %s\n" "${result_file}"
|
||||
105
scripts/phase2_report.py
Executable file
105
scripts/phase2_report.py
Executable file
@ -0,0 +1,105 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# Known (key_size, value_size, prefill_keys) combinations and the tier label
# each one identifies. Built once at import time instead of once per row.
_TIER_BY_SHAPE = {
    (32, 1024, 18302417): "tier-m",
    (32, 16384, 1177348): "tier-m",
    (32, 1024, 28470427): "tier-l",
    (32, 16384, 1831430): "tier-l",
}


def infer_tier(row: pd.Series) -> str:
    """Return the tier label for one result row.

    The tier is looked up from the row's ``key_size``, ``value_size`` and
    ``prefill_keys`` fields, each converted with ``int()``.

    Args:
        row: A mapping-like row (for example a ``pd.Series``) exposing the
            three fields above.

    Returns:
        ``"tier-m"`` or ``"tier-l"`` for a known combination, ``"unknown"``
        for any other combination or when a field cannot be converted to an
        integer (such as NaN from a blank CSV cell).

    Raises:
        KeyError: If one of the three fields is absent from ``row``.
    """
    try:
        shape = (
            int(row["key_size"]),
            int(row["value_size"]),
            int(row["prefill_keys"]),
        )
    except (TypeError, ValueError, OverflowError):
        # A missing or non-numeric cell cannot match any known combination;
        # report it the same way as an unrecognised one instead of crashing.
        return "unknown"
    return _TIER_BY_SHAPE.get(shape, "unknown")
|
||||
|
||||
|
||||
def main() -> int:
    """Print a per-case summary and an engine comparison for a result CSV.

    Reads the CSV named by the single command-line argument, keeps rows whose
    ``workload_id`` is one of W1/W2/W3/W4/W6, labels each row with a tier via
    ``infer_tier``, and prints:

    * the repeat count and the medians of ``ops_per_sec``, ``p95_us`` and
      ``p99_us`` per (tier, engine, workload, key size, value size, threads);
    * when both ``mace`` and ``rocksdb`` appear, a table comparing their
      throughput medians for every case.

    Returns:
        ``1`` when the argument count is wrong, ``0`` otherwise (including
        when the CSV holds no matching rows).

    Raises:
        ValueError: If the CSV lacks any required column.
    """
    if len(sys.argv) != 2:
        # Usage text is a diagnostic, so it goes to stderr.
        print(f"Usage: {sys.argv[0]} <result_csv>", file=sys.stderr)
        return 1

    df = pd.read_csv(sys.argv[1])

    needed = {
        "engine",
        "workload_id",
        "key_size",
        "value_size",
        "prefill_keys",
        "threads",
        "ops_per_sec",
        "p95_us",
        "p99_us",
    }
    missing = needed - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {sorted(missing)}")

    sub = df[df["workload_id"].isin(["W1", "W2", "W3", "W4", "W6"])].copy()
    if sub.empty:
        print("No phase2 rows found in csv")
        return 0

    sub["tier"] = sub.apply(infer_tier, axis=1)

    grp_cols = ["tier", "engine", "workload_id", "key_size", "value_size", "threads"]
    summary = (
        sub.groupby(grp_cols)
        .agg(
            repeats=("ops_per_sec", "count"),
            throughput_median=("ops_per_sec", "median"),
            p95_median=("p95_us", "median"),
            p99_median=("p99_us", "median"),
        )
        .reset_index()
    )

    with pd.option_context("display.max_rows", None, "display.max_columns", None):
        print(summary.to_string(index=False))

    # Slow-scenario extraction by throughput median.
    piv = summary.pivot_table(
        index=["tier", "workload_id", "key_size", "value_size", "threads"],
        columns="engine",
        values="throughput_median",
        aggfunc="first",
    ).reset_index()

    if {"mace", "rocksdb"}.issubset(set(piv.columns)):

        def slower_engine(r):
            # A case measured for only one engine has NaN for the other; any
            # comparison with NaN is False, which used to label such rows
            # "rocksdb" regardless of which engine was missing.
            if pd.isna(r["mace"]) or pd.isna(r["rocksdb"]):
                return "n/a"
            return "mace" if r["mace"] < r["rocksdb"] else "rocksdb"

        def slower_ratio(r):
            if pd.isna(r["mace"]) or pd.isna(r["rocksdb"]):
                return float("nan")
            fastest = max(r["mace"], r["rocksdb"])
            # Guard the division when neither engine recorded any throughput.
            return min(r["mace"], r["rocksdb"]) / fastest if fastest > 0 else 0.0

        piv["slower_engine"] = piv.apply(slower_engine, axis=1)
        piv["slower_ratio"] = piv.apply(slower_ratio, axis=1)

        print("\nSlow scenarios (by throughput median):")
        print(
            piv[
                [
                    "tier",
                    "workload_id",
                    "key_size",
                    "value_size",
                    "threads",
                    "mace",
                    "rocksdb",
                    "slower_engine",
                    "slower_ratio",
                ]
            ].to_string(index=False)
        )

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Loading…
Reference in New Issue
Block a user