diff --git a/plan_exec.md b/plan_exec.md
index d0c54d5..81b28b8 100644
--- a/plan_exec.md
+++ b/plan_exec.md
@@ -18,3 +18,21 @@
   - 运行烟测:
     - `mace` 与 `rocksdb` 均可按新参数运行并写入统一 schema 结果文件
 - 提交:待本阶段 commit
+
+## Phase 1(已完成)
+- 日期:2026-03-03
+- 范围:
+  - 新增 `scripts/phase1.sh`:按文档矩阵执行小规模试跑
+    - dataset:`tier-s`
+    - workload:`W1/W3/W6`
+    - profile:`P2/P3`
+    - threads:`1/12`
+    - repeats:默认 `3`(可由 `REPEATS` 覆盖)
+  - 新增 `scripts/phase1_eval.py`:按 case 聚合并计算
+    - throughput CV
+    - p99 CV
+    - 稳定性通过率(门槛:throughput CV<=10%, p99 CV<=15%)
+- 验证:
+  - `bash -n scripts/phase1.sh` 通过
+  - `python3 -m py_compile scripts/phase1_eval.py` 通过
+- 提交:待本阶段 commit
diff --git a/scripts/phase1.sh b/scripts/phase1.sh
new file mode 100755
index 0000000..b95f649
--- /dev/null
+++ b/scripts/phase1.sh
@@ -0,0 +1,132 @@
+#!/usr/bin/env bash
+#
+# Phase 1 small-scale trial: run every workload/profile/thread combination
+# against both engines (mace, rocksdb) REPEATS times, then pass the result
+# CSV to phase1_eval.py.
+#
+# Usage: phase1.sh <db_root> [result_csv]
+# Environment overrides: WARMUP_SECS, MEASURE_SECS, REPEATS, READ_PATH.
+
+set -euo pipefail
+
+if [ "$#" -lt 1 ] || [ "$#" -gt 2 ]; then
+    printf "Usage: %s <db_root> [result_csv]\n" "$0" >&2
+    exit 1
+fi
+
+script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
+root_dir="$(cd -- "${script_dir}/.." && pwd)"
+
+if [[ "$1" != /nvme* ]]; then
+    printf "db_root must be under /nvme, got: %s\n" "$1" >&2
+    exit 1
+fi
+
+db_root="$1"
+result_file="${2:-${script_dir}/phase1_results.csv}"
+
+warmup_secs="${WARMUP_SECS:-120}"
+measure_secs="${MEASURE_SECS:-300}"
+repeats="${REPEATS:-3}"
+read_path="${READ_PATH:-snapshot}"
+
+mkdir -p "${db_root}"
+mkdir -p "$(dirname -- "${result_file}")"
+
+# Build both benchmark binaries before any case is run.
+cargo build --release --manifest-path "${root_dir}/Cargo.toml"
+(cd "${root_dir}/rocksdb" && cmake --preset release)
+(cd "${root_dir}/rocksdb" && cmake --build --preset release)
+
+workloads=(W1 W3 W6)
+threads=(1 12)
+profiles=(P2 P3)
+
+# Print the --key-size value for profile $1; exits on an unknown profile.
+profile_key() {
+    case "$1" in
+        P2) echo 32 ;;
+        P3) echo 32 ;;
+        *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;;
+    esac
+}
+
+# Print the --value-size value for profile $1; exits on an unknown profile.
+profile_val() {
+    case "$1" in
+        P2) echo 1024 ;;
+        P3) echo 16384 ;;
+        *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;;
+    esac
+}
+
+# Print the tier-s --prefill-keys value for profile $1; exits if unknown.
+profile_prefill_tier_s() {
+    case "$1" in
+        P2) echo 6100805 ;;
+        P3) echo 392449 ;;
+        *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;;
+    esac
+}
+
+# Run one benchmark case: $1 engine, $2 workload, $3 profile, $4 threads, $5 repeat.
+# Any engine other than "mace" is run with the rocksdb benchmark binary.
+run_case() {
+    local engine="$1"
+    local workload="$2"
+    local profile="$3"
+    local t="$4"
+    local repeat="$5"
+
+    local key_size value_size prefill_keys run_path
+    key_size="$(profile_key "${profile}")"
+    value_size="$(profile_val "${profile}")"
+    prefill_keys="$(profile_prefill_tier_s "${profile}")"
+    # mktemp -u only generates a unique name; nothing is created at this path here.
+    run_path="$(mktemp -u -p "${db_root}" "${engine}_phase1_${workload}_${profile}_t${t}_r${repeat}_XXXXXX")"
+
+    printf "[phase1][%s] repeat=%s workload=%s profile=%s threads=%s path=%s\n" \
+        "${engine}" "${repeat}" "${workload}" "${profile}" "${t}" "${run_path}"
+
+    if [ "${engine}" = "mace" ]; then
+        "${root_dir}/target/release/kv_bench" \
+            --path "${run_path}" \
+            --workload "${workload}" \
+            --threads "${t}" \
+            --key-size "${key_size}" \
+            --value-size "${value_size}" \
+            --prefill-keys "${prefill_keys}" \
+            --warmup-secs "${warmup_secs}" \
+            --measure-secs "${measure_secs}" \
+            --shared-keyspace true \
+            --read-path "${read_path}" \
+            --result-file "${result_file}"
+    else
+        "${root_dir}/rocksdb/build/release/rocksdb_bench" \
+            --path "${run_path}" \
+            --workload "${workload}" \
+            --threads "${t}" \
+            --key-size "${key_size}" \
+            --value-size "${value_size}" \
+            --prefill-keys "${prefill_keys}" \
+            --warmup-secs "${warmup_secs}" \
+            --measure-secs "${measure_secs}" \
+            --read-path "${read_path}" \
+            --result-file "${result_file}"
+    fi
+}
+
+# The repeat index is the outermost loop, so each repeat sweeps the full matrix once.
+for repeat in $(seq 1 "${repeats}"); do
+    for workload in "${workloads[@]}"; do
+        for profile in "${profiles[@]}"; do
+            for t in "${threads[@]}"; do
+                run_case mace "${workload}" "${profile}" "${t}" "${repeat}"
+                run_case rocksdb "${workload}" "${profile}" "${t}" "${repeat}"
+            done
+        done
+    done
+done
+
+python3 "${script_dir}/phase1_eval.py" "${result_file}"
+printf "Phase 1 finished. Results: %s\n" "${result_file}"
diff --git a/scripts/phase1_eval.py b/scripts/phase1_eval.py
new file mode 100755
index 0000000..5158948
--- /dev/null
+++ b/scripts/phase1_eval.py
@@ -0,0 +1,76 @@
+#!/usr/bin/env python3
+"""Evaluate run-to-run stability of the phase 1 benchmark results."""
+
+import sys
+import pandas as pd
+
+
+def cv(series: pd.Series) -> float:
+    """Return the coefficient of variation of *series* as a percentage.
+
+    Uses the sample standard deviation (``ddof=1``). A zero mean returns 0.0
+    instead of dividing by zero; otherwise fewer than two samples give NaN.
+    """
+    mean = float(series.mean())
+    if mean == 0:
+        return 0.0
+    return float(series.std(ddof=1) / mean * 100.0)
+
+
+def main() -> int:
+    """Aggregate the result CSV per case and print a stability report.
+
+    Returns 1 on bad usage and 0 otherwise; raises ValueError when a
+    required column is missing from the CSV.
+    """
+    if len(sys.argv) != 2:
+        print(f"Usage: {sys.argv[0]} <result_csv>", file=sys.stderr)
+        return 1
+
+    result_csv = sys.argv[1]
+    df = pd.read_csv(result_csv)
+
+    needed = {
+        "engine",
+        "workload_id",
+        "key_size",
+        "value_size",
+        "threads",
+        "ops_per_sec",
+        "p99_us",
+    }
+    missing = needed - set(df.columns)
+    if missing:
+        raise ValueError(f"Missing columns: {sorted(missing)}")
+
+    sub = df[df["workload_id"].isin(["W1", "W3", "W6"])].copy()
+    if sub.empty:
+        print("No phase1 rows found in csv")
+        return 0
+
+    grp_cols = ["engine", "workload_id", "key_size", "value_size", "threads"]
+    agg = (
+        sub.groupby(grp_cols)
+        .agg(
+            repeats=("ops_per_sec", "count"),
+            throughput_cv=("ops_per_sec", cv),
+            p99_cv=("p99_us", cv),
+            throughput_median=("ops_per_sec", "median"),
+            p99_median=("p99_us", "median"),
+        )
+        .reset_index()
+    )
+    # A NaN CV compares False against the thresholds, so that case counts as not stable.
+    agg["stable"] = (agg["throughput_cv"] <= 10.0) & (agg["p99_cv"] <= 15.0)
+
+    stable_ratio = float(agg["stable"].mean() * 100.0) if len(agg) > 0 else 0.0
+
+    with pd.option_context("display.max_rows", None, "display.max_columns", None):
+        print(agg.to_string(index=False))
+    print(f"\nStable cases: {stable_ratio:.1f}% ({agg['stable'].sum()}/{len(agg)})")
+
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())