From 9e1f1a884ce8dddfcc6e731a90b2a4a90f793b77 Mon Sep 17 00:00:00 2001 From: abbycin Date: Wed, 4 Mar 2026 23:34:12 +0800 Subject: [PATCH] bench: auto thread points and expand kv profiles --- docs/repro.md | 14 +++++++++--- maritx.md | 9 -------- scripts/mace.sh | 11 ++++++++- scripts/phase1.sh | 21 ++++++++++++++---- scripts/phase2.sh | 32 ++++++++++++++++++++++----- scripts/phase3.sh | 7 +++++- scripts/rocksdb.sh | 11 ++++++++- scripts/thread_points.sh | 48 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 128 insertions(+), 25 deletions(-) delete mode 100644 maritx.md create mode 100644 scripts/thread_points.sh diff --git a/docs/repro.md b/docs/repro.md index 5df0216..c9c4cc6 100644 --- a/docs/repro.md +++ b/docs/repro.md @@ -68,6 +68,17 @@ Generate plots: ``` ## 5. Phase Reproduction +Default thread points now follow host CPU count: +- If CPU count is a power of two: `1 2 4 ... N` (e.g., `4 -> 1 2 4`, `8 -> 1 2 4 8`, `16 -> 1 2 4 8 16`) +- Otherwise: odd-number progression up to near-full CPU usage (e.g., `12 -> 1 3 5 7 9 11`) + +You can still override via `PHASE1_THREADS` / `PHASE2_THREADS_TIER_M` / `PHASE3_THREADS`. + +Default KV profiles in scripts: +- `P1`: `16/128` +- `P2`: `32/1024` +- `P3`: `32/4096` +- `P4`: `32/16384` (large-value group) ### 5.1 Phase 1 ```bash @@ -77,7 +88,6 @@ rm -f "${KV_BENCH_RESULT_ROOT}/phase1_results.csv" WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \ PHASE1_WORKLOADS="W1 W3 W6" \ -PHASE1_THREADS="1 6" \ PHASE1_PROFILES="P2" \ PHASE1_PREFILL_TIER_S_P2=200000 \ ./scripts/phase1.sh "${KV_BENCH_STORAGE_ROOT}/phase1" "${KV_BENCH_RESULT_ROOT}/phase1_results.csv" @@ -91,7 +101,6 @@ rm -f "${KV_BENCH_RESULT_ROOT}/phase2_results.csv" WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \ PHASE2_WORKLOADS_TIER_M="W1 W3 W6" \ -PHASE2_THREADS_TIER_M="1 6" \ PHASE2_PROFILES="P2" \ PHASE2_PREFILL_TIER_M_P2=500000 \ RUN_TIER_L_REPRESENTATIVE=0 \ @@ -113,7 +122,6 @@ rm -f "${KV_BENCH_RESULT_ROOT}/phase3_results.csv" WARMUP_SECS=10 MEASURE_SECS=20 REPEATS=2 \ PHASE3_WORKLOADS="W1 W3" \ -PHASE3_THREADS="1 6" \ PHASE3_DURABILITIES="relaxed durable" \ PHASE3_KEY_SIZE=32 PHASE3_VALUE_SIZE=1024 PHASE3_PREFILL_KEYS=500000 \ ./scripts/phase3.sh "${KV_BENCH_STORAGE_ROOT}/phase3" "${KV_BENCH_RESULT_ROOT}/phase3_results.csv" diff --git a/maritx.md b/maritx.md deleted file mode 100644 index f2ab6ee..0000000 --- a/maritx.md +++ /dev/null @@ -1,9 +0,0 @@ -| 组合类型 | Key 大小 | Value 大小 | 典型场景 | 测试目标 | -|------------------|-------------------|---------------------------|------------------------------|--------------------------------------| -| 小 Key + 小 Value | 10-100 字节 | 100 字节 - 1 KB | 缓存用户信息、配置、热点数据 | 高吞吐量(QPS)、低延迟、内存效率 | -| 小 Key + 大 Value | 10-100 字节 | 1 KB - 数 MB | 存储文档、图片缩略图、日志 | 网络带宽利用率、持久化性能、压缩效率 | -| 大 Key + 小 Value | 1 KB - 10 KB | 100 字节 - 1 KB | 分布式锁、复杂命名空间 | Key 哈希性能、分片均衡性、元数据内存占用 | -| 大 Key + 大 Value | 1 KB - 10 KB | 数 MB - 100 MB | 极端负载测试、BLOB 存储 | 系统极限性能、I/O 吞吐、GC 压力 | -| 可变大小组合 | 10 字节 - 1 KB | 100 字节 - 10 MB | 混合业务负载(如电商系统) | 稳定性、缓存命中率、碎片化影响 | -| 超小 Value | 10-50 字节 | 1-8 字节 | 分布式锁、计数器(如 Redis 原子操作) | 高并发竞争性能、CAS 效率 | -| 超大 Value | 10-100 字节 | 接近系统上限(如 512 MB) | 单 Value 极限测试 | 单线程阻塞风险、序列化/反序列化瓶颈 | \ No newline at end of file diff --git a/scripts/mace.sh b/scripts/mace.sh index 3fff3b3..174dbf2 100755 --- a/scripts/mace.sh +++ b/scripts/mace.sh @@ -9,6 +9,8 @@ fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." && pwd)" +# shellcheck source=./thread_points.sh +. "${script_dir}/thread_points.sh" # The runner creates per-case unique paths under this root; each path must not exist. db_root="$1" @@ -25,9 +27,16 @@ mkdir -p "$(dirname -- "${result_file}")" cargo build --release --manifest-path "${root_dir}/Cargo.toml" workloads=(W1 W2 W3 W4 W5 W6) -threads=(1 6 12) +mace_threads_raw="${MACE_THREADS:-$(default_thread_points)}" +IFS=' ' read -r -a threads <<< "${mace_threads_raw}" +if [ "${#threads[@]}" -eq 0 ]; then + printf "mace threads list must not be empty\n" >&2 + exit 1 +fi profiles=( + "16 128" "32 1024" + "32 4096" "32 16384" ) diff --git a/scripts/phase1.sh b/scripts/phase1.sh index 286c650..c94df17 100755 --- a/scripts/phase1.sh +++ b/scripts/phase1.sh @@ -9,6 +9,8 @@ fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." && pwd)" +# shellcheck source=./thread_points.sh +. "${script_dir}/thread_points.sh" python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}" if [ ! -x "${python_bin}" ]; then python_bin="${PYTHON:-python3}" @@ -23,10 +25,15 @@ repeats="${REPEATS:-3}" read_path="${READ_PATH:-snapshot}" phase1_workloads_raw="${PHASE1_WORKLOADS:-W1 W3 W6}" -phase1_threads_raw="${PHASE1_THREADS:-1 12}" -phase1_profiles_raw="${PHASE1_PROFILES:-P2 P3}" +phase1_threads_raw="${PHASE1_THREADS:-}" +if [ -z "${phase1_threads_raw}" ]; then + phase1_threads_raw="$(default_thread_points)" +fi +phase1_profiles_raw="${PHASE1_PROFILES:-P1 P2 P3 P4}" +phase1_prefill_tier_s_p1="${PHASE1_PREFILL_TIER_S_P1:-44739242}" phase1_prefill_tier_s_p2="${PHASE1_PREFILL_TIER_S_P2:-6100805}" -phase1_prefill_tier_s_p3="${PHASE1_PREFILL_TIER_S_P3:-392449}" +phase1_prefill_tier_s_p3="${PHASE1_PREFILL_TIER_S_P3:-1560671}" +phase1_prefill_tier_s_p4="${PHASE1_PREFILL_TIER_S_P4:-392449}" mkdir -p "${db_root}" mkdir -p "$(dirname -- "${result_file}")" @@ -46,24 +53,30 @@ fi profile_key() { case "$1" in + P1) echo 16 ;; P2) echo 32 ;; P3) echo 32 ;; + P4) echo 32 ;; *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;; esac } profile_val() { case "$1" in + P1) echo 128 ;; P2) echo 1024 ;; - P3) echo 16384 ;; + P3) echo 4096 ;; + P4) echo 16384 ;; *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;; esac } profile_prefill_tier_s() { case "$1" in + P1) echo "${phase1_prefill_tier_s_p1}" ;; P2) echo "${phase1_prefill_tier_s_p2}" ;; P3) echo "${phase1_prefill_tier_s_p3}" ;; + P4) echo "${phase1_prefill_tier_s_p4}" ;; *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;; esac } diff --git a/scripts/phase2.sh b/scripts/phase2.sh index e7638f5..6a2c2a0 100755 --- a/scripts/phase2.sh +++ b/scripts/phase2.sh @@ -9,6 +9,8 @@ fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." && pwd)" +# shellcheck source=./thread_points.sh +. "${script_dir}/thread_points.sh" python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}" if [ ! -x "${python_bin}" ]; then python_bin="${PYTHON:-python3}" @@ -26,13 +28,23 @@ tier_l_repeats="${TIER_L_REPEATS:-1}" phase2_workloads_tier_m_raw="${PHASE2_WORKLOADS_TIER_M:-W1 W2 W3 W4 W6}" phase2_workloads_tier_l_rep_raw="${PHASE2_WORKLOADS_TIER_L_REP:-W1 W3 W6}" -phase2_threads_tier_m_raw="${PHASE2_THREADS_TIER_M:-1 6 12}" -phase2_threads_tier_l_rep_raw="${PHASE2_THREADS_TIER_L_REP:-1 12}" -phase2_profiles_raw="${PHASE2_PROFILES:-P2 P3}" +phase2_threads_tier_m_raw="${PHASE2_THREADS_TIER_M:-}" +if [ -z "${phase2_threads_tier_m_raw}" ]; then + phase2_threads_tier_m_raw="$(default_thread_points)" +fi +phase2_threads_tier_l_rep_raw="${PHASE2_THREADS_TIER_L_REP:-}" +if [ -z "${phase2_threads_tier_l_rep_raw}" ]; then + phase2_threads_tier_l_rep_raw="$(default_thread_points)" +fi +phase2_profiles_raw="${PHASE2_PROFILES:-P1 P2 P3 P4}" +phase2_prefill_tier_m_p1="${PHASE2_PREFILL_TIER_M_P1:-134217728}" phase2_prefill_tier_m_p2="${PHASE2_PREFILL_TIER_M_P2:-18302417}" -phase2_prefill_tier_m_p3="${PHASE2_PREFILL_TIER_M_P3:-1177348}" +phase2_prefill_tier_m_p3="${PHASE2_PREFILL_TIER_M_P3:-4682013}" +phase2_prefill_tier_m_p4="${PHASE2_PREFILL_TIER_M_P4:-1177348}" +phase2_prefill_tier_l_p1="${PHASE2_PREFILL_TIER_L_P1:-208783132}" phase2_prefill_tier_l_p2="${PHASE2_PREFILL_TIER_L_P2:-28470427}" -phase2_prefill_tier_l_p3="${PHASE2_PREFILL_TIER_L_P3:-1831430}" +phase2_prefill_tier_l_p3="${PHASE2_PREFILL_TIER_L_P3:-7283132}" +phase2_prefill_tier_l_p4="${PHASE2_PREFILL_TIER_L_P4:-1831430}" mkdir -p "${db_root}" mkdir -p "$(dirname -- "${result_file}")" @@ -59,16 +71,20 @@ fi profile_key() { case "$1" in + P1) echo 16 ;; P2) echo 32 ;; P3) echo 32 ;; + P4) echo 32 ;; *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;; esac } profile_val() { case "$1" in + P1) echo 128 ;; P2) echo 1024 ;; - P3) echo 16384 ;; + P3) echo 4096 ;; + P4) echo 16384 ;; *) printf "unknown profile: %s\n" "$1" >&2; exit 1 ;; esac } @@ -78,14 +94,18 @@ prefill_for() { local profile="$2" if [ "${tier}" = "tier-m" ]; then case "${profile}" in + P1) echo "${phase2_prefill_tier_m_p1}" ;; P2) echo "${phase2_prefill_tier_m_p2}" ;; P3) echo "${phase2_prefill_tier_m_p3}" ;; + P4) echo "${phase2_prefill_tier_m_p4}" ;; *) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;; esac elif [ "${tier}" = "tier-l" ]; then case "${profile}" in + P1) echo "${phase2_prefill_tier_l_p1}" ;; P2) echo "${phase2_prefill_tier_l_p2}" ;; P3) echo "${phase2_prefill_tier_l_p3}" ;; + P4) echo "${phase2_prefill_tier_l_p4}" ;; *) printf "unknown profile: %s\n" "${profile}" >&2; exit 1 ;; esac else diff --git a/scripts/phase3.sh b/scripts/phase3.sh index 2365e4b..24c67e6 100755 --- a/scripts/phase3.sh +++ b/scripts/phase3.sh @@ -9,6 +9,8 @@ fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." && pwd)" +# shellcheck source=./thread_points.sh +. "${script_dir}/thread_points.sh" python_bin="${PYTHON_BIN:-${root_dir}/scripts/bin/python}" if [ ! -x "${python_bin}" ]; then python_bin="${PYTHON:-python3}" @@ -23,7 +25,10 @@ repeats="${REPEATS:-5}" read_path="${READ_PATH:-snapshot}" phase3_workloads_raw="${PHASE3_WORKLOADS:-W1 W3 W6}" -phase3_threads_raw="${PHASE3_THREADS:-1 12}" +phase3_threads_raw="${PHASE3_THREADS:-}" +if [ -z "${phase3_threads_raw}" ]; then + phase3_threads_raw="$(default_thread_points)" +fi phase3_durabilities_raw="${PHASE3_DURABILITIES:-relaxed durable}" key_size="${PHASE3_KEY_SIZE:-32}" value_size="${PHASE3_VALUE_SIZE:-1024}" diff --git a/scripts/rocksdb.sh b/scripts/rocksdb.sh index df82a80..f436b17 100755 --- a/scripts/rocksdb.sh +++ b/scripts/rocksdb.sh @@ -10,6 +10,8 @@ fi script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)" root_dir="$(cd -- "${script_dir}/.." && pwd)" rocksdb_dir="${root_dir}/rocksdb" +# shellcheck source=./thread_points.sh +. "${script_dir}/thread_points.sh" db_root="$1" result_file="${2:-${script_dir}/benchmark_results.csv}" @@ -26,9 +28,16 @@ mkdir -p "$(dirname -- "${result_file}")" (cd "${rocksdb_dir}" && cmake --build --preset release) workloads=(W1 W2 W3 W4 W5 W6) -threads=(1 6 12) +rocksdb_threads_raw="${ROCKSDB_THREADS:-$(default_thread_points)}" +IFS=' ' read -r -a threads <<< "${rocksdb_threads_raw}" +if [ "${#threads[@]}" -eq 0 ]; then + printf "rocksdb threads list must not be empty\n" >&2 + exit 1 +fi profiles=( + "16 128" "32 1024" + "32 4096" "32 16384" ) diff --git a/scripts/thread_points.sh b/scripts/thread_points.sh new file mode 100644 index 0000000..7b24851 --- /dev/null +++ b/scripts/thread_points.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash + +# Shared helpers for choosing default thread points from host CPU count. + +detect_logical_cpus() { + local detected + + if detected="$(nproc 2>/dev/null)"; then + : + elif detected="$(getconf _NPROCESSORS_ONLN 2>/dev/null)"; then + : + else + detected=1 + fi + + if [[ ! "${detected}" =~ ^[0-9]+$ ]] || [ "${detected}" -lt 1 ]; then + detected=1 + fi + + printf "%s\n" "${detected}" +} + +is_power_of_two() { + local n="$1" + [ "${n}" -gt 0 ] && [ $((n & (n - 1))) -eq 0 ] +} + +default_thread_points() { + local cpu_count="${1:-$(detect_logical_cpus)}" + local points=() + local t + + if is_power_of_two "${cpu_count}"; then + t=1 + while [ "${t}" -le "${cpu_count}" ]; do + points+=("${t}") + t=$((t * 2)) + done + else + t=1 + while [ "${t}" -le "${cpu_count}" ]; do + points+=("${t}") + t=$((t + 2)) + done + fi + + printf "%s\n" "${points[*]}" +}