kv_bench/scripts/phase4_soak.sh

141 lines
4.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# Phase 4 soak driver: argument handling and output-path setup.
#
# Positional arguments:
#   $1  engine       "mace" or "rocksdb"
#   $2  db_path      database path; must start with /nvme
#   $3  result_csv   optional, defaults to <script dir>/phase4_results.csv
#   $4  restart_csv  optional, defaults to <script dir>/phase4_restart.csv
set -euo pipefail

# Usage errors are diagnostics, so they go to stderr like every other error
# reported below.
if (( $# < 2 || $# > 4 )); then
  printf "Usage: %s <engine:mace|rocksdb> <db_path_under_/nvme> [result_csv] [restart_csv]\n" "$0" >&2
  exit 1
fi

engine="$1"
db_path="$2"

# Resolve the directory holding this script and its parent; the benchmark
# binaries and default CSV locations are derived from them.
script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)"
root_dir="$(cd -- "${script_dir}/.." && pwd)"

result_file="${3:-${script_dir}/phase4_results.csv}"
restart_file="${4:-${script_dir}/phase4_restart.csv}"

# NOTE: this is a plain prefix match, so siblings such as /nvme0/... are
# accepted as well as paths below /nvme/.
if [[ "${db_path}" != /nvme* ]]; then
  printf "db_path must be under /nvme, got: %s\n" "${db_path}" >&2
  exit 1
fi

if [[ "${engine}" != "mace" && "${engine}" != "rocksdb" ]]; then
  printf "engine must be mace or rocksdb\n" >&2
  exit 1
fi

# Make sure the parent directories of the database and both CSV files exist.
mkdir -p "$(dirname -- "${db_path}")"
mkdir -p "$(dirname -- "${result_file}")"
mkdir -p "$(dirname -- "${restart_file}")"
# Tunables; each one can be overridden through the environment variable named
# in its default expansion.
soak_hours="${SOAK_HOURS:-12}"                    # total length of the soak window, in hours
crash_interval_secs="${CRASH_INTERVAL_SECS:-1800}" # how long each main run lasts before it is killed
verify_measure_secs="${VERIFY_MEASURE_SECS:-30}"   # --measure-secs for the post-kill verify run
run_measure_secs="${RUN_MEASURE_SECS:-3600}"       # --measure-secs for the main run
warmup_secs="${WARMUP_SECS:-30}"                   # --warmup-secs for every run

# soak_hours is fed into shell arithmetic below. Reject anything that is not a
# plain decimal integer so a bad value fails here with a clear message instead
# of being evaluated as an arithmetic expression.
if [[ ! "${soak_hours}" =~ ^[0-9]+$ ]]; then
  printf "SOAK_HOURS must be a non-negative integer, got: %s\n" "${soak_hours}" >&2
  exit 1
fi

# baseline: tier-m + W3 + P2 + 12 threads
workload_main="W3"
workload_verify="W6"
threads=12
key_size=32
value_size=1024
prefill_keys=18302417
read_path="${READ_PATH:-snapshot}"
durability="${DURABILITY:-relaxed}"

# Soak window in epoch seconds. The 10# prefix forces base 10 so a value with
# a leading zero (e.g. "08") is not parsed as octal.
start_epoch="$(date +%s)"
end_epoch="$((start_epoch + 10#${soak_hours} * 3600))"
#######################################
# Run one benchmark invocation for the selected engine against db_path.
# Globals (read): engine, root_dir, db_path, threads, key_size, value_size,
#                 prefill_keys, warmup_secs, read_path, durability, result_file
# Arguments:
#   $1 - workload name, passed to --workload
#   $2 - value passed to --measure-secs
#   $3 - "1" adds --skip-prefill; any other value leaves it out
# Returns: the exit status of the benchmark binary.
#######################################
run_cmd() {
  local wl="$1"
  local secs="$2"
  local no_prefill="$3"

  # Options shared by both engines that come first on the command line.
  local -a leading=(
    --path "${db_path}"
    --workload "${wl}"
    --threads "${threads}"
    --key-size "${key_size}"
    --value-size "${value_size}"
    --prefill-keys "${prefill_keys}"
    --warmup-secs "${warmup_secs}"
    --measure-secs "${secs}"
  )

  # Options shared by both engines that close the command line.
  local -a trailing=(--reuse-path)
  if [[ "${no_prefill}" == "1" ]]; then
    trailing+=(--skip-prefill)
  fi
  trailing+=(--result-file "${result_file}")

  # Binary and engine-specific options; any engine other than "mace" takes the
  # rocksdb branch.
  local bench
  local -a engine_opts
  if [[ "${engine}" == "mace" ]]; then
    bench="${root_dir}/target/release/kv_bench"
    engine_opts=(
      --shared-keyspace true
      --read-path "${read_path}"
      --durability "${durability}"
      --cleanup "false"
    )
  else
    bench="${root_dir}/rocksdb/build/release/rocksdb_bench"
    engine_opts=(
      --read-path "${read_path}"
      --durability "${durability}"
      --no-cleanup
    )
  fi

  "${bench}" "${leading[@]}" "${engine_opts[@]}" "${trailing[@]}"
}
# Start the restart log with its CSV header only when the file does not exist
# yet; an existing log is appended to.
if [[ ! -f "${restart_file}" ]]; then
  printf "cycle,start_epoch,kill_sent,worker_exit,restart_status,restart_ready_ms\n" > "${restart_file}"
fi

# seed dataset once (with prefill)
printf "[phase4][%s] seed dataset at %s\n" "${engine}" "${db_path}"
run_cmd "${workload_main}" 5 0

# Crash/restart cycles: run the main workload in the background, SIGKILL it
# after crash_interval_secs, then time a verify run against the same path and
# append one row per cycle to the restart log.
cycle=0
while [ "$(date +%s)" -lt "${end_epoch}" ]; do
  cycle="$((cycle + 1))"
  cycle_start="$(date +%s)"
  printf "[phase4][%s] cycle=%s start=%s\n" "${engine}" "${cycle}" "${cycle_start}"

  # long run in background; kill after interval
  # $! is the PID of the wrapping subshell, not of the benchmark binary that
  # run_cmd starts inside it. Killing only that PID would leave the binary
  # running as an orphan while the verify run opens the same database.
  # Enabling job control for the launch puts the subshell and its children in
  # their own process group (group id == runner_pid), so the whole group can
  # be killed at once. stdin is redirected explicitly because job control does
  # not detach a background job's stdin the way a plain `&` does.
  set -m
  (run_cmd "${workload_main}" "${run_measure_secs}" 1) </dev/null &
  runner_pid=$!
  set +m

  sleep "${crash_interval_secs}"

  kill_sent=0
  if kill -0 "${runner_pid}" 2>/dev/null; then
    # SIGKILL is intentional: the cycle simulates a hard crash. Target the
    # process group first; fall back to the single PID if no such group
    # exists. A failure here only means the job exited in the meantime.
    kill -KILL -- "-${runner_pid}" 2>/dev/null \
      || kill -KILL "${runner_pid}" 2>/dev/null \
      || true
    kill_sent=1
  fi

  # Reap the background job; a non-zero status is recorded, not fatal.
  worker_exit=0
  wait "${runner_pid}" || worker_exit=$?

  # Time the verify run (no prefill) as the restart measurement. Its exit
  # status is logged rather than aborting the soak.
  restart_start_ms="$(date +%s%3N)"
  restart_status=0
  run_cmd "${workload_verify}" "${verify_measure_secs}" 1 || restart_status=$?
  restart_end_ms="$(date +%s%3N)"
  restart_ready_ms="$((restart_end_ms - restart_start_ms))"

  printf "%s,%s,%s,%s,%s,%s\n" \
    "${cycle}" "${cycle_start}" "${kill_sent}" "${worker_exit}" "${restart_status}" "${restart_ready_ms}" >> "${restart_file}"
done

python3 "${script_dir}/phase4_report.py" "${restart_file}"
printf "Phase 4 soak finished. Results: %s | Restart log: %s\n" "${result_file}" "${restart_file}"