#!/usr/bin/env python3
"""Print a durability-cost report from a benchmark results CSV.

The script reads the CSV named on the command line, keeps the rows for
workloads W1/W3/W6 at one key/value size, prints the per-configuration
medians, and then prints how much throughput drops and how much p99 latency
inflates when going from the ``relaxed`` to the ``durable`` durability mode.

Environment variables:
    PHASE3_REPORT_KEY_SIZE: key size to report on (default ``32``).
    PHASE3_REPORT_VALUE_SIZE: value size to report on (default ``1024``).
"""
import os
import sys

import pandas as pd

# Columns the report reads; checked up front so a bad CSV fails with one
# clear message instead of a KeyError half-way through.
_REQUIRED_COLUMNS = frozenset(
    {
        "engine",
        "workload_id",
        "threads",
        "durability_mode",
        "ops",
        "p99_us",
        "key_size",
        "value_size",
    }
)
_REPORT_WORKLOADS = ["W1", "W3", "W6"]
# One benchmark configuration; the durability mode is compared within it.
_CONFIG_KEYS = ["engine", "workload_id", "threads"]
_SUMMARY_COLUMNS = [
    "engine",
    "workload_id",
    "threads",
    "relaxed_tput",
    "durable_tput",
    "throughput_drop_pct",
    "relaxed_p99",
    "durable_p99",
    "p99_inflation_pct",
]


def _pivot_by_mode(base: pd.DataFrame, value_col: str, suffix: str) -> pd.DataFrame:
    """Spread *value_col* into one column per durability mode.

    The resulting columns are named ``<mode><suffix>``. Naming them here,
    rather than through ``merge(suffixes=...)``, matters because merge only
    renames columns that exist in both frames; a mode missing from one pivot
    would otherwise leave the other pivot's column un-suffixed.
    """
    pivot = base.pivot_table(
        index=_CONFIG_KEYS,
        columns="durability_mode",
        values=value_col,
        aggfunc="first",
    )
    pivot.columns = [f"{mode}{suffix}" for mode in pivot.columns]
    return pivot.reset_index()


def main() -> int:
    """Run the report for the CSV path given in ``sys.argv[1]``.

    Returns:
        ``1`` when the command line does not contain exactly one argument,
        ``0`` otherwise (including when no rows match the filter).

    Raises:
        ValueError: if the CSV lacks any required column, or if one of the
            size environment variables is not an integer.
    """
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <results.csv>")
        return 1

    df = pd.read_csv(sys.argv[1])

    missing = _REQUIRED_COLUMNS - set(df.columns)
    if missing:
        raise ValueError(f"Missing columns: {sorted(missing)}")

    target_key_size = int(os.getenv("PHASE3_REPORT_KEY_SIZE", "32"))
    target_value_size = int(os.getenv("PHASE3_REPORT_VALUE_SIZE", "1024"))

    sub = df[
        (df["workload_id"].isin(_REPORT_WORKLOADS))
        & (df["key_size"] == target_key_size)
        & (df["value_size"] == target_value_size)
    ].copy()

    if sub.empty:
        print("No phase3 rows found in csv")
        return 0

    # Collapse repeated runs of the same configuration to their medians.
    base = (
        sub.groupby(["engine", "workload_id", "threads", "durability_mode"])
        .agg(
            repeats=("ops", "count"),
            throughput_median=("ops", "median"),
            p99_median=("p99_us", "median"),
        )
        .reset_index()
    )

    with pd.option_context("display.max_rows", None, "display.max_columns", None):
        print(base.to_string(index=False))

    merged = _pivot_by_mode(base, "throughput_median", "_tput").merge(
        _pivot_by_mode(base, "p99_median", "_p99"),
        on=_CONFIG_KEYS,
    )

    # A mode that never appears in the data still needs its columns so the
    # summary below has a fixed shape; its derived percentages come out as NA.
    for col in ["relaxed_tput", "durable_tput", "relaxed_p99", "durable_p99"]:
        if col not in merged.columns:
            merged[col] = pd.NA

    merged["throughput_drop_pct"] = (
        (1.0 - (merged["durable_tput"] / merged["relaxed_tput"])) * 100.0
    )
    merged["p99_inflation_pct"] = (
        ((merged["durable_p99"] / merged["relaxed_p99"]) - 1.0) * 100.0
    )

    print("\nDurability cost summary:")
    print(merged[_SUMMARY_COLUMNS].to_string(index=False))

    return 0


if __name__ == "__main__":
    raise SystemExit(main())