Clarify workloads and comparison filtering
This commit is contained in:
parent
9a4abb7dba
commit
f0044d1d62
16
README.md
16
README.md
@ -47,14 +47,16 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_r
|
|||||||
- Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path`
|
- Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path`
|
||||||
- Fairness rule for read-heavy workloads: `get`, `scan`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so that RocksDB is not measured as if GC/compaction were artificially disabled, a state in which reads may have to touch multiple SSTs
|
- Fairness rule for read-heavy workloads: `get`, `scan`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so that RocksDB is not measured as if GC/compaction were artificially disabled, a state in which reads may have to touch multiple SSTs
|
||||||
- Throughput metric: workload-level `ops_per_sec` (higher is better)
|
- Throughput metric: workload-level `ops_per_sec` (higher is better)
|
||||||
- `W1/W2/W3/W4`: mixed read+update throughput
|
|
||||||
- `W5`: mixed read+update+scan throughput
|
|
||||||
- `W6`: scan throughput (counted by scan requests, not scanned key count)
|
|
||||||
- Tail latency metric: workload-level `p99_us` (lower is better)
|
- Tail latency metric: workload-level `p99_us` (lower is better)
|
||||||
- This is the mixed p99 of all operations executed in that workload row, not per-op-type p99
|
- This is the workload-level p99 of all operations executed in that row, not per-op-type p99
|
||||||
- `W1/W2/W3/W4`: mixed read+update p99
|
|
||||||
- `W5`: mixed read+update+scan p99
|
## Workloads
|
||||||
- `W6`: scan p99
|
- `W1`: `95%` read + `5%` update, uniform distribution
|
||||||
|
- `W2`: `95%` read + `5%` update, Zipf distribution
|
||||||
|
- `W3`: `50%` read + `50%` update, uniform distribution
|
||||||
|
- `W4`: `5%` read + `95%` update, uniform distribution
|
||||||
|
- `W5`: `70%` read + `25%` update + `5%` scan, uniform distribution
|
||||||
|
- `W6`: `100%` scan, uniform distribution; throughput is counted by scan requests, not scanned key count
|
||||||
|
|
||||||
Raw CSV path: `./scripts/benchmark_results.csv`
|
Raw CSV path: `./scripts/benchmark_results.csv`
|
||||||
|
|
||||||
|
|||||||
@ -15,6 +15,11 @@ def main() -> int:
|
|||||||
default="./scripts/benchmark_results.csv",
|
default="./scripts/benchmark_results.csv",
|
||||||
help="Path to benchmark CSV (default: ./scripts/benchmark_results.csv)",
|
help="Path to benchmark CSV (default: ./scripts/benchmark_results.csv)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--filter-errors",
|
||||||
|
action="store_true",
|
||||||
|
help="Only compare rows with error_ops == 0 (default: include all rows)",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
df = pd.read_csv(args.csv_path)
|
df = pd.read_csv(args.csv_path)
|
||||||
@ -44,20 +49,28 @@ def main() -> int:
|
|||||||
"read_path",
|
"read_path",
|
||||||
]
|
]
|
||||||
|
|
||||||
ok = df[df["error_ops"] == 0].copy()
|
if args.filter_errors:
|
||||||
if ok.empty:
|
base = df[df["error_ops"] == 0].copy()
|
||||||
print("No rows with error_ops == 0, cannot compare.")
|
else:
|
||||||
|
base = df.copy()
|
||||||
|
|
||||||
|
if base.empty:
|
||||||
|
if args.filter_errors:
|
||||||
|
print("No rows with error_ops == 0, cannot compare.")
|
||||||
|
else:
|
||||||
|
print("No rows found in csv, cannot compare.")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
agg = ok.groupby(keys + ["engine"], as_index=False).agg(
|
agg = base.groupby(keys + ["engine"], as_index=False).agg(
|
||||||
ops_per_sec=("ops_per_sec", "median"),
|
ops_per_sec=("ops_per_sec", "median"),
|
||||||
p99_us=("p99_us", "median"),
|
p99_us=("p99_us", "median"),
|
||||||
|
error_ops=("error_ops", "median"),
|
||||||
)
|
)
|
||||||
|
|
||||||
piv = agg.pivot_table(
|
piv = agg.pivot_table(
|
||||||
index=keys,
|
index=keys,
|
||||||
columns="engine",
|
columns="engine",
|
||||||
values=["ops_per_sec", "p99_us"],
|
values=["ops_per_sec", "p99_us", "error_ops"],
|
||||||
aggfunc="first",
|
aggfunc="first",
|
||||||
)
|
)
|
||||||
piv.columns = [f"{metric}_{engine}" for metric, engine in piv.columns]
|
piv.columns = [f"{metric}_{engine}" for metric, engine in piv.columns]
|
||||||
@ -68,6 +81,8 @@ def main() -> int:
|
|||||||
"ops_per_sec_rocksdb",
|
"ops_per_sec_rocksdb",
|
||||||
"p99_us_mace",
|
"p99_us_mace",
|
||||||
"p99_us_rocksdb",
|
"p99_us_rocksdb",
|
||||||
|
"error_ops_mace",
|
||||||
|
"error_ops_rocksdb",
|
||||||
]:
|
]:
|
||||||
if col not in out.columns:
|
if col not in out.columns:
|
||||||
out[col] = pd.NA
|
out[col] = pd.NA
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user