Compare commits
2 Commits
8da7c98842
...
f0044d1d62
| Author | SHA1 | Date | |
|---|---|---|---|
| | f0044d1d62 | | |
| | 9a4abb7dba | | |
18
README.md
18
README.md
@ -45,16 +45,18 @@ mkdir -p "${KV_BENCH_STORAGE_ROOT}/basic_mace" "${KV_BENCH_STORAGE_ROOT}/basic_r
|
|||||||
|
|
||||||
## What Is Compared
|
## What Is Compared
|
||||||
- Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path`
|
- Comparison unit: rows with identical `workload_id`, `threads`, `key_size`, `value_size`, `durability_mode`, `read_path`
|
||||||
- Fairness rule for read-heavy workloads: `get`, `scan`, `mixed`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so RocksDB is not compared with GC artificially disabled while reads may have to touch multiple SSTs
|
- Fairness rule for read-heavy workloads: `get`, `scan`, and `W1`-`W6` run one GC/compaction pass after prefill and before warmup/measurement, so RocksDB is not compared with GC artificially disabled while reads may have to touch multiple SSTs
|
||||||
- Throughput metric: workload-level `ops_per_sec` (higher is better)
|
- Throughput metric: workload-level `ops_per_sec` (higher is better)
|
||||||
- `W1/W2/W3/W4`: mixed read+update throughput
|
|
||||||
- `W5`: mixed read+update+scan throughput
|
|
||||||
- `W6`: scan throughput (counted by scan requests, not scanned key count)
|
|
||||||
- Tail latency metric: workload-level `p99_us` (lower is better)
|
- Tail latency metric: workload-level `p99_us` (lower is better)
|
||||||
- This is the mixed p99 of all operations executed in that workload row, not per-op-type p99
|
- This is the workload-level p99 of all operations executed in that row, not per-op-type p99
|
||||||
- `W1/W2/W3/W4`: mixed read+update p99
|
|
||||||
- `W5`: mixed read+update+scan p99
|
## Workloads
|
||||||
- `W6`: scan p99
|
- `W1`: `95%` read + `5%` update, uniform distribution
|
||||||
|
- `W2`: `95%` read + `5%` update, Zipf distribution
|
||||||
|
- `W3`: `50%` read + `50%` update, uniform distribution
|
||||||
|
- `W4`: `5%` read + `95%` update, uniform distribution
|
||||||
|
- `W5`: `70%` read + `25%` update + `5%` scan, uniform distribution
|
||||||
|
- `W6`: `100%` scan, uniform distribution; throughput is counted by scan requests, not scanned key count
|
||||||
|
|
||||||
Raw CSV path: `./scripts/benchmark_results.csv`
|
Raw CSV path: `./scripts/benchmark_results.csv`
|
||||||
|
|
||||||
|
|||||||
@ -64,7 +64,7 @@ WARMUP_SECS=3 MEASURE_SECS=5 PREFILL_KEYS=50000 \
|
|||||||
```
|
```
|
||||||
|
|
||||||
Fairness note:
|
Fairness note:
|
||||||
- For read-heavy workloads (`get`, `scan`, `mixed`, and `W1`-`W6`), both harnesses run one GC/compaction pass after prefill and before warmup/measurement.
|
- For read-heavy workloads (`get`, `scan`, and `W1`-`W6`), both harnesses run one GC/compaction pass after prefill and before warmup/measurement.
|
||||||
- The purpose is to make the RocksDB vs Mace comparison fairer, since RocksDB reads may need to touch multiple SSTs and should not be benchmarked with GC/compaction artificially disabled.
|
- The purpose is to make the RocksDB vs Mace comparison fairer, since RocksDB reads may need to touch multiple SSTs and should not be benchmarked with GC/compaction artificially disabled.
|
||||||
|
|
||||||
Generate plots:
|
Generate plots:
|
||||||
|
|||||||
@ -68,7 +68,6 @@ struct Args {
|
|||||||
size_t key_size = 16;
|
size_t key_size = 16;
|
||||||
size_t value_size = 1024;
|
size_t value_size = 1024;
|
||||||
size_t blob_size = 8192;
|
size_t blob_size = 8192;
|
||||||
size_t insert_ratio = 30;
|
|
||||||
bool random = false;
|
bool random = false;
|
||||||
std::string mode = "insert";
|
std::string mode = "insert";
|
||||||
std::optional<std::string> workload;
|
std::optional<std::string> workload;
|
||||||
@ -277,15 +276,10 @@ static std::optional<WorkloadSpec> parse_workload(const Args &args, std::string
|
|||||||
if (mode == "get") {
|
if (mode == "get") {
|
||||||
return WorkloadSpec{"LEGACY_GET", "get", Distribution::Uniform, 100, 0, 0, args.scan_len, true, false};
|
return WorkloadSpec{"LEGACY_GET", "get", Distribution::Uniform, 100, 0, 0, args.scan_len, true, false};
|
||||||
}
|
}
|
||||||
if (mode == "mixed") {
|
|
||||||
return WorkloadSpec{"LEGACY_MIXED", "mixed", Distribution::Uniform,
|
|
||||||
static_cast<uint8_t>(100 - args.insert_ratio),
|
|
||||||
static_cast<uint8_t>(args.insert_ratio), 0, args.scan_len, true, false};
|
|
||||||
}
|
|
||||||
if (mode == "scan") {
|
if (mode == "scan") {
|
||||||
return WorkloadSpec{"LEGACY_SCAN", "scan", Distribution::Uniform, 0, 0, 100, args.scan_len, true, false};
|
return WorkloadSpec{"LEGACY_SCAN", "scan", Distribution::Uniform, 0, 0, 100, args.scan_len, true, false};
|
||||||
}
|
}
|
||||||
err = fmt::format("invalid mode `{}` (supported: insert/get/mixed/scan)", args.mode);
|
err = fmt::format("invalid mode `{}` (supported: insert/get/scan)", args.mode);
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -718,14 +712,13 @@ int main(int argc, char *argv[]) {
|
|||||||
bool disable_cleanup = false;
|
bool disable_cleanup = false;
|
||||||
std::string workload;
|
std::string workload;
|
||||||
|
|
||||||
app.add_option("-m,--mode", args.mode, "Mode: insert, get, mixed, scan");
|
app.add_option("-m,--mode", args.mode, "Mode: insert, get, scan");
|
||||||
app.add_option("--workload", workload, "Workload preset: W1..W6");
|
app.add_option("--workload", workload, "Workload preset: W1..W6");
|
||||||
app.add_option("-t,--threads", args.threads, "Threads");
|
app.add_option("-t,--threads", args.threads, "Threads");
|
||||||
app.add_option("-k,--key-size", args.key_size, "Key Size");
|
app.add_option("-k,--key-size", args.key_size, "Key Size");
|
||||||
app.add_option("-v,--value-size", args.value_size, "Value Size");
|
app.add_option("-v,--value-size", args.value_size, "Value Size");
|
||||||
app.add_option("-b,--blob-size", args.blob_size, "Blob Size");
|
app.add_option("-b,--blob-size", args.blob_size, "Blob Size");
|
||||||
app.add_option("-i,--iterations", args.iterations, "Iterations");
|
app.add_option("-i,--iterations", args.iterations, "Iterations");
|
||||||
app.add_option("-r,--insert-ratio", args.insert_ratio, "Update ratio for legacy mixed mode");
|
|
||||||
app.add_option("-p,--path", args.path, "Database path");
|
app.add_option("-p,--path", args.path, "Database path");
|
||||||
app.add_option("--prefill-keys", args.prefill_keys, "Prefill key count");
|
app.add_option("--prefill-keys", args.prefill_keys, "Prefill key count");
|
||||||
app.add_option("--warmup-secs", args.warmup_secs, "Warmup duration seconds");
|
app.add_option("--warmup-secs", args.warmup_secs, "Warmup duration seconds");
|
||||||
@ -765,10 +758,6 @@ int main(int argc, char *argv[]) {
|
|||||||
fmt::println(stderr, "key_size and value_size must be >= 16");
|
fmt::println(stderr, "key_size and value_size must be >= 16");
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (args.insert_ratio > 100) {
|
|
||||||
fmt::println(stderr, "insert_ratio must be in [0,100]");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
if (!(args.zipf_theta > 0.0 && args.zipf_theta < 1.0)) {
|
if (!(args.zipf_theta > 0.0 && args.zipf_theta < 1.0)) {
|
||||||
fmt::println(stderr, "zipf_theta must be in (0,1)");
|
fmt::println(stderr, "zipf_theta must be in (0,1)");
|
||||||
return 1;
|
return 1;
|
||||||
|
|||||||
@ -15,6 +15,11 @@ def main() -> int:
|
|||||||
default="./scripts/benchmark_results.csv",
|
default="./scripts/benchmark_results.csv",
|
||||||
help="Path to benchmark CSV (default: ./scripts/benchmark_results.csv)",
|
help="Path to benchmark CSV (default: ./scripts/benchmark_results.csv)",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--filter-errors",
|
||||||
|
action="store_true",
|
||||||
|
help="Only compare rows with error_ops == 0 (default: include all rows)",
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
df = pd.read_csv(args.csv_path)
|
df = pd.read_csv(args.csv_path)
|
||||||
@ -44,20 +49,28 @@ def main() -> int:
|
|||||||
"read_path",
|
"read_path",
|
||||||
]
|
]
|
||||||
|
|
||||||
ok = df[df["error_ops"] == 0].copy()
|
if args.filter_errors:
|
||||||
if ok.empty:
|
base = df[df["error_ops"] == 0].copy()
|
||||||
print("No rows with error_ops == 0, cannot compare.")
|
else:
|
||||||
|
base = df.copy()
|
||||||
|
|
||||||
|
if base.empty:
|
||||||
|
if args.filter_errors:
|
||||||
|
print("No rows with error_ops == 0, cannot compare.")
|
||||||
|
else:
|
||||||
|
print("No rows found in csv, cannot compare.")
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
agg = ok.groupby(keys + ["engine"], as_index=False).agg(
|
agg = base.groupby(keys + ["engine"], as_index=False).agg(
|
||||||
ops_per_sec=("ops_per_sec", "median"),
|
ops_per_sec=("ops_per_sec", "median"),
|
||||||
p99_us=("p99_us", "median"),
|
p99_us=("p99_us", "median"),
|
||||||
|
error_ops=("error_ops", "median"),
|
||||||
)
|
)
|
||||||
|
|
||||||
piv = agg.pivot_table(
|
piv = agg.pivot_table(
|
||||||
index=keys,
|
index=keys,
|
||||||
columns="engine",
|
columns="engine",
|
||||||
values=["ops_per_sec", "p99_us"],
|
values=["ops_per_sec", "p99_us", "error_ops"],
|
||||||
aggfunc="first",
|
aggfunc="first",
|
||||||
)
|
)
|
||||||
piv.columns = [f"{metric}_{engine}" for metric, engine in piv.columns]
|
piv.columns = [f"{metric}_{engine}" for metric, engine in piv.columns]
|
||||||
@ -68,6 +81,8 @@ def main() -> int:
|
|||||||
"ops_per_sec_rocksdb",
|
"ops_per_sec_rocksdb",
|
||||||
"p99_us_mace",
|
"p99_us_mace",
|
||||||
"p99_us_rocksdb",
|
"p99_us_rocksdb",
|
||||||
|
"error_ops_mace",
|
||||||
|
"error_ops_rocksdb",
|
||||||
]:
|
]:
|
||||||
if col not in out.columns:
|
if col not in out.columns:
|
||||||
out[col] = pd.NA
|
out[col] = pd.NA
|
||||||
|
|||||||
@ -38,7 +38,6 @@ ENGINE_ORDER = ("mace", "rocksdb")
|
|||||||
MODE_PLAN = (
|
MODE_PLAN = (
|
||||||
("put", "insert"),
|
("put", "insert"),
|
||||||
("get", "get"),
|
("get", "get"),
|
||||||
("mixed", "mixed"),
|
|
||||||
("scan", "scan"),
|
("scan", "scan"),
|
||||||
)
|
)
|
||||||
KV_PROFILES = (
|
KV_PROFILES = (
|
||||||
@ -98,7 +97,6 @@ def run_engine_cases(
|
|||||||
prefill_keys: int,
|
prefill_keys: int,
|
||||||
read_path: str,
|
read_path: str,
|
||||||
durability: str,
|
durability: str,
|
||||||
insert_ratio: int,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
for mode_display, mode_cli in MODE_PLAN:
|
for mode_display, mode_cli in MODE_PLAN:
|
||||||
for threads in thread_points:
|
for threads in thread_points:
|
||||||
@ -135,8 +133,6 @@ def run_engine_cases(
|
|||||||
|
|
||||||
if mode_cli != "insert":
|
if mode_cli != "insert":
|
||||||
args.extend(["--prefill-keys", str(prefill_keys)])
|
args.extend(["--prefill-keys", str(prefill_keys)])
|
||||||
if mode_cli == "mixed":
|
|
||||||
args.extend(["--insert-ratio", str(insert_ratio)])
|
|
||||||
if engine == "mace":
|
if engine == "mace":
|
||||||
args.append("--shared-keyspace")
|
args.append("--shared-keyspace")
|
||||||
|
|
||||||
@ -173,9 +169,7 @@ def annotate_points(x_values: Sequence[int], y_values: Sequence[float], y_max: f
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def mode_title(mode_display: str, insert_ratio: int) -> str:
|
def mode_title(mode_display: str) -> str:
|
||||||
if mode_display == "mixed":
|
|
||||||
return f"Mixed ({100 - insert_ratio}% Get, {insert_ratio}% Put)"
|
|
||||||
return mode_display.capitalize()
|
return mode_display.capitalize()
|
||||||
|
|
||||||
|
|
||||||
@ -184,7 +178,6 @@ def plot_results(
|
|||||||
result_csv: Path,
|
result_csv: Path,
|
||||||
output_dir: Path,
|
output_dir: Path,
|
||||||
thread_points: Sequence[int],
|
thread_points: Sequence[int],
|
||||||
insert_ratio: int,
|
|
||||||
) -> list[Path]:
|
) -> list[Path]:
|
||||||
df = pd.read_csv(result_csv)
|
df = pd.read_csv(result_csv)
|
||||||
required = {"engine", "mode", "threads", "key_size", "value_size", "ops_per_sec"}
|
required = {"engine", "mode", "threads", "key_size", "value_size", "ops_per_sec"}
|
||||||
@ -246,7 +239,7 @@ def plot_results(
|
|||||||
)
|
)
|
||||||
annotate_points(x, y, y_max, line_color)
|
annotate_points(x, y, y_max, line_color)
|
||||||
|
|
||||||
plt.title(mode_title(mode_display, insert_ratio), fontsize=16)
|
plt.title(mode_title(mode_display), fontsize=16)
|
||||||
plt.xlabel("Threads", fontsize=14)
|
plt.xlabel("Threads", fontsize=14)
|
||||||
plt.ylabel("OPS/s", fontsize=14)
|
plt.ylabel("OPS/s", fontsize=14)
|
||||||
plt.xticks(list(thread_points), fontsize=12)
|
plt.xticks(list(thread_points), fontsize=12)
|
||||||
@ -272,7 +265,6 @@ def parse_args(argv: Sequence[str]) -> argparse.Namespace:
|
|||||||
parser.add_argument("--measure-secs", type=int, default=10)
|
parser.add_argument("--measure-secs", type=int, default=10)
|
||||||
parser.add_argument("--iterations", type=int, default=500_000)
|
parser.add_argument("--iterations", type=int, default=500_000)
|
||||||
parser.add_argument("--prefill-keys", type=int, default=500_000)
|
parser.add_argument("--prefill-keys", type=int, default=500_000)
|
||||||
parser.add_argument("--insert-ratio", type=int, default=30)
|
|
||||||
parser.add_argument("--read-path", choices=("snapshot", "rw_txn"), default="snapshot")
|
parser.add_argument("--read-path", choices=("snapshot", "rw_txn"), default="snapshot")
|
||||||
parser.add_argument("--durability", choices=("relaxed", "durable"), default="relaxed")
|
parser.add_argument("--durability", choices=("relaxed", "durable"), default="relaxed")
|
||||||
parser.add_argument("--csv-name", default="fast_test_results.csv")
|
parser.add_argument("--csv-name", default="fast_test_results.csv")
|
||||||
@ -328,7 +320,6 @@ def main(argv: Sequence[str]) -> int:
|
|||||||
prefill_keys=args.prefill_keys,
|
prefill_keys=args.prefill_keys,
|
||||||
read_path=args.read_path,
|
read_path=args.read_path,
|
||||||
durability=args.durability,
|
durability=args.durability,
|
||||||
insert_ratio=args.insert_ratio,
|
|
||||||
)
|
)
|
||||||
run_engine_cases(
|
run_engine_cases(
|
||||||
engine="rocksdb",
|
engine="rocksdb",
|
||||||
@ -342,14 +333,12 @@ def main(argv: Sequence[str]) -> int:
|
|||||||
prefill_keys=args.prefill_keys,
|
prefill_keys=args.prefill_keys,
|
||||||
read_path=args.read_path,
|
read_path=args.read_path,
|
||||||
durability=args.durability,
|
durability=args.durability,
|
||||||
insert_ratio=args.insert_ratio,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
outputs = plot_results(
|
outputs = plot_results(
|
||||||
result_csv=result_csv,
|
result_csv=result_csv,
|
||||||
output_dir=script_dir,
|
output_dir=script_dir,
|
||||||
thread_points=thread_points,
|
thread_points=thread_points,
|
||||||
insert_ratio=args.insert_ratio,
|
|
||||||
)
|
)
|
||||||
print("[done] generated charts:")
|
print("[done] generated charts:")
|
||||||
for p in outputs:
|
for p in outputs:
|
||||||
|
|||||||
20
src/main.rs
20
src/main.rs
@ -48,9 +48,6 @@ struct Args {
|
|||||||
#[arg(short = 'i', long, default_value_t = 10000)]
|
#[arg(short = 'i', long, default_value_t = 10000)]
|
||||||
iterations: usize,
|
iterations: usize,
|
||||||
|
|
||||||
#[arg(short = 'r', long, default_value_t = 30)]
|
|
||||||
insert_ratio: u8,
|
|
||||||
|
|
||||||
#[arg(long, default_value_t = false)]
|
#[arg(long, default_value_t = false)]
|
||||||
random: bool,
|
random: bool,
|
||||||
|
|
||||||
@ -349,17 +346,6 @@ fn parse_workload(args: &Args) -> Result<WorkloadSpec, String> {
|
|||||||
requires_prefill: true,
|
requires_prefill: true,
|
||||||
insert_only: false,
|
insert_only: false,
|
||||||
}),
|
}),
|
||||||
"mixed" => Ok(WorkloadSpec {
|
|
||||||
id: "LEGACY_MIXED".into(),
|
|
||||||
mode_label: "mixed".into(),
|
|
||||||
distribution: Distribution::Uniform,
|
|
||||||
read_pct: 100u8.saturating_sub(args.insert_ratio),
|
|
||||||
update_pct: args.insert_ratio,
|
|
||||||
scan_pct: 0,
|
|
||||||
scan_len: args.scan_len,
|
|
||||||
requires_prefill: true,
|
|
||||||
insert_only: false,
|
|
||||||
}),
|
|
||||||
"scan" => Ok(WorkloadSpec {
|
"scan" => Ok(WorkloadSpec {
|
||||||
id: "LEGACY_SCAN".into(),
|
id: "LEGACY_SCAN".into(),
|
||||||
mode_label: "scan".into(),
|
mode_label: "scan".into(),
|
||||||
@ -372,7 +358,7 @@ fn parse_workload(args: &Args) -> Result<WorkloadSpec, String> {
|
|||||||
insert_only: false,
|
insert_only: false,
|
||||||
}),
|
}),
|
||||||
_ => Err(format!(
|
_ => Err(format!(
|
||||||
"invalid mode `{}` (supported: insert, get, mixed, scan)",
|
"invalid mode `{}` (supported: insert, get, scan)",
|
||||||
args.mode
|
args.mode
|
||||||
)),
|
)),
|
||||||
}
|
}
|
||||||
@ -659,10 +645,6 @@ fn main() {
|
|||||||
eprintln!("key_size and value_size must be >= 16");
|
eprintln!("key_size and value_size must be >= 16");
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
if args.insert_ratio > 100 {
|
|
||||||
eprintln!("insert ratio must be between 0 and 100");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
if !(0.0..1.0).contains(&args.zipf_theta) {
|
if !(0.0..1.0).contains(&args.zipf_theta) {
|
||||||
eprintln!("zipf_theta must be in range (0, 1)");
|
eprintln!("zipf_theta must be in range (0, 1)");
|
||||||
exit(1);
|
exit(1);
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user