This commit is contained in:
abbycin 2026-05-30 14:30:02 +08:00
parent 2f8b7ee588
commit eb7316b0ca
Signed by: abby
GPG Key ID: B636E0F0307EF8EB
2 changed files with 340 additions and 17 deletions

View File

@ -4,7 +4,11 @@ import argparse
import base64
import csv
import json
import re
import statistics
import tomllib
import urllib.error
import urllib.request
from collections import defaultdict
from pathlib import Path
from typing import Any
@ -19,12 +23,302 @@ ENGINE_STYLE_FALLBACK = [
WORKLOAD_TEMPLATE = [
("W1", "W1 (95R/5U, uniform)"),
("W2", "W2 (95R/5U, zipf)"),
("W3", "W3 (50R/50U)"),
("W4", "W4 (5R/95U)"),
("W5", "W5 (70R/25U/5S)"),
("W6", "W6 (100% scan)"),
("W3", "W3 (50R/50U, uniform)"),
("W4", "W4 (5R/95U, uniform)"),
("W5", "W5 (70R/25U/5S, uniform)"),
("W6", "W6 (100% scan, uniform)"),
]
# Lookup table from workload id (e.g. "W1") to its display label, built from
# the (id, label) pairs in WORKLOAD_TEMPLATE.
WORKLOAD_LABELS = dict(WORKLOAD_TEMPLATE)
# Repository URL of the benchmark tool; rendered as a link in the HTML report.
TEST_TOOL_SOURCE_URL = "https://github.com/abbycin/kv_bench"
SEMVER_RE = re.compile(r"^v?(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:[-+].*)?$")
def parse_semver(version: str) -> tuple[int, int, int] | None:
m = SEMVER_RE.match(version.strip())
if not m:
return None
major = int(m.group(1))
minor = int(m.group(2) or "0")
patch = int(m.group(3) or "0")
return (major, minor, patch)
def parse_semver_with_components(version: str) -> tuple[tuple[int, int, int], int] | None:
    """Parse a version and report how many components were actually written.

    Behaves like ``parse_semver`` but additionally returns the number of
    numeric components present in the text (1 for ``"1"``, 2 for ``"1.2"``,
    3 for ``"1.2.3"``), which callers need to distinguish ``~1`` from
    ``~1.0``.

    Returns:
        ``((major, minor, patch), component_count)``, or ``None`` when the
        text does not match ``SEMVER_RE``.
    """
    matched = SEMVER_RE.match(version.strip())
    if matched is None:
        return None
    raw_parts = matched.groups()
    # Missing optional groups are None; they are zero-filled in the triple but
    # excluded from the explicit-component count.
    major, minor, patch = (int(raw or "0") for raw in raw_parts)
    explicit = sum(1 for raw in raw_parts if raw is not None)
    return (major, minor, patch), explicit
def wildcard_bounds(token: str) -> tuple[tuple[int, int, int], tuple[int, int, int]] | None:
parts = [p.strip() for p in token.split(".")]
prefix: list[int] = []
wildcard_seen = False
for part in parts:
if part in ("*", "x", "X"):
wildcard_seen = True
break
if not part.isdigit():
return None
prefix.append(int(part))
if not wildcard_seen:
return None
if not prefix:
return ((0, 0, 0), (10**9, 0, 0))
lower = (prefix + [0, 0, 0])[:3]
if len(prefix) == 1:
upper = (prefix[0] + 1, 0, 0)
elif len(prefix) == 2:
upper = (prefix[0], prefix[1] + 1, 0)
else:
upper = (prefix[0], prefix[1], prefix[2] + 1)
return ((lower[0], lower[1], lower[2]), upper)
def version_satisfies_clause(version: tuple[int, int, int], clause: str) -> bool:
    """Check a parsed version against one Cargo-style requirement clause.

    Supported forms:

    * empty / ``*`` / ``x`` / ``X``: always satisfied;
    * caret (``^1.2.3``), also the default when no operator is given;
    * tilde (``~1.2``);
    * wildcard (``1.*``, ``1.2.x``);
    * comparison (``>=``, ``<=``, ``>``, ``<``, ``=``).

    Partially specified versions are treated as ranges rather than being
    zero-padded, matching Cargo: ``^0`` means ``>=0.0.0, <1.0.0``, ``=1.2``
    means ``>=1.2.0, <1.3.0``, ``>1`` means ``>=2.0.0`` and ``<=1.2`` means
    ``<1.3.0``.

    Args:
        version: ``(major, minor, patch)`` of the candidate version.
        clause: One clause of a requirement (no commas).

    Returns:
        ``True`` if the version satisfies the clause; ``False`` if it does
        not or if the clause cannot be parsed.
    """
    part = clause.strip()
    if not part or part in ("*", "x", "X"):
        return True

    if part.startswith(("^", "~")):
        parsed_with_count = parse_semver_with_components(part[1:].strip())
        if parsed_with_count is None:
            return False
        parsed, count = parsed_with_count
        major, minor, patch = parsed
        if part[0] == "~":
            # Tilde: only the major given -> minor may float; otherwise only
            # the patch level may float.
            upper = (major + 1, 0, 0) if count <= 1 else (major, minor + 1, 0)
        elif major > 0 or count == 1:
            # Caret bumps the left-most non-zero component; a lone "0" has no
            # minor written, so the whole 0.x line is allowed.
            upper = (major + 1, 0, 0)
        elif minor > 0 or count == 2:
            upper = (0, minor + 1, 0)
        else:
            upper = (0, 0, patch + 1)
        return parsed <= version < upper

    wildcard = wildcard_bounds(part)
    if wildcard is not None:
        lower, upper = wildcard
        return lower <= version < upper

    m = re.match(r"^(>=|<=|>|<|=)?\s*(.+)$", part)
    if not m:
        return False
    op = m.group(1)
    raw_token = m.group(2).strip()
    parsed_with_count = parse_semver_with_components(raw_token)
    if parsed_with_count is None:
        return False
    if op is None:
        # Cargo default when no operator is caret.
        return version_satisfies_clause(version, f"^{raw_token}")

    token, count = parsed_with_count
    if count == 3:
        # Fully specified version: plain tuple comparison.
        if op == ">=":
            return version >= token
        if op == "<=":
            return version <= token
        if op == ">":
            return version > token
        if op == "<":
            return version < token
        return version == token

    # Partial version: it denotes the half-open range [token, ceiling).
    major, minor, _ = token
    ceiling = (major + 1, 0, 0) if count == 1 else (major, minor + 1, 0)
    if op == "=":
        return token <= version < ceiling
    if op == ">":
        return version >= ceiling
    if op == ">=":
        return version >= token
    if op == "<":
        return version < token
    return version < ceiling  # op == "<="
def version_satisfies_requirement(version_text: str, requirement: str) -> bool:
    """Check a version string against a comma-separated requirement.

    Args:
        version_text: Candidate version, e.g. ``"1.4.2"``.
        requirement: Requirement such as ``">=1.2, <2"``; every clause must
            hold. An empty requirement or ``"*"`` accepts any parseable
            version.

    Returns:
        ``False`` when ``version_text`` cannot be parsed or any clause fails,
        ``True`` otherwise.
    """
    parsed_version = parse_semver(version_text)
    if parsed_version is None:
        return False
    spec = requirement.strip()
    if spec in ("", "*"):
        return True
    for raw_clause in spec.split(","):
        if not version_satisfies_clause(parsed_version, raw_clause.strip()):
            return False
    return True
def resolve_crates_io_version(crate_name: str, requirement: str) -> str:
    """Find the newest published crate version that satisfies a requirement.

    Queries the crates.io API for ``crate_name`` and picks the highest
    non-yanked, non-pre-release version accepted by
    ``version_satisfies_requirement``. This is a best-effort lookup: any
    network, decoding or schema problem yields ``"unknown"`` instead of an
    exception.

    Args:
        crate_name: Name of the crate on crates.io.
        requirement: Cargo-style version requirement.

    Returns:
        The selected version string, or ``"unknown"``.
    """
    url = f"https://crates.io/api/v1/crates/{crate_name}"
    req = urllib.request.Request(
        url,
        headers={
            "User-Agent": "kv_bench-csv_to_html",
            "Accept": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except (urllib.error.URLError, OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return "unknown"

    # Validate the response shape so an unexpected payload degrades to
    # "unknown" rather than raising AttributeError/TypeError.
    versions = payload.get("versions") if isinstance(payload, dict) else None
    if not isinstance(versions, list):
        return "unknown"

    candidates: list[str] = []
    for item in versions:
        if not isinstance(item, dict):
            continue
        ver = item.get("num")
        if not isinstance(ver, str):
            continue
        if item.get("yanked", False):
            continue
        if "-" in ver:
            # Skip pre-releases.
            continue
        if version_satisfies_requirement(ver, requirement):
            candidates.append(ver)
    if not candidates:
        return "unknown"
    return max(candidates, key=lambda v: parse_semver(v) or (0, 0, 0))
def resolve_lockfile_version(
    repo_root: Path,
    crate_name: str,
    requirement: str,
) -> str:
    """Look up the locked version of a crate in ``Cargo.lock``.

    Args:
        repo_root: Directory expected to contain ``Cargo.lock``.
        crate_name: Package name to search for.
        requirement: Cargo-style requirement the locked version must satisfy.

    Returns:
        The highest matching non-pre-release version recorded in the
        lockfile, or ``"unknown"`` when the file is missing, unreadable,
        malformed, or has no matching entry.
    """
    lock_path = repo_root / "Cargo.lock"
    if not lock_path.exists():
        return "unknown"
    try:
        with lock_path.open("rb") as handle:
            lock_data = tomllib.load(handle)
    except (OSError, tomllib.TOMLDecodeError):
        return "unknown"

    packages = lock_data.get("package", [])
    if not isinstance(packages, list):
        return "unknown"

    # Keep only well-formed entries for this crate whose version is a plain
    # release (no "-" pre-release tag) and satisfies the requirement.
    matching = [
        pkg["version"]
        for pkg in packages
        if isinstance(pkg, dict)
        and pkg.get("name") == crate_name
        and isinstance(pkg.get("version"), str)
        and "-" not in pkg["version"]
        and version_satisfies_requirement(pkg["version"], requirement)
    ]
    if not matching:
        return "unknown"
    return max(matching, key=lambda v: parse_semver(v) or (0, 0, 0))
def resolve_git_head(repo_path: Path) -> str:
    """Return the commit id that ``HEAD`` points to, read directly from disk.

    No ``git`` executable is invoked. The function understands:

    * a regular ``.git`` directory;
    * a ``.git`` *file* containing ``gitdir: <path>`` (submodules and linked
      worktrees);
    * a ``commondir`` file inside the git directory, which redirects shared
      refs for linked worktrees;
    * loose ref files and ``packed-refs``;
    * a detached ``HEAD`` holding the commit id itself.

    Args:
        repo_path: Working-tree root of the repository.

    Returns:
        The commit id, or ``"unknown"`` when it cannot be determined.
    """
    git_dir = repo_path / ".git"
    try:
        if git_dir.is_file():
            pointer = git_dir.read_text(encoding="utf-8").strip()
            if not pointer.startswith("gitdir:"):
                return "unknown"
            # A relative pointer is relative to the working tree; joining with
            # an absolute path simply yields that absolute path.
            git_dir = repo_path / pointer[len("gitdir:"):].strip()
        head_text = (git_dir / "HEAD").read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        return "unknown"

    if not head_text.startswith("ref: "):
        # Detached HEAD: the file holds the commit id itself.
        return head_text or "unknown"

    ref_rel = head_text[5:].strip()

    # Linked worktrees keep HEAD in their own git dir but store branch refs in
    # the directory named by "commondir".
    search_dirs = [git_dir]
    try:
        common = (git_dir / "commondir").read_text(encoding="utf-8").strip()
    except (OSError, UnicodeDecodeError):
        common = ""
    if common:
        search_dirs.append(git_dir / common)

    try:
        for base in search_dirs:
            ref_path = base / ref_rel
            if ref_path.is_file():
                return ref_path.read_text(encoding="utf-8").strip() or "unknown"
        for base in search_dirs:
            packed_refs = base / "packed-refs"
            if not packed_refs.is_file():
                continue
            for line in packed_refs.read_text(encoding="utf-8").splitlines():
                line = line.strip()
                # Skip blanks, the header comment and peeled-tag ("^") lines.
                if not line or line.startswith(("#", "^")):
                    continue
                parts = line.split(" ")
                if len(parts) == 2 and parts[1] == ref_rel:
                    return parts[0]
    except (OSError, UnicodeDecodeError):
        return "unknown"
    return "unknown"
def infer_mace_identity(repo_root: Path) -> tuple[str, str]:
    """Describe which build of ``mace-kv`` the benchmark depends on.

    Reads the ``mace-kv`` entry from ``[dependencies]`` in
    ``repo_root/Cargo.toml``:

    * a ``path`` dependency is reported by the git HEAD of that directory;
    * a version requirement (plain string or ``version`` key) is resolved
      from ``Cargo.lock`` first and from crates.io as a fallback.

    Returns:
        ``(label, value)`` where ``label`` is ``"mace commit id"`` or
        ``"mace version"`` and ``value`` is the resolved identifier or
        ``"unknown"``.
    """
    unknown_commit = ("mace commit id", "unknown")

    cargo_path = repo_root / "Cargo.toml"
    if not cargo_path.exists():
        return unknown_commit
    try:
        with cargo_path.open("rb") as handle:
            manifest = tomllib.load(handle)
    except (OSError, tomllib.TOMLDecodeError):
        return unknown_commit

    deps = manifest.get("dependencies", {})
    if not isinstance(deps, dict):
        return unknown_commit
    mace_dep = deps.get("mace-kv")

    def published_version(requirement: str) -> tuple[str, str]:
        # Prefer the lockfile (what was actually built); fall back to crates.io.
        locked = resolve_lockfile_version(repo_root, "mace-kv", requirement)
        if locked == "unknown":
            return ("mace version", resolve_crates_io_version("mace-kv", requirement))
        return ("mace version", locked)

    if isinstance(mace_dep, str):
        return published_version(mace_dep.strip() or "*")

    if isinstance(mace_dep, dict):
        path_value = mace_dep.get("path")
        if isinstance(path_value, str) and path_value.strip():
            dep_path = Path(path_value.strip())
            # Relative paths in Cargo.toml are relative to the manifest directory.
            mace_repo = dep_path if dep_path.is_absolute() else repo_root / dep_path
            return ("mace commit id", resolve_git_head(mace_repo.resolve()))
        version_req = mace_dep.get("version")
        if isinstance(version_req, str) and version_req.strip():
            return published_version(version_req.strip())

    return unknown_commit
def infer_rocksdb_version(repo_root: Path) -> str:
    """Read the pinned RocksDB version from ``rocksdb/vcpkg.json``.

    Looks for an entry named ``"rocksdb"`` with a string ``version`` in the
    manifest's ``overrides`` list. This is a best-effort lookup: a missing,
    unreadable, undecodable or unexpectedly shaped file yields ``"unknown"``
    instead of an exception.

    Args:
        repo_root: Repository root containing the ``rocksdb`` directory.

    Returns:
        The version string, or ``"unknown"``.
    """
    vcpkg_path = repo_root / "rocksdb" / "vcpkg.json"
    try:
        obj = json.loads(vcpkg_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return "unknown"

    overrides = obj.get("overrides") if isinstance(obj, dict) else None
    if not isinstance(overrides, list):
        return "unknown"
    for item in overrides:
        if not isinstance(item, dict):
            continue
        version = item.get("version")
        if item.get("name") == "rocksdb" and isinstance(version, str):
            return version
    return "unknown"
def parse_args() -> argparse.Namespace:
@ -238,15 +532,23 @@ def build_report_payload(rows: list[dict[str, Any]], engines: set[str], kv_pairs
}
def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str, source_csv_b64: str) -> str:
def render_html(
payload: dict[str, Any],
source_csv: str,
source_csv_name: str,
source_csv_b64: str,
mace_label: str,
mace_value: str,
rocksdb_version: str,
) -> str:
payload_json = json.dumps(payload, ensure_ascii=False)
return f"""<!DOCTYPE html>
<html lang="zh-CN">
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Benchmark 报告</title>
<title>Benchmark Report</title>
<style>
:root {{
--bg: #f7f8fc;
@ -259,7 +561,7 @@ def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str,
* {{ box-sizing: border-box; }}
body {{
margin: 0;
font-family: "Noto Sans SC", "Segoe UI", sans-serif;
font-family: "Segoe UI", sans-serif;
color: var(--text);
background: radial-gradient(circle at top right, #e8eeff 0%, var(--bg) 45%);
}}
@ -391,22 +693,30 @@ def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str,
</head>
<body>
<div class="container">
<h1>Benchmark 报告</h1>
<h1>Benchmark Report</h1>
<div class="sub">
数据来源:
CSV source:
<a id="source-download" class="source-link" href="#" download="{source_csv_name}">
<code>{source_csv}</code>
</a>
(点击下载原始 CSV)
(click to download raw CSV)
</div>
<div class="sub">
Test tool source code:
<a class="source-link" href="{TEST_TOOL_SOURCE_URL}" target="_blank" rel="noopener noreferrer">{TEST_TOOL_SOURCE_URL}</a>
<br />
{mace_label}: <code>{mace_value}</code>
<br />
rocksdb version: <code>{rocksdb_version}</code>
</div>
<div class="legend-wrap">
<div class="card">
<div class="legend-title">颜色: key/value ( engine 保持一致)</div>
<div class="legend-title">Color: key/value pairs (consistent across engines)</div>
<div id="kv-legend" class="legend-list"></div>
</div>
<div class="card">
<div class="legend-title">填充样式: engine</div>
<div class="legend-title">Fill style: engine</div>
<div id="engine-legend" class="legend-list"></div>
</div>
</div>
@ -654,7 +964,7 @@ def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str,
</div>
</div>
<div class="chart-card">
<div class="chart-tools"><button type="button" class="reset-btn" id="chart-reset-${{wIdx}}">还原缩放</button></div>
<div class="chart-tools"><button type="button" class="reset-btn" id="chart-reset-${{wIdx}}">Reset zoom</button></div>
<canvas id="metric-${{wIdx}}"></canvas>
</div>
`;
@ -722,7 +1032,18 @@ def main() -> int:
payload = build_report_payload(rows, engines, kv_pairs)
csv_bytes = csv_path.read_bytes()
csv_b64 = base64.b64encode(csv_bytes).decode("ascii")
html = render_html(payload, str(csv_path), csv_path.name, csv_b64)
repo_root = Path(__file__).resolve().parent.parent
mace_label, mace_value = infer_mace_identity(repo_root)
rocksdb_version = infer_rocksdb_version(repo_root)
html = render_html(
payload,
str(csv_path),
csv_path.name,
csv_b64,
mace_label,
mace_value,
rocksdb_version,
)
output_path.write_text(html, encoding="utf-8")
print(f"HTML written to: {output_path}")

View File

@ -1082,7 +1082,8 @@ fn run_one_op(
ReadPath::Snapshot => {
if let Ok(view) = bucket.view() {
for item in view.seek(prefix).take(scan_len.max(1)) {
std::hint::black_box(item);
std::hint::black_box(item.key());
std::hint::black_box(item.val());
}
true
} else {
@ -1092,7 +1093,8 @@ fn run_one_op(
ReadPath::RwTxn => {
if let Ok(tx) = bucket.begin() {
for item in tx.seek(prefix).take(scan_len.max(1)) {
std::hint::black_box(item);
std::hint::black_box(item.key());
std::hint::black_box(item.val());
}
tx.commit().is_ok()
} else {