fix scan
This commit is contained in:
parent
2f8b7ee588
commit
eb7316b0ca
@ -4,7 +4,11 @@ import argparse
|
|||||||
import base64
|
import base64
|
||||||
import csv
|
import csv
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
import statistics
|
import statistics
|
||||||
|
import tomllib
|
||||||
|
import urllib.error
|
||||||
|
import urllib.request
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any
|
from typing import Any
|
||||||
@ -19,12 +23,302 @@ ENGINE_STYLE_FALLBACK = [
|
|||||||
WORKLOAD_TEMPLATE = [
|
WORKLOAD_TEMPLATE = [
|
||||||
("W1", "W1 (95R/5U, uniform)"),
|
("W1", "W1 (95R/5U, uniform)"),
|
||||||
("W2", "W2 (95R/5U, zipf)"),
|
("W2", "W2 (95R/5U, zipf)"),
|
||||||
("W3", "W3 (50R/50U)"),
|
("W3", "W3 (50R/50U, uniform)"),
|
||||||
("W4", "W4 (5R/95U)"),
|
("W4", "W4 (5R/95U, uniform)"),
|
||||||
("W5", "W5 (70R/25U/5S)"),
|
("W5", "W5 (70R/25U/5S, uniform)"),
|
||||||
("W6", "W6 (100% scan)"),
|
("W6", "W6 (100% scan, uniform)"),
|
||||||
]
|
]
|
||||||
WORKLOAD_LABELS = dict(WORKLOAD_TEMPLATE)
|
WORKLOAD_LABELS = dict(WORKLOAD_TEMPLATE)
|
||||||
|
# URL of the benchmark tool's source repository; render_html() embeds it as a
# link in the generated report.
TEST_TOOL_SOURCE_URL = "https://github.com/abbycin/kv_bench"
|
||||||
|
|
||||||
|
|
||||||
|
SEMVER_RE = re.compile(r"^v?(\d+)(?:\.(\d+))?(?:\.(\d+))?(?:[-+].*)?$")
|
||||||
|
|
||||||
|
|
||||||
|
def parse_semver(version: str) -> tuple[int, int, int] | None:
|
||||||
|
m = SEMVER_RE.match(version.strip())
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
major = int(m.group(1))
|
||||||
|
minor = int(m.group(2) or "0")
|
||||||
|
patch = int(m.group(3) or "0")
|
||||||
|
return (major, minor, patch)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_semver_with_components(version: str) -> tuple[tuple[int, int, int], int] | None:
    """Parse a version and report how many components were actually written.

    Returns:
        ``((major, minor, patch), count)`` where ``count`` is 1, 2 or 3
        depending on whether minor and patch appear in the text (missing
        ones are filled with 0), or ``None`` when the text does not match
        ``SEMVER_RE``.
    """
    found = SEMVER_RE.match(version.strip())
    if found is None:
        return None
    raw_major, raw_minor, raw_patch = found.groups()
    # The major group is mandatory, so it always contributes 1 to the count.
    written = sum(piece is not None for piece in (raw_major, raw_minor, raw_patch))
    triple = (int(raw_major), int(raw_minor or "0"), int(raw_patch or "0"))
    return triple, written
|
||||||
|
|
||||||
|
|
||||||
|
def wildcard_bounds(token: str) -> tuple[tuple[int, int, int], tuple[int, int, int]] | None:
|
||||||
|
parts = [p.strip() for p in token.split(".")]
|
||||||
|
prefix: list[int] = []
|
||||||
|
wildcard_seen = False
|
||||||
|
for part in parts:
|
||||||
|
if part in ("*", "x", "X"):
|
||||||
|
wildcard_seen = True
|
||||||
|
break
|
||||||
|
if not part.isdigit():
|
||||||
|
return None
|
||||||
|
prefix.append(int(part))
|
||||||
|
if not wildcard_seen:
|
||||||
|
return None
|
||||||
|
if not prefix:
|
||||||
|
return ((0, 0, 0), (10**9, 0, 0))
|
||||||
|
|
||||||
|
lower = (prefix + [0, 0, 0])[:3]
|
||||||
|
if len(prefix) == 1:
|
||||||
|
upper = (prefix[0] + 1, 0, 0)
|
||||||
|
elif len(prefix) == 2:
|
||||||
|
upper = (prefix[0], prefix[1] + 1, 0)
|
||||||
|
else:
|
||||||
|
upper = (prefix[0], prefix[1], prefix[2] + 1)
|
||||||
|
return ((lower[0], lower[1], lower[2]), upper)
|
||||||
|
|
||||||
|
|
||||||
|
def _caret_upper_bound(parsed: tuple[int, int, int], count: int) -> tuple[int, int, int]:
    """Return the exclusive upper bound of a caret requirement.

    ``count`` is the number of components the requirement spelled out; it
    distinguishes ``^0`` (< 1.0.0) and ``^0.0`` (< 0.1.0) from ``^0.0.0``.
    """
    major, minor, patch = parsed
    if major > 0 or count == 1:
        return (major + 1, 0, 0)
    if minor > 0 or count == 2:
        return (0, minor + 1, 0)
    return (0, 0, patch + 1)


def _partial_upper_bound(parsed: tuple[int, int, int], count: int) -> tuple[int, int, int]:
    """Return the first version past the range a partial version denotes."""
    major, minor, patch = parsed
    if count == 1:
        return (major + 1, 0, 0)
    if count == 2:
        return (major, minor + 1, 0)
    return (major, minor, patch + 1)


def version_satisfies_clause(version: tuple[int, int, int], clause: str) -> bool:
    """Check one comma-free clause of a Cargo-style version requirement.

    Supported forms: empty / bare wildcard (always true), caret (``^1.2``),
    tilde (``~1.2``), wildcard (``1.*``), comparison operators
    (``>=``, ``<=``, ``>``, ``<``, ``=``) and a bare version, which is
    treated as a caret requirement.

    Partial versions follow Cargo's rules: ``^0`` means ``<1.0.0``, ``^0.0``
    means ``<0.1.0``, ``=1.2`` means ``>=1.2.0, <1.3.0``, ``<=1.2`` means
    ``<1.3.0`` and ``>1.2`` means ``>=1.3.0``.

    Args:
        version: Candidate version as ``(major, minor, patch)``.
        clause: A single requirement clause.

    Returns:
        ``True`` when the version satisfies the clause; ``False`` when it
        does not or when the clause cannot be parsed.
    """
    part = clause.strip()
    if not part or part in ("*", "x", "X"):
        return True

    if part.startswith("^"):
        parsed_with_count = parse_semver_with_components(part[1:].strip())
        if parsed_with_count is None:
            return False
        parsed, count = parsed_with_count
        return parsed <= version < _caret_upper_bound(parsed, count)

    if part.startswith("~"):
        parsed_with_count = parse_semver_with_components(part[1:].strip())
        if parsed_with_count is None:
            return False
        parsed, count = parsed_with_count
        major, minor, _ = parsed
        # "~1" allows any 1.x.y; "~1.2" and "~1.2.3" stay within 1.2.x.
        upper = (major + 1, 0, 0) if count <= 1 else (major, minor + 1, 0)
        return parsed <= version < upper

    wildcard = wildcard_bounds(part)
    if wildcard is not None:
        lower, upper = wildcard
        return lower <= version < upper

    m = re.match(r"^(>=|<=|>|<|=)?\s*(.+)$", part)
    if not m:
        return False
    op = m.group(1)
    parsed_with_count = parse_semver_with_components(m.group(2).strip())
    if parsed_with_count is None:
        return False
    lower, count = parsed_with_count

    if op is None:
        # Cargo default when no operator is given is caret.
        return lower <= version < _caret_upper_bound(lower, count)

    # First version beyond what the (possibly partial) token covers.
    past = _partial_upper_bound(lower, count)
    if op == ">=":
        return version >= lower
    if op == "<=":
        return version < past
    if op == ">":
        return version >= past
    if op == "<":
        return version < lower
    # op == "="
    return lower <= version < past
|
||||||
|
|
||||||
|
|
||||||
|
def version_satisfies_requirement(version_text: str, requirement: str) -> bool:
    """Check a version string against a comma-separated requirement.

    Args:
        version_text: Version to test; parsed with ``parse_semver``.
        requirement: Requirement text; an empty string or ``"*"`` accepts
            every parseable version.

    Returns:
        ``True`` only when the version parses and every comma-separated
        clause is satisfied.
    """
    parsed = parse_semver(version_text)
    if parsed is None:
        return False

    wanted = requirement.strip()
    if wanted in ("", "*"):
        return True

    # All clauses must hold; stop at the first one that fails.
    for clause in wanted.split(","):
        if not version_satisfies_clause(parsed, clause.strip()):
            return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_crates_io_version(crate_name: str, requirement: str) -> str:
    """Look up the newest crates.io release that satisfies a requirement.

    Queries ``https://crates.io/api/v1/crates/<crate_name>`` with a 10 second
    timeout and considers only versions that are not yanked, are not
    pre-releases, and pass ``version_satisfies_requirement``.

    Args:
        crate_name: Name of the crate to look up.
        requirement: Cargo-style version requirement.

    Returns:
        The highest matching version string, or ``"unknown"`` when the
        request fails, the response is not the expected JSON shape, or no
        version matches. This function is best-effort and does not raise for
        network or decoding problems.
    """
    # Local imports: only this function needs them.
    import http.client
    from urllib.parse import quote

    url = f"https://crates.io/api/v1/crates/{quote(crate_name, safe='')}"
    req = urllib.request.Request(
        url,
        headers={
            "User-Agent": "kv_bench-csv_to_html",
            "Accept": "application/json",
        },
    )
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            payload = json.loads(resp.read().decode("utf-8"))
    except (OSError, ValueError, http.client.HTTPException):
        # OSError covers URLError/timeouts; ValueError covers JSON and
        # UTF-8 decoding failures.
        return "unknown"

    versions = payload.get("versions") if isinstance(payload, dict) else None
    if not isinstance(versions, list):
        return "unknown"

    candidates: list[str] = []
    for item in versions:
        if not isinstance(item, dict):
            continue
        ver = item.get("num")
        if not isinstance(ver, str) or item.get("yanked", False):
            continue
        # A hyphen before any "+build" metadata marks a pre-release; a hyphen
        # inside the build metadata does not.
        if "-" in ver.split("+", 1)[0]:
            continue
        if version_satisfies_requirement(ver, requirement):
            candidates.append(ver)

    if not candidates:
        return "unknown"
    return max(candidates, key=lambda v: parse_semver(v) or (0, 0, 0))
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_lockfile_version(
|
||||||
|
repo_root: Path,
|
||||||
|
crate_name: str,
|
||||||
|
requirement: str,
|
||||||
|
) -> str:
|
||||||
|
lock_path = repo_root / "Cargo.lock"
|
||||||
|
if not lock_path.exists():
|
||||||
|
return "unknown"
|
||||||
|
try:
|
||||||
|
with lock_path.open("rb") as f:
|
||||||
|
lock_obj = tomllib.load(f)
|
||||||
|
except (OSError, tomllib.TOMLDecodeError):
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
versions: list[str] = []
|
||||||
|
packages = lock_obj.get("package", [])
|
||||||
|
if not isinstance(packages, list):
|
||||||
|
return "unknown"
|
||||||
|
for pkg in packages:
|
||||||
|
if not isinstance(pkg, dict):
|
||||||
|
continue
|
||||||
|
if pkg.get("name") != crate_name:
|
||||||
|
continue
|
||||||
|
ver = pkg.get("version")
|
||||||
|
if not isinstance(ver, str):
|
||||||
|
continue
|
||||||
|
if "-" in ver:
|
||||||
|
continue
|
||||||
|
if version_satisfies_requirement(ver, requirement):
|
||||||
|
versions.append(ver)
|
||||||
|
|
||||||
|
if not versions:
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
versions.sort(key=lambda v: parse_semver(v) or (0, 0, 0), reverse=True)
|
||||||
|
return versions[0]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_git_head(repo_path: Path) -> str:
|
||||||
|
git_dir = repo_path / ".git"
|
||||||
|
head_path = git_dir / "HEAD"
|
||||||
|
if not head_path.exists():
|
||||||
|
return "unknown"
|
||||||
|
try:
|
||||||
|
head_text = head_path.read_text(encoding="utf-8").strip()
|
||||||
|
except OSError:
|
||||||
|
return "unknown"
|
||||||
|
|
||||||
|
if head_text.startswith("ref: "):
|
||||||
|
ref_rel = head_text[5:].strip()
|
||||||
|
ref_path = git_dir / ref_rel
|
||||||
|
if ref_path.exists():
|
||||||
|
try:
|
||||||
|
return ref_path.read_text(encoding="utf-8").strip()
|
||||||
|
except OSError:
|
||||||
|
return "unknown"
|
||||||
|
packed_refs = git_dir / "packed-refs"
|
||||||
|
if packed_refs.exists():
|
||||||
|
try:
|
||||||
|
for line in packed_refs.read_text(encoding="utf-8").splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line or line.startswith("#") or line.startswith("^"):
|
||||||
|
continue
|
||||||
|
parts = line.split(" ")
|
||||||
|
if len(parts) == 2 and parts[1] == ref_rel:
|
||||||
|
return parts[0]
|
||||||
|
except OSError:
|
||||||
|
return "unknown"
|
||||||
|
return "unknown"
|
||||||
|
return head_text
|
||||||
|
|
||||||
|
|
||||||
|
def infer_mace_identity(repo_root: Path) -> tuple[str, str]:
|
||||||
|
cargo_path = repo_root / "Cargo.toml"
|
||||||
|
if not cargo_path.exists():
|
||||||
|
return ("mace commit id", "unknown")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with cargo_path.open("rb") as f:
|
||||||
|
cargo_obj = tomllib.load(f)
|
||||||
|
except (OSError, tomllib.TOMLDecodeError):
|
||||||
|
return ("mace commit id", "unknown")
|
||||||
|
|
||||||
|
deps = cargo_obj.get("dependencies", {})
|
||||||
|
if not isinstance(deps, dict):
|
||||||
|
return ("mace commit id", "unknown")
|
||||||
|
mace_dep = deps.get("mace-kv")
|
||||||
|
if mace_dep is None:
|
||||||
|
return ("mace commit id", "unknown")
|
||||||
|
|
||||||
|
if isinstance(mace_dep, str):
|
||||||
|
requirement = mace_dep.strip() or "*"
|
||||||
|
lock_version = resolve_lockfile_version(repo_root, "mace-kv", requirement)
|
||||||
|
if lock_version != "unknown":
|
||||||
|
return ("mace version", lock_version)
|
||||||
|
return ("mace version", resolve_crates_io_version("mace-kv", requirement))
|
||||||
|
|
||||||
|
if isinstance(mace_dep, dict):
|
||||||
|
path_value = mace_dep.get("path")
|
||||||
|
if isinstance(path_value, str) and path_value.strip():
|
||||||
|
dep_path = Path(path_value.strip())
|
||||||
|
mace_repo = dep_path if dep_path.is_absolute() else (repo_root / dep_path)
|
||||||
|
return ("mace commit id", resolve_git_head(mace_repo.resolve()))
|
||||||
|
version_req = mace_dep.get("version")
|
||||||
|
if isinstance(version_req, str) and version_req.strip():
|
||||||
|
requirement = version_req.strip()
|
||||||
|
lock_version = resolve_lockfile_version(repo_root, "mace-kv", requirement)
|
||||||
|
if lock_version != "unknown":
|
||||||
|
return ("mace version", lock_version)
|
||||||
|
return ("mace version", resolve_crates_io_version("mace-kv", requirement))
|
||||||
|
|
||||||
|
return ("mace commit id", "unknown")
|
||||||
|
|
||||||
|
|
||||||
|
def infer_rocksdb_version(repo_root: Path) -> str:
    """Read the pinned RocksDB version from ``rocksdb/vcpkg.json``.

    Looks through the manifest's ``overrides`` list for the first entry whose
    ``name`` is ``"rocksdb"`` and whose ``version`` is a string.

    Args:
        repo_root: Directory expected to contain ``rocksdb/vcpkg.json``.

    Returns:
        That version string, or ``"unknown"`` when the file is missing,
        unreadable, not valid JSON, not shaped as expected, or has no such
        override.
    """
    vcpkg_path = repo_root / "rocksdb" / "vcpkg.json"
    try:
        obj = json.loads(vcpkg_path.read_text(encoding="utf-8"))
    except (OSError, UnicodeDecodeError, json.JSONDecodeError):
        return "unknown"

    # Tolerate manifests whose root or "overrides" value has an unexpected
    # type instead of failing on .get().
    overrides = obj.get("overrides") if isinstance(obj, dict) else None
    if not isinstance(overrides, list):
        return "unknown"

    for item in overrides:
        if not isinstance(item, dict):
            continue
        version = item.get("version")
        if item.get("name") == "rocksdb" and isinstance(version, str):
            return version
    return "unknown"
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
def parse_args() -> argparse.Namespace:
|
||||||
@ -238,15 +532,23 @@ def build_report_payload(rows: list[dict[str, Any]], engines: set[str], kv_pairs
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str, source_csv_b64: str) -> str:
|
def render_html(
|
||||||
|
payload: dict[str, Any],
|
||||||
|
source_csv: str,
|
||||||
|
source_csv_name: str,
|
||||||
|
source_csv_b64: str,
|
||||||
|
mace_label: str,
|
||||||
|
mace_value: str,
|
||||||
|
rocksdb_version: str,
|
||||||
|
) -> str:
|
||||||
payload_json = json.dumps(payload, ensure_ascii=False)
|
payload_json = json.dumps(payload, ensure_ascii=False)
|
||||||
|
|
||||||
return f"""<!DOCTYPE html>
|
return f"""<!DOCTYPE html>
|
||||||
<html lang="zh-CN">
|
<html lang="en">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8" />
|
<meta charset="UTF-8" />
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
<title>Benchmark 报告</title>
|
<title>Benchmark Report</title>
|
||||||
<style>
|
<style>
|
||||||
:root {{
|
:root {{
|
||||||
--bg: #f7f8fc;
|
--bg: #f7f8fc;
|
||||||
@ -259,7 +561,7 @@ def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str,
|
|||||||
* {{ box-sizing: border-box; }}
|
* {{ box-sizing: border-box; }}
|
||||||
body {{
|
body {{
|
||||||
margin: 0;
|
margin: 0;
|
||||||
font-family: "Noto Sans SC", "Segoe UI", sans-serif;
|
font-family: "Segoe UI", sans-serif;
|
||||||
color: var(--text);
|
color: var(--text);
|
||||||
background: radial-gradient(circle at top right, #e8eeff 0%, var(--bg) 45%);
|
background: radial-gradient(circle at top right, #e8eeff 0%, var(--bg) 45%);
|
||||||
}}
|
}}
|
||||||
@ -391,22 +693,30 @@ def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str,
|
|||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div class="container">
|
<div class="container">
|
||||||
<h1>Benchmark 报告</h1>
|
<h1>Benchmark Report</h1>
|
||||||
<div class="sub">
|
<div class="sub">
|
||||||
数据来源:
|
CSV source:
|
||||||
<a id="source-download" class="source-link" href="#" download="{source_csv_name}">
|
<a id="source-download" class="source-link" href="#" download="{source_csv_name}">
|
||||||
<code>{source_csv}</code>
|
<code>{source_csv}</code>
|
||||||
</a>
|
</a>
|
||||||
(点击下载原始 CSV)
|
(click to download raw CSV)
|
||||||
|
</div>
|
||||||
|
<div class="sub">
|
||||||
|
Test tool source code:
|
||||||
|
<a class="source-link" href="{TEST_TOOL_SOURCE_URL}" target="_blank" rel="noopener noreferrer">{TEST_TOOL_SOURCE_URL}</a>
|
||||||
|
<br />
|
||||||
|
{mace_label}: <code>{mace_value}</code>
|
||||||
|
<br />
|
||||||
|
rocksdb version: <code>{rocksdb_version}</code>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="legend-wrap">
|
<div class="legend-wrap">
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<div class="legend-title">颜色: key/value 对 (跨 engine 保持一致)</div>
|
<div class="legend-title">Color: key/value pairs (consistent across engines)</div>
|
||||||
<div id="kv-legend" class="legend-list"></div>
|
<div id="kv-legend" class="legend-list"></div>
|
||||||
</div>
|
</div>
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<div class="legend-title">填充样式: engine</div>
|
<div class="legend-title">Fill style: engine</div>
|
||||||
<div id="engine-legend" class="legend-list"></div>
|
<div id="engine-legend" class="legend-list"></div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -654,7 +964,7 @@ def render_html(payload: dict[str, Any], source_csv: str, source_csv_name: str,
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="chart-card">
|
<div class="chart-card">
|
||||||
<div class="chart-tools"><button type="button" class="reset-btn" id="chart-reset-${{wIdx}}">还原缩放</button></div>
|
<div class="chart-tools"><button type="button" class="reset-btn" id="chart-reset-${{wIdx}}">Reset zoom</button></div>
|
||||||
<canvas id="metric-${{wIdx}}"></canvas>
|
<canvas id="metric-${{wIdx}}"></canvas>
|
||||||
</div>
|
</div>
|
||||||
`;
|
`;
|
||||||
@ -722,7 +1032,18 @@ def main() -> int:
|
|||||||
payload = build_report_payload(rows, engines, kv_pairs)
|
payload = build_report_payload(rows, engines, kv_pairs)
|
||||||
csv_bytes = csv_path.read_bytes()
|
csv_bytes = csv_path.read_bytes()
|
||||||
csv_b64 = base64.b64encode(csv_bytes).decode("ascii")
|
csv_b64 = base64.b64encode(csv_bytes).decode("ascii")
|
||||||
html = render_html(payload, str(csv_path), csv_path.name, csv_b64)
|
repo_root = Path(__file__).resolve().parent.parent
|
||||||
|
mace_label, mace_value = infer_mace_identity(repo_root)
|
||||||
|
rocksdb_version = infer_rocksdb_version(repo_root)
|
||||||
|
html = render_html(
|
||||||
|
payload,
|
||||||
|
str(csv_path),
|
||||||
|
csv_path.name,
|
||||||
|
csv_b64,
|
||||||
|
mace_label,
|
||||||
|
mace_value,
|
||||||
|
rocksdb_version,
|
||||||
|
)
|
||||||
|
|
||||||
output_path.write_text(html, encoding="utf-8")
|
output_path.write_text(html, encoding="utf-8")
|
||||||
print(f"HTML written to: {output_path}")
|
print(f"HTML written to: {output_path}")
|
||||||
|
|||||||
@ -1082,7 +1082,8 @@ fn run_one_op(
|
|||||||
ReadPath::Snapshot => {
|
ReadPath::Snapshot => {
|
||||||
if let Ok(view) = bucket.view() {
|
if let Ok(view) = bucket.view() {
|
||||||
for item in view.seek(prefix).take(scan_len.max(1)) {
|
for item in view.seek(prefix).take(scan_len.max(1)) {
|
||||||
std::hint::black_box(item);
|
std::hint::black_box(item.key());
|
||||||
|
std::hint::black_box(item.val());
|
||||||
}
|
}
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
@ -1092,7 +1093,8 @@ fn run_one_op(
|
|||||||
ReadPath::RwTxn => {
|
ReadPath::RwTxn => {
|
||||||
if let Ok(tx) = bucket.begin() {
|
if let Ok(tx) = bucket.begin() {
|
||||||
for item in tx.seek(prefix).take(scan_len.max(1)) {
|
for item in tx.seek(prefix).take(scan_len.max(1)) {
|
||||||
std::hint::black_box(item);
|
std::hint::black_box(item.key());
|
||||||
|
std::hint::black_box(item.val());
|
||||||
}
|
}
|
||||||
tx.commit().is_ok()
|
tx.commit().is_ok()
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user