// NOTE(review): paste banner from the source viewer ("1175 lines, 34 KiB, Rust"),
// not part of the program — kept as a comment so the file parses.
use clap::{ArgAction, Parser};
|
|
#[cfg(target_os = "linux")]
|
|
use logger::Logger;
|
|
use mace::{Mace, Options};
|
|
#[cfg(feature = "custom_alloc")]
|
|
use myalloc::{MyAlloc, print_filtered_trace};
|
|
use rand::rngs::StdRng;
|
|
use rand::seq::SliceRandom;
|
|
use rand::{Rng, SeedableRng};
|
|
use std::fs::OpenOptions;
|
|
use std::io::{BufWriter, Write};
|
|
use std::path::Path;
|
|
use std::process::exit;
|
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
|
use std::sync::{Arc, Barrier, Mutex};
|
|
use std::thread::JoinHandle;
|
|
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
|
|
|
#[cfg(feature = "custom_alloc")]
|
|
#[global_allocator]
|
|
static GLOBAL: MyAlloc = MyAlloc;
|
|
|
|
const LAT_BUCKETS: usize = 64;
|
|
const PREFIX_GROUPS: usize = 1024;
|
|
const PREFILL_BATCH: usize = 1024;
|
|
|
|
// Command-line arguments for the benchmark.
// Fields use `//` comments on purpose: clap turns `///` doc comments into
// --help text, which would change the binary's CLI output.
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None)]
struct Args {
    // Store directory. Must not already exist unless --reuse-path is given
    // (validated in main).
    #[arg(short = 'p', long)]
    path: String,

    // Legacy workload selector: insert | get | scan. Ignored when --workload
    // is present (see parse_workload).
    #[arg(short = 'm', long, default_value = "insert")]
    mode: String,

    // Preset workload id W1..W6; takes precedence over --mode.
    #[arg(long)]
    workload: Option<String>,

    // Key size in bytes; main rejects values < 16.
    #[arg(short = 'k', long, default_value_t = 16)]
    key_size: usize,

    // Value size in bytes; main rejects values < 16.
    #[arg(short = 'v', long, default_value_t = 1024)]
    value_size: usize,

    // Number of worker threads; must be > 0.
    #[arg(short = 't', long, default_value_t = 4)]
    threads: usize,

    // Total operation count (split across threads) in counted mode; also the
    // default prefill size when --prefill-keys is 0.
    #[arg(short = 'i', long, default_value_t = 10000)]
    iterations: usize,

    // Shuffle per-thread insert order (insert-only workloads, counted mode).
    #[arg(long, default_value_t = false)]
    random: bool,

    // Passed through to Options::inline_size.
    #[arg(long, default_value_t = 8192)]
    blob_size: usize,

    // Shared keyspace across threads (default on). Effective value is
    // `shared_keyspace && !no_shared_keyspace` — use the flag below to disable.
    #[arg(long, default_value_t = true)]
    shared_keyspace: bool,

    // Off switch for the shared keyspace (each thread then owns its range).
    #[arg(long, action = ArgAction::SetTrue)]
    no_shared_keyspace: bool,

    // Keys loaded before the run; 0 means "use --iterations" for workloads
    // that require prefill.
    #[arg(long, default_value_t = 0)]
    prefill_keys: usize,

    // Timed warmup phase (unmeasured). Forced to 0 for legacy modes.
    #[arg(long, default_value_t = 0)]
    warmup_secs: u64,

    // Timed measurement phase; 0 selects counted mode (--iterations ops).
    // Forced to 0 for legacy modes.
    #[arg(long, default_value_t = 0)]
    measure_secs: u64,

    // Items visited per scan operation.
    #[arg(long, default_value_t = 100)]
    scan_len: usize,

    // Skew parameter for the zipf-like sampler; must lie in (0, 1).
    #[arg(long, default_value_t = 0.99)]
    zipf_theta: f64,

    // Read implementation: snapshot | rw_txn (see ReadPath::parse).
    #[arg(long, default_value = "snapshot")]
    read_path: String,

    // Commit durability: relaxed | durable (maps to Options::sync_on_write).
    #[arg(long, default_value = "relaxed")]
    durability: String,

    // CSV file the result row is appended to.
    #[arg(long, default_value = "benchmark_results.csv")]
    result_file: String,

    // Remove the store after the run (maps to Options::tmp_store). Effective
    // value is `cleanup && !no_cleanup`.
    #[arg(long, default_value_t = true)]
    cleanup: bool,

    // Off switch for cleanup (keep the store on disk).
    #[arg(long, action = ArgAction::SetTrue)]
    no_cleanup: bool,

    // Skip the prefill phase; requires --reuse-path and an existing store.
    #[arg(long, default_value_t = false)]
    skip_prefill: bool,

    // Allow opening an already-existing store directory.
    #[arg(long, default_value_t = false)]
    reuse_path: bool,
}
|
|
|
|
/// Key-selection distribution used when picking ids for reads/updates/scans.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Distribution {
    /// Every key id equally likely.
    Uniform,
    /// Skewed toward low key ids (see `sample_zipf_like`).
    Zipf,
}

impl Distribution {
    /// Label written to the CSV `distribution` column.
    fn as_str(self) -> &'static str {
        if self == Distribution::Uniform {
            "uniform"
        } else {
            "zipf"
        }
    }
}
|
|
|
|
/// Which transaction flavor read and scan operations use (see `run_one_op`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum ReadPath {
    /// Read-only snapshot view (`bucket.view()`).
    Snapshot,
    /// Full read-write transaction (`bucket.begin()` + commit).
    RwTxn,
}
|
|
|
|
/// Commit durability selector; `Durable` sets `Options::sync_on_write` in main.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum DurabilityMode {
    Relaxed,
    Durable,
}

impl DurabilityMode {
    /// Parse a CLI string (trimmed, case-insensitive) into a mode; `None`
    /// for anything other than "relaxed" / "durable".
    fn parse(raw: &str) -> Option<Self> {
        let normalized = raw.trim().to_ascii_lowercase();
        if normalized == "relaxed" {
            Some(DurabilityMode::Relaxed)
        } else if normalized == "durable" {
            Some(DurabilityMode::Durable)
        } else {
            None
        }
    }

    /// Canonical label used in the CSV row and the summary line.
    fn as_str(self) -> &'static str {
        if matches!(self, DurabilityMode::Durable) {
            "durable"
        } else {
            "relaxed"
        }
    }
}
|
|
|
|
impl ReadPath {
|
|
fn parse(raw: &str) -> Option<Self> {
|
|
match raw.trim().to_ascii_lowercase().as_str() {
|
|
"snapshot" => Some(Self::Snapshot),
|
|
"rw_txn" | "rwtxn" | "txn" => Some(Self::RwTxn),
|
|
_ => None,
|
|
}
|
|
}
|
|
|
|
fn as_str(self) -> &'static str {
|
|
match self {
|
|
ReadPath::Snapshot => "snapshot",
|
|
ReadPath::RwTxn => "rw_txn",
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Fully-resolved benchmark workload: operation mix, key distribution and
/// prefill/insert behavior. Built by `parse_workload` from the CLI flags.
#[derive(Clone, Debug)]
struct WorkloadSpec {
    /// "W1".."W6" for presets, or "LEGACY_INSERT"/"LEGACY_GET"/"LEGACY_SCAN".
    id: String,
    /// Label written to the CSV `mode` column ("mixed", "scan", "insert", ...).
    mode_label: String,
    /// Key-selection distribution for generated operations.
    distribution: Distribution,
    /// Percent of operations that are point reads (0..=100).
    read_pct: u8,
    /// Percent of operations that are updates/inserts (0..=100).
    update_pct: u8,
    /// Percent of operations that are prefix scans (0..=100).
    scan_pct: u8,
    /// Items visited per scan operation.
    scan_len: usize,
    /// Whether keys must be loaded before the run (read/mixed/scan workloads).
    requires_prefill: bool,
    /// Insert-only workload: writes use `put` with fresh ids instead of upsert.
    insert_only: bool,
}
|
|
|
|
/// Contiguous slice `[start, start + len)` of the global key/op id space
/// assigned to one worker thread by `split_ranges`.
#[derive(Clone, Copy, Debug)]
struct ThreadRange {
    start: usize,
    len: usize,
}
|
|
|
|
/// Host facts recorded alongside each result row (see `gather_machine_meta`).
/// Linux-specific sources (/proc) degrade to "unknown"/0 on other platforms.
#[derive(Clone, Debug)]
struct MachineMeta {
    host: String,
    os: String,
    arch: String,
    kernel: String,
    cpu_cores: usize,
    /// MemTotal from /proc/meminfo, in KiB; 0 when unavailable.
    mem_total_kb: u64,
    /// MemAvailable from /proc/meminfo, in KiB; 0 when unavailable.
    mem_available_kb: u64,
}
|
|
|
|
/// Latency quantiles in microseconds, derived from the merged power-of-two
/// histogram (`histogram_quantile_us`), so values are bucket lower bounds,
/// not exact percentiles.
#[derive(Clone, Copy, Debug, Default)]
struct Quantiles {
    p50_us: u64,
    p95_us: u64,
    p99_us: u64,
    p999_us: u64,
}
|
|
|
|
/// One benchmark result, serialized by `result_row_csv` in the column order
/// declared by `result_header`.
#[derive(Clone, Debug)]
struct ResultRow {
    /// Wall-clock timestamp of the run, milliseconds since the Unix epoch.
    ts_epoch_ms: u128,
    /// Always "mace" in this binary.
    engine: &'static str,
    workload_id: String,
    mode: String,
    durability_mode: DurabilityMode,
    threads: usize,
    key_size: usize,
    value_size: usize,
    prefill_keys: usize,
    shared_keyspace: bool,
    distribution: Distribution,
    zipf_theta: f64,
    read_pct: u8,
    update_pct: u8,
    scan_pct: u8,
    scan_len: usize,
    read_path: ReadPath,
    warmup_secs: u64,
    measure_secs: u64,
    /// Operations counted during the measured phase (all threads).
    total_ops: u64,
    /// Operations that failed during the measured phase.
    error_ops: u64,
    ops_per_sec: f64,
    quantiles: Quantiles,
    /// Measured-phase duration in microseconds.
    elapsed_us: u64,
    meta: MachineMeta,
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct ThreadStats {
|
|
total_ops: u64,
|
|
error_ops: u64,
|
|
hist: [u64; LAT_BUCKETS],
|
|
}
|
|
|
|
impl Default for ThreadStats {
|
|
fn default() -> Self {
|
|
Self {
|
|
total_ops: 0,
|
|
error_ops: 0,
|
|
hist: [0; LAT_BUCKETS],
|
|
}
|
|
}
|
|
}
|
|
|
|
fn parse_workload(args: &Args) -> Result<WorkloadSpec, String> {
|
|
if let Some(w) = args.workload.as_ref() {
|
|
let id = w.trim().to_ascii_uppercase();
|
|
let spec = match id.as_str() {
|
|
"W1" => WorkloadSpec {
|
|
id,
|
|
mode_label: "mixed".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 95,
|
|
update_pct: 5,
|
|
scan_pct: 0,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
},
|
|
"W2" => WorkloadSpec {
|
|
id,
|
|
mode_label: "mixed".into(),
|
|
distribution: Distribution::Zipf,
|
|
read_pct: 95,
|
|
update_pct: 5,
|
|
scan_pct: 0,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
},
|
|
"W3" => WorkloadSpec {
|
|
id,
|
|
mode_label: "mixed".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 50,
|
|
update_pct: 50,
|
|
scan_pct: 0,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
},
|
|
"W4" => WorkloadSpec {
|
|
id,
|
|
mode_label: "mixed".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 5,
|
|
update_pct: 95,
|
|
scan_pct: 0,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
},
|
|
"W5" => WorkloadSpec {
|
|
id,
|
|
mode_label: "mixed".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 70,
|
|
update_pct: 25,
|
|
scan_pct: 5,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
},
|
|
"W6" => WorkloadSpec {
|
|
id,
|
|
mode_label: "scan".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 0,
|
|
update_pct: 0,
|
|
scan_pct: 100,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
},
|
|
_ => {
|
|
return Err(format!(
|
|
"invalid workload `{}` (supported: W1, W2, W3, W4, W5, W6)",
|
|
w
|
|
));
|
|
}
|
|
};
|
|
return Ok(spec);
|
|
}
|
|
|
|
let mode = args.mode.trim().to_ascii_lowercase();
|
|
match mode.as_str() {
|
|
"insert" => Ok(WorkloadSpec {
|
|
id: "LEGACY_INSERT".into(),
|
|
mode_label: "insert".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 0,
|
|
update_pct: 100,
|
|
scan_pct: 0,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: false,
|
|
insert_only: true,
|
|
}),
|
|
"get" => Ok(WorkloadSpec {
|
|
id: "LEGACY_GET".into(),
|
|
mode_label: "get".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 100,
|
|
update_pct: 0,
|
|
scan_pct: 0,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
}),
|
|
"scan" => Ok(WorkloadSpec {
|
|
id: "LEGACY_SCAN".into(),
|
|
mode_label: "scan".into(),
|
|
distribution: Distribution::Uniform,
|
|
read_pct: 0,
|
|
update_pct: 0,
|
|
scan_pct: 100,
|
|
scan_len: args.scan_len,
|
|
requires_prefill: true,
|
|
insert_only: false,
|
|
}),
|
|
_ => Err(format!(
|
|
"invalid mode `{}` (supported: insert, get, scan)",
|
|
args.mode
|
|
)),
|
|
}
|
|
}
|
|
|
|
fn workload_runs_gc(spec: &WorkloadSpec) -> bool {
|
|
spec.requires_prefill
|
|
}
|
|
|
|
fn split_ranges(total: usize, n: usize) -> Vec<ThreadRange> {
|
|
let mut ranges = Vec::with_capacity(n);
|
|
if n == 0 {
|
|
return ranges;
|
|
}
|
|
let base = total / n;
|
|
let rem = total % n;
|
|
let mut start = 0;
|
|
for tid in 0..n {
|
|
let len = base + usize::from(tid < rem);
|
|
ranges.push(ThreadRange { start, len });
|
|
start += len;
|
|
}
|
|
ranges
|
|
}
|
|
|
|
fn make_shared_key(id: usize, key_size: usize) -> Vec<u8> {
|
|
let group = id % PREFIX_GROUPS;
|
|
let mut key = format!("s{:03x}_{:010x}", group, id).into_bytes();
|
|
if key.len() < key_size {
|
|
key.resize(key_size, b'x');
|
|
}
|
|
key
|
|
}
|
|
|
|
fn make_thread_key(tid: usize, local_id: usize, key_size: usize) -> Vec<u8> {
|
|
let mut key = format!("t{:03x}_{:010x}", tid % PREFIX_GROUPS, local_id).into_bytes();
|
|
if key.len() < key_size {
|
|
key.resize(key_size, b'x');
|
|
}
|
|
key
|
|
}
|
|
|
|
fn make_shared_prefix(key_id: usize) -> Vec<u8> {
|
|
let group = key_id % PREFIX_GROUPS;
|
|
format!("s{:03x}_", group).into_bytes()
|
|
}
|
|
|
|
fn make_thread_prefix(tid: usize) -> Vec<u8> {
|
|
format!("t{:03x}_", tid % PREFIX_GROUPS).into_bytes()
|
|
}
|
|
|
|
fn latency_bucket(us: u64) -> usize {
|
|
let v = us.max(1);
|
|
(63 - v.leading_zeros() as usize).min(LAT_BUCKETS - 1)
|
|
}
|
|
|
|
/// Approximate the `q`-quantile (q in 0.0..=1.0) of the latency histogram,
/// in microseconds.
///
/// Bucket `idx` holds samples in `[2^idx, 2^(idx+1))` µs (see
/// `latency_bucket`), so the result is the lower bound `2^idx` of the first
/// bucket whose cumulative count reaches `ceil(total * q)`; bucket 0 reports
/// 1 µs. Returns 0 when the histogram is empty.
fn histogram_quantile_us(hist: &[u64; LAT_BUCKETS], q: f64) -> u64 {
    let total: u64 = hist.iter().sum();
    if total == 0 {
        // No samples recorded (e.g. a warmup-only run).
        return 0;
    }
    // Rank (1-based) of the sample we are looking for, rounded up.
    let target = ((total as f64) * q).ceil() as u64;
    let mut acc = 0u64;
    for (idx, cnt) in hist.iter().enumerate() {
        acc += *cnt;
        if acc >= target {
            // Report the bucket's lower bound; bucket 0 covers <2 µs samples.
            return if idx == 0 { 1 } else { 1u64 << idx };
        }
    }
    // Unreachable in practice (acc reaches total >= target); cap at top bucket.
    1u64 << (LAT_BUCKETS - 1)
}
|
|
|
|
/// Milliseconds since the Unix epoch; 0 if the system clock reads before
/// the epoch (duration_since error).
fn now_epoch_ms() -> u128 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_millis(),
        Err(_) => 0,
    }
}
|
|
|
|
/// Read the numeric value (KiB) of the first /proc/meminfo line starting with
/// `key` (e.g. "MemTotal:"). Returns 0 when the file is missing (non-Linux),
/// the key is absent, or the value does not parse.
fn read_proc_value_kb(key: &str) -> u64 {
    let content = match std::fs::read_to_string("/proc/meminfo") {
        Ok(c) => c,
        Err(_) => return 0,
    };
    content
        .lines()
        .find_map(|line| line.strip_prefix(key))
        .map(|rest| {
            rest.split_whitespace()
                .next()
                .unwrap_or("0")
                .parse::<u64>()
                .unwrap_or(0)
        })
        .unwrap_or(0)
}
|
|
|
|
fn gather_machine_meta() -> MachineMeta {
|
|
let host = std::fs::read_to_string("/proc/sys/kernel/hostname")
|
|
.ok()
|
|
.map(|s| s.trim().to_string())
|
|
.filter(|s| !s.is_empty())
|
|
.or_else(|| std::env::var("HOSTNAME").ok())
|
|
.unwrap_or_else(|| "unknown".to_string());
|
|
|
|
let kernel = std::fs::read_to_string("/proc/sys/kernel/osrelease")
|
|
.ok()
|
|
.map(|s| s.trim().to_string())
|
|
.unwrap_or_else(|| "unknown".to_string());
|
|
|
|
MachineMeta {
|
|
host,
|
|
os: std::env::consts::OS.to_string(),
|
|
arch: std::env::consts::ARCH.to_string(),
|
|
kernel,
|
|
cpu_cores: std::thread::available_parallelism()
|
|
.map(|n| n.get())
|
|
.unwrap_or(1),
|
|
mem_total_kb: read_proc_value_kb("MemTotal:"),
|
|
mem_available_kb: read_proc_value_kb("MemAvailable:"),
|
|
}
|
|
}
|
|
|
|
/// Make a free-form string safe for the unquoted CSV format by replacing
/// commas and line breaks with spaces.
fn csv_escape(raw: &str) -> String {
    raw.chars()
        .map(|c| if matches!(c, ',' | '\n' | '\r') { ' ' } else { c })
        .collect()
}
|
|
|
|
/// Header line for the results CSV. Column order must stay in lock-step with
/// `result_row_csv`; the leading `schema_version` column is emitted as "v2".
fn result_header() -> &'static str {
    "schema_version,ts_epoch_ms,engine,workload_id,mode,durability_mode,threads,key_size,value_size,prefill_keys,shared_keyspace,distribution,zipf_theta,read_pct,update_pct,scan_pct,scan_len,read_path,warmup_secs,measure_secs,total_ops,error_ops,ops_per_sec,p50_us,p95_us,p99_us,p999_us,elapsed_us,host,os,arch,kernel,cpu_cores,mem_total_kb,mem_available_kb"
}
|
|
|
|
/// Serialize one result row as a CSV line in the column order declared by
/// `result_header`. Free-form strings go through `csv_escape`; `zipf_theta`
/// is written with 4 decimals and `ops_per_sec` with 3.
fn result_row_csv(row: &ResultRow) -> String {
    format!(
        // Positional arguments below must match result_header() exactly.
        "v2,{},{},{},{},{},{},{},{},{},{},{},{:.4},{},{},{},{},{},{},{},{},{},{:.3},{},{},{},{},{},{},{},{},{},{},{},{}",
        row.ts_epoch_ms,
        row.engine,
        csv_escape(&row.workload_id),
        csv_escape(&row.mode),
        row.durability_mode.as_str(),
        row.threads,
        row.key_size,
        row.value_size,
        row.prefill_keys,
        row.shared_keyspace,
        row.distribution.as_str(),
        row.zipf_theta,
        row.read_pct,
        row.update_pct,
        row.scan_pct,
        row.scan_len,
        row.read_path.as_str(),
        row.warmup_secs,
        row.measure_secs,
        row.total_ops,
        row.error_ops,
        row.ops_per_sec,
        row.quantiles.p50_us,
        row.quantiles.p95_us,
        row.quantiles.p99_us,
        row.quantiles.p999_us,
        row.elapsed_us,
        csv_escape(&row.meta.host),
        csv_escape(&row.meta.os),
        csv_escape(&row.meta.arch),
        csv_escape(&row.meta.kernel),
        row.meta.cpu_cores,
        row.meta.mem_total_kb,
        row.meta.mem_available_kb,
    )
}
|
|
|
|
fn append_result_row(path: &str, row: &ResultRow) -> std::io::Result<()> {
|
|
let exists = Path::new(path).exists();
|
|
let file = OpenOptions::new().create(true).append(true).open(path)?;
|
|
let mut writer = BufWriter::new(file);
|
|
if !exists {
|
|
writer.write_all(result_header().as_bytes())?;
|
|
writer.write_all(b"\n")?;
|
|
}
|
|
writer.write_all(result_row_csv(row).as_bytes())?;
|
|
writer.write_all(b"\n")?;
|
|
writer.flush()
|
|
}
|
|
|
|
fn sample_zipf_like(rng: &mut StdRng, n: usize, theta: f64) -> usize {
|
|
if n <= 1 {
|
|
return 0;
|
|
}
|
|
let t = theta.clamp(0.0001, 0.9999);
|
|
let u: f64 = rng.random();
|
|
let scaled = u.powf(1.0 / (1.0 - t));
|
|
let idx = (scaled * n as f64) as usize;
|
|
idx.min(n - 1)
|
|
}
|
|
|
|
fn pick_key_id(
|
|
rng: &mut StdRng,
|
|
distribution: Distribution,
|
|
theta: f64,
|
|
shared: bool,
|
|
prefill_keys: usize,
|
|
local_len: usize,
|
|
) -> Option<usize> {
|
|
if shared {
|
|
if prefill_keys == 0 {
|
|
return None;
|
|
}
|
|
return Some(match distribution {
|
|
Distribution::Uniform => rng.random_range(0..prefill_keys),
|
|
Distribution::Zipf => sample_zipf_like(rng, prefill_keys, theta),
|
|
});
|
|
}
|
|
|
|
if local_len == 0 {
|
|
return None;
|
|
}
|
|
|
|
Some(match distribution {
|
|
Distribution::Uniform => rng.random_range(0..local_len),
|
|
Distribution::Zipf => sample_zipf_like(rng, local_len, theta),
|
|
})
|
|
}
|
|
|
|
/// The three operation types a worker can issue per iteration (see
/// `run_one_op`).
#[derive(Clone, Copy)]
enum OpKind {
    /// Point lookup of an existing key.
    Read,
    /// Write: `put` with a fresh id for insert-only workloads, `upsert` of an
    /// existing id otherwise.
    Update,
    /// Prefix scan of up to `scan_len` items.
    Scan,
}
|
|
|
|
fn pick_op_kind(rng: &mut StdRng, spec: &WorkloadSpec) -> OpKind {
|
|
if spec.insert_only {
|
|
return OpKind::Update;
|
|
}
|
|
if spec.scan_pct == 100 {
|
|
return OpKind::Scan;
|
|
}
|
|
let roll: u8 = rng.random_range(0..100);
|
|
if roll < spec.read_pct {
|
|
OpKind::Read
|
|
} else if roll < spec.read_pct.saturating_add(spec.update_pct) {
|
|
OpKind::Update
|
|
} else {
|
|
OpKind::Scan
|
|
}
|
|
}
|
|
|
|
/// Benchmark entry point: validate flags, open the store, optionally prefill,
/// run worker threads through warmup + measured phases, and append one CSV
/// result row.
fn main() {
    // File logging is only wired up on Linux; errors only to keep hot paths quiet.
    #[cfg(target_os = "linux")]
    {
        Logger::init().add_file("kv_bench.log", true);
        log::set_max_level(log::LevelFilter::Error);
    }

    let args = Args::parse();
    let path = Path::new(&args.path);
    // The --no-* flags override their positive defaults.
    let shared_keyspace = args.shared_keyspace && !args.no_shared_keyspace;
    let cleanup = args.cleanup && !args.no_cleanup;

    // --- flag validation; every failure exits with status 1 ---
    if args.path.is_empty() {
        eprintln!("path is empty");
        exit(1);
    }
    if path.exists() && !args.reuse_path {
        eprintln!("path {:?} already exists", args.path);
        exit(1);
    }
    if args.skip_prefill && !args.reuse_path {
        eprintln!("--skip-prefill requires --reuse-path");
        exit(1);
    }
    if args.skip_prefill && !path.exists() {
        eprintln!(
            "--skip-prefill requires existing path, but `{}` does not exist",
            args.path
        );
        exit(1);
    }
    if args.threads == 0 {
        eprintln!("threads must be greater than 0");
        exit(1);
    }
    if args.key_size < 16 || args.value_size < 16 {
        eprintln!("key_size and value_size must be >= 16");
        exit(1);
    }
    // NOTE(review): the half-open range 0.0..1.0 accepts 0.0, while the
    // message claims the exclusive interval (0, 1) — confirm intent.
    if !(0.0..1.0).contains(&args.zipf_theta) {
        eprintln!("zipf_theta must be in range (0, 1)");
        exit(1);
    }

    let read_path = match ReadPath::parse(&args.read_path) {
        Some(r) => r,
        None => {
            eprintln!(
                "invalid read_path `{}` (supported: snapshot, rw_txn)",
                args.read_path
            );
            exit(1);
        }
    };

    let durability_mode = match DurabilityMode::parse(&args.durability) {
        Some(v) => v,
        None => {
            eprintln!(
                "invalid durability `{}` (supported: relaxed, durable)",
                args.durability
            );
            exit(1);
        }
    };

    let workload = match parse_workload(&args) {
        Ok(w) => w,
        Err(e) => {
            eprintln!("{}", e);
            exit(1);
        }
    };
    // Legacy modes are always counted runs: timed phases are forced off.
    let legacy_mode = workload.id.starts_with("LEGACY_");
    let effective_warmup_secs = if legacy_mode { 0 } else { args.warmup_secs };
    let effective_measure_secs = if legacy_mode { 0 } else { args.measure_secs };

    // Mixed read/update workloads only make sense against one shared key set.
    let mixed_workload = workload.read_pct > 0 && workload.update_pct > 0;
    if mixed_workload && !shared_keyspace {
        eprintln!("mixed workloads require shared keyspace");
        exit(1);
    }

    // Prefill sizing: explicit --prefill-keys wins, else fall back to
    // --iterations (at least 1) for workloads that need existing data.
    let prefill_keys = if workload.requires_prefill {
        if args.prefill_keys > 0 {
            args.prefill_keys
        } else {
            args.iterations.max(1)
        }
    } else {
        args.prefill_keys
    };

    if workload.requires_prefill && prefill_keys == 0 {
        eprintln!("prefill_keys must be > 0 for read/mixed/scan workloads");
        exit(1);
    }

    let thread_prefill_ranges = split_ranges(prefill_keys, args.threads);

    // --- engine options (sizes are fixed tuning constants for this bench) ---
    let mut opt = Options::new(path);
    opt.sync_on_write = durability_mode == DurabilityMode::Durable;
    opt.inline_size = args.blob_size;
    opt.cache_capacity = 3 << 30; // 3 GiB
    opt.data_file_size = 64 << 20; // 64 MiB
    opt.max_log_size = 1 << 30; // 1 GiB
    opt.default_arenas = 128;
    opt.tmp_store = cleanup; // tmp_store removes the files on drop

    let db = Mace::new(opt.validate().unwrap()).unwrap();
    // GC stays off during prefill; re-enabled below for prefilled workloads.
    db.disable_gc();
    let bkt = if args.reuse_path {
        db.get_bucket("default")
            .or_else(|_| db.new_bucket("default"))
            .unwrap()
    } else {
        db.new_bucket("default").unwrap()
    };
    // One shared value payload for every write.
    let value = Arc::new(vec![b'0'; args.value_size]);

    // --- prefill: one thread per range, committing in PREFILL_BATCH chunks ---
    if workload.requires_prefill && !args.skip_prefill {
        let mut fill_handles = Vec::with_capacity(thread_prefill_ranges.len());
        for (tid, tr) in thread_prefill_ranges.iter().copied().enumerate() {
            let bucket = bkt.clone();
            let v = value.clone();
            let key_size = args.key_size;
            let shared = shared_keyspace;
            fill_handles.push(std::thread::spawn(move || {
                coreid::bind_core(tid);
                let mut in_batch = 0usize;
                let mut tx = bucket.begin().unwrap();
                for i in 0..tr.len {
                    // Shared keyspace uses global ids; otherwise ids are
                    // thread-local starting at 0.
                    let key = if shared {
                        make_shared_key(tr.start + i, key_size)
                    } else {
                        make_thread_key(tid, i, key_size)
                    };
                    tx.put(key.as_slice(), v.as_slice()).unwrap();
                    in_batch += 1;
                    if in_batch >= PREFILL_BATCH {
                        tx.commit().unwrap();
                        tx = bucket.begin().unwrap();
                        in_batch = 0;
                    }
                }
                // Commit the final partial batch.
                if in_batch > 0 {
                    tx.commit().unwrap();
                }
            }));
        }
        for h in fill_handles {
            h.join().unwrap();
        }
    }

    if workload_runs_gc(&workload) {
        db.enable_gc();
        db.start_gc();
    }

    let op_counts = split_ranges(args.iterations, args.threads);
    // +1: main participates in both barriers to release the phases.
    let ready_barrier = Arc::new(Barrier::new(args.threads + 1));
    let measure_barrier = Arc::new(Barrier::new(args.threads + 1));
    // Earliest Instant observed at measure start (each thread races to set it).
    let measure_start = Arc::new(Mutex::new(None::<Instant>));
    // Global id source for shared-keyspace inserts in timed mode.
    let insert_counter = Arc::new(AtomicUsize::new(0));

    let handles: Vec<JoinHandle<ThreadStats>> = (0..args.threads)
        .map(|tid| {
            // Clone/copy everything the worker closure needs before the move.
            let bucket = bkt.clone();
            let v = value.clone();
            let spec = workload.clone();
            let ready = Arc::clone(&ready_barrier);
            let measure = Arc::clone(&measure_barrier);
            let measure_start_slot = Arc::clone(&measure_start);
            let ins_ctr = Arc::clone(&insert_counter);
            let key_size = args.key_size;
            let random_insert = args.random;
            let read_path_mode = read_path;
            let warmup_secs = effective_warmup_secs;
            let measure_secs = effective_measure_secs;
            let distribution = spec.distribution;
            let zipf_theta = args.zipf_theta;
            let scan_len = spec.scan_len;
            let shared = shared_keyspace;
            let prefill_key_count = prefill_keys;
            let local_key_len = thread_prefill_ranges[tid].len;
            let local_op_start = op_counts[tid].start;
            let local_op_count = op_counts[tid].len;

            std::thread::spawn(move || {
                coreid::bind_core(tid);
                // Per-thread seed mixing wall clock, thread id and prefill
                // count. Note precedence: wrapping_add/mul apply to
                // (tid as u64) before the xors.
                let seed = (now_epoch_ms() as u64)
                    ^ (tid as u64)
                        .wrapping_add(1)
                        .wrapping_mul(0x9E37_79B9_7F4A_7C15)
                    ^ (prefill_key_count as u64).wrapping_shl(7);
                let mut rng = StdRng::seed_from_u64(seed);
                let mut stats = ThreadStats::default();
                let mut local_insert_idx = 0usize;

                // Counted-mode insert order; optionally shuffled by --random.
                let mut count_indices: Vec<usize> = (0..local_op_count).collect();
                if random_insert && spec.insert_only {
                    count_indices.shuffle(&mut rng);
                }

                // Wait until all workers (and main) are ready.
                ready.wait();

                // Warmup phase: same op mix, but stats is None so nothing is
                // recorded.
                if warmup_secs > 0 {
                    let deadline = Instant::now() + Duration::from_secs(warmup_secs);
                    while Instant::now() < deadline {
                        let op = pick_op_kind(&mut rng, &spec);
                        run_one_op(
                            op,
                            &bucket,
                            &v,
                            &mut rng,
                            &spec,
                            distribution,
                            zipf_theta,
                            read_path_mode,
                            key_size,
                            scan_len,
                            shared,
                            prefill_key_count,
                            local_key_len,
                            tid,
                            &ins_ctr,
                            &mut local_insert_idx,
                            None,
                            None,
                        );
                    }
                }

                measure.wait();
                // Record the earliest measure-start Instant across threads.
                {
                    let now = Instant::now();
                    let mut slot = measure_start_slot.lock().unwrap();
                    if slot.map_or(true, |prev| now < prev) {
                        *slot = Some(now);
                    }
                }

                if measure_secs > 0 {
                    // Timed mode: run the mix until the deadline, recording stats.
                    let deadline = Instant::now() + Duration::from_secs(measure_secs);
                    while Instant::now() < deadline {
                        let op = pick_op_kind(&mut rng, &spec);
                        run_one_op(
                            op,
                            &bucket,
                            &v,
                            &mut rng,
                            &spec,
                            distribution,
                            zipf_theta,
                            read_path_mode,
                            key_size,
                            scan_len,
                            shared,
                            prefill_key_count,
                            local_key_len,
                            tid,
                            &ins_ctr,
                            &mut local_insert_idx,
                            None,
                            Some(&mut stats),
                        );
                    }
                } else {
                    // Counted mode: exactly local_op_count ops. Insert-only
                    // runs pin each op to a deterministic key id so every key
                    // is written exactly once.
                    for idx in count_indices {
                        let fixed_insert_id = if spec.insert_only {
                            Some(if shared { local_op_start + idx } else { idx })
                        } else {
                            None
                        };
                        let op = if spec.insert_only {
                            OpKind::Update
                        } else {
                            pick_op_kind(&mut rng, &spec)
                        };
                        run_one_op(
                            op,
                            &bucket,
                            &v,
                            &mut rng,
                            &spec,
                            distribution,
                            zipf_theta,
                            read_path_mode,
                            key_size,
                            scan_len,
                            shared,
                            prefill_key_count,
                            local_key_len,
                            tid,
                            &ins_ctr,
                            &mut local_insert_idx,
                            fixed_insert_id,
                            Some(&mut stats),
                        );
                    }
                }

                stats
            })
        })
        .collect();

    // Release the workers into warmup, then into the measured phase; main also
    // competes to record the earliest measure-start time.
    ready_barrier.wait();
    measure_barrier.wait();
    {
        let now = Instant::now();
        let mut slot = measure_start.lock().unwrap();
        if slot.map_or(true, |prev| now < prev) {
            *slot = Some(now);
        }
    }

    // --- merge per-thread stats ---
    let mut merged_hist = [0u64; LAT_BUCKETS];
    let mut total_ops = 0u64;
    let mut error_ops = 0u64;

    for h in handles {
        let s = h.join().unwrap();
        total_ops += s.total_ops;
        error_ops += s.error_ops;
        for (i, v) in s.hist.iter().enumerate() {
            merged_hist[i] += *v;
        }
    }

    let measure_end = Instant::now();
    let measure_started = (*measure_start.lock().unwrap()).unwrap_or(measure_end);
    let elapsed_us = measure_end.duration_since(measure_started).as_micros() as u64;
    let ops_per_sec = if elapsed_us == 0 {
        0.0
    } else {
        (total_ops as f64) * 1_000_000.0 / (elapsed_us as f64)
    };

    let quantiles = Quantiles {
        p50_us: histogram_quantile_us(&merged_hist, 0.50),
        p95_us: histogram_quantile_us(&merged_hist, 0.95),
        p99_us: histogram_quantile_us(&merged_hist, 0.99),
        p999_us: histogram_quantile_us(&merged_hist, 0.999),
    };

    let row = ResultRow {
        ts_epoch_ms: now_epoch_ms(),
        engine: "mace",
        workload_id: workload.id.clone(),
        mode: workload.mode_label.clone(),
        durability_mode,
        threads: args.threads,
        key_size: args.key_size,
        value_size: args.value_size,
        prefill_keys,
        shared_keyspace,
        distribution: workload.distribution,
        zipf_theta: args.zipf_theta,
        read_pct: workload.read_pct,
        update_pct: workload.update_pct,
        scan_pct: workload.scan_pct,
        scan_len: workload.scan_len,
        read_path,
        warmup_secs: effective_warmup_secs,
        measure_secs: effective_measure_secs,
        total_ops,
        error_ops,
        ops_per_sec,
        quantiles,
        elapsed_us,
        meta: gather_machine_meta(),
    };

    if let Err(e) = append_result_row(&args.result_file, &row) {
        eprintln!("failed to write result file {}: {}", args.result_file, e);
        exit(1);
    }

    println!(
        "engine=mace workload={} mode={} durability={} threads={} ops={} err={} qps={:.2} p99_us={} result_file={}",
        row.workload_id,
        row.mode,
        row.durability_mode.as_str(),
        row.threads,
        row.total_ops,
        row.error_ops,
        row.ops_per_sec,
        row.quantiles.p99_us,
        args.result_file
    );

    // Drop the store before dumping allocator traces (tmp_store cleans up here).
    drop(db);
    #[cfg(feature = "custom_alloc")]
    print_filtered_trace(|x, y| log::info!("{}{}", x, y));
}
|
|
|
|
/// Execute a single benchmark operation against `bucket` and, when `stats`
/// is provided (measured phase), record its outcome and latency.
///
/// Success semantics:
/// - Read: key must be found (`is_some`) — a missing key counts as an error;
/// - Update: write and commit must both succeed;
/// - Scan: the iterator is drained through `black_box` (so it isn't optimized
///   away) and, on the rw_txn path, the commit must succeed.
/// Latency is only timed when `stats` is `Some` (warmup passes `None`).
#[allow(clippy::too_many_arguments)]
fn run_one_op(
    op: OpKind,
    bucket: &mace::Bucket,
    value: &Arc<Vec<u8>>,
    rng: &mut StdRng,
    spec: &WorkloadSpec,
    distribution: Distribution,
    zipf_theta: f64,
    read_path: ReadPath,
    key_size: usize,
    scan_len: usize,
    shared_keyspace: bool,
    prefill_keys: usize,
    local_key_len: usize,
    tid: usize,
    insert_counter: &AtomicUsize,
    // Monotone id source for non-shared insert-only ops in timed mode.
    local_insert_idx: &mut usize,
    // Deterministic key id for counted-mode inserts; None in timed mode.
    fixed_insert_id: Option<usize>,
    stats: Option<&mut ThreadStats>,
) {
    // Only pay for Instant::now() when this op is being measured.
    let start = stats.as_ref().map(|_| Instant::now());

    let ok = match op {
        OpKind::Read => {
            let maybe_id = pick_key_id(
                rng,
                distribution,
                zipf_theta,
                shared_keyspace,
                prefill_keys,
                local_key_len,
            );
            if let Some(id) = maybe_id {
                let key = if shared_keyspace {
                    make_shared_key(id, key_size)
                } else {
                    make_thread_key(tid, id, key_size)
                };
                match read_path {
                    ReadPath::Snapshot => {
                        // ok only when the view opens AND the key is present.
                        bucket.view().ok().and_then(|tx| tx.get(key).ok()).is_some()
                    }
                    ReadPath::RwTxn => {
                        if let Ok(tx) = bucket.begin() {
                            // Commit even on a failed get so the txn closes.
                            let get_ok = tx.get(key).is_ok();
                            let commit_ok = tx.commit().is_ok();
                            get_ok && commit_ok
                        } else {
                            false
                        }
                    }
                }
            } else {
                // Empty key domain: counted as an error.
                false
            }
        }
        OpKind::Update => {
            // Resolve the key: insert-only ops mint fresh ids (fixed in
            // counted mode, counter-based in timed mode); mixed workloads
            // target an existing prefilled id.
            let key_opt = if spec.insert_only {
                if let Some(id) = fixed_insert_id {
                    if shared_keyspace {
                        Some(make_shared_key(id, key_size))
                    } else {
                        Some(make_thread_key(tid, id, key_size))
                    }
                } else if shared_keyspace {
                    let id = insert_counter.fetch_add(1, Ordering::Relaxed);
                    Some(make_shared_key(id, key_size))
                } else {
                    let id = *local_insert_idx;
                    *local_insert_idx += 1;
                    Some(make_thread_key(tid, id, key_size))
                }
            } else {
                let maybe_id = pick_key_id(
                    rng,
                    distribution,
                    zipf_theta,
                    shared_keyspace,
                    prefill_keys,
                    local_key_len,
                );
                if let Some(id) = maybe_id {
                    if shared_keyspace {
                        Some(make_shared_key(id, key_size))
                    } else {
                        Some(make_thread_key(tid, id, key_size))
                    }
                } else {
                    None
                }
            };

            if let Some(key) = key_opt {
                if let Ok(tx) = bucket.begin() {
                    // put for fresh inserts, upsert when overwriting prefill.
                    let write_ok = if spec.insert_only {
                        tx.put(key.as_slice(), value.as_slice()).is_ok()
                    } else {
                        tx.upsert(key.as_slice(), value.as_slice()).is_ok()
                    };
                    if !write_ok {
                        // NOTE(review): the txn is dropped without commit here
                        // — presumably drop aborts it; confirm in mace docs.
                        false
                    } else {
                        tx.commit().is_ok()
                    }
                } else {
                    false
                }
            } else {
                false
            }
        }
        OpKind::Scan => {
            // Shared keyspace: scan a random group prefix; otherwise scan this
            // thread's own prefix.
            let prefix_opt = if shared_keyspace {
                let maybe_id = pick_key_id(
                    rng,
                    distribution,
                    zipf_theta,
                    true,
                    prefill_keys,
                    local_key_len,
                );
                maybe_id.map(make_shared_prefix)
            } else {
                Some(make_thread_prefix(tid))
            };

            if let Some(prefix) = prefix_opt {
                match read_path {
                    ReadPath::Snapshot => {
                        if let Ok(view) = bucket.view() {
                            for item in view.seek(prefix).take(scan_len.max(1)) {
                                // black_box keeps the traversal from being
                                // optimized away.
                                std::hint::black_box(item);
                            }
                            true
                        } else {
                            false
                        }
                    }
                    ReadPath::RwTxn => {
                        if let Ok(tx) = bucket.begin() {
                            for item in tx.seek(prefix).take(scan_len.max(1)) {
                                std::hint::black_box(item);
                            }
                            tx.commit().is_ok()
                        } else {
                            false
                        }
                    }
                }
            } else {
                false
            }
        }
    };

    // Measured phase only: count the op, its error status, and its latency.
    if let Some(stats) = stats {
        stats.total_ops += 1;
        if !ok {
            stats.error_ops += 1;
        }
        if let Some(start) = start {
            let us = start.elapsed().as_micros() as u64;
            let idx = latency_bucket(us);
            stats.hist[idx] += 1;
        }
    }
}