Compare commits

...

2 Commits

Author SHA1 Message Date
abf82f735c
refine test 2026-03-03 21:32:10 +08:00
80b4745118
align rocksdb and mace 2026-03-03 13:00:01 +08:00
5 changed files with 132 additions and 110 deletions

View File

@ -17,9 +17,11 @@
#include <rocksdb/utilities/transaction.h> #include <rocksdb/utilities/transaction.h>
#include <rocksdb/utilities/transaction_db.h> #include <rocksdb/utilities/transaction_db.h>
#include <algorithm>
#include <barrier> #include <barrier>
#include <filesystem> #include <filesystem>
#include <format> #include <format>
#include <numeric>
#include <string> #include <string>
#include <pthread.h> #include <pthread.h>
@ -47,6 +49,13 @@ static void bind_core(size_t tid) {
(void) pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &set); (void) pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &set);
} }
static void require_ok(const rocksdb::Status &st, const char *what) {
if (!st.ok()) {
fmt::println(stderr, "{} failed: {}", what, st.ToString());
std::abort();
}
}
struct Args { struct Args {
size_t threads; size_t threads;
size_t iterations; size_t iterations;
@ -135,11 +144,11 @@ int main(int argc, char *argv[]) {
cfo.min_blob_size = args.blob_size; cfo.min_blob_size = args.blob_size;
cfo.disable_auto_compactions = true; cfo.disable_auto_compactions = true;
cfo.max_compaction_bytes = (1ULL << 60); cfo.max_compaction_bytes = (1ULL << 60);
cfo.level0_stop_writes_trigger = 100000; cfo.level0_stop_writes_trigger = 1000000;
cfo.level0_slowdown_writes_trigger = 100000; cfo.level0_slowdown_writes_trigger = 1000000;
cfo.level0_file_num_compaction_trigger = 100000; cfo.level0_file_num_compaction_trigger = 1000000;
cfo.write_buffer_size = 64 << 20; cfo.write_buffer_size = 64 << 20;
cfo.max_write_buffer_number = 64; cfo.max_write_buffer_number = 128;
// use 3GB block cache // use 3GB block cache
auto cache = rocksdb::NewLRUCache(3 << 30); auto cache = rocksdb::NewLRUCache(3 << 30);
@ -154,65 +163,58 @@ int main(int argc, char *argv[]) {
options.create_if_missing = true; options.create_if_missing = true;
options.allow_concurrent_memtable_write = true; options.allow_concurrent_memtable_write = true;
options.enable_pipelined_write = true; options.enable_pipelined_write = true;
options.env->SetBackgroundThreads(4, rocksdb::Env::Priority::HIGH); options.max_background_flushes = 8;
options.env->SetBackgroundThreads(8, rocksdb::Env::Priority::HIGH);
auto wopt = rocksdb::WriteOptions(); auto wopt = rocksdb::WriteOptions();
wopt.no_slowdown = true; wopt.no_slowdown = true;
// wopt.disableWAL = true;
std::vector<std::thread> wg; std::vector<std::thread> wg;
std::atomic<uint64_t> total_op{0}; std::atomic<uint64_t> total_op{0};
rocksdb::OptimisticTransactionDB *db; rocksdb::OptimisticTransactionDB *db;
auto b = nm::Instant::now(); auto b = nm::Instant::now();
std::vector<rocksdb::ColumnFamilyHandle *> handles{}; std::vector<rocksdb::ColumnFamilyHandle *> handles{};
auto s = rocksdb::OptimisticTransactionDB::Open(options, args.path, cfd, &handles, &db); auto s = rocksdb::OptimisticTransactionDB::Open(options, args.path, cfd, &handles, &db);
assert(s.ok()); require_ok(s, "open db");
std::barrier ready_barrier{static_cast<ptrdiff_t>(args.threads + 1)}; std::barrier ready_barrier{static_cast<ptrdiff_t>(args.threads + 1)};
std::barrier start_barrier{static_cast<ptrdiff_t>(args.threads + 1)}; std::barrier start_barrier{static_cast<ptrdiff_t>(args.threads + 1)};
std::random_device rd{}; std::random_device rd{};
std::mt19937 gen(rd());
std::string val(args.value_size, 'x'); std::string val(args.value_size, 'x');
auto *handle = handles[0]; auto *handle = handles[0];
if (args.mode == "get" || args.mode == "scan") { if (args.mode == "get" || args.mode == "scan") {
auto *kv = db->BeginTransaction(wopt); std::vector<std::thread> fill_threads;
for (size_t tid = 0; tid < args.threads; ++tid) { for (size_t tid = 0; tid < args.threads; ++tid) {
size_t count = args.iterations / args.threads + (tid < args.iterations % args.threads ? 1 : 0); fill_threads.emplace_back([&, tid] {
for (size_t i = 0; i < count; ++i) { bind_core(tid);
auto key = std::format("key_{}_{}", tid, i); size_t count = args.iterations / args.threads + (tid < args.iterations % args.threads ? 1 : 0);
key.resize(args.key_size, 'x'); const size_t batch_size = 10000;
kv->Put(handle, key, val); for (size_t i = 0; i < count; i += batch_size) {
} auto *kv = db->BeginTransaction(wopt);
for (size_t j = 0; j < batch_size && (i + j) < count; ++j) {
auto key = std::format("key_{}_{}", tid, i + j);
key.resize(args.key_size, 'x');
require_ok(kv->Put(handle, key, val), "fill put");
}
require_ok(kv->Commit(), "fill commit");
delete kv;
}
});
} }
kv->Commit(); for (auto &t: fill_threads)
delete kv; t.join();
delete handle; delete handle;
delete db; delete db;
handles.clear(); handles.clear();
// re-open db // re-open db
s = rocksdb::OptimisticTransactionDB::Open(options, args.path, cfd, &handles, &db); s = rocksdb::OptimisticTransactionDB::Open(options, args.path, cfd, &handles, &db);
assert(s.ok()); require_ok(s, "reopen db");
handle = handles[0]; handle = handles[0];
std::uniform_int_distribution<size_t> tid_dist(0, args.threads - 1);
for (size_t i = 0; i < args.iterations; ++i) {
auto tid = tid_dist(gen);
size_t count = args.iterations / args.threads + (tid < args.iterations % args.threads ? 1 : 0);
std::uniform_int_distribution<size_t> key_dist(0, count - 1);
auto idx = key_dist(gen);
auto key = std::format("key_{}_{}", tid, idx);
key.resize(args.key_size, 'x');
auto s = db->Get(rocksdb::ReadOptions(), key, &val);
if (!s.ok()) {
std::terminate();
}
}
} }
auto *snapshot = db->GetSnapshot();
auto base_seed = rd(); auto base_seed = rd();
for (size_t tid = 0; tid < args.threads; ++tid) { for (size_t tid = 0; tid < args.threads; ++tid) {
wg.emplace_back([&, tid] { wg.emplace_back([&, tid] {
@ -226,56 +228,60 @@ int main(int argc, char *argv[]) {
ropt.iterate_upper_bound = &upper_bound_slice; ropt.iterate_upper_bound = &upper_bound_slice;
} }
ropt.prefix_same_as_start = true; ropt.prefix_same_as_start = true;
ropt.snapshot = snapshot;
size_t round = 0; size_t round = 0;
std::mt19937 mixed_gen(static_cast<uint32_t>(base_seed) ^ static_cast<uint32_t>(0x9e3779b9U * (tid + 1))); std::mt19937 thread_gen(static_cast<uint32_t>(base_seed) ^ static_cast<uint32_t>(tid));
std::uniform_int_distribution<int> mixed_dist(0, 99); std::uniform_int_distribution<int> mixed_dist(0, 99);
size_t key_count = args.iterations / args.threads + (tid < args.iterations % args.threads ? 1 : 0); size_t key_count = args.iterations / args.threads + (tid < args.iterations % args.threads ? 1 : 0);
std::vector<size_t> indices(key_count);
std::iota(indices.begin(), indices.end(), 0);
if (args.random) {
std::shuffle(indices.begin(), indices.end(), thread_gen);
}
ready_barrier.arrive_and_wait(); ready_barrier.arrive_and_wait();
start_barrier.arrive_and_wait(); start_barrier.arrive_and_wait();
if (args.mode == "insert") { if (args.mode == "insert") {
for (size_t i = 0; i < key_count; ++i) { for (size_t i: indices) {
auto key = std::format("key_{}_{}", tid, i); auto key = std::format("key_{}_{}", tid, i);
key.resize(args.key_size, 'x'); key.resize(args.key_size, 'x');
round += 1; round += 1;
auto *kv = db->BeginTransaction(wopt); auto *kv = db->BeginTransaction(wopt);
kv->Put(handle, key, val); require_ok(kv->Put(handle, key, val), "insert put");
kv->Commit(); require_ok(kv->Commit(), "insert commit");
delete kv; delete kv;
} }
} else if (args.mode == "get") { } else if (args.mode == "get") {
for (size_t i = 0; i < key_count; ++i) { // rocksdb has no dedicated read-only txn in this bench path, use direct get for fair read-path
// comparison with mace view
for (size_t i: indices) {
auto key = std::format("key_{}_{}", tid, i); auto key = std::format("key_{}_{}", tid, i);
key.resize(args.key_size, 'x'); key.resize(args.key_size, 'x');
round += 1; round += 1;
auto *kv = db->BeginTransaction(wopt); require_ok(db->Get(ropt, handle, key, &rval), "get");
kv->Get(ropt, handle, key, &rval);
kv->Commit();
delete kv;
} }
} else if (args.mode == "mixed") { } else if (args.mode == "mixed") {
for (size_t i = 0; i < key_count; ++i) { for (size_t i: indices) {
auto key = std::format("key_{}_{}", tid, i); auto key = std::format("key_{}_{}", tid, i);
key.resize(args.key_size, 'x'); key.resize(args.key_size, 'x');
round += 1; round += 1;
auto is_insert = mixed_dist(mixed_gen) < static_cast<int>(args.insert_ratio); auto is_insert = mixed_dist(thread_gen) < static_cast<int>(args.insert_ratio);
auto *kv = db->BeginTransaction(wopt); auto *kv = db->BeginTransaction(wopt);
if (is_insert) { if (is_insert) {
kv->Put(handle, key, val); require_ok(kv->Put(handle, key, val), "mixed put");
} else { } else {
kv->Get(ropt, handle, key, &rval); auto st = kv->Get(ropt, handle, key, &rval);
if (!st.ok() && !st.IsNotFound()) {
require_ok(st, "mixed get");
}
} }
kv->Commit(); require_ok(kv->Commit(), "mixed commit");
delete kv; delete kv;
} }
} else if (args.mode == "scan") { } else if (args.mode == "scan") {
auto *iter = db->NewIterator(ropt); auto *iter = db->NewIterator(ropt);
iter->Seek(prefix); iter->Seek(prefix);
size_t n = 0;
while (iter->Valid()) { while (iter->Valid()) {
round += 1; round += 1;
auto k = iter->key(); auto k = iter->key();
@ -283,8 +289,8 @@ int main(int argc, char *argv[]) {
black_box(k); black_box(k);
black_box(v); black_box(v);
iter->Next(); iter->Next();
n += 1;
} }
require_ok(iter->status(), "scan iterate");
delete iter; delete iter;
} }
total_op.fetch_add(round, std::memory_order::relaxed); total_op.fetch_add(round, std::memory_order::relaxed);
@ -303,7 +309,13 @@ int main(int argc, char *argv[]) {
return args.insert_ratio; return args.insert_ratio;
return args.mode == "insert" ? 100 : 0; return args.mode == "insert" ? 100 : 0;
}(); }();
uint64_t ops = total_op.load(std::memory_order_relaxed) / b.elapse_sec(); const auto elapsed_us = b.elapse_usec();
uint64_t ops = 0;
const auto total = total_op.load(std::memory_order_relaxed);
if (elapsed_us > 0) {
ops = static_cast<uint64_t>(static_cast<double>(total) * 1000000.0 / elapsed_us);
}
if (args.mode == "insert") { if (args.mode == "insert") {
if (args.random) { if (args.random) {
args.mode = "random_insert"; args.mode = "random_insert";
@ -311,9 +323,8 @@ int main(int argc, char *argv[]) {
args.mode = "sequential_insert"; args.mode = "sequential_insert";
} }
} }
fmt::println("{},{},{},{},{},{},{}", args.mode, args.threads, args.key_size, args.value_size, ratio, (uint64_t) ops, fmt::println("{},{},{},{},{},{},{}", args.mode, args.threads, args.key_size, args.value_size, ratio, ops,
(uint64_t) b.elapse_ms()); static_cast<uint64_t>(elapsed_us));
db->ReleaseSnapshot(snapshot);
delete handle; delete handle;
delete db; delete db;
std::filesystem::remove_all(args.path); std::filesystem::remove_all(args.path);

View File

@ -47,8 +47,8 @@ function samples() {
done done
} }
echo mode,threads,key_size,value_size,insert_ratio,ops,elasped > "${script_dir}/mace.csv" echo mode,threads,key_size,value_size,insert_ratio,ops,elapsed_us > "${script_dir}/mace.csv"
samples "$1" 2>> "${script_dir}/mace.csv" samples "$1" 1>> "${script_dir}/mace.csv"
if [ -x "${script_dir}/bin/python" ]; then if [ -x "${script_dir}/bin/python" ]; then
(cd "${script_dir}" && "${script_dir}/bin/python" plot.py mace.csv) (cd "${script_dir}" && "${script_dir}/bin/python" plot.py mace.csv)
else else

View File

@ -3,6 +3,7 @@ import matplotlib.pyplot as plt
from adjustText import adjust_text from adjustText import adjust_text
import sys import sys
def real_mode(m): def real_mode(m):
if m == "mixed": if m == "mixed":
return "Mixed (70% Get, 30% Insert)" return "Mixed (70% Get, 30% Insert)"
@ -12,20 +13,29 @@ def real_mode(m):
return "Sequential Scan" return "Sequential Scan"
return m.capitalize() return m.capitalize()
name = sys.argv[1] name = sys.argv[1]
prefix = name.split(".")[0] prefix = name.split(".")[0]
# 读取数据
# read benchmark data
# keep compatibility with older csv files that used elapsed/elasped
# and normalize to elapsed_us
df = pd.read_csv(f"./{name}") df = pd.read_csv(f"./{name}")
if "elapsed_us" not in df.columns:
if "elapsed" in df.columns:
df = df.rename(columns={"elapsed": "elapsed_us"})
elif "elasped" in df.columns:
df = df.rename(columns={"elasped": "elapsed_us"})
# 按 mode 分组 # group by mode
modes = df["mode"].unique() modes = df["mode"].unique()
for mode in modes: for mode in modes:
plt.figure(figsize=(16, 9)) plt.figure(figsize=(16, 9))
subset = df[df["mode"] == mode] subset = df[df["mode"] == mode]
# 按 key_size/value_size 分组 # group by key/value size
key_value_combinations = subset.groupby(["key_size", "value_size"]) key_value_combinations = subset.groupby(["key_size", "value_size"])
texts = [] texts = []
@ -34,19 +44,17 @@ for mode in modes:
x = group["threads"] x = group["threads"]
y = group["ops"] y = group["ops"]
# 绘制折线 # draw line
line, = plt.plot(x, y, marker="o", label=label) line, = plt.plot(x, y, marker="o", label=label)
# 添加文本标签 # add labels
for xi, yi, ops in zip(x, y, group["ops"]): for xi, yi, ops in zip(x, y, group["ops"]):
texts.append( texts.append(
plt.text(xi, yi, f"{int(ops)}", color=line.get_color(), fontsize=12) plt.text(xi, yi, f"{int(ops)}", color=line.get_color(), fontsize=12)
) )
# 自动调整文本位置 adjust_text(texts, arrowprops=dict(arrowstyle="->", color="gray"))
adjust_text(texts, arrowprops=dict(arrowstyle="->", color='gray'))
# 设置图表样式
plt.title(f"{prefix.upper()}: {real_mode(mode)}", fontsize=16) plt.title(f"{prefix.upper()}: {real_mode(mode)}", fontsize=16)
plt.xlabel("Threads", fontsize=14) plt.xlabel("Threads", fontsize=14)
plt.ylabel("OPS", fontsize=14) plt.ylabel("OPS", fontsize=14)

View File

@ -47,7 +47,7 @@ function samples() {
done done
} }
echo mode,threads,key_size,value_size,insert_ratio,ops,elapsed > "${script_dir}/rocksdb.csv" echo mode,threads,key_size,value_size,insert_ratio,ops,elapsed_us > "${script_dir}/rocksdb.csv"
samples "$1" 1>> "${script_dir}/rocksdb.csv" samples "$1" 1>> "${script_dir}/rocksdb.csv"
if [ -x "${script_dir}/bin/python" ]; then if [ -x "${script_dir}/bin/python" ]; then
(cd "${script_dir}" && "${script_dir}/bin/python" plot.py rocksdb.csv) (cd "${script_dir}" && "${script_dir}/bin/python" plot.py rocksdb.csv)

View File

@ -4,7 +4,6 @@ use logger::Logger;
use mace::{Mace, Options}; use mace::{Mace, Options};
#[cfg(feature = "custom_alloc")] #[cfg(feature = "custom_alloc")]
use myalloc::{MyAlloc, print_filtered_trace}; use myalloc::{MyAlloc, print_filtered_trace};
use rand::prelude::*;
use std::path::Path; use std::path::Path;
use std::process::exit; use std::process::exit;
use std::sync::Arc; use std::sync::Arc;
@ -85,17 +84,17 @@ fn main() {
eprintln!("Error: Insert ratio must be between 0 and 100"); eprintln!("Error: Insert ratio must be between 0 and 100");
exit(1); exit(1);
} }
let mut opt = Options::new(path); let mut opt = Options::new(path);
opt.sync_on_write = false; opt.sync_on_write = false;
opt.over_provision = false;
opt.inline_size = args.blob_size; opt.inline_size = args.blob_size;
opt.tmp_store = args.mode != "get" && args.mode != "scan"; opt.tmp_store = args.mode != "get" && args.mode != "scan";
opt.cache_capacity = 3 << 30; opt.cache_capacity = 3 << 30;
opt.data_file_size = 64 << 20; opt.data_file_size = 64 << 20;
opt.max_log_size = 1 << 30; opt.max_log_size = 1 << 30;
opt.default_arenas = 128;
let mut saved = opt.clone(); let mut saved = opt.clone();
saved.tmp_store = false;
saved.tmp_store = true;
let mut db = Mace::new(opt.validate().unwrap()).unwrap(); let mut db = Mace::new(opt.validate().unwrap()).unwrap();
db.disable_gc(); db.disable_gc();
let mut bkt = db.new_bucket("default").unwrap(); let mut bkt = db.new_bucket("default").unwrap();
@ -103,42 +102,41 @@ fn main() {
let value = Arc::new(vec![b'0'; args.value_size]); let value = Arc::new(vec![b'0'; args.value_size]);
if args.mode == "get" || args.mode == "scan" { if args.mode == "get" || args.mode == "scan" {
let pre_tx = bkt.begin().unwrap(); let mut fill_handles = vec![];
for tid in 0..args.threads { for tid in 0..args.threads {
let count = args.iterations / args.threads let bkt_clone = bkt.clone();
let val_clone = value.clone();
let key_count = args.iterations / args.threads
+ if tid < args.iterations % args.threads { + if tid < args.iterations % args.threads {
1 1
} else { } else {
0 0
}; };
for i in 0..count { let key_size = args.key_size;
let mut key = format!("key_{tid}_{i}").into_bytes(); fill_handles.push(std::thread::spawn(move || {
key.resize(args.key_size, b'x'); coreid::bind_core(tid);
pre_tx.put(&key, &*value).unwrap(); const BATCH_SIZE: usize = 10000;
} for i in (0..key_count).step_by(BATCH_SIZE) {
let tx = bkt_clone.begin().unwrap();
for j in 0..BATCH_SIZE {
if i + j >= key_count {
break;
}
let mut key = format!("key_{tid}_{}", i + j).into_bytes();
key.resize(key_size, b'x');
tx.put(&key, &*val_clone).unwrap();
}
tx.commit().unwrap();
}
}));
}
for h in fill_handles {
h.join().unwrap();
} }
pre_tx.commit().unwrap();
drop(bkt); drop(bkt);
drop(db); drop(db);
saved.tmp_store = true;
db = Mace::new(saved.validate().unwrap()).unwrap(); db = Mace::new(saved.validate().unwrap()).unwrap();
bkt = db.get_bucket("default").unwrap(); bkt = db.get_bucket("default").unwrap();
let mut rng = rand::rng();
for _ in 0..args.iterations {
let tid = rng.random_range(0..args.threads);
let count = args.iterations / args.threads
+ if tid < args.iterations % args.threads {
1
} else {
0
};
let idx = rng.random_range(0..count);
let mut key = format!("key_{tid}_{idx}").into_bytes();
key.resize(args.key_size, b'x');
let view = bkt.view().unwrap();
view.get(&key).unwrap();
}
} }
let mut key_counts = vec![args.iterations / args.threads; args.threads]; let mut key_counts = vec![args.iterations / args.threads; args.threads];
@ -162,15 +160,22 @@ fn main() {
let key_count = key_counts[tid]; let key_count = key_counts[tid];
let key_size = args.key_size; let key_size = args.key_size;
let prefix = format!("key_{tid}_"); let prefix = format!("key_{tid}_");
let is_random = args.random;
std::thread::spawn(move || { std::thread::spawn(move || {
coreid::bind_core(tid); coreid::bind_core(tid);
let mut round = 0; let mut round = 0;
let mut indices: Vec<usize> = (0..key_count).collect();
if is_random {
use rand::seq::SliceRandom;
indices.shuffle(&mut rand::rng());
}
ready_barrier.wait(); ready_barrier.wait();
start_barrier.wait(); start_barrier.wait();
match mode.as_str() { match mode.as_str() {
"insert" => { "insert" => {
for i in 0..key_count { for i in indices {
let mut key = format!("key_{tid}_{i}").into_bytes(); let mut key = format!("key_{tid}_{i}").into_bytes();
key.resize(key_size, b'x'); key.resize(key_size, b'x');
round += 1; round += 1;
@ -180,7 +185,7 @@ fn main() {
} }
} }
"get" => { "get" => {
for i in 0..key_count { for i in indices {
let mut key = format!("key_{tid}_{i}").into_bytes(); let mut key = format!("key_{tid}_{i}").into_bytes();
key.resize(key_size, b'x'); key.resize(key_size, b'x');
round += 1; round += 1;
@ -190,7 +195,7 @@ fn main() {
} }
} }
"mixed" => { "mixed" => {
for i in 0..key_count { for i in indices {
let mut key = format!("key_{tid}_{i}").into_bytes(); let mut key = format!("key_{tid}_{i}").into_bytes();
key.resize(key_size, b'x'); key.resize(key_size, b'x');
let is_insert = rand::random_range(0..100) < insert_ratio; let is_insert = rand::random_range(0..100) < insert_ratio;
@ -231,9 +236,13 @@ fn main() {
x.join().unwrap(); x.join().unwrap();
} }
let duration = start_time.elapsed(); let elapsed_us = start_time.elapsed().as_micros() as u64;
let total = total_ops.load(std::sync::atomic::Ordering::Relaxed); let total = total_ops.load(std::sync::atomic::Ordering::Relaxed);
let ops = (total as f64 / duration.as_secs_f64()) as usize; let ops = if elapsed_us == 0 {
0
} else {
((total as u128 * 1_000_000u128) / elapsed_us as u128) as usize
};
let ratio = if args.mode == "mixed" { let ratio = if args.mode == "mixed" {
args.insert_ratio args.insert_ratio
@ -250,15 +259,9 @@ fn main() {
mode = "sequential_insert".into(); mode = "sequential_insert".into();
} }
} }
eprintln!( println!(
"{},{},{},{},{},{},{}", "{},{},{},{},{},{},{}",
mode, mode, args.threads, args.key_size, args.value_size, ratio, ops, elapsed_us
args.threads,
args.key_size,
args.value_size,
ratio,
ops,
duration.as_millis()
); );
drop(db); drop(db);
#[cfg(feature = "custom_alloc")] #[cfg(feature = "custom_alloc")]