Better Together Rust Devs | b902b34 | 2023-07-31 08:21:33 -0700 | [diff] [blame] | 1 | // Copyright 2023 Google LLC |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; |
| 15 | use handle_map::*; |
| 16 | use std::sync::Arc; |
| 17 | use std::thread; |
| 18 | use std::time::{Duration, Instant}; |
| 19 | |
// For the sake of these benchmarks, we just want
// to be able to allocate an almost-arbitrary number of handles
// if needed by the particular benchmark. (u32::MAX - 1 leaves headroom
// below the absolute maximum handle value.)
const MAX_ACTIVE_HANDLES: u32 = u32::MAX - 1;

// The default number of threads+shards to use
// in multi-threaded benchmarks, chosen
// to represent a typical multi-threaded use-case.
// Using the same value for both keeps thread count and shard count matched.
const DEFAULT_SHARDS_AND_THREADS: u8 = 8;

/// We're using u8's for the wrapped object type to deliberately ignore
/// costs related to accessing the underlying data - we just care about
/// benchmarking handle-map operations.
type BenchHandleMap = HandleMap<u8>;
| 34 | |
| 35 | fn build_handle_map(num_shards: u8) -> BenchHandleMap { |
| 36 | let dimensions = HandleMapDimensions { num_shards, max_active_handles: MAX_ACTIVE_HANDLES }; |
| 37 | HandleMap::with_dimensions(dimensions) |
| 38 | } |
| 39 | |
| 40 | /// Benchmark for repeated reads on a single thread |
| 41 | fn single_threaded_read_benchmark(c: &mut Criterion) { |
| 42 | // The number of shards should not matter much here. |
| 43 | let handle_map = build_handle_map(8); |
| 44 | // Pre-populate the map with a single handle. |
| 45 | let handle = handle_map.allocate(|| 0xFF).unwrap(); |
| 46 | let handle_map_ref = &handle_map; |
| 47 | // Perform repeated reads |
| 48 | c.bench_function("single-threaded reads", |b| { |
| 49 | b.iter(|| { |
| 50 | let guard = handle_map_ref.get(black_box(handle)).unwrap(); |
| 51 | black_box(*guard) |
| 52 | }) |
| 53 | }); |
| 54 | } |
| 55 | |
| 56 | /// Benchmark for repeated writes on a single thread |
| 57 | fn single_threaded_write_benchmark(c: &mut Criterion) { |
| 58 | // The number of shards should not matter much here. |
| 59 | let handle_map = build_handle_map(8); |
| 60 | // Pre-populate the map with a single handle. |
| 61 | let handle = handle_map.allocate(|| 0xFF).unwrap(); |
| 62 | let handle_map_ref = &handle_map; |
| 63 | // Perform repeated writes |
| 64 | c.bench_function("single-threaded writes", |b| { |
| 65 | b.iter(|| { |
| 66 | let mut guard = handle_map_ref.get_mut(black_box(handle)).unwrap(); |
| 67 | *guard *= black_box(0); |
| 68 | }) |
| 69 | }); |
| 70 | } |
| 71 | |
| 72 | /// Benchmark for repeated allocations on a single thread |
| 73 | #[allow(unused_must_use)] |
| 74 | fn single_threaded_allocate_benchmark(c: &mut Criterion) { |
| 75 | // The number of shards here is chosen to be quasi-reasonable. |
| 76 | // Realistically, performance will fluctuate somewhat with this |
| 77 | // due to the difference in the number of internal hash-map collisions, |
| 78 | // but ultimately we only care about the asymptotic behavior. |
| 79 | let handle_map = build_handle_map(8); |
| 80 | let handle_map_ref = &handle_map; |
| 81 | // Perform repeated allocations |
| 82 | c.bench_function("single-threaded allocations", |b| { |
| 83 | b.iter(|| { |
| 84 | handle_map_ref.allocate(|| black_box(0xEE)); |
| 85 | }) |
| 86 | }); |
| 87 | } |
| 88 | |
| 89 | /// Benchmark for repeated allocation->deallocation on a single thread |
| 90 | /// starting from an empty map. |
| 91 | #[allow(unused_must_use)] |
| 92 | fn single_threaded_allocate_deallocate_near_empty_benchmark(c: &mut Criterion) { |
| 93 | // The number of shards should not matter much here. |
| 94 | let handle_map = build_handle_map(8); |
| 95 | let handle_map_ref = &handle_map; |
| 96 | // Perform repeated allocation/deallocation pairs |
| 97 | c.bench_function("single-threaded allocate/deallocate pairs (empty init state)", |b| { |
| 98 | b.iter(|| { |
| 99 | let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap(); |
| 100 | handle_map_ref.deallocate(handle); |
| 101 | }) |
| 102 | }); |
| 103 | } |
| 104 | |
| 105 | /// Benchmark for repeated allocation->deallocation starting |
| 106 | /// from a map with a thousand elements per shard. |
| 107 | #[allow(unused_must_use)] |
| 108 | fn single_threaded_allocate_deallocate_one_thousand_benchmark(c: &mut Criterion) { |
| 109 | let handle_map = build_handle_map(8); |
| 110 | for _ in 0..(8 * 1000) { |
| 111 | handle_map.allocate(|| black_box(0xFF)).unwrap(); |
| 112 | } |
| 113 | let handle_map_ref = &handle_map; |
| 114 | |
| 115 | // Perform repeated allocation/deallocation pairs |
| 116 | c.bench_function( |
| 117 | "single-threaded allocate/deallocate pairs (init one thousand elements per shard)", |
| 118 | |b| { |
| 119 | b.iter(|| { |
| 120 | let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap(); |
| 121 | handle_map_ref.deallocate(handle); |
| 122 | }) |
| 123 | }, |
| 124 | ); |
| 125 | } |
| 126 | |
| 127 | /// Benchmark for repeated allocation->deallocation starting |
| 128 | /// from a map with a million elements per shard. |
| 129 | #[allow(unused_must_use)] |
| 130 | fn single_threaded_allocate_deallocate_one_million_benchmark(c: &mut Criterion) { |
| 131 | let handle_map = build_handle_map(8); |
| 132 | for _ in 0..(8 * 1000000) { |
| 133 | handle_map.allocate(|| black_box(0xFF)).unwrap(); |
| 134 | } |
| 135 | let handle_map_ref = &handle_map; |
| 136 | |
| 137 | // Perform repeated allocation/deallocation pairs |
| 138 | c.bench_function( |
| 139 | "single-threaded allocate/deallocate pairs (init one million elements per shard)", |
| 140 | |b| { |
| 141 | b.iter(|| { |
| 142 | let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap(); |
| 143 | handle_map_ref.deallocate(handle); |
| 144 | }) |
| 145 | }, |
| 146 | ); |
| 147 | } |
| 148 | |
| 149 | /// Helper function for creating a multi-threaded benchmark |
| 150 | /// with the set number of threads. Untimed Initialization of state |
| 151 | /// should occur prior to this call. This method will repeatedly |
| 152 | /// execute the benchmark code on all threads |
| 153 | fn bench_multithreaded<F>(name: &str, num_threads: u8, benchable_fn_ref: Arc<F>, c: &mut Criterion) |
| 154 | where |
| 155 | F: Fn() + Send + Sync + 'static, |
| 156 | { |
| 157 | c.bench_function(name, move |b| { |
| 158 | b.iter_custom(|iters| { |
| 159 | // The returned results from the threads will be their timings |
| 160 | let mut join_handles = Vec::new(); |
| 161 | for _ in 0..num_threads { |
| 162 | let benchable_fn_ref_clone = benchable_fn_ref.clone(); |
| 163 | let join_handle = thread::spawn(move || { |
| 164 | let start = Instant::now(); |
| 165 | for _ in 0..iters { |
| 166 | benchable_fn_ref_clone(); |
| 167 | } |
| 168 | start.elapsed() |
| 169 | }); |
| 170 | join_handles.push(join_handle); |
| 171 | } |
| 172 | //Threads spawned, now collect the results |
| 173 | let mut total_duration = Duration::from_nanos(0); |
| 174 | for join_handle in join_handles { |
| 175 | let thread_duration = join_handle.join().unwrap(); |
| 176 | total_duration += thread_duration; |
| 177 | } |
| 178 | total_duration /= num_threads as u32; |
| 179 | total_duration |
| 180 | }) |
| 181 | }); |
| 182 | } |
| 183 | |
/// Defines a number of reads/writes to perform [relative
/// to other operations performed in a given test]
#[derive(Debug, Clone, Copy)]
struct ReadWriteCount {
    // Number of read operations per object per benchmark iteration.
    num_reads: usize,
    // Number of write operations per object per benchmark iteration.
    num_writes: usize,
}

// Preset read/write mixes, each totaling four operations per object,
// spanning the spectrum from read-only to write-only.
const READS_ONLY: ReadWriteCount = ReadWriteCount { num_reads: 4, num_writes: 0 };

const READ_LEANING: ReadWriteCount = ReadWriteCount { num_reads: 3, num_writes: 1 };

const BALANCED: ReadWriteCount = ReadWriteCount { num_reads: 2, num_writes: 2 };

const WRITE_LEANING: ReadWriteCount = ReadWriteCount { num_reads: 1, num_writes: 3 };

const WRITES_ONLY: ReadWriteCount = ReadWriteCount { num_reads: 0, num_writes: 4 };

// All presets, in read-heaviest-first order, for tests that sweep the mixes.
const READ_WRITE_COUNTS: [ReadWriteCount; 5] =
    [READS_ONLY, READ_LEANING, BALANCED, WRITE_LEANING, WRITES_ONLY];
| 204 | |
| 205 | /// Benchmarks a repeated allocate/[X writes]/[Y reads]/deallocate workflow across |
| 206 | /// the default number of threads and shards. |
| 207 | fn lifecycle_read_and_write_multithreaded(per_object_rw_count: ReadWriteCount, c: &mut Criterion) { |
| 208 | let name = format!( |
| 209 | "lifecycle_read_and_write_multithreaded(reads/object: {}, writes/object:{})", |
| 210 | per_object_rw_count.num_reads, per_object_rw_count.num_writes |
| 211 | ); |
| 212 | // We need an Arc to be able to pass this off to `bench_multithreaded`. |
| 213 | let handle_map = Arc::new(build_handle_map(DEFAULT_SHARDS_AND_THREADS)); |
| 214 | bench_multithreaded( |
| 215 | &name, |
| 216 | DEFAULT_SHARDS_AND_THREADS, |
| 217 | Arc::new(move || { |
| 218 | let handle = handle_map.allocate(|| black_box(0xFF)).unwrap(); |
| 219 | for _ in 0..per_object_rw_count.num_writes { |
| 220 | let mut write_guard = handle_map.get_mut(handle).unwrap(); |
| 221 | *write_guard *= black_box(1); |
| 222 | } |
| 223 | for _ in 0..per_object_rw_count.num_reads { |
| 224 | let read_guard = handle_map.get_mut(handle).unwrap(); |
| 225 | black_box(*read_guard); |
| 226 | } |
| 227 | handle_map.deallocate(handle).unwrap(); |
| 228 | }), |
| 229 | c, |
| 230 | ); |
| 231 | } |
| 232 | |
| 233 | fn lifecycle_benchmarks(c: &mut Criterion) { |
| 234 | for read_write_count in READ_WRITE_COUNTS { |
| 235 | // Using 8 separate shards to roughly match the number of active threads. |
| 236 | lifecycle_read_and_write_multithreaded(read_write_count, c); |
| 237 | } |
| 238 | } |
| 239 | |
| 240 | /// Benchmarks repeated cycles of accessing the given number of distinct objects, each |
| 241 | /// with the given number of reads and writes across the given number of threads. |
| 242 | fn read_and_write_contention_multithreaded( |
| 243 | num_threads: u8, |
| 244 | num_distinct_objects: u32, |
| 245 | per_object_rw_count: ReadWriteCount, |
| 246 | c: &mut Criterion, |
| 247 | ) { |
| 248 | let name = format!("read_and_write_contention_multithreaded(threads: {}, objects: {}, reads/object: {}, writes/object:{})", |
| 249 | num_threads, num_distinct_objects, per_object_rw_count.num_reads, per_object_rw_count.num_writes); |
| 250 | // Default to 8 shards, ultimately doesn't matter much since we only ever access a few shards. |
| 251 | // This needs to be `'static` in order to pass a ref off to `bench_multithreaded`. |
| 252 | let handle_map = Arc::new(build_handle_map(8)); |
| 253 | let mut handles = Vec::new(); |
| 254 | for i in 0..num_distinct_objects { |
| 255 | let handle = handle_map.allocate(|| i as u8).unwrap(); |
| 256 | handles.push(handle); |
| 257 | } |
| 258 | |
| 259 | bench_multithreaded( |
| 260 | &name, |
| 261 | num_threads, |
| 262 | Arc::new(move || { |
| 263 | // Deliberately performing all writes first, with |
| 264 | // the goal of getting staggered timings from |
| 265 | // all of the threads for subsequent iterations. |
| 266 | // We also perform reads and writes across all objects |
| 267 | // in batches to get something resembling uniform access. |
| 268 | for _ in 0..per_object_rw_count.num_writes { |
| 269 | for handle in handles.iter() { |
| 270 | let mut write_guard = handle_map.get_mut(*handle).unwrap(); |
| 271 | *write_guard *= black_box(1); |
| 272 | } |
| 273 | } |
| 274 | for _ in 0..per_object_rw_count.num_reads { |
| 275 | for handle in handles.iter() { |
| 276 | let read_guard = handle_map.get(*handle).unwrap(); |
| 277 | black_box(*read_guard); |
| 278 | } |
| 279 | } |
| 280 | }), |
| 281 | c, |
| 282 | ); |
| 283 | } |
| 284 | |
| 285 | fn read_write_contention_benchmarks(c: &mut Criterion) { |
| 286 | for num_threads in [2, 4, 8].iter() { |
| 287 | for num_distinct_objects in [1u32, 2u32, 4u32].iter() { |
| 288 | if num_distinct_objects >= &(*num_threads as u32) { |
| 289 | continue; |
| 290 | } |
| 291 | for read_write_count in READ_WRITE_COUNTS { |
| 292 | read_and_write_contention_multithreaded( |
| 293 | *num_threads, |
| 294 | *num_distinct_objects, |
| 295 | read_write_count, |
| 296 | c, |
| 297 | ); |
| 298 | } |
| 299 | } |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | /// Benchmarks allocation (starting from empty) for the default number of shards and threads |
| 304 | fn allocator_contention_benchmark(c: &mut Criterion) { |
| 305 | let name = "allocator_contention_multithreaded"; |
| 306 | // We need an Arc to pass this off to `bench_multithreaded` |
| 307 | let handle_map = Arc::new(build_handle_map(DEFAULT_SHARDS_AND_THREADS)); |
| 308 | bench_multithreaded( |
| 309 | name, |
| 310 | DEFAULT_SHARDS_AND_THREADS, |
| 311 | Arc::new(move || { |
| 312 | handle_map.allocate(|| black_box(0xFF)).unwrap(); |
| 313 | }), |
| 314 | c, |
| 315 | ); |
| 316 | } |
| 317 | |
// Register all benchmarks above with Criterion and generate the harness `main`.
criterion_group!(
    benches,
    single_threaded_read_benchmark,
    single_threaded_write_benchmark,
    single_threaded_allocate_benchmark,
    single_threaded_allocate_deallocate_near_empty_benchmark,
    single_threaded_allocate_deallocate_one_thousand_benchmark,
    single_threaded_allocate_deallocate_one_million_benchmark,
    lifecycle_benchmarks,
    read_write_contention_benchmarks,
    allocator_contention_benchmark,
);
criterion_main!(benches);