// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use handle_map::*;
use std::sync::Arc;
use std::thread;
use std::time::{Duration, Instant};

// For the sake of these benchmarks, we just want
// to be able to allocate an almost-arbitrary number of handles
// if needed by the particular benchmark.
// Passed to `HandleMapDimensions` as the cap on concurrently-live handles.
const MAX_ACTIVE_HANDLES: u32 = u32::MAX - 1;

// The default number of threads+shards to use
// in multi-threaded benchmarks, chosen
// to represent a typical multi-threaded use-case.
// Used for both values so each thread maps roughly onto one shard.
const DEFAULT_SHARDS_AND_THREADS: u8 = 8;

/// We're using u8's for the wrapped object type to deliberately ignore
/// costs related to accessing the underlying data - we just care about
/// benchmarking handle-map operations.
type BenchHandleMap = HandleMap<u8>;

35fn build_handle_map(num_shards: u8) -> BenchHandleMap {
36 let dimensions = HandleMapDimensions { num_shards, max_active_handles: MAX_ACTIVE_HANDLES };
37 HandleMap::with_dimensions(dimensions)
38}
39
40/// Benchmark for repeated reads on a single thread
41fn single_threaded_read_benchmark(c: &mut Criterion) {
42 // The number of shards should not matter much here.
43 let handle_map = build_handle_map(8);
44 // Pre-populate the map with a single handle.
45 let handle = handle_map.allocate(|| 0xFF).unwrap();
46 let handle_map_ref = &handle_map;
47 // Perform repeated reads
48 c.bench_function("single-threaded reads", |b| {
49 b.iter(|| {
50 let guard = handle_map_ref.get(black_box(handle)).unwrap();
51 black_box(*guard)
52 })
53 });
54}
55
56/// Benchmark for repeated writes on a single thread
57fn single_threaded_write_benchmark(c: &mut Criterion) {
58 // The number of shards should not matter much here.
59 let handle_map = build_handle_map(8);
60 // Pre-populate the map with a single handle.
61 let handle = handle_map.allocate(|| 0xFF).unwrap();
62 let handle_map_ref = &handle_map;
63 // Perform repeated writes
64 c.bench_function("single-threaded writes", |b| {
65 b.iter(|| {
66 let mut guard = handle_map_ref.get_mut(black_box(handle)).unwrap();
67 *guard *= black_box(0);
68 })
69 });
70}
71
72/// Benchmark for repeated allocations on a single thread
73#[allow(unused_must_use)]
74fn single_threaded_allocate_benchmark(c: &mut Criterion) {
75 // The number of shards here is chosen to be quasi-reasonable.
76 // Realistically, performance will fluctuate somewhat with this
77 // due to the difference in the number of internal hash-map collisions,
78 // but ultimately we only care about the asymptotic behavior.
79 let handle_map = build_handle_map(8);
80 let handle_map_ref = &handle_map;
81 // Perform repeated allocations
82 c.bench_function("single-threaded allocations", |b| {
83 b.iter(|| {
84 handle_map_ref.allocate(|| black_box(0xEE));
85 })
86 });
87}
88
89/// Benchmark for repeated allocation->deallocation on a single thread
90/// starting from an empty map.
91#[allow(unused_must_use)]
92fn single_threaded_allocate_deallocate_near_empty_benchmark(c: &mut Criterion) {
93 // The number of shards should not matter much here.
94 let handle_map = build_handle_map(8);
95 let handle_map_ref = &handle_map;
96 // Perform repeated allocation/deallocation pairs
97 c.bench_function("single-threaded allocate/deallocate pairs (empty init state)", |b| {
98 b.iter(|| {
99 let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap();
100 handle_map_ref.deallocate(handle);
101 })
102 });
103}
104
105/// Benchmark for repeated allocation->deallocation starting
106/// from a map with a thousand elements per shard.
107#[allow(unused_must_use)]
108fn single_threaded_allocate_deallocate_one_thousand_benchmark(c: &mut Criterion) {
109 let handle_map = build_handle_map(8);
110 for _ in 0..(8 * 1000) {
111 handle_map.allocate(|| black_box(0xFF)).unwrap();
112 }
113 let handle_map_ref = &handle_map;
114
115 // Perform repeated allocation/deallocation pairs
116 c.bench_function(
117 "single-threaded allocate/deallocate pairs (init one thousand elements per shard)",
118 |b| {
119 b.iter(|| {
120 let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap();
121 handle_map_ref.deallocate(handle);
122 })
123 },
124 );
125}
126
127/// Benchmark for repeated allocation->deallocation starting
128/// from a map with a million elements per shard.
129#[allow(unused_must_use)]
130fn single_threaded_allocate_deallocate_one_million_benchmark(c: &mut Criterion) {
131 let handle_map = build_handle_map(8);
132 for _ in 0..(8 * 1000000) {
133 handle_map.allocate(|| black_box(0xFF)).unwrap();
134 }
135 let handle_map_ref = &handle_map;
136
137 // Perform repeated allocation/deallocation pairs
138 c.bench_function(
139 "single-threaded allocate/deallocate pairs (init one million elements per shard)",
140 |b| {
141 b.iter(|| {
142 let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap();
143 handle_map_ref.deallocate(handle);
144 })
145 },
146 );
147}
148
149/// Helper function for creating a multi-threaded benchmark
150/// with the set number of threads. Untimed Initialization of state
151/// should occur prior to this call. This method will repeatedly
152/// execute the benchmark code on all threads
153fn bench_multithreaded<F>(name: &str, num_threads: u8, benchable_fn_ref: Arc<F>, c: &mut Criterion)
154where
155 F: Fn() + Send + Sync + 'static,
156{
157 c.bench_function(name, move |b| {
158 b.iter_custom(|iters| {
159 // The returned results from the threads will be their timings
160 let mut join_handles = Vec::new();
161 for _ in 0..num_threads {
162 let benchable_fn_ref_clone = benchable_fn_ref.clone();
163 let join_handle = thread::spawn(move || {
164 let start = Instant::now();
165 for _ in 0..iters {
166 benchable_fn_ref_clone();
167 }
168 start.elapsed()
169 });
170 join_handles.push(join_handle);
171 }
172 //Threads spawned, now collect the results
173 let mut total_duration = Duration::from_nanos(0);
174 for join_handle in join_handles {
175 let thread_duration = join_handle.join().unwrap();
176 total_duration += thread_duration;
177 }
178 total_duration /= num_threads as u32;
179 total_duration
180 })
181 });
182}
183
/// Defines a number of reads/writes to perform [relative
/// to other operations performed in a given test]
#[derive(Debug, Clone, Copy)]
struct ReadWriteCount {
    // Number of read (`get`) operations performed per object per cycle.
    num_reads: usize,
    // Number of write (`get_mut`) operations performed per object per cycle.
    num_writes: usize,
}

// Read/write mixes from all-reads to all-writes. Each mix totals four
// operations per object so results are comparable across mixes.
const READS_ONLY: ReadWriteCount = ReadWriteCount { num_reads: 4, num_writes: 0 };

const READ_LEANING: ReadWriteCount = ReadWriteCount { num_reads: 3, num_writes: 1 };

const BALANCED: ReadWriteCount = ReadWriteCount { num_reads: 2, num_writes: 2 };

const WRITE_LEANING: ReadWriteCount = ReadWriteCount { num_reads: 1, num_writes: 3 };

const WRITES_ONLY: ReadWriteCount = ReadWriteCount { num_reads: 0, num_writes: 4 };

// The full sweep of mixes iterated by the benchmark drivers below.
const READ_WRITE_COUNTS: [ReadWriteCount; 5] =
    [READS_ONLY, READ_LEANING, BALANCED, WRITE_LEANING, WRITES_ONLY];

205/// Benchmarks a repeated allocate/[X writes]/[Y reads]/deallocate workflow across
206/// the default number of threads and shards.
207fn lifecycle_read_and_write_multithreaded(per_object_rw_count: ReadWriteCount, c: &mut Criterion) {
208 let name = format!(
209 "lifecycle_read_and_write_multithreaded(reads/object: {}, writes/object:{})",
210 per_object_rw_count.num_reads, per_object_rw_count.num_writes
211 );
212 // We need an Arc to be able to pass this off to `bench_multithreaded`.
213 let handle_map = Arc::new(build_handle_map(DEFAULT_SHARDS_AND_THREADS));
214 bench_multithreaded(
215 &name,
216 DEFAULT_SHARDS_AND_THREADS,
217 Arc::new(move || {
218 let handle = handle_map.allocate(|| black_box(0xFF)).unwrap();
219 for _ in 0..per_object_rw_count.num_writes {
220 let mut write_guard = handle_map.get_mut(handle).unwrap();
221 *write_guard *= black_box(1);
222 }
223 for _ in 0..per_object_rw_count.num_reads {
224 let read_guard = handle_map.get_mut(handle).unwrap();
225 black_box(*read_guard);
226 }
227 handle_map.deallocate(handle).unwrap();
228 }),
229 c,
230 );
231}
232
233fn lifecycle_benchmarks(c: &mut Criterion) {
234 for read_write_count in READ_WRITE_COUNTS {
235 // Using 8 separate shards to roughly match the number of active threads.
236 lifecycle_read_and_write_multithreaded(read_write_count, c);
237 }
238}
239
240/// Benchmarks repeated cycles of accessing the given number of distinct objects, each
241/// with the given number of reads and writes across the given number of threads.
242fn read_and_write_contention_multithreaded(
243 num_threads: u8,
244 num_distinct_objects: u32,
245 per_object_rw_count: ReadWriteCount,
246 c: &mut Criterion,
247) {
248 let name = format!("read_and_write_contention_multithreaded(threads: {}, objects: {}, reads/object: {}, writes/object:{})",
249 num_threads, num_distinct_objects, per_object_rw_count.num_reads, per_object_rw_count.num_writes);
250 // Default to 8 shards, ultimately doesn't matter much since we only ever access a few shards.
251 // This needs to be `'static` in order to pass a ref off to `bench_multithreaded`.
252 let handle_map = Arc::new(build_handle_map(8));
253 let mut handles = Vec::new();
254 for i in 0..num_distinct_objects {
255 let handle = handle_map.allocate(|| i as u8).unwrap();
256 handles.push(handle);
257 }
258
259 bench_multithreaded(
260 &name,
261 num_threads,
262 Arc::new(move || {
263 // Deliberately performing all writes first, with
264 // the goal of getting staggered timings from
265 // all of the threads for subsequent iterations.
266 // We also perform reads and writes across all objects
267 // in batches to get something resembling uniform access.
268 for _ in 0..per_object_rw_count.num_writes {
269 for handle in handles.iter() {
270 let mut write_guard = handle_map.get_mut(*handle).unwrap();
271 *write_guard *= black_box(1);
272 }
273 }
274 for _ in 0..per_object_rw_count.num_reads {
275 for handle in handles.iter() {
276 let read_guard = handle_map.get(*handle).unwrap();
277 black_box(*read_guard);
278 }
279 }
280 }),
281 c,
282 );
283}
284
285fn read_write_contention_benchmarks(c: &mut Criterion) {
286 for num_threads in [2, 4, 8].iter() {
287 for num_distinct_objects in [1u32, 2u32, 4u32].iter() {
288 if num_distinct_objects >= &(*num_threads as u32) {
289 continue;
290 }
291 for read_write_count in READ_WRITE_COUNTS {
292 read_and_write_contention_multithreaded(
293 *num_threads,
294 *num_distinct_objects,
295 read_write_count,
296 c,
297 );
298 }
299 }
300 }
301}
302
303/// Benchmarks allocation (starting from empty) for the default number of shards and threads
304fn allocator_contention_benchmark(c: &mut Criterion) {
305 let name = "allocator_contention_multithreaded";
306 // We need an Arc to pass this off to `bench_multithreaded`
307 let handle_map = Arc::new(build_handle_map(DEFAULT_SHARDS_AND_THREADS));
308 bench_multithreaded(
309 name,
310 DEFAULT_SHARDS_AND_THREADS,
311 Arc::new(move || {
312 handle_map.allocate(|| black_box(0xFF)).unwrap();
313 }),
314 c,
315 );
316}
317
// Register every benchmark above with criterion and emit the standard
// criterion-provided `main` entry point.
criterion_group!(
    benches,
    single_threaded_read_benchmark,
    single_threaded_write_benchmark,
    single_threaded_allocate_benchmark,
    single_threaded_allocate_deallocate_near_empty_benchmark,
    single_threaded_allocate_deallocate_one_thousand_benchmark,
    single_threaded_allocate_deallocate_one_million_benchmark,
    lifecycle_benchmarks,
    read_write_contention_benchmarks,
    allocator_contention_benchmark,
);
criterion_main!(benches);