Project import generated by Copybara.

GitOrigin-RevId: f70580d4e9be3db690b9b6bdfb8e253a496b0ecd
Change-Id: I88739121e946b84d3268a4fb62731070c76b309d
diff --git a/nearby/presence/handle_map/benches/benches.rs b/nearby/presence/handle_map/benches/benches.rs
new file mode 100644
index 0000000..f1ee427
--- /dev/null
+++ b/nearby/presence/handle_map/benches/benches.rs
@@ -0,0 +1,330 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use handle_map::*;
+use std::sync::Arc;
+use std::thread;
+use std::time::{Duration, Instant};
+
+// For the sake of these benchmarks, we just want
+// to be able to allocate an almost-arbitrary number of handles
+// if needed by the particular benchmark.
+const MAX_ACTIVE_HANDLES: u32 = u32::MAX - 1;
+
+// The default number of threads+shards to use
+// in multi-threaded benchmarks, chosen
+// to represent a typical multi-threaded use-case.
+const DEFAULT_SHARDS_AND_THREADS: u8 = 8;
+
+/// We're using u8's for the wrapped object type to deliberately ignore
+/// costs related to accessing the underlying data - we just care about
+/// benchmarking handle-map operations.
+type BenchHandleMap = HandleMap<u8>;
+
+fn build_handle_map(num_shards: u8) -> BenchHandleMap {
+    let dimensions = HandleMapDimensions { num_shards, max_active_handles: MAX_ACTIVE_HANDLES };
+    HandleMap::with_dimensions(dimensions)
+}
+
+/// Benchmark for repeated reads on a single thread
+fn single_threaded_read_benchmark(c: &mut Criterion) {
+    // The number of shards should not matter much here.
+    let handle_map = build_handle_map(8);
+    // Pre-populate the map with a single handle.
+    let handle = handle_map.allocate(|| 0xFF).unwrap();
+    let handle_map_ref = &handle_map;
+    // Perform repeated reads
+    c.bench_function("single-threaded reads", |b| {
+        b.iter(|| {
+            let guard = handle_map_ref.get(black_box(handle)).unwrap();
+            black_box(*guard)
+        })
+    });
+}
+
+/// Benchmark for repeated writes on a single thread
+fn single_threaded_write_benchmark(c: &mut Criterion) {
+    // The number of shards should not matter much here.
+    let handle_map = build_handle_map(8);
+    // Pre-populate the map with a single handle.
+    let handle = handle_map.allocate(|| 0xFF).unwrap();
+    let handle_map_ref = &handle_map;
+    // Perform repeated writes
+    c.bench_function("single-threaded writes", |b| {
+        b.iter(|| {
+            let mut guard = handle_map_ref.get_mut(black_box(handle)).unwrap();
+            *guard *= black_box(0);
+        })
+    });
+}
+
+/// Benchmark for repeated allocations on a single thread
+#[allow(unused_must_use)]
+fn single_threaded_allocate_benchmark(c: &mut Criterion) {
+    // The number of shards here is chosen to be quasi-reasonable.
+    // Realistically, performance will fluctuate somewhat with this
+    // due to the difference in the number of internal hash-map collisions,
+    // but ultimately we only care about the asymptotic behavior.
+    let handle_map = build_handle_map(8);
+    let handle_map_ref = &handle_map;
+    // Perform repeated allocations
+    c.bench_function("single-threaded allocations", |b| {
+        b.iter(|| {
+            handle_map_ref.allocate(|| black_box(0xEE));
+        })
+    });
+}
+
+/// Benchmark for repeated allocation->deallocation on a single thread
+/// starting from an empty map.
+#[allow(unused_must_use)]
+fn single_threaded_allocate_deallocate_near_empty_benchmark(c: &mut Criterion) {
+    // The number of shards should not matter much here.
+    let handle_map = build_handle_map(8);
+    let handle_map_ref = &handle_map;
+    // Perform repeated allocation/deallocation pairs
+    c.bench_function("single-threaded allocate/deallocate pairs (empty init state)", |b| {
+        b.iter(|| {
+            let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap();
+            handle_map_ref.deallocate(handle);
+        })
+    });
+}
+
+/// Benchmark for repeated allocation->deallocation starting
+/// from a map with a thousand elements per shard.
+#[allow(unused_must_use)]
+fn single_threaded_allocate_deallocate_one_thousand_benchmark(c: &mut Criterion) {
+    let handle_map = build_handle_map(8);
+    for _ in 0..(8 * 1000) {
+        handle_map.allocate(|| black_box(0xFF)).unwrap();
+    }
+    let handle_map_ref = &handle_map;
+
+    // Perform repeated allocation/deallocation pairs
+    c.bench_function(
+        "single-threaded allocate/deallocate pairs (init one thousand elements per shard)",
+        |b| {
+            b.iter(|| {
+                let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap();
+                handle_map_ref.deallocate(handle);
+            })
+        },
+    );
+}
+
+/// Benchmark for repeated allocation->deallocation starting
+/// from a map with a million elements per shard.
+#[allow(unused_must_use)]
+fn single_threaded_allocate_deallocate_one_million_benchmark(c: &mut Criterion) {
+    let handle_map = build_handle_map(8);
+    for _ in 0..(8 * 1000000) {
+        handle_map.allocate(|| black_box(0xFF)).unwrap();
+    }
+    let handle_map_ref = &handle_map;
+
+    // Perform repeated allocation/deallocation pairs
+    c.bench_function(
+        "single-threaded allocate/deallocate pairs (init one million elements per shard)",
+        |b| {
+            b.iter(|| {
+                let handle = handle_map_ref.allocate(|| black_box(0xDD)).unwrap();
+                handle_map_ref.deallocate(handle);
+            })
+        },
+    );
+}
+
+/// Helper function for creating a multi-threaded benchmark
+/// with the set number of threads. Untimed Initialization of state
+/// should occur prior to this call. This method will repeatedly
+/// execute the benchmark code on all threads
+fn bench_multithreaded<F>(name: &str, num_threads: u8, benchable_fn_ref: Arc<F>, c: &mut Criterion)
+where
+    F: Fn() + Send + Sync + 'static,
+{
+    c.bench_function(name, move |b| {
+        b.iter_custom(|iters| {
+            // The returned results from the threads will be their timings
+            let mut join_handles = Vec::new();
+            for _ in 0..num_threads {
+                let benchable_fn_ref_clone = benchable_fn_ref.clone();
+                let join_handle = thread::spawn(move || {
+                    let start = Instant::now();
+                    for _ in 0..iters {
+                        benchable_fn_ref_clone();
+                    }
+                    start.elapsed()
+                });
+                join_handles.push(join_handle);
+            }
+            // Threads spawned, now collect the results
+            let mut total_duration = Duration::from_nanos(0);
+            for join_handle in join_handles {
+                let thread_duration = join_handle.join().unwrap();
+                total_duration += thread_duration;
+            }
+            total_duration /= num_threads as u32;
+            total_duration
+        })
+    });
+}
+
+/// Defines a number of reads/writes to perform [relative
+/// to other operations performed in a given test]
+#[derive(Debug, Clone, Copy)]
+struct ReadWriteCount {
+    num_reads: usize,
+    num_writes: usize,
+}
+
+const READS_ONLY: ReadWriteCount = ReadWriteCount { num_reads: 4, num_writes: 0 };
+
+const READ_LEANING: ReadWriteCount = ReadWriteCount { num_reads: 3, num_writes: 1 };
+
+const BALANCED: ReadWriteCount = ReadWriteCount { num_reads: 2, num_writes: 2 };
+
+const WRITE_LEANING: ReadWriteCount = ReadWriteCount { num_reads: 1, num_writes: 3 };
+
+const WRITES_ONLY: ReadWriteCount = ReadWriteCount { num_reads: 0, num_writes: 4 };
+
+const READ_WRITE_COUNTS: [ReadWriteCount; 5] =
+    [READS_ONLY, READ_LEANING, BALANCED, WRITE_LEANING, WRITES_ONLY];
+
+/// Benchmarks a repeated allocate/[X writes]/[Y reads]/deallocate workflow across
+/// the default number of threads and shards.
+fn lifecycle_read_and_write_multithreaded(per_object_rw_count: ReadWriteCount, c: &mut Criterion) {
+    let name = format!(
+        "lifecycle_read_and_write_multithreaded(reads/object: {}, writes/object:{})",
+        per_object_rw_count.num_reads, per_object_rw_count.num_writes
+    );
+    // We need an Arc to be able to pass this off to `bench_multithreaded`.
+    let handle_map = Arc::new(build_handle_map(DEFAULT_SHARDS_AND_THREADS));
+    bench_multithreaded(
+        &name,
+        DEFAULT_SHARDS_AND_THREADS,
+        Arc::new(move || {
+            let handle = handle_map.allocate(|| black_box(0xFF)).unwrap();
+            for _ in 0..per_object_rw_count.num_writes {
+                let mut write_guard = handle_map.get_mut(handle).unwrap();
+                *write_guard *= black_box(1);
+            }
+            for _ in 0..per_object_rw_count.num_reads {
+                let read_guard = handle_map.get(handle).unwrap();
+                black_box(*read_guard);
+            }
+            handle_map.deallocate(handle).unwrap();
+        }),
+        c,
+    );
+}
+
+fn lifecycle_benchmarks(c: &mut Criterion) {
+    for read_write_count in READ_WRITE_COUNTS {
+        // Shard count comes from DEFAULT_SHARDS_AND_THREADS in the callee,
+        lifecycle_read_and_write_multithreaded(read_write_count, c);
+    }
+}
+
+/// Benchmarks repeated cycles of accessing the given number of distinct objects, each
+/// with the given number of reads and writes across the given number of threads.
+fn read_and_write_contention_multithreaded(
+    num_threads: u8,
+    num_distinct_objects: u32,
+    per_object_rw_count: ReadWriteCount,
+    c: &mut Criterion,
+) {
+    let name = format!("read_and_write_contention_multithreaded(threads: {}, objects: {}, reads/object: {}, writes/object:{})",
+                       num_threads, num_distinct_objects, per_object_rw_count.num_reads, per_object_rw_count.num_writes);
+    // Default to 8 shards, ultimately doesn't matter much since we only ever access a few shards.
+    // This needs to be `'static` in order to pass a ref off to `bench_multithreaded`.
+    let handle_map = Arc::new(build_handle_map(8));
+    let mut handles = Vec::new();
+    for i in 0..num_distinct_objects {
+        let handle = handle_map.allocate(|| i as u8).unwrap();
+        handles.push(handle);
+    }
+
+    bench_multithreaded(
+        &name,
+        num_threads,
+        Arc::new(move || {
+            // Deliberately performing all writes first, with
+            // the goal of getting staggered timings from
+            // all of the threads for subsequent iterations.
+            // We also perform reads and writes across all objects
+            // in batches to get something resembling uniform access.
+            for _ in 0..per_object_rw_count.num_writes {
+                for handle in handles.iter() {
+                    let mut write_guard = handle_map.get_mut(*handle).unwrap();
+                    *write_guard *= black_box(1);
+                }
+            }
+            for _ in 0..per_object_rw_count.num_reads {
+                for handle in handles.iter() {
+                    let read_guard = handle_map.get(*handle).unwrap();
+                    black_box(*read_guard);
+                }
+            }
+        }),
+        c,
+    );
+}
+
+fn read_write_contention_benchmarks(c: &mut Criterion) {
+    for num_threads in [2, 4, 8].iter() {
+        for num_distinct_objects in [1u32, 2u32, 4u32].iter() {
+            if num_distinct_objects >= &(*num_threads as u32) {
+                continue;
+            }
+            for read_write_count in READ_WRITE_COUNTS {
+                read_and_write_contention_multithreaded(
+                    *num_threads,
+                    *num_distinct_objects,
+                    read_write_count,
+                    c,
+                );
+            }
+        }
+    }
+}
+
+/// Benchmarks allocation (starting from empty) for the default number of shards and threads
+fn allocator_contention_benchmark(c: &mut Criterion) {
+    let name = "allocator_contention_multithreaded";
+    // We need an Arc to pass this off to `bench_multithreaded`
+    let handle_map = Arc::new(build_handle_map(DEFAULT_SHARDS_AND_THREADS));
+    bench_multithreaded(
+        name,
+        DEFAULT_SHARDS_AND_THREADS,
+        Arc::new(move || {
+            handle_map.allocate(|| black_box(0xFF)).unwrap();
+        }),
+        c,
+    );
+}
+
+criterion_group!(
+    benches,
+    single_threaded_read_benchmark,
+    single_threaded_write_benchmark,
+    single_threaded_allocate_benchmark,
+    single_threaded_allocate_deallocate_near_empty_benchmark,
+    single_threaded_allocate_deallocate_one_thousand_benchmark,
+    single_threaded_allocate_deallocate_one_million_benchmark,
+    lifecycle_benchmarks,
+    read_write_contention_benchmarks,
+    allocator_contention_benchmark,
+);
+criterion_main!(benches);