| /* |
| * Copyright 2020 The WebRTC Project Authors. All rights reserved. |
| * |
| * Use of this source code is governed by a BSD-style license |
| * that can be found in the LICENSE file in the root of the source |
| * tree. An additional intellectual property rights grant can be found |
| * in the file PATENTS. All contributing project authors may |
| * be found in the AUTHORS file in the root of the source tree. |
| */ |
| |
| #include "benchmark/benchmark.h" |
| #include "rtc_base/synchronization/mutex.h" |
| #include "rtc_base/system/unused.h" |
| |
| namespace webrtc { |
| |
| class PerfTestData { |
| public: |
| PerfTestData() : cache_line_barrier_1_(), cache_line_barrier_2_() { |
| cache_line_barrier_1_[0]++; // Avoid 'is not used'. |
| cache_line_barrier_2_[0]++; // Avoid 'is not used'. |
| } |
| |
| int AddToCounter(int add) { |
| MutexLock mu(&mu_); |
| my_counter_ += add; |
| return 0; |
| } |
| |
| private: |
| uint8_t cache_line_barrier_1_[64]; |
| Mutex mu_; |
| uint8_t cache_line_barrier_2_[64]; |
| int64_t my_counter_ = 0; |
| }; |
| |
| void BM_LockWithMutex(benchmark::State& state) { |
| static PerfTestData test_data; |
| for (auto s : state) { |
| RTC_UNUSED(s); |
| benchmark::DoNotOptimize(test_data.AddToCounter(2)); |
| } |
| } |
| |
| BENCHMARK(BM_LockWithMutex)->Threads(1); |
| BENCHMARK(BM_LockWithMutex)->Threads(2); |
| BENCHMARK(BM_LockWithMutex)->Threads(4); |
| BENCHMARK(BM_LockWithMutex)->ThreadPerCpu(); |
| |
| } // namespace webrtc |
| |
| /* |
| |
| Results: |
| |
NB when reproducing: Remember to turn off power management features such as CPU
scaling before running!
| |
| pthreads (Linux): |
| ---------------------------------------------------------------------- |
| Run on (12 X 4500 MHz CPU s) |
| CPU Caches: |
| L1 Data 32 KiB (x6) |
| L1 Instruction 32 KiB (x6) |
| L2 Unified 1024 KiB (x6) |
| L3 Unified 8448 KiB (x1) |
| Load Average: 0.26, 0.28, 0.44 |
| ---------------------------------------------------------------------- |
| Benchmark Time CPU Iterations |
| ---------------------------------------------------------------------- |
| BM_LockWithMutex/threads:1 13.4 ns 13.4 ns 52192906 |
| BM_LockWithMutex/threads:2 44.2 ns 88.4 ns 8189944 |
| BM_LockWithMutex/threads:4 52.0 ns 198 ns 3743244 |
| BM_LockWithMutex/threads:12 84.9 ns 944 ns 733524 |
| |
| std::mutex performs like the pthread implementation (Linux). |
| |
| Abseil (Linux): |
| ---------------------------------------------------------------------- |
| Run on (12 X 4500 MHz CPU s) |
| CPU Caches: |
| L1 Data 32 KiB (x6) |
| L1 Instruction 32 KiB (x6) |
| L2 Unified 1024 KiB (x6) |
| L3 Unified 8448 KiB (x1) |
| Load Average: 0.27, 0.24, 0.37 |
| ---------------------------------------------------------------------- |
| Benchmark Time CPU Iterations |
| ---------------------------------------------------------------------- |
| BM_LockWithMutex/threads:1 15.0 ns 15.0 ns 46550231 |
| BM_LockWithMutex/threads:2 91.1 ns 182 ns 4059212 |
| BM_LockWithMutex/threads:4 40.8 ns 131 ns 5496560 |
| BM_LockWithMutex/threads:12 37.0 ns 130 ns 5377668 |
| |
| */ |