// benchmark.h — Google Benchmark v1.9.1
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Support for registering benchmarks for functions.
16
17/* Example usage:
18// Define a function that executes the code to be measured a
19// specified number of times:
20static void BM_StringCreation(benchmark::State& state) {
21 for (auto _ : state)
22 std::string empty_string;
23}
24
25// Register the function as a benchmark
26BENCHMARK(BM_StringCreation);
27
28// Define another benchmark
29static void BM_StringCopy(benchmark::State& state) {
30 std::string x = "hello";
31 for (auto _ : state)
32 std::string copy(x);
33}
34BENCHMARK(BM_StringCopy);
35
36// Augment the main() program to invoke benchmarks if specified
37// via the --benchmark_filter command line flag. E.g.,
38// my_unittest --benchmark_filter=all
39// my_unittest --benchmark_filter=BM_StringCreation
40// my_unittest --benchmark_filter=String
41// my_unittest --benchmark_filter='Copy|Creation'
42int main(int argc, char** argv) {
43 benchmark::Initialize(&argc, argv);
44 benchmark::RunSpecifiedBenchmarks();
45 benchmark::Shutdown();
46 return 0;
47}
48
49// Sometimes a family of microbenchmarks can be implemented with
50// just one routine that takes an extra argument to specify which
51// one of the family of benchmarks to run. For example, the following
52// code defines a family of microbenchmarks for measuring the speed
53// of memcpy() calls of different lengths:
54
55static void BM_memcpy(benchmark::State& state) {
56 char* src = new char[state.range(0)]; char* dst = new char[state.range(0)];
57 memset(src, 'x', state.range(0));
58 for (auto _ : state)
59 memcpy(dst, src, state.range(0));
60 state.SetBytesProcessed(state.iterations() * state.range(0));
61 delete[] src; delete[] dst;
62}
63BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(8<<10);
64
65// The preceding code is quite repetitive, and can be replaced with the
66// following short-hand. The following invocation will pick a few
67// appropriate arguments in the specified range and will generate a
68// microbenchmark for each such argument.
69BENCHMARK(BM_memcpy)->Range(8, 8<<10);
70
71// You might have a microbenchmark that depends on two inputs. For
72// example, the following code defines a family of microbenchmarks for
73// measuring the speed of set insertion.
74static void BM_SetInsert(benchmark::State& state) {
75 set<int> data;
76 for (auto _ : state) {
77 state.PauseTiming();
78 data = ConstructRandomSet(state.range(0));
79 state.ResumeTiming();
80 for (int j = 0; j < state.range(1); ++j)
81 data.insert(RandomNumber());
82 }
83}
84BENCHMARK(BM_SetInsert)
85 ->Args({1<<10, 128})
86 ->Args({2<<10, 128})
87 ->Args({4<<10, 128})
88 ->Args({8<<10, 128})
89 ->Args({1<<10, 512})
90 ->Args({2<<10, 512})
91 ->Args({4<<10, 512})
92 ->Args({8<<10, 512});
93
94// The preceding code is quite repetitive, and can be replaced with
95// the following short-hand. The following macro will pick a few
96// appropriate arguments in the product of the two specified ranges
97// and will generate a microbenchmark for each such pair.
98BENCHMARK(BM_SetInsert)->Ranges({{1<<10, 8<<10}, {128, 512}});
99
100// For more complex patterns of inputs, passing a custom function
101// to Apply allows programmatic specification of an
102// arbitrary set of arguments to run the microbenchmark on.
103// The following example enumerates a dense range on
104// one parameter, and a sparse range on the second.
105static void CustomArguments(benchmark::internal::Benchmark* b) {
106 for (int i = 0; i <= 10; ++i)
107 for (int j = 32; j <= 1024*1024; j *= 8)
108 b->Args({i, j});
109}
110BENCHMARK(BM_SetInsert)->Apply(CustomArguments);
111
112// Templated microbenchmarks work the same way:
113// Produce then consume 'size' messages 'iters' times
114// Measures throughput in the absence of multiprogramming.
template <class Q> void BM_Sequential(benchmark::State& state) {
116 Q q;
117 typename Q::value_type v;
118 for (auto _ : state) {
119 for (int i = state.range(0); i--; )
120 q.push(v);
121 for (int e = state.range(0); e--; )
122 q.Wait(&v);
123 }
124 // actually messages, not bytes:
125 state.SetBytesProcessed(state.iterations() * state.range(0));
126}
127BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
128
129Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
130benchmark. This option overrides the `benchmark_min_time` flag.
131
132void BM_test(benchmark::State& state) {
133 ... body ...
134}
135BENCHMARK(BM_test)->MinTime(2.0); // Run for at least 2 seconds.
136
137In a multithreaded test, it is guaranteed that none of the threads will start
138until all have reached the loop start, and all will have finished before any
139thread exits the loop body. As such, any global setup or teardown you want to
140do can be wrapped in a check against the thread index:
141
142static void BM_MultiThreaded(benchmark::State& state) {
143 if (state.thread_index() == 0) {
144 // Setup code here.
145 }
146 for (auto _ : state) {
147 // Run the test as normal.
148 }
149 if (state.thread_index() == 0) {
150 // Teardown code here.
151 }
152}
153BENCHMARK(BM_MultiThreaded)->Threads(4);
154
155
156If a benchmark runs a few milliseconds it may be hard to visually compare the
157measured times, since the output data is given in nanoseconds per default. In
158order to manually set the time unit, you can specify it manually:
159
160BENCHMARK(BM_test)->Unit(benchmark::kMillisecond);
161*/
162
163#ifndef BENCHMARK_BENCHMARK_H_
164#define BENCHMARK_BENCHMARK_H_
165
166// The _MSVC_LANG check should detect Visual Studio 2015 Update 3 and newer.
167#if __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
168#define BENCHMARK_HAS_CXX11
169#endif
170
171// This _MSC_VER check should detect VS 2017 v15.3 and newer.
172#if __cplusplus >= 201703L || \
173 (defined(_MSC_VER) && _MSC_VER >= 1911 && _MSVC_LANG >= 201703L)
174#define BENCHMARK_HAS_CXX17
175#endif
176
177#include <stdint.h>
178
179#include <algorithm>
180#include <cassert>
181#include <cstddef>
182#include <iosfwd>
183#include <limits>
184#include <map>
185#include <set>
186#include <string>
187#include <utility>
188#include <vector>
189
190#include "benchmark/export.h"
191
192#if defined(BENCHMARK_HAS_CXX11)
193#include <atomic>
194#include <initializer_list>
195#include <type_traits>
196#include <utility>
197#endif
198
199#if defined(_MSC_VER)
200#include <intrin.h> // for _ReadWriteBarrier
201#endif
202
203#ifndef BENCHMARK_HAS_CXX11
204#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
205 TypeName(const TypeName&); \
206 TypeName& operator=(const TypeName&)
207#else
208#define BENCHMARK_DISALLOW_COPY_AND_ASSIGN(TypeName) \
209 TypeName(const TypeName&) = delete; \
210 TypeName& operator=(const TypeName&) = delete
211#endif
212
213#ifdef BENCHMARK_HAS_CXX17
214#define BENCHMARK_UNUSED [[maybe_unused]]
215#elif defined(__GNUC__) || defined(__clang__)
216#define BENCHMARK_UNUSED __attribute__((unused))
217#else
218#define BENCHMARK_UNUSED
219#endif
220
221// Used to annotate functions, methods and classes so they
222// are not optimized by the compiler. Useful for tests
223// where you expect loops to stay in place churning cycles
224#if defined(__clang__)
225#define BENCHMARK_DONT_OPTIMIZE __attribute__((optnone))
226#elif defined(__GNUC__) || defined(__GNUG__)
227#define BENCHMARK_DONT_OPTIMIZE __attribute__((optimize(0)))
228#else
229// MSVC & Intel do not have a no-optimize attribute, only line pragmas
230#define BENCHMARK_DONT_OPTIMIZE
231#endif
232
233#if defined(__GNUC__) || defined(__clang__)
234#define BENCHMARK_ALWAYS_INLINE __attribute__((always_inline))
235#elif defined(_MSC_VER) && !defined(__clang__)
236#define BENCHMARK_ALWAYS_INLINE __forceinline
237#define __func__ __FUNCTION__
238#else
239#define BENCHMARK_ALWAYS_INLINE
240#endif
241
242#define BENCHMARK_INTERNAL_TOSTRING2(x) #x
243#define BENCHMARK_INTERNAL_TOSTRING(x) BENCHMARK_INTERNAL_TOSTRING2(x)
244
245// clang-format off
246#if (defined(__GNUC__) && !defined(__NVCC__) && !defined(__NVCOMPILER)) || defined(__clang__)
247#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
248#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
249#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
250 _Pragma("GCC diagnostic push") \
251 _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
252#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("GCC diagnostic pop")
253#elif defined(__NVCOMPILER)
254#define BENCHMARK_BUILTIN_EXPECT(x, y) __builtin_expect(x, y)
255#define BENCHMARK_DEPRECATED_MSG(msg) __attribute__((deprecated(msg)))
256#define BENCHMARK_DISABLE_DEPRECATED_WARNING \
257 _Pragma("diagnostic push") \
258 _Pragma("diag_suppress deprecated_entity_with_custom_message")
259#define BENCHMARK_RESTORE_DEPRECATED_WARNING _Pragma("diagnostic pop")
260#else
261#define BENCHMARK_BUILTIN_EXPECT(x, y) x
262#define BENCHMARK_DEPRECATED_MSG(msg)
263#define BENCHMARK_WARNING_MSG(msg) \
264 __pragma(message(__FILE__ "(" BENCHMARK_INTERNAL_TOSTRING( \
265 __LINE__) ") : warning note: " msg))
266#define BENCHMARK_DISABLE_DEPRECATED_WARNING
267#define BENCHMARK_RESTORE_DEPRECATED_WARNING
268#endif
269// clang-format on
270
271#if defined(__GNUC__) && !defined(__clang__)
272#define BENCHMARK_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
273#endif
274
275#ifndef __has_builtin
276#define __has_builtin(x) 0
277#endif
278
279#if defined(__GNUC__) || __has_builtin(__builtin_unreachable)
280#define BENCHMARK_UNREACHABLE() __builtin_unreachable()
281#elif defined(_MSC_VER)
282#define BENCHMARK_UNREACHABLE() __assume(false)
283#else
284#define BENCHMARK_UNREACHABLE() ((void)0)
285#endif
286
287#ifdef BENCHMARK_HAS_CXX11
288#define BENCHMARK_OVERRIDE override
289#else
290#define BENCHMARK_OVERRIDE
291#endif
292
293#if defined(__GNUC__)
294// Determine the cacheline size based on architecture
295#if defined(__i386__) || defined(__x86_64__)
296#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
297#elif defined(__powerpc64__)
298#define BENCHMARK_INTERNAL_CACHELINE_SIZE 128
299#elif defined(__aarch64__)
300#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
301#elif defined(__arm__)
302// Cache line sizes for ARM: These values are not strictly correct since
303// cache line sizes depend on implementations, not architectures. There
304// are even implementations with cache line sizes configurable at boot
305// time.
306#if defined(__ARM_ARCH_5T__)
307#define BENCHMARK_INTERNAL_CACHELINE_SIZE 32
308#elif defined(__ARM_ARCH_7A__)
309#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
310#endif // ARM_ARCH
311#endif // arches
312#endif // __GNUC__
313
314#ifndef BENCHMARK_INTERNAL_CACHELINE_SIZE
315// A reasonable default guess. Note that overestimates tend to waste more
316// space, while underestimates tend to waste more time.
317#define BENCHMARK_INTERNAL_CACHELINE_SIZE 64
318#endif
319
320#if defined(__GNUC__)
321// Indicates that the declared object be cache aligned using
322// `BENCHMARK_INTERNAL_CACHELINE_SIZE` (see above).
323#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
324 __attribute__((aligned(BENCHMARK_INTERNAL_CACHELINE_SIZE)))
325#elif defined(_MSC_VER)
326#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED \
327 __declspec(align(BENCHMARK_INTERNAL_CACHELINE_SIZE))
328#else
329#define BENCHMARK_INTERNAL_CACHELINE_ALIGNED
330#endif
331
332#if defined(_MSC_VER)
333#pragma warning(push)
334// C4251: <symbol> needs to have dll-interface to be used by clients of class
335#pragma warning(disable : 4251)
336#endif // _MSC_VER_
337
338namespace benchmark {
339class BenchmarkReporter;
340
// Default minimum benchmark running time, in seconds.
342const char kDefaultMinTimeStr[] = "0.5s";
343
344// Returns the version of the library.
345BENCHMARK_EXPORT std::string GetBenchmarkVersion();
346
347BENCHMARK_EXPORT void PrintDefaultHelp();
348
349BENCHMARK_EXPORT void Initialize(int* argc, char** argv,
350 void (*HelperPrinterf)() = PrintDefaultHelp);
351BENCHMARK_EXPORT void Shutdown();
352
// Report to stdout all arguments in 'argv' as unrecognized except the first.
// Returns true if there is at least one unrecognized argument
// (i.e. 'argc' > 1).
355BENCHMARK_EXPORT bool ReportUnrecognizedArguments(int argc, char** argv);
356
357// Returns the current value of --benchmark_filter.
358BENCHMARK_EXPORT std::string GetBenchmarkFilter();
359
360// Sets a new value to --benchmark_filter. (This will override this flag's
361// current value).
362// Should be called after `benchmark::Initialize()`, as
363// `benchmark::Initialize()` will override the flag's value.
364BENCHMARK_EXPORT void SetBenchmarkFilter(std::string value);
365
366// Returns the current value of --v (command line value for verbosity).
367BENCHMARK_EXPORT int32_t GetBenchmarkVerbosity();
368
369// Creates a default display reporter. Used by the library when no display
370// reporter is provided, but also made available for external use in case a
371// custom reporter should respect the `--benchmark_format` flag as a fallback
372BENCHMARK_EXPORT BenchmarkReporter* CreateDefaultDisplayReporter();
373
374// Generate a list of benchmarks matching the specified --benchmark_filter flag
375// and if --benchmark_list_tests is specified return after printing the name
376// of each matching benchmark. Otherwise run each matching benchmark and
377// report the results.
378//
379// spec : Specify the benchmarks to run. If users do not specify this arg,
380// then the value of FLAGS_benchmark_filter
381// will be used.
382//
// The second and third overload use the specified 'display_reporter' and
// 'file_reporter' respectively. 'file_reporter' will write to the file
// specified by '--benchmark_out'. If '--benchmark_out' is not given the
// 'file_reporter' is ignored.
388//
389// RETURNS: The number of matching benchmarks.
390BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks();
391BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(std::string spec);
392
393BENCHMARK_EXPORT size_t
394RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter);
395BENCHMARK_EXPORT size_t
396RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, std::string spec);
397
398BENCHMARK_EXPORT size_t RunSpecifiedBenchmarks(
399 BenchmarkReporter* display_reporter, BenchmarkReporter* file_reporter);
400BENCHMARK_EXPORT size_t
401RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
402 BenchmarkReporter* file_reporter, std::string spec);
403
404// TimeUnit is passed to a benchmark in order to specify the order of magnitude
405// for the measured time.
406enum TimeUnit { kNanosecond, kMicrosecond, kMillisecond, kSecond };
407
408BENCHMARK_EXPORT TimeUnit GetDefaultTimeUnit();
409
410// Sets the default time unit the benchmarks use
411// Has to be called before the benchmark loop to take effect
412BENCHMARK_EXPORT void SetDefaultTimeUnit(TimeUnit unit);
413
// If a MemoryManager is registered (via RegisterMemoryManager()),
// it can be used to collect and report allocation metrics for a run of the
// benchmark.
// NOTE(review): the `class MemoryManager {` line was lost from this copy of
// the header (source line 417 is missing); restored here so the members below
// are inside the class again.
class MemoryManager {
 public:
  // Sentinel marking metrics that an implementation does not provide.
  static const int64_t TombstoneValue;

  // Allocation metrics collected between Start() and Stop().
  struct Result {
    Result()
        : num_allocs(0),
          max_bytes_used(0),
          total_allocated_bytes(TombstoneValue),
          net_heap_growth(TombstoneValue) {}

    // The number of allocations made in total between Start and Stop.
    int64_t num_allocs;

    // The peak memory use between Start and Stop.
    int64_t max_bytes_used;

    // The total memory allocated, in bytes, between Start and Stop.
    // Init'ed to TombstoneValue if metric not available.
    int64_t total_allocated_bytes;

    // The net changes in memory, in bytes, between Start and Stop.
    // ie., total_allocated_bytes - total_deallocated_bytes.
    // Init'ed to TombstoneValue if metric not available.
    int64_t net_heap_growth;
  };

  virtual ~MemoryManager() {}

  // Implement this to start recording allocation information.
  virtual void Start() = 0;

  // Implement this to stop recording and fill out the given Result structure.
  virtual void Stop(Result& result) = 0;
};
452
453// Register a MemoryManager instance that will be used to collect and report
454// allocation measurements for benchmark runs.
455BENCHMARK_EXPORT
456void RegisterMemoryManager(MemoryManager* memory_manager);
457
// If a ProfilerManager is registered (via RegisterProfilerManager()), the
// benchmark will be run an additional time under the profiler to collect and
// report profile metrics for the run of the benchmark.
// NOTE(review): the `class ProfilerManager {` line was lost from this copy of
// the header (source line 461 is missing); restored here so the members below
// are inside the class again.
class ProfilerManager {
 public:
  virtual ~ProfilerManager() {}

  // This is called after `Setup()` code and right before the benchmark is run.
  virtual void AfterSetupStart() = 0;

  // This is called before `Teardown()` code and right after the benchmark
  // completes.
  virtual void BeforeTeardownStop() = 0;
};
472
473// Register a ProfilerManager instance that will be used to collect and report
474// profile measurements for benchmark runs.
475BENCHMARK_EXPORT
476void RegisterProfilerManager(ProfilerManager* profiler_manager);
477
478// Add a key-value pair to output as part of the context stanza in the report.
479BENCHMARK_EXPORT
480void AddCustomContext(const std::string& key, const std::string& value);
481
482namespace internal {
483class Benchmark;
484class BenchmarkImp;
485class BenchmarkFamilies;
486
487BENCHMARK_EXPORT std::map<std::string, std::string>*& GetGlobalContext();
488
489BENCHMARK_EXPORT
490void UseCharPointer(char const volatile*);
491
492// Take ownership of the pointer and register the benchmark. Return the
493// registered benchmark.
494BENCHMARK_EXPORT Benchmark* RegisterBenchmarkInternal(Benchmark*);
495
496// Ensure that the standard streams are properly initialized in every TU.
497BENCHMARK_EXPORT int InitializeStreams();
498BENCHMARK_UNUSED static int stream_init_anchor = InitializeStreams();
499
500} // namespace internal
501
// When inline assembly is unavailable (non-GNU/Clang compilers, pnacl,
// Emscripten), the barrier utilities below fall back to weaker,
// compiler-specific mechanisms.
#if (!defined(__GNUC__) && !defined(__clang__)) || defined(__pnacl__) || \
    defined(__EMSCRIPTEN__)
#define BENCHMARK_HAS_NO_INLINE_ASSEMBLY
#endif

// Force the compiler to flush pending writes to global memory. Acts as an
// effective read/write barrier
// (a signal fence emits no instructions; it only restricts the compiler's
// reordering, not the CPU's).
#ifdef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  std::atomic_signal_fence(std::memory_order_acq_rel);
}
#endif

// The DoNotOptimize(...) function can be used to prevent a value or
// expression from being optimized away by the compiler. This function is
// intended to add little to no overhead.
// See: https://youtu.be/nXaxk27zwlk?t=2441
//
// The empty asm statements below force the compiler to materialize `value`
// (register or memory, per the constraint) without emitting any code.
#ifndef BENCHMARK_HAS_NO_INLINE_ASSEMBLY
// Branch 1: Clang, Intel, and GNU-compatible compilers that are not real GCC
// (real GCC takes the workaround branch below).
#if !defined(__GNUC__) || defined(__llvm__) || defined(__INTEL_COMPILER)
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  asm volatile("" : : "r,m"(value) : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}

#ifdef BENCHMARK_HAS_CXX11
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
#if defined(__clang__)
  asm volatile("" : "+r,m"(value) : : "memory");
#else
  asm volatile("" : "+m,r"(value) : : "memory");
#endif
}
#endif
// Branch 2: real GCC >= 5 with C++11.
#elif defined(BENCHMARK_HAS_CXX11) && (__GNUC__ >= 5)
// Workaround for a bug with full argument copy overhead with GCC.
// See: #1340 and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105519
// Each overload is split in two: register-sized trivially-copyable types may
// use an "r,m" constraint, while everything else is pinned to memory ("m")
// so GCC does not make a full copy of the argument.
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
                            (sizeof(Tp) <= sizeof(Tp*))>::type
    DoNotOptimize(Tp const& value) {
  asm volatile("" : : "r,m"(value) : "memory");
}

template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
                            (sizeof(Tp) > sizeof(Tp*))>::type
    DoNotOptimize(Tp const& value) {
  asm volatile("" : : "m"(value) : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
                            (sizeof(Tp) <= sizeof(Tp*))>::type
    DoNotOptimize(Tp& value) {
  asm volatile("" : "+m,r"(value) : : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
                            (sizeof(Tp) > sizeof(Tp*))>::type
    DoNotOptimize(Tp& value) {
  asm volatile("" : "+m"(value) : : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<std::is_trivially_copyable<Tp>::value &&
                            (sizeof(Tp) <= sizeof(Tp*))>::type
    DoNotOptimize(Tp&& value) {
  asm volatile("" : "+m,r"(value) : : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE
    typename std::enable_if<!std::is_trivially_copyable<Tp>::value ||
                            (sizeof(Tp) > sizeof(Tp*))>::type
    DoNotOptimize(Tp&& value) {
  asm volatile("" : "+m"(value) : : "memory");
}

// Branch 3: real GCC < 5.
#else
// Fallback for GCC < 5. Can add some overhead because the compiler is forced
// to use memory operations instead of operations with registers.
// TODO: Remove if GCC < 5 will be unsupported.
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  asm volatile("" : : "m"(value) : "memory");
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
  asm volatile("" : "+m"(value) : : "memory");
}

#ifdef BENCHMARK_HAS_CXX11
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
  asm volatile("" : "+m"(value) : : "memory");
}
#endif
#endif

// Pre-C++11 ClobberMemory: a plain empty asm with a "memory" clobber.
#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() {
  asm volatile("" : : : "memory");
}
#endif
#elif defined(_MSC_VER)
// MSVC: no GNU inline assembly; use an opaque out-of-line call plus the
// compiler-level _ReadWriteBarrier() intrinsic (see <intrin.h> above).
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
  _ReadWriteBarrier();
}

#ifndef BENCHMARK_HAS_CXX11
inline BENCHMARK_ALWAYS_INLINE void ClobberMemory() { _ReadWriteBarrier(); }
#endif
#else
// Last-resort portable fallback: an opaque out-of-line call is the only
// optimization barrier available.
#ifdef BENCHMARK_HAS_CXX11
template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp&& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
#else
template <class Tp>
BENCHMARK_DEPRECATED_MSG(
    "The const-ref version of this method can permit "
    "undesired compiler optimizations in benchmarks")
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp const& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}

template <class Tp>
inline BENCHMARK_ALWAYS_INLINE void DoNotOptimize(Tp& value) {
  internal::UseCharPointer(&reinterpret_cast<char const volatile&>(value));
}
#endif
// FIXME Add ClobberMemory() for non-gnu and non-msvc compilers, before C++11.
#endif
670
// This class is used for user-defined counters.
// A Counter wraps a double `value` plus flags telling the reporter how to
// post-process it (rate, per-thread average, per-iteration average, ...) and
// which "1k" convention (1000 vs 1024) to use when scaling for display.
class Counter {
 public:
  enum Flags {
    kDefaults = 0,
    // Mark the counter as a rate. It will be presented divided
    // by the duration of the benchmark.
    kIsRate = 1 << 0,
    // Mark the counter as a thread-average quantity. It will be
    // presented divided by the number of threads.
    kAvgThreads = 1 << 1,
    // Mark the counter as a thread-average rate. See above.
    kAvgThreadsRate = kIsRate | kAvgThreads,
    // Mark the counter as a constant value, valid/same for *every* iteration.
    // When reporting, it will be *multiplied* by the iteration count.
    kIsIterationInvariant = 1 << 2,
    // Mark the counter as a constant rate.
    // When reporting, it will be *multiplied* by the iteration count
    // and then divided by the duration of the benchmark.
    kIsIterationInvariantRate = kIsRate | kIsIterationInvariant,
    // Mark the counter as an iteration-average quantity.
    // It will be presented divided by the number of iterations.
    kAvgIterations = 1 << 3,
    // Mark the counter as an iteration-average rate. See above.
    kAvgIterationsRate = kIsRate | kAvgIterations,

    // In the end, invert the result. This is always done last!
    kInvert = 1 << 31
  };

  enum OneK {
    // 1'000 items per 1k
    kIs1000 = 1000,
    // 1'024 items per 1k
    kIs1024 = 1024
  };

  double value;
  Flags flags;
  OneK oneK;

  BENCHMARK_ALWAYS_INLINE
  Counter(double v = 0., Flags f = kDefaults, OneK k = kIs1000)
      : value(v), flags(f), oneK(k) {}

  // Implicit conversions so a Counter can be read and written like a double.
  BENCHMARK_ALWAYS_INLINE operator double const &() const { return value; }
  BENCHMARK_ALWAYS_INLINE operator double&() { return value; }
};
719
720// A helper for user code to create unforeseen combinations of Flags, without
721// having to do this cast manually each time, or providing this operator.
722Counter::Flags inline operator|(const Counter::Flags& LHS,
723 const Counter::Flags& RHS) {
724 return static_cast<Counter::Flags>(static_cast<int>(LHS) |
725 static_cast<int>(RHS));
726}
727
728// This is the container for the user-defined counters.
729typedef std::map<std::string, Counter> UserCounters;
730
// BigO is passed to a benchmark in order to specify the asymptotic
// computational complexity for the benchmark. In case oAuto is selected,
// complexity will be calculated automatically to the best fit.
735enum BigO { oNone, o1, oN, oNSquared, oNCubed, oLogN, oNLogN, oAuto, oLambda };
736
737typedef int64_t ComplexityN;
738
739typedef int64_t IterationCount;
740
741enum StatisticUnit { kTime, kPercentage };
742
743// BigOFunc is passed to a benchmark in order to specify the asymptotic
744// computational complexity for the benchmark.
745typedef double(BigOFunc)(ComplexityN);
746
747// StatisticsFunc is passed to a benchmark in order to compute some descriptive
748// statistics over all the measurements of some type
749typedef double(StatisticsFunc)(const std::vector<double>&);
750
751namespace internal {
753 std::string name_;
754 StatisticsFunc* compute_;
755 StatisticUnit unit_;
756
757 Statistics(const std::string& name, StatisticsFunc* compute,
758 StatisticUnit unit = kTime)
759 : name_(name), compute_(compute), unit_(unit) {}
760};
761
// Implementation details used by the runner; not part of the public API.
class ThreadTimer;
class ThreadManager;
// NOTE(review): one forward declaration (source line 765) is missing from
// this copy of the header; upstream benchmark.h declares
// `class PerfCountersMeasurement;` here — restored below. Confirm against
// the original file.
class PerfCountersMeasurement;

767enum AggregationReportMode
768#if defined(BENCHMARK_HAS_CXX11)
769 : unsigned
770#else
771#endif
772{
773 // The mode has not been manually specified
774 ARM_Unspecified = 0,
775 // The mode is user-specified.
776 // This may or may not be set when the following bit-flags are set.
777 ARM_Default = 1U << 0U,
778 // File reporter should only output aggregates.
779 ARM_FileReportAggregatesOnly = 1U << 1U,
780 // Display reporter should only output aggregates
781 ARM_DisplayReportAggregatesOnly = 1U << 2U,
782 // Both reporters should only display aggregates.
783 ARM_ReportAggregatesOnly =
784 ARM_FileReportAggregatesOnly | ARM_DisplayReportAggregatesOnly
785};
786
787enum Skipped
788#if defined(BENCHMARK_HAS_CXX11)
789 : unsigned
790#endif
791{
792 NotSkipped = 0,
793 SkippedWithMessage,
794 SkippedWithError
795};
796
797} // namespace internal
798
799#if defined(_MSC_VER)
800#pragma warning(push)
801// C4324: 'benchmark::State': structure was padded due to alignment specifier
802#pragma warning(disable : 4324)
803#endif // _MSC_VER_
804// State is passed to a running Benchmark and contains state for the
805// benchmark to use.
806class BENCHMARK_EXPORT BENCHMARK_INTERNAL_CACHELINE_ALIGNED State {
807 public:
808 struct StateIterator;
809 friend struct StateIterator;
810
811 // Returns iterators used to run each iteration of a benchmark using a
812 // C++11 ranged-based for loop. These functions should not be called directly.
813 //
814 // REQUIRES: The benchmark has not started running yet. Neither begin nor end
815 // have been called previously.
816 //
817 // NOTE: KeepRunning may not be used after calling either of these functions.
818 inline BENCHMARK_ALWAYS_INLINE StateIterator begin();
819 inline BENCHMARK_ALWAYS_INLINE StateIterator end();
820
821 // Returns true if the benchmark should continue through another iteration.
822 // NOTE: A benchmark may not return from the test until KeepRunning() has
823 // returned false.
824 inline bool KeepRunning();
825
826 // Returns true iff the benchmark should run n more iterations.
827 // REQUIRES: 'n' > 0.
828 // NOTE: A benchmark must not return from the test until KeepRunningBatch()
829 // has returned false.
830 // NOTE: KeepRunningBatch() may overshoot by up to 'n' iterations.
831 //
832 // Intended usage:
833 // while (state.KeepRunningBatch(1000)) {
834 // // process 1000 elements
835 // }
836 inline bool KeepRunningBatch(IterationCount n);
837
838 // REQUIRES: timer is running and 'SkipWithMessage(...)' or
839 // 'SkipWithError(...)' has not been called by the current thread.
840 // Stop the benchmark timer. If not called, the timer will be
841 // automatically stopped after the last iteration of the benchmark loop.
842 //
843 // For threaded benchmarks the PauseTiming() function only pauses the timing
844 // for the current thread.
845 //
846 // NOTE: The "real time" measurement is per-thread. If different threads
847 // report different measurements the largest one is reported.
848 //
849 // NOTE: PauseTiming()/ResumeTiming() are relatively
850 // heavyweight, and so their use should generally be avoided
851 // within each benchmark iteration, if possible.
852 void PauseTiming();
853
854 // REQUIRES: timer is not running and 'SkipWithMessage(...)' or
855 // 'SkipWithError(...)' has not been called by the current thread.
856 // Start the benchmark timer. The timer is NOT running on entrance to the
857 // benchmark function. It begins running after control flow enters the
858 // benchmark loop.
859 //
860 // NOTE: PauseTiming()/ResumeTiming() are relatively
861 // heavyweight, and so their use should generally be avoided
862 // within each benchmark iteration, if possible.
863 void ResumeTiming();
864
865 // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
866 // called previously by the current thread.
867 // Report the benchmark as resulting in being skipped with the specified
868 // 'msg'.
869 // After this call the user may explicitly 'return' from the benchmark.
870 //
871 // If the ranged-for style of benchmark loop is used, the user must explicitly
872 // break from the loop, otherwise all future iterations will be run.
873 // If the 'KeepRunning()' loop is used the current thread will automatically
874 // exit the loop at the end of the current iteration.
875 //
876 // For threaded benchmarks only the current thread stops executing and future
877 // calls to `KeepRunning()` will block until all threads have completed
878 // the `KeepRunning()` loop. If multiple threads report being skipped only the
879 // first skip message is used.
880 //
881 // NOTE: Calling 'SkipWithMessage(...)' does not cause the benchmark to exit
882 // the current scope immediately. If the function is called from within
883 // the 'KeepRunning()' loop the current iteration will finish. It is the user's
884 // responsibility to exit the scope as needed.
885 void SkipWithMessage(const std::string& msg);
886
887 // REQUIRES: 'SkipWithMessage(...)' or 'SkipWithError(...)' has not been
888 // called previously by the current thread.
889 // Report the benchmark as resulting in an error with the specified 'msg'.
890 // After this call the user may explicitly 'return' from the benchmark.
891 //
892 // If the ranged-for style of benchmark loop is used, the user must explicitly
893 // break from the loop, otherwise all future iterations will be run.
894 // If the 'KeepRunning()' loop is used the current thread will automatically
895 // exit the loop at the end of the current iteration.
896 //
897 // For threaded benchmarks only the current thread stops executing and future
898 // calls to `KeepRunning()` will block until all threads have completed
899 // the `KeepRunning()` loop. If multiple threads report an error only the
900 // first error message is used.
901 //
902 // NOTE: Calling 'SkipWithError(...)' does not cause the benchmark to exit
903 // the current scope immediately. If the function is called from within
904 // the 'KeepRunning()' loop the current iteration will finish. It is the user's
905 // responsibility to exit the scope as needed.
906 void SkipWithError(const std::string& msg);
907
908 // Returns true if 'SkipWithMessage(...)' or 'SkipWithError(...)' was called.
909 bool skipped() const { return internal::NotSkipped != skipped_; }
910
911 // Returns true if an error has been reported with 'SkipWithError(...)'.
912 bool error_occurred() const { return internal::SkippedWithError == skipped_; }
913
914 // REQUIRES: called exactly once per iteration of the benchmarking loop.
915 // Set the manually measured time for this benchmark iteration, which
916 // is used instead of automatically measured time if UseManualTime() was
917 // specified.
918 //
919 // For threaded benchmarks the final value will be set to the largest
920 // reported value.
921 void SetIterationTime(double seconds);
922
923 // Set the number of bytes processed by the current benchmark
924 // execution. This routine is typically called once at the end of a
925 // throughput oriented benchmark.
926 //
927 // REQUIRES: a benchmark has exited its benchmarking loop.
// Stored as the rate counter "bytes_per_second" (kIs1024: reported with
// binary, 1024-based, unit prefixes).
928 BENCHMARK_ALWAYS_INLINE
929 void SetBytesProcessed(int64_t bytes) {
930 counters["bytes_per_second"] =
931 Counter(static_cast<double>(bytes), Counter::kIsRate, Counter::kIs1024);
932 }
933
934 BENCHMARK_ALWAYS_INLINE
935 int64_t bytes_processed() const {
936 if (counters.find("bytes_per_second") != counters.end())
937 return static_cast<int64_t>(counters.at("bytes_per_second"));
938 return 0;
939 }
940
941 // If this routine is called with complexity_n > 0 and a complexity report
942 // is requested for the benchmark family, then the current benchmark run
943 // will be included in the complexity computation, and complexity_n
944 // represents the length of N (the input size against which the asymptotic
945 // complexity is fitted).
946 BENCHMARK_ALWAYS_INLINE
947 void SetComplexityN(ComplexityN complexity_n) {
948 complexity_n_ = complexity_n;
949 }
950
// Returns the value last set via SetComplexityN().
951 BENCHMARK_ALWAYS_INLINE
952 ComplexityN complexity_length_n() const { return complexity_n_; }
953
954 // If this routine is called with items > 0, then an items/s
955 // label is printed on the benchmark report line for the currently
956 // executing benchmark. It is typically called at the end of a processing
957 // benchmark where a processing items/second output is desired.
958 //
959 // REQUIRES: a benchmark has exited its benchmarking loop.
// Stored as the rate counter "items_per_second".
960 BENCHMARK_ALWAYS_INLINE
961 void SetItemsProcessed(int64_t items) {
962 counters["items_per_second"] =
963 Counter(static_cast<double>(items), benchmark::Counter::kIsRate);
964 }
965
966 BENCHMARK_ALWAYS_INLINE
967 int64_t items_processed() const {
968 if (counters.find("items_per_second") != counters.end())
969 return static_cast<int64_t>(counters.at("items_per_second"));
970 return 0;
971 }
972
973 // If this routine is called, the specified label is printed at the
974 // end of the benchmark report line for the currently executing
975 // benchmark. Example:
976 // static void BM_Compress(benchmark::State& state) {
977 // ...
978 // double compress = input_size / output_size;
979 // state.SetLabel(StrFormat("compress:%.1f%%", 100.0*compress));
980 // }
981 // Produces output that looks like:
982 // BM_Compress 50 50 14115038 compress:27.3%
983 //
984 // REQUIRES: a benchmark has exited its benchmarking loop.
985 void SetLabel(const std::string& label);
986
987 // Range arguments for this run. Asserts (in debug builds) that the
// argument at 'pos' has been set.
988 BENCHMARK_ALWAYS_INLINE
989 int64_t range(std::size_t pos = 0) const {
990 assert(range_.size() > pos);
991 return range_[pos];
992 }
993
994 BENCHMARK_DEPRECATED_MSG("use 'range(0)' instead")
995 int64_t range_x() const { return range(0); }
996
997 BENCHMARK_DEPRECATED_MSG("use 'range(1)' instead")
998 int64_t range_y() const { return range(1); }
999
1000 // Number of threads concurrently executing the benchmark.
1001 BENCHMARK_ALWAYS_INLINE
1002 int threads() const { return threads_; }
1003
1004 // Index of the executing thread. Values from [0, threads).
1005 BENCHMARK_ALWAYS_INLINE
1006 int thread_index() const { return thread_index_; }
1007
// Number of iterations run so far; 0 until the benchmark loop has started.
// (total_iterations_ counts down from max_iterations; batch_leftover_
// accounts for KeepRunningBatch() overshoot.)
1008 BENCHMARK_ALWAYS_INLINE
1009 IterationCount iterations() const {
1010 if (BENCHMARK_BUILTIN_EXPECT(!started_, false)) {
1011 return 0;
1012 }
1013 return max_iterations - total_iterations_ + batch_leftover_;
1014 }
1015
// The name of this benchmark instance.
1016 BENCHMARK_ALWAYS_INLINE
1017 std::string name() const { return name_; }
1018
1020 private:
1021 // items we expect on the first cache line (ie 64 bytes of the struct)
1022 // When total_iterations_ is 0, KeepRunning() and friends will return false.
1023 // May be larger than max_iterations.
1023 IterationCount total_iterations_;
1024
1025 // When using KeepRunningBatch(), batch_leftover_ holds the number of
1026 // iterations beyond max_iters that were run. Used to track
1027 // completed_iterations_ accurately.
1028 IterationCount batch_leftover_;
1029
1030 public:
1031 const IterationCount max_iterations;
1032
1033 private:
// Loop lifecycle flags: set by StartKeepRunning()/FinishKeepRunning().
1034 bool started_;
1035 bool finished_;
// Skip/error status reported via SkipWithMessage()/SkipWithError().
1036 internal::Skipped skipped_;
1037
1038 // items we don't need on the first cache line
// Range arguments for this run; read by range().
1039 std::vector<int64_t> range_;
1040
// Input length N recorded by SetComplexityN().
1041 ComplexityN complexity_n_;
1042
1043 public:
1044 // Container for user-defined counters.
1045 UserCounters counters;
1046
1047 private:
// NOTE(review): this constructor declaration appears truncated in this
// rendering (a parameter line between 'n_threads' and
// 'perf_counters_measurement' seems to be missing) — confirm the full
// parameter list against the original header.
1048 State(std::string name, IterationCount max_iters,
1049 const std::vector<int64_t>& ranges, int thread_i, int n_threads,
1051 internal::PerfCountersMeasurement* perf_counters_measurement,
1052 ProfilerManager* profiler_manager);
1053
1054 void StartKeepRunning();
1055 // Implementation of KeepRunning() and KeepRunningBatch().
1056 // is_batch must be true unless n is 1.
1057 inline bool KeepRunningInternal(IterationCount n, bool is_batch);
1058 void FinishKeepRunning();
1059
1060 const std::string name_;
1061 const int thread_index_;
1062 const int threads_;
1063
// Non-owning pointers to per-thread/shared runner infrastructure.
1064 internal::ThreadTimer* const timer_;
1065 internal::ThreadManager* const manager_;
1066 internal::PerfCountersMeasurement* const perf_counters_measurement_;
1067 ProfilerManager* const profiler_manager_;
1068
1069 friend class internal::BenchmarkInstance;
1070};
1071#if defined(_MSC_VER)
1072#pragma warning(pop)
1073#endif // _MSC_VER
1074
// Fast-path single-iteration form; see KeepRunningInternal().
1075inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunning() {
1076 return KeepRunningInternal(1, /*is_batch=*/false);
1077}
1078
// Batched form: consumes 'n' iterations per call (may overshoot by up to 'n').
1079inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningBatch(IterationCount n) {
1080 return KeepRunningInternal(n, /*is_batch=*/true);
1081}
1082
1083inline BENCHMARK_ALWAYS_INLINE bool State::KeepRunningInternal(IterationCount n,
1084 bool is_batch) {
1085 // total_iterations_ is set to 0 by the constructor, and always set to a
1086 // nonzero value by StartKeepRunning().
1087 assert(n > 0);
1088 // n must be 1 unless is_batch is true.
1089 assert(is_batch || n == 1);
// Hot path: enough iterations remain — consume them and keep looping.
1090 if (BENCHMARK_BUILTIN_EXPECT(total_iterations_ >= n, true)) {
1091 total_iterations_ -= n;
1092 return true;
1093 }
// First call: start the timer, then retry the consume.
1094 if (!started_) {
1095 StartKeepRunning();
1096 if (!skipped() && total_iterations_ >= n) {
1097 total_iterations_ -= n;
1098 return true;
1099 }
1100 }
1101 // For non-batch runs, total_iterations_ must be 0 by now.
// Batch overshoot: record how far past max_iterations this batch ran so
// iterations() stays accurate, and allow one final (overshooting) batch.
1102 if (is_batch && total_iterations_ != 0) {
1103 batch_leftover_ = n - total_iterations_;
1104 total_iterations_ = 0;
1105 return true;
1106 }
1107 FinishKeepRunning();
1108 return false;
1109}
1110
// Empty tag type: the ranged-for loop variable carries no data.
1112 struct BENCHMARK_UNUSED Value {};
1113 typedef std::forward_iterator_tag iterator_category;
1114 typedef Value value_type;
1115 typedef Value reference;
1116 typedef Value pointer;
1117 typedef std::ptrdiff_t difference_type;
1118
1119 private:
1120 friend class State;
// End-sentinel iterator: zero remaining iterations, no parent.
1121 BENCHMARK_ALWAYS_INLINE
1122 StateIterator() : cached_(0), parent_() {}
1123
// Begin iterator: counts down from max_iterations (0 if already skipped).
1124 BENCHMARK_ALWAYS_INLINE
1125 explicit StateIterator(State* st)
1126 : cached_(st->skipped() ? 0 : st->max_iterations), parent_(st) {}
1127
1128 public:
1129 BENCHMARK_ALWAYS_INLINE
1130 Value operator*() const { return Value(); }
1131
1132 BENCHMARK_ALWAYS_INLINE
1133 StateIterator& operator++() {
1134 assert(cached_ > 0);
1135 --cached_;
1136 return *this;
1137 }
1138
// Comparison against the end sentinel: returns false exactly once, when the
// cached countdown reaches zero; at that point the benchmark is finalized
// via FinishKeepRunning().
1139 BENCHMARK_ALWAYS_INLINE
1140 bool operator!=(StateIterator const&) const {
1141 if (BENCHMARK_BUILTIN_EXPECT(cached_ != 0, true)) return true;
1142 parent_->FinishKeepRunning();
1143 return false;
1144 }
1145
1146 private:
1147 IterationCount cached_;
1148 State* const parent_;
1149};
1150
// Ranged-for support: begin() returns an iterator that counts down the
// iterations for this run.
1151inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::begin() {
1152 return StateIterator(this);
1153}
// end() starts the timer (StartKeepRunning()) and returns the zero sentinel;
// the loop is driven by StateIterator::operator!= against it.
1154inline BENCHMARK_ALWAYS_INLINE State::StateIterator State::end() {
1155 StartKeepRunning();
1156 return StateIterator();
1157}
1158
1159namespace internal {
1160
1161typedef void(Function)(State&);
1162
1163// ------------------------------------------------------
1164// Benchmark registration object. The BENCHMARK() macro expands
1165// into an internal::Benchmark* object. Various methods can
1166// be called on this object to change the properties of the benchmark.
1167// Each method returns "this" so that multiple method calls can
1168// be chained into one expression.
1169class BENCHMARK_EXPORT Benchmark {
1170 public:
1171 virtual ~Benchmark();
1172
1173 // Note: the following methods all return "this" so that multiple
1174 // method calls can be chained together in one expression.
1175
1176 // Specify the name of the benchmark
1177 Benchmark* Name(const std::string& name);
1178
1179 // Run this benchmark once with "x" as the extra argument passed
1180 // to the function.
1181 // REQUIRES: The function passed to the constructor must accept an arg1.
1182 Benchmark* Arg(int64_t x);
1183
1184 // Run this benchmark with the given time unit for the generated output report
1185 Benchmark* Unit(TimeUnit unit);
1186
1187 // Run this benchmark once for a number of values picked from the
1188 // range [start..limit]. (start and limit are always picked.)
1189 // REQUIRES: The function passed to the constructor must accept an arg1.
1190 Benchmark* Range(int64_t start, int64_t limit);
1191
1192 // Run this benchmark once for all values in the range [start..limit] with
1193 // specific step
1194 // REQUIRES: The function passed to the constructor must accept an arg1.
1195 Benchmark* DenseRange(int64_t start, int64_t limit, int step = 1);
1196
1197 // Run this benchmark once with "args" as the extra arguments passed
1198 // to the function.
1199 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1200 Benchmark* Args(const std::vector<int64_t>& args);
1201
1202 // Equivalent to Args({x, y})
1203 // NOTE: This is a legacy C++03 interface provided for compatibility only.
1204 // New code should use 'Args'.
1205 Benchmark* ArgPair(int64_t x, int64_t y) {
1206 std::vector<int64_t> args;
1207 args.push_back(x);
1208 args.push_back(y);
1209 return Args(args);
1210 }
1211
1212 // Run this benchmark once for a number of values picked from the
1213 // ranges [start..limit]. (starts and limits are always picked.)
1214 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1215 Benchmark* Ranges(const std::vector<std::pair<int64_t, int64_t> >& ranges);
1216
1217 // Run this benchmark once for each combination of values in the (cartesian)
1218 // product of the supplied argument lists.
1219 // REQUIRES: The function passed to the constructor must accept arg1, arg2 ...
1220 Benchmark* ArgsProduct(const std::vector<std::vector<int64_t> >& arglists);
1221
1222 // Equivalent to ArgNames({name})
1223 Benchmark* ArgName(const std::string& name);
1224
1225 // Set the argument names to display in the benchmark name. If not called,
1226 // only argument values will be shown.
1227 Benchmark* ArgNames(const std::vector<std::string>& names);
1228
1229 // Equivalent to Ranges({{lo1, hi1}, {lo2, hi2}}).
1230 // NOTE: This is a legacy C++03 interface provided for compatibility only.
1231 // New code should use 'Ranges'.
1232 Benchmark* RangePair(int64_t lo1, int64_t hi1, int64_t lo2, int64_t hi2) {
1233 std::vector<std::pair<int64_t, int64_t> > ranges;
1234 ranges.push_back(std::make_pair(lo1, hi1));
1235 ranges.push_back(std::make_pair(lo2, hi2));
1236 return Ranges(ranges);
1237 }
1238
1239 // Have "setup" and/or "teardown" invoked once for every benchmark run.
1240 // If the benchmark is multi-threaded (will run in k threads concurrently),
1241 // the setup callback will be invoked exactly once (not k times) before
1242 // each run with k threads. Time allowing (e.g. for a short benchmark), there
1243 // may be multiple such runs per benchmark, each run with its own
1244 // "setup"/"teardown".
1245 //
1246 // If the benchmark uses different size groups of threads (e.g. via
1247 // ThreadRange), the above will be true for each size group.
1248 //
1249 // The callback will be passed a State object, which includes the number
1250 // of threads, thread-index, benchmark arguments, etc.
1251 //
1252 // The callback must not be NULL or self-deleting.
1253 Benchmark* Setup(void (*setup)(const benchmark::State&));
1254 Benchmark* Teardown(void (*teardown)(const benchmark::State&));
1255
1256 // Pass this benchmark object to *func, which can customize
1257 // the benchmark by calling various methods like Arg, Args,
1258 // Threads, etc.
1259 Benchmark* Apply(void (*func)(Benchmark* benchmark));
1260
1261 // Set the range multiplier for non-dense range. If not called, the range
1262 // multiplier kRangeMultiplier will be used.
1263 Benchmark* RangeMultiplier(int multiplier);
1264
1265 // Set the minimum amount of time to use when running this benchmark. This
1266 // option overrides the `benchmark_min_time` flag.
1267 // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
1268 Benchmark* MinTime(double t);
1269
1270 // Set the minimum amount of time to run the benchmark before taking runtimes
1271 // of this benchmark into account. This
1272 // option overrides the `benchmark_min_warmup_time` flag.
1273 // REQUIRES: `t >= 0` and `Iterations` has not been called on this benchmark.
1274 Benchmark* MinWarmUpTime(double t);
1275
1276 // Specify the amount of iterations that should be run by this benchmark.
1277 // This option overrides the `benchmark_min_time` flag.
1278 // REQUIRES: 'n > 0' and `MinTime` has not been called on this benchmark.
1279 //
1280 // NOTE: This function should only be used when *exact* iteration control is
1281 // needed and never to control or limit how long a benchmark runs, where
1282 // `--benchmark_min_time=<N>s` or `MinTime(...)` should be used instead.
1283 Benchmark* Iterations(IterationCount n);
1284
1285 // Specify the amount of times to repeat this benchmark. This option overrides
1286 // the `benchmark_repetitions` flag.
1287 // REQUIRES: `n > 0`
1288 Benchmark* Repetitions(int n);
1289
1290 // Specify if each repetition of the benchmark should be reported separately
1291 // or if only the final statistics should be reported. If the benchmark
1292 // is not repeated then the single result is always reported.
1293 // Applies to *ALL* reporters (display and file).
1294 Benchmark* ReportAggregatesOnly(bool value = true);
1295
1296 // Same as ReportAggregatesOnly(), but applies to display reporter only.
1297 Benchmark* DisplayAggregatesOnly(bool value = true);
1298
1299 // By default, the CPU time is measured only for the main thread, which may
1300 // be unrepresentative if the benchmark uses threads internally. If called,
1301 // the total CPU time spent by all the threads will be measured instead.
1302 // By default, only the main thread CPU time will be measured.
1303 Benchmark* MeasureProcessCPUTime();
1304
1305 // If a particular benchmark should use the Wall clock instead of the CPU time
1306 // (be it either the CPU time of the main thread only (default), or the
1307 // total CPU usage of the benchmark), call this method. If called, the elapsed
1308 // (wall) time will be used to control how many iterations are run, and in the
1309 // printing of items/second or MB/seconds values.
1310 // If not called, the CPU time used by the benchmark will be used.
1311 Benchmark* UseRealTime();
1312
1313 // If a benchmark must measure time manually (e.g. if GPU execution time is
1314 // being measured), call this method. If called, each benchmark iteration
1315 // should call SetIterationTime(seconds) to report the measured time, which
1316 // will be used to control how many iterations are run, and in the printing
1317 // of items/second or MB/second values.
1320 Benchmark* UseManualTime();
1321
1322 // Set the asymptotic computational complexity for the benchmark. If called
1323 // the asymptotic computational complexity will be shown on the output.
1324 Benchmark* Complexity(BigO complexity = benchmark::oAuto);
1325
1326 // Set the asymptotic computational complexity for the benchmark. If called
1327 // the asymptotic computational complexity will be shown on the output.
1328 Benchmark* Complexity(BigOFunc* complexity);
1329
1330 // Add a statistic to be computed over all the values of the benchmark run.
1331 Benchmark* ComputeStatistics(const std::string& name,
1332 StatisticsFunc* statistics,
1333 StatisticUnit unit = kTime);
1334
1335 // Support for running multiple copies of the same benchmark concurrently
1336 // in multiple threads. This may be useful when measuring the scaling
1337 // of some piece of code.
1338
1339 // Run one instance of this benchmark concurrently in t threads.
1340 Benchmark* Threads(int t);
1341
1342 // Pick a set of values T from [min_threads,max_threads].
1343 // min_threads and max_threads are always included in T. Run this
1344 // benchmark once for each value in T. The benchmark run for a
1345 // particular value t consists of t threads running the benchmark
1346 // function concurrently. For example, consider:
1347 // BENCHMARK(Foo)->ThreadRange(1,16);
1348 // This will run the following benchmarks:
1349 // Foo in 1 thread
1350 // Foo in 2 threads
1351 // Foo in 4 threads
1352 // Foo in 8 threads
1353 // Foo in 16 threads
1354 Benchmark* ThreadRange(int min_threads, int max_threads);
1355
1356 // For each value n in the range, run this benchmark once using n threads.
1357 // min_threads and max_threads are always included in the range.
1358 // stride specifies the increment. E.g. DenseThreadRange(1, 8, 3) starts
1359 // a benchmark with 1, 4, 7 and 8 threads.
1360 Benchmark* DenseThreadRange(int min_threads, int max_threads, int stride = 1);
1361
1362 // Equivalent to ThreadRange(NumCPUs(), NumCPUs())
1363 Benchmark* ThreadPerCpu();
1364
// Runs the benchmark; implemented by each concrete Benchmark subclass.
1365 virtual void Run(State& state) = 0;
1366
1367 TimeUnit GetTimeUnit() const;
1368
1369 protected:
1370 explicit Benchmark(const std::string& name);
1371 void SetName(const std::string& name);
1372
1373 public:
1374 const char* GetName() const;
1375 int ArgsCnt() const;
1376 const char* GetArgName(int arg) const;
1377
1378 private:
1379 friend class BenchmarkFamilies;
1380 friend class BenchmarkInstance;
1381
1382 std::string name_;
1383 AggregationReportMode aggregation_report_mode_;
1384 std::vector<std::string> arg_names_; // Argument names for all benchmark runs
1385 std::vector<std::vector<int64_t> > args_; // Args for all benchmark runs
1386
1387 TimeUnit time_unit_;
1388 bool use_default_time_unit_;
1389
1390 int range_multiplier_;
1391 double min_time_;
1392 double min_warmup_time_;
1393 IterationCount iterations_;
1394 int repetitions_;
1395 bool measure_process_cpu_time_;
1396 bool use_real_time_;
1397 bool use_manual_time_;
1398 BigO complexity_;
1399 BigOFunc* complexity_lambda_;
1400 std::vector<Statistics> statistics_;
1401 std::vector<int> thread_counts_;
1402
1403 typedef void (*callback_function)(const benchmark::State&);
1404 callback_function setup_;
1405 callback_function teardown_;
1406
// Non-copyable ('= delete' only where C++11 is available).
1407 Benchmark(Benchmark const&)
1408#if defined(BENCHMARK_HAS_CXX11)
1409 = delete
1410#endif
1411 ;
1412
1413 Benchmark& operator=(Benchmark const&)
1414#if defined(BENCHMARK_HAS_CXX11)
1415 = delete
1416#endif
1417 ;
1418};
1419
1420} // namespace internal
1421
1422// Create and register a benchmark with the specified 'name' that invokes
1423// the specified functor 'fn'.
1424//
1425// RETURNS: A pointer to the registered benchmark.
1426internal::Benchmark* RegisterBenchmark(const std::string& name,
1427 internal::Function* fn);
1428
1429#if defined(BENCHMARK_HAS_CXX11)
// C++11 overload: registers any callable (e.g. a lambda) as a benchmark.
1430template <class Lambda>
1431internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn);
1432#endif
1433
1434// Remove all registered benchmarks. All pointers to previously registered
1435// benchmarks are invalidated.
1436BENCHMARK_EXPORT void ClearRegisteredBenchmarks();
1437
1438namespace internal {
1439// The class used to hold all Benchmarks created from static functions
1440// (i.e. those created using the BENCHMARK(...) macros).
1441class BENCHMARK_EXPORT FunctionBenchmark : public Benchmark {
1442 public:
// Wraps a plain function; 'func' is stored as a non-owning pointer.
1443 FunctionBenchmark(const std::string& name, Function* func)
1444 : Benchmark(name), func_(func) {}
1445
// Defined out of line in the library.
1446 void Run(State& st) BENCHMARK_OVERRIDE;
1447
1448 private:
1449 Function* func_;
1450};
1451
1452#ifdef BENCHMARK_HAS_CXX11
1453template <class Lambda>
1454class LambdaBenchmark : public Benchmark {
1455 public:
1456 void Run(State& st) BENCHMARK_OVERRIDE { lambda_(st); }
1457
1458 private:
1459 template <class OLambda>
1460 LambdaBenchmark(const std::string& name, OLambda&& lam)
1461 : Benchmark(name), lambda_(std::forward<OLambda>(lam)) {}
1462
1463 LambdaBenchmark(LambdaBenchmark const&) = delete;
1464
1465 template <class Lam> // NOLINTNEXTLINE(readability-redundant-declaration)
1466 friend Benchmark* ::benchmark::RegisterBenchmark(const std::string&, Lam&&);
1467
1468 Lambda lambda_;
1469};
1470#endif
1471} // namespace internal
1472
// Registers 'fn' under 'name'. The allocated FunctionBenchmark is handed to
// RegisterBenchmarkInternal — presumably the registry takes ownership (see
// the leak-checker suppression below); confirm against the library source.
1473inline internal::Benchmark* RegisterBenchmark(const std::string& name,
1474 internal::Function* fn) {
1475 // FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
1476 // codechecker_intentional [cplusplus.NewDeleteLeaks]
1477 return internal::RegisterBenchmarkInternal(
1478 ::new internal::FunctionBenchmark(name, fn));
1479}
1480
1481#ifdef BENCHMARK_HAS_CXX11
// Registers an arbitrary callable: the lambda is perfectly forwarded into a
// LambdaBenchmark instantiated on its decayed type.
1482template <class Lambda>
1483internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn) {
1484 using BenchType =
1485 internal::LambdaBenchmark<typename std::decay<Lambda>::type>;
1486 // FIXME: this should be a `std::make_unique<>()` but we don't have C++14.
1487 // codechecker_intentional [cplusplus.NewDeleteLeaks]
1488 return internal::RegisterBenchmarkInternal(
1489 ::new BenchType(name, std::forward<Lambda>(fn)));
1490}
1491#endif
1492
1493#if defined(BENCHMARK_HAS_CXX11) && \
1494 (!defined(BENCHMARK_GCC_VERSION) || BENCHMARK_GCC_VERSION >= 409)
// Convenience overload: binds extra 'args' to 'fn' at registration time.
// NOTE: the [=] capture copies 'fn' and every argument into the wrapper
// lambda once, when RegisterBenchmark is called.
1495template <class Lambda, class... Args>
1496internal::Benchmark* RegisterBenchmark(const std::string& name, Lambda&& fn,
1497 Args&&... args) {
1498 return benchmark::RegisterBenchmark(
1499 name, [=](benchmark::State& st) { fn(st, args...); });
1500}
1501#else
1502#define BENCHMARK_HAS_NO_VARIADIC_REGISTER_BENCHMARK
1503#endif
1504
1505// The base class for all fixture tests.
1507 public:
// Fixtures are registered via BENCHMARK_F/BENCHMARK_REGISTER_F, which set
// the real name through SetName(); the base name starts empty.
1508 Fixture() : internal::Benchmark("") {}
1509
// Runs one benchmark: SetUp, then the user-defined BenchmarkCase, then
// TearDown.
1510 void Run(State& st) BENCHMARK_OVERRIDE {
1511 this->SetUp(st);
1512 this->BenchmarkCase(st);
1513 this->TearDown(st);
1514 }
1515
1516 // These will be deprecated ...
1517 virtual void SetUp(const State&) {}
1518 virtual void TearDown(const State&) {}
1519 // ... In favor of these.
// The mutable overloads forward to the const ones by default, so existing
// fixtures that only override the const versions keep working.
1520 virtual void SetUp(State& st) { SetUp(const_cast<const State&>(st)); }
1521 virtual void TearDown(State& st) { TearDown(const_cast<const State&>(st)); }
1522
1523 protected:
// The benchmark body; defined by the BENCHMARK_DEFINE_F/BENCHMARK_F macros.
1524 virtual void BenchmarkCase(State&) = 0;
1525};
1526} // namespace benchmark
1527
1528// ------------------------------------------------------
1529// Macro to register benchmarks
1530
1531// Check that __COUNTER__ is defined and that __COUNTER__ increases by 1
1532// every time it is expanded. X + 1 == X + 0 is used in case X is defined to be
1533// empty. If X is empty the expression becomes (+1 == +0).
1534#if defined(__COUNTER__) && (__COUNTER__ + 1 == __COUNTER__ + 0)
1535#define BENCHMARK_PRIVATE_UNIQUE_ID __COUNTER__
1536#else
1537#define BENCHMARK_PRIVATE_UNIQUE_ID __LINE__
1538#endif
1539
1540// Helpers for generating unique variable names
1541#ifdef BENCHMARK_HAS_CXX11
1542#define BENCHMARK_PRIVATE_NAME(...) \
1543 BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, \
1544 __VA_ARGS__)
1545#else
1546#define BENCHMARK_PRIVATE_NAME(n) \
1547 BENCHMARK_PRIVATE_CONCAT(benchmark_uniq_, BENCHMARK_PRIVATE_UNIQUE_ID, n)
1548#endif // BENCHMARK_HAS_CXX11
1549
// Two levels of indirection so that macro arguments (e.g. __COUNTER__) are
// expanded before ## pastes them together.
1550#define BENCHMARK_PRIVATE_CONCAT(a, b, c) BENCHMARK_PRIVATE_CONCAT2(a, b, c)
1551#define BENCHMARK_PRIVATE_CONCAT2(a, b, c) a##b##c
1552// Helper for concatenation with macro name expansion
1553#define BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method) \
1554 BaseClass##_##Method##_Benchmark
1555
// Declares the file-static Benchmark* variable whose initializer performs
// the registration; BENCHMARK_UNUSED silences unused-variable warnings.
1556#define BENCHMARK_PRIVATE_DECLARE(n) \
1557 /* NOLINTNEXTLINE(misc-use-anonymous-namespace) */ \
1558 static ::benchmark::internal::Benchmark* BENCHMARK_PRIVATE_NAME(n) \
1559 BENCHMARK_UNUSED
1560
// BENCHMARK(func): registers 'func' at static-initialization time under its
// stringified name.
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK(...)                                               \
  BENCHMARK_PRIVATE_DECLARE(_benchmark_) =                           \
      (::benchmark::internal::RegisterBenchmarkInternal(             \
          new ::benchmark::internal::FunctionBenchmark(#__VA_ARGS__, \
                                                       __VA_ARGS__)))
#else
#define BENCHMARK(n)                                     \
  BENCHMARK_PRIVATE_DECLARE(n) =                         \
      (::benchmark::internal::RegisterBenchmarkInternal( \
          new ::benchmark::internal::FunctionBenchmark(#n, n)))
#endif  // BENCHMARK_HAS_CXX11

// Old-style macros
#define BENCHMARK_WITH_ARG(n, a) BENCHMARK(n)->Arg((a))
#define BENCHMARK_WITH_ARG2(n, a1, a2) BENCHMARK(n)->Args({(a1), (a2)})
#define BENCHMARK_WITH_UNIT(n, t) BENCHMARK(n)->Unit((t))
#define BENCHMARK_RANGE(n, lo, hi) BENCHMARK(n)->Range((lo), (hi))
// FIX: a braced initializer list cannot bind to RangePair's four int64_t
// parameters, so the previous expansion could not compile when used; route
// through Ranges(), which takes a vector of pairs and is exactly what
// RangePair() itself forwards to.
#define BENCHMARK_RANGE2(n, l1, h1, l2, h2) \
  BENCHMARK(n)->Ranges({{(l1), (h1)}, {(l2), (h2)}})
1581
1582#ifdef BENCHMARK_HAS_CXX11
1583
1584// Register a benchmark which invokes the function specified by `func`
1585// with the additional arguments specified by `...`.
1586//
1587// For example:
1588//
1589// template <class ...ExtraArgs>`
1590// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1591// [...]
1592//}
1593// /* Registers a benchmark named "BM_takes_args/int_string_test" */
1594// BENCHMARK_CAPTURE(BM_takes_args, int_string_test, 42, std::string("abc"));
// NOTE: the extra arguments are substituted textually into a capture-less
// lambda, so each argument expression is re-evaluated on every invocation of
// the benchmark function.
1595#define BENCHMARK_CAPTURE(func, test_case_name, ...) \
1596 BENCHMARK_PRIVATE_DECLARE(_benchmark_) = \
1597 (::benchmark::internal::RegisterBenchmarkInternal( \
1598 new ::benchmark::internal::FunctionBenchmark( \
1599 #func "/" #test_case_name, \
1600 [](::benchmark::State& st) { func(st, __VA_ARGS__); })))
1601
1602#endif // BENCHMARK_HAS_CXX11
1603
1604// This will register a benchmark for a templatized function. For example:
1605//
1606// template<int arg>
1607// void BM_Foo(int iters);
1608//
1609// BENCHMARK_TEMPLATE(BM_Foo, 1);
1610//
1611// will register BM_Foo<1> as a benchmark.
// The registered name includes the stringified template argument(s),
// e.g. BENCHMARK_TEMPLATE1(BM_Foo, 1) registers "BM_Foo<1>".
1612#define BENCHMARK_TEMPLATE1(n, a) \
1613 BENCHMARK_PRIVATE_DECLARE(n) = \
1614 (::benchmark::internal::RegisterBenchmarkInternal( \
1615 new ::benchmark::internal::FunctionBenchmark(#n "<" #a ">", n<a>)))
1616
1617#define BENCHMARK_TEMPLATE2(n, a, b) \
1618 BENCHMARK_PRIVATE_DECLARE(n) = \
1619 (::benchmark::internal::RegisterBenchmarkInternal( \
1620 new ::benchmark::internal::FunctionBenchmark(#n "<" #a "," #b ">", \
1621 n<a, b>)))
1622
1623#ifdef BENCHMARK_HAS_CXX11
// Variadic form: any number of template arguments.
1624#define BENCHMARK_TEMPLATE(n, ...) \
1625 BENCHMARK_PRIVATE_DECLARE(n) = \
1626 (::benchmark::internal::RegisterBenchmarkInternal( \
1627 new ::benchmark::internal::FunctionBenchmark( \
1628 #n "<" #__VA_ARGS__ ">", n<__VA_ARGS__>)))
1629#else
1630#define BENCHMARK_TEMPLATE(n, a) BENCHMARK_TEMPLATE1(n, a)
1631#endif
1632
1633#ifdef BENCHMARK_HAS_CXX11
1634// This will register a benchmark for a templatized function,
1635// with the additional arguments specified by `...`.
1636//
1637// For example:
1638//
1639// template <typename T, class ...ExtraArgs>`
1640// void BM_takes_args(benchmark::State& state, ExtraArgs&&... extra_args) {
1641// [...]
1642//}
1643// /* Registers a benchmark named "BM_takes_args<void>/int_string_test" */
1644// BENCHMARK_TEMPLATE1_CAPTURE(BM_takes_args, void, int_string_test, 42,
1645// std::string("abc"));
// One template argument: delegates to BENCHMARK_CAPTURE with 'func<a>'.
1646#define BENCHMARK_TEMPLATE1_CAPTURE(func, a, test_case_name, ...) \
1647 BENCHMARK_CAPTURE(func<a>, test_case_name, __VA_ARGS__)
1648
// Two template arguments: expands directly because 'func<a, b>' contains a
// comma and could not be passed through BENCHMARK_CAPTURE as one argument.
1649#define BENCHMARK_TEMPLATE2_CAPTURE(func, a, b, test_case_name, ...) \
1650 BENCHMARK_PRIVATE_DECLARE(func) = \
1651 (::benchmark::internal::RegisterBenchmarkInternal( \
1652 new ::benchmark::internal::FunctionBenchmark( \
1653 #func "<" #a "," #b ">" \
1654 "/" #test_case_name, \
1655 [](::benchmark::State& st) { func<a, b>(st, __VA_ARGS__); })))
1656#endif // BENCHMARK_HAS_CXX11
1657
// Each *_PRIVATE_DECLARE_F macro defines a fixture subclass named
// BaseClass_Method_Benchmark whose BenchmarkCase body is supplied later by
// the matching *_DEFINE_F macro.
1658#define BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
1659 class BaseClass##_##Method##_Benchmark : public BaseClass { \
1660 public: \
1661 BaseClass##_##Method##_Benchmark() { \
1662 this->SetName(#BaseClass "/" #Method); \
1663 } \
1664 \
1665 protected: \
1666 void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1667 };
1668
1669#define BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
1670 class BaseClass##_##Method##_Benchmark : public BaseClass<a> { \
1671 public: \
1672 BaseClass##_##Method##_Benchmark() { \
1673 this->SetName(#BaseClass "<" #a ">/" #Method); \
1674 } \
1675 \
1676 protected: \
1677 void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1678 };
1679
1680#define BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
1681 class BaseClass##_##Method##_Benchmark : public BaseClass<a, b> { \
1682 public: \
1683 BaseClass##_##Method##_Benchmark() { \
1684 this->SetName(#BaseClass "<" #a "," #b ">/" #Method); \
1685 } \
1686 \
1687 protected: \
1688 void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1689 };
1690
1691#ifdef BENCHMARK_HAS_CXX11
1692#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, ...) \
1693 class BaseClass##_##Method##_Benchmark : public BaseClass<__VA_ARGS__> { \
1694 public: \
1695 BaseClass##_##Method##_Benchmark() { \
1696 this->SetName(#BaseClass "<" #__VA_ARGS__ ">/" #Method); \
1697 } \
1698 \
1699 protected: \
1700 void BenchmarkCase(::benchmark::State&) BENCHMARK_OVERRIDE; \
1701 };
1702#else
1703#define BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(n, a) \
1704 BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(n, a)
1705#endif
1706
// Defines the BenchmarkCase body of a fixture benchmark without registering
// it; pair with BENCHMARK_REGISTER_F to make it runnable. The macro expands
// to the method's signature head, so usage is:
//   BENCHMARK_DEFINE_F(MyFixture, MyTest)(benchmark::State& st) { ... }
#define BENCHMARK_DEFINE_F(BaseClass, Method)    \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// As above, for a fixture template instantiated with one argument.
#define BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)    \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// As above, for a fixture template instantiated with two arguments.
#define BENCHMARK_TEMPLATE2_DEFINE_F(BaseClass, Method, a, b)    \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// Variadic define for fixture templates (any number of template arguments
// in C++11 mode; pre-C++11 it degrades to the single-argument form).
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, ...)            \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_DEFINE_F(BaseClass, Method, a)
#endif
1727
// Registers a fixture benchmark previously declared with
// BENCHMARK_DEFINE_F (or one of its template variants).
#define BENCHMARK_REGISTER_F(BaseClass, Method) \
  BENCHMARK_PRIVATE_REGISTER_F(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method))

// Implementation detail: instantiates the generated benchmark class and
// hands ownership of the instance to the global benchmark registry.
#define BENCHMARK_PRIVATE_REGISTER_F(TestName) \
  BENCHMARK_PRIVATE_DECLARE(TestName) =        \
      (::benchmark::internal::RegisterBenchmarkInternal(new TestName()))
1734
// This macro will define and register a benchmark within a fixture class.
// Equivalent to BENCHMARK_DEFINE_F followed by BENCHMARK_REGISTER_F; the
// macro expands to the method's signature head, so usage is:
//   BENCHMARK_F(MyFixture, MyTest)(benchmark::State& st) { ... }
#define BENCHMARK_F(BaseClass, Method)           \
  BENCHMARK_PRIVATE_DECLARE_F(BaseClass, Method) \
  BENCHMARK_REGISTER_F(BaseClass, Method);       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// As above, for a fixture template instantiated with one argument.
#define BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)           \
  BENCHMARK_TEMPLATE1_PRIVATE_DECLARE_F(BaseClass, Method, a) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                    \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// As above, for a fixture template instantiated with two arguments.
#define BENCHMARK_TEMPLATE2_F(BaseClass, Method, a, b)           \
  BENCHMARK_TEMPLATE2_PRIVATE_DECLARE_F(BaseClass, Method, a, b) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                       \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase

// Variadic define-and-register for fixture templates (C++11 mode only;
// pre-C++11 it degrades to the single-argument form).
#ifdef BENCHMARK_HAS_CXX11
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, ...)                   \
  BENCHMARK_TEMPLATE_PRIVATE_DECLARE_F(BaseClass, Method, __VA_ARGS__) \
  BENCHMARK_REGISTER_F(BaseClass, Method);                             \
  void BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::BenchmarkCase
#else
#define BENCHMARK_TEMPLATE_F(BaseClass, Method, a) \
  BENCHMARK_TEMPLATE1_F(BaseClass, Method, a)
#endif
1760
// Helper macro to create a main routine in a test that runs the benchmarks.
// Note the workaround for Hexagon simulator passing argc != 0, argv = NULL.
// The trailing `int main(int, char**)` declaration consumes the user's
// terminating semicolon so `BENCHMARK_MAIN();` is well-formed.
#define BENCHMARK_MAIN()                                                \
  int main(int argc, char** argv) {                                    \
    char arg0_default[] = "benchmark";                                 \
    char* args_default = arg0_default;                                 \
    if (!argv) {                                                       \
      argc = 1;                                                        \
      argv = &args_default;                                            \
    }                                                                  \
    ::benchmark::Initialize(&argc, argv);                              \
    if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1; \
    ::benchmark::RunSpecifiedBenchmarks();                             \
    ::benchmark::Shutdown();                                           \
    return 0;                                                          \
  }                                                                    \
  int main(int, char**)
1778
1779// ------------------------------------------------------
1780// Benchmark Reporters
1781
1782namespace benchmark {
1783
// Host CPU properties, exposed through the Get() singleton accessor.
struct BENCHMARK_EXPORT CPUInfo {
  // Description of one level of the CPU cache hierarchy.
  struct CacheInfo {
    std::string type;  // cache kind, e.g. data/instruction/unified
    int level;         // hierarchy level (1 = L1, ...)
    int size;          // capacity (presumably bytes -- confirm in sysinfo.cc)
    int num_sharing;   // number of logical CPUs sharing this cache
  };

  // Whether CPU frequency scaling was detected as active on the host.
  enum Scaling { UNKNOWN, ENABLED, DISABLED };

  int num_cpus;              // number of logical CPUs
  Scaling scaling;           // frequency-scaling state
  double cycles_per_second;  // measured/estimated CPU cycles per second
  std::vector<CacheInfo> caches;
  std::vector<double> load_avg;  // system load averages

  // Returns the singleton instance (non-copyable; private constructor).
  static const CPUInfo& Get();

 private:
  CPUInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(CPUInfo);
};
1806
// Basic host system information (currently just a name), exposed through
// the Get() singleton accessor.
struct BENCHMARK_EXPORT SystemInfo {
  std::string name;  // host/system identifier
  // Returns the singleton instance (non-copyable; private constructor).
  static const SystemInfo& Get();

 private:
  SystemInfo();
  BENCHMARK_DISALLOW_COPY_AND_ASSIGN(SystemInfo);
};
1816
// BenchmarkName contains the components of the Benchmark's name
// which allows individual fields to be modified or cleared before
// building the final name using 'str()'.
struct BENCHMARK_EXPORT BenchmarkName {
  std::string function_name;    // base name of the benchmark function
  std::string args;             // stringified argument list, if any
  std::string min_time;         // "min_time:..." component, if set
  std::string min_warmup_time;  // "min_warmup_time:..." component, if set
  std::string iterations;       // "iterations:..." component, if set
  std::string repetitions;      // "repeats:..." component, if set
  std::string time_type;        // e.g. real/manual time marker, if set
  std::string threads;          // "threads:..." component, if set

  // Return the full name of the benchmark with each non-empty
  // field separated by a '/'
  std::string str() const;
};
1834
1835// Interface for custom benchmark result printers.
1836// By default, benchmark reports are printed to stdout. However an application
1837// can control the destination of the reports by calling
1838// RunSpecifiedBenchmarks and passing it a custom reporter object.
1839// The reporter object must implement the following interface.
1840class BENCHMARK_EXPORT BenchmarkReporter {
1841 public:
1842 struct Context {
1843 CPUInfo const& cpu_info;
1844 SystemInfo const& sys_info;
1845 // The number of chars in the longest benchmark name.
1846 size_t name_field_width;
1847 static const char* executable_name;
1848 Context();
1849 };
1850
1851 struct BENCHMARK_EXPORT Run {
1852 static const int64_t no_repetition_index = -1;
1853 enum RunType { RT_Iteration, RT_Aggregate };
1854
1855 Run()
1856 : run_type(RT_Iteration),
1857 aggregate_unit(kTime),
1858 skipped(internal::NotSkipped),
1859 iterations(1),
1860 threads(1),
1861 time_unit(GetDefaultTimeUnit()),
1862 real_accumulated_time(0),
1863 cpu_accumulated_time(0),
1864 max_heapbytes_used(0),
1865 use_real_time_for_initial_big_o(false),
1866 complexity(oNone),
1867 complexity_lambda(),
1868 complexity_n(0),
1869 report_big_o(false),
1870 report_rms(false),
1871 memory_result(NULL),
1872 allocs_per_iter(0.0) {}
1873
1874 std::string benchmark_name() const;
1875 BenchmarkName run_name;
1876 int64_t family_index;
1877 int64_t per_family_instance_index;
1878 RunType run_type;
1879 std::string aggregate_name;
1880 StatisticUnit aggregate_unit;
1881 std::string report_label; // Empty if not set by benchmark.
1882 internal::Skipped skipped;
1883 std::string skip_message;
1884
1885 IterationCount iterations;
1886 int64_t threads;
1887 int64_t repetition_index;
1888 int64_t repetitions;
1889 TimeUnit time_unit;
1890 double real_accumulated_time;
1891 double cpu_accumulated_time;
1892
1893 // Return a value representing the real time per iteration in the unit
1894 // specified by 'time_unit'.
1895 // NOTE: If 'iterations' is zero the returned value represents the
1896 // accumulated time.
1897 double GetAdjustedRealTime() const;
1898
1899 // Return a value representing the cpu time per iteration in the unit
1900 // specified by 'time_unit'.
1901 // NOTE: If 'iterations' is zero the returned value represents the
1902 // accumulated time.
1903 double GetAdjustedCPUTime() const;
1904
1905 // This is set to 0.0 if memory tracing is not enabled.
1906 double max_heapbytes_used;
1907
1908 // By default Big-O is computed for CPU time, but that is not what you want
1909 // to happen when manual time was requested, which is stored as real time.
1910 bool use_real_time_for_initial_big_o;
1911
1912 // Keep track of arguments to compute asymptotic complexity
1913 BigO complexity;
1914 BigOFunc* complexity_lambda;
1915 ComplexityN complexity_n;
1916
1917 // what statistics to compute from the measurements
1918 const std::vector<internal::Statistics>* statistics;
1919
1920 // Inform print function whether the current run is a complexity report
1921 bool report_big_o;
1922 bool report_rms;
1923
1924 UserCounters counters;
1925
1926 // Memory metrics.
1927 const MemoryManager::Result* memory_result;
1928 double allocs_per_iter;
1929 };
1930
1932 PerFamilyRunReports() : num_runs_total(0), num_runs_done(0) {}
1933
1934 // How many runs will all instances of this benchmark perform?
1935 int num_runs_total;
1936
1937 // How many runs have happened already?
1938 int num_runs_done;
1939
1940 // The reports about (non-errneous!) runs of this family.
1941 std::vector<BenchmarkReporter::Run> Runs;
1942 };
1943
1944 // Construct a BenchmarkReporter with the output stream set to 'std::cout'
1945 // and the error stream set to 'std::cerr'
1947
1948 // Called once for every suite of benchmarks run.
1949 // The parameter "context" contains information that the
1950 // reporter may wish to use when generating its report, for example the
1951 // platform under which the benchmarks are running. The benchmark run is
1952 // never started if this function returns false, allowing the reporter
1953 // to skip runs based on the context information.
1954 virtual bool ReportContext(const Context& context) = 0;
1955
1956 // Called once for each group of benchmark runs, gives information about
1957 // the configurations of the runs.
1958 virtual void ReportRunsConfig(double /*min_time*/,
1959 bool /*has_explicit_iters*/,
1960 IterationCount /*iters*/) {}
1961
1962 // Called once for each group of benchmark runs, gives information about
1963 // cpu-time and heap memory usage during the benchmark run. If the group
1964 // of runs contained more than two entries then 'report' contains additional
1965 // elements representing the mean and standard deviation of those runs.
1966 // Additionally if this group of runs was the last in a family of benchmarks
1967 // 'reports' contains additional entries representing the asymptotic
1968 // complexity and RMS of that benchmark family.
1969 virtual void ReportRuns(const std::vector<Run>& report) = 0;
1970
1971 // Called once and only once after ever group of benchmarks is run and
1972 // reported.
1973 virtual void Finalize() {}
1974
1975 // REQUIRES: The object referenced by 'out' is valid for the lifetime
1976 // of the reporter.
1977 void SetOutputStream(std::ostream* out) {
1978 assert(out);
1979 output_stream_ = out;
1980 }
1981
1982 // REQUIRES: The object referenced by 'err' is valid for the lifetime
1983 // of the reporter.
1984 void SetErrorStream(std::ostream* err) {
1985 assert(err);
1986 error_stream_ = err;
1987 }
1988
1989 std::ostream& GetOutputStream() const { return *output_stream_; }
1990
1991 std::ostream& GetErrorStream() const { return *error_stream_; }
1992
1993 virtual ~BenchmarkReporter();
1994
1995 // Write a human readable string to 'out' representing the specified
1996 // 'context'.
1997 // REQUIRES: 'out' is non-null.
1998 static void PrintBasicContext(std::ostream* out, Context const& context);
1999
2000 private:
2001 std::ostream* output_stream_;
2002 std::ostream* error_stream_;
2003};
2004
2005// Simple reporter that outputs benchmark data to the console. This is the
2006// default reporter used by RunSpecifiedBenchmarks().
class BENCHMARK_EXPORT ConsoleReporter : public BenchmarkReporter {
 public:
  // Bit flags controlling console output appearance.
  enum OutputOptions {
    OO_None = 0,
    OO_Color = 1,    // colorized output
    OO_Tabular = 2,  // counters aligned in columns
    OO_ColorTabular = OO_Color | OO_Tabular,
    OO_Defaults = OO_ColorTabular
  };
  explicit ConsoleReporter(OutputOptions opts_ = OO_Defaults)
      : output_options_(opts_), name_field_width_(0), printed_header_(false) {}

  bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 protected:
  // Prints a single run's row; virtual so subclasses can restyle output.
  virtual void PrintRunData(const Run& report);
  // Prints the column-header line for a group of runs.
  virtual void PrintHeader(const Run& report);

  OutputOptions output_options_;
  size_t name_field_width_;  // column width used to align benchmark names
  UserCounters prev_counters_;
  bool printed_header_;  // true once a header row has been emitted
};
2031
// Reporter that emits benchmark results as a JSON document.
class BENCHMARK_EXPORT JSONReporter : public BenchmarkReporter {
 public:
  JSONReporter() : first_report_(true) {}
  bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;
  void Finalize() BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  // True until the first run is printed; used for comma placement
  // between JSON entries.
  bool first_report_;
};
2044
// Reporter that emits benchmark results as CSV rows. Deprecated upstream;
// prefer the JSON output format.
class BENCHMARK_EXPORT BENCHMARK_DEPRECATED_MSG(
    "The CSV Reporter will be removed in a future release") CSVReporter
    : public BenchmarkReporter {
 public:
  CSVReporter() : printed_header_(false) {}
  bool ReportContext(const Context& context) BENCHMARK_OVERRIDE;
  void ReportRuns(const std::vector<Run>& reports) BENCHMARK_OVERRIDE;

 private:
  void PrintRunData(const Run& report);

  bool printed_header_;  // true once the CSV header row has been emitted
  // User-counter column names seen so far (keeps columns stable).
  std::set<std::string> user_counter_names_;
};
2059
2060inline const char* GetTimeUnitString(TimeUnit unit) {
2061 switch (unit) {
2062 case kSecond:
2063 return "s";
2064 case kMillisecond:
2065 return "ms";
2066 case kMicrosecond:
2067 return "us";
2068 case kNanosecond:
2069 return "ns";
2070 }
2071 BENCHMARK_UNREACHABLE();
2072}
2073
2074inline double GetTimeUnitMultiplier(TimeUnit unit) {
2075 switch (unit) {
2076 case kSecond:
2077 return 1;
2078 case kMillisecond:
2079 return 1e3;
2080 case kMicrosecond:
2081 return 1e6;
2082 case kNanosecond:
2083 return 1e9;
2084 }
2085 BENCHMARK_UNREACHABLE();
2086}
2087
// Creates a list of integer values for the given range and multiplier.
// Successive values grow by the factor 'multi' from 'lo' up to 'hi'.
// This can be used together with ArgsProduct() to allow multiple ranges
// with different multipliers.
// Example:
// ArgsProduct({
//   CreateRange(0, 1024, /*multi=*/32),
//   CreateRange(0, 100, /*multi=*/4),
//   CreateDenseRange(0, 4, /*step=*/1),
// });
BENCHMARK_EXPORT
std::vector<int64_t> CreateRange(int64_t lo, int64_t hi, int multi);

// Creates a list of integer values for the given range and step.
// Values run from 'start' to 'limit' inclusive, advancing by 'step'.
BENCHMARK_EXPORT
std::vector<int64_t> CreateDenseRange(int64_t start, int64_t limit, int step);
2103
2104} // namespace benchmark
2105
2106#if defined(_MSC_VER)
2107#pragma warning(pop)
2108#endif
2109
2110#endif // BENCHMARK_BENCHMARK_H_
Definition benchmark.h:1840
Definition benchmark.h:2007
Definition benchmark.h:672
Definition benchmark.h:1506
Definition benchmark.h:2032
Definition benchmark.h:417
Definition benchmark.h:461
Definition benchmark.h:806
Definition benchmark_register.cc:73
Definition benchmark_api_internal.h:18
Definition benchmark.h:1169
Definition benchmark.h:1441
Definition perf_counters.h:149
Definition thread_manager.h:12
Definition thread_timer.h:10
Definition benchmark.h:1820
Definition benchmark.h:1842
Definition benchmark.h:1851
Definition benchmark.h:1785
Definition benchmark.h:1784
Definition benchmark.h:421
Definition benchmark.h:1112
Definition benchmark.h:1111
Definition benchmark.h:1808
Definition benchmark.h:752