Optimize RoboCaller::AddReceiver() for code size
Essentially, instead of having the inlined UntypedFunction::Create(f)
return an UntypedFunction which is then passed as an argument to
non-inlined RoboCallerReceivers::AddReceiverImpl(), we let
UntypedFunction::PrepareArgs(f) return a few different kinds of
trivial structs (depending on what sort of type f has) which are
passed as arguments to non-inlined RoboCallerReceivers::AddReceiver()
(which then converts them to UntypedFunction by calling
UntypedFunction::Create()). These structs are smaller than
UntypedFunction and optimized for argument passing, so many fewer
instructions are needed.
Example code:
struct Foo {
void Receive(int, float, int, float);
void TestAddLambdaReceiver();
webrtc::RoboCaller<int, float, int, float> rc;
};
void Foo::TestAddLambdaReceiver() {
rc.AddReceiver([this](int a, float b, int c, float d){
Receive(a, b, c, d);});
}
On arm32, we get before this CL:
Foo::TestAddLambdaReceiver():
push {r11, lr}
mov r11, sp
sub sp, sp, #24
ldr r1, .LCPI0_0
mov r2, #0
stm sp, {r0, r2}
add r1, pc, r1
str r2, [sp, #20]
str r1, [sp, #16]
mov r1, sp
bl RoboCallerReceivers::AddReceiverImpl
mov sp, r11
pop {r11, pc}
.LCPI0_0:
.long CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>
CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>:
ldr r0, [r0]
b Foo::Receive(int, float, int, float)
After this CL:
Foo::TestAddLambdaReceiver():
ldr r3, .LCPI0_0
mov r2, r0
add r3, pc, r3
b RoboCallerReceivers::AddReceiver<1u>
.LCPI0_0:
.long CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>
CallInlineStorage<Foo::TestAddLambdaReceiver()::$_0>:
ldr r0, [r0]
b Foo::Receive(int, float, int, float)
(Symbol names abbreviated so that they'll fit on one line.)
So a reduction from 64 to 28 bytes. The improvements on arm64 and
x86_64 are similar.
Bug: webrtc:11943
Change-Id: I93fbba083be0235051c3279d3e3f6852a4a9fdad
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/185960
Commit-Queue: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32244}
diff --git a/rtc_base/BUILD.gn b/rtc_base/BUILD.gn
index a09c06e..489a5c6 100644
--- a/rtc_base/BUILD.gn
+++ b/rtc_base/BUILD.gn
@@ -57,6 +57,7 @@
":untyped_function",
"../api:function_view",
"system:assume",
+ "system:inline",
]
}
diff --git a/rtc_base/robo_caller.cc b/rtc_base/robo_caller.cc
index a97687a..a7c35cb 100644
--- a/rtc_base/robo_caller.cc
+++ b/rtc_base/robo_caller.cc
@@ -16,10 +16,6 @@
RoboCallerReceivers::RoboCallerReceivers() = default;
RoboCallerReceivers::~RoboCallerReceivers() = default;
-void RoboCallerReceivers::AddReceiverImpl(UntypedFunction* f) {
- receivers_.push_back(std::move(*f));
-}
-
void RoboCallerReceivers::Foreach(
rtc::FunctionView<void(UntypedFunction&)> fv) {
for (auto& r : receivers_) {
@@ -27,5 +23,18 @@
}
}
+template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<1>);
+template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<2>);
+template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<3>);
+template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<4>);
+template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::NontrivialUntypedFunctionArgs);
+template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::FunctionPointerUntypedFunctionArgs);
+
} // namespace robo_caller_impl
} // namespace webrtc
diff --git a/rtc_base/robo_caller.h b/rtc_base/robo_caller.h
index 573e7b6..9df6a48 100644
--- a/rtc_base/robo_caller.h
+++ b/rtc_base/robo_caller.h
@@ -16,6 +16,7 @@
#include "api/function_view.h"
#include "rtc_base/system/assume.h"
+#include "rtc_base/system/inline.h"
#include "rtc_base/untyped_function.h"
namespace webrtc {
@@ -30,20 +31,30 @@
RoboCallerReceivers& operator=(RoboCallerReceivers&&) = delete;
~RoboCallerReceivers();
- void AddReceiver(UntypedFunction&& f) {
- AddReceiverImpl(&f);
- // Assume that f was moved from and is now trivially destructible.
- // This helps the compiler optimize away the destructor call.
- RTC_ASSUME(f.IsTriviallyDestructible());
+ template <typename UntypedFunctionArgsT>
+ RTC_NO_INLINE void AddReceiver(UntypedFunctionArgsT args) {
+ receivers_.push_back(UntypedFunction::Create(args));
}
void Foreach(rtc::FunctionView<void(UntypedFunction&)> fv);
private:
- void AddReceiverImpl(UntypedFunction* f);
std::vector<UntypedFunction> receivers_;
};
+extern template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<1>);
+extern template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<2>);
+extern template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<3>);
+extern template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::TrivialUntypedFunctionArgs<4>);
+extern template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::NontrivialUntypedFunctionArgs);
+extern template void RoboCallerReceivers::AddReceiver(
+ UntypedFunction::FunctionPointerUntypedFunctionArgs);
+
} // namespace robo_caller_impl
// A collection of receivers (callable objects) that can be called all at once.
@@ -71,7 +82,7 @@
template <typename F>
void AddReceiver(F&& f) {
receivers_.AddReceiver(
- UntypedFunction::Create<void(ArgT...)>(std::forward<F>(f)));
+ UntypedFunction::PrepareArgs<void(ArgT...)>(std::forward<F>(f)));
}
// Calls all receivers with the given arguments.
diff --git a/rtc_base/untyped_function.h b/rtc_base/untyped_function.h
index 9220d3f..16c5ba0 100644
--- a/rtc_base/untyped_function.h
+++ b/rtc_base/untyped_function.h
@@ -11,6 +11,8 @@
#ifndef RTC_BASE_UNTYPED_FUNCTION_H_
#define RTC_BASE_UNTYPED_FUNCTION_H_
+#include <cstddef>
+#include <cstring>
#include <memory>
#include <type_traits>
#include <utility>
@@ -25,9 +27,17 @@
union VoidUnion {
void* void_ptr;
FunVoid* fun_ptr;
- typename std::aligned_storage<16>::type inline_storage;
+ typename std::aligned_storage<4 * sizeof(uintptr_t)>::type inline_storage;
};
+// Returns the number of elements of the `inline_storage` array required to
+// store an object of type T.
+template <typename T>
+constexpr size_t InlineStorageSize() {
+ // sizeof(T) / sizeof(uintptr_t), but rounded up.
+ return (sizeof(T) + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
+}
+
template <typename T>
struct CallHelpers;
template <typename RetT, typename... ArgT>
@@ -64,76 +74,155 @@
// size.
class UntypedFunction final {
public:
- // Create function for lambdas and other callables; it accepts every type of
- // argument except those noted in its enable_if call.
+ // The *UntypedFunctionArgs structs are used to transfer arguments from
+ // PrepareArgs() to Create(). They are trivial, but may own heap allocations,
+ // so make sure to pass them to Create() exactly once!
+ //
+ // The point of doing Create(PrepareArgs(foo)) instead of just Create(foo) is
+ // to separate the code that has to be inlined (PrepareArgs) from the code
+ // that can be noninlined (Create); the *UntypedFunctionArgs types are
+ // designed to efficiently carry the required information from one to the
+ // other.
+ template <size_t N>
+ struct TrivialUntypedFunctionArgs {
+ // We use an uintptr_t array here instead of std::aligned_storage, because
+ // the former can be efficiently passed in registers when using
+ // TrivialUntypedFunctionArgs as a function argument. (We can't do the same
+ // in VoidUnion, because std::aligned_storage but not uintptr_t can be
+ // legally reinterpret_casted to arbitrary types.
+ // TrivialUntypedFunctionArgs, on the other hand, only needs to handle
+ // placement new and memcpy.)
+ alignas(std::max_align_t) uintptr_t inline_storage[N];
+ webrtc_function_impl::FunVoid* call;
+ };
+ struct NontrivialUntypedFunctionArgs {
+ void* void_ptr;
+ webrtc_function_impl::FunVoid* call;
+ void (*del)(webrtc_function_impl::VoidUnion*);
+ };
+ struct FunctionPointerUntypedFunctionArgs {
+ webrtc_function_impl::FunVoid* fun_ptr;
+ webrtc_function_impl::FunVoid* call;
+ };
+
+ // Create function for lambdas and other callables that are trivial and small;
+ // it accepts every type of argument except those noted in its enable_if call.
template <
typename Signature,
typename F,
+ typename F_deref = typename std::remove_reference<F>::type,
typename std::enable_if<
// Not for function pointers; we have another overload for that below.
- !std::is_function<typename std::remove_pointer<
- typename std::remove_reference<F>::type>::type>::value &&
+ !std::is_function<
+ typename std::remove_pointer<F_deref>::type>::value &&
- // Not for nullptr; we have another overload for that below.
+ // Not for nullptr; we have a constructor for that below.
!std::is_same<std::nullptr_t,
typename std::remove_cv<F>::type>::value &&
- // Not for UntypedFunction objects; we have another overload for that.
+ // Not for UntypedFunction objects; use move construction or
+ // assignment.
!std::is_same<UntypedFunction,
- typename std::remove_cv<typename std::remove_reference<
- F>::type>::type>::value>::type* = nullptr>
- static UntypedFunction Create(F&& f) {
- using F_deref = typename std::remove_reference<F>::type;
- // TODO(C++17): Use `constexpr if` here. The compiler appears to do the
- // right thing anyway w.r.t. resolving the branch statically and
- // eliminating dead code, but it would be good for readability.
- if (std::is_trivially_move_constructible<F_deref>::value &&
- std::is_trivially_destructible<F_deref>::value &&
- sizeof(F_deref) <=
- sizeof(webrtc_function_impl::VoidUnion::inline_storage)) {
- // The callable is trivial and small enough, so we just store its bytes
- // in the inline storage.
- webrtc_function_impl::VoidUnion vu;
- new (&vu.inline_storage) F_deref(std::forward<F>(f));
- return UntypedFunction(
- vu,
- reinterpret_cast<webrtc_function_impl::FunVoid*>(
- webrtc_function_impl::CallHelpers<
- Signature>::template CallInlineStorage<F_deref>),
- nullptr);
- } else {
- // The callable is either nontrivial or too large, so we can't keep it
- // in the inline storage; use the heap instead.
- webrtc_function_impl::VoidUnion vu;
- vu.void_ptr = new F_deref(std::forward<F>(f));
- return UntypedFunction(
- vu,
- reinterpret_cast<webrtc_function_impl::FunVoid*>(
- webrtc_function_impl::CallHelpers<
- Signature>::template CallVoidPtr<F_deref>),
- static_cast<void (*)(webrtc_function_impl::VoidUnion*)>(
- [](webrtc_function_impl::VoidUnion* vu) {
- // Assuming that this pointer isn't null allows the
- // compiler to eliminate a null check in the (inlined)
- // delete operation.
- RTC_ASSUME(vu->void_ptr != nullptr);
- delete reinterpret_cast<F_deref*>(vu->void_ptr);
- }));
- }
+ typename std::remove_cv<F_deref>::type>::value &&
+
+ // Only for trivial callables that will fit in
+ // VoidUnion::inline_storage.
+ std::is_trivially_move_constructible<F_deref>::value &&
+ std::is_trivially_destructible<F_deref>::value &&
+ sizeof(F_deref) <=
+ sizeof(webrtc_function_impl::VoidUnion::inline_storage)>::type* =
+ nullptr,
+ size_t InlineSize = webrtc_function_impl::InlineStorageSize<F_deref>()>
+ static TrivialUntypedFunctionArgs<InlineSize> PrepareArgs(F&& f) {
+ // The callable is trivial and small enough, so we just store its bytes
+ // in the inline storage.
+ TrivialUntypedFunctionArgs<InlineSize> args;
+ new (&args.inline_storage) F_deref(std::forward<F>(f));
+ args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
+ webrtc_function_impl::CallHelpers<
+ Signature>::template CallInlineStorage<F_deref>);
+ return args;
+ }
+ template <size_t InlineSize>
+ static UntypedFunction Create(TrivialUntypedFunctionArgs<InlineSize> args) {
+ webrtc_function_impl::VoidUnion vu;
+ std::memcpy(&vu.inline_storage, args.inline_storage,
+ sizeof(args.inline_storage));
+ return UntypedFunction(vu, args.call, nullptr);
+ }
+
+ // Create function for lambdas and other callables that are nontrivial or
+ // large; it accepts every type of argument except those noted in its
+ // enable_if call.
+ template <
+ typename Signature,
+ typename F,
+ typename F_deref = typename std::remove_reference<F>::type,
+ typename std::enable_if<
+ // Not for function pointers; we have another overload for that below.
+ !std::is_function<
+ typename std::remove_pointer<F_deref>::type>::value &&
+
+ // Not for nullptr; we have a constructor for that below.
+ !std::is_same<std::nullptr_t,
+ typename std::remove_cv<F>::type>::value &&
+
+ // Not for UntypedFunction objects; use move construction or
+ // assignment.
+ !std::is_same<UntypedFunction,
+ typename std::remove_cv<F_deref>::type>::value &&
+
+ // Only for nontrivial callables, or callables that won't fit in
+ // VoidUnion::inline_storage.
+ !(std::is_trivially_move_constructible<F_deref>::value &&
+ std::is_trivially_destructible<F_deref>::value &&
+ sizeof(F_deref) <= sizeof(webrtc_function_impl::VoidUnion::
+ inline_storage))>::type* = nullptr>
+ static NontrivialUntypedFunctionArgs PrepareArgs(F&& f) {
+ // The callable is either nontrivial or too large, so we can't keep it
+ // in the inline storage; use the heap instead.
+ NontrivialUntypedFunctionArgs args;
+ args.void_ptr = new F_deref(std::forward<F>(f));
+ args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
+ webrtc_function_impl::CallHelpers<Signature>::template CallVoidPtr<
+ F_deref>);
+ args.del = static_cast<void (*)(webrtc_function_impl::VoidUnion*)>(
+ [](webrtc_function_impl::VoidUnion* vu) {
+ // Assuming that this pointer isn't null allows the
+ // compiler to eliminate a null check in the (inlined)
+ // delete operation.
+ RTC_ASSUME(vu->void_ptr != nullptr);
+ delete reinterpret_cast<F_deref*>(vu->void_ptr);
+ });
+ return args;
+ }
+ static UntypedFunction Create(NontrivialUntypedFunctionArgs args) {
+ webrtc_function_impl::VoidUnion vu;
+ vu.void_ptr = args.void_ptr;
+ return UntypedFunction(vu, args.call, args.del);
}
// Create function that accepts function pointers. If the argument is null,
// the result is an empty UntypedFunction.
template <typename Signature>
- static UntypedFunction Create(Signature* f) {
+ static FunctionPointerUntypedFunctionArgs PrepareArgs(Signature* f) {
+ FunctionPointerUntypedFunctionArgs args;
+ args.fun_ptr = reinterpret_cast<webrtc_function_impl::FunVoid*>(f);
+ args.call = reinterpret_cast<webrtc_function_impl::FunVoid*>(
+ webrtc_function_impl::CallHelpers<Signature>::CallFunPtr);
+ return args;
+ }
+ static UntypedFunction Create(FunctionPointerUntypedFunctionArgs args) {
webrtc_function_impl::VoidUnion vu;
- vu.fun_ptr = reinterpret_cast<webrtc_function_impl::FunVoid*>(f);
- return UntypedFunction(
- vu,
- f ? reinterpret_cast<webrtc_function_impl::FunVoid*>(
- webrtc_function_impl::CallHelpers<Signature>::CallFunPtr)
- : nullptr,
- nullptr);
+ vu.fun_ptr = args.fun_ptr;
+ return UntypedFunction(vu, args.fun_ptr == nullptr ? nullptr : args.call,
+ nullptr);
+ }
+
+ // Prepares arguments and creates an UntypedFunction in one go.
+ template <typename Signature, typename F>
+ static UntypedFunction Create(F&& f) {
+ return Create(PrepareArgs<Signature>(std::forward<F>(f)));
}
// Default constructor. Creates an empty UntypedFunction.