Reland "Activating AVX2 support by default"
This is a reland of ad148272b89394978915cb00e1c1be552d908a42
Original change's description:
> Activating AVX2 support by default
>
> This CL activates the newly added AVX2 support by default.
> The activation is guarded by a kill switch, the
> WebRTC-Avx2SupportKillSwitch field trial.
>
> Beyond the above, the CL also changes an incorrect DCHECK_GT
> to a DCHECK_GE, so that equal padded and unpadded coefficient
> lengths (i.e. zero padding) are accepted.
>
> Bug: webrtc:11663
> Change-Id: I231ccb2f5efabf74cd8190411daa954b2b94a2a0
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/183042
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#32193}
Bug: webrtc:11663
Change-Id: Ib41dc1d1c5865f2828699c462939d15d5562df47
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186262
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32270}
diff --git a/common_audio/fir_filter_avx2.cc b/common_audio/fir_filter_avx2.cc
index f9b31f8..26468e2 100644
--- a/common_audio/fir_filter_avx2.cc
+++ b/common_audio/fir_filter_avx2.cc
@@ -32,7 +32,7 @@
AlignedMalloc(sizeof(float) * (max_input_length + state_length_),
32))) {
// Add zeros at the end of the coefficients.
- RTC_DCHECK_GT(coefficients_length_, unaligned_coefficients_length);
+ RTC_DCHECK_GE(coefficients_length_, unaligned_coefficients_length);
size_t padding = coefficients_length_ - unaligned_coefficients_length;
memset(coefficients_.get(), 0, padding * sizeof(coefficients_[0]));
// The coefficients are reversed to compensate for the order in which the
diff --git a/modules/BUILD.gn b/modules/BUILD.gn
index b780bb3c..bb6b7cc 100644
--- a/modules/BUILD.gn
+++ b/modules/BUILD.gn
@@ -121,6 +121,7 @@
"../resources/audio_processing/agc/agc_with_circular_buffer.dat",
"../resources/audio_processing/output_data_fixed.pb",
"../resources/audio_processing/output_data_float.pb",
+ "../resources/audio_processing/output_data_float_avx2.pb",
"../resources/audio_processing/output_data_mac.pb",
"../resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm",
"../resources/audio_processing/transient/audio16kHz.pcm",
diff --git a/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
index efd7b04..4e7493e 100644
--- a/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@@ -48,6 +48,7 @@
#include "rtc_base/system/arch.h"
#include "rtc_base/thread_annotations.h"
#include "system_wrappers/include/clock.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
#include "system_wrappers/include/sleep.h"
#include "test/audio_decoder_proxy_factory.h"
#include "test/gtest.h"
@@ -937,35 +938,59 @@
#if (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) && \
defined(WEBRTC_CODEC_ILBC)
TEST_F(AcmReceiverBitExactnessOldApi, 8kHzOutput) {
- Run(8000, PlatformChecksum("6c204b289486b0695b08a9e94fab1948",
- "ff5ffee2ee92f8fe61d9f2010b8a68a3",
- "53494a96f3db4a5b07d723e0cbac0ad7",
- "4598140b5e4f7ee66c5adad609e65a3e",
- "516c2859126ea4913f30d51af4a4f3dc"));
+ std::string others_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "6edbfe69b965a8687b8744ed1b8eb5a7"
+ : "6c204b289486b0695b08a9e94fab1948";
+ std::string win64_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "405a50f0bcb8827e20aa944299fc59f6"
+ : "ff5ffee2ee92f8fe61d9f2010b8a68a3";
+ Run(8000,
+ PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+ "53494a96f3db4a5b07d723e0cbac0ad7",
+ "4598140b5e4f7ee66c5adad609e65a3e",
+ "516c2859126ea4913f30d51af4a4f3dc"));
}
TEST_F(AcmReceiverBitExactnessOldApi, 16kHzOutput) {
- Run(16000, PlatformChecksum("226dbdbce2354399c6df05371042cda3",
- "9c80bf5ec496c41ce8112e1523bf8c83",
- "11a6f170fdaffa81a2948af121f370af",
- "f2aad418af974a3b1694d5ae5cc2c3c7",
- "6133301a18be95c416984182816d859f"));
+ std::string others_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "295f031e051f1770b4ab4107dba768b5"
+ : "226dbdbce2354399c6df05371042cda3";
+ std::string win64_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "58fd62a5c49ee513f9fa6fe7dbf62c97"
+ : "9c80bf5ec496c41ce8112e1523bf8c83";
+ Run(16000,
+ PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+ "11a6f170fdaffa81a2948af121f370af",
+ "f2aad418af974a3b1694d5ae5cc2c3c7",
+ "6133301a18be95c416984182816d859f"));
}
TEST_F(AcmReceiverBitExactnessOldApi, 32kHzOutput) {
- Run(32000, PlatformChecksum("f94665cc0e904d5d5cf0394e30ee4edd",
- "697934bcf0849f80d76ce20854161220",
- "3609aa5288c1d512e8e652ceabecb495",
- "100869c8dcde51346c2073e52a272d98",
- "55363bc9cdda6464a58044919157827b"));
+ std::string others_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "2895e5ab3146eaa78fa6843ed60e7e37"
+ : "f94665cc0e904d5d5cf0394e30ee4edd";
+ std::string win64_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "04ce6a1dac5ffdd8438d804623d0132f"
+ : "697934bcf0849f80d76ce20854161220";
+ Run(32000,
+ PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+ "3609aa5288c1d512e8e652ceabecb495",
+ "100869c8dcde51346c2073e52a272d98",
+ "55363bc9cdda6464a58044919157827b"));
}
TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutput) {
- Run(48000, PlatformChecksum("2955d0b83602541fd92d9b820ebce68d",
- "f4a8386a6a49439ced60ed9a7c7f75fd",
- "d8169dfeba708b5212bdc365e08aee9d",
- "bd44bf97e7899186532f91235cef444d",
- "47594deaab5d9166cfbf577203b2563e"));
+ std::string others_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "640bca210e1b8dd229224d2a0c79ff1f"
+ : "2955d0b83602541fd92d9b820ebce68d";
+ std::string win64_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "f59833d9b0924f4b0704707dd3589f80"
+ : "f4a8386a6a49439ced60ed9a7c7f75fd";
+ Run(48000,
+ PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+ "d8169dfeba708b5212bdc365e08aee9d",
+ "bd44bf97e7899186532f91235cef444d",
+ "47594deaab5d9166cfbf577203b2563e"));
}
TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutputExternalDecoder) {
@@ -1043,9 +1068,14 @@
rtc::scoped_refptr<rtc::RefCountedObject<ADFactory>> factory(
new rtc::RefCountedObject<ADFactory>);
+ std::string others_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "640bca210e1b8dd229224d2a0c79ff1f"
+ : "2955d0b83602541fd92d9b820ebce68d";
+ std::string win64_checksum_reference =
+ GetCPUInfo(kAVX2) != 0 ? "f59833d9b0924f4b0704707dd3589f80"
+ : "f4a8386a6a49439ced60ed9a7c7f75fd";
Run(48000,
- PlatformChecksum("2955d0b83602541fd92d9b820ebce68d",
- "f4a8386a6a49439ced60ed9a7c7f75fd",
+ PlatformChecksum(others_checksum_reference, win64_checksum_reference,
"d8169dfeba708b5212bdc365e08aee9d",
"bd44bf97e7899186532f91235cef444d",
"47594deaab5d9166cfbf577203b2563e"),
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index 93ddc97..bd18d4d 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -45,6 +45,7 @@
#include "rtc_base/system/arch.h"
#include "rtc_base/task_queue_for_test.h"
#include "rtc_base/thread.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
#include "test/gtest.h"
#include "test/testsupport/file_utils.h"
@@ -348,6 +349,19 @@
return true;
}
+// Returns the reference file path for the active build profile (fixed or
+// float); the float profile uses the AVX2 file when GetCPUInfo(kAVX2) != 0.
+std::string GetReferenceFilename() {
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+ return test::ResourcePath("audio_processing/output_data_fixed", "pb");
+#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+ if (GetCPUInfo(kAVX2) != 0) {
+ return test::ResourcePath("audio_processing/output_data_float_avx2", "pb");
+ }
+ return test::ResourcePath("audio_processing/output_data_float", "pb");
+#endif
+}
+
class ApmTest : public ::testing::Test {
protected:
ApmTest();
@@ -415,13 +429,7 @@
ApmTest::ApmTest()
: output_path_(test::OutputPath()),
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
- ref_filename_(
- test::ResourcePath("audio_processing/output_data_fixed", "pb")),
-#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
- ref_filename_(
- test::ResourcePath("audio_processing/output_data_float", "pb")),
-#endif
+ ref_filename_(GetReferenceFilename()),
output_sample_rate_hz_(0),
num_output_channels_(0),
far_file_(NULL),
@@ -1775,7 +1783,7 @@
max_output_average - kMaxOutputAverageOffset,
kMaxOutputAverageNear);
#if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
- const double kFloatNear = 0.0005;
+ const double kFloatNear = 0.002;
EXPECT_NEAR(test->rms_dbfs_average(), rms_dbfs_average, kFloatNear);
#endif
} else {
diff --git a/resources/audio_processing/output_data_float_avx2.pb.sha1 b/resources/audio_processing/output_data_float_avx2.pb.sha1
new file mode 100644
index 0000000..539623e
--- /dev/null
+++ b/resources/audio_processing/output_data_float_avx2.pb.sha1
@@ -0,0 +1 @@
+514543fbee78d0a71e87adb92e23138d762d1da8
\ No newline at end of file
diff --git a/system_wrappers/BUILD.gn b/system_wrappers/BUILD.gn
index af9aa65..b446648 100644
--- a/system_wrappers/BUILD.gn
+++ b/system_wrappers/BUILD.gn
@@ -31,6 +31,7 @@
defines = []
libs = []
deps = [
+ ":field_trial",
"../api:array_view",
"../api/units:timestamp",
"../modules:module_api_public",
diff --git a/system_wrappers/source/cpu_features.cc b/system_wrappers/source/cpu_features.cc
index e40c65a..0f81212 100644
--- a/system_wrappers/source/cpu_features.cc
+++ b/system_wrappers/source/cpu_features.cc
@@ -12,6 +12,7 @@
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
+#include "system_wrappers/include/field_trial.h"
#if defined(WEBRTC_ARCH_X86_FAMILY) && defined(_MSC_VER)
#include <intrin.h>
@@ -77,7 +78,8 @@
return 0 != (cpu_info[2] & 0x00000001);
}
#if defined(WEBRTC_ENABLE_AVX2)
- if (feature == kAVX2) {
+ if (feature == kAVX2 &&
+ !webrtc::field_trial::IsEnabled("WebRTC-Avx2SupportKillSwitch")) {
int cpu_info7[4];
__cpuid(cpu_info7, 0);
int num_ids = cpu_info7[0];
diff --git a/webrtc.gni b/webrtc.gni
index ba93242..ca8acdb 100644
--- a/webrtc.gni
+++ b/webrtc.gni
@@ -242,8 +242,12 @@
rtc_include_internal_audio_device = !build_with_chromium
# Set this to true to enable the avx2 support in webrtc.
- # TODO(bugs.webrtc.org/11663): Default this to true and eventually remove.
- rtc_enable_avx2 = false
+ # TODO(bugs.webrtc.org/11663): Make AVX2 work with non-clang compilers too.
+ if (is_clang == true) {
+ rtc_enable_avx2 = true
+ } else {
+ rtc_enable_avx2 = false
+ }
# Include tests in standalone checkout.
rtc_include_tests = !build_with_chromium && !build_with_mozilla