Reland "Activating AVX2 support by default"

This is a reland of ad148272b89394978915cb00e1c1be552d908a42

Original change's description:
> Activating AVX2 support by default
>
> This CL activates the newly added AVX2 support by default.
> The activation is done beneath a kill-switch.
>
> Beyond the above, the CL also changes an incorrect DCHECK_GT
> to a DCHECK_GE.
>
> Bug: webrtc:11663
> Change-Id: I231ccb2f5efabf74cd8190411daa954b2b94a2a0
> Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/183042
> Commit-Queue: Per Åhgren <peah@webrtc.org>
> Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
> Reviewed-by: Sam Zackrisson <saza@webrtc.org>
> Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#32193}

Bug: webrtc:11663
Change-Id: Ib41dc1d1c5865f2828699c462939d15d5562df47
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/186262
Reviewed-by: Karl Wiberg <kwiberg@webrtc.org>
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Reviewed-by: Sam Zackrisson <saza@webrtc.org>
Commit-Queue: Per Åhgren <peah@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32270}
diff --git a/common_audio/fir_filter_avx2.cc b/common_audio/fir_filter_avx2.cc
index f9b31f8..26468e2 100644
--- a/common_audio/fir_filter_avx2.cc
+++ b/common_audio/fir_filter_avx2.cc
@@ -32,7 +32,7 @@
           AlignedMalloc(sizeof(float) * (max_input_length + state_length_),
                         32))) {
   // Add zeros at the end of the coefficients.
-  RTC_DCHECK_GT(coefficients_length_, unaligned_coefficients_length);
+  RTC_DCHECK_GE(coefficients_length_, unaligned_coefficients_length);
   size_t padding = coefficients_length_ - unaligned_coefficients_length;
   memset(coefficients_.get(), 0, padding * sizeof(coefficients_[0]));
   // The coefficients are reversed to compensate for the order in which the
diff --git a/modules/BUILD.gn b/modules/BUILD.gn
index b780bb3c..bb6b7cc 100644
--- a/modules/BUILD.gn
+++ b/modules/BUILD.gn
@@ -121,6 +121,7 @@
     "../resources/audio_processing/agc/agc_with_circular_buffer.dat",
     "../resources/audio_processing/output_data_fixed.pb",
     "../resources/audio_processing/output_data_float.pb",
+    "../resources/audio_processing/output_data_float_avx2.pb",
     "../resources/audio_processing/output_data_mac.pb",
     "../resources/audio_processing/transient/ajm-macbook-1-spke16m.pcm",
     "../resources/audio_processing/transient/audio16kHz.pcm",
diff --git a/modules/audio_coding/acm2/audio_coding_module_unittest.cc b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
index efd7b04..4e7493e 100644
--- a/modules/audio_coding/acm2/audio_coding_module_unittest.cc
+++ b/modules/audio_coding/acm2/audio_coding_module_unittest.cc
@@ -48,6 +48,7 @@
 #include "rtc_base/system/arch.h"
 #include "rtc_base/thread_annotations.h"
 #include "system_wrappers/include/clock.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
 #include "system_wrappers/include/sleep.h"
 #include "test/audio_decoder_proxy_factory.h"
 #include "test/gtest.h"
@@ -937,35 +938,59 @@
 #if (defined(WEBRTC_CODEC_ISAC) || defined(WEBRTC_CODEC_ISACFX)) && \
     defined(WEBRTC_CODEC_ILBC)
 TEST_F(AcmReceiverBitExactnessOldApi, 8kHzOutput) {
-  Run(8000, PlatformChecksum("6c204b289486b0695b08a9e94fab1948",
-                             "ff5ffee2ee92f8fe61d9f2010b8a68a3",
-                             "53494a96f3db4a5b07d723e0cbac0ad7",
-                             "4598140b5e4f7ee66c5adad609e65a3e",
-                             "516c2859126ea4913f30d51af4a4f3dc"));
+  std::string others_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "6edbfe69b965a8687b8744ed1b8eb5a7"
+                             : "6c204b289486b0695b08a9e94fab1948";
+  std::string win64_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "405a50f0bcb8827e20aa944299fc59f6"
+                             : "ff5ffee2ee92f8fe61d9f2010b8a68a3";
+  Run(8000,
+      PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+                       "53494a96f3db4a5b07d723e0cbac0ad7",
+                       "4598140b5e4f7ee66c5adad609e65a3e",
+                       "516c2859126ea4913f30d51af4a4f3dc"));
 }
 
 TEST_F(AcmReceiverBitExactnessOldApi, 16kHzOutput) {
-  Run(16000, PlatformChecksum("226dbdbce2354399c6df05371042cda3",
-                              "9c80bf5ec496c41ce8112e1523bf8c83",
-                              "11a6f170fdaffa81a2948af121f370af",
-                              "f2aad418af974a3b1694d5ae5cc2c3c7",
-                              "6133301a18be95c416984182816d859f"));
+  std::string others_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "295f031e051f1770b4ab4107dba768b5"
+                             : "226dbdbce2354399c6df05371042cda3";
+  std::string win64_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "58fd62a5c49ee513f9fa6fe7dbf62c97"
+                             : "9c80bf5ec496c41ce8112e1523bf8c83";
+  Run(16000,
+      PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+                       "11a6f170fdaffa81a2948af121f370af",
+                       "f2aad418af974a3b1694d5ae5cc2c3c7",
+                       "6133301a18be95c416984182816d859f"));
 }
 
 TEST_F(AcmReceiverBitExactnessOldApi, 32kHzOutput) {
-  Run(32000, PlatformChecksum("f94665cc0e904d5d5cf0394e30ee4edd",
-                              "697934bcf0849f80d76ce20854161220",
-                              "3609aa5288c1d512e8e652ceabecb495",
-                              "100869c8dcde51346c2073e52a272d98",
-                              "55363bc9cdda6464a58044919157827b"));
+  std::string others_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "2895e5ab3146eaa78fa6843ed60e7e37"
+                             : "f94665cc0e904d5d5cf0394e30ee4edd";
+  std::string win64_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "04ce6a1dac5ffdd8438d804623d0132f"
+                             : "697934bcf0849f80d76ce20854161220";
+  Run(32000,
+      PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+                       "3609aa5288c1d512e8e652ceabecb495",
+                       "100869c8dcde51346c2073e52a272d98",
+                       "55363bc9cdda6464a58044919157827b"));
 }
 
 TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutput) {
-  Run(48000, PlatformChecksum("2955d0b83602541fd92d9b820ebce68d",
-                              "f4a8386a6a49439ced60ed9a7c7f75fd",
-                              "d8169dfeba708b5212bdc365e08aee9d",
-                              "bd44bf97e7899186532f91235cef444d",
-                              "47594deaab5d9166cfbf577203b2563e"));
+  std::string others_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "640bca210e1b8dd229224d2a0c79ff1f"
+                             : "2955d0b83602541fd92d9b820ebce68d";
+  std::string win64_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "f59833d9b0924f4b0704707dd3589f80"
+                             : "f4a8386a6a49439ced60ed9a7c7f75fd";
+  Run(48000,
+      PlatformChecksum(others_checksum_reference, win64_checksum_reference,
+                       "d8169dfeba708b5212bdc365e08aee9d",
+                       "bd44bf97e7899186532f91235cef444d",
+                       "47594deaab5d9166cfbf577203b2563e"));
 }
 
 TEST_F(AcmReceiverBitExactnessOldApi, 48kHzOutputExternalDecoder) {
@@ -1043,9 +1068,14 @@
 
   rtc::scoped_refptr<rtc::RefCountedObject<ADFactory>> factory(
       new rtc::RefCountedObject<ADFactory>);
+  std::string others_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "640bca210e1b8dd229224d2a0c79ff1f"
+                             : "2955d0b83602541fd92d9b820ebce68d";
+  std::string win64_checksum_reference =
+      GetCPUInfo(kAVX2) != 0 ? "f59833d9b0924f4b0704707dd3589f80"
+                             : "f4a8386a6a49439ced60ed9a7c7f75fd";
   Run(48000,
-      PlatformChecksum("2955d0b83602541fd92d9b820ebce68d",
-                       "f4a8386a6a49439ced60ed9a7c7f75fd",
+      PlatformChecksum(others_checksum_reference, win64_checksum_reference,
                        "d8169dfeba708b5212bdc365e08aee9d",
                        "bd44bf97e7899186532f91235cef444d",
                        "47594deaab5d9166cfbf577203b2563e"),
diff --git a/modules/audio_processing/audio_processing_unittest.cc b/modules/audio_processing/audio_processing_unittest.cc
index 93ddc97..bd18d4d 100644
--- a/modules/audio_processing/audio_processing_unittest.cc
+++ b/modules/audio_processing/audio_processing_unittest.cc
@@ -45,6 +45,7 @@
 #include "rtc_base/system/arch.h"
 #include "rtc_base/task_queue_for_test.h"
 #include "rtc_base/thread.h"
+#include "system_wrappers/include/cpu_features_wrapper.h"
 #include "test/gtest.h"
 #include "test/testsupport/file_utils.h"
 
@@ -348,6 +349,19 @@
   return true;
 }
 
+// Returns the reference file name that matches the current CPU
+// architecture/optimizations.
+std::string GetReferenceFilename() {
+#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
+  return test::ResourcePath("audio_processing/output_data_fixed", "pb");
+#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
+  if (GetCPUInfo(kAVX2) != 0) {
+    return test::ResourcePath("audio_processing/output_data_float_avx2", "pb");
+  }
+  return test::ResourcePath("audio_processing/output_data_float", "pb");
+#endif
+}
+
 class ApmTest : public ::testing::Test {
  protected:
   ApmTest();
@@ -415,13 +429,7 @@
 
 ApmTest::ApmTest()
     : output_path_(test::OutputPath()),
-#if defined(WEBRTC_AUDIOPROC_FIXED_PROFILE)
-      ref_filename_(
-          test::ResourcePath("audio_processing/output_data_fixed", "pb")),
-#elif defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
-      ref_filename_(
-          test::ResourcePath("audio_processing/output_data_float", "pb")),
-#endif
+      ref_filename_(GetReferenceFilename()),
       output_sample_rate_hz_(0),
       num_output_channels_(0),
       far_file_(NULL),
@@ -1775,7 +1783,7 @@
                   max_output_average - kMaxOutputAverageOffset,
                   kMaxOutputAverageNear);
 #if defined(WEBRTC_AUDIOPROC_FLOAT_PROFILE)
-      const double kFloatNear = 0.0005;
+      const double kFloatNear = 0.002;
       EXPECT_NEAR(test->rms_dbfs_average(), rms_dbfs_average, kFloatNear);
 #endif
     } else {
diff --git a/resources/audio_processing/output_data_float_avx2.pb.sha1 b/resources/audio_processing/output_data_float_avx2.pb.sha1
new file mode 100644
index 0000000..539623e
--- /dev/null
+++ b/resources/audio_processing/output_data_float_avx2.pb.sha1
@@ -0,0 +1 @@
+514543fbee78d0a71e87adb92e23138d762d1da8
\ No newline at end of file
diff --git a/system_wrappers/BUILD.gn b/system_wrappers/BUILD.gn
index af9aa65..b446648 100644
--- a/system_wrappers/BUILD.gn
+++ b/system_wrappers/BUILD.gn
@@ -31,6 +31,7 @@
   defines = []
   libs = []
   deps = [
+    ":field_trial",
     "../api:array_view",
     "../api/units:timestamp",
     "../modules:module_api_public",
diff --git a/system_wrappers/source/cpu_features.cc b/system_wrappers/source/cpu_features.cc
index e40c65a..0f81212 100644
--- a/system_wrappers/source/cpu_features.cc
+++ b/system_wrappers/source/cpu_features.cc
@@ -12,6 +12,7 @@
 
 #include "rtc_base/system/arch.h"
 #include "system_wrappers/include/cpu_features_wrapper.h"
+#include "system_wrappers/include/field_trial.h"
 
 #if defined(WEBRTC_ARCH_X86_FAMILY) && defined(_MSC_VER)
 #include <intrin.h>
@@ -77,7 +78,8 @@
     return 0 != (cpu_info[2] & 0x00000001);
   }
 #if defined(WEBRTC_ENABLE_AVX2)
-  if (feature == kAVX2) {
+  if (feature == kAVX2 &&
+      !webrtc::field_trial::IsEnabled("WebRTC-Avx2SupportKillSwitch")) {
     int cpu_info7[4];
     __cpuid(cpu_info7, 0);
     int num_ids = cpu_info7[0];
diff --git a/webrtc.gni b/webrtc.gni
index ba93242..ca8acdb 100644
--- a/webrtc.gni
+++ b/webrtc.gni
@@ -242,8 +242,12 @@
   rtc_include_internal_audio_device = !build_with_chromium
 
   # Set this to true to enable the avx2 support in webrtc.
-  # TODO(bugs.webrtc.org/11663): Default this to true and eventually remove.
-  rtc_enable_avx2 = false
+  # TODO(bugs.webrtc.org/11663): Make AVX2 work for non-clang compilers too.
+  if (is_clang == true) {
+    rtc_enable_avx2 = true
+  } else {
+    rtc_enable_avx2 = false
+  }
 
   # Include tests in standalone checkout.
   rtc_include_tests = !build_with_chromium && !build_with_mozilla