Revert "Fix getStats() freeze bug affecting Chromium but not WebRTC standalone."
This reverts commit 05d43c6f7fe497fed0f2c8714e2042dd07a86df2.
Reason for revert: It breaks some Chromium trybots:
https://ci.chromium.org/p/chromium/builders/luci.chromium.try/linux_chromium_asan_rel_ng/206387
https://ci.chromium.org/p/chromium/builders/luci.chromium.try/linux_chromium_tsan_rel_ng/207737
https://ci.chromium.org/p/chromium/builders/luci.chromium.try/win10_chromium_x64_rel_ng/202283
Original change's description:
> Fix getStats() freeze bug affecting Chromium but not WebRTC standalone.
>
> PeerConnection::Close() is, per-spec, a blocking operation.
> Unfortunately, PeerConnection is implemented to own resources used by
> the network thread, and Close() - on the signaling thread - destroys
> these resources. As such, tasks run in parallel like getStats() get into
> race conditions with Close() unless synchronized. The mechanism in-place
> is RTCStatsCollector::WaitForPendingRequest(), it waits until the
> network thread is done with the in-parallel stats request.
>
> Prior to this CL, this was implemented by performing
> rtc::Thread::ProcessMessages() in a loop until the network thread had
> posted a task on the signaling thread to say that it was done which
> would then get processed by ProcessMessages(). In WebRTC this works, and
> the test is RTCStatsIntegrationTest.GetsStatsWhileClosingPeerConnection.
>
> But because Chromium's thread wrapper does no support
> ProcessMessages(), calling getStats() followed by close() in Chrome
> resulted in waiting forever (https://crbug.com/850907).
>
> In this CL, the process messages loop is removed. Instead, the shared
> resources are guarded by an rtc::Event. WaitForPendingRequest() still
> blocks the signaling thread, but only while shared resources are in use
> by the network thread. After this CL, calling WaitForPendingRequest() no
> longer has any unexpected side-effects since it no longer processes
> other messages that might have been posted on the thread.
>
> The resource ownership and threading model of WebRTC deserves to be
> revisited, but this fixes a common Chromium crash without redesigning
> PeerConnection, in a way that does not cause more blocking than what
> the other PeerConnection methods are already doing.
>
> Note: An alternative to using rtc::Event is to use resource locks and
> to not perform the stats collection on the network thread if the
> request was cancelled before the start of processing, but this has very
> little benefit in terms of performance: once the network thread starts
> collecting the stats, it would use the lock until collection is
> completed, blocking the signaling thread trying to acquire that lock
> anyway. This defeats the purpose and is a riskier change, since
> cancelling partial collection in this inherently racy edge-case would
> have observable differences from the returned stats, which may cause
> more regressions.
>
> Bug: chromium:850907
> Change-Id: Idceeee0bddc0c9d5518b58a2b263abb2bbf47cff
> Reviewed-on: https://webrtc-review.googlesource.com/c/121567
> Commit-Queue: Henrik Boström <hbos@webrtc.org>
> Reviewed-by: Steve Anton <steveanton@webrtc.org>
> Cr-Commit-Position: refs/heads/master@{#26707}
TBR=steveanton@webrtc.org,hbos@webrtc.org
Change-Id: Icd82cdd5bd086a90999f7fd5f8616e1f2d2153bf
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: chromium:850907
Reviewed-on: https://webrtc-review.googlesource.com/c/123225
Reviewed-by: Mirko Bonadei <mbonadei@webrtc.org>
Commit-Queue: Mirko Bonadei <mbonadei@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26721}
diff --git a/pc/BUILD.gn b/pc/BUILD.gn
index 75ee47f..fab9632 100644
--- a/pc/BUILD.gn
+++ b/pc/BUILD.gn
@@ -217,7 +217,6 @@
"../rtc_base:checks",
"../rtc_base:rtc_base",
"../rtc_base:rtc_base_approved",
- "../rtc_base:rtc_post_message_with_functor",
"../rtc_base/system:rtc_export",
"../rtc_base/third_party/base64",
"../rtc_base/third_party/sigslot",
diff --git a/pc/rtc_stats_collector.cc b/pc/rtc_stats_collector.cc
index 3287b92..a25a785 100644
--- a/pc/rtc_stats_collector.cc
+++ b/pc/rtc_stats_collector.cc
@@ -25,7 +25,6 @@
#include "pc/peer_connection.h"
#include "pc/rtc_stats_traversal.h"
#include "rtc_base/checks.h"
-#include "rtc_base/post_message_with_functor.h"
#include "rtc_base/strings/string_builder.h"
#include "rtc_base/time_utils.h"
#include "rtc_base/trace_event.h"
@@ -754,8 +753,6 @@
network_thread_(pc->network_thread()),
num_pending_partial_reports_(0),
partial_report_timestamp_us_(0),
- network_report_event_(true /* manual_reset */,
- true /* initially_signaled */),
cache_timestamp_us_(0),
cache_lifetime_us_(cache_lifetime_us) {
RTC_DCHECK(pc_);
@@ -802,7 +799,7 @@
// reentrancy problems.
std::vector<RequestInfo> requests;
requests.swap(requests_);
- rtc::PostMessageWithFunctor(
+ invoker_.AsyncInvoke<void>(
RTC_FROM_HERE, signaling_thread_,
rtc::Bind(&RTCStatsCollector::DeliverCachedReport, this, cached_report_,
std::move(requests)));
@@ -833,14 +830,10 @@
// network thread, where it more naturally belongs.
call_stats_ = pc_->GetCallStats();
- // Don't touch |network_report_| on the signaling thread until
- // ProducePartialResultsOnNetworkThread() has signaled the
- // |network_report_event_|.
- network_report_event_.Reset();
- rtc::PostMessageWithFunctor(
+ invoker_.AsyncInvoke<void>(
RTC_FROM_HERE, network_thread_,
rtc::Bind(&RTCStatsCollector::ProducePartialResultsOnNetworkThread,
- this, timestamp_us));
+ rtc::scoped_refptr<RTCStatsCollector>(this), timestamp_us));
ProducePartialResultsOnSignalingThread(timestamp_us);
}
}
@@ -852,117 +845,89 @@
void RTCStatsCollector::WaitForPendingRequest() {
RTC_DCHECK(signaling_thread_->IsCurrent());
- // If a request is pending, blocks until the |network_report_event_| is
- // signaled and then delivers the result. Otherwise this is a NO-OP.
- MergeNetworkReport_s();
+ if (num_pending_partial_reports_) {
+ rtc::Thread::Current()->ProcessMessages(0);
+ while (num_pending_partial_reports_) {
+ rtc::Thread::Current()->SleepMs(1);
+ rtc::Thread::Current()->ProcessMessages(0);
+ }
+ }
}
void RTCStatsCollector::ProducePartialResultsOnSignalingThread(
int64_t timestamp_us) {
RTC_DCHECK(signaling_thread_->IsCurrent());
- partial_report_ = RTCStatsReport::Create(timestamp_us);
+ rtc::scoped_refptr<RTCStatsReport> report = RTCStatsReport::Create(
+ timestamp_us);
- ProducePartialResultsOnSignalingThreadImpl(timestamp_us,
- partial_report_.get());
+ ProduceDataChannelStats_s(timestamp_us, report.get());
+ ProduceMediaStreamStats_s(timestamp_us, report.get());
+ ProduceMediaStreamTrackStats_s(timestamp_us, report.get());
+ ProducePeerConnectionStats_s(timestamp_us, report.get());
- // ProducePartialResultsOnSignalingThread() is running synchronously on the
- // signaling thread, so it is always the first partial result delivered on the
- // signaling thread. The request is not complete until MergeNetworkReport_s()
- // happens; we don't have to do anything here.
- RTC_DCHECK_GT(num_pending_partial_reports_, 1);
- --num_pending_partial_reports_;
-}
-
-void RTCStatsCollector::ProducePartialResultsOnSignalingThreadImpl(
- int64_t timestamp_us,
- RTCStatsReport* partial_report) {
- RTC_DCHECK(signaling_thread_->IsCurrent());
- ProduceDataChannelStats_s(timestamp_us, partial_report);
- ProduceMediaStreamStats_s(timestamp_us, partial_report);
- ProduceMediaStreamTrackStats_s(timestamp_us, partial_report);
- ProducePeerConnectionStats_s(timestamp_us, partial_report);
+ AddPartialResults(report);
}
void RTCStatsCollector::ProducePartialResultsOnNetworkThread(
int64_t timestamp_us) {
RTC_DCHECK(network_thread_->IsCurrent());
- // Touching |network_report_| on this thread is safe by this method because
- // |network_report_event_| is reset before this method is invoked.
- network_report_ = RTCStatsReport::Create(timestamp_us);
+ rtc::scoped_refptr<RTCStatsReport> report = RTCStatsReport::Create(
+ timestamp_us);
std::map<std::string, cricket::TransportStats> transport_stats_by_name =
pc_->GetTransportStatsByNames(transport_names_);
+
std::map<std::string, CertificateStatsPair> transport_cert_stats =
PrepareTransportCertificateStats_n(transport_stats_by_name);
- ProducePartialResultsOnNetworkThreadImpl(
- timestamp_us, transport_stats_by_name, transport_cert_stats,
- network_report_.get());
-
- // Signal that it is now safe to touch |network_report_| on the signaling
- // thread, and post a task to merge it into the final results.
- network_report_event_.Set();
- rtc::PostMessageWithFunctor(
- RTC_FROM_HERE, signaling_thread_,
- rtc::Bind(&RTCStatsCollector::MergeNetworkReport_s, this));
-}
-
-void RTCStatsCollector::ProducePartialResultsOnNetworkThreadImpl(
- int64_t timestamp_us,
- const std::map<std::string, cricket::TransportStats>&
- transport_stats_by_name,
- const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
- RTCStatsReport* partial_report) {
- RTC_DCHECK(network_thread_->IsCurrent());
- ProduceCertificateStats_n(timestamp_us, transport_cert_stats, partial_report);
- ProduceCodecStats_n(timestamp_us, transceiver_stats_infos_, partial_report);
+ ProduceCertificateStats_n(timestamp_us, transport_cert_stats, report.get());
+ ProduceCodecStats_n(timestamp_us, transceiver_stats_infos_, report.get());
ProduceIceCandidateAndPairStats_n(timestamp_us, transport_stats_by_name,
- call_stats_, partial_report);
- ProduceRTPStreamStats_n(timestamp_us, transceiver_stats_infos_,
- partial_report);
+ call_stats_, report.get());
+ ProduceRTPStreamStats_n(timestamp_us, transceiver_stats_infos_, report.get());
ProduceTransportStats_n(timestamp_us, transport_stats_by_name,
- transport_cert_stats, partial_report);
+ transport_cert_stats, report.get());
+
+ AddPartialResults(report);
}
-void RTCStatsCollector::MergeNetworkReport_s() {
- RTC_DCHECK(signaling_thread_->IsCurrent());
- // The |network_report_event_| must be signaled for it to be safe to touch
- // |network_report_|. This is normally not blocking, but if
- // WaitForPendingRequest() is called while a request is pending, we might have
- // to wait until the network thread is done touching |network_report_|.
- network_report_event_.Wait(rtc::Event::kForever);
- if (!network_report_) {
- // Normally, MergeNetworkReport_s() is executed because it is posted from
- // the network thread. But if WaitForPendingRequest() is called while a
- // request is pending, an early call to MergeNetworkReport_s() is made,
- // merging the report and setting |network_report_| to null. If so, when the
- // previously posted MergeNetworkReport_s() is later executed, the report is
- // already null and nothing needs to be done here.
+void RTCStatsCollector::AddPartialResults(
+ const rtc::scoped_refptr<RTCStatsReport>& partial_report) {
+ if (!signaling_thread_->IsCurrent()) {
+ invoker_.AsyncInvoke<void>(RTC_FROM_HERE, signaling_thread_,
+ rtc::Bind(&RTCStatsCollector::AddPartialResults_s,
+ rtc::scoped_refptr<RTCStatsCollector>(this),
+ partial_report));
return;
}
- RTC_DCHECK_GT(num_pending_partial_reports_, 0);
- RTC_DCHECK(partial_report_);
- partial_report_->TakeMembersFrom(network_report_);
- network_report_ = nullptr;
- --num_pending_partial_reports_;
- // |network_report_| is currently the only partial report collected
- // asynchronously, so |num_pending_partial_reports_| must now be 0 and we are
- // ready to deliver the result.
- RTC_DCHECK_EQ(num_pending_partial_reports_, 0);
- cache_timestamp_us_ = partial_report_timestamp_us_;
- cached_report_ = partial_report_;
- partial_report_ = nullptr;
- transceiver_stats_infos_.clear();
- // Trace WebRTC Stats when getStats is called on Javascript.
- // This allows access to WebRTC stats from trace logs. To enable them,
- // select the "webrtc_stats" category when recording traces.
- TRACE_EVENT_INSTANT1("webrtc_stats", "webrtc_stats", "report",
- cached_report_->ToJson());
+ AddPartialResults_s(partial_report);
+}
- // Deliver report and clear |requests_|.
- std::vector<RequestInfo> requests;
- requests.swap(requests_);
- DeliverCachedReport(cached_report_, std::move(requests));
+void RTCStatsCollector::AddPartialResults_s(
+ rtc::scoped_refptr<RTCStatsReport> partial_report) {
+ RTC_DCHECK(signaling_thread_->IsCurrent());
+ RTC_DCHECK_GT(num_pending_partial_reports_, 0);
+ if (!partial_report_)
+ partial_report_ = partial_report;
+ else
+ partial_report_->TakeMembersFrom(partial_report);
+ --num_pending_partial_reports_;
+ if (!num_pending_partial_reports_) {
+ cache_timestamp_us_ = partial_report_timestamp_us_;
+ cached_report_ = partial_report_;
+ partial_report_ = nullptr;
+ transceiver_stats_infos_.clear();
+ // Trace WebRTC Stats when getStats is called on Javascript.
+ // This allows access to WebRTC stats from trace logs. To enable them,
+ // select the "webrtc_stats" category when recording traces.
+ TRACE_EVENT_INSTANT1("webrtc_stats", "webrtc_stats", "report",
+ cached_report_->ToJson());
+
+ // Deliver report and clear |requests_|.
+ std::vector<RequestInfo> requests;
+ requests.swap(requests_);
+ DeliverCachedReport(cached_report_, std::move(requests));
+ }
}
void RTCStatsCollector::DeliverCachedReport(
diff --git a/pc/rtc_stats_collector.h b/pc/rtc_stats_collector.h
index 4837fc0..a3f1593 100644
--- a/pc/rtc_stats_collector.h
+++ b/pc/rtc_stats_collector.h
@@ -27,7 +27,7 @@
#include "pc/data_channel.h"
#include "pc/peer_connection_internal.h"
#include "pc/track_media_info_map.h"
-#include "rtc_base/event.h"
+#include "rtc_base/async_invoker.h"
#include "rtc_base/ref_count.h"
#include "rtc_base/ssl_identity.h"
#include "rtc_base/third_party/sigslot/sigslot.h"
@@ -77,21 +77,14 @@
RTCStatsCollector(PeerConnectionInternal* pc, int64_t cache_lifetime_us);
~RTCStatsCollector();
- struct CertificateStatsPair {
- std::unique_ptr<rtc::SSLCertificateStats> local;
- std::unique_ptr<rtc::SSLCertificateStats> remote;
- };
+ // Stats gathering on a particular thread. Calls |AddPartialResults| before
+ // returning. Virtual for the sake of testing.
+ virtual void ProducePartialResultsOnSignalingThread(int64_t timestamp_us);
+ virtual void ProducePartialResultsOnNetworkThread(int64_t timestamp_us);
- // Stats gathering on a particular thread. Virtual for the sake of testing.
- virtual void ProducePartialResultsOnSignalingThreadImpl(
- int64_t timestamp_us,
- RTCStatsReport* partial_report);
- virtual void ProducePartialResultsOnNetworkThreadImpl(
- int64_t timestamp_us,
- const std::map<std::string, cricket::TransportStats>&
- transport_stats_by_name,
- const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
- RTCStatsReport* partial_report);
+ // Can be called on any thread.
+ void AddPartialResults(
+ const rtc::scoped_refptr<RTCStatsReport>& partial_report);
private:
class RequestInfo {
@@ -137,6 +130,11 @@
void GetStatsReportInternal(RequestInfo request);
+ struct CertificateStatsPair {
+ std::unique_ptr<rtc::SSLCertificateStats> local;
+ std::unique_ptr<rtc::SSLCertificateStats> remote;
+ };
+
// Structure for tracking stats about each RtpTransceiver managed by the
// PeerConnection. This can either by a Plan B style or Unified Plan style
// transceiver (i.e., can have 0 or many senders and receivers).
@@ -152,6 +150,7 @@
std::unique_ptr<TrackMediaInfoMap> track_media_info_map;
};
+ void AddPartialResults_s(rtc::scoped_refptr<RTCStatsReport> partial_report);
void DeliverCachedReport(
rtc::scoped_refptr<const RTCStatsReport> cached_report,
std::vector<RequestInfo> requests);
@@ -212,13 +211,6 @@
std::vector<RtpTransceiverStatsInfo> PrepareTransceiverStatsInfos_s() const;
std::set<std::string> PrepareTransportNames_s() const;
- // Stats gathering on a particular thread.
- void ProducePartialResultsOnSignalingThread(int64_t timestamp_us);
- void ProducePartialResultsOnNetworkThread(int64_t timestamp_us);
- // Merges |network_report_| into |partial_report_| and completes the request.
- // This is a NO-OP if |network_report_| is null.
- void MergeNetworkReport_s();
-
// Slots for signals (sigslot) that are wired up to |pc_|.
void OnDataChannelCreated(DataChannel* channel);
// Slots for signals (sigslot) that are wired up to |channel|.
@@ -229,24 +221,12 @@
rtc::Thread* const signaling_thread_;
rtc::Thread* const worker_thread_;
rtc::Thread* const network_thread_;
+ rtc::AsyncInvoker invoker_;
int num_pending_partial_reports_;
int64_t partial_report_timestamp_us_;
- // Reports that are produced on the signaling thread or the network thread are
- // merged into this report. It is only touched on the signaling thread. Once
- // all partial reports are merged this is the result of a request.
rtc::scoped_refptr<RTCStatsReport> partial_report_;
std::vector<RequestInfo> requests_;
- // Holds the result of ProducePartialResultsOnNetworkThread(). It is merged
- // into |partial_report_| on the signaling thread and then nulled by
- // MergeNetworkReport_s(). Thread-safety is ensured by using
- // |network_report_event_|.
- rtc::scoped_refptr<RTCStatsReport> network_report_;
- // If set, it is safe to touch the |network_report_| on the signaling thread.
- // This is reset before async-invoking ProducePartialResultsOnNetworkThread()
- // and set when ProducePartialResultsOnNetworkThread() is complete, after it
- // has updated the value of |network_report_|.
- rtc::Event network_report_event_;
// Set in |GetStatsReport|, read in |ProducePartialResultsOnNetworkThread| and
// |ProducePartialResultsOnSignalingThread|, reset after work is complete. Not
diff --git a/pc/rtc_stats_collector_unittest.cc b/pc/rtc_stats_collector_unittest.cc
index 3be4ae7..f628f82 100644
--- a/pc/rtc_stats_collector_unittest.cc
+++ b/pc/rtc_stats_collector_unittest.cc
@@ -2249,9 +2249,7 @@
worker_thread_(pc->worker_thread()),
network_thread_(pc->network_thread()) {}
- void ProducePartialResultsOnSignalingThreadImpl(
- int64_t timestamp_us,
- RTCStatsReport* partial_report) override {
+ void ProducePartialResultsOnSignalingThread(int64_t timestamp_us) override {
EXPECT_TRUE(signaling_thread_->IsCurrent());
{
rtc::CritScope cs(&lock_);
@@ -2259,15 +2257,13 @@
++produced_on_signaling_thread_;
}
- partial_report->AddStats(std::unique_ptr<const RTCStats>(
+ rtc::scoped_refptr<RTCStatsReport> signaling_report =
+ RTCStatsReport::Create(0);
+ signaling_report->AddStats(std::unique_ptr<const RTCStats>(
new RTCTestStats("SignalingThreadStats", timestamp_us)));
+ AddPartialResults(signaling_report);
}
- void ProducePartialResultsOnNetworkThreadImpl(
- int64_t timestamp_us,
- const std::map<std::string, cricket::TransportStats>&
- transport_stats_by_name,
- const std::map<std::string, CertificateStatsPair>& transport_cert_stats,
- RTCStatsReport* partial_report) override {
+ void ProducePartialResultsOnNetworkThread(int64_t timestamp_us) override {
EXPECT_TRUE(network_thread_->IsCurrent());
{
rtc::CritScope cs(&lock_);
@@ -2275,8 +2271,11 @@
++produced_on_network_thread_;
}
- partial_report->AddStats(std::unique_ptr<const RTCStats>(
+ rtc::scoped_refptr<RTCStatsReport> network_report =
+ RTCStatsReport::Create(0);
+ network_report->AddStats(std::unique_ptr<const RTCStats>(
new RTCTestStats("NetworkThreadStats", timestamp_us)));
+ AddPartialResults(network_report);
}
private: