Fix possible deadlock when handling SCTP_SEND_FAILED_EVENT notification.
When processing an INIT for an existing SCTP association, which may happen
when transferring to a new remote endpoint, if there are chunks still on
the output queue, an SCTP_SEND_FAILED_EVENT notification will be
delivered while holding the INP lock. When processing this notification,
WebRTC was calling usrsctp_getladdrs in order to look up the
SctpTransport* associated with the socket, but this also tries to
acquire the INP lock, resulting in a deadlock.
The temporary fix is to simply not subscribe to the
SCTP_SEND_FAILED_EVENT notification; it was only used for logging
anyway.
Bug: chromium:1137936
Change-Id: I077efb4c769d6f7855d2fee2d266f19396b16f9d
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/188502
Commit-Queue: Taylor <deadbeef@webrtc.org>
Reviewed-by: Harald Alvestrand <hta@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#32427}
diff --git a/media/sctp/sctp_transport.cc b/media/sctp/sctp_transport.cc
index 23c87de..a74c664 100644
--- a/media/sctp/sctp_transport.cc
+++ b/media/sctp/sctp_transport.cc
@@ -900,9 +900,11 @@
}
// Subscribe to SCTP event notifications.
+ // TODO(crbug.com/1137936): Subscribe to SCTP_SEND_FAILED_EVENT once deadlock
+ // is fixed upstream, or we switch to the upcall API:
+ // https://github.com/sctplab/usrsctp/issues/537
int event_types[] = {SCTP_ASSOC_CHANGE, SCTP_PEER_ADDR_CHANGE,
- SCTP_SEND_FAILED_EVENT, SCTP_SENDER_DRY_EVENT,
- SCTP_STREAM_RESET_EVENT};
+ SCTP_SENDER_DRY_EVENT, SCTP_STREAM_RESET_EVENT};
struct sctp_event event = {0};
event.se_assoc_id = SCTP_ALL_ASSOC;
event.se_on = 1;
diff --git a/media/sctp/sctp_transport_unittest.cc b/media/sctp/sctp_transport_unittest.cc
index 540d2bd..46fbbc8 100644
--- a/media/sctp/sctp_transport_unittest.cc
+++ b/media/sctp/sctp_transport_unittest.cc
@@ -791,4 +791,52 @@
EXPECT_FALSE(SendData(transport1(), 1, eleven_characters, &result));
}
+// Regression test for: crbug.com/1137936
+TEST_F(SctpTransportTest, SctpRestartWithPendingDataDoesNotDeadlock) {
+ // In order to trigger a restart, we'll connect two transports, then
+ // disconnect them and connect the first to a third, which will initiate the
+ // new handshake.
+ FakeDtlsTransport fake_dtls1("fake dtls 1", 0);
+ FakeDtlsTransport fake_dtls2("fake dtls 2", 0);
+ FakeDtlsTransport fake_dtls3("fake dtls 3", 0);
+ SctpFakeDataReceiver recv1;
+ SctpFakeDataReceiver recv2;
+ SctpFakeDataReceiver recv3;
+
+ std::unique_ptr<SctpTransport> transport1(
+ CreateTransport(&fake_dtls1, &recv1));
+ std::unique_ptr<SctpTransport> transport2(
+ CreateTransport(&fake_dtls2, &recv2));
+ std::unique_ptr<SctpTransport> transport3(
+ CreateTransport(&fake_dtls3, &recv3));
+ SctpTransportObserver observer(transport1.get());
+
+ // Connect the first two transports.
+ fake_dtls1.SetDestination(&fake_dtls2, /*asymmetric=*/false);
+ transport1->OpenStream(1);
+ transport2->OpenStream(1);
+ transport1->Start(5000, 5000, kSctpSendBufferSize);
+ transport2->Start(5000, 5000, kSctpSendBufferSize);
+
+ // Sanity check that we can send data.
+ SendDataResult result;
+ ASSERT_TRUE(SendData(transport1.get(), 1, "foo", &result));
+ ASSERT_TRUE_WAIT(ReceivedData(&recv2, 1, "foo"), kDefaultTimeout);
+
+ // Disconnect the transports and attempt to send a message, which will be
+ // stored in an output queue; this is necessary to reproduce the bug.
+ fake_dtls1.SetDestination(nullptr, /*asymmetric=*/false);
+ EXPECT_TRUE(SendData(transport1.get(), 1, "bar", &result));
+
+ // Now connect to the third transport.
+ fake_dtls1.SetDestination(&fake_dtls3, /*asymmetric=*/false);
+ transport3->OpenStream(1);
+ transport3->Start(5000, 5000, kSctpSendBufferSize);
+
+ // Send data from the new endpoint to the original endpoint. If data is
+ // received that means the restart must have been successful.
+ EXPECT_TRUE(SendData(transport3.get(), 1, "baz", &result));
+ EXPECT_TRUE_WAIT(ReceivedData(&recv1, 1, "baz"), kDefaultTimeout);
+}
+
} // namespace cricket