Add source-side perf upload script for WebRTC.
This effectively makes WebRTC upload histogram sets instead of Chart
JSON. Histogram sets are the newest format used by Chromium. I'm doing
this because it's nice to use the most modern thing, but mostly because
it's the default for PinPoint. This means I don't have to implement and
support a new read path for Chart JSON.
This script has to be source side, because we need third_party/catapult
to write correct histograms. This script will be called from recipes.
I also considered generating histogram JSON directly in
test/testsupport/perf_test.cc, which could have avoided this conversion
from Chart JSON to histogram sets, but I can't because there is no C++
API for histogram sets.
Bug: webrtc:11084
Change-Id: If0d2315d2057112b3c2d54a9cfd12e59b5858a18
Reviewed-on: https://webrtc-review.googlesource.com/c/src/+/159780
Reviewed-by: Artem Titov <titovartem@webrtc.org>
Commit-Queue: Patrik Höglund <phoglund@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#29818}
diff --git a/.vpython b/.vpython
index 05bbe14..fb75db5 100644
--- a/.vpython
+++ b/.vpython
@@ -31,6 +31,12 @@
version: "version:5.2.2"
>
+# Used by tools_webrtc/perf/webrtc_dashboard_upload.py.
+wheel: <
+ name: "infra/python/wheels/httplib2-py2_py3"
+ version: "version:0.10.3"
+>
+
# Used by:
# build/toolchain/win
wheel: <
diff --git a/tools_webrtc/perf/histogram_util.py b/tools_webrtc/perf/histogram_util.py
new file mode 100644
index 0000000..aabd5b8
--- /dev/null
+++ b/tools_webrtc/perf/histogram_util.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Upload data to the chrome perf dashboard via add_histograms endpoint."""
+
+import os
+import sys
+import logging
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+CHECKOUT_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir, os.pardir))
+sys.path.insert(0, os.path.join(CHECKOUT_ROOT, 'third_party', 'catapult',
+ 'tracing'))
+
+from tracing.value import histogram
+from tracing.value import histogram_set
+from tracing.value.diagnostics import generic_set
+from tracing.value.diagnostics import reserved_infos
+
+# Enums aren't available in this Python env, so emulate one with constants:
+class ImprovementDirection(object):
+  DEFAULT = 1
+  BIGGER_IS_BETTER = 2
+  SMALLER_IS_BETTER = 3
+
+
+def MakeWebRtcHistogramSet(stats, commit_pos, commit_hash, master, bot,
+                           test_suite, build_url):
+  """Converts a list of histograms into a histogram set with shared metadata.
+
+  Args:
+    stats: A list of histograms to upload.
+    commit_pos: Commit position (dashboard point id) the test was run on.
+    commit_hash: WebRTC commit hash that the test was run on.
+    master: Bot group name (the "perf dashboard machine group").
+    bot: Bot name as it will show up in the perf dashboard.
+    test_suite: Top-level identifier of the test for Chrome perf dashboard.
+    build_url: A URL pointing to the bot status page for this build.
+
+  Returns:
+    A histogram set in the format the Chrome perf dashboard expects.
+  """
+  common_diagnostics = {
+      reserved_infos.MASTERS: master,
+      reserved_infos.BOTS: bot,
+      reserved_infos.POINT_ID: commit_pos,
+      reserved_infos.BENCHMARKS: test_suite,
+      reserved_infos.WEBRTC_REVISIONS: str(commit_hash),
+      reserved_infos.BUILD_URLS: build_url,
+  }
+
+  hs = histogram_set.HistogramSet()
+  for h in stats:
+    hs.AddHistogram(h)
+
+  for k, v in common_diagnostics.items():  # shared across all histograms
+    hs.AddSharedDiagnosticToAllHistograms(k.name, generic_set.GenericSet([v]))
+
+  return hs
+
+
+def LoadHistograms(data):
+  """Loads histograms from parsed Chart JSON data and adapts them for the API.
+
+  Args:
+    data: parsed json object of Chart JSON format.
+
+  Note:
+    'std' entries are ignored (only logged); provide 'values' lists instead.
+  Returns:
+    list of loaded histograms.
+  """
+  stats = []
+  for metric, story in data['charts'].items():
+    for story_name, story_desc in story.items():
+      units = story_desc['units'].strip()
+      if 'std' in story_desc:
+        # TODO(bugs.webrtc.org/11084): This seems bad to throw away?
+        logging.debug('std is not supported, specify list of values instead.')
+
+      if 'value' in story_desc:
+        values = [story_desc['value']]
+      else:
+        values = list(story_desc['values'])
+
+      improvement_direction = ImprovementDirection.DEFAULT
+      if 'improvement_direction' in story_desc:
+        if story_desc['improvement_direction'] == 'bigger_is_better':
+          improvement_direction = ImprovementDirection.BIGGER_IS_BETTER
+        elif story_desc['improvement_direction'] == 'smaller_is_better':
+          improvement_direction = ImprovementDirection.SMALLER_IS_BETTER
+      # 'higher_is_better' wins over 'improvement_direction' if both present.
+      if 'higher_is_better' in story_desc:
+        if story_desc['higher_is_better']:
+          improvement_direction = ImprovementDirection.BIGGER_IS_BETTER
+        else:
+          improvement_direction = ImprovementDirection.SMALLER_IS_BETTER
+
+      new_metric, new_units, new_values = _FixUnits(metric, units, values)
+      h = _BuildHistogram(new_metric, story_name, new_units, new_values,
+                          improvement_direction)
+      stats.append(h)
+  return stats
+
+
+def _FixUnits(metric_name, units, values):
+  """Translates Chart JSON units to dashboard units, fixing name and values.
+
+  Args:
+    metric_name: origin metric name
+    units: raw trimmed units
+    values: origin values
+
+  Returns:
+    (metric_name, units, values) triple with fixed content
+  """
+  if units == 'bps':  # dashboard has no bits/s unit; convert to bytes/s
+    return metric_name, 'bytesPerSecond', [v / 8.0 for v in values]
+  elif units == 'dB':
+    return metric_name + '_dB', 'unitless', values
+  elif units == 'fps':
+    return metric_name + '_fps', 'Hz', values
+  elif units == 'frames':
+    return metric_name, 'count', values
+  elif units == 'ms':
+    return metric_name, 'msBestFitFormat', values
+  elif units == '%':
+    return metric_name + '_%', 'unitless', values
+  else:
+    return metric_name, units, values
+
+
+def _BuildHistogram(metric_name, story_name, units, values,
+                    improvement_direction):
+  """Builds a single-bin histogram; unsupported units fall back to unitless."""
+  if units not in histogram.UNIT_NAMES:
+    logging.debug(
+        'Unsupported unit %s will be replaced by \'unitless\'', units)
+    units = 'unitless'
+  if improvement_direction is ImprovementDirection.BIGGER_IS_BETTER:
+    units = units + '_biggerIsBetter'
+  elif improvement_direction is ImprovementDirection.SMALLER_IS_BETTER:
+    units = units + '_smallerIsBetter'
+  h = histogram.Histogram(metric_name, units,
+                          histogram.HistogramBinBoundaries.SINGULAR)
+  h.diagnostics[reserved_infos.STORIES.name] = generic_set.GenericSet(
+      [story_name])
+  h.CustomizeSummaryOptions({  # suppress derived summary statistics
+      'std': False,
+      'avg': False,
+      'count': False,
+      'max': False,
+      'min': False,
+      'sum': False
+  })
+  for v in values:
+    h.AddSample(v)
+  return h
diff --git a/tools_webrtc/perf/histogram_util_test.py b/tools_webrtc/perf/histogram_util_test.py
new file mode 100644
index 0000000..51d9982
--- /dev/null
+++ b/tools_webrtc/perf/histogram_util_test.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import os
+import sys
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+CHECKOUT_ROOT = os.path.abspath(os.path.join(SCRIPT_DIR, os.pardir, os.pardir))
+sys.path.insert(0, os.path.join(CHECKOUT_ROOT, 'third_party', 'catapult',
+ 'tracing'))
+sys.path.append(os.path.join(CHECKOUT_ROOT, 'third_party', 'pymock'))
+
+import json
+import mock
+import unittest
+
+import histogram_util as u
+
+from tracing.value import histogram
+from tracing.value.diagnostics import generic_set
+from tracing.value.diagnostics import reserved_infos
+
+
+class HistogramUploaderUnittest(unittest.TestCase):
+  """Tests Chart JSON -> histogram conversion and histogram set assembly."""
+
+  def testLoadHistogramsWithValues(self):
+    data = json.loads("""
+    {
+      "format_version": "1.0",
+      "charts": {
+        "audio_score": {
+          "AV": {
+            "type": "scalar",
+            "values": [0.6, 0.5, 0.7],
+            "units": "unitless_biggerIsBetter"
+          }
+        }
+      }
+    }
+    """)
+    stats = u.LoadHistograms(data)
+    self.assertEqual(len(stats), 1)
+    self.assertEqual(stats[0].name, "audio_score")
+    self.assertEqual(stats[0].unit, "unitless_biggerIsBetter")
+    self.assertEqual(stats[0].sample_values, [0.6, 0.5, 0.7])
+
+  def testLoadHistogramsWithValue(self):
+    data = json.loads("""
+    {
+      "format_version": "1.0",
+      "charts": {
+        "audio_score": {
+          "AV": {
+            "type": "scalar",
+            "value": 0.3,
+            "units": "unitless_biggerIsBetter"
+          }
+        }
+      }
+    }
+    """)
+    stats = u.LoadHistograms(data)
+    self.assertEqual(len(stats), 1)
+    self.assertEqual(stats[0].name, "audio_score")
+    self.assertEqual(stats[0].unit, "unitless_biggerIsBetter")
+    self.assertEqual(stats[0].sample_values, [0.3])
+
+  def testLoadHistogramsWithUnknownUnit(self):
+    data = json.loads("""
+    {
+      "format_version": "1.0",
+      "charts": {
+        "audio_score": {
+          "AV": {
+            "type": "scalar",
+            "value": 0.3,
+            "units": "good_score_biggerIsBetter"
+          }
+        }
+      }
+    }
+    """)
+    stats = u.LoadHistograms(data)
+    self.assertEqual(len(stats), 1)
+    self.assertEqual(stats[0].name, "audio_score")
+    self.assertEqual(stats[0].unit, "unitless")
+    self.assertEqual(stats[0].sample_values, [0.3])
+
+  def testLoadHistogramsWithStd(self):
+    data = json.loads("""
+    {
+      "format_version": "1.0",
+      "charts": {
+        "audio_score": {
+          "AV": {
+            "type": "scalar",
+            "value": 0.3,
+            "std": 0.1,
+            "units": "unitless",
+            "higher_is_better": true
+          }
+        }
+      }
+    }
+    """)
+    stats = u.LoadHistograms(data)
+    self.assertEqual(len(stats), 1)
+    self.assertEqual(stats[0].name, "audio_score")
+    self.assertEqual(stats[0].unit, "unitless_biggerIsBetter")
+    self.assertEqual(stats[0].sample_values, [0.3])
+
+  def testLoadHistogramsMsBiggerIsBetter(self):
+    data = json.loads("""
+    {
+      "format_version": "1.0",
+      "charts": {
+        "audio_score": {
+          "AV": {
+            "type": "scalar",
+            "value": 0.3,
+            "std": 0.1,
+            "units": "ms",
+            "improvement_direction": "bigger_is_better"
+          }
+        }
+      }
+    }
+    """)
+    stats = u.LoadHistograms(data)
+    self.assertEqual(len(stats), 1)
+    self.assertEqual(stats[0].name, "audio_score")
+    self.assertEqual(stats[0].unit, "msBestFitFormat_biggerIsBetter")
+    self.assertEqual(stats[0].sample_values, [0.3])
+
+  def testLoadHistogramsBps(self):
+    data = json.loads("""
+    {
+      "format_version": "1.0",
+      "charts": {
+        "audio_score": {
+          "AV": {
+            "type": "scalar",
+            "values": [240, 160],
+            "std": 0.1,
+            "units": "bps"
+          }
+        }
+      }
+    }
+    """)
+    stats = u.LoadHistograms(data)
+    self.assertEqual(len(stats), 1)
+    self.assertEqual(stats[0].name, "audio_score")
+    # bps values are converted to bytes per second (divided by 8).
+    self.assertEqual(stats[0].unit, "bytesPerSecond")
+    self.assertEqual(stats[0].sample_values, [30, 20])
+
+  def testMakeWebRtcHistogramSet(self):
+    h = histogram.Histogram("audio_score", "unitless_biggerIsBetter",
+                            histogram.HistogramBinBoundaries.SINGULAR)
+    h.AddSample(0.5)
+    h.diagnostics[reserved_infos.STORIES.name] = generic_set.GenericSet(["AV"])
+    h.CustomizeSummaryOptions({
+        "std": False,
+        "avg": False,
+        "count": False,
+        "max": False,
+        "min": False,
+        "sum": False
+    })
+    stats = [h]
+    build_url = ('https://ci.chromium.org/p/webrtc/builders/ci/'
+                 'Android64%20%28M%20Nexus5X%29%28dbg%29')
+    hs = u.MakeWebRtcHistogramSet(
+        stats, commit_pos=123456789,
+        commit_hash="da39a3ee5e6b4b0d3255bfef95601890afd80709",
+        master="master", bot="bot", test_suite="webrtc_test_suite",
+        build_url=build_url)
+
+    # Shared diagnostics are emitted as standalone GenericSet dicts with
+    # generated guids, followed by the histogram dict referencing them.
+    expected = [{
+        "guid": mock.ANY,
+        "type": "GenericSet",
+        "values": [123456789]
+    }, {
+        "guid": mock.ANY,
+        "type": "GenericSet",
+        "values": ["webrtc_test_suite"]
+    }, {
+        "guid": mock.ANY,
+        "type": "GenericSet",
+        "values": ["bot"]
+    }, {
+        "guid": mock.ANY,
+        "type": "GenericSet",
+        "values": ["master"]
+    }, {
+        "guid": mock.ANY,
+        "type": "GenericSet",
+        "values": ["da39a3ee5e6b4b0d3255bfef95601890afd80709"]
+    }, {
+        "guid": mock.ANY,
+        "type": "GenericSet",
+        "values": [build_url]
+    }, {
+        "sampleValues": [0.5],
+        "name": "audio_score",
+        "running": [1, 0.5, -0.6931471805599453, 0.5, 0.5, 0.5, 0],
+        "diagnostics": {
+            "benchmarks": mock.ANY,
+            "bots": mock.ANY,
+            "buildUrls": mock.ANY,
+            "pointId": mock.ANY,
+            "masters": mock.ANY,
+            "stories": {
+                "type": "GenericSet",
+                "values": ["AV"]
+            },
+            "webrtcRevisions": mock.ANY
+        },
+        "allBins": [[1]],
+        "summaryOptions": {
+            "avg": False,
+            "count": False,
+            "max": False,
+            "min": False,
+            "std": False,
+            "sum": False
+        },
+        "unit": "unitless_biggerIsBetter"
+    }]
+    self.maxDiff = None  # pylint: disable=C0103
+    self.assertItemsEqual(expected, hs.AsDicts())
+
+
+if __name__ == "__main__":
+  unittest.main()
diff --git a/tools_webrtc/perf/webrtc_dashboard_upload.py b/tools_webrtc/perf/webrtc_dashboard_upload.py
new file mode 100644
index 0000000..d04374a
--- /dev/null
+++ b/tools_webrtc/perf/webrtc_dashboard_upload.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python
+# Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+"""Converts and uploads results to the Chrome perf dashboard.
+
+This conversion step is needed because test/testsupport/perf_test.cc can't
+output histograms natively. There is, unfortunately, no C++ API for histograms.
+This script is in python so it can depend on Catapult's python API instead.
+See histogram_util.py for how this is done. We should move to the C++ API and
+delete the scripts in this dir as soon as there is a C++ API (less conversions =
+easier to understand).
+
+This script can't be in recipes, because we can't access the catapult APIs from
+there. It needs to be here source-side.
+
+This script is adapted from the downstream variant like this:
+ * Follows upstream naming conventions.
+ * Downstream-only parameters and concepts go away.
+ * oAuth tokens are generated by luci-auth.
+"""
+
+import argparse
+import httplib2
+import json
+import sys
+import subprocess
+import zlib
+
+import histogram_util
+
+
+def _GenerateOauthToken():
+  """Returns an OAuth access token obtained from the luci-auth tool."""
+  args = ['luci-auth', 'token']
+  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+  # NOTE(review): wait() before reading a PIPE can deadlock if the child
+  # writes more than the pipe buffer; p.communicate() would be safer.
+  if p.wait() == 0:
+    output = p.stdout.read()
+    return output.strip()
+  else:
+    raise RuntimeError(
+        'Error generating authentication token.\nStdout: %s\nStderr:%s' %
+        (p.stdout.read(), p.stderr.read()))
+
+
+def _SendHistogramSetJson(url, histogram_json, oauth_token):
+  """Makes an HTTP POST with the given JSON to the Performance Dashboard.
+
+  Args:
+    url: URL of Performance Dashboard instance, e.g.
+        "https://chromeperf.appspot.com".
+    histogram_json: a JSON object that contains the data to be sent.
+    oauth_token: An oauth token to use for authorization.
+
+  Returns:
+    A (response, content) tuple as returned by httplib2.Http.request.
+  """
+  headers = {'Authorization': 'Bearer %s' % oauth_token}
+  serialized = json.dumps(histogram_json.AsDicts(), indent=4)
+  # Payload is zlib-compressed; the endpoint is assumed to accept this.
+  data = zlib.compress(serialized)
+
+  http = httplib2.Http()
+  response, content = http.request(url + '/add_histograms', method='POST',
+                                   body=data, headers=headers)
+  return response, content
+
+
+def _LoadHistogramSetJson(options):
+  """Reads Chart JSON from the input file and builds a WebRTC histogram set.
+
+  Args:
+    options: parsed command-line options (see _CreateParser).
+
+  Returns:
+    A catapult HistogramSet ready for upload.
+  """
+  with options.input_results_file as f:
+    json_data = json.load(f)
+
+  histograms = histogram_util.LoadHistograms(json_data)
+  hs = histogram_util.MakeWebRtcHistogramSet(
+      stats=histograms,
+      commit_pos=options.commit_position,
+      commit_hash=options.webrtc_git_hash,
+      master=options.perf_dashboard_machine_group,
+      bot=options.bot,
+      test_suite=options.test_suite,
+      build_url=options.build_page_url)
+
+  return hs
+
+
+def _CreateParser():
+  """Builds the argparse parser for this script's command line."""
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--perf-dashboard-machine-group', required=True,
+                      help='The "master" the bots are grouped under. This '
+                      'string is the group in the the perf dashboard path '
+                      'group/bot/perf_id/metric/subtest.')
+  parser.add_argument('--bot', required=True,
+                      help='The bot running the test (e.g. '
+                      'webrtc-win-large-tests).')
+  parser.add_argument('--test-suite', required=True,
+                      help='The key for the test in the dashboard (i.e. what '
+                      'you select in the top-level test suite selector in the '
+                      'dashboard')  # NOTE(review): help text missing a ')'.
+  parser.add_argument('--webrtc-git-hash', required=True,
+                      help='webrtc.googlesource.com commit hash.')
+  parser.add_argument('--commit-position', type=int, required=True,
+                      help='Commit pos corresponding to the git hash.')
+  parser.add_argument('--build-page-url', required=True,
+                      help='URL to the build page for this build.')
+  parser.add_argument('--dashboard-url', required=True,
+                      help='Which dashboard to use.')
+  parser.add_argument('--input-results-file', type=argparse.FileType(),
+                      required=True,
+                      help='A JSON file with output from WebRTC tests.')
+  parser.add_argument('--output-json-file', type=argparse.FileType('w'),
+                      help='Where to write the output (for debugging).')
+  return parser
+
+
+def main(args):
+  """Parses args, converts the results, and uploads them to the dashboard.
+
+  Returns:
+    0 on successful upload (HTTP 200), 1 otherwise.
+  """
+  parser = _CreateParser()
+  options = parser.parse_args(args)
+
+  histogram_json = _LoadHistogramSetJson(options)
+
+  # Optionally dump the converted histogram set for debugging.
+  if options.output_json_file:
+    with options.output_json_file as output_file:
+      json.dump(histogram_json.AsDicts(), output_file, indent=4)
+
+  oauth_token = _GenerateOauthToken()
+  response, content = _SendHistogramSetJson(
+      options.dashboard_url, histogram_json, oauth_token)
+
+  if response.status == 200:
+    return 0
+  else:
+    print("Upload failed with %d: %s\n\n%s" % (response.status, response.reason,
+                                               content))
+    return 1
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv[1:]))