#!/usr/bin/env python
# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
"""Finds the APM configuration that maximizes a provided metric by
parsing the output generated by the APM quality assessment tool.
"""
from __future__ import division
import collections
import logging
import os
import quality_assessment.data_access as data_access
import quality_assessment.collect_data as collect_data
def _InstanceArgumentsParser():
    """Arguments parser factory.

    Extends the parser built by `collect_data.InstanceArgumentsParser()`
    with a few extra arguments for selecting what parameters to optimize
    for.

    Returns:
      The parser obtained from `collect_data`, with its description
      replaced and the `--config_dir`, `--params` and
      `--params_not_to_optimize` arguments added.
    """
    parser = collect_data.InstanceArgumentsParser()
    # Adjacent string literals are concatenated verbatim, so the separating
    # space has to be part of one of the fragments.
    parser.description = (
        'Rudimentary optimization of a function over different parameter '
        'combinations.')

    # NOTE(review): the keyword argument following `help` was missing from
    # the reviewed copy of this file; the default directory name below is a
    # reconstruction and must be confirmed.
    parser.add_argument('-n', '--config_dir', required=False,
                        help=('path to the folder with the configuration '
                              'files'),
                        default='apm_configs')

    parser.add_argument('-p', '--params', required=True, nargs='+',
                        help=('parameters to parse from the config files in '
                              'config_dir'))

    parser.add_argument('-z', '--params_not_to_optimize', required=False,
                        nargs='+', default=[],
                        help=('parameters from `params` not to be optimized '
                              'for'))

    return parser
def _ConfigurationAndScores(data_frame, params,
                            params_not_to_optimize, config_dir):
    """Returns all configurations and their normalized scores.

    Args:
      data_frame: A pandas data frame with the scores and config names, as
                  returned by `collect_data.FindScores`. The columns
                  `apm_config`, `eval_score_name` and `score` are read.
      params: The parameter names to parse from the config files in
              `config_dir`.
      params_not_to_optimize: The parameter names which shouldn't affect
                              the optimal parameter selection. E.g., fixed
                              settings and not tunable parameters.
      config_dir: Path to folder with config files; each config is loaded
                  from `<config_dir>/<apm_config>.json`.

    Returns:
      Dictionary of the form
      {param_combination: [{params: {param1: value1, ...},
                            scores: {score1: value1, ...}}]}.

      The key `param_combination` runs over all parameter combinations
      of the parameters in `params` and not in
      `params_not_to_optimize`. A corresponding value is a list of all
      param combinations for params in `params_not_to_optimize` and
      their scores.
    """
    results = collections.defaultdict(list)
    config_names = data_frame['apm_config'].drop_duplicates().values.tolist()
    score_names = (
        data_frame['eval_score_name'].drop_duplicates().values.tolist())

    # Normalize the scores: every score is later divided by the largest
    # value observed for that score name over the whole data frame.
    normalization_constants = {}
    for score_name in score_names:
        scores = data_frame[data_frame.eval_score_name == score_name].score
        normalization_constants[score_name] = max(scores)

    params_to_optimize = [
        p for p in params if p not in params_not_to_optimize]
    # Hashable record type used as the key of `results`; its fields are the
    # names of the parameters being optimized.
    param_combination = collections.namedtuple("ParamCombination",
                                               params_to_optimize)

    for config_name in config_names:
        config_json = data_access.AudioProcConfigFile.Load(
            os.path.join(config_dir, config_name + ".json"))

        # Mean score per score name for this config, then normalized.
        scores = {}
        data_cell = data_frame[data_frame.apm_config == config_name]
        for score_name in score_names:
            data_cell_scores = data_cell[data_cell.eval_score_name ==
                                         score_name].score
            scores[score_name] = sum(data_cell_scores) / len(data_cell_scores)
            scores[score_name] /= normalization_constants[score_name]

        # Parameters to optimize form the dictionary key; the remaining
        # ones are stored next to the scores. Config entries are looked up
        # under the parameter name prefixed with '-'.
        result = {'scores': scores, 'params': {}}
        config_optimize_params = {}
        for param in params:
            if param in params_to_optimize:
                config_optimize_params[param] = config_json['-' + param]
            else:
                result['params'][param] = config_json['-' + param]

        current_param_combination = param_combination(  # pylint: disable=star-args
            **config_optimize_params)
        results[current_param_combination].append(result)
    return results
def _FindOptimalParameter(configs_and_scores, score_weighting):
"""Finds the config producing the maximal score.
configs_and_scores: structure of the form returned by
score_weighting: a function to weight together all score values of
the form [{params: {param1: value1, ...}, scores:
{score1: value1, ...}}] into a numeric
the config that has the largest values of |score_weighting| applied
to its scores.
min_score = float('+inf')
best_params = None
for config in configs_and_scores:
scores_and_params = configs_and_scores[config]
current_score = score_weighting(scores_and_params)
if current_score < min_score:
min_score = current_score
best_params = config
logging.debug("Score: %f", current_score)
logging.debug("Config: %s", str(config))
return best_params
def _ExampleWeighting(scores_and_configs):
"""Example argument to `_FindOptimalParameter`
scores_and_configs: a list of configs and scores, in the form
described in _FindOptimalParameter
numeric value, the sum of all scores
res = 0
for score_config in scores_and_configs:
res += sum(score_config['scores'].values())
return res
def main():
    """Parses the command line, collects the scores and logs the optimum.

    The scores are gathered through `collect_data`, grouped per parameter
    combination by `_ConfigurationAndScores`, and the combination selected
    by `_FindOptimalParameter` with `_ExampleWeighting` is logged together
    with its score entries.
    """
    # Init.
    # TODO(alessiob): INFO once debugged.
    # NOTE(review): this logging setup call was missing from the reviewed
    # copy of the file and is reconstructed from the TODO above; confirm.
    logging.basicConfig(level=logging.DEBUG)
    parser = _InstanceArgumentsParser()
    args = parser.parse_args()

    # Get the scores.
    src_path = collect_data.ConstructSrcPath(args)
    logging.debug('Src path <%s>', src_path)
    scores_data_frame = collect_data.FindScores(src_path, args)
    all_scores = _ConfigurationAndScores(scores_data_frame,
                                         args.params,
                                         args.params_not_to_optimize,
                                         args.config_dir)

    opt_param = _FindOptimalParameter(all_scores, _ExampleWeighting)

    # NOTE(review): the logging level of the two calls below was lost in
    # the reviewed copy; `info` is assumed.
    logging.info('Optimal parameter combination: <%s>', opt_param)
    logging.info('It\'s score values: <%s>', all_scores[opt_param])
if __name__ == "__main__":