Adding PRESUBMIT check on orphan headers files.

We want all .h files to be tracked by a GN target. This CL adds a
presubmit check to ensure that newly added targets are tracked by
GN.

In this CL we add a directory called 'presubmit_checks_lib' so we
can avoid to put everything in PRESUBMIT.py (especially if we want
to test some checks). I tried to use 'tools-webrtc' but it is not
easy to include a python module from a directory with '-' in the
name.

BUG=webrtc:7514

Review-Url: https://codereview.webrtc.org/2872493002
Cr-Commit-Position: refs/heads/master@{#18069}
diff --git a/PRESUBMIT.py b/PRESUBMIT.py
index 694624e..e13c9a0 100755
--- a/PRESUBMIT.py
+++ b/PRESUBMIT.py
@@ -575,6 +575,7 @@
   results.extend(_CheckJSONParseErrors(input_api, output_api))
   results.extend(_RunPythonTests(input_api, output_api))
   results.extend(_CheckUsageOfGoogleProtobufNamespace(input_api, output_api))
+  results.extend(_CheckOrphanHeaders(input_api, output_api))
   return results
 
 
@@ -602,3 +603,33 @@
       input_api, output_api,
       json_url='http://webrtc-status.appspot.com/current?format=json'))
   return results
+
+
+def _CheckOrphanHeaders(input_api, output_api):
+  # We need to wait until we have an input_api object and use this
+  # roundabout construct to import prebubmit_checks_lib because this file is
+  # eval-ed and thus doesn't have __file__.
+  error_msg = """Header file {} is not listed in any GN target.
+  Please create a target or add it to an existing one in {}"""
+  results = []
+  original_sys_path = sys.path
+  try:
+    sys.path = sys.path + [input_api.os_path.join(
+        input_api.PresubmitLocalPath(), 'tools_webrtc', 'presubmit_checks_lib')]
+    from check_orphan_headers import GetBuildGnPathFromFilePath
+    from check_orphan_headers import IsHeaderInBuildGn
+  finally:
+    # Restore sys.path to what it was before.
+    sys.path = original_sys_path
+
+  for f in input_api.AffectedSourceFiles(input_api.FilterSourceFile):
+    if f.LocalPath().endswith('.h'):
+      file_path = os.path.abspath(f.LocalPath())
+      root_dir = os.getcwd()
+      gn_file_path = GetBuildGnPathFromFilePath(file_path, os.path.exists,
+                                                root_dir)
+      in_build_gn = IsHeaderInBuildGn(file_path, gn_file_path)
+      if not in_build_gn:
+        results.append(output_api.PresubmitError(error_msg.format(
+            file_path, gn_file_path)))
+  return results
diff --git a/tools_webrtc/presubmit_checks_lib/check_orphan_headers.py b/tools_webrtc/presubmit_checks_lib/check_orphan_headers.py
new file mode 100644
index 0000000..707c49f
--- /dev/null
+++ b/tools_webrtc/presubmit_checks_lib/check_orphan_headers.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import os
+import re
+
+
+# TARGET_RE matches a GN target, and extracts the target name and the contents.
+TARGET_RE = re.compile(r'(?P<indent>\s*)\w+\("(?P<target_name>\w+)"\) {'
+                       r'(?P<target_contents>.*?)'
+                       r'(?P=indent)}',
+                       re.MULTILINE | re.DOTALL)
+
+# SOURCES_RE matches a block of sources inside a GN target.
+SOURCES_RE = re.compile(r'sources \+?= \[(?P<sources>.*?)\]',
+                        re.MULTILINE | re.DOTALL)
+
+SOURCE_FILE_RE = re.compile(r'.*\"(?P<source_file>.*)\"')
+
+
+class NoBuildGnFoundError(Exception):
+  pass
+
+
+class WrongFileTypeError(Exception):
+  pass
+
+
+def _ReadFile(file_path):
+  """Returns the content of file_path in a string.
+
+  Args:
+    file_path: the path of the file to read.
+  Returns:
+    A string with the content of the file.
+  """
+  with open(file_path) as f:
+    return f.read()
+
+
+def GetBuildGnPathFromFilePath(file_path, file_exists_check, root_dir_path):
+  """Returns the BUILD.gn file responsible for file_path.
+
+  Args:
+    file_path: the absolute path to the .h file to check.
+    file_exists_check: a function that defines how to check if a file exists
+      on the file system.
+    root_dir_path: the absolute path of the root of project.
+
+  Returns:
+    A string with the absolute path to the BUILD.gn file responsible to include
+    file_path in a target.
+  """
+  if not file_path.endswith('.h'):
+    raise WrongFileTypeError(
+        'File {} is not an header file (.h)'.format(file_path))
+  candidate_dir = os.path.dirname(file_path)
+  while candidate_dir.startswith(root_dir_path):
+    candidate_build_gn_path = os.path.join(candidate_dir, 'BUILD.gn')
+    if file_exists_check(candidate_build_gn_path):
+      return candidate_build_gn_path
+    else:
+      candidate_dir = os.path.abspath(os.path.join(candidate_dir,
+                                                   os.pardir))
+  raise NoBuildGnFoundError(
+      'No BUILD.gn file found for file: `{}`'.format(file_path))
+
+
+def IsHeaderInBuildGn(header_path, build_gn_path):
+  """Returns True if the header is listed in the BUILD.gn file.
+
+  Args:
+    header_path: the absolute path to the header to check.
+    build_gn_path: the absolute path to the header to check.
+
+  Returns:
+    bool: True if the header_path is an header that is listed in
+      at least one GN target in the BUILD.gn file specified by
+      the argument build_gn_path.
+  """
+  target_abs_path = os.path.dirname(build_gn_path)
+  build_gn_content = _ReadFile(build_gn_path)
+  headers_in_build_gn = GetHeadersInBuildGnFileSources(build_gn_content,
+                                                       target_abs_path)
+  return header_path in headers_in_build_gn
+
+
+def GetHeadersInBuildGnFileSources(file_content, target_abs_path):
+  """Returns a set with all the .h files in the file_content.
+
+  Args:
+    file_content: a string with the content of the BUILD.gn file.
+    target_abs_path: the absolute path to the directory where the
+      BUILD.gn file lives.
+
+  Returns:
+    A set with all the headers (.h file) in the file_content.
+    The set contains absolute paths.
+  """
+  headers_in_sources = set([])
+  for target_match in TARGET_RE.finditer(file_content):
+    target_contents = target_match.group('target_contents')
+    for sources_match in SOURCES_RE.finditer(target_contents):
+      sources = sources_match.group('sources')
+      for source_file_match in SOURCE_FILE_RE.finditer(sources):
+        source_file = source_file_match.group('source_file')
+        if source_file.endswith('.h'):
+          headers_in_sources.add(os.path.join(target_abs_path, source_file))
+  return headers_in_sources
diff --git a/tools_webrtc/presubmit_checks_lib/check_orphan_headers_test.py b/tools_webrtc/presubmit_checks_lib/check_orphan_headers_test.py
new file mode 100755
index 0000000..d6561b3
--- /dev/null
+++ b/tools_webrtc/presubmit_checks_lib/check_orphan_headers_test.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS.  All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import unittest
+
+import check_orphan_headers
+
+
+class GetBuildGnPathFromFilePathTest(unittest.TestCase):
+
+  def testGetBuildGnFromSameDirectory(self):
+    file_path = '/home/projects/webrtc/base/foo.h'
+    expected_build_path = '/home/projects/webrtc/base/BUILD.gn'
+    file_exists = lambda p: p == '/home/projects/webrtc/base/BUILD.gn'
+    src_dir_path = '/home/projects/webrtc'
+    self.assertEqual(
+      expected_build_path,
+      check_orphan_headers.GetBuildGnPathFromFilePath(file_path,
+                                                      file_exists,
+                                                      src_dir_path))
+
+  def testGetBuildPathFromParentDirectory(self):
+    file_path = '/home/projects/webrtc/base/foo.h'
+    expected_build_path = '/home/projects/webrtc/BUILD.gn'
+    file_exists = lambda p: p == '/home/projects/webrtc/BUILD.gn'
+    src_dir_path = '/home/projects/webrtc'
+    self.assertEqual(
+      expected_build_path,
+      check_orphan_headers.GetBuildGnPathFromFilePath(file_path,
+                                                      file_exists,
+                                                      src_dir_path))
+
+  def testExceptionIfNoBuildGnFilesAreFound(self):
+    with self.assertRaises(check_orphan_headers.NoBuildGnFoundError):
+      file_path = '/home/projects/webrtc/base/foo.h'
+      file_exists = lambda p: False
+      src_dir_path = '/home/projects/webrtc'
+      check_orphan_headers.GetBuildGnPathFromFilePath(file_path,
+                                                      file_exists,
+                                                      src_dir_path)
+
+  def testExceptionIfFilePathIsNotAnHeader(self):
+    with self.assertRaises(check_orphan_headers.WrongFileTypeError):
+      file_path = '/home/projects/webrtc/base/foo.cc'
+      file_exists = lambda p: False
+      src_dir_path = '/home/projects/webrtc'
+      check_orphan_headers.GetBuildGnPathFromFilePath(file_path,
+                                                      file_exists,
+                                                      src_dir_path)
+
+
+class GetHeadersInBuildGnFileSourcesTest(unittest.TestCase):
+
+  def testEmptyFileReturnsEmptySet(self):
+    self.assertEqual(
+      set([]),
+      check_orphan_headers.GetHeadersInBuildGnFileSources('', '/a/b'))
+
+  def testReturnsSetOfHeadersFromFileContent(self):
+    file_content = """
+    # Some comments
+    if (is_android) {
+      import("//a/b/c.gni")
+      import("//d/e/f.gni")
+    }
+    source_set("foo") {
+      sources = ["foo.h"]
+      deps = [":bar"]
+    }
+    rtc_static_library("bar") {
+      sources = [
+        "bar.h",
+        "bar.cc",
+      ]
+      deps = [":bar"]
+    }
+    source_set("baz_foo") {
+      sources = ["baz/foo.h"]
+    }
+    """
+    self.assertEqual(
+      set(['/a/b/foo.h', '/a/b/bar.h', '/a/b/baz/foo.h']),
+      check_orphan_headers.GetHeadersInBuildGnFileSources(file_content,
+                                                          '/a/b'))
+
+
+if __name__ == '__main__':
+  unittest.main()