fix encoding issues with inputs for better Python 3 support

When running subprocesses like '!<(which foo)', we return subprocess's
stdout contents as bytes, not as a string.  Make sure we decode it into
UTF-8 first.

When reading gyp files, we don't specify an encoding, so the default
might be ASCII which breaks when the gyp files contain UTF-8.  Change
it to always read it as a binary stream before decoding to UTF-8.

With those fixes in place, we need to rework all the type checks for
strings & numbers.  Introduce some aliases to the right set of types
for the active version of Python, and then change all the type checks
to use isinstance.

Bug: 926296
Test: building CrOS gyp packages with python3 works again
Test: `./gyptest.py -a` mostly passes (4 failures look toolchain related)
Change-Id: I416acf140d01948748f5cc09e966b52f166b82d0
Reviewed-on: https://chromium-review.googlesource.com/c/external/gyp/+/1697943
Reviewed-by: Dirk Pranke <dpranke@chromium.org>
diff --git a/pylib/gyp/input.py b/pylib/gyp/input.py
index 39bdbbb..42c279c 100644
--- a/pylib/gyp/input.py
+++ b/pylib/gyp/input.py
@@ -54,6 +54,23 @@
 per_process_data = {}
 per_process_aux_data = {}
 
+try:
+  _str_types = (basestring,)
+# There's no basestring in python3.
+except NameError:
+  _str_types = (str,)
+
+try:
+  _int_types = (int, long)
+# There's no long in python3.
+except NameError:
+  _int_types = (int,)
+
+# Shortcuts as we use these combos a lot.
+_str_int_types = _str_types + _int_types
+_str_int_list_types = _str_int_types + (list,)
+
+
 def IsPathSection(section):
   # If section ends in one of the '=+?!' characters, it's applied to a section
   # without the trailing characters.  '/' is notably absent from this list,
@@ -221,7 +238,7 @@
     return data[build_file_path]
 
   if os.path.exists(build_file_path):
-    build_file_contents = open(build_file_path).read()
+    build_file_contents = open(build_file_path, 'rb').read().decode('utf-8')
   else:
     raise GypError("%s not found (cwd: %s)" % (build_file_path, os.getcwd()))
 
@@ -638,7 +655,7 @@
 
   The canonical form is such that str(int(string)) == string.
   """
-  if type(string) is str:
+  if isinstance(string, _str_types):
     # This function is called a lot so for maximum performance, avoid
     # involving regexps which would otherwise make the code much
     # shorter. Regexps would need twice the time of this function.
@@ -908,7 +925,7 @@
             # in python 2.5 and later.
             raise GypError("Call to '%s' returned exit status %d while in %s." %
                            (contents, p.returncode, build_file))
-          replacement = p_stdout.rstrip()
+          replacement = p_stdout.decode('utf-8').rstrip()
 
         cached_command_results[cache_key] = replacement
       else:
@@ -938,7 +955,7 @@
 
     if type(replacement) is list:
       for item in replacement:
-        if not contents[-1] == '/' and type(item) not in (str, int):
+        if not contents[-1] == '/' and not isinstance(item, _str_int_types):
           raise GypError('Variable ' + contents +
                          ' must expand to a string or list of strings; ' +
                          'list contains a ' +
@@ -948,7 +965,7 @@
       # with conditions sections.
       ProcessVariablesAndConditionsInList(replacement, phase, variables,
                                           build_file)
-    elif type(replacement) not in (str, int):
+    elif not isinstance(replacement, _str_int_types):
           raise GypError('Variable ' + str(contents) +
                          ' must expand to a string or list of strings; ' +
                          'found a ' + replacement.__class__.__name__)
@@ -1067,7 +1084,7 @@
   # use a command expansion directly inside a condition.
   cond_expr_expanded = ExpandVariables(cond_expr, phase, variables,
                                        build_file)
-  if type(cond_expr_expanded) not in (str, int):
+  if not isinstance(cond_expr_expanded, _str_int_types):
     raise ValueError(
           'Variable expansion in this context permits str and int ' + \
             'only, found ' + cond_expr_expanded.__class__.__name__)
@@ -1143,7 +1160,7 @@
   # Any keys with plain string values in the_dict become automatic variables.
   # The variable name is the key name with a "_" character prepended.
   for key, value in the_dict.items():
-    if type(value) in (str, int, list):
+    if isinstance(value, _str_int_list_types):
       variables['_' + key] = value
 
 
@@ -1156,7 +1173,7 @@
   # (it could be a list or it could be parentless because it is a root dict),
   # the_dict_key will be None.
   for key, value in the_dict.get('variables', {}).items():
-    if type(value) not in (str, int, list):
+    if not isinstance(value, _str_int_list_types):
       continue
 
     if key.endswith('%'):
@@ -1209,9 +1226,9 @@
 
   for key, value in the_dict.items():
     # Skip "variables", which was already processed if present.
-    if key != 'variables' and type(value) is str:
+    if key != 'variables' and isinstance(value, _str_types):
       expanded = ExpandVariables(value, phase, variables, build_file)
-      if type(expanded) not in (str, int):
+      if not isinstance(expanded, _str_int_types):
         raise ValueError(
               'Variable expansion in this context permits str and int ' + \
               'only, found ' + expanded.__class__.__name__ + ' for ' + key)
@@ -1268,7 +1285,7 @@
   for key, value in the_dict.items():
     # Skip "variables" and string values, which were already processed if
     # present.
-    if key == 'variables' or type(value) is str:
+    if key == 'variables' or isinstance(value, _str_types):
       continue
     if type(value) is dict:
       # Pass a copy of the variables dict so that subdicts can't influence
@@ -1282,7 +1299,7 @@
       # copy is necessary here.
       ProcessVariablesAndConditionsInList(value, phase, variables,
                                           build_file)
-    elif type(value) is not int:
+    elif not isinstance(value, _int_types):
       raise TypeError('Unknown type ' + value.__class__.__name__ + \
                       ' for ' + key)
 
@@ -1299,9 +1316,9 @@
       ProcessVariablesAndConditionsInDict(item, phase, variables, build_file)
     elif type(item) is list:
       ProcessVariablesAndConditionsInList(item, phase, variables, build_file)
-    elif type(item) is str:
+    elif isinstance(item, _str_types):
       expanded = ExpandVariables(item, phase, variables, build_file)
-      if type(expanded) in (str, int):
+      if isinstance(expanded, _str_int_types):
         the_list[index] = expanded
       elif type(expanded) is list:
         the_list[index:index+1] = expanded
@@ -1315,7 +1332,7 @@
               'Variable expansion in this context permits strings and ' + \
               'lists only, found ' + expanded.__class__.__name__ + ' at ' + \
               index)
-    elif type(item) is not int:
+    elif not isinstance(item, _int_types):
       raise TypeError('Unknown type ' + item.__class__.__name__ + \
                       ' at index ' + index)
     index = index + 1
@@ -2050,14 +2067,14 @@
   hashable_to_set = set(x for x in to if is_hashable(x))
   for item in fro:
     singleton = False
-    if type(item) in (str, int):
+    if isinstance(item, _str_int_types):
       # The cheap and easy case.
       if is_paths:
         to_item = MakePathRelative(to_file, fro_file, item)
       else:
         to_item = item
 
-      if not (type(item) is str and item.startswith('-')):
+      if not (isinstance(item, _str_types) and item.startswith('-')):
         # Any string that doesn't begin with a "-" is a singleton - it can
         # only appear once in a list, to be enforced by the list merge append
         # or prepend.
@@ -2114,8 +2131,8 @@
     # modified.
     if k in to:
       bad_merge = False
-      if type(v) in (str, int):
-        if type(to[k]) not in (str, int):
+      if isinstance(v, _str_int_types):
+        if not isinstance(to[k], _str_int_types):
           bad_merge = True
       elif type(v) is not type(to[k]):
         bad_merge = True
@@ -2125,7 +2142,7 @@
             'Attempt to merge dict value of type ' + v.__class__.__name__ + \
             ' into incompatible type ' + to[k].__class__.__name__ + \
             ' for key ' + k)
-    if type(v) in (str, int):
+    if isinstance(v, _str_int_types):
       # Overwrite the existing value, if any.  Cheap and easy.
       is_path = IsPathSection(k)
       if is_path:
@@ -2600,7 +2617,7 @@
                    "must be a list." %
                    (target_name, build_file))
   working_directory = run_as.get('working_directory')
-  if working_directory and type(working_directory) is not str:
+  if working_directory and not isinstance(working_directory, _str_types):
     raise GypError("The 'working_directory' for 'run_as' in target %s "
                    "in file %s should be a string." %
                    (target_name, build_file))
@@ -2635,7 +2652,7 @@
   # Use items instead of iteritems because there's no need to try to look at
   # reinserted keys and their associated values.
   for k, v in the_dict.items():
-    if type(v) is int:
+    if isinstance(v, _int_types):
       v = str(v)
       the_dict[k] = v
     elif type(v) is dict:
@@ -2643,7 +2660,7 @@
     elif type(v) is list:
       TurnIntIntoStrInList(v)
 
-    if type(k) is int:
+    if isinstance(k, _int_types):
       del the_dict[k]
       the_dict[str(k)] = v
 
@@ -2652,7 +2669,7 @@
   """Given list the_list, recursively converts all integers into strings.
   """
   for index, item in enumerate(the_list):
-    if type(item) is int:
+    if isinstance(item, _int_types):
       the_list[index] = str(item)
     elif type(item) is dict:
       TurnIntIntoStrInDict(item)