Allow generation and input of machine-readable component data.

Add hwid_tool command options to produce probe data
in plain YAML format, which can then be fed into
another command that adds it to the component
database.  The hwid_tool behavior is also modified to always
update the database files in place whenever data changes,
as opposed to creating a series of copies with incremental
changes (which is not very useful, since we have git history).

BUG=chrome-os-partner:6003
TEST=hwid_tool w/ assimilate_probe_data, probe_device, or
     board_create

Change-Id: I2bc4f4e20d8f2277dcb0fad57f3aef81daa1f75c
Reviewed-on: https://gerrit.chromium.org/gerrit/17392
Commit-Ready: Tammo Spalink <tammo@chromium.org>
Reviewed-by: Tammo Spalink <tammo@chromium.org>
Tested-by: Tammo Spalink <tammo@chromium.org>
diff --git a/hwid_database.py b/hwid_database.py
index 7b616d9..bc01eed 100644
--- a/hwid_database.py
+++ b/hwid_database.py
@@ -8,6 +8,16 @@
 import yaml
 
 
+def YamlWrite(structured_data):
+  """Wrap yaml.dump to make calling convention consistent."""
+  return yaml.dump(structured_data, default_flow_style=False)
+
+
+def YamlRead(serialized_data):
+  """Wrap yaml.load to make calling convention consistent."""
+  return yaml.safe_load(serialized_data)
+
+
 class InvalidDataError(ValueError):
   """Error in (en/de)coding or validating data."""
   pass
@@ -28,7 +38,7 @@
 
     After generating the output data, run decode on that to validate.
     """
-    yaml_data = yaml.dump(self, default_flow_style=False)
+    yaml_data = YamlWrite(self)
     self.Decode(yaml_data)
     return yaml_data
 
@@ -48,7 +58,7 @@
         return elt_type(**elt_data)
       return elt_data
     try:
-      field_dict = yaml.safe_load(data)
+      field_dict = YamlRead(data)
     except yaml.YAMLError, e:
       raise InvalidDataError("YAML deserialization error: %s" % e)
     c.ValidateSchema(field_dict)
diff --git a/hwid_tool.py b/hwid_tool.py
index ced72d3..5d2b4d3 100755
--- a/hwid_tool.py
+++ b/hwid_tool.py
@@ -17,7 +17,7 @@
 from common import Error, Obj
 from bom_names import BOM_NAME_SET
 from hwid_database import InvalidDataError, MakeDatastoreSubclass
-from probe import Probe
+from hwid_database import YamlWrite, YamlRead
 
 
 # The expected location of HWID data within a factory image.
@@ -181,6 +181,9 @@
   print ''
 
 
+# TODO(tammo): Move the below read and write into the hwid_database module.
+
+
 def ReadDatastore(path):
   """Read the component_db and all device data files."""
   data = Obj(comp_db={}, device_db={})
@@ -201,6 +204,29 @@
   return data
 
 
+def WriteDatastore(path, data):
+  """Write the component_db and all device data files."""
+  def WriteOnDiff(filename, raw_internal_data):
+    full_path = os.path.join(path, filename)
+    internal_data = (DATA_FILE_WARNING_MESSAGE_HEADER.split('\n') +
+                     raw_internal_data.strip('\n').split('\n'))
+    if os.path.exists(full_path):
+      with open(full_path, 'r') as f:
+        file_data = map(lambda s: s.strip('\n'), f.readlines())
+      diff = [line for line in difflib.unified_diff(file_data, internal_data)]
+      if not diff:
+        return
+      logging.info('updating %s with changes:\n%s' %
+                   (filename, '\n'.join(diff)))
+    else:
+      logging.info('creating new data file %s' % filename)
+    with open(full_path, 'w') as f:
+      f.write('%s\n' % '\n'.join(internal_data))
+  WriteOnDiff(COMPONENT_DB_FILENAME, data.comp_db.Encode())
+  for device_name, device in data.device_db.items():
+    WriteOnDiff(device_name, device.Encode())
+
+
 def GetAvailableBomNames(data, board, count):
   """Return count random bom names that are not yet used by board."""
   existing_bom_names = set(bn for bn in data.device_db[board].hwid_map)
@@ -242,12 +268,19 @@
   return None
 
 
-def CalcComponentDbClassMap(comp_db):
-  """Return dict of (comp: comp_class) mappings."""
-  return dict((comp, comp_class) for comp in comp_map
+def CalcCompDbClassMap(comp_db):
+  """Return dict of (comp_name: comp_class) mappings."""
+  return dict((comp_name, comp_class) for comp_name in comp_map
               for comp_class, comp_map in comp_db.component_registry.items())
 
 
+def CalcCompDbProbeValMap(comp_db):
+  """Return dict of (probe_value: comp_name) mappings."""
+  return dict((probe_value, comp_name)
+              for comp_map in comp_db.component_registry.values()
+              for comp_name, probe_value in comp_map.items())
+
+
 def CalcReverseComponentMap(hwid_map):
   """Return dict of (comp_class: dict of (component: bom name set)) mappings.
 
@@ -440,7 +473,7 @@
   """
   def ClassifyInputComponents(comp_list):
     """Return dict of (comp_class: comp list), associating comps to classes."""
-    comp_db_class_map = CalcComponentDbClassMap(data.comp_db)
+    comp_db_class_map = CalcCompDbClassMap(data.comp_db)
     comp_class_subset = set(comp_db_class_map[comp] for comp in comp_list)
     return dict((comp_class, [comp for comp in comp_list
                               if comp_db_class_map[comp] == comp_class])
@@ -479,10 +512,7 @@
 def CookComponentProbeResults(comp_db, probe_results):
   """TODO(tammo): Add more here XXX."""
   match = Obj(known={}, unknown={})
-  comp_reference_map = dict(
-      (probe_value, comp)
-      for comp_map in comp_db.component_registry.values()
-      for comp, probe_value in comp_map.items())
+  comp_reference_map = CalcCompDbProbeValMap(comp_db)
   for probe_class, probe_value in probe_results.components.items():
     if probe_value is None:
       continue
@@ -678,37 +708,20 @@
       IndentedStructuredPrint(0, comp_class + ':', comp_map)
 
 
-@Command('validate_data',
-         CmdArg('-w', '--write_processed_output', action='store_true'))
-def ValidateDataCommand(config, data):
-  """Canonically format all file data, report any differences with originals.
-
-  Differences are reported as unified diffs.
-  """
-  def Diff(filename, raw_internal_data):
-    full_path = os.path.join(config.data_path, filename)
-    internal_data = (DATA_FILE_WARNING_MESSAGE_HEADER.split('\n') +
-                     raw_internal_data.strip('\n').split('\n'))
-    with open(full_path, 'r') as f:
-      file_data = map(lambda s: s.strip('\n'), f.readlines())
-    for l in difflib.unified_diff(file_data, internal_data):
-      print l
-    if config.write_processed_output:
-      with open(full_path + '_presanity', 'w') as of:
-          of.write('%s\n' % '\n'.join(file_data))
-      with open(full_path, 'w') as of:
-          of.write('%s\n' % '\n'.join(internal_data))
-  Diff(COMPONENT_DB_FILENAME, data.comp_db.Encode())
-  for device_name, device in data.device_db.items():
-    Diff(device_name, device.Encode())
-
-
 @Command('probe_device',
          CmdArg('-b', '--board'),
-         CmdArg('-c', '--classes', nargs='*'))
+         CmdArg('-c', '--classes', nargs='*'),
+         CmdArg('-r', '--raw', action='store_true'))
 def ProbeDeviceProperties(config, data):
   # TODO(tammo): Implement classes arg behavior.
+  # TODO(tammo): Move this command into gooftool to avoid having to
+  # load the probe module here. The probe module depends on other
+  # modules that are not available except on DUT machines.
+  from probe import Probe
   probe_results = Probe(data.comp_db.component_registry)
+  if config.raw:
+    print YamlWrite(probe_results.__dict__)
+    return
   IndentedStructuredPrint(0, 'component probe results:',
                           probe_results.components)
   missing_classes = (set(data.comp_db.component_registry) -
@@ -738,6 +751,63 @@
                             cooked_device_details.initial_config_set)
 
 
+@Command('assimilate_probe_data',
+         CmdArg('-b', '--board'))
+def AssimilateProbeData(config, data):
+  """Read new data from stdin then merge into existing data.
+
+  TODO(tammo): Add more here.
+  """
+  probe_results = Obj(**YamlRead(sys.stdin.read()))
+  components = getattr(probe_results, 'components', {})
+  registry = data.comp_db.component_registry
+  if not set(components) <= set(registry):
+    logging.critical('data contains component classes that are not preset in '
+                     'the component_db, specifically %r' %
+                     sorted(set(components) - set(registry)))
+  reverse_registry = CalcCompDbProbeValMap(data.comp_db)
+  for comp_class, probe_value in components.items():
+    if probe_value is None or probe_value in reverse_registry:
+      continue
+    comp_map = registry[comp_class]
+    comp_map['%s_%d' % (comp_class, len(comp_map))] = probe_value
+  if not config.board:
+    if (hasattr(probe_results, 'volatile') or
+        hasattr(probe_results, 'initial_config')):
+      logging.warning('volatile and/or initial_config data is only '
+                      'assimilated when a board is specified')
+    return
+  device = data.device_db[config.board]
+
+
+@Command('board_create',
+         CmdArg('board_name'))
+def CreateBoard(config, data):
+  """Create a fresh empty board with the specified name."""
+  if not config.board_name.isalpha():
+    print 'ERROR: Board names must be alpha-only.'
+    return
+  board_name = config.board_name.upper()
+  if board_name in data.device_db:
+    print 'ERROR: Board %s already exists.' % board_name
+    return
+  device = Device(
+      bitmap_file_path='',
+      hash_map={},
+      hwid_list_deprecated=[],
+      hwid_list_eol=[],
+      hwid_list_qualified=[],
+      hwid_list_supported=[],
+      hwid_map={},
+      initial_config_map={},
+      initial_config_use_map={},
+      release_map={},
+      variant_map={},
+      volatile_map={},
+      vpd_ro_field_list=[])
+  data.device_db[board_name] = device
+
+
 class HackedArgumentParser(ArgumentParser):
   """Replace the usage and help strings to better format command names.
 
@@ -814,6 +884,7 @@
   except Exception, e:
     logging.exception(e)
     sys.exit('UNCAUGHT RUNTIME EXCEPTION %s' % e)
+  WriteDatastore(config.data_path, data)
 
 
 if __name__ == '__main__':