Store each file as "hash/<sha1>", the list of hashes as "meta/@<rev>", and serve them at "serve_file/@<rev>/<file-path>".

BUG=459167
R=mnaganov@chromium.org

Review URL: https://codereview.chromium.org/954693002

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/chrome-devtools-frontend@294235 0039d316-1c4b-4281-b951-d872f2087c98
diff --git a/gae/config.py b/gae/config.py
index 5f55e2f..e241262 100644
--- a/gae/config.py
+++ b/gae/config.py
@@ -8,7 +8,9 @@
 
 DEFAULT = {
   'bucket': '/gs/chrome-devtools-frontend/%s',
-  'max_age': 7 * 24 * 60,  # in seconds
+  'hash_path': 'hash/%s',
+  'max_age': 7 * 24 * 60 * 60,  # in seconds
+  'meta_path': 'meta/%s',
   'revision_path': 'revs/%s',
   'version_path': 'vers/%s',
   'zip_path': 'zips/%s.zip',
@@ -21,6 +23,12 @@
   def get_full_path(self, path):
     return self.conf['bucket'] % path
 
+  def get_hash_path(self, hash):
+    return self.conf['hash_path'] % hash
+
+  def get_meta_path(self, rev):
+    return self.conf['meta_path'] % rev
+
   def get_max_age(self):
     return self.conf['max_age']
 
diff --git a/gae/main.py b/gae/main.py
index e9b427c..53263da 100644
--- a/gae/main.py
+++ b/gae/main.py
@@ -23,30 +23,68 @@
     self.response.write(str(current_config))
 
 class Server(webapp2.RequestHandler):
-  def get(self, tag_type, tag, path):
-    cache_key_name = '/res/%s/%s/%s' % (tag_type, tag, path)
-    content = cache.get_content(cache_key_name)
-    helper = config.ConfigHelper(current_config)
+  def get_hash_cache_key(self, rev, path):
+    return '/hashes/%s/%s' % (rev, path)
 
-    if not content:
-      if tag_type == 'rev':
-        meta_file_name = helper.get_revision_path(tag)
-      elif tag_type =='ver':
-        meta_file_name = helper.get_version_path(tag)
-      else:
-        self.abort(404)
+  def get_file_hash(self, helper, rev, path):
+    file_hash = cache.get_content(self.get_hash_cache_key(rev, path))
 
+    if not file_hash:
+      meta_cache_key = '/meta-parsed/%s' % rev
+      if cache.get_content(meta_cache_key):
+        return None
+
+      meta_file_name = helper.get_meta_path(rev)
       meta_content = file_reader.read(helper, meta_file_name)
-      if meta_content:
-        zip_file_name = meta_content.strip(' \t\n')
-      else:
-        self.abort(404)
+      if not meta_content:
+        return None
 
-      content = zip_proxy.read(helper, zip_file_name, path)
+      for line in meta_content.split('\n'):
+        hash_and_path = line.split(':')
+        if len(hash_and_path) == 2:
+          (line_hash, line_path) = hash_and_path
+          cache.set_content(self.get_hash_cache_key(rev, line_path), line_hash)
+          if line_path == path:
+            file_hash = line_hash
+      cache.set_content(meta_cache_key, 'parsed')
+
+    return file_hash
+
+  def get(self, tag_type, tag, path):
+    helper = config.ConfigHelper(current_config)
+    content = None
+
+    if tag_type == 'file':
+      file_hash = self.get_file_hash(helper, tag, path)
+      if not file_hash:
+        self.abort(404)
+      content = file_reader.read(helper, helper.get_hash_path(file_hash))
       if not content:
         self.abort(404)
       content = content_processor.process(path, content)
-      cache.set_content(cache_key_name, content)
+
+    else:
+      cache_key_name = '/res/%s/%s/%s' % (tag_type, tag, path)
+      content = cache.get_content(cache_key_name)
+      if not content:
+        if tag_type == 'rev':
+          meta_file_name = helper.get_revision_path(tag)
+        elif tag_type =='ver':
+          meta_file_name = helper.get_version_path(tag)
+        else:
+          self.abort(404)
+
+        meta_content = file_reader.read(helper, meta_file_name)
+        if meta_content:
+          zip_file_name = meta_content.strip(' \t\n')
+        else:
+          self.abort(404)
+
+        content = zip_proxy.read(helper, zip_file_name, path)
+        if not content:
+          self.abort(404)
+        content = content_processor.process(path, content)
+        cache.set_content(cache_key_name, content)
 
     self.response.headers['Content-Type'] = content_type.from_path(path)
     self.response.headers['Access-Control-Allow-Origin'] = '*'
diff --git a/gce/uploader_iteration.sh b/gce/uploader_iteration.sh
index 9003ea0..ce2e083 100755
--- a/gce/uploader_iteration.sh
+++ b/gce/uploader_iteration.sh
@@ -62,7 +62,9 @@
     fi
     ZIPS_DIR="$LOCAL_STORAGE_DIR/zips"
     REVS_DIR="$LOCAL_STORAGE_DIR/revs"
-    mkdir -p $ZIPS_DIR $REVS_DIR
+    META_DIR="$LOCAL_STORAGE_DIR/meta"
+    HASH_DIR="$LOCAL_STORAGE_DIR/hash"
+    mkdir -p $ZIPS_DIR $REVS_DIR $META_DIR $HASH_DIR
 
     if echo "$BLINK_REVISIONS_BLACKLIST" | grep -w $REVISION; then
         echo "Revision $REVISION is in the blacklist, skipping over"
@@ -129,6 +131,15 @@
     find -type f -not -name $MANIFEST_FILE_NAME -print | sed -e 's#^\./##' >> $MANIFEST_FILE_NAME
 
     zip -9r $ZIP_FILE * || exit $EXIT_ZIP_FRONTEND
+
+    # Copy each file to hash/<sha1>, write <sha1>:<path> to meta file.
+    META_FILE_NAME="$META_DIR/@$REVISION"
+    for FILE_NAME in $(find -type f); do
+        FILE_SHA1=$(sha1sum $FILE_NAME | awk '{print $1}')
+        cp $FILE_NAME "$HASH_DIR/$FILE_SHA1"
+        echo "$FILE_SHA1:${FILE_NAME##./}" >> $META_FILE_NAME
+    done;
+
     popd
 
     ZIP_SHA1=$(sha1sum $ZIP_FILE | awk '{print $1}')