Limit memory allocation when reading invalid Huffman codes.

This is a backported fix for: CVE-2020-36332

This is a merge of:
dce5d7643177633ebe3513af492ea8c08c299cf3
39cb9aad85ca7bb1d193013460db1f8cc6bff109
067031eaed11e91d9914e1e872738c7bdf075e0b

Change-Id: I166e3d7ccef73be26ff9eb4fa8943efd3dac2d81
diff --git a/src/dec/vp8l.c b/src/dec/vp8l.c
index cb2e317..b57c2c2 100644
--- a/src/dec/vp8l.c
+++ b/src/dec/vp8l.c
@@ -362,11 +362,14 @@
   uint32_t* huffman_image = NULL;
   HTreeGroup* htree_groups = NULL;
   HuffmanCode* huffman_tables = NULL;
-  HuffmanCode* next = NULL;
+  HuffmanCode* huffman_table = NULL;
   int num_htree_groups = 1;
+  int num_htree_groups_max = 1;
   int max_alphabet_size = 0;
   int* code_lengths = NULL;
   const int table_size = kTableSize[color_cache_bits];
+  int* mapping = NULL;
+  int ok = 0;
 
   if (allow_recursion && VP8LReadBits(br, 1)) {
     // use meta Huffman codes.
@@ -383,10 +386,36 @@
       // The huffman data is stored in red and green bytes.
       const int group = (huffman_image[i] >> 8) & 0xffff;
       huffman_image[i] = group;
-      if (group >= num_htree_groups) {
-        num_htree_groups = group + 1;
+      if (group >= num_htree_groups_max) {
+        num_htree_groups_max = group + 1;
       }
     }
+    // Check the validity of num_htree_groups_max. If it seems too big, use a
+    // smaller value for later. This will prevent big memory allocations to end
+    // up with a bad bitstream anyway.
+    // The value of 1000 is totally arbitrary. We know that num_htree_groups_max
+    // is smaller than (1 << 16) and should be smaller than the number of pixels
+    // (though the format allows it to be bigger).
+    if (num_htree_groups_max > 1000 || num_htree_groups_max > xsize * ysize) {
+      // Create a mapping from the used indices to the minimal set of used
+      // values [0, num_htree_groups)
+      mapping = (int*)WebPSafeMalloc(num_htree_groups_max, sizeof(*mapping));
+      if (mapping == NULL) {
+        dec->status_ = VP8_STATUS_OUT_OF_MEMORY;
+        goto Error;
+      }
+      // -1 means a value is unmapped, and therefore unused in the Huffman
+      // image.
+      memset(mapping, 0xff, num_htree_groups_max * sizeof(*mapping));
+      for (num_htree_groups = 0, i = 0; i < huffman_pixs; ++i) {
+        // Get the current mapping for the group and remap the Huffman image.
+        int* const mapped_group = &mapping[huffman_image[i]];
+        if (*mapped_group == -1) *mapped_group = num_htree_groups++;
+        huffman_image[i] = *mapped_group;
+      }
+    } else {
+      num_htree_groups = num_htree_groups_max;
+    }
   }
 
   if (br->eos_) goto Error;
@@ -413,72 +442,89 @@
     goto Error;
   }
 
-  next = huffman_tables;
-  for (i = 0; i < num_htree_groups; ++i) {
-    HTreeGroup* const htree_group = &htree_groups[i];
-    HuffmanCode** const htrees = htree_group->htrees;
-    int size;
-    int total_size = 0;
-    int is_trivial_literal = 1;
-    int max_bits = 0;
-    for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
-      int alphabet_size = kAlphabetSize[j];
-      htrees[j] = next;
-      if (j == 0 && color_cache_bits > 0) {
-        alphabet_size += 1 << color_cache_bits;
-      }
-      size = ReadHuffmanCode(alphabet_size, dec, code_lengths, next);
-      if (size == 0) {
-        goto Error;
-      }
-      if (is_trivial_literal && kLiteralMap[j] == 1) {
-        is_trivial_literal = (next->bits == 0);
-      }
-      total_size += next->bits;
-      next += size;
-      if (j <= ALPHA) {
-        int local_max_bits = code_lengths[0];
-        int k;
-        for (k = 1; k < alphabet_size; ++k) {
-          if (code_lengths[k] > local_max_bits) {
-            local_max_bits = code_lengths[k];
-          }
+  huffman_table = huffman_tables;
+  for (i = 0; i < num_htree_groups_max; ++i) {
+    // If the index "i" is unused in the Huffman image, just make sure the
+    // coefficients are valid but do not store them.
+    if (mapping != NULL && mapping[i] == -1) {
+      for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+        int alphabet_size = kAlphabetSize[j];
+        if (j == 0 && color_cache_bits > 0) {
+          alphabet_size += (1 << color_cache_bits);
         }
-        max_bits += local_max_bits;
+        // Passing in NULL so that nothing gets filled.
+        if (!ReadHuffmanCode(alphabet_size, dec, code_lengths, NULL)) {
+          goto Error;
+        }
       }
-    }
-    htree_group->is_trivial_literal = is_trivial_literal;
-    htree_group->is_trivial_code = 0;
-    if (is_trivial_literal) {
-      const int red = htrees[RED][0].value;
-      const int blue = htrees[BLUE][0].value;
-      const int alpha = htrees[ALPHA][0].value;
-      htree_group->literal_arb =
-          ((uint32_t)alpha << 24) | (red << 16) | blue;
-      if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
-        htree_group->is_trivial_code = 1;
-        htree_group->literal_arb |= htrees[GREEN][0].value << 8;
+    } else {
+      HTreeGroup* const htree_group =
+          &htree_groups[(mapping == NULL) ? i : mapping[i]];
+      HuffmanCode** const htrees = htree_group->htrees;
+      int size;
+      int total_size = 0;
+      int is_trivial_literal = 1;
+      int max_bits = 0;
+      for (j = 0; j < HUFFMAN_CODES_PER_META_CODE; ++j) {
+        int alphabet_size = kAlphabetSize[j];
+        htrees[j] = huffman_table;
+        if (j == 0 && color_cache_bits > 0) {
+          alphabet_size += (1 << color_cache_bits);
+        }
+        size = ReadHuffmanCode(alphabet_size, dec, code_lengths, huffman_table);
+        if (size == 0) {
+          goto Error;
+        }
+        if (is_trivial_literal && kLiteralMap[j] == 1) {
+          is_trivial_literal = (huffman_table->bits == 0);
+        }
+        total_size += huffman_table->bits;
+        huffman_table += size;
+        if (j <= ALPHA) {
+          int local_max_bits = code_lengths[0];
+          int k;
+          for (k = 1; k < alphabet_size; ++k) {
+            if (code_lengths[k] > local_max_bits) {
+              local_max_bits = code_lengths[k];
+            }
+          }
+          max_bits += local_max_bits;
+        }
       }
+      htree_group->is_trivial_literal = is_trivial_literal;
+      htree_group->is_trivial_code = 0;
+      if (is_trivial_literal) {
+        const int red = htrees[RED][0].value;
+        const int blue = htrees[BLUE][0].value;
+        const int alpha = htrees[ALPHA][0].value;
+        htree_group->literal_arb = ((uint32_t)alpha << 24) | (red << 16) | blue;
+        if (total_size == 0 && htrees[GREEN][0].value < NUM_LITERAL_CODES) {
+          htree_group->is_trivial_code = 1;
+          htree_group->literal_arb |= htrees[GREEN][0].value << 8;
+        }
+      }
+      htree_group->use_packed_table =
+          !htree_group->is_trivial_code && (max_bits < HUFFMAN_PACKED_BITS);
+      if (htree_group->use_packed_table) BuildPackedTable(htree_group);
     }
-    htree_group->use_packed_table = !htree_group->is_trivial_code &&
-                                    (max_bits < HUFFMAN_PACKED_BITS);
-    if (htree_group->use_packed_table) BuildPackedTable(htree_group);
   }
-  WebPSafeFree(code_lengths);
+  ok = 1;
 
-  // All OK. Finalize pointers and return.
+  // All OK. Finalize pointers.
   hdr->huffman_image_ = huffman_image;
   hdr->num_htree_groups_ = num_htree_groups;
   hdr->htree_groups_ = htree_groups;
   hdr->huffman_tables_ = huffman_tables;
-  return 1;
 
  Error:
   WebPSafeFree(code_lengths);
-  WebPSafeFree(huffman_image);
-  WebPSafeFree(huffman_tables);
-  VP8LHtreeGroupsFree(htree_groups);
-  return 0;
+  WebPSafeFree(mapping);
+  if (!ok) {
+    WebPSafeFree(huffman_image);
+    WebPSafeFree(huffman_tables);
+    VP8LHtreeGroupsFree(htree_groups);
+  }
+  return ok;
 }
 
 //------------------------------------------------------------------------------