Handle image lazy loading on Wikipedia

Hopefully Wikipedia will eventually use <img> instead of <span> as the placeholder.

Score changes (as of @150267738):

https://x20web.corp.google.com/~wychen/domdistillerscore/wiki-lazy/top-mobile-mhtml.html
  3 image entries changed (all Wikipedia pages):
   Precision remains 1.0
   Recall 0.077 → 0.538
   F1     0.143 → 0.700

Performance changes:
  distillable-desktop-mhtml is 3.5% slower.

BUG=647667
R=mdjones@chromium.org

Review-Url: https://codereview.chromium.org/2729143003 .
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
index b1d6e5b..a615b73 100644
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
@@ -9,6 +9,7 @@
 import com.google.gwt.dom.client.ImageElement;
 import com.google.gwt.dom.client.NodeList;
 import org.chromium.distiller.DomUtil;
+import org.chromium.distiller.JavaScript;
 import org.chromium.distiller.LogUtil;
 import org.chromium.distiller.webdocument.WebFigure;
 import org.chromium.distiller.webdocument.WebImage;
@@ -31,6 +32,7 @@
         relevantTags.add("IMG");
         relevantTags.add("PICTURE");
         relevantTags.add("FIGURE");
+        relevantTags.add("SPAN");
     }
 
     private static final String[] LAZY_IMAGE_ATTRIBUTES =
@@ -75,6 +77,19 @@
             return new WebFigure(img, width, height, imgSrc, figcaption);
         }
 
+        if ("SPAN".equals(e.getTagName())) {
+            if (!e.getAttribute("class").contains("lazy-image-placeholder")) {
+                return null;
+            }
+            // Image lazy loading on Wikipedia.
+            ie = Document.get().createImageElement();
+            imgSrc = e.getAttribute("data-src");
+            width = JavaScript.parseInt(e.getAttribute("data-width"));
+            height = JavaScript.parseInt(e.getAttribute("data-height"));
+            ie.setAttribute("srcset", e.getAttribute("data-srcset"));
+            return new WebImage(ie, width, height, imgSrc);
+        }
+
         extractImageAttributes(ie);
         return new WebImage(e, width, height, imgSrc);
     }
diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java
index c72e56c..654f6cb 100644
--- a/javatests/org/chromium/distiller/ContentExtractorTest.java
+++ b/javatests/org/chromium/distiller/ContentExtractorTest.java
@@ -99,6 +99,8 @@
                 "<source srcset=\"image100 100w, //example.org/image300 300w\">" +
                 "<img>" +
             "</picture></figure>" +
+            "<span class=\"lazy-image-placeholder\" data-src=\"/image\" " +
+                "data-srcset=\"/image2x 2x\" data-width=\"20\" data-height=\"10\"></span>" +
             "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" data-src=\"image2\">" +
             "<table role=\"grid\"><tbody><tr><td>" +
                 "<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" +
@@ -115,6 +117,8 @@
                 "<source srcset=\"http://example.com/path/image100 100w, http://example.org/image300 300w\">" +
                 "<img>" +
             "</picture></figure>" +
+            "<img srcset=\"http://example.com/image2x 2x\" src=\"http://example.com/image\" " +
+                "width=\"20\" height=\"10\">" +
             "<img alt=\"b\" src=\"http://example.com/path/image2\">" +
             "<table role=\"grid\"><tbody><tr><td>" +
                 "<img alt=\"b\" src=\"http://example.com/image\" " +