Support extraction of lazily-loaded images
BUG=481111
R=mdjones@chromium.org
Review URL: https://codereview.chromium.org/2000093005 .
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
index c9b527a..5b4eb00 100644
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
@@ -6,6 +6,7 @@
import com.google.gwt.dom.client.Element;
import com.google.gwt.dom.client.ImageElement;
+import org.chromium.distiller.LogUtil;
import org.chromium.distiller.webdocument.WebImage;
import java.util.HashSet;
@@ -21,6 +22,8 @@
// TODO(mdjones): Add "DIV" to this list for css images and possibly captions.
relevantTags.add("IMG");
}
+ private static final String[] LAZY_IMAGE_ATTRIBUTES =
+ {"data-src", "data-original", "datasrc", "data-url"};
@Override
public Set<String> getRelevantTagNames() {
@@ -41,13 +44,29 @@
// This will get the absolute URL of the image and
// the displayed image dimension.
ImageElement imageElement = ImageElement.as(e);
- imgSrc = imageElement.getSrc();
- // As an ImageElement is manipulated here, it is possible
- // to get the real dimensions.
- width = imageElement.getWidth();
- height = imageElement.getHeight();
+ // Try to get lazily-loaded images before falling back to get the src attribute.
+ for(String attr: LAZY_IMAGE_ATTRIBUTES) {
+ imgSrc = imageElement.getAttribute(attr);
+ if (!imgSrc.isEmpty())
+ break;
+ }
+ if (!imgSrc.isEmpty()) {
+ // We cannot trust the dimension if the image is not loaded yet.
+ // In some cases there are 1x1 placeholder images.
+ width = 0;
+ height = 0;
+ } else {
+ imgSrc = imageElement.getSrc();
+ // As an ImageElement is manipulated here, it is possible
+ // to get the real dimensions.
+ width = imageElement.getWidth();
+ height = imageElement.getHeight();
+ }
}
+ if (LogUtil.isLoggable(LogUtil.DEBUG_LEVEL_VISIBILITY_INFO)) {
+ LogUtil.logToConsole("Extracted WebImage: " + imgSrc);
+ }
return new WebImage(e, width, height, imgSrc);
}
}
diff --git a/java/org/chromium/distiller/webdocument/WebImage.java b/java/org/chromium/distiller/webdocument/WebImage.java
index 0c14d46..ceb3a40 100644
--- a/java/org/chromium/distiller/webdocument/WebImage.java
+++ b/java/org/chromium/distiller/webdocument/WebImage.java
@@ -45,6 +45,7 @@
if (textOnly) return "";
ImageElement ie = ImageElement.as(Element.as(imgElement.cloneNode(false)));
+ ie.setSrc(srcUrl);
ie.setSrc(ie.getSrc());
// If computed width or height is zero, do not override them
// to keep them visible.
diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java
index ddc97e3..c1457c6 100644
--- a/javatests/org/chromium/distiller/ContentExtractorTest.java
+++ b/javatests/org/chromium/distiller/ContentExtractorTest.java
@@ -94,7 +94,7 @@
final String html =
"<h1>" + CONTENT_TEXT + "</h1>" +
"<img id=\"a\" style=\"typo\" align=\"left\" src=\"image\" srcset=\"image200 200w, //example.org/image400 400w\">" +
- "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" src=\"image2\">" +
+ "<img id=\"b\" style=\"align: left\" alt=\"b\" data-dummy=\"c\" data-src=\"image2\">" +
"<table role=\"grid\"><tbody><tr><td>" +
"<img id=\"c\" style=\"a\" alt=\"b\" src=\"/image\" srcset=\"https://example.com/image2x 2x, /image4x 4x,\">" +
"<img id=\"d\" style=\"a\" align=\"left\" src=\"/image2\">" +
@@ -157,7 +157,7 @@
"<p style=\"\">" +
CONTENT_TEXT +
"</p>" +
- "<img style=\"align: left\" src=\"/test.png\">" +
+ "<img style=\"align: left\" data-src=\"/test.png\">" +
"<table style=\"position: absolute\">" +
"<tbody style=\"font-size: 2\">" +
"<tr style=\"z-index: 0\">" +
diff --git a/javatests/org/chromium/distiller/EmbedExtractorTest.java b/javatests/org/chromium/distiller/EmbedExtractorTest.java
index 5c23f0c..2b7a2f8 100644
--- a/javatests/org/chromium/distiller/EmbedExtractorTest.java
+++ b/javatests/org/chromium/distiller/EmbedExtractorTest.java
@@ -380,4 +380,24 @@
assertEquals(38, result.getHeight());
assertEquals(38, result.getWidth());
}
+
+    // Verifies that an image whose URL is stored only in |attr| is output with an absolute src.
+    private void extractLazilyLoadedImage(String attr) {
+        ImageElement lazyImage = TestUtil.createImage();
+        lazyImage.setAttribute(attr, "image.png");
+        mBody.appendChild(lazyImage);
+        mHead.setInnerHTML("<base href=\"http://example.com/\">");
+
+        EmbedExtractor imageExtractor = new ImageExtractor();
+        WebImage extracted = (WebImage) imageExtractor.extract(lazyImage);
+        assertNotNull(extracted);
+        assertEquals("<img src=\"http://example.com/image.png\">", extracted.generateOutput(false));
+    }
+
+    public void testImageExtractorLazy() {
+        // Exercise each lazy-load attribute in turn, in the same order as before.
+        for (String attr : new String[] {"data-src", "datasrc", "data-original", "data-url"}) {
+            extractLazilyLoadedImage(attr);
+        }
+    }
}