Retain image sizes

Small images lost their width and height attributes during distillation
and were therefore rendered larger than they were on the original page.

The fix is to retain the width and height attributes of the original
image in the distilled output.

Contributors: marcelorcorrea@hp.com, dalmirsilva@hp.com

BUG=591143
R=mdjones@chromium.org, wychen@chromium.org

Review URL: https://codereview.chromium.org/1754213004 .

Patch from dalmirsilva <dalmirsilva@hp.com>.
diff --git a/java/org/chromium/distiller/DomUtil.java b/java/org/chromium/distiller/DomUtil.java
index 27690c4..0c0b8f0 100644
--- a/java/org/chromium/distiller/DomUtil.java
+++ b/java/org/chromium/distiller/DomUtil.java
@@ -321,6 +321,8 @@
                 !"alt".equals(name) &&
                 !"srcset".equals(name) &&
                 !"dir".equals(name) &&
+                !"width".equals(name) &&
+                !"height".equals(name) &&
                 !"title".equals(name)) {
                 imgElement.removeAttribute(name);
             } else {
diff --git a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
index a450ea9..c9b527a 100644
--- a/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
+++ b/java/org/chromium/distiller/extractors/embeds/ImageExtractor.java
@@ -33,11 +33,21 @@
             return null;
         }
         String imgSrc = "";
+        // Use offsetWidth/offsetHeight as the default values, even though
+        // they are affected by padding, border, etc.
+        int width = e.getOffsetWidth();
+        int height = e.getOffsetHeight();
         if ("IMG".equals(e.getTagName())) {
-            // This will get the absolute URL of the image.
-            imgSrc = ImageElement.as(e).getSrc();
+            // This will get the absolute URL of the image and
+            // the displayed image dimension.
+            ImageElement imageElement = ImageElement.as(e);
+            imgSrc = imageElement.getSrc();
+            // As an ImageElement is manipulated here, it is possible
+            // to get the real dimensions.
+            width = imageElement.getWidth();
+            height = imageElement.getHeight();
         }
 
-        return new WebImage(e, e.getOffsetWidth(), e.getOffsetHeight(), imgSrc);
+        return new WebImage(e, width, height, imgSrc);
     }
 }
diff --git a/java/org/chromium/distiller/webdocument/WebImage.java b/java/org/chromium/distiller/webdocument/WebImage.java
index cd68677..0c14d46 100644
--- a/java/org/chromium/distiller/webdocument/WebImage.java
+++ b/java/org/chromium/distiller/webdocument/WebImage.java
@@ -46,6 +46,12 @@
 
         ImageElement ie = ImageElement.as(Element.as(imgElement.cloneNode(false)));
         ie.setSrc(ie.getSrc());
+        // Only override the attributes when both computed dimensions are
+        // positive; a zero width or height would make the image invisible.
+        if (width > 0 && height > 0) {
+            ie.setWidth(width);
+            ie.setHeight(height);
+        }
         DomUtil.makeSrcSetAbsolute(ie);
         DomUtil.stripImageElement(ie);
 
diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java
index 9429fc1..d54ecf6 100644
--- a/javatests/org/chromium/distiller/ContentExtractorTest.java
+++ b/javatests/org/chromium/distiller/ContentExtractorTest.java
@@ -203,6 +203,42 @@
                 TestUtil.removeAllDirAttributes(extractedContent));
     }
 
+    public void testKeepingWidthAndHeightAttributes() {
+        String html =
+            "<h1>" +
+                CONTENT_TEXT +
+            "</h1>" +
+            "<p>" +
+                CONTENT_TEXT +
+            "</p>" +
+            "<img style=\"align: left\" src=\"/test.png\" " +
+                    "width=\"200\" height=\"300\">" +
+            "<img style=\"align: left\" src=\"/test.png\" " +
+                    "width=\"200\">" +
+            "<img style=\"align: left\" src=\"/test.png\">";
+
+        final String expected =
+            "<h1>" +
+                CONTENT_TEXT +
+            "</h1>" +
+            "<p>" +
+                CONTENT_TEXT +
+            "</p>" +
+            "<img src=\"http://example.com/test.png\" " +
+                    "width=\"200\" height=\"300\">" +
+            "<img src=\"http://example.com/test.png\" " +
+                    "width=\"200\">" +
+            "<img src=\"http://example.com/test.png\">";
+
+        mHead.setInnerHTML("<base href=\"http://example.com/\">");
+        mBody.setInnerHTML(html);
+
+        ContentExtractor extractor = new ContentExtractor(mRoot);
+        String extractedContent = extractor.extractContent();
+        assertEquals(expected,
+                TestUtil.removeAllDirAttributes(extractedContent));
+    }
+
     public void testPreserveOrderedList() {
         Element outerListTag = Document.get().createElement("OL");
         mBody.appendChild(outerListTag);
diff --git a/javatests/org/chromium/distiller/EmbedExtractorTest.java b/javatests/org/chromium/distiller/EmbedExtractorTest.java
index b68f52e..00e7267 100644
--- a/javatests/org/chromium/distiller/EmbedExtractorTest.java
+++ b/javatests/org/chromium/distiller/EmbedExtractorTest.java
@@ -4,20 +4,31 @@
 
 package org.chromium.distiller;
 
+import com.google.gwt.dom.client.Style;
 import org.chromium.distiller.webdocument.WebEmbed;
+import org.chromium.distiller.webdocument.WebImage;
 import org.chromium.distiller.extractors.embeds.EmbedExtractor;
 import org.chromium.distiller.extractors.embeds.TwitterExtractor;
 import org.chromium.distiller.extractors.embeds.VimeoExtractor;
 import org.chromium.distiller.extractors.embeds.YouTubeExtractor;
+import org.chromium.distiller.extractors.embeds.ImageExtractor;
 
 import com.google.gwt.dom.client.Document;
 import com.google.gwt.dom.client.Element;
 import com.google.gwt.dom.client.IFrameElement;
+import com.google.gwt.dom.client.ImageElement;
 
 import java.util.List;
 
 public class EmbedExtractorTest extends DomDistillerJsTestCase {
 
+    /**
+     * 5x5 red dot PNG image, encoded as a base64 data URI.
+     */
+    final String IMAGE_BASE64 = "data:image/png;base64,iVBORw0KGgo" +
+            "AAAANSUhEUgAAAAUAAAAFCAYAAACNbyblAAAAHElEQVQI12P4//8/" +
+            "w38GIAXDIBKE0DHxgljNBAAO9TXL0Y4OHwAAAABJRU5ErkJggg==";
+
     public void testYouTubeExtractor() {
         Element youtube = TestUtil.createIframe();
         youtube.setAttribute("src", "http://www.youtube.com/embed/M7lc1UVf-VE?autoplay=1");
@@ -200,4 +211,127 @@
         result = (WebEmbed) extractor.extract(notTwitter);
         assertNull(result);
     }
+
+    public void testImageExtractorWithWidthHeightAttributes() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.setAttribute("width", "32");
+        image.setAttribute("height", "32");
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(32, result.getWidth());
+        assertEquals(32, result.getHeight());
+    }
+
+    public void testImageExtractorWithNoAttributes() {
+        ImageElement image = TestUtil.createImage();
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(0, result.getWidth());
+        assertEquals(0, result.getHeight());
+    }
+
+    public void testImageExtractorWithSettingDimension() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(5, result.getWidth());
+        assertEquals(5, result.getHeight());
+    }
+
+    public void testImageExtractorWithOneAttribute() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.setWidth(32);
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(32, result.getWidth());
+        assertEquals(32, result.getHeight());
+    }
+
+    public void testImageExtractorWithHeightCSS() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.getStyle().setHeight(100, Style.Unit.PX);
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(100, result.getWidth());
+        assertEquals(100, result.getHeight());
+    }
+
+    public void testImageExtractorWithWidthHeightCSSPX() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.getStyle().setHeight(100, Style.Unit.PX);
+        image.getStyle().setWidth(48, Style.Unit.PX);
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(48, result.getWidth());
+        assertEquals(100, result.getHeight());
+    }
+
+    public void testImageExtractorWithWidthAttributeHeightCSS() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.getStyle().setHeight(100, Style.Unit.PX);
+        image.setAttribute("width", "144");
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(144, result.getWidth());
+        assertEquals(100, result.getHeight());
+    }
+
+    public void testImageExtractorWithAttributesCSS() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.setAttribute("width", "32");
+        image.setAttribute("height", "32");
+        image.getStyle().setHeight(48, Style.Unit.PX);
+        image.getStyle().setWidth(48, Style.Unit.PX);
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(48, result.getWidth());
+        assertEquals(48, result.getHeight());
+    }
+
+    public void testImageExtractorWithAttributesCSSHeightCMAndWidthAttrb() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.getStyle().setHeight(1, Style.Unit.CM);
+        image.setWidth(50);
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(38, result.getHeight());
+        assertEquals(50, result.getWidth());
+    }
+
+    public void testImageExtractorWithAttributesCSSHeightCM() {
+        ImageElement image = TestUtil.createImage();
+        image.setSrc(IMAGE_BASE64);
+        image.getStyle().setHeight(1, Style.Unit.CM);
+        mBody.appendChild(image);
+        EmbedExtractor extractor = new ImageExtractor();
+        WebImage result = (WebImage) extractor.extract(image);
+        assertNotNull(result);
+        assertEquals(38, result.getHeight());
+        assertEquals(38, result.getWidth());
+    }
 }