| // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "pdf/pdfium/pdfium_page.h" |
| |
| #include <math.h> |
| #include <stddef.h> |
| |
| #include <algorithm> |
| #include <memory> |
| #include <utility> |
| |
| #include "base/logging.h" |
| #include "base/numerics/safe_math.h" |
| #include "base/strings/string_number_conversions.h" |
| #include "base/strings/string_util.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "pdf/pdfium/pdfium_api_string_buffer_adapter.h" |
| #include "pdf/pdfium/pdfium_engine.h" |
| #include "printing/units.h" |
| |
| using printing::ConvertUnitDouble; |
| using printing::kPointsPerInch; |
| using printing::kPixelsPerInch; |
| |
| namespace { |
| |
| pp::FloatRect FloatPageRectToPixelRect(FPDF_PAGE page, |
| const pp::FloatRect& input) { |
| int output_width = FPDF_GetPageWidth(page); |
| int output_height = FPDF_GetPageHeight(page); |
| |
| int min_x; |
| int min_y; |
| int max_x; |
| int max_y; |
| FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.x(), |
| input.y(), &min_x, &min_y); |
| FPDF_PageToDevice(page, 0, 0, output_width, output_height, 0, input.right(), |
| input.bottom(), &max_x, &max_y); |
| |
| if (max_x < min_x) |
| std::swap(min_x, max_x); |
| if (max_y < min_y) |
| std::swap(min_y, max_y); |
| |
| pp::FloatRect output_rect( |
| ConvertUnitDouble(min_x, kPointsPerInch, kPixelsPerInch), |
| ConvertUnitDouble(min_y, kPointsPerInch, kPixelsPerInch), |
| ConvertUnitDouble(max_x - min_x, kPointsPerInch, kPixelsPerInch), |
| ConvertUnitDouble(max_y - min_y, kPointsPerInch, kPixelsPerInch)); |
| return output_rect; |
| } |
| |
| pp::FloatRect GetFloatCharRectInPixels(FPDF_PAGE page, |
| FPDF_TEXTPAGE text_page, |
| int index) { |
| double left, right, bottom, top; |
| FPDFText_GetCharBox(text_page, index, &left, &right, &bottom, &top); |
| if (right < left) |
| std::swap(left, right); |
| if (bottom < top) |
| std::swap(top, bottom); |
| pp::FloatRect page_coords(left, top, right - left, bottom - top); |
| return FloatPageRectToPixelRect(page, page_coords); |
| } |
| |
| bool OverlapsOnYAxis(const pp::FloatRect& a, const pp::FloatRect& b) { |
| return !(a.IsEmpty() || b.IsEmpty() || a.bottom() < b.y() || |
| b.bottom() < a.y()); |
| } |
| |
| } // namespace |
| |
| namespace chrome_pdf { |
| |
| PDFiumPage::LinkTarget::LinkTarget() : page(-1) {} |
| |
| PDFiumPage::LinkTarget::LinkTarget(const LinkTarget& other) |
| : url(other.url), page(other.page), y_in_pixels(other.y_in_pixels) {} |
| |
| PDFiumPage::LinkTarget::~LinkTarget() {} |
| |
| PDFiumPage::PDFiumPage(PDFiumEngine* engine, |
| int i, |
| const pp::Rect& r, |
| bool available) |
| : engine_(engine), |
| page_(nullptr), |
| text_page_(nullptr), |
| index_(i), |
| loading_count_(0), |
| rect_(r), |
| calculated_links_(false), |
| available_(available) {} |
| |
| PDFiumPage::PDFiumPage(const PDFiumPage& that) = default; |
| |
| PDFiumPage::~PDFiumPage() { |
| DCHECK_EQ(0, loading_count_); |
| } |
| |
| void PDFiumPage::Unload() { |
| // Do not unload while in the middle of a load. |
| if (loading_count_) |
| return; |
| |
| if (text_page_) { |
| FPDFText_ClosePage(text_page_); |
| text_page_ = nullptr; |
| } |
| |
| if (page_) { |
| if (engine_->form()) { |
| FORM_OnBeforeClosePage(page_, engine_->form()); |
| } |
| FPDF_ClosePage(page_); |
| page_ = nullptr; |
| } |
| } |
| |
| FPDF_PAGE PDFiumPage::GetPage() { |
| ScopedUnsupportedFeature scoped_unsupported_feature(engine_); |
| ScopedSubstFont scoped_subst_font(engine_); |
| if (!available_) |
| return nullptr; |
| if (!page_) { |
| ScopedLoadCounter scoped_load(this); |
| page_ = FPDF_LoadPage(engine_->doc(), index_); |
| if (page_ && engine_->form()) { |
| FORM_OnAfterLoadPage(page_, engine_->form()); |
| } |
| } |
| return page_; |
| } |
| |
| FPDF_PAGE PDFiumPage::GetPrintPage() { |
| ScopedUnsupportedFeature scoped_unsupported_feature(engine_); |
| ScopedSubstFont scoped_subst_font(engine_); |
| if (!available_) |
| return nullptr; |
| if (!page_) { |
| ScopedLoadCounter scoped_load(this); |
| page_ = FPDF_LoadPage(engine_->doc(), index_); |
| } |
| return page_; |
| } |
| |
| void PDFiumPage::ClosePrintPage() { |
| // Do not close |page_| while in the middle of a load. |
| if (loading_count_) |
| return; |
| |
| if (page_) { |
| FPDF_ClosePage(page_); |
| page_ = nullptr; |
| } |
| } |
| |
| FPDF_TEXTPAGE PDFiumPage::GetTextPage() { |
| if (!available_) |
| return nullptr; |
| if (!text_page_) { |
| ScopedLoadCounter scoped_load(this); |
| text_page_ = FPDFText_LoadPage(GetPage()); |
| } |
| return text_page_; |
| } |
| |
| void PDFiumPage::GetTextRunInfo(int start_char_index, |
| uint32_t* out_len, |
| double* out_font_size, |
| pp::FloatRect* out_bounds) { |
| FPDF_PAGE page = GetPage(); |
| FPDF_TEXTPAGE text_page = GetTextPage(); |
| int chars_count = FPDFText_CountChars(text_page); |
| int char_index = start_char_index; |
| while ( |
| char_index < chars_count && |
| base::IsUnicodeWhitespace(FPDFText_GetUnicode(text_page, char_index))) { |
| char_index++; |
| } |
| int text_run_font_size = FPDFText_GetFontSize(text_page, char_index); |
| pp::FloatRect text_run_bounds = |
| GetFloatCharRectInPixels(page, text_page, char_index); |
| if (char_index < chars_count) |
| char_index++; |
| while (char_index < chars_count) { |
| unsigned int character = FPDFText_GetUnicode(text_page, char_index); |
| |
| if (!base::IsUnicodeWhitespace(character)) { |
| // TODO(dmazzoni): this assumes horizontal text. |
| // https://crbug.com/580311 |
| pp::FloatRect char_rect = |
| GetFloatCharRectInPixels(page, text_page, char_index); |
| if (!char_rect.IsEmpty() && !OverlapsOnYAxis(text_run_bounds, char_rect)) |
| break; |
| |
| int font_size = FPDFText_GetFontSize(text_page, char_index); |
| if (font_size != text_run_font_size) |
| break; |
| |
| // Heuristic: split a text run after a space longer than 3 average |
| // characters. |
| double avg_char_width = |
| text_run_bounds.width() / (char_index - start_char_index); |
| if (char_rect.x() - text_run_bounds.right() > avg_char_width * 3) |
| break; |
| |
| text_run_bounds = text_run_bounds.Union(char_rect); |
| } |
| |
| char_index++; |
| } |
| |
| // Some PDFs have missing or obviously bogus font sizes; substitute the |
| // height of the bounding box in those cases. |
| if (text_run_font_size <= 1 || |
| text_run_font_size < text_run_bounds.height() / 2 || |
| text_run_font_size > text_run_bounds.height() * 2) { |
| text_run_font_size = text_run_bounds.height(); |
| } |
| |
| *out_len = char_index - start_char_index; |
| *out_font_size = text_run_font_size; |
| *out_bounds = text_run_bounds; |
| } |
| |
| uint32_t PDFiumPage::GetCharUnicode(int char_index) { |
| FPDF_TEXTPAGE text_page = GetTextPage(); |
| return FPDFText_GetUnicode(text_page, char_index); |
| } |
| |
| pp::FloatRect PDFiumPage::GetCharBounds(int char_index) { |
| FPDF_PAGE page = GetPage(); |
| FPDF_TEXTPAGE text_page = GetTextPage(); |
| return GetFloatCharRectInPixels(page, text_page, char_index); |
| } |
| |
| PDFiumPage::Area PDFiumPage::GetCharIndex(const pp::Point& point, |
| int rotation, |
| int* char_index, |
| int* form_type, |
| LinkTarget* target) { |
| if (!available_) |
| return NONSELECTABLE_AREA; |
| pp::Point point2 = point - rect_.point(); |
| double new_x; |
| double new_y; |
| FPDF_DeviceToPage(GetPage(), 0, 0, rect_.width(), rect_.height(), rotation, |
| point2.x(), point2.y(), &new_x, &new_y); |
| |
| // hit detection tolerance, in points. |
| constexpr double kTolerance = 20.0; |
| int rv = FPDFText_GetCharIndexAtPos(GetTextPage(), new_x, new_y, kTolerance, |
| kTolerance); |
| *char_index = rv; |
| |
| FPDF_LINK link = FPDFLink_GetLinkAtPoint(GetPage(), new_x, new_y); |
| int control = |
| FPDFPage_HasFormFieldAtPoint(engine_->form(), GetPage(), new_x, new_y); |
| |
| // If there is a control and link at the same point, figure out their z-order |
| // to determine which is on top. |
| if (link && control > FPDF_FORMFIELD_UNKNOWN) { |
| int control_z_order = FPDFPage_FormFieldZOrderAtPoint( |
| engine_->form(), GetPage(), new_x, new_y); |
| int link_z_order = FPDFLink_GetLinkZOrderAtPoint(GetPage(), new_x, new_y); |
| DCHECK_NE(control_z_order, link_z_order); |
| if (control_z_order > link_z_order) { |
| *form_type = control; |
| return FormTypeToArea(*form_type); |
| } |
| |
| // We don't handle all possible link types of the PDF. For example, |
| // launch actions, cross-document links, etc. |
| // In that case, GetLinkTarget() will return NONSELECTABLE_AREA |
| // and we should proceed with area detection. |
| Area area = GetLinkTarget(link, target); |
| if (area != NONSELECTABLE_AREA) |
| return area; |
| } else if (link) { |
| // We don't handle all possible link types of the PDF. For example, |
| // launch actions, cross-document links, etc. |
| // See identical block above. |
| Area area = GetLinkTarget(link, target); |
| if (area != NONSELECTABLE_AREA) |
| return area; |
| } else if (control > FPDF_FORMFIELD_UNKNOWN) { |
| *form_type = control; |
| return FormTypeToArea(*form_type); |
| } |
| |
| if (rv < 0) |
| return NONSELECTABLE_AREA; |
| |
| return GetLink(*char_index, target) != -1 ? WEBLINK_AREA : TEXT_AREA; |
| } |
| |
| // static |
| PDFiumPage::Area PDFiumPage::FormTypeToArea(int form_type) { |
| switch (form_type) { |
| case FPDF_FORMFIELD_COMBOBOX: |
| case FPDF_FORMFIELD_TEXTFIELD: |
| #if defined(PDF_ENABLE_XFA) |
| // TODO(bug_353450): figure out selection and copying for XFA fields. |
| case FPDF_FORMFIELD_XFA: |
| #endif |
| return FORM_TEXT_AREA; |
| default: |
| return NONSELECTABLE_AREA; |
| } |
| } |
| |
| base::char16 PDFiumPage::GetCharAtIndex(int index) { |
| if (!available_) |
| return L'\0'; |
| return static_cast<base::char16>(FPDFText_GetUnicode(GetTextPage(), index)); |
| } |
| |
| int PDFiumPage::GetCharCount() { |
| if (!available_) |
| return 0; |
| return FPDFText_CountChars(GetTextPage()); |
| } |
| |
| PDFiumPage::Area PDFiumPage::GetLinkTarget(FPDF_LINK link, LinkTarget* target) { |
| FPDF_DEST dest = FPDFLink_GetDest(engine_->doc(), link); |
| if (dest) |
| return GetDestinationTarget(dest, target); |
| |
| FPDF_ACTION action = FPDFLink_GetAction(link); |
| if (!action) |
| return NONSELECTABLE_AREA; |
| |
| switch (FPDFAction_GetType(action)) { |
| case PDFACTION_GOTO: { |
| FPDF_DEST dest = FPDFAction_GetDest(engine_->doc(), action); |
| if (dest) |
| return GetDestinationTarget(dest, target); |
| // TODO(thestig): We don't fully support all types of the in-document |
| // links. Need to implement that. There is a bug to track that: |
| // https://crbug.com/55776 |
| return NONSELECTABLE_AREA; |
| } |
| case PDFACTION_URI: |
| return GetURITarget(action, target); |
| // TODO(thestig): Support remaining types for https://crbug.com/55776 |
| case PDFACTION_LAUNCH: |
| case PDFACTION_REMOTEGOTO: |
| default: |
| return NONSELECTABLE_AREA; |
| } |
| } |
| |
| PDFiumPage::Area PDFiumPage::GetDestinationTarget(FPDF_DEST destination, |
| LinkTarget* target) { |
| if (!target) |
| return DOCLINK_AREA; |
| |
| target->page = FPDFDest_GetPageIndex(engine_->doc(), destination); |
| GetPageYTarget(destination, target); |
| |
| return DOCLINK_AREA; |
| } |
| |
| void PDFiumPage::GetPageYTarget(FPDF_DEST destination, LinkTarget* target) { |
| if (!available_) { |
| target->y_in_pixels.reset(); |
| return; |
| } |
| |
| FPDF_BOOL has_x_coord; |
| FPDF_BOOL has_y_coord; |
| FPDF_BOOL has_zoom; |
| FS_FLOAT x; |
| FS_FLOAT y; |
| FS_FLOAT zoom; |
| FPDF_BOOL success = FPDFDest_GetLocationInPage( |
| destination, &has_x_coord, &has_y_coord, &has_zoom, &x, &y, &zoom); |
| |
| if (!success || !has_x_coord || !has_y_coord) { |
| target->y_in_pixels.reset(); |
| return; |
| } |
| |
| pp::FloatRect page_rect(x, y, 0, 0); |
| pp::FloatRect pixel_rect(FloatPageRectToPixelRect(GetPage(), page_rect)); |
| target->y_in_pixels = pixel_rect.y(); |
| } |
| |
| PDFiumPage::Area PDFiumPage::GetURITarget(FPDF_ACTION uri_action, |
| LinkTarget* target) const { |
| if (target) { |
| size_t buffer_size = |
| FPDFAction_GetURIPath(engine_->doc(), uri_action, nullptr, 0); |
| if (buffer_size > 0) { |
| PDFiumAPIStringBufferAdapter<std::string> api_string_adapter( |
| &target->url, buffer_size, true); |
| void* data = api_string_adapter.GetData(); |
| size_t bytes_written = |
| FPDFAction_GetURIPath(engine_->doc(), uri_action, data, buffer_size); |
| api_string_adapter.Close(bytes_written); |
| } |
| } |
| return WEBLINK_AREA; |
| } |
| |
| int PDFiumPage::GetLink(int char_index, LinkTarget* target) { |
| if (!available_) |
| return -1; |
| |
| CalculateLinks(); |
| |
| // Get the bounding box of the rect again, since it might have moved because |
| // of the tolerance above. |
| double left, right, bottom, top; |
| FPDFText_GetCharBox(GetTextPage(), char_index, &left, &right, &bottom, &top); |
| |
| pp::Point origin( |
| PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0).point()); |
| for (size_t i = 0; i < links_.size(); ++i) { |
| for (const auto& rect : links_[i].rects) { |
| if (rect.Contains(origin)) { |
| if (target) |
| target->url = links_[i].url; |
| return i; |
| } |
| } |
| } |
| return -1; |
| } |
| |
| std::vector<int> PDFiumPage::GetLinks(pp::Rect text_area, |
| std::vector<LinkTarget>* targets) { |
| std::vector<int> links; |
| if (!available_) |
| return links; |
| |
| CalculateLinks(); |
| |
| for (size_t i = 0; i < links_.size(); ++i) { |
| for (const auto& rect : links_[i].rects) { |
| if (rect.Intersects(text_area)) { |
| if (targets) { |
| LinkTarget target; |
| target.url = links_[i].url; |
| targets->push_back(target); |
| } |
| links.push_back(i); |
| } |
| } |
| } |
| return links; |
| } |
| |
| void PDFiumPage::CalculateLinks() { |
| if (calculated_links_) |
| return; |
| |
| calculated_links_ = true; |
| FPDF_PAGELINK links = FPDFLink_LoadWebLinks(GetTextPage()); |
| int count = FPDFLink_CountWebLinks(links); |
| for (int i = 0; i < count; ++i) { |
| base::string16 url; |
| int url_length = FPDFLink_GetURL(links, i, nullptr, 0); |
| if (url_length > 0) { |
| PDFiumAPIStringBufferAdapter<base::string16> api_string_adapter( |
| &url, url_length, true); |
| unsigned short* data = |
| reinterpret_cast<unsigned short*>(api_string_adapter.GetData()); |
| int actual_length = FPDFLink_GetURL(links, i, data, url_length); |
| api_string_adapter.Close(actual_length); |
| } |
| Link link; |
| link.url = base::UTF16ToUTF8(url); |
| |
| // If the link cannot be converted to a pp::Var, then it is not possible to |
| // pass it to JS. In this case, ignore the link like other PDF viewers. |
| // See http://crbug.com/312882 for an example. |
| pp::Var link_var(link.url); |
| if (!link_var.is_string()) |
| continue; |
| |
| // Make sure all the characters in the URL are valid per RFC 1738. |
| // http://crbug.com/340326 has a sample bad PDF. |
| // GURL does not work correctly, e.g. it just strips \t \r \n. |
| bool is_invalid_url = false; |
| for (size_t j = 0; j < link.url.length(); ++j) { |
| // Control characters are not allowed. |
| // 0x7F is also a control character. |
| // 0x80 and above are not in US-ASCII. |
| if (link.url[j] < ' ' || link.url[j] >= '\x7F') { |
| is_invalid_url = true; |
| break; |
| } |
| } |
| if (is_invalid_url) |
| continue; |
| |
| int rect_count = FPDFLink_CountRects(links, i); |
| for (int j = 0; j < rect_count; ++j) { |
| double left, top, right, bottom; |
| FPDFLink_GetRect(links, i, j, &left, &top, &right, &bottom); |
| link.rects.push_back( |
| PageToScreen(pp::Point(), 1.0, left, top, right, bottom, 0)); |
| } |
| links_.push_back(link); |
| } |
| FPDFLink_CloseWebLinks(links); |
| } |
| |
| pp::Rect PDFiumPage::PageToScreen(const pp::Point& offset, |
| double zoom, |
| double left, |
| double top, |
| double right, |
| double bottom, |
| int rotation) const { |
| if (!available_) |
| return pp::Rect(); |
| |
| double start_x = (rect_.x() - offset.x()) * zoom; |
| double start_y = (rect_.y() - offset.y()) * zoom; |
| double size_x = rect_.width() * zoom; |
| double size_y = rect_.height() * zoom; |
| if (!base::IsValueInRangeForNumericType<int>(start_x) || |
| !base::IsValueInRangeForNumericType<int>(start_y) || |
| !base::IsValueInRangeForNumericType<int>(size_x) || |
| !base::IsValueInRangeForNumericType<int>(size_y)) { |
| return pp::Rect(); |
| } |
| |
| int new_left; |
| int new_top; |
| int new_right; |
| int new_bottom; |
| FPDF_PageToDevice(page_, static_cast<int>(start_x), static_cast<int>(start_y), |
| static_cast<int>(ceil(size_x)), |
| static_cast<int>(ceil(size_y)), rotation, left, top, |
| &new_left, &new_top); |
| FPDF_PageToDevice(page_, static_cast<int>(start_x), static_cast<int>(start_y), |
| static_cast<int>(ceil(size_x)), |
| static_cast<int>(ceil(size_y)), rotation, right, bottom, |
| &new_right, &new_bottom); |
| |
| // If the PDF is rotated, the horizontal/vertical coordinates could be |
| // flipped. See |
| // http://www.netl.doe.gov/publications/proceedings/03/ubc/presentations/Goeckner-pres.pdf |
| if (new_right < new_left) |
| std::swap(new_right, new_left); |
| if (new_bottom < new_top) |
| std::swap(new_bottom, new_top); |
| |
| base::CheckedNumeric<int32_t> new_size_x = new_right; |
| new_size_x -= new_left; |
| new_size_x += 1; |
| base::CheckedNumeric<int32_t> new_size_y = new_bottom; |
| new_size_y -= new_top; |
| new_size_y += 1; |
| if (!new_size_x.IsValid() || !new_size_y.IsValid()) |
| return pp::Rect(); |
| |
| return pp::Rect(new_left, new_top, new_size_x.ValueOrDie(), |
| new_size_y.ValueOrDie()); |
| } |
| |
| PDFiumPage::ScopedLoadCounter::ScopedLoadCounter(PDFiumPage* page) |
| : page_(page) { |
| page_->loading_count_++; |
| } |
| |
| PDFiumPage::ScopedLoadCounter::~ScopedLoadCounter() { |
| page_->loading_count_--; |
| } |
| |
| PDFiumPage::Link::Link() = default; |
| |
| PDFiumPage::Link::Link(const Link& that) = default; |
| |
| PDFiumPage::Link::~Link() = default; |
| |
| } // namespace chrome_pdf |