blob: a8a5aafca6f17c371423b3dc571cdd2d20c62583 [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This schema uses proto2 syntax (explicit `optional` / `repeated` labels).
syntax = "proto2";
// Every message declared in this file lives in the dom_distiller.proto
// namespace.
package dom_distiller.proto;
// Generate code against the lite protobuf runtime, which omits descriptors
// and reflection support.
option optimize_for = LITE_RUNTIME;
// Java package that the generated classes are placed in.
option java_package = "org.chromium.distiller.proto";
// Name of the Java outer class that wraps the generated message classes.
option java_outer_classname = "DomDistillerProtos";
// Container for the content produced for a page. Referenced from
// DomDistillerResult.distilled_content.
message DistilledContent {
  // The content as a single string.
  // NOTE(review): presumably serialized HTML markup, going by the field
  // name -- confirm with the producer.
  optional string html = 1;
}
// Links between the pages of a multi-page document. Every field is optional;
// an unset field means no value was recorded for it.
// NOTE(review): the values are presumably URLs -- confirm with the producer.
message PaginationInfo {
  // The page that follows the current one.
  optional string next_page = 1;

  // The page that precedes the current one.
  optional string prev_page = 2;

  // The canonical page for the document.
  optional string canonical_page = 3;
}
// Article-level metadata. Referenced from MarkupInfo.article.
// NOTE(review): the field names mirror the Open Graph "article:*" properties;
// presumably that is the source vocabulary -- confirm.
message MarkupArticle {
  // Publication time, carried as a string. The string format is not specified
  // in this file.
  optional string published_time = 1;

  // Last-modification time, carried as a string (format unspecified here).
  optional string modified_time = 2;

  // Expiration time, carried as a string (format unspecified here).
  optional string expiration_time = 3;

  // Section the article belongs to.
  optional string section = 4;

  // Zero or more author entries.
  repeated string authors = 5;
}
// Metadata for one image. Referenced from MarkupInfo.images.
message MarkupImage {
  // Address of the image.
  optional string url = 1;

  // Alternate address of the image.
  // NOTE(review): presumably an HTTPS variant of `url` -- confirm.
  optional string secure_url = 2;

  // Type of the image, as a free-form string.
  // NOTE(review): presumably a MIME type -- confirm.
  optional string type = 3;

  // Caption associated with the image.
  optional string caption = 4;

  // Image dimensions.
  // NOTE(review): units are not stated in this file; presumably pixels.
  optional int32 width = 5;
  optional int32 height = 6;
}
// Document-level metadata. Referenced from DomDistillerResult.markup_info.
// All scalar fields are free-form strings; none of them is validated or
// constrained by this schema.
message MarkupInfo {
  // Title of the document.
  optional string title = 1;

  // Type of the document, as a free-form string.
  optional string type = 2;

  // Address of the document.
  optional string url = 3;

  // Description of the document.
  optional string description = 4;

  // Publisher of the document.
  optional string publisher = 5;

  // Copyright notice for the document.
  optional string copyright = 6;

  // Author of the document. Note that MarkupArticle separately carries a
  // repeated `authors` field.
  optional string author = 7;

  // Article-specific metadata, if any.
  optional MarkupArticle article = 8;

  // Zero or more images described for the document.
  repeated MarkupImage images = 9;
}
// One named timing measurement. Used by TimingInfo.other_times to carry
// breakdowns that have no dedicated field.
message TimingEntry {
  // Label identifying what was measured.
  optional string name = 1;

  // The measured time.
  // NOTE(review): the unit is not stated in this file -- confirm with the
  // producer before comparing values.
  optional double time = 2;
}
// Timing measurements for a run. Referenced from
// DomDistillerResult.timing_info.
// NOTE(review): the unit of the double-valued fields is not stated in this
// file -- confirm with the producer.
message TimingInfo {
  // Time attributed to markup parsing.
  optional double markup_parsing_time = 1;

  // Time attributed to document construction.
  optional double document_construction_time = 2;

  // Time attributed to article processing.
  optional double article_processing_time = 3;

  // Time attributed to formatting.
  optional double formatting_time = 4;

  // Total time.
  optional double total_time = 5;

  // Arbitrary additional breakdowns of time. The perf scoring/server should
  // display these entries using the names they carry.
  repeated TimingEntry other_times = 6;
}
// Debugging output. Referenced from DomDistillerResult.debug_info.
message DebugInfo {
  // Log text, carried as a single string.
  optional string log = 1;
}
// Summary statistics. Referenced from DomDistillerResult.statistics_info.
message StatisticsInfo {
  // Number of words counted.
  // NOTE(review): which text the count covers is not stated in this file.
  optional int32 word_count = 1;
}
// Top-level result message. It aggregates the content, pagination links,
// metadata, timing, debug output and statistics messages declared in this
// file.
message DomDistillerResult {
  // Field number 4 is not assigned to any field in this message. It is
  // reserved so that it can never be handed to a new field with a different
  // type or meaning.
  // NOTE(review): presumably 4 belonged to a field that was removed -- confirm
  // against history, and reserve the old field name as well if so.
  reserved 4;

  // Title of the document.
  optional string title = 1;

  // The content produced for the page.
  optional DistilledContent distilled_content = 2;

  // Links to the neighbouring and canonical pages.
  optional PaginationInfo pagination_info = 3;

  // Document-level metadata.
  optional MarkupInfo markup_info = 5;

  // Timing measurements.
  optional TimingInfo timing_info = 6;

  // Debugging output.
  optional DebugInfo debug_info = 7;

  // Summary statistics.
  optional StatisticsInfo statistics_info = 8;

  // Text direction, as a free-form string.
  // NOTE(review): the set of accepted values is not stated in this file.
  optional string text_direction = 9;

  // Represents an image found in the content of a page.
  message ContentImage {
    // Address of the image.
    optional string url = 1;
  }

  // Zero or more images found in the content of the page.
  repeated ContentImage content_images = 10;
}
// Options that configure a run.
message DomDistillerOptions {
  // When set to true, extract only the text; otherwise the containing html
  // is included as well.
  optional bool extract_text_only = 1;

  // Amount of debug output dumped to window.console:
  //   0 - logs nothing
  //   1 - Text Node data for each stage of processing
  //   2 - everything from level 1, plus some node visibility information
  //   3 - everything from level 2, plus extracted paging information
  optional int32 debug_level = 2;

  // Original URL of the page. It feeds the heuristics that detect next/prev
  // page links.
  optional string original_url = 3;

  // Selects the algorithm used for next page detection:
  //   "next"    - detect anchors with "next" text
  //   "pagenum" - detect anchors with numeric page numbers
  optional string pagination_algo = 4;
}