| // Copyright 2008 The open-vcdiff Authors. All Rights Reserved. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // |
| // Implements a Decoder for the format described in |
| // RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. |
| // The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html |
| // |
| // The RFC describes the possibility of using a secondary compressor |
| // to further reduce the size of each section of the VCDIFF output. |
| // That feature is not supported in this implementation of the encoder |
| // and decoder. |
| // No secondary compressor types have been publicly registered with |
| // the IANA at http://www.iana.org/assignments/vcdiff-comp-ids |
| // in the more than five years since the registry was created, so there |
| // is no standard set of compressor IDs which would be generated by other |
| // encoders or accepted by other decoders. |
| |
| #include <config.h> |
| #include "google/vcdecoder.h" |
| #include <limits.h> // NOLINT |
| #include <stddef.h> // size_t, ptrdiff_t |
| #include <stdint.h> // int32_t |
| #include <string.h> // memcpy, memset |
| #include <string> |
| #include "addrcache.h" |
| #include "checksum.h" |
| #include "codetable.h" |
| #include "decodetable.h" |
| #include "headerparser.h" |
| #include "logging.h" |
| #include "google/output_string.h" |
| #include "unique_ptr.h" // auto_ptr, unique_ptr |
| #include "varint_bigendian.h" |
| #include "vcdiff_defs.h" |
| |
| // The FALLTHROUGH_INTENDED macro can be used to annotate implicit fall-through |
| // between switch labels. |
| #if defined(__clang__) && __cplusplus >= 201103L && defined(__has_warning) |
| #if __has_feature(cxx_attributes) && __has_warning("-Wimplicit-fallthrough") |
| #define FALLTHROUGH_INTENDED [[clang::fallthrough]] // NOLINT |
| #endif |
| #endif |
| |
| #ifndef FALLTHROUGH_INTENDED |
| #define FALLTHROUGH_INTENDED do { } while (0) |
| #endif |
| |
| namespace open_vcdiff { |
| |
| // This class is used to parse delta file windows as described |
| // in RFC sections 4.2 and 4.3. Its methods are not thread-safe. |
| // |
| // Here is the window format copied from the RFC: |
| // |
| // Window1 |
| // Win_Indicator - byte |
| // [Source segment size] - integer |
| // [Source segment position] - integer |
| // The delta encoding of the target window |
| // Length of the delta encoding - integer |
| // The delta encoding |
| // Size of the target window - integer |
| // Delta_Indicator - byte |
| // Length of data for ADDs and RUNs - integer |
| // Length of instructions and sizes - integer |
| // Length of addresses for COPYs - integer |
| // Data section for ADDs and RUNs - array of bytes |
| // Instructions and sizes section - array of bytes |
| // Addresses section for COPYs - array of bytes |
| // Window2 |
| // ... |
| // |
| // Sample usage: |
| // |
| // VCDiffDeltaFileWindow delta_window_; |
| // delta_window_.Init(parent); |
| // ParseableChunk parseable_chunk(input_buffer, |
| // input_size, |
| // leftover_unencoded_bytes); |
| // while (!parseable_chunk.Empty()) { |
| // switch (delta_window_.DecodeWindow(&parseable_chunk)) { |
| // case RESULT_END_OF_DATA: |
| // <Read more input and retry DecodeWindow later.> |
| // case RESULT_ERROR: |
| // <Handle error case. An error log message has already been generated.> |
| // } |
| // } |
| // |
| // DecodeWindow consumes only a single window, and needs to be placed within |
| // a loop if multiple windows are to be processed. |
| // |
| class VCDiffDeltaFileWindow { |
| public: |
| VCDiffDeltaFileWindow(); |
| ~VCDiffDeltaFileWindow(); |
| |
| // Init() should be called immediately after constructing the |
| // VCDiffDeltaFileWindow(). It must be called before DecodeWindow() can be |
| // invoked, or an error will occur. |
| void Init(VCDiffStreamingDecoderImpl* parent); |
| |
| // Resets the pointers to the data sections in the current window. |
| void Reset(); |
| |
| bool UseCodeTable(const VCDiffCodeTableData& code_table_data, |
| unsigned char max_mode) { |
| return reader_.UseCodeTable(code_table_data, max_mode); |
| } |
| |
| // Decodes a single delta window using the input data from *parseable_chunk. |
| // Appends the decoded target window to parent_->decoded_target(). Returns |
| // RESULT_SUCCESS if an entire window was decoded, or RESULT_END_OF_DATA if |
| // the end of input was reached before the entire window could be decoded and |
| // more input is expected (only possible if IsInterleaved() is true), or |
| // RESULT_ERROR if an error occurred during decoding. In the RESULT_ERROR |
| // case, the value of parseable_chunk->pointer_ is undefined; otherwise, |
| // parseable_chunk->Advance() is called to point to the input data position |
| // just after the data that has been decoded. |
| // |
| VCDiffResult DecodeWindow(ParseableChunk* parseable_chunk); |
| |
| bool FoundWindowHeader() const { |
| return found_header_; |
| } |
| |
| bool MoreDataExpected() const { |
| // When parsing an interleaved-format delta file, |
| // every time DecodeBody() exits, interleaved_bytes_expected_ |
| // will be decremented by the number of bytes parsed. If it |
| // reaches zero, then there is no more data expected because |
| // the size of the interleaved section (given in the window |
| // header) has been reached. |
| return IsInterleaved() && (interleaved_bytes_expected_ > 0); |
| } |
| |
| size_t target_window_start_pos() const { return target_window_start_pos_; } |
| |
| void set_target_window_start_pos(size_t new_start_pos) { |
| target_window_start_pos_ = new_start_pos; |
| } |
| |
| // Returns the number of bytes remaining to be decoded in the target window. |
| // If not in the process of decoding a window, returns 0. |
| size_t TargetBytesRemaining(); |
| |
| private: |
| // Reads the header of the window section as described in RFC sections 4.2 and |
| // 4.3, up to and including the value "Length of addresses for COPYs". If the |
| // entire header is found, this function sets up the DeltaWindowSections |
| // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so |
| // that the decoder can begin decoding the opcodes in these sections. Returns |
| // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of |
| // available data was reached before the entire header could be read. (The |
| // latter may be an error condition if there is no more data available.) |
| // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the |
| // parsed header. |
| // |
| VCDiffResult ReadHeader(ParseableChunk* parseable_chunk); |
| |
| // After the window header has been parsed as far as the Delta_Indicator, |
| // this function is called to parse the following delta window header fields: |
| // |
| // Length of data for ADDs and RUNs - integer (VarintBE format) |
| // Length of instructions and sizes - integer (VarintBE format) |
| // Length of addresses for COPYs - integer (VarintBE format) |
| // |
| // If has_checksum_ is true, it also looks for the following element: |
| // |
| // Adler32 checksum - unsigned 32-bit integer (VarintBE format) |
| // |
| // It sets up the DeltaWindowSections instructions_and_sizes_, |
| // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format |
| // is being used, all three sections will include the entire window body; if |
| // the standard format is used, three non-overlapping window sections will be |
| // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA |
| // if standard format is being used and there is not enough input data to read |
| // the entire window body. Otherwise, returns RESULT_SUCCESS. |
| VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser); |
| |
| // Decodes the body of the window section as described in RFC sections 4.3, |
| // including the sections "Data section for ADDs and RUNs", "Instructions |
| // and sizes section", and "Addresses section for COPYs". These sections |
| // must already have been set up by ReadWindowHeader(). Returns a |
| // non-negative value on success, or RESULT_END_OF_DATA if the end of input |
| // was reached before the entire window could be decoded (only possible if |
| // IsInterleaved() is true), or RESULT_ERROR if an error occurred during |
| // decoding. Appends as much of the decoded target window as possible to |
| // parent->decoded_target(). |
| // |
| int DecodeBody(ParseableChunk* parseable_chunk); |
| |
| // Returns the number of bytes already decoded into the target window. |
| size_t TargetBytesDecoded(); |
| |
| // Decodes a single ADD instruction, updating parent_->decoded_target_. |
| VCDiffResult DecodeAdd(size_t size); |
| |
| // Decodes a single RUN instruction, updating parent_->decoded_target_. |
| VCDiffResult DecodeRun(size_t size); |
| |
| // Decodes a single COPY instruction, updating parent_->decoded_target_. |
| VCDiffResult DecodeCopy(size_t size, unsigned char mode); |
| |
| // When using the interleaved format, this function is called both on parsing |
| // the header and on resuming after a RESULT_END_OF_DATA was returned from a |
| // previous call to DecodeBody(). It sets up all three section pointers to |
| // reference the same interleaved stream of instructions, sizes, addresses, |
| // and data. These pointers must be reset every time that work resumes on a |
| // delta window, because the input data string may have been changed or |
| // resized since DecodeBody() last returned. |
| void UpdateInterleavedSectionPointers(const char* data_pos, |
| const char* data_end) { |
| const ptrdiff_t available_data = data_end - data_pos; |
| // Don't read past the end of currently-available data |
| if (available_data > interleaved_bytes_expected_) { |
| instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_); |
| } else { |
| instructions_and_sizes_.Init(data_pos, available_data); |
| } |
| data_for_add_and_run_.Init(&instructions_and_sizes_); |
| addresses_for_copy_.Init(&instructions_and_sizes_); |
| } |
| |
| // If true, the interleaved format described in AllowInterleaved() is used |
| // for the current delta file. Only valid after ReadWindowHeader() has been |
| // called and returned a positive number (i.e., the whole header was parsed), |
| // but before the window has finished decoding. |
| // |
| bool IsInterleaved() const { |
| // If the sections are interleaved, both addresses_for_copy_ and |
| // data_for_add_and_run_ should point at instructions_and_sizes_. |
| return !addresses_for_copy_.IsOwned(); |
| } |
| |
| // Executes a single COPY or ADD instruction, appending data to |
| // parent_->decoded_target(). |
| void CopyBytes(const char* data, size_t size); |
| |
| // Executes a single RUN instruction, appending data to |
| // parent_->decoded_target(). |
| void RunByte(unsigned char byte, size_t size); |
| |
| // Advance *parseable_chunk to point to the current position in the |
| // instructions/sizes section. If interleaved format is used, then |
| // decrement the number of expected bytes in the instructions/sizes section |
| // by the number of instruction/size bytes parsed. |
| void UpdateInstructionPointer(ParseableChunk* parseable_chunk); |
| |
| // The parent object which was passed to Init(). |
| VCDiffStreamingDecoderImpl* parent_; |
| |
| // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader() |
| // has been called and succeeded in parsing the delta window header, but the |
| // entire window has not yet been decoded. |
| bool found_header_; |
| |
| // Contents and length of the current source window. source_segment_ptr_ |
| // will be non-NULL if (a) the window section header for the current window |
| // has been read, but the window has not yet finished decoding; or |
| // (b) the window did not specify a source segment. |
| const char* source_segment_ptr_; |
| size_t source_segment_length_; |
| |
| // The delta encoding window sections as defined in RFC section 4.3. |
| // The pointer for each section will be incremented as data is consumed and |
| // decoded from that section. If the interleaved format is used, |
| // data_for_add_and_run_ and addresses_for_copy_ will both point to |
| // instructions_and_sizes_; otherwise, they will be separate data sections. |
| // |
| DeltaWindowSection instructions_and_sizes_; |
| DeltaWindowSection data_for_add_and_run_; |
| DeltaWindowSection addresses_for_copy_; |
| |
| // The expected bytes left to decode in instructions_and_sizes_. Only used |
| // for the interleaved format. |
| int interleaved_bytes_expected_; |
| |
| // The expected length of the target window once it has been decoded. |
| size_t target_window_length_; |
| |
| // The index in decoded_target at which the first byte of the current |
| // target window was/will be written. |
| size_t target_window_start_pos_; |
| |
| // If has_checksum_ is true, then expected_checksum_ contains an Adler32 |
| // checksum of the target window data. This is an extension included in the |
| // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard. |
| bool has_checksum_; |
| VCDChecksum expected_checksum_; |
| |
| VCDiffCodeTableReader reader_; |
| |
| // Making these private avoids implicit copy constructor & assignment operator |
| VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); |
| void operator=(const VCDiffDeltaFileWindow&); |
| }; |
| |
| // *** Inline methods for VCDiffDeltaFileWindow |
| |
| inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) { |
| Reset(); |
| } |
| |
| inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { } |
| |
| inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) { |
| parent_ = parent; |
| } |
| |
| class VCDiffStreamingDecoderImpl { |
| public: |
| typedef std::string string; |
| |
| // The default maximum target file size (and target window size) if |
| // SetMaximumTargetFileSize() is not called. |
| static const size_t kDefaultMaximumTargetFileSize = 67108864U; // 64 MB |
| |
| // The largest value that can be passed to SetMaximumTargetWindowSize(). |
| // Using a larger value will result in an error. |
| static const size_t kTargetSizeLimit = 2147483647U; // INT32_MAX |
| |
| // A constant that is the default value for planned_target_file_size_, |
| // indicating that the decoder does not have an expected length |
| // for the target data. |
| static const size_t kUnlimitedBytes = static_cast<size_t>(-3); |
| |
| VCDiffStreamingDecoderImpl(); |
| ~VCDiffStreamingDecoderImpl(); |
| |
| // Resets all member variables to their initial states. |
| void Reset(); |
| |
| // These functions are identical to their counterparts |
| // in VCDiffStreamingDecoder. |
| // |
| void StartDecoding(const char* dictionary_ptr, size_t dictionary_size); |
| |
| bool DecodeChunk(const char* data, |
| size_t len, |
| OutputStringInterface* output_string); |
| |
| bool FinishDecoding(); |
| |
| // If true, the version of VCDIFF used in the current delta file allows |
| // for the interleaved format, in which instructions, addresses and data |
| // are all sent interleaved in the instructions section of each window |
| // rather than being sent in separate sections. This is not part of |
| // the VCDIFF draft standard, so we've defined a special version code |
| // 'S' which implies that this feature is available. Even if interleaving |
| // is supported, it is not mandatory; interleaved format will be implied |
| // if the address and data sections are both zero-length. |
| // |
| bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; } |
| |
| // If true, the version of VCDIFF used in the current delta file allows |
| // each delta window to contain an Adler32 checksum of the target window data. |
| // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then |
| // this checksum will appear as a variable-length integer, just after the |
| // "length of addresses for COPYs" value and before the window data sections. |
| // It is possible for some windows in a delta file to use the checksum feature |
| // and for others not to use it (and leave the flag bit set to 0.) |
| // Just as with AllowInterleaved(), this extension is not part of the draft |
| // standard and is only available when the version code 'S' is specified. |
| // |
| bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; } |
| |
| bool SetMaximumTargetFileSize(size_t new_maximum_target_file_size) { |
| maximum_target_file_size_ = new_maximum_target_file_size; |
| return true; |
| } |
| |
| bool SetMaximumTargetWindowSize(size_t new_maximum_target_window_size) { |
| if (new_maximum_target_window_size > kTargetSizeLimit) { |
| VCD_ERROR << "Specified maximum target window size " |
| << new_maximum_target_window_size << " exceeds limit of " |
| << kTargetSizeLimit << " bytes" << VCD_ENDL; |
| return false; |
| } |
| maximum_target_window_size_ = new_maximum_target_window_size; |
| return true; |
| } |
| |
| // See description of planned_target_file_size_, below. |
| bool HasPlannedTargetFileSize() const { |
| return planned_target_file_size_ != kUnlimitedBytes; |
| } |
| |
| void SetPlannedTargetFileSize(size_t planned_target_file_size) { |
| planned_target_file_size_ = planned_target_file_size; |
| } |
| |
| void AddToTotalTargetWindowSize(size_t window_size) { |
| total_of_target_window_sizes_ += window_size; |
| } |
| |
| // Checks to see whether the decoded target data has reached its planned size. |
| bool ReachedPlannedTargetFileSize() const { |
| if (!HasPlannedTargetFileSize()) { |
| return false; |
| } |
| // The planned target file size should not have been exceeded. |
| // TargetWindowWouldExceedSizeLimits() ensures that the advertised size of |
| // each target window would not make the target file exceed that limit, and |
| // DecodeBody() will return RESULT_ERROR if the actual decoded output ever |
| // exceeds the advertised target window size. |
| if (total_of_target_window_sizes_ > planned_target_file_size_) { |
| VCD_DFATAL << "Internal error: Decoded data size " |
| << total_of_target_window_sizes_ |
| << " exceeds planned target file size " |
| << planned_target_file_size_ << VCD_ENDL; |
| return true; |
| } |
| return total_of_target_window_sizes_ == planned_target_file_size_; |
| } |
| |
| // Checks to see whether adding a new target window of the specified size |
| // would exceed the planned target file size, the maximum target file size, |
| // or the maximum target window size. If so, logs an error and returns true; |
| // otherwise, returns false. |
| bool TargetWindowWouldExceedSizeLimits(size_t window_size) const; |
| |
| // Returns the amount of input data passed to the last DecodeChunk() |
| // that was not consumed by the decoder. This is essential if |
| // SetPlannedTargetFileSize() is being used, in order to preserve the |
| // remaining input data stream once the planned target file has been decoded. |
| size_t GetUnconsumedDataSize() const { |
| return unparsed_bytes_.size(); |
| } |
| |
| // This function will return true if the decoder has parsed a complete delta |
| // file header plus zero or more delta file windows, with no data left over. |
| // It will also return true if no delta data at all was decoded. If these |
| // conditions are not met, then FinishDecoding() should not be called. |
| bool IsDecodingComplete() const { |
| if (!FoundFileHeader()) { |
| // No complete delta file header has been parsed yet. DecodeChunk() |
| // may have received some data that it hasn't yet parsed, in which case |
| // decoding is incomplete. |
| return unparsed_bytes_.empty(); |
| } else if (custom_code_table_decoder_.get()) { |
| // The decoder is in the middle of parsing a custom code table. |
| return false; |
| } else if (delta_window_.FoundWindowHeader()) { |
| // The decoder is in the middle of parsing an interleaved format delta |
| // window. |
| return false; |
| } else if (ReachedPlannedTargetFileSize()) { |
| // The decoder found exactly the planned number of bytes. In this case |
| // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover |
| // data after the end of the delta file. |
| return true; |
| } else { |
| // No complete delta file window has been parsed yet. DecodeChunk() |
| // may have received some data that it hasn't yet parsed, in which case |
| // decoding is incomplete. |
| return unparsed_bytes_.empty(); |
| } |
| } |
| |
| const char* dictionary_ptr() const { return dictionary_ptr_; } |
| |
| size_t dictionary_size() const { return dictionary_size_; } |
| |
| VCDiffAddressCache* addr_cache() { return addr_cache_.get(); } |
| |
| string* decoded_target() { return &decoded_target_; } |
| |
| bool allow_vcd_target() const { return allow_vcd_target_; } |
| |
| void SetAllowVcdTarget(bool allow_vcd_target) { |
| if (start_decoding_was_called_) { |
| VCD_DFATAL << "SetAllowVcdTarget() called after StartDecoding()" |
| << VCD_ENDL; |
| return; |
| } |
| allow_vcd_target_ = allow_vcd_target; |
| } |
| |
| private: |
| // Reads the VCDiff delta file header section as described in RFC section 4.1, |
| // except the custom code table data. Returns RESULT_ERROR if an error |
| // occurred, or RESULT_END_OF_DATA if the end of available data was reached |
| // before the entire header could be read. (The latter may be an error |
| // condition if there is no more data available.) Otherwise, advances |
| // data->position_ past the header and returns RESULT_SUCCESS. |
| // |
| VCDiffResult ReadDeltaFileHeader(ParseableChunk* data); |
| |
| // Indicates whether or not the header has already been read. |
| bool FoundFileHeader() const { return addr_cache_.get() != NULL; } |
| |
| // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta |
| // file header, this function parses the custom cache sizes and initializes |
| // a nested VCDiffStreamingDecoderImpl object that will be used to parse the |
| // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an |
| // error occurred, or RESULT_END_OF_DATA if the end of available data was |
| // reached before the custom cache sizes could be read. Otherwise, returns |
| // the number of bytes read. |
| // |
| int InitCustomCodeTable(const char* data_start, const char* data_end); |
| |
| // If a custom code table was specified in the header section that was parsed |
| // by ReadDeltaFileHeader(), this function makes a recursive call to another |
| // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the |
| // custom code table is expected to be supplied as an embedded VCDIFF |
| // encoding that uses the standard code table. Returns RESULT_ERROR if an |
| // error occurs, or RESULT_END_OF_DATA if the end of available data was |
| // reached before the entire custom code table could be read. Otherwise, |
| // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded |
| // custom code table. If the function returns RESULT_SUCCESS or |
| // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes. |
| // |
| VCDiffResult ReadCustomCodeTable(ParseableChunk* data); |
| |
| // Called after the decoder exhausts all input data. This function |
| // copies from decoded_target_ into output_string all the data that |
| // has not yet been output. It sets decoded_target_output_position_ |
| // to mark the start of the next data that needs to be output. |
| void AppendNewOutputText(OutputStringInterface* output_string); |
| |
| // Appends to output_string the portion of decoded_target_ that has |
| // not yet been output, then clears decoded_target_. This function is |
| // called after each complete target window has been decoded if |
| // allow_vcd_target is false. In that case, there is no need to retain |
| // target data from any window except the current window. |
| void FlushDecodedTarget(OutputStringInterface* output_string); |
| |
| // Contents and length of the source (dictionary) data. |
| const char* dictionary_ptr_; |
| size_t dictionary_size_; |
| |
| // This string will be used to store any unparsed bytes left over when |
| // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA. |
| // It will also be used to concatenate those unparsed bytes with the data |
| // supplied to the next call to DecodeChunk(), so that they appear in |
| // contiguous memory. |
| string unparsed_bytes_; |
| |
| // The portion of the target file that has been decoded so far. This will be |
| // used to fill the output string for DecodeChunk(), and will also be used to |
| // execute COPY instructions that reference target data. Since the source |
| // window can come from a range of addresses in the previously decoded target |
| // data, the entire target file needs to be available to the decoder, not just |
| // the current target window. |
| string decoded_target_; |
| |
| // The VCDIFF version byte (also known as "header4") from the |
| // delta file header. |
| unsigned char vcdiff_version_code_; |
| |
| VCDiffDeltaFileWindow delta_window_; |
| |
| UNIQUE_PTR<VCDiffAddressCache> addr_cache_; |
| |
| // Will be NULL unless a custom code table has been defined. |
| UNIQUE_PTR<VCDiffCodeTableData> custom_code_table_; |
| |
| // Used to receive the decoded custom code table. |
| string custom_code_table_string_; |
| |
| // If a custom code table is specified, it will be expressed |
| // as an embedded VCDIFF delta file which uses the default code table |
| // as the source file (dictionary). Use a child decoder object |
| // to decode that delta file. |
| UNIQUE_PTR<VCDiffStreamingDecoderImpl> custom_code_table_decoder_; |
| |
| // If set, then the decoder is expecting *exactly* this number of |
| // target bytes to be decoded from one or more delta file windows. |
| // If this number is exceeded while decoding a window, but was not met |
| // before starting on that window, an error will be reported. |
| // If FinishDecoding() is called before this number is met, an error |
| // will also be reported. This feature is used for decoding the |
| // embedded code table data within a VCDIFF delta file; we want to |
| // stop processing the embedded data once the entire code table has |
| // been decoded, and treat the rest of the available data as part |
| // of the enclosing delta file. |
| size_t planned_target_file_size_; |
| |
| size_t maximum_target_file_size_; |
| |
| size_t maximum_target_window_size_; |
| |
| // Contains the sum of the decoded sizes of all target windows seen so far, |
| // including the expected total size of the current target window in progress |
| // (even if some of the current target window has not yet been decoded.) |
| size_t total_of_target_window_sizes_; |
| |
| // Contains the byte position within decoded_target_ of the first data that |
| // has not yet been output by AppendNewOutputText(). |
| size_t decoded_target_output_position_; |
| |
| // This value is used to ensure the correct order of calls to the interface |
| // functions, i.e., a single call to StartDecoding(), followed by zero or |
| // more calls to DecodeChunk(), followed by a single call to |
| // FinishDecoding(). |
| bool start_decoding_was_called_; |
| |
| // If this value is true then the VCD_TARGET flag can be specified to allow |
| // the source segment to be chosen from the previously-decoded target data. |
| // (This is the default behavior.) If it is false, then specifying the |
| // VCD_TARGET flag is considered an error, and the decoder does not need to |
| // keep in memory any decoded target data prior to the current window. |
| bool allow_vcd_target_; |
| |
| // Making these private avoids implicit copy constructor & assignment operator |
| VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); |
| void operator=(const VCDiffStreamingDecoderImpl&); |
| }; |
| |
| // *** Methods for VCDiffStreamingDecoderImpl |
| |
| const size_t VCDiffStreamingDecoderImpl::kDefaultMaximumTargetFileSize; |
| const size_t VCDiffStreamingDecoderImpl::kUnlimitedBytes; |
| |
| VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl() |
| : maximum_target_file_size_(kDefaultMaximumTargetFileSize), |
| maximum_target_window_size_(kDefaultMaximumTargetFileSize), |
| allow_vcd_target_(true) { |
| delta_window_.Init(this); |
| Reset(); |
| } |
| |
| // Reset() will delete the component objects without reallocating them. |
| VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); } |
| |
| void VCDiffStreamingDecoderImpl::Reset() { |
| start_decoding_was_called_ = false; |
| dictionary_ptr_ = NULL; |
| dictionary_size_ = 0; |
| vcdiff_version_code_ = '\0'; |
| planned_target_file_size_ = kUnlimitedBytes; |
| total_of_target_window_sizes_ = 0; |
| addr_cache_.reset(); |
| custom_code_table_.reset(); |
| custom_code_table_decoder_.reset(); |
| delta_window_.Reset(); |
| decoded_target_output_position_ = 0; |
| } |
| |
| void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr, |
| size_t dictionary_size) { |
| if (start_decoding_was_called_) { |
| VCD_DFATAL << "StartDecoding() called twice without FinishDecoding()" |
| << VCD_ENDL; |
| return; |
| } |
| unparsed_bytes_.clear(); |
| decoded_target_.clear(); // delta_window_.Reset() depends on this |
| Reset(); |
| dictionary_ptr_ = dictionary_ptr; |
| dictionary_size_ = dictionary_size; |
| start_decoding_was_called_ = true; |
| } |
| |
| // Reads the VCDiff delta file header section as described in RFC section 4.1: |
| // |
| // Header1 - byte = 0xD6 (ASCII 'V' | 0x80) |
| // Header2 - byte = 0xC3 (ASCII 'C' | 0x80) |
| // Header3 - byte = 0xC4 (ASCII 'D' | 0x80) |
| // Header4 - byte |
| // Hdr_Indicator - byte |
| // [Secondary compressor ID] - byte |
| // [Length of code table data] - integer |
| // [Code table data] |
| // |
| // Initializes the code table and address cache objects. Returns RESULT_ERROR |
| // if an error occurred, and RESULT_END_OF_DATA if the end of available data was |
| // reached before the entire header could be read. (The latter may be an error |
| // condition if there is no more data available.) Otherwise, returns |
| // RESULT_SUCCESS, and removes the header bytes from the data string. |
| // |
| // It's relatively inefficient to expect this function to parse any number of |
| // input bytes available, down to 1 byte, but it is necessary in case the input |
| // is not a properly formatted VCDIFF delta file. If the entire input consists |
| // of two bytes "12", then we should recognize that it does not match the |
| // initial VCDIFF magic number "VCD" and report an error, rather than waiting |
| // indefinitely for more input that will never arrive. |
| // |
| VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader( |
| ParseableChunk* data) { |
| if (FoundFileHeader()) { |
| return RESULT_SUCCESS; |
| } |
| size_t data_size = data->UnparsedSize(); |
| const DeltaFileHeader* header = |
| reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData()); |
| bool wrong_magic_number = false; |
| switch (data_size) { |
| // Verify only the bytes that are available. |
| default: |
| // Found header contents up to and including VCDIFF version |
| vcdiff_version_code_ = header->header4; |
| if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284) |
| (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol |
| VCD_ERROR << "Unrecognized VCDIFF format version" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| FALLTHROUGH_INTENDED; |
| case 3: |
| if (header->header3 != 0xC4) { // magic value 'D' | 0x80 |
| wrong_magic_number = true; |
| } |
| FALLTHROUGH_INTENDED; |
| case 2: |
| if (header->header2 != 0xC3) { // magic value 'C' | 0x80 |
| wrong_magic_number = true; |
| } |
| FALLTHROUGH_INTENDED; |
| case 1: |
| if (header->header1 != 0xD6) { // magic value 'V' | 0x80 |
| wrong_magic_number = true; |
| } |
| FALLTHROUGH_INTENDED; |
| case 0: |
| if (wrong_magic_number) { |
| VCD_ERROR << "Did not find VCDIFF header bytes; " |
| "input is not a VCDIFF delta file" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA; |
| break; |
| } |
| // Secondary compressor not supported. |
| if (header->hdr_indicator & VCD_DECOMPRESS) { |
| VCD_ERROR << "Secondary compression is not supported" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| if (header->hdr_indicator & VCD_CODETABLE) { |
| int bytes_parsed = InitCustomCodeTable( |
| data->UnparsedData() + sizeof(DeltaFileHeader), |
| data->End()); |
| switch (bytes_parsed) { |
| case RESULT_ERROR: |
| return RESULT_ERROR; |
| case RESULT_END_OF_DATA: |
| return RESULT_END_OF_DATA; |
| default: |
| data->Advance(sizeof(DeltaFileHeader) + bytes_parsed); |
| } |
| } else { |
| addr_cache_.reset(new VCDiffAddressCache); |
| // addr_cache_->Init() will be called |
| // from VCDiffStreamingDecoderImpl::DecodeChunk() |
| data->Advance(sizeof(DeltaFileHeader)); |
| } |
| return RESULT_SUCCESS; |
| } |
| |
| int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start, |
| const char* data_end) { |
| // A custom code table is being specified. Parse the variable-length |
| // cache sizes and begin parsing the encoded custom code table. |
| int32_t near_cache_size = 0, same_cache_size = 0; |
| VCDiffHeaderParser header_parser(data_start, data_end); |
| if (!header_parser.ParseInt32("size of near cache", &near_cache_size) || |
| !header_parser.ParseInt32("size of same cache", &same_cache_size)) { |
| return header_parser.GetResult(); |
| } |
| |
| // We need not check for negative values, ParseInt32() will never return them. |
| if (near_cache_size > UCHAR_MAX) { |
| VCD_DFATAL << "Near cache size " << near_cache_size << " is invalid" |
| << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| if (same_cache_size > UCHAR_MAX) { |
| VCD_DFATAL << "Same cache size " << same_cache_size << " is invalid" |
| << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| |
| custom_code_table_.reset(new struct VCDiffCodeTableData); |
| memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData)); |
| custom_code_table_string_.clear(); |
| addr_cache_.reset(new VCDiffAddressCache( |
| static_cast<unsigned char>(near_cache_size), |
| static_cast<unsigned char>(same_cache_size))); |
| // addr_cache_->Init() will be called |
| // from VCDiffStreamingDecoderImpl::DecodeChunk() |
| |
| // If we reach this point (the start of the custom code table) |
| // without encountering a RESULT_END_OF_DATA condition, then we won't call |
| // ReadDeltaFileHeader() again for this delta file. |
| // |
| // Instantiate a recursive decoder to interpret the custom code table |
| // as a VCDIFF encoding of the default code table. |
| custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl); |
| custom_code_table_decoder_->StartDecoding( |
| reinterpret_cast<const char*>( |
| &VCDiffCodeTableData::kDefaultCodeTableData), |
| sizeof(VCDiffCodeTableData::kDefaultCodeTableData)); |
| custom_code_table_decoder_->SetPlannedTargetFileSize( |
| sizeof(*custom_code_table_)); |
| return static_cast<int>(header_parser.ParsedSize()); |
| } |
| |
| VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable( |
| ParseableChunk* data) { |
| if (!custom_code_table_decoder_.get()) { |
| return RESULT_SUCCESS; |
| } |
| if (!custom_code_table_.get()) { |
| VCD_DFATAL << "Internal error: custom_code_table_decoder_ is set," |
| " but custom_code_table_ is NULL" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| OutputString<string> output_string(&custom_code_table_string_); |
| if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(), |
| data->UnparsedSize(), |
| &output_string)) { |
| return RESULT_ERROR; |
| } |
| if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) { |
| // Skip over the consumed data. |
| data->Finish(); |
| return RESULT_END_OF_DATA; |
| } |
| if (!custom_code_table_decoder_->FinishDecoding()) { |
| return RESULT_ERROR; |
| } |
| if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) { |
| VCD_DFATAL << "Decoded custom code table size (" |
| << custom_code_table_string_.length() |
| << ") does not match size of a code table (" |
| << sizeof(*custom_code_table_) << ")" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| memcpy(custom_code_table_.get(), |
| custom_code_table_string_.data(), |
| sizeof(*custom_code_table_)); |
| custom_code_table_string_.clear(); |
| // Skip over the consumed data. |
| data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize()); |
| custom_code_table_decoder_.reset(); |
| delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode()); |
| return RESULT_SUCCESS; |
| } |
| |
| void VCDiffStreamingDecoderImpl::FlushDecodedTarget( |
| OutputStringInterface* output_string) { |
| output_string->append( |
| decoded_target_.data() + decoded_target_output_position_, |
| decoded_target_.size() - decoded_target_output_position_); |
| decoded_target_.clear(); |
| delta_window_.set_target_window_start_pos(0); |
| decoded_target_output_position_ = 0; |
| } |
| |
| void VCDiffStreamingDecoderImpl::AppendNewOutputText( |
| OutputStringInterface* output_string) { |
| const size_t bytes_decoded_this_chunk = |
| decoded_target_.size() - decoded_target_output_position_; |
| if (bytes_decoded_this_chunk > 0) { |
| size_t target_bytes_remaining = delta_window_.TargetBytesRemaining(); |
| if (target_bytes_remaining > 0) { |
| // The decoder is midway through decoding a target window. Resize |
| // output_string to match the expected length. The interface guarantees |
| // not to resize output_string more than once per target window decoded. |
| output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk |
| + target_bytes_remaining); |
| } |
| output_string->append( |
| decoded_target_.data() + decoded_target_output_position_, |
| bytes_decoded_this_chunk); |
| decoded_target_output_position_ = decoded_target_.size(); |
| } |
| } |
| |
| bool VCDiffStreamingDecoderImpl::DecodeChunk( |
| const char* data, |
| size_t len, |
| OutputStringInterface* output_string) { |
| if (!start_decoding_was_called_) { |
| VCD_DFATAL << "DecodeChunk() called without StartDecoding()" << VCD_ENDL; |
| Reset(); |
| return false; |
| } |
| ParseableChunk parseable_chunk(data, len); |
| if (!unparsed_bytes_.empty()) { |
| unparsed_bytes_.append(data, len); |
| parseable_chunk.SetDataBuffer(unparsed_bytes_.data(), |
| unparsed_bytes_.size()); |
| } |
| VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk); |
| if (RESULT_SUCCESS == result) { |
| result = ReadCustomCodeTable(&parseable_chunk); |
| } |
| if (RESULT_SUCCESS == result) { |
| while (!parseable_chunk.Empty()) { |
| result = delta_window_.DecodeWindow(&parseable_chunk); |
| if (RESULT_SUCCESS != result) { |
| break; |
| } |
| if (ReachedPlannedTargetFileSize()) { |
| // Found exactly the length we expected. Stop decoding. |
| break; |
| } |
| if (!allow_vcd_target()) { |
| // VCD_TARGET will never be used to reference target data before the |
| // start of the current window, so flush and clear the contents of |
| // decoded_target_. |
| FlushDecodedTarget(output_string); |
| } |
| } |
| } |
| if (RESULT_ERROR == result) { |
| Reset(); // Don't allow further DecodeChunk calls |
| return false; |
| } |
| unparsed_bytes_.assign(parseable_chunk.UnparsedData(), |
| parseable_chunk.UnparsedSize()); |
| AppendNewOutputText(output_string); |
| return true; |
| } |
| |
| // Finishes decoding after all data has been received. Returns true |
| // if decoding of the entire stream was successful. |
| bool VCDiffStreamingDecoderImpl::FinishDecoding() { |
| bool success = true; |
| if (!start_decoding_was_called_) { |
| VCD_WARNING << "FinishDecoding() called before StartDecoding()," |
| " or called after DecodeChunk() returned false" |
| << VCD_ENDL; |
| success = false; |
| } else if (!IsDecodingComplete()) { |
| VCD_ERROR << "FinishDecoding() called before parsing entire" |
| " delta file window" << VCD_ENDL; |
| success = false; |
| } |
| // Reset the object state for the next decode operation |
| Reset(); |
| return success; |
| } |
| |
| bool VCDiffStreamingDecoderImpl::TargetWindowWouldExceedSizeLimits( |
| size_t window_size) const { |
| if (window_size > maximum_target_window_size_) { |
| VCD_ERROR << "Length of target window (" << window_size |
| << ") exceeds limit of " << maximum_target_window_size_ |
| << " bytes" << VCD_ENDL; |
| return true; |
| } |
| if (HasPlannedTargetFileSize()) { |
| // The logical expression to check would be: |
| // |
| // total_of_target_window_sizes_ + window_size > planned_target_file_size_ |
| // |
| // but the addition might cause an integer overflow if target_bytes_to_add |
| // is very large. So it is better to check target_bytes_to_add against |
| // the remaining planned target bytes. |
| size_t remaining_planned_target_file_size = |
| planned_target_file_size_ - total_of_target_window_sizes_; |
| if (window_size > remaining_planned_target_file_size) { |
| VCD_ERROR << "Length of target window (" << window_size |
| << " bytes) plus previous windows (" |
| << total_of_target_window_sizes_ |
| << " bytes) would exceed planned size of " |
| << planned_target_file_size_ << " bytes" << VCD_ENDL; |
| return true; |
| } |
| } |
| size_t remaining_maximum_target_bytes = |
| maximum_target_file_size_ - total_of_target_window_sizes_; |
| if (window_size > remaining_maximum_target_bytes) { |
| VCD_ERROR << "Length of target window (" << window_size |
| << " bytes) plus previous windows (" |
| << total_of_target_window_sizes_ |
| << " bytes) would exceed maximum target file size of " |
| << maximum_target_file_size_ << " bytes" << VCD_ENDL; |
| return true; |
| } |
| return false; |
| } |
| |
| // *** Methods for VCDiffDeltaFileWindow |
| |
| void VCDiffDeltaFileWindow::Reset() { |
| found_header_ = false; |
| |
| // Mark the start of the current target window. |
| target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U; |
| target_window_length_ = 0; |
| |
| source_segment_ptr_ = NULL; |
| source_segment_length_ = 0; |
| |
| instructions_and_sizes_.Invalidate(); |
| data_for_add_and_run_.Invalidate(); |
| addresses_for_copy_.Invalidate(); |
| |
| interleaved_bytes_expected_ = 0; |
| |
| has_checksum_ = false; |
| expected_checksum_ = 0; |
| } |
| |
| VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections( |
| VCDiffHeaderParser* header_parser) { |
| size_t add_and_run_data_length = 0; |
| size_t instructions_and_sizes_length = 0; |
| size_t addresses_length = 0; |
| if (!header_parser->ParseSectionLengths(has_checksum_, |
| &add_and_run_data_length, |
| &instructions_and_sizes_length, |
| &addresses_length, |
| &expected_checksum_)) { |
| return header_parser->GetResult(); |
| } |
| if (parent_->AllowInterleaved() && |
| (add_and_run_data_length == 0) && |
| (addresses_length == 0)) { |
| // The interleaved format is being used. |
| interleaved_bytes_expected_ = |
| static_cast<int>(instructions_and_sizes_length); |
| UpdateInterleavedSectionPointers(header_parser->UnparsedData(), |
| header_parser->End()); |
| } else { |
| // If interleaved format is not used, then the whole window contents |
| // must be available before decoding can begin. If only part of |
| // the current window is available, then report end of data |
| // and re-parse the whole header when DecodeChunk() is called again. |
| if (header_parser->UnparsedSize() < (add_and_run_data_length + |
| instructions_and_sizes_length + |
| addresses_length)) { |
| return RESULT_END_OF_DATA; |
| } |
| data_for_add_and_run_.Init(header_parser->UnparsedData(), |
| add_and_run_data_length); |
| instructions_and_sizes_.Init(data_for_add_and_run_.End(), |
| instructions_and_sizes_length); |
| addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length); |
| if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) { |
| VCD_ERROR << "The end of the instructions section " |
| "does not match the end of the delta window" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| } |
| reader_.Init(instructions_and_sizes_.UnparsedDataAddr(), |
| instructions_and_sizes_.End()); |
| return RESULT_SUCCESS; |
| } |
| |
| // Here are the elements of the delta window header to be parsed, |
| // from section 4 of the RFC: |
| // |
| // Window1 |
| // Win_Indicator - byte |
| // [Source segment size] - integer |
| // [Source segment position] - integer |
| // The delta encoding of the target window |
| // Length of the delta encoding - integer |
| // The delta encoding |
| // Size of the target window - integer |
| // Delta_Indicator - byte |
| // Length of data for ADDs and RUNs - integer |
| // Length of instructions and sizes - integer |
| // Length of addresses for COPYs - integer |
| // Data section for ADDs and RUNs - array of bytes |
| // Instructions and sizes section - array of bytes |
| // Addresses section for COPYs - array of bytes |
| // |
| VCDiffResult VCDiffDeltaFileWindow::ReadHeader( |
| ParseableChunk* parseable_chunk) { |
| std::string* decoded_target = parent_->decoded_target(); |
| VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(), |
| parseable_chunk->End()); |
| size_t source_segment_position = 0; |
| unsigned char win_indicator = 0; |
| if (!header_parser.ParseWinIndicatorAndSourceSegment( |
| parent_->dictionary_size(), |
| decoded_target->size(), |
| parent_->allow_vcd_target(), |
| &win_indicator, |
| &source_segment_length_, |
| &source_segment_position)) { |
| return header_parser.GetResult(); |
| } |
| has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM); |
| if (!header_parser.ParseWindowLengths(&target_window_length_)) { |
| return header_parser.GetResult(); |
| } |
| if (parent_->TargetWindowWouldExceedSizeLimits(target_window_length_)) { |
| // An error has been logged by TargetWindowWouldExceedSizeLimits(). |
| return RESULT_ERROR; |
| } |
| header_parser.ParseDeltaIndicator(); |
| VCDiffResult setup_return_code = SetUpWindowSections(&header_parser); |
| if (RESULT_SUCCESS != setup_return_code) { |
| return setup_return_code; |
| } |
| // Reserve enough space in the output string for the current target window. |
| const size_t wanted_capacity = |
| target_window_start_pos_ + target_window_length_; |
| if (decoded_target->capacity() < wanted_capacity) { |
| decoded_target->reserve(wanted_capacity); |
| } |
| // Get a pointer to the start of the source segment. |
| if (win_indicator & VCD_SOURCE) { |
| source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position; |
| } else if (win_indicator & VCD_TARGET) { |
| // This assignment must happen after the reserve(). |
| // decoded_target should not be resized again while processing this window, |
| // so source_segment_ptr_ should remain valid. |
| source_segment_ptr_ = decoded_target->data() + source_segment_position; |
| } |
| // The whole window header was found and parsed successfully. |
| found_header_ = true; |
| parseable_chunk->Advance(header_parser.ParsedSize()); |
| parent_->AddToTotalTargetWindowSize(target_window_length_); |
| return RESULT_SUCCESS; |
| } |
| |
| void VCDiffDeltaFileWindow::UpdateInstructionPointer( |
| ParseableChunk* parseable_chunk) { |
| if (IsInterleaved()) { |
| size_t bytes_parsed = instructions_and_sizes_.ParsedSize(); |
| // Reduce expected instruction segment length by bytes parsed |
| interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed); |
| parseable_chunk->Advance(bytes_parsed); |
| } |
| } |
| |
| inline size_t VCDiffDeltaFileWindow::TargetBytesDecoded() { |
| return parent_->decoded_target()->size() - target_window_start_pos_; |
| } |
| |
| size_t VCDiffDeltaFileWindow::TargetBytesRemaining() { |
| if (target_window_length_ == 0) { |
| // There is no window being decoded at present |
| return 0; |
| } else { |
| return target_window_length_ - TargetBytesDecoded(); |
| } |
| } |
| |
| inline void VCDiffDeltaFileWindow::CopyBytes(const char* data, size_t size) { |
| parent_->decoded_target()->append(data, size); |
| } |
| |
| inline void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) { |
| parent_->decoded_target()->append(size, byte); |
| } |
| |
| VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) { |
| if (size > data_for_add_and_run_.UnparsedSize()) { |
| return RESULT_END_OF_DATA; |
| } |
| // Write the next "size" data bytes |
| CopyBytes(data_for_add_and_run_.UnparsedData(), size); |
| data_for_add_and_run_.Advance(size); |
| return RESULT_SUCCESS; |
| } |
| |
| VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) { |
| if (data_for_add_and_run_.Empty()) { |
| return RESULT_END_OF_DATA; |
| } |
| // Write "size" copies of the next data byte |
| RunByte(*data_for_add_and_run_.UnparsedData(), size); |
| data_for_add_and_run_.Advance(1); |
| return RESULT_SUCCESS; |
| } |
| |
| VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size, |
| unsigned char mode) { |
| // Keep track of the number of target bytes decoded as a local variable |
| // to avoid recalculating it each time it is needed. |
| size_t target_bytes_decoded = TargetBytesDecoded(); |
| const VCDAddress here_address = |
| static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded); |
| const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress( |
| here_address, |
| mode, |
| addresses_for_copy_.UnparsedDataAddr(), |
| addresses_for_copy_.End()); |
| switch (decoded_address) { |
| case RESULT_ERROR: |
| VCD_ERROR << "Unable to decode address for COPY" << VCD_ENDL; |
| return RESULT_ERROR; |
| case RESULT_END_OF_DATA: |
| return RESULT_END_OF_DATA; |
| default: |
| if ((decoded_address < 0) || (decoded_address > here_address)) { |
| VCD_DFATAL << "Internal error: unexpected address " << decoded_address |
| << " returned from DecodeAddress, with here_address = " |
| << here_address << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| break; |
| } |
| size_t address = static_cast<size_t>(decoded_address); |
| if ((address + size) <= source_segment_length_) { |
| // Copy all data from source segment |
| CopyBytes(&source_segment_ptr_[address], size); |
| return RESULT_SUCCESS; |
| } |
| // Copy some data from target window... |
| if (address < source_segment_length_) { |
| // ... plus some data from source segment |
| const size_t partial_copy_size = source_segment_length_ - address; |
| CopyBytes(&source_segment_ptr_[address], partial_copy_size); |
| target_bytes_decoded += partial_copy_size; |
| address += partial_copy_size; |
| size -= partial_copy_size; |
| } |
| address -= source_segment_length_; |
| // address is now based at start of target window |
| const char* const target_segment_ptr = parent_->decoded_target()->data() + |
| target_window_start_pos_; |
| while (size > (target_bytes_decoded - address)) { |
| // Recursive copy that extends into the yet-to-be-copied target data |
| const size_t partial_copy_size = target_bytes_decoded - address; |
| CopyBytes(&target_segment_ptr[address], partial_copy_size); |
| target_bytes_decoded += partial_copy_size; |
| address += partial_copy_size; |
| size -= partial_copy_size; |
| } |
| CopyBytes(&target_segment_ptr[address], size); |
| return RESULT_SUCCESS; |
| } |
| |
| int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) { |
| if (IsInterleaved() && (instructions_and_sizes_.UnparsedData() |
| != parseable_chunk->UnparsedData())) { |
| VCD_DFATAL << "Internal error: interleaved format is used, but the" |
| " input pointer does not point to the instructions section" |
| << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| while (TargetBytesDecoded() < target_window_length_) { |
| int32_t decoded_size = VCD_INSTRUCTION_ERROR; |
| unsigned char mode = 0; |
| VCDiffInstructionType instruction = |
| reader_.GetNextInstruction(&decoded_size, &mode); |
| switch (instruction) { |
| case VCD_INSTRUCTION_END_OF_DATA: |
| UpdateInstructionPointer(parseable_chunk); |
| return RESULT_END_OF_DATA; |
| case VCD_INSTRUCTION_ERROR: |
| return RESULT_ERROR; |
| default: |
| break; |
| } |
| const size_t size = static_cast<size_t>(decoded_size); |
| // The value of "size" itself could be enormous (say, INT32_MAX) |
| // so check it individually against the limit to protect against |
| // overflow when adding it to something else. |
| if ((size > target_window_length_) || |
| ((size + TargetBytesDecoded()) > target_window_length_)) { |
| VCD_ERROR << VCDiffInstructionName(instruction) |
| << " with size " << size |
| << " plus existing " << TargetBytesDecoded() |
| << " bytes of target data exceeds length of target" |
| " window (" << target_window_length_ << " bytes)" |
| << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| VCDiffResult result = RESULT_SUCCESS; |
| switch (instruction) { |
| case VCD_ADD: |
| result = DecodeAdd(size); |
| break; |
| case VCD_RUN: |
| result = DecodeRun(size); |
| break; |
| case VCD_COPY: |
| result = DecodeCopy(size, mode); |
| break; |
| default: |
| VCD_DFATAL << "Unexpected instruction type " << instruction |
| << "in opcode stream" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| switch (result) { |
| case RESULT_END_OF_DATA: |
| reader_.UnGetInstruction(); |
| UpdateInstructionPointer(parseable_chunk); |
| return RESULT_END_OF_DATA; |
| case RESULT_ERROR: |
| return RESULT_ERROR; |
| case RESULT_SUCCESS: |
| break; |
| } |
| } |
| if (TargetBytesDecoded() != target_window_length_) { |
| VCD_ERROR << "Decoded target window size (" << TargetBytesDecoded() |
| << " bytes) does not match expected size (" |
| << target_window_length_ << " bytes)" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| const char* const target_window_start = |
| parent_->decoded_target()->data() + target_window_start_pos_; |
| if (has_checksum_ && |
| (ComputeAdler32(target_window_start, target_window_length_) |
| != expected_checksum_)) { |
| VCD_ERROR << "Target data does not match checksum; this could mean " |
| "that the wrong dictionary was used" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| if (!instructions_and_sizes_.Empty()) { |
| VCD_ERROR << "Excess instructions and sizes left over " |
| "after decoding target window" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| if (!IsInterleaved()) { |
| // Standard format is being used, with three separate sections for the |
| // instructions, data, and addresses. |
| if (!data_for_add_and_run_.Empty()) { |
| VCD_ERROR << "Excess ADD/RUN data left over " |
| "after decoding target window" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| if (!addresses_for_copy_.Empty()) { |
| VCD_ERROR << "Excess COPY addresses left over " |
| "after decoding target window" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| // Reached the end of the window. Update the ParseableChunk to point to the |
| // end of the addresses section, which is the last section in the window. |
| parseable_chunk->SetPosition(addresses_for_copy_.End()); |
| } else { |
| // Interleaved format is being used. |
| UpdateInstructionPointer(parseable_chunk); |
| } |
| return RESULT_SUCCESS; |
| } |
| |
| VCDiffResult VCDiffDeltaFileWindow::DecodeWindow( |
| ParseableChunk* parseable_chunk) { |
| if (!parent_) { |
| VCD_DFATAL << "Internal error: VCDiffDeltaFileWindow::DecodeWindow() " |
| "called before VCDiffDeltaFileWindow::Init()" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| if (!found_header_) { |
| switch (ReadHeader(parseable_chunk)) { |
| case RESULT_END_OF_DATA: |
| return RESULT_END_OF_DATA; |
| case RESULT_ERROR: |
| return RESULT_ERROR; |
| default: |
| // Reset address cache between windows (RFC section 5.1) |
| if (!parent_->addr_cache()->Init()) { |
| VCD_DFATAL << "Error initializing address cache" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| } |
| } else { |
| // We are resuming a window that was partially decoded before a |
| // RESULT_END_OF_DATA was returned. This can only happen on the first |
| // loop iteration, and only if the interleaved format is enabled and used. |
| if (!IsInterleaved()) { |
| VCD_DFATAL << "Internal error: Resumed decoding of a delta file window" |
| " when interleaved format is not being used" << VCD_ENDL; |
| return RESULT_ERROR; |
| } |
| UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(), |
| parseable_chunk->End()); |
| reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(), |
| instructions_and_sizes_.End()); |
| } |
| switch (DecodeBody(parseable_chunk)) { |
| case RESULT_END_OF_DATA: |
| if (MoreDataExpected()) { |
| return RESULT_END_OF_DATA; |
| } else { |
| VCD_ERROR << "End of data reached while decoding VCDIFF delta file" |
| << VCD_ENDL; |
| FALLTHROUGH_INTENDED; |
| } |
| case RESULT_ERROR: |
| return RESULT_ERROR; |
| default: |
| break; // DecodeBody succeeded |
| } |
| // Get ready to read a new delta window |
| Reset(); |
| return RESULT_SUCCESS; |
| } |
| |
| // *** Methods for VCDiffStreamingDecoder |
| |
| VCDiffStreamingDecoder::VCDiffStreamingDecoder() |
| : impl_(new VCDiffStreamingDecoderImpl) { } |
| |
| VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; } |
| |
| void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) { |
| impl_->StartDecoding(source, len); |
| } |
| |
| bool VCDiffStreamingDecoder::DecodeChunkToInterface( |
| const char* data, |
| size_t len, |
| OutputStringInterface* output_string) { |
| return impl_->DecodeChunk(data, len, output_string); |
| } |
| |
| bool VCDiffStreamingDecoder::FinishDecoding() { |
| return impl_->FinishDecoding(); |
| } |
| |
| bool VCDiffStreamingDecoder::SetMaximumTargetFileSize( |
| size_t new_maximum_target_file_size) { |
| return impl_->SetMaximumTargetFileSize(new_maximum_target_file_size); |
| } |
| |
| bool VCDiffStreamingDecoder::SetMaximumTargetWindowSize( |
| size_t new_maximum_target_window_size) { |
| return impl_->SetMaximumTargetWindowSize(new_maximum_target_window_size); |
| } |
| |
| void VCDiffStreamingDecoder::SetAllowVcdTarget(bool allow_vcd_target) { |
| impl_->SetAllowVcdTarget(allow_vcd_target); |
| } |
| |
| bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr, |
| size_t dictionary_size, |
| const string& encoding, |
| OutputStringInterface* target) { |
| target->clear(); |
| decoder_.StartDecoding(dictionary_ptr, dictionary_size); |
| if (!decoder_.DecodeChunkToInterface(encoding.data(), |
| encoding.size(), |
| target)) { |
| return false; |
| } |
| return decoder_.FinishDecoding(); |
| } |
| |
| } // namespace open_vcdiff |