blob: 11f5c06444617619e3a1f1f99614ecad5c647898 [file] [log] [blame]
// Copyright 2013 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Author: dsites@google.com (Dick Sites)
//
//
#include "offsetmap.h"
#include <string.h> // for strcmp
#include <stdio.h> // for fprintf, stderr, fclose, etc
#include <algorithm> // for min
using namespace std;
namespace CLD2 {
// Constructor, destructor
OffsetMap::OffsetMap() {
Clear();
}
OffsetMap::~OffsetMap() {
}
// Clear the map
// After:
// next_diff_sub_ is 0
// Windows are the a and a' ranges covered by diffs_[next_diff_sub_-1]
// which is a fake range of width 0 mapping 0=>0
void OffsetMap::Clear() {
diffs_.clear();
pending_op_ = COPY_OP;
pending_length_ = 0;
next_diff_sub_ = 0;
current_lo_aoffset_ = 0;
current_hi_aoffset_ = 0;
current_lo_aprimeoffset_ = 0;
current_hi_aprimeoffset_ = 0;
current_diff_ = 0;
max_aoffset_ = 0; // Largest seen so far
max_aprimeoffset_ = 0; // Largest seen so far
}
static inline char OpPart(const char c) {
return (c >> 6) & 3;
}
static inline char LenPart(const char c) {
return c & 0x3f;
}
// Print map to file, for debugging
void OffsetMap::Printmap(const char* filename) {
FILE* fout;
bool needs_close = false;
if (strcmp(filename, "stdout") == 0) {
fout = stdout;
} else if (strcmp(filename, "stderr") == 0) {
fout = stderr;
} else {
fout = fopen(filename, "w");
needs_close = true;
}
if (fout == NULL) {
fprintf(stderr, "%s did not open\n", filename);
return;
}
Flush(); // Make sure any pending entry gets printed
fprintf(fout, "Offsetmap: %d bytes\n", static_cast<int>(diffs_.size()));
for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
fprintf(fout, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
if ((i % 20) == 19) {fprintf(fout, "\n");}
}
fprintf(fout, "\n");
if (needs_close) {
fclose(fout);
}
}
// Reset to offset 0
void OffsetMap::Reset() {
MaybeFlushAll();
next_diff_sub_ = 0;
current_lo_aoffset_ = 0;
current_hi_aoffset_ = 0;
current_lo_aprimeoffset_ = 0;
current_hi_aprimeoffset_ = 0;
current_diff_ = 0;
}
// Add to mapping from A to A', specifying how many next bytes are
// identical in A and A'
void OffsetMap::Copy(int bytes) {
if (bytes == 0) {return;}
max_aoffset_ += bytes; // Largest seen so far
max_aprimeoffset_ += bytes; // Largest seen so far
if (pending_op_ == COPY_OP) {
pending_length_ += bytes;
} else {
Flush();
pending_op_ = COPY_OP;
pending_length_ = bytes;
}
}
// Add to mapping from A to A', specifying how many next bytes are
// inserted in A' while not advancing in A at all
void OffsetMap::Insert(int bytes){
if (bytes == 0) {return;}
max_aprimeoffset_ += bytes; // Largest seen so far
if (pending_op_ == INSERT_OP) {
pending_length_ += bytes;
} else if ((bytes == 1) &&
(pending_op_ == DELETE_OP) && (pending_length_ == 1)) {
// Special-case exactly delete(1) insert(1) +> copy(1);
// all others backmap inserts to after deletes
pending_op_ = COPY_OP;
} else {
Flush();
pending_op_ = INSERT_OP;
pending_length_ = bytes;
}
}
// Add to mapping from A to A', specifying how many next bytes are
// deleted from A while not advancing in A' at all
void OffsetMap::Delete(int bytes){
if (bytes == 0) {return;}
max_aoffset_ += bytes; // Largest seen so far
if (pending_op_ == DELETE_OP) {
pending_length_ += bytes;
} else if ((bytes == 1) &&
(pending_op_ == INSERT_OP) && (pending_length_ == 1)) {
// Special-case exactly insert(1) delete(1) => copy(1);
// all others backmap deletes to after insertss
pending_op_ = COPY_OP;
} else {
Flush();
pending_op_ = DELETE_OP;
pending_length_ = bytes;
}
}
void OffsetMap::Flush() {
if (pending_length_ == 0) {
return;
}
// We may be emitting a copy op just after a copy op because +1 -1 cancelled
// inbetween. If the lengths don't need a prefix byte, combine them
if ((pending_op_ == COPY_OP) && !diffs_.empty()) {
char c = diffs_[diffs_.size() - 1];
MapOp prior_op = static_cast<MapOp>(OpPart(c));
int prior_len = LenPart(c);
if ((prior_op == COPY_OP) && ((prior_len + pending_length_) <= 0x3f)) {
diffs_[diffs_.size() - 1] += pending_length_;
pending_length_ = 0;
return;
}
}
if (pending_length_ > 0x3f) {
bool non_zero_emitted = false;
for (int shift = 30; shift > 0; shift -= 6) {
int prefix = (pending_length_ >> shift) & 0x3f;
if ((prefix > 0) || non_zero_emitted) {
Emit(PREFIX_OP, prefix);
non_zero_emitted = true;
}
}
}
Emit(pending_op_, pending_length_ & 0x3f);
pending_length_ = 0;
}
// Add one more entry to copy one byte off the end, then flush
void OffsetMap::FlushAll() {
Copy(1);
Flush();
}
// Flush all if necessary
void OffsetMap::MaybeFlushAll() {
if ((0 < pending_length_) || diffs_.empty()) {
FlushAll();
}
}
// Len may be 0, for example as the low piece of length=64
void OffsetMap::Emit(MapOp op, int len) {
char c = (static_cast<char>(op) << 6) | (len & 0x3f);
diffs_.push_back(c);
}
void OffsetMap::DumpString() {
for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
fprintf(stderr, "%c%02d ", "&=+-"[OpPart(diffs_[i])], LenPart(diffs_[i]));
}
fprintf(stderr, "\n");
// Print running table of correspondences
fprintf(stderr, " op A => A' (A forward-maps to A')\n");
int aoffset = 0;
int aprimeoffset = 0;
int length = 0;
for (int i = 0; i < static_cast<int>(diffs_.size()); ++i) {
char c = diffs_[i];
MapOp op = static_cast<MapOp>(OpPart(c));
int len = LenPart(c);
length = (length << 6) + len;
if (op == COPY_OP) {
aoffset += length;
aprimeoffset += length;
length = 0;
} else if (op == INSERT_OP) {
aoffset += 0;
aprimeoffset += length;
length = 0;
} else if (op == DELETE_OP) {
aoffset += length;
aprimeoffset += 0;
length = 0;
} else { // (op == PREFIX_OP)
// Do nothing else
}
fprintf(stderr, "[%3d] %c%02d %6d %6d%s\n",
i, "&=+-"[op], len,
aoffset, aprimeoffset,
(next_diff_sub_ == i) ? " <==next_diff_sub_" : "");
}
fprintf(stderr, "\n");
}
void OffsetMap::DumpWindow() {
fprintf(stderr, "DumpWindow(A => A'): max_aoffset_ = %d, "
"max_aprimeoffset_ = %d, next_diff_sub_ = %d<br>\n",
max_aoffset_, max_aprimeoffset_, next_diff_sub_);
fprintf(stderr, "A [%u..%u)\n",
current_lo_aoffset_, current_hi_aoffset_);
fprintf(stderr, "A' [%u..%u)\n",
current_lo_aprimeoffset_, current_hi_aprimeoffset_);
fprintf(stderr, " diff = %d\n", current_diff_);
DumpString();
}
//----------------------------------------------------------------------------//
// The guts of the 2013 design //
// If there are three ranges a b c in diffs_, we can be in one of five //
// states: LEFT of a, in ranges a b c, or RIGHT of c //
// In each state, there are windows A[Alo..Ahi), A'[A'lo..A'hi) and diffs_ //
// position next_diff_sub_ //
// There also are mapping constants max_aoffset_ and max_aprimeoffset_ //
// If LEFT, Alo=Ahi=0, A'lo=A'hi=0 and next_diff_sub_=0 //
// If RIGHT, Alo=Ahi=max_aoffset_, A'lo=A'hi=max_aprimeoffset_ and //
// next_diff_sub_=diffs_.size() //
// Otherwise, at least one of A[) and A'[) is non-empty and the first bytes //
// correspond to each other. If range i is active, next_diff_sub_ is at //
// the first byte of range i+1. Because of the length-prefix operator, //
// an individual range item in diffs_ may be multiple bytes //
// In all cases aprimeoffset = aoffset + current_diff_ //
// i.e. current_diff_ = aprimeoffset - aoffset //
// //
// In the degenerate case of diffs_.empty(), there are only two states //
// LEFT and RIGHT and the mapping is the identity mapping. //
// The initial state is LEFT. //
// It is an error to move left into LEFT or right into RIGHT, but the code //
// below is robust in these cases. //
//----------------------------------------------------------------------------//
void OffsetMap::SetLeft() {
current_lo_aoffset_ = 0;
current_hi_aoffset_ = 0;
current_lo_aprimeoffset_ = 0;
current_hi_aprimeoffset_ = 0;
current_diff_ = 0;
next_diff_sub_ = 0;
}
void OffsetMap::SetRight() {
current_lo_aoffset_ = max_aoffset_;
current_hi_aoffset_ = max_aoffset_;
current_lo_aprimeoffset_ = max_aprimeoffset_;
current_hi_aprimeoffset_ = max_aprimeoffset_;
current_diff_ = max_aprimeoffset_ - max_aoffset_;
next_diff_sub_ = 0;
}
// Back up over previous range, 1..5 bytes
// Return subscript at the beginning of that. Pins at 0
int OffsetMap::Backup(int sub) {
if (sub <= 0) {return 0;}
--sub;
while ((0 < sub) &&
(static_cast<MapOp>(OpPart(diffs_[sub - 1]) == PREFIX_OP))) {
--sub;
}
return sub;
}
// Parse next range, 1..5 bytes
// Return subscript just off the end of that
int OffsetMap::ParseNext(int sub, MapOp* op, int* length) {
*op = PREFIX_OP;
*length = 0;
char c;
while ((sub < static_cast<int>(diffs_.size())) && (*op == PREFIX_OP)) {
c = diffs_[sub++];
*op = static_cast<MapOp>(OpPart(c));
int len = LenPart(c);
*length = (*length << 6) + len;
}
// If mal-formed or in RIGHT, this will return with op = PREFIX_OP
// Mal-formed can include a trailing prefix byte with no following op
return sub;
}
// Parse previous range, 1..5 bytes
// Return current subscript
int OffsetMap::ParsePrevious(int sub, MapOp* op, int* length) {
sub = Backup(sub);
return ParseNext(sub, op, length);
}
// Quick debugging dump; does not parse multi-byte items, so just length & 0x3f
void OffsetMap::PrintPosition(const char* str) {
MapOp op = PREFIX_OP;
int length = 0;
if ((0 < next_diff_sub_) && (next_diff_sub_ <= static_cast<int>(diffs_.size()))) {
op = static_cast<MapOp>(OpPart(diffs_[next_diff_sub_ - 1]));
length = LenPart(diffs_[next_diff_sub_ - 1]);
}
fprintf(stderr, "%s[%d] %c%02d = A[%d..%d) ==> A'[%d..%d)\n",
str,
next_diff_sub_, "&=+-"[op], length,
current_lo_aoffset_, current_hi_aoffset_,
current_lo_aprimeoffset_, current_hi_aprimeoffset_);
}
// Move active window one range to the right
// Return true if move was OK
bool OffsetMap::MoveRight() {
// If at last range or RIGHT, set to RIGHT, return error
if (next_diff_sub_ >= static_cast<int>(diffs_.size())) {
SetRight();
return false;
}
// Actually OK to move right
MapOp op;
int length;
bool retval = true;
// If mal-formed or in RIGHT, this will return with op = PREFIX_OP
next_diff_sub_ = ParseNext(next_diff_sub_, &op, &length);
current_lo_aoffset_ = current_hi_aoffset_;
current_lo_aprimeoffset_ = current_hi_aprimeoffset_;
if (op == COPY_OP) {
current_hi_aoffset_ = current_lo_aoffset_ + length;
current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
} else if (op == INSERT_OP) {
current_hi_aoffset_ = current_lo_aoffset_ + 0;
current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + length;
} else if (op == DELETE_OP) {
current_hi_aoffset_ = current_lo_aoffset_ + length;
current_hi_aprimeoffset_ = current_lo_aprimeoffset_ + 0;
} else {
SetRight();
retval = false;
}
current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
return retval;
}
// Move active window one range to the left
// Return true if move was OK
bool OffsetMap::MoveLeft() {
// If at first range or LEFT, set to LEFT, return error
if (next_diff_sub_ <= 0) {
SetLeft();
return false;
}
// Back up over current active window
next_diff_sub_ = Backup(next_diff_sub_);
if (next_diff_sub_ <= 0) {
SetLeft();
return false;
}
// Actually OK to move left
MapOp op;
int length;
bool retval = true;
// If mal-formed or in LEFT, this will return with op = PREFIX_OP
next_diff_sub_ = ParsePrevious(next_diff_sub_, &op, &length);
current_hi_aoffset_ = current_lo_aoffset_;
current_hi_aprimeoffset_ = current_lo_aprimeoffset_;
if (op == COPY_OP) {
current_lo_aoffset_ = current_hi_aoffset_ - length;
current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
} else if (op == INSERT_OP) {
current_lo_aoffset_ = current_hi_aoffset_ - 0;
current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - length;
} else if (op == DELETE_OP) {
current_lo_aoffset_ = current_hi_aoffset_ - length;
current_lo_aprimeoffset_ = current_hi_aprimeoffset_ - 0;
} else {
SetLeft();
retval = false;
}
current_diff_ = current_lo_aprimeoffset_ - current_lo_aoffset_;
return true;
}
// Map an offset in A' to the corresponding offset in A
int OffsetMap::MapBack(int aprimeoffset){
MaybeFlushAll();
if (aprimeoffset < 0) {return 0;}
if (max_aprimeoffset_ <= aprimeoffset) {
return (aprimeoffset - max_aprimeoffset_) + max_aoffset_;
}
// If current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_,
// use current mapping, else move window left/right
bool ok = true;
while (ok && (aprimeoffset < current_lo_aprimeoffset_)) {
ok = MoveLeft();
}
while (ok && (current_hi_aprimeoffset_ <= aprimeoffset)) {
ok = MoveRight();
}
// So now current_lo_aprimeoffset_ <= aprimeoffset < current_hi_aprimeoffset_
int aoffset = aprimeoffset - current_diff_;
if (aoffset >= current_hi_aoffset_) {
// A' is in an insert region, all bytes of which backmap to A=hi_aoffset_
aoffset = current_hi_aoffset_;
}
return aoffset;
}
// Map an offset in A to the corresponding offset in A'
int OffsetMap::MapForward(int aoffset){
MaybeFlushAll();
if (aoffset < 0) {return 0;}
if (max_aoffset_ <= aoffset) {
return (aoffset - max_aoffset_) + max_aprimeoffset_;
}
// If current_lo_aoffset_ <= aoffset < current_hi_aoffset_,
// use current mapping, else move window left/right
bool ok = true;
while (ok && (aoffset < current_lo_aoffset_)) {
ok = MoveLeft();
}
while (ok && (current_hi_aoffset_ <= aoffset)) {
ok = MoveRight();
}
int aprimeoffset = aoffset + current_diff_;
if (aprimeoffset >= current_hi_aprimeoffset_) {
// A is in a delete region, all bytes of which map to A'=hi_aprimeoffset_
aprimeoffset = current_hi_aprimeoffset_;
}
return aprimeoffset;
}
// static
bool OffsetMap::CopyInserts(OffsetMap* source, OffsetMap* dest) {
bool ok = true;
while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
ok = source->MoveRight();
if (source->current_lo_aoffset_ != source->current_hi_aoffset_) {
return false;
}
dest->Insert(
source->current_hi_aprimeoffset_ - source->current_lo_aprimeoffset_);
}
return true;
}
// static
bool OffsetMap::CopyDeletes(OffsetMap* source, OffsetMap* dest) {
bool ok = true;
while (ok && (source->next_diff_sub_ != source->diffs_.size())) {
ok = source->MoveRight();
if (source->current_lo_aprimeoffset_ != source->current_hi_aprimeoffset_) {
return false;
}
dest->Delete(source->current_hi_aoffset_ - source->current_lo_aoffset_);
}
return true;
}
// static
void OffsetMap::ComposeOffsetMap(
OffsetMap* g, OffsetMap* f, OffsetMap* h) {
h->Clear();
f->Reset();
g->Reset();
int lo = 0;
for (;;) {
// Consume delete operations in f. This moves A without moving
// A' and A''.
if (lo >= g->current_hi_aoffset_ && CopyInserts(g, h)) {
if (lo >= f->current_hi_aprimeoffset_ && CopyDeletes(f, h)) {
// fprintf(stderr,
// "ComposeOffsetMap ERROR, f is longer than g.<br>\n");
}
// FlushAll(), called by Reset(), MapForward() or MapBack(), has
// added an extra COPY_OP to f and g, so this function has
// composed an extra COPY_OP in h from those. To avoid
// FlushAll() adds one more extra COPY_OP to h later, dispatch
// Flush() right now.
h->Flush();
return;
}
// Consume insert operations in g. This moves A'' without moving A
// and A'.
if (lo >= f->current_hi_aprimeoffset_) {
if (!CopyDeletes(f, h)) {
// fprintf(stderr,
// "ComposeOffsetMap ERROR, g is longer than f.<br>\n");
}
}
// Compose one operation which moves A' from lo to hi.
int hi = min(f->current_hi_aprimeoffset_, g->current_hi_aoffset_);
if (f->current_lo_aoffset_ != f->current_hi_aoffset_ &&
g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
h->Copy(hi - lo);
} else if (f->current_lo_aoffset_ != f->current_hi_aoffset_) {
h->Delete(hi - lo);
} else if (g->current_lo_aprimeoffset_ != g->current_hi_aprimeoffset_) {
h->Insert(hi - lo);
}
lo = hi;
}
}
// For testing only -- force a mapping
void OffsetMap::StuffIt(const string& diffs,
int max_aoffset, int max_aprimeoffset) {
Clear();
diffs_ = diffs;
max_aoffset_ = max_aoffset;
max_aprimeoffset_ = max_aprimeoffset;
}
} // namespace CLD2