// blob: 1c5663334b9bd76313ae3eedd315a7f2fc1bc820 [file] [log] [blame]
// Copyright 2016 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package proto
import (
"bytes"
"fmt"
"regexp"
"strings"
"unicode"
"github.com/golang/protobuf/proto"
)
// startRE matches a line that opens a heredoc: arbitrary leading text, then
// `<<`, optional whitespace, and a marker made only of ASCII letters and
// underscores, followed by nothing but whitespace. Submatch 1 is the text
// before `<<`; submatch 2 is the marker.
var startRE = regexp.MustCompile(`^(.*)<<\s*([_a-zA-Z]+)\s*$`)
// endREStr is a fmt template for the pattern that recognizes a heredoc's
// closing line: the marker (substituted for %s) alone on the line, optionally
// surrounded by whitespace.
const endREStr = `^\s*%s\s*$`
// findLeftWhitespace returns the leading run of Unicode whitespace in s. If s
// is empty or consists solely of whitespace, all of s is returned.
func findLeftWhitespace(s string) string {
	// Assume everything is whitespace until a non-space rune proves otherwise.
	end := len(s)
	for pos, ch := range s {
		if unicode.IsSpace(ch) {
			continue
		}
		end = pos
		break
	}
	return s[:end]
}
// findBytewiseLCP returns the longest common prefix of a and b, compared byte
// by byte (not rune by rune). The result is empty when either input is empty
// or when the first bytes differ.
func findBytewiseLCP(a, b string) string {
	// The prefix can never be longer than the shorter input.
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}
	n := 0
	for n < limit && a[n] == b[n] {
		n++
	}
	return a[:n]
}
// writeProtoStringLines appends a single double-quoted, text-proto-escaped
// string literal to w. The literal holds the given lines joined by an escaped
// newline (`\n`), with no trailing newline. The first skip bytes of every line
// are dropped; a line of skip bytes or fewer contributes nothing.
//
// The escaping rules are heavily inspired by
// "github.com/golang/protobuf/proto/text.go".
func writeProtoStringLines(w *bytes.Buffer, skip int, lines []string) {
	w.WriteByte('"')
	for n, text := range lines {
		if n > 0 {
			// Separator between lines only, giving a "\\n".join(lines) effect.
			w.WriteString(`\n`)
		}
		if skip >= len(text) {
			continue
		}
		// Walk the bytes, not the runes: non-ASCII bytes are emitted as
		// individual octal escapes.
		for _, b := range []byte(text[skip:]) {
			// Divergence from C++: apostrophes are not escaped. There is no
			// need to, and the C++ parser copes with a naked apostrophe.
			var esc string
			switch b {
			case '\n':
				esc = `\n`
			case '\r':
				esc = `\r`
			case '\t':
				esc = `\t`
			case '"':
				esc = `\"`
			case '\\':
				esc = `\\`
			}
			switch {
			case esc != "":
				w.WriteString(esc)
			case b >= 0x20 && b < 0x7f:
				// Printable ASCII, equivalent to C's isprint.
				w.WriteByte(b)
			default:
				fmt.Fprintf(w, "\\%03o", b)
			}
		}
	}
	w.WriteByte('"')
}
// ParseMultilineStrings looks for bash-style heredocs and replaces them with
// single-line text-proto-escaped strings.
//
// This looks line by line for /<<\s*([_a-zA-Z]+)\s*$/. If this is found, the
// scanner then looks until it finds /^\s*\1\s*$/. Every line between these is
// joined like "\n".join(lines), and then printed back as an escaped proto
// string. The scanner then loops back to its initial state.
//
// Note that nothing special needs to be done for e.g.
//
//	some_key: "string with << angles"
//
// Such a line would be left alone, because the trailing quote (which is
// mandatory in text proto) causes the starting regex to not match.
//
// For convenience, the inner lines will be treated with the equivalent of
// python's `textwrap.dedent`; any common leading whitespace that occurs on
// every line will be removed. Although both tabs and spaces count as
// whitespace, they are not equivalent (i.e. only exactly-matching whitespace
// prefixes count). Empty and whitespace-only lines are ignored when computing
// the common prefix, wherever they appear in the heredoc (including as its
// first line).
//
// The only error this may return is if there's an open heredoc without a
// matching close marker; in that case the returned string is empty.
//
// Example:
//
//	this: <<EOF
//	  would
//	  turn \ninto
//	   a "single"
//	  line
//	EOF
//
// Turns into the same as:
//
//	this: "would\nturn \\ninto\n a \"single\"\nline"
func ParseMultilineStrings(text string) (string, error) {
	terminator := ""                      // marker of the open heredoc; "" when outside one
	terminatorRE := (*regexp.Regexp)(nil) // matches the closing line of the open heredoc
	needNL := false                       // a newline is owed after a just-closed heredoc
	findLead := true                      // no non-blank heredoc line seen yet
	leadingSpace := ""                    // common whitespace prefix of the non-blank lines so far
	var mlineBuf []string                 // body lines of the open heredoc, without trailing "\n"

	outBuf := bytes.Buffer{}
	outBuf.Grow(len(text))

	// SplitAfter keeps the "\n" on each line, so ordinary lines can be copied
	// to the output verbatim.
	for _, line := range strings.SplitAfter(text, "\n") {
		if terminator == "" {
			// Outside a heredoc.
			if needNL {
				// The closing marker line swallowed its newline; restore it
				// only if more input follows.
				outBuf.WriteByte('\n')
				needNL = false
			}
			mtch := startRE.FindStringSubmatch(line)
			if mtch == nil {
				outBuf.WriteString(line)
				continue
			}
			outBuf.WriteString(mtch[1])
			terminator = mtch[2]
			terminatorRE = regexp.MustCompile(fmt.Sprintf(endREStr, regexp.QuoteMeta(terminator)))
			// Start the indent computation afresh for this heredoc.
			findLead = true
			leadingSpace = ""
			continue
		}

		// Inside a heredoc.
		if terminatorRE.MatchString(line) {
			writeProtoStringLines(&outBuf, len(leadingSpace), mlineBuf)
			terminator = ""
			terminatorRE = nil
			needNL = true
			mlineBuf = mlineBuf[:0]
			continue
		}

		// Whitespace-only (or empty) lines never constrain the common indent.
		// Because line still carries its "\n" (itself whitespace), such a line
		// is detected by its whitespace prefix spanning the whole line. This
		// also applies to the first line of the heredoc: seeding the prefix
		// from a blank first line would put the newline byte into the prefix
		// and defeat the dedent.
		if lead := findLeftWhitespace(line); len(lead) != len(line) {
			if findLead {
				findLead = false
				leadingSpace = lead
			} else {
				leadingSpace = findBytewiseLCP(leadingSpace, lead)
			}
		}
		mlineBuf = append(mlineBuf, strings.TrimSuffix(line, "\n"))
	}

	if terminator != "" {
		return "", fmt.Errorf("failed to find matching terminator %q", terminator)
	}
	return outBuf.String(), nil
}
// UnmarshalTextML is proto.UnmarshalText with heredoc support: s is first
// passed through ParseMultilineStrings, and the flattened text is then
// unmarshalled into pb. A heredoc-parsing error is returned as-is, without
// attempting to unmarshal.
func UnmarshalTextML(s string, pb proto.Message) error {
	flattened, err := ParseMultilineStrings(s)
	if err == nil {
		err = proto.UnmarshalText(flattened, pb)
	}
	return err
}