common/proto/multiline.go - infra/luci/luci-go - Git at Google

 // Copyright 2016 The LUCI Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package proto

 import (
 	"bytes"
 	"fmt"
 	"regexp"
 	"strings"
 	"unicode"

 	"github.com/golang/protobuf/proto"
 )

 // startRE recognizes a line that opens a heredoc: arbitrary text, then "<<",
 // optional whitespace, a terminator made only of ASCII letters and
 // underscores, and optional trailing whitespace up to the end of the line.
 // Submatch 1 is the text before "<<"; submatch 2 is the terminator.
 var startRE = regexp.MustCompile(`^(.*)<<\s*([_a-zA-Z]+)\s*$`)

 // endREStr is a fmt template for the regexp that recognizes the line closing
 // a heredoc: the terminator (substituted for %s after regexp-quoting) alone
 // on its line, optionally surrounded by whitespace.
 const endREStr = `^\s*%s\s*$`

 // findLeftWhitespace returns the longest prefix of s that consists solely of
 // whitespace, as defined by unicode.IsSpace. If s is empty or entirely
 // whitespace, s itself is returned.
 func findLeftWhitespace(s string) string {
 	end := len(s)
 	for pos, ch := range s {
 		if unicode.IsSpace(ch) {
 			continue
 		}
 		// First non-space rune: the prefix stops right before it.
 		end = pos
 		break
 	}
 	return s[:end]
 }

 // findBytewiseLCP returns the longest common prefix of a and b, compared byte
 // by byte (not rune by rune). The result is empty when either input is empty
 // or when the very first bytes differ.
 func findBytewiseLCP(a, b string) string {
 	// The common prefix can never be longer than the shorter input.
 	limit := len(a)
 	if len(b) < limit {
 		limit = len(b)
 	}

 	n := 0
 	for n < limit && a[n] == b[n] {
 		n++
 	}
 	return a[:n]
 }

 // writeProtoStringLines appends to w one double-quoted, escaped string
 // literal built from lines.
 //
 // The first skip bytes of every line are dropped (a line of at most skip
 // bytes contributes nothing). The remainders are joined with an escaped
 // newline between consecutive lines, but none after the last. Escaping is
 // bytewise: newline, carriage return, tab, double quote and backslash get
 // their backslash escapes, other bytes in the range 0x20-0x7e are copied
 // verbatim, and every remaining byte becomes a three-digit octal escape.
 //
 // This code is heavily inspired by
 // "github.com/golang/protobuf/proto/text.go".
 func writeProtoStringLines(w *bytes.Buffer, skip int, lines []string) {
 	w.WriteByte('"')
 	for n, ln := range lines {
 		if n > 0 {
 			// Separator only, for a "\\n".join(lines) effect: nothing is
 			// emitted after the final line.
 			w.WriteString(`\n`)
 		}
 		// Walk bytes, not runes, so a multi-byte character turns into one
 		// octal escape per byte.
 		for pos := skip; pos < len(ln); pos++ {
 			ch := ln[pos]
 			// Divergence from C++: apostrophes are not escaped. There's no
 			// need to escape them, and the C++ parser copes with a naked
 			// apostrophe.
 			switch {
 			case ch == '\n':
 				w.WriteString(`\n`)
 			case ch == '\r':
 				w.WriteString(`\r`)
 			case ch == '\t':
 				w.WriteString(`\t`)
 			case ch == '"':
 				w.WriteString(`\"`)
 			case ch == '\\':
 				w.WriteString(`\\`)
 			case ch < 0x20 || ch >= 0x7f:
 				// Outside the range C's isprint accepts.
 				fmt.Fprintf(w, "\\%03o", ch)
 			default:
 				w.WriteByte(ch)
 			}
 		}
 	}
 	w.WriteByte('"')
 }

 // ParseMultilineStrings looks for bash-style heredocs and replaces them with
 // single-line text-proto-escaped strings.
 //
 // This looks line by line for /<<\s*([_a-zA-Z]+)\s*$/. If this is found, the
 // scanner then looks until it finds /^\s*\1\s*$/. Every line between these is
 // joined like "\n".join(lines), and then printed back as an escaped proto
 // string. The scanner then loops back to its initial state.
 //
 // Note that nothing special needs to be done for e.g.
 //
 //	some_key: "string with << angles"
 //
 // Such a line is left alone, because the trailing quote (which is mandatory
 // in text proto) causes the starting regex to not match.
 //
 // For convenience, the inner lines are treated with the equivalent of
 // python's `textwrap.dedent`: any common leading whitespace that occurs on
 // every line is removed. Although both tabs and spaces count as whitespace,
 // they are not equivalent (i.e. only exactly-matching whitespace prefixes
 // count). Lines that are empty or consist solely of whitespace never take
 // part in the common-prefix calculation, wherever they appear in the
 // heredoc, and are emitted as empty lines.
 //
 // The only error this may return is if there's an open heredoc without a
 // matching close marker; the returned string is empty in that case.
 //
 // Example:
 //
 //	this: <<EOF
 //	  would
 //	  turn \ninto
 //	    a "single"
 //	  line
 //	EOF
 //
 // Turns into the same as:
 //
 //	this: "would\nturn \\ninto\n  a \"single\"\nline"
 func ParseMultilineStrings(text string) (string, error) {
 	var (
 		terminator   string         // end marker of the open heredoc; "" when outside one
 		terminatorRE *regexp.Regexp // matches the closing line for terminator
 		needNL       bool           // a heredoc just closed; its line break is still owed
 		haveLead     bool           // leadingSpace was seeded by a non-blank line
 		leadingSpace string         // common whitespace prefix of the non-blank lines so far
 		mlineBuf     []string       // body lines of the open heredoc, without their "\n"
 		outBuf       bytes.Buffer
 	)
 	outBuf.Grow(len(text))

 	for _, line := range strings.SplitAfter(text, "\n") {
 		if terminator == "" {
 			// Outside a heredoc: copy lines through until one opens a heredoc.
 			if needNL {
 				// The closing marker line swallowed the newline that ends the
 				// rewritten "key: <string>" line; restore it only now, so no
 				// newline is invented when the marker was the last line of
 				// the input.
 				outBuf.WriteByte('\n')
 				needNL = false
 			}
 			mtch := startRE.FindStringSubmatch(line)
 			if mtch == nil {
 				outBuf.WriteString(line)
 				continue
 			}
 			// Keep whatever precedes "<<"; the string literal is appended
 			// once the whole heredoc body has been collected.
 			outBuf.WriteString(mtch[1])
 			terminator = mtch[2]
 			terminatorRE = regexp.MustCompile(fmt.Sprintf(endREStr, regexp.QuoteMeta(terminator)))
 			continue
 		}

 		if terminatorRE.MatchString(line) {
 			// End of the heredoc: emit the dedented body and reset all
 			// per-heredoc state.
 			writeProtoStringLines(&outBuf, len(leadingSpace), mlineBuf)
 			terminator, terminatorRE = "", nil
 			haveLead, leadingSpace = false, ""
 			needNL = true
 			mlineBuf = mlineBuf[:0]
 			continue
 		}

 		lead := findLeftWhitespace(line)
 		if len(lead) == len(line) {
 			// Totally whitespace (or empty) lines don't count for the leading
 			// space calculation, even when they come first, and are written
 			// as just an empty line.
 			mlineBuf = append(mlineBuf, "")
 			continue
 		}
 		if haveLead {
 			leadingSpace = findBytewiseLCP(leadingSpace, lead)
 		} else {
 			haveLead = true
 			leadingSpace = lead
 		}
 		mlineBuf = append(mlineBuf, strings.TrimSuffix(line, "\n"))
 	}

 	if terminator != "" {
 		return "", fmt.Errorf("failed to find matching terminator %q", terminator)
 	}

 	return outBuf.String(), nil
 }

 // UnmarshalTextML behaves the same as proto.UnmarshalText, except that
 // heredoc-style multiline strings in s are first expanded in the manner of
 // ParseMultilineStrings.
 //
 // If the expansion fails, its error is returned and proto.UnmarshalText is
 // never called.
 func UnmarshalTextML(s string, pb proto.Message) error {
 	expanded, err := ParseMultilineStrings(s)
 	if err == nil {
 		err = proto.UnmarshalText(expanded, pb)
 	}
 	return err
 }
	// Copyright 2016 The LUCI Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	package proto

	import (
	"bytes"
	"fmt"
	"regexp"
	"strings"
	"unicode"

	"github.com/golang/protobuf/proto"
	)

	// startRE recognizes a line that opens a heredoc: arbitrary text, then "<<",
	// optional whitespace, a terminator made only of ASCII letters and
	// underscores, and optional trailing whitespace up to the end of the line.
	// Submatch 1 is the text before "<<"; submatch 2 is the terminator.
	var startRE = regexp.MustCompile(`^(.*)<<\s*([_a-zA-Z]+)\s*$`)

	// endREStr is a fmt template for the regexp that recognizes the line closing
	// a heredoc: the terminator (substituted for %s after regexp-quoting) alone
	// on its line, optionally surrounded by any amount of whitespace.
	const endREStr = `^\s*%s\s*$`

	// findLeftWhitespace returns the longest prefix of s that consists solely of
	// whitespace, as defined by unicode.IsSpace. If s is empty or entirely
	// whitespace, s itself is returned.
	func findLeftWhitespace(s string) string {
		end := len(s)
		for pos, ch := range s {
			if unicode.IsSpace(ch) {
				continue
			}
			// First non-space rune: the prefix stops right before it.
			end = pos
			break
		}
		return s[:end]
	}

	// findBytewiseLCP returns the longest common prefix of a and b, compared byte
	// by byte (not rune by rune). The result is empty when either input is empty
	// or when the very first bytes differ.
	func findBytewiseLCP(a, b string) string {
		// The common prefix can never be longer than the shorter input; this
		// also covers the empty-input and equal-input cases without special
		// handling.
		limit := len(a)
		if len(b) < limit {
			limit = len(b)
		}

		n := 0
		for n < limit && a[n] == b[n] {
			n++
		}
		return a[:n]
	}

	// writeProtoStringLines appends to w one double-quoted, escaped string
	// literal built from lines.
	//
	// The first skip bytes of every line are dropped (a line of at most skip
	// bytes contributes nothing). The remainders are joined with an escaped
	// newline between consecutive lines, but none after the last. Escaping is
	// bytewise: newline, carriage return, tab, double quote and backslash get
	// their backslash escapes, other bytes in the range 0x20-0x7e are copied
	// verbatim, and every remaining byte becomes a three-digit octal escape.
	//
	// This code is heavily inspired by
	// "github.com/golang/protobuf/proto/text.go".
	func writeProtoStringLines(w *bytes.Buffer, skip int, lines []string) {
		w.WriteByte('"')
		for n, ln := range lines {
			if n > 0 {
				// Separator only, for a "\\n".join(lines) effect: nothing is
				// emitted after the final line.
				w.WriteString(`\n`)
			}
			// Walk bytes, not runes, so a multi-byte character turns into one
			// octal escape per byte.
			for pos := skip; pos < len(ln); pos++ {
				ch := ln[pos]
				// Divergence from C++: apostrophes are not escaped. There's no
				// need to escape them, and the C++ parser copes with a naked
				// apostrophe.
				switch {
				case ch == '\n':
					w.WriteString(`\n`)
				case ch == '\r':
					w.WriteString(`\r`)
				case ch == '\t':
					w.WriteString(`\t`)
				case ch == '"':
					w.WriteString(`\"`)
				case ch == '\\':
					w.WriteString(`\\`)
				case ch < 0x20 || ch >= 0x7f:
					// Outside the range C's isprint accepts.
					fmt.Fprintf(w, "\\%03o", ch)
				default:
					w.WriteByte(ch)
				}
			}
		}
		w.WriteByte('"')
	}

	// ParseMultilineStrings looks for bash-style heredocs and replaces them with
	// single-line text-proto-escaped strings.
	//
	// This looks line by line for /<<\s*([_a-zA-Z]+)\s*$/. If this is found, the
	// scanner then looks until it finds /^\s*\1\s*$/. Every line between these is
	// joined like "\n".join(lines), and then printed back as an escaped proto
	// string. The scanner then loops back to its initial state.
	//
	// Note that nothing special needs to be done for e.g.
	//
	//	some_key: "string with << angles"
	//
	// Such a line is left alone, because the trailing quote (which is mandatory
	// in text proto) causes the starting regex to not match.
	//
	// For convenience, the inner lines are treated with the equivalent of
	// python's `textwrap.dedent`: any common leading whitespace that occurs on
	// every line is removed. Although both tabs and spaces count as whitespace,
	// they are not equivalent (i.e. only exactly-matching whitespace prefixes
	// count). Lines that are empty or consist solely of whitespace never take
	// part in the common-prefix calculation, wherever they appear in the
	// heredoc, and are emitted as empty lines.
	//
	// The only error this may return is if there's an open heredoc without a
	// matching close marker; the returned string is empty in that case.
	//
	// Example:
	//
	//	this: <<EOF
	//	  would
	//	  turn \ninto
	//	    a "single"
	//	  line
	//	EOF
	//
	// Turns into the same as:
	//
	//	this: "would\nturn \\ninto\n  a \"single\"\nline"
	func ParseMultilineStrings(text string) (string, error) {
		var (
			terminator   string         // end marker of the open heredoc; "" when outside one
			terminatorRE *regexp.Regexp // matches the closing line for terminator
			needNL       bool           // a heredoc just closed; its line break is still owed
			haveLead     bool           // leadingSpace was seeded by a non-blank line
			leadingSpace string         // common whitespace prefix of the non-blank lines so far
			mlineBuf     []string       // body lines of the open heredoc, without their "\n"
			outBuf       bytes.Buffer
		)
		outBuf.Grow(len(text))

		for _, line := range strings.SplitAfter(text, "\n") {
			if terminator == "" {
				// Outside a heredoc: copy lines through until one opens a heredoc.
				if needNL {
					// The closing marker line swallowed the newline that ends the
					// rewritten "key: <string>" line; restore it only now, so no
					// newline is invented when the marker was the last line of
					// the input.
					outBuf.WriteByte('\n')
					needNL = false
				}
				mtch := startRE.FindStringSubmatch(line)
				if mtch == nil {
					outBuf.WriteString(line)
					continue
				}
				// Keep whatever precedes "<<"; the string literal is appended
				// once the whole heredoc body has been collected.
				outBuf.WriteString(mtch[1])
				terminator = mtch[2]
				terminatorRE = regexp.MustCompile(fmt.Sprintf(endREStr, regexp.QuoteMeta(terminator)))
				continue
			}

			if terminatorRE.MatchString(line) {
				// End of the heredoc: emit the dedented body and reset all
				// per-heredoc state.
				writeProtoStringLines(&outBuf, len(leadingSpace), mlineBuf)
				terminator, terminatorRE = "", nil
				haveLead, leadingSpace = false, ""
				needNL = true
				mlineBuf = mlineBuf[:0]
				continue
			}

			lead := findLeftWhitespace(line)
			if len(lead) == len(line) {
				// Totally whitespace (or empty) lines don't count for the leading
				// space calculation, even when they come first, and are written
				// as just an empty line.
				mlineBuf = append(mlineBuf, "")
				continue
			}
			if haveLead {
				leadingSpace = findBytewiseLCP(leadingSpace, lead)
			} else {
				haveLead = true
				leadingSpace = lead
			}
			mlineBuf = append(mlineBuf, strings.TrimSuffix(line, "\n"))
		}

		if terminator != "" {
			return "", fmt.Errorf("failed to find matching terminator %q", terminator)
		}

		return outBuf.String(), nil
	}

	// UnmarshalTextML behaves the same as proto.UnmarshalText, except that
	// heredoc-style multiline strings in s are first expanded in the manner of
	// ParseMultilineStrings.
	//
	// If the expansion fails, its error is returned and proto.UnmarshalText is
	// never called.
	func UnmarshalTextML(s string, pb proto.Message) error {
		expanded, err := ParseMultilineStrings(s)
		if err == nil {
			err = proto.UnmarshalText(expanded, pb)
		}
		return err
	}