common/proto/mask/parse.go - infra/luci/luci-go - Git at Google

 // Copyright 2020 The LUCI Authors.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
 // You may obtain a copy of the License at
 //
 //      http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS,
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.

 package mask

 import (
 	"fmt"
 	"strings"
 	"unicode"

 	"google.golang.org/protobuf/reflect/protoreflect"
 )

 // path models a parsed path: an ordered slice of segments, one per token of
 // the raw path string.
 type path []string

 // pathDelimiter is the character that separates segments in a raw path string.
 const pathDelimiter = '.'

 // parsePath turns rawPath into a path, i.e. a slice of segments, resolving the
 // segments against descriptor (see the grammar in the pkg doc).
 //
 // When isJSONName is true, field names in rawPath are looked up by JSON field
 // name rather than by canonical name. The segments of the returned path use
 // canonical field names either way.
 func parsePath(rawPath string, descriptor protoreflect.MessageDescriptor, isJSONName bool) (path, error) {
 	// Parsing starts at the top-level message with no list/star state set.
 	return parsePathWithContext(rawPath, &parseCtx{curDescriptor: descriptor}, isJSONName)
 }

 // parsePathWithContext tokenizes rawPath using pathDelimiter and passes every
 // token to parseSegment, which checks it against ctx and advances ctx. ctx is
 // therefore mutated while the path is consumed.
 //
 // It returns the collected segments, or nil together with the first error
 // reported by the tokenizer or by parseSegment. isJSONName is forwarded to
 // parseSegment unchanged.
 func parsePathWithContext(rawPath string, ctx *parseCtx, isJSONName bool) (path, error) {
 	t := &tokenizer{
 		path:      rawPath,
 		delimiter: pathDelimiter,
 	}
 	// Start from an empty, non-nil path; an empty rawPath returns it as is.
 	ret := path{}
 	for t.hasMoreTokens() {
 		tok, err := t.nextToken()
 		if err != nil {
 			return nil, err
 		}
 		seg, err := parseSegment(tok, isJSONName, ctx)
 		if err != nil {
 			return nil, err
 		}
 		ret = append(ret, seg)
 	}
 	return ret, nil
 }

 // parseCtx defines context during path parsing. It is updated by parseSegment
 // and advanceToField as segments are consumed.
 type parseCtx struct {
 	// curDescriptor is the message the next field name is looked up in. It is
 	// nil once the context has advanced to a scalar field.
 	curDescriptor protoreflect.MessageDescriptor
 	// isList is set when the context has just advanced to a list field; the
 	// next segment must then be a star.
 	isList        bool
 	// mustBeLast is set after a star that does not correspond to a repeated
 	// field; any further token is an error.
 	mustBeLast    bool
 }

 // advanceToField moves the context from the current message into one of its
 // fields and returns the canonical name of that field.
 //
 // fieldName is matched against JSON names when isJSONName is true and against
 // canonical field names otherwise. An error is returned, and the context is
 // left untouched, when the current descriptor is nil (meaning the context is
 // on a scalar field) or when the current message has no such field.
 //
 // On success curDescriptor becomes the value of the field's Message() and
 // isList becomes the value of its IsList().
 func (ctx *parseCtx) advanceToField(fieldName string, isJSONName bool) (string, error) {
 	parent := ctx.curDescriptor
 	if parent == nil {
 		return "", fmt.Errorf("can't advance to field when current descriptor is nil")
 	}

 	var field protoreflect.FieldDescriptor
 	switch fields := parent.Fields(); {
 	case isJSONName:
 		field = fields.ByJSONName(fieldName)
 	default:
 		field = fields.ByName(protoreflect.Name(fieldName))
 	}
 	if field == nil {
 		return "", fmt.Errorf("field %q does not exist in message %s", fieldName, parent.Name())
 	}

 	ctx.curDescriptor, ctx.isList = field.Message(), field.IsList()
 	return string(field.Name()), nil
 }

 // mapKeyKindToTokenType defines the mapping between the kind of a map key
 // and the token type expected for that key in the path string.
 //
 // parseSegment rejects a map whose key kind is missing from this table, and
 // rejects a key token whose type differs from the table entry.
 var mapKeyKindToTokenType = map[protoreflect.Kind]tokenType{
 	// Every integer key kind is written as an integer literal.
 	protoreflect.Int32Kind:    intLiteral,
 	protoreflect.Int64Kind:    intLiteral,
 	protoreflect.Sint32Kind:   intLiteral,
 	protoreflect.Sint64Kind:   intLiteral,
 	protoreflect.Uint32Kind:   intLiteral,
 	protoreflect.Uint64Kind:   intLiteral,
 	protoreflect.Sfixed32Kind: intLiteral,
 	protoreflect.Fixed32Kind:  intLiteral,
 	protoreflect.Sfixed64Kind: intLiteral,
 	protoreflect.Fixed64Kind:  intLiteral,
 	protoreflect.BoolKind:     boolLiteral,
 	protoreflect.StringKind:   strLiteral,
 }

 // parseSegment converts one token into a path segment and updates the parse
 // context to reflect what may follow it.
 //
 // The checks run in this order:
 //   - after a terminal star no further token is accepted;
 //   - right after a list field only a star is accepted;
 //   - a scalar field (nil descriptor) cannot have a subfield;
 //   - inside a map entry the token is a star or a key whose token type matches
 //     the kind of the map key, and the context advances to the map value;
 //   - otherwise a star ends the path, and anything else must be a field name.
 //
 // If isJSONName is true, a field-name token is expected to be the JSON name
 // of the field rather than its canonical name. The returned segment is always
 // the canonical name.
 func parseSegment(tok token, isJSONName bool, ctx *parseCtx) (string, error) {
 	if ctx.mustBeLast {
 		return "", fmt.Errorf("expected end of string; got token: %q", tok.value)
 	}

 	if ctx.isList {
 		// The previous segment is a list field (a repeated field that is not
 		// a map entry); it can only be followed by a star.
 		if tok.typ != star {
 			return "", fmt.Errorf("expected a star following a repeated field; got token: %q", tok.value)
 		}
 		ctx.isList = false
 		return "*", nil
 	}

 	msg := ctx.curDescriptor
 	if msg == nil {
 		return "", fmt.Errorf("scalar field cannot have subfield: %q", tok.value)
 	}

 	isStar := tok.typ == star
 	if msg.IsMapEntry() {
 		if !isStar {
 			// A concrete key must be written with the literal type that
 			// matches the kind of the map key.
 			keyKind := msg.Fields().ByName(protoreflect.Name("key")).Kind()
 			want, known := mapKeyKindToTokenType[keyKind]
 			if !known {
 				return "", fmt.Errorf("unexpected map key kind %s", keyKind)
 			}
 			if want != tok.typ {
 				return "", fmt.Errorf("expected map key kind %s; got token: %q", keyKind, tok.value)
 			}
 		}
 		// Whatever the key was, continue parsing inside the map value.
 		if _, err := ctx.advanceToField("value", false); err != nil {
 			return "", err
 		}
 		return tok.value, nil
 	}

 	if isStar {
 		// A star that does not correspond to a repeated field cannot be
 		// followed by any subfield.
 		ctx.mustBeLast = true
 		return "*", nil
 	}

 	if tok.typ != strLiteral {
 		return "", fmt.Errorf("expected a field name of type string; got token: %q", tok.value)
 	}
 	return ctx.advanceToField(tok.value, isJSONName)
 }

 // tokenizer breaks a path string into tokens (segments). pos is the byte
 // offset into path of the next unread character.
 type tokenizer struct {
 	path      string
 	delimiter byte
 	pos       int
 }

 // token is a composite of token type and string value. It represents a
 // segment in the path. For a quoted string, value holds the unquoted content
 // with every doubled backtick collapsed into a single one.
 type token struct {
 	typ   tokenType
 	value string
 }

 // tokenType models different types of segment defined in grammar (see pkg doc).
 // Note that a quoted string is also treated as a string literal.
 type tokenType int8

 const (
 	star tokenType = iota
 	strLiteral
 	boolLiteral
 	intLiteral
 )

 // hasMoreTokens tests if there is unread input left in the path string.
 func (t tokenizer) hasMoreTokens() bool {
 	return t.pos < len(t.path)
 }

 // nextToken returns the next token in the path string and advances past it.
 // Every token except the first must be preceded by the delimiter, which is
 // consumed as well.
 //
 // An error is returned when no input is left (callers normally guard with
 // hasMoreTokens), when the delimiter is missing or is the last character,
 // when a quoted string is never closed, when a minus sign is not followed by
 // a digit, or when the next character cannot start any token.
 //
 // Characters are classified per rune rather than per byte, so a multi-byte
 // UTF-8 letter may start a string literal just as it may continue one.
 // Integer literals consist of an optional minus sign and ASCII digits only.
 func (t *tokenizer) nextToken() (token, error) {
 	if t.pos >= len(t.path) {
 		return token{}, fmt.Errorf("no more tokens in path %q", t.path)
 	}
 	if t.pos > 0 {
 		// Every token but the first must be introduced by the delimiter.
 		if t.path[t.pos] != t.delimiter {
 			// %.1s prints exactly the first rune of the remaining input, so
 			// a multi-byte character is reported intact.
 			return token{}, fmt.Errorf("expected delimiter: %c; got %.1s", t.delimiter, t.path[t.pos:])
 		}
 		t.pos++ // swallow the delimiter
 		if t.pos == len(t.path) {
 			return token{}, fmt.Errorf("path can't end with delimiter: %c", t.delimiter)
 		}
 	}

 	rest := t.path[t.pos:] // never empty at this point
 	switch b := rest[0]; {
 	case b == '`':
 		return t.scanQuoted()

 	case b == '*':
 		t.pos++ // swallow the star
 		return token{typ: star, value: "*"}, nil

 	// A minus sign must be immediately followed by a digit.
 	case b == '-' && (len(rest) == 1 || !isASCIIDigit(rune(rest[1]))):
 		return token{}, fmt.Errorf("expected digit following minus sign for negative numbers; got minus sign only")

 	case b == '-' || isASCIIDigit(rune(b)):
 		// The leading sign or digit is already validated; extend the literal
 		// over the ASCII digits that follow it.
 		n := 1 + prefixLen(rest[1:], func(r rune) bool { return !isASCIIDigit(r) })
 		t.pos += n
 		return token{typ: intLiteral, value: rest[:n]}, nil

 	case startsStringLiteral(rest):
 		n := prefixLen(rest, isInvalidStringChar)
 		t.pos += n
 		typ, val := strLiteral, rest[:n]
 		if val == "true" || val == "false" {
 			typ = boolLiteral
 		}
 		return token{typ: typ, value: val}, nil

 	default:
 		return token{}, fmt.Errorf("unexpected token: %.1s", rest)
 	}
 }

 // scanQuoted consumes the backtick-quoted string that starts at t.pos and
 // returns its content as a string literal. Inside the quotes two consecutive
 // backticks stand for one literal backtick.
 func (t *tokenizer) scanQuoted() (token, error) {
 	t.pos++ // swallow the opening backtick
 	var sb strings.Builder
 	for {
 		rel := strings.IndexByte(t.path[t.pos:], '`')
 		if rel == -1 {
 			sb.WriteString(t.path[t.pos:])
 			return token{}, fmt.Errorf("a quoted string is never closed; got: %q", sb.String())
 		}
 		end := t.pos + rel
 		sb.WriteString(t.path[t.pos:end])
 		t.pos = end + 1 // swallow the discovered backtick as well
 		if t.pos >= len(t.path) || t.path[t.pos] != '`' {
 			// End of input, or the backtick was the closing one.
 			break
 		}
 		sb.WriteByte('`')
 		t.pos++ // swallow the second backtick of the escape pair
 	}
 	return token{typ: strLiteral, value: sb.String()}, nil
 }

 // prefixLen returns the length in bytes of the longest prefix of s that
 // contains no rune for which stop returns true.
 func prefixLen(s string, stop func(rune) bool) int {
 	if n := strings.IndexFunc(s, stop); n >= 0 {
 		return n
 	}
 	return len(s)
 }

 // startsStringLiteral reports whether the first rune of the non-empty string
 // s can begin an unquoted string literal, i.e. is a letter or an underscore.
 func startsStringLiteral(s string) bool {
 	// IndexFunc decodes s rune by rune and returns 0 exactly when the very
 	// first rune is rejected.
 	return strings.IndexFunc(s, func(r rune) bool { return r != '_' && !unicode.IsLetter(r) }) != 0
 }

 // isASCIIDigit reports whether r is one of the characters '0' through '9'.
 func isASCIIDigit(r rune) bool {
 	return '0' <= r && r <= '9'
 }

 // isInvalidStringChar tells whether the given rune represents an invalid
 // character in a string literal according to the grammar.
 func isInvalidStringChar(r rune) bool {
 	return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_'
 }
	// Copyright 2020 The LUCI Authors.
	//
	// Licensed under the Apache License, Version 2.0 (the "License");
	// you may not use this file except in compliance with the License.
	// You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS,
	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	// See the License for the specific language governing permissions and
	// limitations under the License.

	package mask

	import (
	"fmt"
	"strings"
	"unicode"

	"google.golang.org/protobuf/reflect/protoreflect"
	)

	// path models a parsed path: an ordered slice of segments, one per token of
	// the raw path string.
	type path []string

	// pathDelimiter is the character that separates segments in a raw path string.
	const pathDelimiter = '.'

	// parsePath turns rawPath into a path, i.e. a slice of segments, resolving the
	// segments against descriptor (see the grammar in the pkg doc).
	//
	// When isJSONName is true, field names in rawPath are looked up by JSON field
	// name rather than by canonical name. The segments of the returned path use
	// canonical field names either way.
	func parsePath(rawPath string, descriptor protoreflect.MessageDescriptor, isJSONName bool) (path, error) {
		// Parsing starts at the top-level message with no list/star state set.
		return parsePathWithContext(rawPath, &parseCtx{curDescriptor: descriptor}, isJSONName)
	}

	// parsePathWithContext tokenizes rawPath using pathDelimiter and passes every
	// token to parseSegment, which checks it against ctx and advances ctx. ctx is
	// therefore mutated while the path is consumed.
	//
	// It returns the collected segments, or nil together with the first error
	// reported by the tokenizer or by parseSegment. isJSONName is forwarded to
	// parseSegment unchanged.
	func parsePathWithContext(rawPath string, ctx *parseCtx, isJSONName bool) (path, error) {
		t := &tokenizer{
			path:      rawPath,
			delimiter: pathDelimiter,
		}
		// Start from an empty, non-nil path; an empty rawPath returns it as is.
		ret := path{}
		for t.hasMoreTokens() {
			tok, err := t.nextToken()
			if err != nil {
				return nil, err
			}
			seg, err := parseSegment(tok, isJSONName, ctx)
			if err != nil {
				return nil, err
			}
			ret = append(ret, seg)
		}
		return ret, nil
	}

	// parseCtx defines context during path parsing. It is updated by parseSegment
	// and advanceToField as segments are consumed.
	type parseCtx struct {
	// curDescriptor is the message the next field name is looked up in. It is
	// nil once the context has advanced to a scalar field.
	curDescriptor protoreflect.MessageDescriptor
	// isList is set when the context has just advanced to a list field; the
	// next segment must then be a star.
	isList bool
	// mustBeLast is set after a star that does not correspond to a repeated
	// field; any further token is an error.
	mustBeLast bool
	}

	// advanceToField moves the context from the current message into one of its
	// fields and returns the canonical name of that field.
	//
	// fieldName is matched against JSON names when isJSONName is true and against
	// canonical field names otherwise. An error is returned, and the context is
	// left untouched, when the current descriptor is nil (meaning the context is
	// on a scalar field) or when the current message has no such field.
	//
	// On success curDescriptor becomes the value of the field's Message() and
	// isList becomes the value of its IsList().
	func (ctx *parseCtx) advanceToField(fieldName string, isJSONName bool) (string, error) {
		parent := ctx.curDescriptor
		if parent == nil {
			return "", fmt.Errorf("can't advance to field when current descriptor is nil")
		}

		var field protoreflect.FieldDescriptor
		switch fields := parent.Fields(); {
		case isJSONName:
			field = fields.ByJSONName(fieldName)
		default:
			field = fields.ByName(protoreflect.Name(fieldName))
		}
		if field == nil {
			return "", fmt.Errorf("field %q does not exist in message %s", fieldName, parent.Name())
		}

		ctx.curDescriptor, ctx.isList = field.Message(), field.IsList()
		return string(field.Name()), nil
	}

	// mapKeyKindToTokenType defines the mapping between the kind of a map key
	// and the token type expected for that key in the path string.
	//
	// parseSegment rejects a map whose key kind is missing from this table, and
	// rejects a key token whose type differs from the table entry.
	var mapKeyKindToTokenType = map[protoreflect.Kind]tokenType{
	// Every integer key kind is written as an integer literal.
	protoreflect.Int32Kind: intLiteral,
	protoreflect.Int64Kind: intLiteral,
	protoreflect.Sint32Kind: intLiteral,
	protoreflect.Sint64Kind: intLiteral,
	protoreflect.Uint32Kind: intLiteral,
	protoreflect.Uint64Kind: intLiteral,
	protoreflect.Sfixed32Kind: intLiteral,
	protoreflect.Fixed32Kind: intLiteral,
	protoreflect.Sfixed64Kind: intLiteral,
	protoreflect.Fixed64Kind: intLiteral,
	protoreflect.BoolKind: boolLiteral,
	protoreflect.StringKind: strLiteral,
	}

	// parseSegment converts one token into a path segment and updates the parse
	// context to reflect what may follow it.
	//
	// The checks run in this order:
	//   - after a terminal star no further token is accepted;
	//   - right after a list field only a star is accepted;
	//   - a scalar field (nil descriptor) cannot have a subfield;
	//   - inside a map entry the token is a star or a key whose token type matches
	//     the kind of the map key, and the context advances to the map value;
	//   - otherwise a star ends the path, and anything else must be a field name.
	//
	// If isJSONName is true, a field-name token is expected to be the JSON name
	// of the field rather than its canonical name. The returned segment is always
	// the canonical name.
	func parseSegment(tok token, isJSONName bool, ctx *parseCtx) (string, error) {
		if ctx.mustBeLast {
			return "", fmt.Errorf("expected end of string; got token: %q", tok.value)
		}

		if ctx.isList {
			// The previous segment is a list field (a repeated field that is not
			// a map entry); it can only be followed by a star.
			if tok.typ != star {
				return "", fmt.Errorf("expected a star following a repeated field; got token: %q", tok.value)
			}
			ctx.isList = false
			return "*", nil
		}

		msg := ctx.curDescriptor
		if msg == nil {
			return "", fmt.Errorf("scalar field cannot have subfield: %q", tok.value)
		}

		isStar := tok.typ == star
		if msg.IsMapEntry() {
			if !isStar {
				// A concrete key must be written with the literal type that
				// matches the kind of the map key.
				keyKind := msg.Fields().ByName(protoreflect.Name("key")).Kind()
				want, known := mapKeyKindToTokenType[keyKind]
				if !known {
					return "", fmt.Errorf("unexpected map key kind %s", keyKind)
				}
				if want != tok.typ {
					return "", fmt.Errorf("expected map key kind %s; got token: %q", keyKind, tok.value)
				}
			}
			// Whatever the key was, continue parsing inside the map value.
			if _, err := ctx.advanceToField("value", false); err != nil {
				return "", err
			}
			return tok.value, nil
		}

		if isStar {
			// A star that does not correspond to a repeated field cannot be
			// followed by any subfield.
			ctx.mustBeLast = true
			return "*", nil
		}

		if tok.typ != strLiteral {
			return "", fmt.Errorf("expected a field name of type string; got token: %q", tok.value)
		}
		return ctx.advanceToField(tok.value, isJSONName)
	}

	// tokenizer breaks a path string into tokens (segments). pos is the byte
	// offset into path of the next unread character.
	type tokenizer struct {
		path      string
		delimiter byte
		pos       int
	}

	// token is a composite of token type and string value. It represents a
	// segment in the path. For a quoted string, value holds the unquoted content
	// with every doubled backtick collapsed into a single one.
	type token struct {
		typ   tokenType
		value string
	}

	// tokenType models different types of segment defined in grammar (see pkg doc).
	// Note that a quoted string is also treated as a string literal.
	type tokenType int8

	const (
		star tokenType = iota
		strLiteral
		boolLiteral
		intLiteral
	)

	// hasMoreTokens tests if there is unread input left in the path string.
	func (t tokenizer) hasMoreTokens() bool {
		return t.pos < len(t.path)
	}

	// nextToken returns the next token in the path string and advances past it.
	// Every token except the first must be preceded by the delimiter, which is
	// consumed as well.
	//
	// An error is returned when no input is left (callers normally guard with
	// hasMoreTokens), when the delimiter is missing or is the last character,
	// when a quoted string is never closed, when a minus sign is not followed by
	// a digit, or when the next character cannot start any token.
	//
	// Characters are classified per rune rather than per byte, so a multi-byte
	// UTF-8 letter may start a string literal just as it may continue one.
	// Integer literals consist of an optional minus sign and ASCII digits only.
	func (t *tokenizer) nextToken() (token, error) {
		if t.pos >= len(t.path) {
			return token{}, fmt.Errorf("no more tokens in path %q", t.path)
		}
		if t.pos > 0 {
			// Every token but the first must be introduced by the delimiter.
			if t.path[t.pos] != t.delimiter {
				// %.1s prints exactly the first rune of the remaining input, so
				// a multi-byte character is reported intact.
				return token{}, fmt.Errorf("expected delimiter: %c; got %.1s", t.delimiter, t.path[t.pos:])
			}
			t.pos++ // swallow the delimiter
			if t.pos == len(t.path) {
				return token{}, fmt.Errorf("path can't end with delimiter: %c", t.delimiter)
			}
		}

		rest := t.path[t.pos:] // never empty at this point
		switch b := rest[0]; {
		case b == '`':
			return t.scanQuoted()

		case b == '*':
			t.pos++ // swallow the star
			return token{typ: star, value: "*"}, nil

		// A minus sign must be immediately followed by a digit.
		case b == '-' && (len(rest) == 1 || !isASCIIDigit(rune(rest[1]))):
			return token{}, fmt.Errorf("expected digit following minus sign for negative numbers; got minus sign only")

		case b == '-' || isASCIIDigit(rune(b)):
			// The leading sign or digit is already validated; extend the literal
			// over the ASCII digits that follow it.
			n := 1 + prefixLen(rest[1:], func(r rune) bool { return !isASCIIDigit(r) })
			t.pos += n
			return token{typ: intLiteral, value: rest[:n]}, nil

		case startsStringLiteral(rest):
			n := prefixLen(rest, isInvalidStringChar)
			t.pos += n
			typ, val := strLiteral, rest[:n]
			if val == "true" || val == "false" {
				typ = boolLiteral
			}
			return token{typ: typ, value: val}, nil

		default:
			return token{}, fmt.Errorf("unexpected token: %.1s", rest)
		}
	}

	// scanQuoted consumes the backtick-quoted string that starts at t.pos and
	// returns its content as a string literal. Inside the quotes two consecutive
	// backticks stand for one literal backtick.
	func (t *tokenizer) scanQuoted() (token, error) {
		t.pos++ // swallow the opening backtick
		var sb strings.Builder
		for {
			rel := strings.IndexByte(t.path[t.pos:], '`')
			if rel == -1 {
				sb.WriteString(t.path[t.pos:])
				return token{}, fmt.Errorf("a quoted string is never closed; got: %q", sb.String())
			}
			end := t.pos + rel
			sb.WriteString(t.path[t.pos:end])
			t.pos = end + 1 // swallow the discovered backtick as well
			if t.pos >= len(t.path) || t.path[t.pos] != '`' {
				// End of input, or the backtick was the closing one.
				break
			}
			sb.WriteByte('`')
			t.pos++ // swallow the second backtick of the escape pair
		}
		return token{typ: strLiteral, value: sb.String()}, nil
	}

	// prefixLen returns the length in bytes of the longest prefix of s that
	// contains no rune for which stop returns true.
	func prefixLen(s string, stop func(rune) bool) int {
		if n := strings.IndexFunc(s, stop); n >= 0 {
			return n
		}
		return len(s)
	}

	// startsStringLiteral reports whether the first rune of the non-empty string
	// s can begin an unquoted string literal, i.e. is a letter or an underscore.
	func startsStringLiteral(s string) bool {
		// IndexFunc decodes s rune by rune and returns 0 exactly when the very
		// first rune is rejected.
		return strings.IndexFunc(s, func(r rune) bool { return r != '_' && !unicode.IsLetter(r) }) != 0
	}

	// isASCIIDigit reports whether r is one of the characters '0' through '9'.
	func isASCIIDigit(r rune) bool {
		return '0' <= r && r <= '9'
	}

	// isInvalidStringChar tells whether the given rune represents an invalid
	// character in a string literal according to the grammar.
	func isInvalidStringChar(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '_'
	}