text_parser.go - external/github.com/DHowett/go-plist - Git at Google

 // Parser for text plist formats.
 // @see https://github.com/apple/swift-corelibs-foundation/blob/master/CoreFoundation/Parsing.subproj/CFOldStylePList.c
 // @see https://github.com/gnustep/libs-base/blob/master/Source/NSPropertyList.m
 // This parser also handles strings files.

 package plist

 import (
 	"encoding/base64"
 	"encoding/binary"
 	"errors"
 	"fmt"
 	"io"
 	"io/ioutil"
 	"runtime"
 	"strings"
 	"time"
 	"unicode/utf16"
 	"unicode/utf8"
 )

 // textPlistParser carries the state of one parse of a text property list.
 // It works as a cursor over an in-memory string: start marks where the
 // token being collected begins and pos is the next byte to be read.
 type textPlistParser struct {
 	// reader is the byte source; parseDocument reads it to completion.
 	reader io.Reader
 	// format is the detected dialect. newTextPlistParser sets it to
 	// OpenStepFormat and parsePlistValue switches it to GNUStepFormat when
 	// it meets a GNUstep-only construct.
 	format int

 	// input is the whole document as a string, filled in by parseDocument.
 	input string
 	// start is the byte offset at which the pending token begins (see emit).
 	start int
 	// pos is the byte offset of the next rune to be read.
 	pos   int
 	// width is the byte width of the rune most recently returned by next;
 	// backup subtracts it from pos.
 	width int
 }

 // convertU16 decodes buffer as a sequence of UTF-16 code units stored in
 // byte order bo and returns the text as a Go string. A buffer holding an
 // odd number of bytes is rejected with a "truncated utf16" error.
 func convertU16(buffer []byte, bo binary.ByteOrder) (string, error) {
 	if len(buffer)&1 == 1 {
 		return "", errors.New("truncated utf16")
 	}

 	units := make([]uint16, 0, len(buffer)/2)
 	for rest := buffer; len(rest) > 0; rest = rest[2:] {
 		// Hand the byte order exactly one code unit at a time.
 		units = append(units, bo.Uint16(rest[:2]))
 	}
 	return string(utf16.Decode(units)), nil
 }

 // guessEncodingAndConvert inspects the first bytes of buffer to choose a
 // text encoding and returns the document as a string.
 //
 // Recognised, in order: a UTF-8 BOM (EF BB BF), a UTF-16 big-endian BOM
 // (FE FF) or a leading 00 XX pair, and a UTF-16 little-endian BOM (FF FE)
 // or a leading XX 00 pair. BOMs are dropped before conversion. Anything
 // else is passed through as 8-bit text.
 //
 // zeroCopy8BitString is defined elsewhere; presumably it views
 // buffer[offset:offset+length] as a string without copying - confirm.
 func guessEncodingAndConvert(buffer []byte) (string, error) {
 	size := len(buffer)

 	if size >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF {
 		// UTF-8 BOM: skip the three marker bytes.
 		return zeroCopy8BitString(buffer, 3, size-3), nil
 	}

 	if size >= 2 {
 		b0, b1 := buffer[0], buffer[1]
 		switch {
 		case b0 == 0xFE && b1 == 0xFF: // big-endian BOM
 			return convertU16(buffer[2:], binary.BigEndian)
 		case b0 == 0 && b1 != 0: // no BOM, but 00 XX reads as big-endian
 			return convertU16(buffer, binary.BigEndian)
 		case b0 == 0xFF && b1 == 0xFE: // little-endian BOM
 			return convertU16(buffer[2:], binary.LittleEndian)
 		case b0 != 0 && b1 == 0: // no BOM, but XX 00 reads as little-endian
 			return convertU16(buffer, binary.LittleEndian)
 		}
 	}

 	// fallback: assume ASCII (not great!)
 	return zeroCopy8BitString(buffer, 0, size), nil
 }

 // parseDocument reads everything from p.reader, decodes it to a string and
 // parses it as a text property list.
 //
 // If input remains after the first value and that value was a string, the
 // whole input is parsed again from the beginning as a brace-less dictionary
 // (the .strings file format). Trailing input after any other kind of value
 // is an error.
 //
 // Parse failures are raised internally as panics carrying an error; they
 // are recovered here and returned as a plistParseError, with pval left nil.
 // Runtime errors and panic values that are not errors are re-raised
 // unchanged.
 func (p *textPlistParser) parseDocument() (pval cfValue, parseError error) {
 	defer func() {
 		r := recover()
 		if r == nil {
 			return
 		}
 		if _, isRuntime := r.(runtime.Error); isRuntime {
 			panic(r)
 		}
 		err, isErr := r.(error)
 		if !isErr {
 			// Not raised by this parser. Re-raise the original value rather
 			// than letting an unchecked type assertion replace it with an
 			// unrelated runtime error.
 			panic(r)
 		}
 		// Wrap all non-invalid-plist errors.
 		parseError = plistParseError{"text", err}
 	}()

 	buffer, err := ioutil.ReadAll(p.reader)
 	if err != nil {
 		panic(err)
 	}

 	p.input, err = guessEncodingAndConvert(buffer)
 	if err != nil {
 		panic(err)
 	}

 	val := p.parsePlistValue()

 	p.skipWhitespaceAndComments()
 	if p.peek() != eof {
 		if _, ok := val.(cfString); !ok {
 			p.error("garbage after end of document")
 		}

 		// Try parsing as .strings.
 		// See -[NSDictionary propertyListFromStringsFileFormat:].
 		p.start = 0
 		p.pos = 0
 		val = p.parseDictionary(true)
 	}

 	return val, nil
 }

 // eof is the sentinel returned by next once pos has reached the end of the
 // input; -1 is not a valid rune, so it cannot collide with decoded text.
 const eof rune = -1

 // error aborts the parse by panicking with an error. The message is e
 // formatted printf-style with args, followed by the location of p.pos: the
 // number of newlines before it and the byte offset past the last newline
 // (both counted from zero). parseDocument recovers the panic.
 func (p *textPlistParser) error(e string, args ...interface{}) {
 	consumed := p.input[:p.pos]
 	lineNo := strings.Count(consumed, "\n")
 	column := len(consumed) - strings.LastIndex(consumed, "\n") - 1
 	msg := fmt.Sprintf(e, args...)
 	panic(fmt.Errorf("%s at line %d character %d", msg, lineNo, column))
 }

 // next decodes the rune at pos, advances pos past it and records its byte
 // width in p.width. At the end of the input it returns eof, leaves pos
 // alone and sets p.width to 0 so that a following backup is a no-op.
 func (p *textPlistParser) next() rune {
 	if p.pos < len(p.input) {
 		r, size := utf8.DecodeRuneInString(p.input[p.pos:])
 		p.width = size
 		p.pos += size
 		return r
 	}
 	p.width = 0
 	return eof
 }

 // backup moves pos back by the width of the rune last read by next,
 // un-reading it. p.width is left unchanged.
 func (p *textPlistParser) backup() {
 	p.pos = p.pos - p.width
 }

 // peek returns the rune at pos without consuming it. It reads with next
 // and steps straight back, so p.width ends up holding the peeked rune's
 // width (0 at eof).
 func (p *textPlistParser) peek() rune {
 	r := p.next()
 	p.pos -= p.width // same step back that backup performs
 	return r
 }

 // emit returns the pending token, input[start:pos], and then moves start
 // up to pos so that the next token begins where this one ended.
 func (p *textPlistParser) emit() string {
 	token := p.input[p.start:p.pos]
 	p.ignore()
 	return token
 }

 // ignore discards the pending token by moving start up to pos.
 func (p *textPlistParser) ignore() {
 	p.start = p.pos
 }

 // empty reports whether the pending token has zero length, that is,
 // whether nothing has been consumed since the last emit or ignore.
 func (p *textPlistParser) empty() bool {
 	return p.pos == p.start
 }

 // scanUntil advances pos to the next occurrence of ch at or after pos,
 // leaving ch itself unread. If ch does not occur, pos moves to the end of
 // the input.
 func (p *textPlistParser) scanUntil(ch rune) {
 	offset := strings.IndexRune(p.input[p.pos:], ch)
 	if offset < 0 {
 		p.pos = len(p.input)
 		return
 	}
 	p.pos += offset
 }

 // scanUntilAny advances pos to the first rune at or after pos that appears
 // in chs, leaving that rune unread. If none occurs, pos moves to the end
 // of the input.
 func (p *textPlistParser) scanUntilAny(chs string) {
 	offset := strings.IndexAny(p.input[p.pos:], chs)
 	if offset < 0 {
 		p.pos = len(p.input)
 		return
 	}
 	p.pos += offset
 }

 // scanCharactersInSet consumes runes for as long as they belong to ch and
 // stops with pos on the first rune that does not.
 func (p *textPlistParser) scanCharactersInSet(ch *characterSet) {
 	r := p.next()
 	for ch.Contains(r) {
 		r = p.next()
 	}
 	// The rune that ended the run was read; put it back.
 	p.backup()
 }

 // scanCharactersNotInSet consumes runes until it meets one that belongs to
 // ch or reaches the end of the input. The terminating rune is left unread.
 func (p *textPlistParser) scanCharactersNotInSet(ch *characterSet) {
 	for r := p.next(); r != eof && !ch.Contains(r); r = p.next() {
 	}
 	p.backup()
 }

 // skipWhitespaceAndComments advances past any mix of whitespace, "//" line
 // comments and "/* ... */" block comments, then discards the skipped text
 // so the next token starts at pos. A block comment with no terminator
 // aborts the parse.
 func (p *textPlistParser) skipWhitespaceAndComments() {
 	for {
 		p.scanCharactersInSet(&whitespace)

 		rest := p.input[p.pos:]
 		switch {
 		case strings.HasPrefix(rest, "//"):
 			// Skip up to the line terminator; the next pass of the loop
 			// deals with whatever follows.
 			p.scanCharactersNotInSet(&newlineCharacterSet)
 		case strings.HasPrefix(rest, "/*"):
 			// Look for the terminator only after the opener, so that the
 			// '*' of "/*" cannot also serve as the '*' of "*/" in "/*/".
 			end := strings.Index(rest[2:], "*/")
 			if end < 0 {
 				p.error("unexpected eof in block comment")
 			}
 			p.pos += 2 + end + 2 // opener, body, and the */ as well
 		default:
 			p.ignore()
 			return
 		}
 	}
 }

 // parseOctalDigits consumes up to max octal digits and returns their
 // value. It stops early, leaving the offending rune unread, at the first
 // rune that is not in '0'..'7'; with no digits at all it returns 0.
 func (p *textPlistParser) parseOctalDigits(max int) uint64 {
 	var acc uint64
 	for n := 0; n < max; n++ {
 		r := p.next()
 		if r < '0' || r > '7' {
 			p.backup()
 			break
 		}
 		acc = acc<<3 | uint64(r-'0')
 	}
 	return acc
 }

 // parseHexDigits consumes up to max hexadecimal digits, upper or lower
 // case, and returns their value. It stops early, leaving the offending
 // rune unread, at the first rune that is not a hex digit; with no digits
 // at all it returns 0.
 func (p *textPlistParser) parseHexDigits(max int) uint64 {
 	var acc uint64
 	for n := 0; n < max; n++ {
 		r := p.next()

 		var digit uint64
 		switch {
 		case r >= '0' && r <= '9':
 			digit = uint64(r - '0')
 		case r >= 'a' && r <= 'f':
 			digit = 10 + uint64(r-'a')
 		case r >= 'A' && r <= 'F':
 			digit = 10 + uint64(r-'A')
 		default:
 			p.backup()
 			return acc
 		}
 		acc = acc<<4 | digit
 	}
 	return acc
 }

 // parseEscape decodes one escape sequence inside a quoted string and
 // returns the text it stands for. The \ has already been consumed.
 //
 // Handled: the single-letter escapes a b v f t r n, \\ and \", \x with up
 // to two hex digits, \u or \U with up to four hex digits, and up to three
 // octal digits. For any other rune the result is "" and the rune is left
 // in the input, so it is read as ordinary string content. In every case
 // the consumed escape is dropped from the pending token.
 func (p *textPlistParser) parseEscape() string {
 	decoded := ""
 	switch r := p.next(); {
 	case r >= '0' && r <= '7':
 		p.backup() // we've already consumed one of the digits
 		decoded = string(rune(p.parseOctalDigits(3)))
 	case r == 'x': // This is our extension.
 		decoded = string(rune(p.parseHexDigits(2)))
 	case r == 'u' || r == 'U': // 'u' is a GNUstep extension.
 		decoded = string(rune(p.parseHexDigits(4)))
 	case r == 'a':
 		decoded = "\a"
 	case r == 'b':
 		decoded = "\b"
 	case r == 'v':
 		decoded = "\v"
 	case r == 'f':
 		decoded = "\f"
 	case r == 't':
 		decoded = "\t"
 	case r == 'r':
 		decoded = "\r"
 	case r == 'n':
 		decoded = "\n"
 	case r == '\\':
 		decoded = `\`
 	case r == '"':
 		decoded = `"`
 	default:
 		p.backup() // everything else should be accepted
 	}
 	p.ignore() // skip the entire escape sequence
 	return decoded
 }

 // parseQuotedString reads the body of a double-quoted string up to and
 // including the closing quote and returns it with escapes decoded. The
 // opening " has already been consumed. Reaching the end of the input
 // first aborts the parse.
 //
 // A string without escapes is returned as a direct slice of the input;
 // the pieces are assembled in a builder only once a backslash is seen.
 func (p *textPlistParser) parseQuotedString() cfString {
 	p.ignore() // ignore the "

 	var (
 		unescaped strings.Builder
 		sawEscape bool
 	)

 	for {
 		p.scanUntilAny(`"\`)
 		switch p.peek() {
 		case eof:
 			p.error("unexpected eof in quoted string")
 		case '"':
 			tail := p.emit()
 			p.pos++ // skip "
 			if sawEscape {
 				unescaped.WriteString(tail)
 				return cfString(unescaped.String())
 			}
 			return cfString(tail)
 		case '\\':
 			sawEscape = true
 			unescaped.WriteString(p.emit())
 			p.next() // consume \
 			unescaped.WriteString(p.parseEscape())
 		}
 	}
 }

 // parseUnquotedString reads a bare string: every rune up to the first
 // member of gsQuotable (or the end of the input). Consuming nothing means
 // the cursor sits on a character that needs quoting, which aborts the
 // parse.
 func (p *textPlistParser) parseUnquotedString() cfString {
 	p.scanCharactersNotInSet(&gsQuotable)
 	if p.empty() {
 		p.error("invalid unquoted string (found an unquoted character that should be quoted?)")
 	}
 	return cfString(p.emit())
 }

 // parseDictionary reads `key = value;` entries until the closing }. The {
 // has already been consumed.
 //
 // Keys may be quoted or bare strings. A key followed directly by ; takes
 // the key itself as its value. With ignoreEof set, the end of the input
 // closes the dictionary as well (used for brace-less .strings files);
 // otherwise it aborts the parse. The result is passed through maybeUID.
 func (p *textPlistParser) parseDictionary(ignoreEof bool) cfValue {
 	keys := make([]string, 0, 32)
 	values := make([]cfValue, 0, 32)

 	for {
 		p.skipWhitespaceAndComments()

 		first := p.next()
 		if first == eof && !ignoreEof {
 			p.error("unexpected eof in dictionary")
 		}
 		if first == eof || first == '}' {
 			break
 		}

 		// Both string parsers panic rather than return without a key.
 		var key cfString
 		if first == '"' {
 			key = p.parseQuotedString()
 		} else {
 			p.backup()
 			key = p.parseUnquotedString()
 		}

 		p.skipWhitespaceAndComments()

 		var val cfValue
 		switch p.next() {
 		case ';':
 			// This is supposed to be .strings-specific.
 			// GNUstep parses this as an empty string.
 			// Apple copies the key like we do.
 			val = key
 		case '=':
 			// whitespace is consumed within
 			val = p.parsePlistValue()

 			p.skipWhitespaceAndComments()

 			if p.next() != ';' {
 				p.error("missing ; in dictionary")
 			}
 		default:
 			p.error("missing = in dictionary")
 		}

 		keys = append(keys, string(key))
 		values = append(values, val)
 	}

 	dict := &cfDictionary{keys: keys, values: values}
 	return dict.maybeUID(p.format == OpenStepFormat)
 }

 // parseArray reads array elements until the closing ). The ( has already
 // been consumed.
 //
 // Commas are skipped wherever they appear, so a trailing ",)" is
 // accepted. Elements that parse to an empty string are dropped. Reaching
 // the end of the input before ) aborts the parse.
 func (p *textPlistParser) parseArray() *cfArray {
 	values := make([]cfValue, 0, 32)

 	for {
 		p.skipWhitespaceAndComments()

 		r := p.next()
 		if r == eof {
 			p.error("unexpected eof in array")
 		}
 		if r == ')' {
 			break // done here
 		}
 		if r == ',' {
 			continue // restart; ,) is valid and we don't want to blow it
 		}
 		p.backup()

 		elem := p.parsePlistValue() // whitespace is consumed within
 		if s, isString := elem.(cfString); isString && string(s) == "" {
 			// Empty strings in arrays are apparently skipped?
 			// TODO: Figure out why this was implemented.
 			continue
 		}
 		values = append(values, elem)
 	}

 	return &cfArray{values}
 }

 // parseGNUStepValue parses a GNUstep typed value of the form <*Tvalue>.
 // The <* have already been consumed.
 //
 // T selects the result: I yields a cfNumber (signed when the text starts
 // with '-'), R a cfReal, B a cfBoolean that is true when the text starts
 // with 'Y', and D a cfDate parsed with textPlistTimeLayout and converted
 // to UTC. One optional " directly after T and one directly before > are
 // dropped. An unknown type, a missing >, or a value that is empty once
 // the quotes are removed aborts the parse.
 func (p *textPlistParser) parseGNUStepValue() cfValue {
 	typ := p.next()

 	if typ == '>' || typ == eof { // <*>, <*EOF
 		p.error("invalid GNUStep extended value")
 	}

 	if typ != 'I' && typ != 'R' && typ != 'B' && typ != 'D' {
 		// early out: no need to collect the value if we'll fail to understand it
 		// The rune is passed as an argument so that a '%' taken from the
 		// input is never interpreted as a format directive.
 		p.error("unknown GNUStep extended value type `%c'", typ)
 	}

 	if p.peek() == '"' { // <*x"
 		p.next()
 	}

 	p.ignore()
 	p.scanUntil('>')

 	if p.peek() == eof { // <*xEOF or <*x"EOF
 		p.error("unterminated GNUStep extended value")
 	}

 	if p.empty() { // <*x>, <*x">
 		p.error("empty GNUStep extended value")
 	}

 	v := p.emit()
 	p.next() // consume the >

 	if v[len(v)-1] == '"' {
 		// GNUStep tolerates malformed quoted values, as in <*I5"> and <*I"5>
 		// It purportedly does so by stripping the trailing quote
 		v = v[:len(v)-1]
 	}

 	if v == "" { // <*x"">
 		// Nothing is left once the quotes are gone; indexing v[0] below
 		// would otherwise panic with a runtime error.
 		p.error("empty GNUStep extended value")
 	}

 	switch typ {
 	case 'I':
 		if v[0] == '-' {
 			n := mustParseInt(v, 10, 64)
 			return &cfNumber{signed: true, value: uint64(n)}
 		}
 		n := mustParseUint(v, 10, 64)
 		return &cfNumber{signed: false, value: n}
 	case 'R':
 		n := mustParseFloat(v, 64)
 		return &cfReal{wide: true, value: n} // TODO(DH) 32/64
 	case 'B':
 		b := v[0] == 'Y'
 		return cfBoolean(b)
 	case 'D':
 		t, err := time.Parse(textPlistTimeLayout, v)
 		if err != nil {
 			// The message quotes the input, so keep it out of the format string.
 			p.error("%s", err.Error())
 		}

 		return cfDate(t.In(time.UTC))
 	}
 	// We should never get here; we checked the type above
 	return nil
 }

 // parseGNUStepBase64 parses GNUstep base64 data of the form <[...]>. The
 // <[ have already been consumed.
 //
 // Everything up to the first ] is taken as the payload, which must be
 // followed by ] and then >. The payload is filtered through
 // base64ValidChars before being decoded with the standard padded
 // alphabet. A missing terminator or a decode failure aborts the parse.
 func (p *textPlistParser) parseGNUStepBase64() cfData {
 	p.ignore()
 	p.scanUntil(']')
 	payload := p.emit()

 	if p.next() != ']' {
 		p.error("invalid GNUStep base64 data (expected ']')")
 	}

 	if p.next() != '>' {
 		p.error("invalid GNUStep base64 data (expected '>')")
 	}

 	// Emulate NSDataBase64DecodingIgnoreUnknownCharacters
 	cleaned := strings.Map(base64ValidChars.Map, payload)
 	decoded, err := base64.StdEncoding.DecodeString(cleaned)
 	if err != nil {
 		// Pass the decoder's text as an argument, not as part of the format.
 		p.error("invalid GNUStep base64 data: %s", err.Error())
 	}
 	return cfData(decoded)
 }

 // parseHexData parses hexadecimal data of the form <0fa1 b2...> and
 // returns the bytes. The < has already been consumed.
 //
 // Spaces, tabs, CR, LF, U+2028 and U+2029 may appear anywhere between
 // digits. Reaching the end of the input, any other non-hex rune, or an
 // odd number of digits before > aborts the parse.
 func (p *textPlistParser) parseHexData() cfData {
 	out := make([]byte, 0, 256)
 	var (
 		high   byte // high nibble waiting for its partner
 		digits int  // hex digits seen so far
 	)

 	for {
 		r := p.next()
 		switch r {
 		case eof:
 			p.error("unexpected eof in data")
 		case '>':
 			if digits%2 != 0 {
 				p.error("uneven number of hex digits in data")
 			}
 			p.ignore()
 			return cfData(out)
 		// Apple and GNUstep both want these in pairs. We are a bit more lax.
 		// GS accepts comments too, but that seems like a lot of work.
 		case ' ', '\t', '\n', '\r', '\u2028', '\u2029':
 			continue
 		}

 		var nibble byte
 		switch {
 		case r >= 'a' && r <= 'f':
 			nibble = 10 + byte(r-'a')
 		case r >= 'A' && r <= 'F':
 			nibble = 10 + byte(r-'A')
 		case r >= '0' && r <= '9':
 			nibble = byte(r - '0')
 		default:
 			p.error("unexpected hex digit `%c'", r)
 		}

 		digits++
 		if digits%2 == 1 {
 			high = nibble << 4
 		} else {
 			out = append(out, high|nibble)
 		}
 	}
 }

 // parsePlistValue skips leading whitespace and comments and parses one
 // value, chosen by its first rune: " starts a quoted string, { a
 // dictionary, ( an array, <* a GNUstep typed value, <[ GNUstep base64
 // data, any other < hex data, and anything else a bare string. At the end
 // of the input it returns an empty dictionary. Meeting either GNUstep
 // form switches p.format to GNUStepFormat.
 func (p *textPlistParser) parsePlistValue() cfValue {
 	p.skipWhitespaceAndComments()

 	switch p.next() {
 	case eof:
 		return &cfDictionary{}
 	case '"':
 		return p.parseQuotedString()
 	case '{':
 		return p.parseDictionary(false)
 	case '(':
 		return p.parseArray()
 	case '<':
 		marker := p.next()
 		if marker == '*' {
 			p.format = GNUStepFormat
 			return p.parseGNUStepValue()
 		}
 		if marker == '[' {
 			p.format = GNUStepFormat
 			return p.parseGNUStepBase64()
 		}
 		// Plain hex data: the rune after < is its first digit.
 		p.backup()
 		return p.parseHexData()
 	}

 	p.backup()
 	return p.parseUnquotedString()
 }

 // newTextPlistParser returns a parser that will read its document from r.
 // The format starts out as OpenStepFormat.
 func newTextPlistParser(r io.Reader) *textPlistParser {
 	p := new(textPlistParser)
 	p.reader = r
 	p.format = OpenStepFormat
 	return p
 }
	// Parser for text plist formats.
	// @see https://github.com/apple/swift-corelibs-foundation/blob/master/CoreFoundation/Parsing.subproj/CFOldStylePList.c
	// @see https://github.com/gnustep/libs-base/blob/master/Source/NSPropertyList.m
	// This parser also handles strings files.

	package plist

	import (
	"encoding/base64"
	"encoding/binary"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"runtime"
	"strings"
	"time"
	"unicode/utf16"
	"unicode/utf8"
	)

	// textPlistParser carries the state of one parse of a text property list.
	// It works as a cursor over an in-memory string: start marks where the
	// token being collected begins and pos is the next byte to be read.
	type textPlistParser struct {
	// reader is the byte source; parseDocument reads it to completion.
	reader io.Reader
	// format is the detected dialect. newTextPlistParser sets it to
	// OpenStepFormat and parsePlistValue switches it to GNUStepFormat when
	// it meets a GNUstep-only construct.
	format int

	// input is the whole document as a string, filled in by parseDocument.
	input string
	// start is the byte offset at which the pending token begins (see emit).
	start int
	// pos is the byte offset of the next rune to be read.
	pos int
	// width is the byte width of the rune most recently returned by next;
	// backup subtracts it from pos.
	width int
	}

	// convertU16 decodes buffer as a sequence of UTF-16 code units stored in
	// byte order bo and returns the text as a Go string. A buffer holding an
	// odd number of bytes is rejected with a "truncated utf16" error.
	func convertU16(buffer []byte, bo binary.ByteOrder) (string, error) {
		if len(buffer)&1 == 1 {
			return "", errors.New("truncated utf16")
		}

		units := make([]uint16, 0, len(buffer)/2)
		for rest := buffer; len(rest) > 0; rest = rest[2:] {
			// Hand the byte order exactly one code unit at a time.
			units = append(units, bo.Uint16(rest[:2]))
		}
		return string(utf16.Decode(units)), nil
	}

	// guessEncodingAndConvert inspects the first bytes of buffer to choose a
	// text encoding and returns the document as a string.
	//
	// Recognised, in order: a UTF-8 BOM (EF BB BF), a UTF-16 big-endian BOM
	// (FE FF) or a leading 00 XX pair, and a UTF-16 little-endian BOM (FF FE)
	// or a leading XX 00 pair. BOMs are dropped before conversion. Anything
	// else is passed through as 8-bit text.
	//
	// zeroCopy8BitString is defined elsewhere; presumably it views
	// buffer[offset:offset+length] as a string without copying - confirm.
	func guessEncodingAndConvert(buffer []byte) (string, error) {
		size := len(buffer)

		if size >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF {
			// UTF-8 BOM: skip the three marker bytes.
			return zeroCopy8BitString(buffer, 3, size-3), nil
		}

		if size >= 2 {
			b0, b1 := buffer[0], buffer[1]
			switch {
			case b0 == 0xFE && b1 == 0xFF: // big-endian BOM
				return convertU16(buffer[2:], binary.BigEndian)
			case b0 == 0 && b1 != 0: // no BOM, but 00 XX reads as big-endian
				return convertU16(buffer, binary.BigEndian)
			case b0 == 0xFF && b1 == 0xFE: // little-endian BOM
				return convertU16(buffer[2:], binary.LittleEndian)
			case b0 != 0 && b1 == 0: // no BOM, but XX 00 reads as little-endian
				return convertU16(buffer, binary.LittleEndian)
			}
		}

		// fallback: assume ASCII (not great!)
		return zeroCopy8BitString(buffer, 0, size), nil
	}

	// parseDocument reads everything from p.reader, decodes it to a string and
	// parses it as a text property list.
	//
	// If input remains after the first value and that value was a string, the
	// whole input is parsed again from the beginning as a brace-less dictionary
	// (the .strings file format). Trailing input after any other kind of value
	// is an error.
	//
	// Parse failures are raised internally as panics carrying an error; they
	// are recovered here and returned as a plistParseError, with pval left nil.
	// Runtime errors and panic values that are not errors are re-raised
	// unchanged.
	func (p *textPlistParser) parseDocument() (pval cfValue, parseError error) {
		defer func() {
			r := recover()
			if r == nil {
				return
			}
			if _, isRuntime := r.(runtime.Error); isRuntime {
				panic(r)
			}
			err, isErr := r.(error)
			if !isErr {
				// Not raised by this parser. Re-raise the original value rather
				// than letting an unchecked type assertion replace it with an
				// unrelated runtime error.
				panic(r)
			}
			// Wrap all non-invalid-plist errors.
			parseError = plistParseError{"text", err}
		}()

		buffer, err := ioutil.ReadAll(p.reader)
		if err != nil {
			panic(err)
		}

		p.input, err = guessEncodingAndConvert(buffer)
		if err != nil {
			panic(err)
		}

		val := p.parsePlistValue()

		p.skipWhitespaceAndComments()
		if p.peek() != eof {
			if _, ok := val.(cfString); !ok {
				p.error("garbage after end of document")
			}

			// Try parsing as .strings.
			// See -[NSDictionary propertyListFromStringsFileFormat:].
			p.start = 0
			p.pos = 0
			val = p.parseDictionary(true)
		}

		return val, nil
	}

	// eof is the sentinel returned by next once pos has reached the end of the
	// input; -1 is not a valid rune, so it cannot collide with decoded text.
	const eof rune = -1

	// error aborts the parse by panicking with an error. The message is e
	// formatted printf-style with args, followed by the location of p.pos: the
	// number of newlines before it and the byte offset past the last newline
	// (both counted from zero). parseDocument recovers the panic.
	func (p *textPlistParser) error(e string, args ...interface{}) {
		consumed := p.input[:p.pos]
		lineNo := strings.Count(consumed, "\n")
		column := len(consumed) - strings.LastIndex(consumed, "\n") - 1
		msg := fmt.Sprintf(e, args...)
		panic(fmt.Errorf("%s at line %d character %d", msg, lineNo, column))
	}

	// next decodes the rune at pos, advances pos past it and records its byte
	// width in p.width. At the end of the input it returns eof, leaves pos
	// alone and sets p.width to 0 so that a following backup is a no-op.
	func (p *textPlistParser) next() rune {
		if p.pos < len(p.input) {
			r, size := utf8.DecodeRuneInString(p.input[p.pos:])
			p.width = size
			p.pos += size
			return r
		}
		p.width = 0
		return eof
	}

	// backup moves pos back by the width of the rune last read by next,
	// un-reading it. p.width is left unchanged.
	func (p *textPlistParser) backup() {
		p.pos = p.pos - p.width
	}

	// peek returns the rune at pos without consuming it. It reads with next
	// and steps straight back, so p.width ends up holding the peeked rune's
	// width (0 at eof).
	func (p *textPlistParser) peek() rune {
		r := p.next()
		p.pos -= p.width // same step back that backup performs
		return r
	}

	// emit returns the pending token, input[start:pos], and then moves start
	// up to pos so that the next token begins where this one ended.
	func (p *textPlistParser) emit() string {
		token := p.input[p.start:p.pos]
		p.ignore()
		return token
	}

	// ignore discards the pending token by moving start up to pos.
	func (p *textPlistParser) ignore() {
	p.start = p.pos
	}

	// empty reports whether the pending token has zero length, that is,
	// whether nothing has been consumed since the last emit or ignore.
	func (p *textPlistParser) empty() bool {
		return p.pos == p.start
	}

	// scanUntil advances pos to the next occurrence of ch at or after pos,
	// leaving ch itself unread. If ch does not occur, pos moves to the end of
	// the input.
	func (p *textPlistParser) scanUntil(ch rune) {
		offset := strings.IndexRune(p.input[p.pos:], ch)
		if offset < 0 {
			p.pos = len(p.input)
			return
		}
		p.pos += offset
	}

	// scanUntilAny advances pos to the first rune at or after pos that appears
	// in chs, leaving that rune unread. If none occurs, pos moves to the end
	// of the input.
	func (p *textPlistParser) scanUntilAny(chs string) {
		offset := strings.IndexAny(p.input[p.pos:], chs)
		if offset < 0 {
			p.pos = len(p.input)
			return
		}
		p.pos += offset
	}

	// scanCharactersInSet consumes runes for as long as they belong to ch and
	// stops with pos on the first rune that does not.
	//
	// The receiver and ch are pointers: the cursor movement has to be visible
	// to the caller, and the call sites pass the address of a set.
	func (p *textPlistParser) scanCharactersInSet(ch *characterSet) {
		r := p.next()
		for ch.Contains(r) {
			r = p.next()
		}
		// The rune that ended the run was read; put it back.
		p.backup()
	}

	func (p textPlistParser) scanCharactersNotInSet(ch characterSet) {
	var r rune
	for {
	r = p.next()
	if r == eof \|\| ch.Contains(r) {
	break
	}
	}
	p.backup()
	}

	// skipWhitespaceAndComments advances past any mix of whitespace, "//" line
	// comments and "/* ... */" block comments, then discards the skipped text
	// so the next token starts at pos. A block comment with no terminator
	// aborts the parse.
	func (p *textPlistParser) skipWhitespaceAndComments() {
		for {
			p.scanCharactersInSet(&whitespace)

			rest := p.input[p.pos:]
			switch {
			case strings.HasPrefix(rest, "//"):
				// Skip up to the line terminator; the next pass of the loop
				// deals with whatever follows.
				p.scanCharactersNotInSet(&newlineCharacterSet)
			case strings.HasPrefix(rest, "/*"):
				// Look for the terminator only after the opener, so that the
				// '*' of "/*" cannot also serve as the '*' of "*/" in "/*/".
				end := strings.Index(rest[2:], "*/")
				if end < 0 {
					p.error("unexpected eof in block comment")
				}
				p.pos += 2 + end + 2 // opener, body, and the */ as well
			default:
				p.ignore()
				return
			}
		}
	}

	func (p *textPlistParser) parseOctalDigits(max int) uint64 {
	var val uint64

	for i := 0; i < max; i++ {
	r := p.next()

	if r >= '0' && r <= '7' {
	val <<= 3
	val \|= uint64((r - '0'))
	} else {
	p.backup()
	break
	}
	}
	return val
	}

	func (p *textPlistParser) parseHexDigits(max int) uint64 {
	var val uint64

	for i := 0; i < max; i++ {
	r := p.next()

	if r >= 'a' && r <= 'f' {
	val <<= 4
	val \|= 10 + uint64((r - 'a'))
	} else if r >= 'A' && r <= 'F' {
	val <<= 4
	val \|= 10 + uint64((r - 'A'))
	} else if r >= '0' && r <= '9' {
	val <<= 4
	val \|= uint64((r - '0'))
	} else {
	p.backup()
	break
	}
	}
	return val
	}

	// parseEscape decodes one escape sequence inside a quoted string and
	// returns the text it stands for. The \ has already been consumed.
	//
	// Handled: the single-letter escapes a b v f t r n, \\ and \", \x with up
	// to two hex digits, \u or \U with up to four hex digits, and up to three
	// octal digits. For any other rune the result is "" and the rune is left
	// in the input, so it is read as ordinary string content. In every case
	// the consumed escape is dropped from the pending token.
	func (p *textPlistParser) parseEscape() string {
		decoded := ""
		switch r := p.next(); {
		case r >= '0' && r <= '7':
			p.backup() // we've already consumed one of the digits
			decoded = string(rune(p.parseOctalDigits(3)))
		case r == 'x': // This is our extension.
			decoded = string(rune(p.parseHexDigits(2)))
		case r == 'u' || r == 'U': // 'u' is a GNUstep extension.
			decoded = string(rune(p.parseHexDigits(4)))
		case r == 'a':
			decoded = "\a"
		case r == 'b':
			decoded = "\b"
		case r == 'v':
			decoded = "\v"
		case r == 'f':
			decoded = "\f"
		case r == 't':
			decoded = "\t"
		case r == 'r':
			decoded = "\r"
		case r == 'n':
			decoded = "\n"
		case r == '\\':
			decoded = `\`
		case r == '"':
			decoded = `"`
		default:
			p.backup() // everything else should be accepted
		}
		p.ignore() // skip the entire escape sequence
		return decoded
	}

	// parseQuotedString reads the body of a double-quoted string up to and
	// including the closing quote and returns it with escapes decoded. The
	// opening " has already been consumed. Reaching the end of the input
	// first aborts the parse.
	//
	// A string without escapes is returned as a direct slice of the input;
	// the pieces are assembled in a builder only once a backslash is seen.
	func (p *textPlistParser) parseQuotedString() cfString {
		p.ignore() // ignore the "

		var (
			unescaped strings.Builder
			sawEscape bool
		)

		for {
			p.scanUntilAny(`"\`)
			switch p.peek() {
			case eof:
				p.error("unexpected eof in quoted string")
			case '"':
				tail := p.emit()
				p.pos++ // skip "
				if sawEscape {
					unescaped.WriteString(tail)
					return cfString(unescaped.String())
				}
				return cfString(tail)
			case '\\':
				sawEscape = true
				unescaped.WriteString(p.emit())
				p.next() // consume \
				unescaped.WriteString(p.parseEscape())
			}
		}
	}

	// parseUnquotedString reads a bare string: every rune up to the first
	// member of gsQuotable (or the end of the input). Consuming nothing means
	// the cursor sits on a character that needs quoting, which aborts the
	// parse.
	func (p *textPlistParser) parseUnquotedString() cfString {
		p.scanCharactersNotInSet(&gsQuotable)
		if p.empty() {
			p.error("invalid unquoted string (found an unquoted character that should be quoted?)")
		}
		return cfString(p.emit())
	}

	// parseDictionary reads `key = value;` entries until the closing }. The {
	// has already been consumed.
	//
	// Keys may be quoted or bare strings. A key followed directly by ; takes
	// the key itself as its value. With ignoreEof set, the end of the input
	// closes the dictionary as well (used for brace-less .strings files);
	// otherwise it aborts the parse. The result is passed through maybeUID.
	func (p *textPlistParser) parseDictionary(ignoreEof bool) cfValue {
		keys := make([]string, 0, 32)
		values := make([]cfValue, 0, 32)

		for {
			p.skipWhitespaceAndComments()

			first := p.next()
			if first == eof && !ignoreEof {
				p.error("unexpected eof in dictionary")
			}
			if first == eof || first == '}' {
				break
			}

			// Both string parsers panic rather than return without a key.
			var key cfString
			if first == '"' {
				key = p.parseQuotedString()
			} else {
				p.backup()
				key = p.parseUnquotedString()
			}

			p.skipWhitespaceAndComments()

			var val cfValue
			switch p.next() {
			case ';':
				// This is supposed to be .strings-specific.
				// GNUstep parses this as an empty string.
				// Apple copies the key like we do.
				val = key
			case '=':
				// whitespace is consumed within
				val = p.parsePlistValue()

				p.skipWhitespaceAndComments()

				if p.next() != ';' {
					p.error("missing ; in dictionary")
				}
			default:
				p.error("missing = in dictionary")
			}

			keys = append(keys, string(key))
			values = append(values, val)
		}

		dict := &cfDictionary{keys: keys, values: values}
		return dict.maybeUID(p.format == OpenStepFormat)
	}

	// parseArray reads array elements until the closing ). The ( has already
	// been consumed.
	//
	// Commas are skipped wherever they appear, so a trailing ",)" is
	// accepted. Elements that parse to an empty string are dropped. Reaching
	// the end of the input before ) aborts the parse.
	//
	// The receiver is a pointer so that cursor movement is visible to the
	// caller, and the result is a *cfArray to match the value returned.
	func (p *textPlistParser) parseArray() *cfArray {
		values := make([]cfValue, 0, 32)

		for {
			p.skipWhitespaceAndComments()

			r := p.next()
			if r == eof {
				p.error("unexpected eof in array")
			}
			if r == ')' {
				break // done here
			}
			if r == ',' {
				continue // restart; ,) is valid and we don't want to blow it
			}
			p.backup()

			elem := p.parsePlistValue() // whitespace is consumed within
			if s, isString := elem.(cfString); isString && string(s) == "" {
				// Empty strings in arrays are apparently skipped?
				// TODO: Figure out why this was implemented.
				continue
			}
			values = append(values, elem)
		}

		return &cfArray{values}
	}

	// the <* have already been consumed
	func (p *textPlistParser) parseGNUStepValue() cfValue {
	typ := p.next()

	if typ == '>' \|\| typ == eof { // <>, <EOF
	p.error("invalid GNUStep extended value")
	}

	if typ != 'I' && typ != 'R' && typ != 'B' && typ != 'D' {
	// early out: no need to collect the value if we'll fail to understand it
	p.error("unknown GNUStep extended value type `" + string(typ) + "'")
	}

	if p.peek() == '"' { // <*x"
	p.next()
	}

	p.ignore()
	p.scanUntil('>')

	if p.peek() == eof { // <xEOF or <x"EOF
	p.error("unterminated GNUStep extended value")
	}

	if p.empty() { // <x>, <x"">
	p.error("empty GNUStep extended value")
	}

	v := p.emit()
	p.next() // consume the >

	if v[len(v)-1] == '"' {
	// GNUStep tolerates malformed quoted values, as in <I5"> and <I"5>
	// It purportedly does so by stripping the trailing quote
	v = v[:len(v)-1]
	}

	switch typ {
	case 'I':
	if v[0] == '-' {
	n := mustParseInt(v, 10, 64)
	return &cfNumber{signed: true, value: uint64(n)}
	} else {
	n := mustParseUint(v, 10, 64)
	return &cfNumber{signed: false, value: n}
	}
	case 'R':
	n := mustParseFloat(v, 64)
	return &cfReal{wide: true, value: n} // TODO(DH) 32/64
	case 'B':
	b := v[0] == 'Y'
	return cfBoolean(b)
	case 'D':
	t, err := time.Parse(textPlistTimeLayout, v)
	if err != nil {
	p.error(err.Error())
	}

	return cfDate(t.In(time.UTC))
	}
	// We should never get here; we checked the type above
	return nil
	}

	// parseGNUStepBase64 parses GNUstep base64 data of the form <[...]>. The
	// <[ have already been consumed.
	//
	// Everything up to the first ] is taken as the payload, which must be
	// followed by ] and then >. The payload is filtered through
	// base64ValidChars before being decoded with the standard padded
	// alphabet. A missing terminator or a decode failure aborts the parse.
	func (p *textPlistParser) parseGNUStepBase64() cfData {
		p.ignore()
		p.scanUntil(']')
		payload := p.emit()

		if p.next() != ']' {
			p.error("invalid GNUStep base64 data (expected ']')")
		}

		if p.next() != '>' {
			p.error("invalid GNUStep base64 data (expected '>')")
		}

		// Emulate NSDataBase64DecodingIgnoreUnknownCharacters
		cleaned := strings.Map(base64ValidChars.Map, payload)
		decoded, err := base64.StdEncoding.DecodeString(cleaned)
		if err != nil {
			// Pass the decoder's text as an argument, not as part of the format.
			p.error("invalid GNUStep base64 data: %s", err.Error())
		}
		return cfData(decoded)
	}

	// The < has already been consumed
	func (p *textPlistParser) parseHexData() cfData {
	buf := make([]byte, 256)
	i := 0
	c := 0

	for {
	r := p.next()
	switch r {
	case eof:
	p.error("unexpected eof in data")
	case '>':
	if c&1 == 1 {
	p.error("uneven number of hex digits in data")
	}
	p.ignore()
	return cfData(buf[:i])
	// Apple and GNUstep both want these in pairs. We are a bit more lax.
	// GS accepts comments too, but that seems like a lot of work.
	case ' ', '\t', '\n', '\r', '\u2028', '\u2029':
	continue
	}

	buf[i] <<= 4
	if r >= 'a' && r <= 'f' {
	buf[i] \|= 10 + byte((r - 'a'))
	} else if r >= 'A' && r <= 'F' {
	buf[i] \|= 10 + byte((r - 'A'))
	} else if r >= '0' && r <= '9' {
	buf[i] \|= byte((r - '0'))
	} else {
	p.error("unexpected hex digit `%c'", r)
	}

	c++
	if c&1 == 0 {
	i++
	if i >= len(buf) {
	realloc := make([]byte, len(buf)*2)
	copy(realloc, buf)
	buf = realloc
	}
	}
	}
	}

	// parsePlistValue skips leading whitespace and comments and parses one
	// value, chosen by its first rune: " starts a quoted string, { a
	// dictionary, ( an array, <* a GNUstep typed value, <[ GNUstep base64
	// data, any other < hex data, and anything else a bare string. At the end
	// of the input it returns an empty dictionary. Meeting either GNUstep
	// form switches p.format to GNUStepFormat.
	func (p *textPlistParser) parsePlistValue() cfValue {
		p.skipWhitespaceAndComments()

		switch p.next() {
		case eof:
			return &cfDictionary{}
		case '"':
			return p.parseQuotedString()
		case '{':
			return p.parseDictionary(false)
		case '(':
			return p.parseArray()
		case '<':
			marker := p.next()
			if marker == '*' {
				p.format = GNUStepFormat
				return p.parseGNUStepValue()
			}
			if marker == '[' {
				p.format = GNUStepFormat
				return p.parseGNUStepBase64()
			}
			// Plain hex data: the rune after < is its first digit.
			p.backup()
			return p.parseHexData()
		}

		p.backup()
		return p.parseUnquotedString()
	}

	// newTextPlistParser returns a parser that will read its document from r.
	// The format starts out as OpenStepFormat.
	func newTextPlistParser(r io.Reader) *textPlistParser {
		p := new(textPlistParser)
		p.reader = r
		p.format = OpenStepFormat
		return p
	}