blob: c60423ff8e018cb3fc05095cd315422d30cf3589 [file] [log] [blame]
// Parser for text plist formats.
// @see https://github.com/apple/swift-corelibs-foundation/blob/master/CoreFoundation/Parsing.subproj/CFOldStylePList.c
// @see https://github.com/gnustep/libs-base/blob/master/Source/NSPropertyList.m
// This parser also handles strings files.
package plist
import (
"encoding/base64"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"runtime"
"strings"
"time"
"unicode/utf16"
"unicode/utf8"
)
// textPlistParser holds the state of a hand-written scanner/parser for text
// property lists.
type textPlistParser struct {
	// reader supplies the raw document bytes; parseDocument reads it to the end.
	reader io.Reader
	// format is set to OpenStepFormat by newTextPlistParser and switched to
	// GNUStepFormat by parsePlistValue when GNUstep-only syntax is seen.
	format int
	// input is the decoded document text being scanned.
	input string
	// start and pos are byte offsets into input that delimit the pending
	// token; width is the byte length of the rune most recently returned by
	// next (0 at end of input).
	start, pos, width int
}
func convertU16(buffer []byte, bo binary.ByteOrder) (string, error) {
if len(buffer)%2 != 0 {
return "", errors.New("truncated utf16")
}
tmp := make([]uint16, len(buffer)/2)
for i := 0; i < len(buffer); i += 2 {
tmp[i/2] = bo.Uint16(buffer[i : i+2])
}
return string(utf16.Decode(tmp)), nil
}
// guessEncodingAndConvert sniffs the encoding of buffer from its first bytes
// and returns the document as a string.
//
//   - EF BB BF: UTF-8 byte order mark; the mark is skipped.
//   - FE FF, or 00 followed by a non-zero byte: big-endian UTF-16.
//   - FF FE, or a non-zero byte followed by 00: little-endian UTF-16.
//
// A UTF-16 byte order mark is stripped before decoding. Anything else is
// passed through unchanged as 8-bit text.
func guessEncodingAndConvert(buffer []byte) (string, error) {
	if len(buffer) >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF {
		return zeroCopy8BitString(buffer, 3, len(buffer)-3), nil
	}

	if len(buffer) >= 2 {
		first, second := buffer[0], buffer[1]
		if first == 0xFE && second == 0xFF {
			return convertU16(buffer[2:], binary.BigEndian)
		}
		if first == 0xFF && second == 0xFE {
			return convertU16(buffer[2:], binary.LittleEndian)
		}
		// No byte order mark: guess from which half of the first code unit is zero.
		if first == 0 && second != 0 {
			return convertU16(buffer, binary.BigEndian)
		}
		if first != 0 && second == 0 {
			return convertU16(buffer, binary.LittleEndian)
		}
	}

	// Fallback: hand the bytes through untouched (assumes ASCII-compatible text).
	return zeroCopy8BitString(buffer, 0, len(buffer)), nil
}
// parseDocument reads the whole of p.reader, decodes it, and parses it as a
// text property list.
//
// The parsing helpers report failures by panicking with an error value; this
// function recovers those panics and returns them wrapped in a
// plistParseError. Runtime errors, and panic values that are not errors at
// all, are re-panicked unchanged so that programming bugs are not disguised
// as malformed input.
//
// If the document starts with a string but has trailing content, it is
// re-parsed from the beginning as a .strings file (a brace-less dictionary).
func (p *textPlistParser) parseDocument() (pval cfValue, parseError error) {
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		err, isError := r.(error)
		if _, isRuntime := r.(runtime.Error); isRuntime || !isError {
			// Not a parse failure: propagate the original panic value
			// instead of masking it with a failed type assertion.
			panic(r)
		}
		// Wrap all non-invalid-plist errors.
		parseError = plistParseError{"text", err}
	}()

	buffer, err := ioutil.ReadAll(p.reader)
	if err != nil {
		panic(err)
	}

	p.input, err = guessEncodingAndConvert(buffer)
	if err != nil {
		panic(err)
	}

	val := p.parsePlistValue()

	p.skipWhitespaceAndComments()
	if p.peek() != eof {
		if _, ok := val.(cfString); !ok {
			p.error("garbage after end of document")
		}

		// Try parsing as .strings.
		// See -[NSDictionary propertyListFromStringsFileFormat:].
		p.start = 0
		p.pos = 0
		val = p.parseDictionary(true)
	}

	return val, nil
}
// eof is the sentinel that next returns once the input is exhausted.
const eof rune = -1
// error aborts parsing by panicking with an error built from the printf-style
// format e and args, suffixed with the current position. The reported line is
// the number of newlines before p.pos and the character is the byte offset
// from the start of that line; both are zero-based.
func (p *textPlistParser) error(e string, args ...interface{}) {
	consumed := p.input[:p.pos]
	line := strings.Count(consumed, "\n")
	char := len(consumed) - 1 - strings.LastIndex(consumed, "\n")
	message := fmt.Sprintf(e, args...)
	panic(fmt.Errorf("%s at line %d character %d", message, line, char))
}
// next decodes and consumes the rune at the current position and returns it.
// It records the rune's byte length in p.width so backup can undo the read.
// At end of input it returns eof and sets the width to zero.
func (p *textPlistParser) next() rune {
	p.width = 0
	if p.pos < len(p.input) {
		var r rune
		r, p.width = utf8.DecodeRuneInString(p.input[p.pos:])
		p.pos += p.width
		return r
	}
	return eof
}
// backup un-reads the rune returned by the most recent call to next by
// rewinding the position by that rune's width. After next returned eof the
// width is zero and backup does nothing.
func (p *textPlistParser) backup() {
	p.pos = p.pos - p.width
}
// peek returns the rune at the current position without consuming it, or eof
// at end of input. Like next, it updates p.width.
func (p *textPlistParser) peek() rune {
	upcoming := p.next()
	p.pos -= p.width // undo the read, exactly as backup would
	return upcoming
}
// emit returns the pending token, i.e. the input between start and pos, and
// begins a new token at the current position.
func (p *textPlistParser) emit() string {
	from, to := p.start, p.pos
	token := p.input[from:to]
	p.start = to
	return token
}
// ignore discards the pending token by moving start up to the current
// position, so that the next emit begins from here.
func (p *textPlistParser) ignore() {
p.start = p.pos
}
// empty reports whether the pending token contains no input at all.
func (p *textPlistParser) empty() bool {
	return p.pos == p.start
}
// scanUntil advances the position to the next occurrence of ch, leaving ch
// itself unconsumed. If ch does not occur, the position moves to the end of
// the input.
func (p *textPlistParser) scanUntil(ch rune) {
	offset := strings.IndexRune(p.input[p.pos:], ch)
	if offset < 0 {
		p.pos = len(p.input)
		return
	}
	p.pos += offset
}
// scanUntilAny advances the position to the first character that appears in
// chs, leaving that character unconsumed. If none occurs, the position moves
// to the end of the input.
func (p *textPlistParser) scanUntilAny(chs string) {
	offset := strings.IndexAny(p.input[p.pos:], chs)
	if offset < 0 {
		p.pos = len(p.input)
		return
	}
	p.pos += offset
}
// scanCharactersInSet consumes runes for as long as they are members of ch
// and stops in front of the first rune that is not.
func (p *textPlistParser) scanCharactersInSet(ch *characterSet) {
	for {
		if !ch.Contains(p.next()) {
			// Put back the rune that ended the run.
			p.backup()
			return
		}
	}
}
// scanCharactersNotInSet consumes runes until it reaches a member of ch or
// the end of the input, and stops in front of that member.
func (p *textPlistParser) scanCharactersNotInSet(ch *characterSet) {
	for r := p.next(); r != eof && !ch.Contains(r); r = p.next() {
	}
	// Put back the terminating rune (a no-op after eof).
	p.backup()
}
// skipWhitespaceAndComments advances past any run of whitespace, // line
// comments and /* block comments */, then discards the skipped text so the
// next token starts at the first significant character.
//
// It panics via p.error if a block comment is not terminated.
func (p *textPlistParser) skipWhitespaceAndComments() {
	for {
		p.scanCharactersInSet(&whitespace)
		rest := p.input[p.pos:]
		switch {
		case strings.HasPrefix(rest, "//"):
			// A line comment runs up to, but not including, the newline;
			// the next iteration consumes the newline as whitespace.
			p.scanCharactersNotInSet(&newlineCharacterSet)
		case strings.HasPrefix(rest, "/*"):
			// Look for the terminator only after the opener. Searching from
			// the opener itself would accept "/*/" as a complete comment
			// because its '*' would be shared by "/*" and "*/".
			end := strings.Index(rest[2:], "*/")
			if end < 0 {
				p.error("unexpected eof in block comment")
			}
			p.pos += 2 + end + 2 // opener, body, and the */ as well
		default:
			p.ignore()
			return
		}
	}
}
// parseOctalDigits consumes at most max octal digits and returns their value.
// It stops early, without consuming it, at the first character that is not an
// octal digit; if no digit is present the result is 0.
func (p *textPlistParser) parseOctalDigits(max int) uint64 {
	var acc uint64
	for remaining := max; remaining > 0; remaining-- {
		digit := p.next()
		if digit < '0' || digit > '7' {
			p.backup()
			return acc
		}
		acc = acc<<3 | uint64(digit-'0')
	}
	return acc
}
// parseHexDigits consumes at most max hexadecimal digits (either case) and
// returns their value. It stops early, without consuming it, at the first
// character that is not a hex digit; if no digit is present the result is 0.
func (p *textPlistParser) parseHexDigits(max int) uint64 {
	var acc uint64
	for remaining := max; remaining > 0; remaining-- {
		var nibble uint64
		switch r := p.next(); {
		case 'a' <= r && r <= 'f':
			nibble = 10 + uint64(r-'a')
		case 'A' <= r && r <= 'F':
			nibble = 10 + uint64(r-'A')
		case '0' <= r && r <= '9':
			nibble = uint64(r - '0')
		default:
			p.backup()
			return acc
		}
		acc = acc<<4 | nibble
	}
	return acc
}
// parseEscape decodes one escape sequence inside a quoted string and returns
// the text it stands for. The leading \ has already been consumed.
//
// Recognized forms are the C-style single-character escapes, \xHH (an
// extension of this parser), \UHHHH and \uHHHH (the latter a GNUstep
// extension), and up to three octal digits. For any other character the
// backslash is simply dropped: the character is left in the input and ends up
// in the string as ordinary content.
//
// \U escapes carry UTF-16 code units. A high surrogate that is immediately
// followed by a second \U or \u escape holding a low surrogate is therefore
// combined into a single code point rather than decaying into two U+FFFD
// replacement characters. An unpaired surrogate still becomes U+FFFD.
func (p *textPlistParser) parseEscape() string {
	var s string
	switch p.next() {
	case 'a':
		s = "\a"
	case 'b':
		s = "\b"
	case 'v':
		s = "\v"
	case 'f':
		s = "\f"
	case 't':
		s = "\t"
	case 'r':
		s = "\r"
	case 'n':
		s = "\n"
	case '\\':
		s = `\`
	case '"':
		s = `"`
	case 'x': // This is our extension.
		s = string(rune(p.parseHexDigits(2)))
	case 'u', 'U': // 'u' is a GNUstep extension.
		unit := rune(p.parseHexDigits(4))
		if utf16.IsSurrogate(unit) {
			// Look ahead for the other half of the pair; rewind if what
			// follows is not an escape that completes it.
			mark := p.pos
			rest := p.input[p.pos:]
			if strings.HasPrefix(rest, `\U`) || strings.HasPrefix(rest, `\u`) {
				p.pos += 2
				pair := utf16.DecodeRune(unit, rune(p.parseHexDigits(4)))
				if pair != utf8.RuneError {
					unit = pair
				} else {
					p.pos = mark
				}
			}
		}
		s = string(unit)
	case '0', '1', '2', '3', '4', '5', '6', '7':
		p.backup() // we've already consumed one of the digits
		s = string(rune(p.parseOctalDigits(3)))
	default:
		p.backup() // everything else should be accepted
	}
	p.ignore() // skip the entire escape sequence
	return s
}
// parseQuotedString reads the rest of a double-quoted string and returns its
// contents with escape sequences decoded. The opening " has already been
// consumed; the closing " is consumed before returning.
//
// A string without escapes is returned as a slice of the input with no
// copying. Once an escape is seen, the pieces are accumulated in a
// strings.Builder so that strings with many escapes are assembled in linear
// rather than quadratic time.
//
// It panics via p.error if the input ends before the closing quote.
func (p *textPlistParser) parseQuotedString() cfString {
	p.ignore() // ignore the "
	var decoded strings.Builder
	for {
		p.scanUntilAny(`"\`)
		switch p.peek() {
		case eof:
			p.error("unexpected eof in quoted string")
		case '"':
			section := p.emit()
			p.pos++ // skip "
			if decoded.Len() == 0 {
				// Nothing accumulated so far: the final section is the
				// whole string.
				return cfString(section)
			}
			decoded.WriteString(section)
			return cfString(decoded.String())
		case '\\':
			decoded.WriteString(p.emit())
			p.next() // consume \
			decoded.WriteString(p.parseEscape())
		}
	}
}
// parseUnquotedString reads a bare string: the run of characters up to the
// first member of gsQuotable or the end of input. It panics via p.error if
// that run is empty.
func (p *textPlistParser) parseUnquotedString() cfString {
	p.scanCharactersNotInSet(&gsQuotable)
	if p.empty() {
		p.error("invalid unquoted string (found an unquoted character that should be quoted?)")
	}
	return cfString(p.emit())
}
// parseDictionary reads `key = value;` entries until the closing } and
// returns the resulting dictionary (passed through maybeUID). The opening {
// has already been consumed.
//
// When ignoreEof is true, end of input also terminates the dictionary; this
// is how brace-less .strings files are read. Otherwise end of input is an
// error. An entry of the form `key;` maps the key to itself.
func (p *textPlistParser) parseDictionary(ignoreEof bool) cfValue {
	keys := make([]string, 0, 32)
	values := make([]cfValue, 0, 32)

	for {
		p.skipWhitespaceAndComments()

		// Key, or the end of the dictionary.
		var key cfString
		switch r := p.next(); r {
		case eof, '}':
			if r == eof && !ignoreEof {
				p.error("unexpected eof in dictionary")
			}
			dict := &cfDictionary{keys: keys, values: values}
			return dict.maybeUID(p.format == OpenStepFormat)
		case '"':
			key = p.parseQuotedString()
		default:
			p.backup()
			key = p.parseUnquotedString()
		}

		p.skipWhitespaceAndComments()

		// Value.
		var val cfValue
		switch p.next() {
		case ';':
			// This is supposed to be .strings-specific.
			// GNUstep parses this as an empty string.
			// Apple copies the key like we do.
			val = key
		case '=':
			// whitespace is consumed within
			val = p.parsePlistValue()

			p.skipWhitespaceAndComments()
			if p.next() != ';' {
				p.error("missing ; in dictionary")
			}
		default:
			p.error("missing = in dictionary")
		}

		keys = append(keys, string(key))
		values = append(values, val)
	}
}
// parseArray reads array elements until the closing ) and returns them. The
// opening ( has already been consumed.
//
// Commas between elements are skipped wherever they appear, so a trailing
// comma before ) is accepted. Elements that parse to an empty string are
// dropped. End of input before ) is an error.
func (p *textPlistParser) parseArray() *cfArray {
	elements := make([]cfValue, 0, 32)
	for {
		p.skipWhitespaceAndComments()

		switch p.next() {
		case eof:
			p.error("unexpected eof in array")
		case ')':
			return &cfArray{elements}
		case ',':
			continue // restart; ,) is valid and we don't want to blow it
		default:
			p.backup()
		}

		element := p.parsePlistValue() // whitespace is consumed within
		if str, isString := element.(cfString); isString && len(str) == 0 {
			// Empty strings in arrays are apparently skipped?
			// TODO: Figure out why this was implemented.
			continue
		}
		elements = append(elements, element)
	}
}
// parseGNUStepValue reads a GNUstep typed value of the form <*Tvalue>, where
// T is I (integer), R (real), B (boolean) or D (date). The <* has already
// been consumed; the closing > is consumed before returning.
//
// One optional " directly after the type letter and one trailing " before the
// > are tolerated and stripped. A value that is empty once those quotes are
// removed (as in <*I>, <*I"> or <*I"">) is rejected with a parse error rather
// than being indexed.
//
// It panics via p.error on an unknown type, an unterminated or empty value,
// or an unparseable date; the mustParse helpers handle malformed numbers.
func (p *textPlistParser) parseGNUStepValue() cfValue {
	typ := p.next()

	if typ == '>' || typ == eof { // <*>, <*EOF
		p.error("invalid GNUStep extended value")
	}

	if typ != 'I' && typ != 'R' && typ != 'B' && typ != 'D' {
		// early out: no need to collect the value if we'll fail to understand it
		p.error("unknown GNUStep extended value type `%c'", typ)
	}

	if p.peek() == '"' { // <*x"
		p.next()
	}

	p.ignore()
	p.scanUntil('>')

	if p.peek() == eof { // <*xEOF or <*x"EOF
		p.error("unterminated GNUStep extended value")
	}

	v := p.emit()
	if n := len(v); n > 0 && v[n-1] == '"' {
		// GNUStep tolerates malformed quoted values, as in <*I5"> and <*I"5>
		// It purportedly does so by stripping the trailing quote
		v = v[:n-1]
	}

	if v == "" { // <*x>, <*x">, <*x"">
		p.error("empty GNUStep extended value")
	}

	p.next() // consume the >

	switch typ {
	case 'I':
		if v[0] == '-' {
			n := mustParseInt(v, 10, 64)
			return &cfNumber{signed: true, value: uint64(n)}
		}
		n := mustParseUint(v, 10, 64)
		return &cfNumber{signed: false, value: n}
	case 'R':
		n := mustParseFloat(v, 64)
		return &cfReal{wide: true, value: n} // TODO(DH) 32/64
	case 'B':
		b := v[0] == 'Y'
		return cfBoolean(b)
	case 'D':
		t, err := time.Parse(textPlistTimeLayout, v)
		if err != nil {
			// The message quotes the input, so it must not be used as a
			// format string itself.
			p.error("%s", err.Error())
		}
		return cfDate(t.In(time.UTC))
	}
	// We should never get here; we checked the type above
	return nil
}
// parseGNUStepBase64 reads GNUstep base64 data of the form <[...]> and
// returns the decoded bytes. The <[ has already been consumed; the closing ]>
// is consumed before returning.
//
// The text between the brackets is passed through base64ValidChars.Map before
// decoding, emulating NSDataBase64DecodingIgnoreUnknownCharacters.
//
// It panics via p.error if the ]> terminator is missing or the filtered
// payload is not valid standard base64.
func (p *textPlistParser) parseGNUStepBase64() cfData {
	p.ignore()
	p.scanUntil(']')
	v := p.emit()

	if p.next() != ']' {
		p.error("invalid GNUStep base64 data (expected ']')")
	}

	if p.next() != '>' {
		p.error("invalid GNUStep base64 data (expected '>')")
	}

	// Emulate NSDataBase64DecodingIgnoreUnknownCharacters
	filtered := strings.Map(base64ValidChars.Map, v)
	data, err := base64.StdEncoding.DecodeString(filtered)
	if err != nil {
		// Pass the decoder's message as an argument, never as the format.
		p.error("invalid GNUStep base64 data: %s", err.Error())
	}
	return cfData(data)
}
// parseHexData reads hexadecimal data of the form <0fbd77 1c2735ae> and
// returns the decoded bytes. The opening < has already been consumed; the
// closing > is consumed before returning.
//
// Whitespace may appear anywhere between digits. It panics via p.error on end
// of input, on a character that is not a hex digit, and on an odd number of
// digits.
func (p *textPlistParser) parseHexData() cfData {
	decoded := make([]byte, 0, 256)
	var high byte     // first nibble of the byte being assembled, already shifted
	haveHigh := false // whether high holds a nibble waiting for its partner

	for {
		r := p.next()
		switch r {
		case eof:
			p.error("unexpected eof in data")
		case '>':
			if haveHigh {
				p.error("uneven number of hex digits in data")
			}
			p.ignore()
			return cfData(decoded)
		// Apple and GNUstep both want these in pairs. We are a bit more lax.
		// GS accepts comments too, but that seems like a lot of work.
		case ' ', '\t', '\n', '\r', '\u2028', '\u2029':
			continue
		}

		var nibble byte
		switch {
		case 'a' <= r && r <= 'f':
			nibble = 10 + byte(r-'a')
		case 'A' <= r && r <= 'F':
			nibble = 10 + byte(r-'A')
		case '0' <= r && r <= '9':
			nibble = byte(r - '0')
		default:
			p.error("unexpected hex digit `%c'", r)
		}

		if !haveHigh {
			high = nibble << 4
			haveHigh = true
			continue
		}
		decoded = append(decoded, high|nibble)
		haveHigh = false
	}
}
// parsePlistValue skips leading whitespace and comments, then parses one
// value, choosing the sub-parser from the first significant character:
// " starts a quoted string, { a dictionary, ( an array, < hex data or a
// GNUstep extension, and anything else an unquoted string. At end of input it
// returns an empty dictionary.
//
// Seeing <* or <[ marks the document as GNUStepFormat.
func (p *textPlistParser) parsePlistValue() cfValue {
	p.skipWhitespaceAndComments()

	switch p.next() {
	case eof:
		return &cfDictionary{}
	case '"':
		return p.parseQuotedString()
	case '{':
		return p.parseDictionary(false)
	case '(':
		return p.parseArray()
	case '<':
		// Handled below: the following character selects the variant.
	default:
		p.backup()
		return p.parseUnquotedString()
	}

	switch p.next() {
	case '*':
		p.format = GNUStepFormat
		return p.parseGNUStepValue()
	case '[':
		p.format = GNUStepFormat
		return p.parseGNUStepBase64()
	}
	// Plain <...> hex data: the character just read belongs to it.
	p.backup()
	return p.parseHexData()
}
// newTextPlistParser returns a parser that will read a text property list
// from r. The format starts out as OpenStepFormat.
func newTextPlistParser(r io.Reader) *textPlistParser {
	parser := new(textPlistParser)
	parser.reader = r
	parser.format = OpenStepFormat
	return parser
}