| // Parser for text plist formats. |
| // @see https://github.com/apple/swift-corelibs-foundation/blob/master/CoreFoundation/Parsing.subproj/CFOldStylePList.c |
| // @see https://github.com/gnustep/libs-base/blob/master/Source/NSPropertyList.m |
| // This parser also handles strings files. |
| |
| package plist |
| |
| import ( |
| "encoding/base64" |
| "encoding/binary" |
| "errors" |
| "fmt" |
| "io" |
| "io/ioutil" |
| "runtime" |
| "strings" |
| "time" |
| "unicode/utf16" |
| "unicode/utf8" |
| ) |
| |
// textPlistParser carries the state of one text property list parse
// (OpenStep, GNUstep or .strings syntax).
type textPlistParser struct {
	reader io.Reader // raw document source; read in full by parseDocument
	format int       // dialect detected so far; the constructor starts it at OpenStepFormat

	input string // the whole document, decoded to a Go string

	// Byte offsets into input. [start, pos) is the pending, not yet emitted
	// token; width is the byte size of the rune most recently returned by
	// next (0 once eof has been reached).
	start, pos, width int
}
| |
// convertU16 decodes buffer as UTF-16 code units in byte order bo and returns
// the text as a Go (UTF-8) string. A buffer with an odd number of bytes is
// rejected with a "truncated utf16" error.
func convertU16(buffer []byte, bo binary.ByteOrder) (string, error) {
	if len(buffer)&1 == 1 {
		return "", errors.New("truncated utf16")
	}

	// The length is even, so every step below has a full two-byte unit.
	units := make([]uint16, 0, len(buffer)/2)
	for rest := buffer; len(rest) > 0; rest = rest[2:] {
		units = append(units, bo.Uint16(rest))
	}
	return string(utf16.Decode(units)), nil
}
| |
| func guessEncodingAndConvert(buffer []byte) (string, error) { |
| if len(buffer) >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF { |
| // UTF-8 BOM |
| return zeroCopy8BitString(buffer, 3, len(buffer)-3), nil |
| } else if len(buffer) >= 2 { |
| // UTF-16 guesses |
| |
| switch { |
| // stream is big-endian (BOM is FE FF or head is 00 XX) |
| case (buffer[0] == 0xFE && buffer[1] == 0xFF): |
| return convertU16(buffer[2:], binary.BigEndian) |
| case (buffer[0] == 0 && buffer[1] != 0): |
| return convertU16(buffer, binary.BigEndian) |
| |
| // stream is little-endian (BOM is FE FF or head is XX 00) |
| case (buffer[0] == 0xFF && buffer[1] == 0xFE): |
| return convertU16(buffer[2:], binary.LittleEndian) |
| case (buffer[0] != 0 && buffer[1] == 0): |
| return convertU16(buffer, binary.LittleEndian) |
| } |
| } |
| |
| // fallback: assume ASCII (not great!) |
| return zeroCopy8BitString(buffer, 0, len(buffer)), nil |
| } |
| |
| func (p *textPlistParser) parseDocument() (pval cfValue, parseError error) { |
| defer func() { |
| if r := recover(); r != nil { |
| if _, ok := r.(runtime.Error); ok { |
| panic(r) |
| } |
| // Wrap all non-invalid-plist errors. |
| parseError = plistParseError{"text", r.(error)} |
| } |
| }() |
| |
| buffer, err := ioutil.ReadAll(p.reader) |
| if err != nil { |
| panic(err) |
| } |
| |
| p.input, err = guessEncodingAndConvert(buffer) |
| if err != nil { |
| panic(err) |
| } |
| |
| val := p.parsePlistValue() |
| |
| p.skipWhitespaceAndComments() |
| if p.peek() != eof { |
| if _, ok := val.(cfString); !ok { |
| p.error("garbage after end of document") |
| } |
| |
| // Try parsing as .strings. |
| // See -[NSDictionary propertyListFromStringsFileFormat:]. |
| p.start = 0 |
| p.pos = 0 |
| val = p.parseDictionary(true) |
| } |
| |
| pval = val |
| |
| return |
| } |
| |
// eof is the sentinel rune returned by next once the input is exhausted.
const eof = rune(-1)
| |
| func (p *textPlistParser) error(e string, args ...interface{}) { |
| line := strings.Count(p.input[:p.pos], "\n") |
| char := p.pos - strings.LastIndex(p.input[:p.pos], "\n") - 1 |
| panic(fmt.Errorf("%s at line %d character %d", fmt.Sprintf(e, args...), line, char)) |
| } |
| |
| func (p *textPlistParser) next() rune { |
| if int(p.pos) >= len(p.input) { |
| p.width = 0 |
| return eof |
| } |
| r, w := utf8.DecodeRuneInString(p.input[p.pos:]) |
| p.width = w |
| p.pos += p.width |
| return r |
| } |
| |
| func (p *textPlistParser) backup() { |
| p.pos -= p.width |
| } |
| |
| func (p *textPlistParser) peek() rune { |
| r := p.next() |
| p.backup() |
| return r |
| } |
| |
| func (p *textPlistParser) emit() string { |
| s := p.input[p.start:p.pos] |
| p.start = p.pos |
| return s |
| } |
| |
| func (p *textPlistParser) ignore() { |
| p.start = p.pos |
| } |
| |
| func (p *textPlistParser) empty() bool { |
| return p.start == p.pos |
| } |
| |
| func (p *textPlistParser) scanUntil(ch rune) { |
| if x := strings.IndexRune(p.input[p.pos:], ch); x >= 0 { |
| p.pos += x |
| return |
| } |
| p.pos = len(p.input) |
| } |
| |
| func (p *textPlistParser) scanUntilAny(chs string) { |
| if x := strings.IndexAny(p.input[p.pos:], chs); x >= 0 { |
| p.pos += x |
| return |
| } |
| p.pos = len(p.input) |
| } |
| |
| func (p *textPlistParser) scanCharactersInSet(ch *characterSet) { |
| for ch.Contains(p.next()) { |
| } |
| p.backup() |
| } |
| |
| func (p *textPlistParser) scanCharactersNotInSet(ch *characterSet) { |
| var r rune |
| for { |
| r = p.next() |
| if r == eof || ch.Contains(r) { |
| break |
| } |
| } |
| p.backup() |
| } |
| |
| func (p *textPlistParser) skipWhitespaceAndComments() { |
| for { |
| p.scanCharactersInSet(&whitespace) |
| if strings.HasPrefix(p.input[p.pos:], "//") { |
| p.scanCharactersNotInSet(&newlineCharacterSet) |
| } else if strings.HasPrefix(p.input[p.pos:], "/*") { |
| if x := strings.Index(p.input[p.pos:], "*/"); x >= 0 { |
| p.pos += x + 2 // skip the */ as well |
| continue // consume more whitespace |
| } else { |
| p.error("unexpected eof in block comment") |
| } |
| } else { |
| break |
| } |
| } |
| p.ignore() |
| } |
| |
| func (p *textPlistParser) parseOctalDigits(max int) uint64 { |
| var val uint64 |
| |
| for i := 0; i < max; i++ { |
| r := p.next() |
| |
| if r >= '0' && r <= '7' { |
| val <<= 3 |
| val |= uint64((r - '0')) |
| } else { |
| p.backup() |
| break |
| } |
| } |
| return val |
| } |
| |
| func (p *textPlistParser) parseHexDigits(max int) uint64 { |
| var val uint64 |
| |
| for i := 0; i < max; i++ { |
| r := p.next() |
| |
| if r >= 'a' && r <= 'f' { |
| val <<= 4 |
| val |= 10 + uint64((r - 'a')) |
| } else if r >= 'A' && r <= 'F' { |
| val <<= 4 |
| val |= 10 + uint64((r - 'A')) |
| } else if r >= '0' && r <= '9' { |
| val <<= 4 |
| val |= uint64((r - '0')) |
| } else { |
| p.backup() |
| break |
| } |
| } |
| return val |
| } |
| |
| // the \ has already been consumed |
| func (p *textPlistParser) parseEscape() string { |
| var s string |
| switch p.next() { |
| case 'a': |
| s = "\a" |
| case 'b': |
| s = "\b" |
| case 'v': |
| s = "\v" |
| case 'f': |
| s = "\f" |
| case 't': |
| s = "\t" |
| case 'r': |
| s = "\r" |
| case 'n': |
| s = "\n" |
| case '\\': |
| s = `\` |
| case '"': |
| s = `"` |
| case 'x': // This is our extension. |
| s = string(rune(p.parseHexDigits(2))) |
| case 'u', 'U': // 'u' is a GNUstep extension. |
| s = string(rune(p.parseHexDigits(4))) |
| case '0', '1', '2', '3', '4', '5', '6', '7': |
| p.backup() // we've already consumed one of the digits |
| s = string(rune(p.parseOctalDigits(3))) |
| default: |
| p.backup() // everything else should be accepted |
| } |
| p.ignore() // skip the entire escape sequence |
| return s |
| } |
| |
| // the " has already been consumed |
| func (p *textPlistParser) parseQuotedString() cfString { |
| p.ignore() // ignore the " |
| |
| slowPath := false |
| s := "" |
| |
| for { |
| p.scanUntilAny(`"\`) |
| switch p.peek() { |
| case eof: |
| p.error("unexpected eof in quoted string") |
| case '"': |
| section := p.emit() |
| p.pos++ // skip " |
| if !slowPath { |
| return cfString(section) |
| } else { |
| s += section |
| return cfString(s) |
| } |
| case '\\': |
| slowPath = true |
| s += p.emit() |
| p.next() // consume \ |
| s += p.parseEscape() |
| } |
| } |
| } |
| |
| func (p *textPlistParser) parseUnquotedString() cfString { |
| p.scanCharactersNotInSet(&gsQuotable) |
| s := p.emit() |
| if s == "" { |
| p.error("invalid unquoted string (found an unquoted character that should be quoted?)") |
| } |
| |
| return cfString(s) |
| } |
| |
| // the { has already been consumed |
| func (p *textPlistParser) parseDictionary(ignoreEof bool) cfValue { |
| //p.ignore() // ignore the { |
| var keypv cfValue |
| keys := make([]string, 0, 32) |
| values := make([]cfValue, 0, 32) |
| outer: |
| for { |
| p.skipWhitespaceAndComments() |
| |
| switch p.next() { |
| case eof: |
| if !ignoreEof { |
| p.error("unexpected eof in dictionary") |
| } |
| fallthrough |
| case '}': |
| break outer |
| case '"': |
| keypv = p.parseQuotedString() |
| default: |
| p.backup() |
| keypv = p.parseUnquotedString() |
| } |
| |
| // INVARIANT: key can't be nil; parseQuoted and parseUnquoted |
| // will panic out before they return nil. |
| |
| p.skipWhitespaceAndComments() |
| |
| var val cfValue |
| n := p.next() |
| if n == ';' { |
| // This is supposed to be .strings-specific. |
| // GNUstep parses this as an empty string. |
| // Apple copies the key like we do. |
| val = keypv |
| } else if n == '=' { |
| // whitespace is consumed within |
| val = p.parsePlistValue() |
| |
| p.skipWhitespaceAndComments() |
| |
| if p.next() != ';' { |
| p.error("missing ; in dictionary") |
| } |
| } else { |
| p.error("missing = in dictionary") |
| } |
| |
| keys = append(keys, string(keypv.(cfString))) |
| values = append(values, val) |
| } |
| |
| dict := &cfDictionary{keys: keys, values: values} |
| return dict.maybeUID(p.format == OpenStepFormat) |
| } |
| |
| // the ( has already been consumed |
| func (p *textPlistParser) parseArray() *cfArray { |
| //p.ignore() // ignore the ( |
| values := make([]cfValue, 0, 32) |
| outer: |
| for { |
| p.skipWhitespaceAndComments() |
| |
| switch p.next() { |
| case eof: |
| p.error("unexpected eof in array") |
| case ')': |
| break outer // done here |
| case ',': |
| continue // restart; ,) is valid and we don't want to blow it |
| default: |
| p.backup() |
| } |
| |
| pval := p.parsePlistValue() // whitespace is consumed within |
| if str, ok := pval.(cfString); ok && string(str) == "" { |
| // Empty strings in arrays are apparently skipped? |
| // TODO: Figure out why this was implemented. |
| continue |
| } |
| values = append(values, pval) |
| } |
| return &cfArray{values} |
| } |
| |
| // the <* have already been consumed |
| func (p *textPlistParser) parseGNUStepValue() cfValue { |
| typ := p.next() |
| |
| if typ == '>' || typ == eof { // <*>, <*EOF |
| p.error("invalid GNUStep extended value") |
| } |
| |
| if typ != 'I' && typ != 'R' && typ != 'B' && typ != 'D' { |
| // early out: no need to collect the value if we'll fail to understand it |
| p.error("unknown GNUStep extended value type `" + string(typ) + "'") |
| } |
| |
| if p.peek() == '"' { // <*x" |
| p.next() |
| } |
| |
| p.ignore() |
| p.scanUntil('>') |
| |
| if p.peek() == eof { // <*xEOF or <*x"EOF |
| p.error("unterminated GNUStep extended value") |
| } |
| |
| if p.empty() { // <*x>, <*x""> |
| p.error("empty GNUStep extended value") |
| } |
| |
| v := p.emit() |
| p.next() // consume the > |
| |
| if v[len(v)-1] == '"' { |
| // GNUStep tolerates malformed quoted values, as in <*I5"> and <*I"5> |
| // It purportedly does so by stripping the trailing quote |
| v = v[:len(v)-1] |
| } |
| |
| switch typ { |
| case 'I': |
| if v[0] == '-' { |
| n := mustParseInt(v, 10, 64) |
| return &cfNumber{signed: true, value: uint64(n)} |
| } else { |
| n := mustParseUint(v, 10, 64) |
| return &cfNumber{signed: false, value: n} |
| } |
| case 'R': |
| n := mustParseFloat(v, 64) |
| return &cfReal{wide: true, value: n} // TODO(DH) 32/64 |
| case 'B': |
| b := v[0] == 'Y' |
| return cfBoolean(b) |
| case 'D': |
| t, err := time.Parse(textPlistTimeLayout, v) |
| if err != nil { |
| p.error(err.Error()) |
| } |
| |
| return cfDate(t.In(time.UTC)) |
| } |
| // We should never get here; we checked the type above |
| return nil |
| } |
| |
| // the <[ have already been consumed |
| func (p *textPlistParser) parseGNUStepBase64() cfData { |
| p.ignore() |
| p.scanUntil(']') |
| v := p.emit() |
| |
| if p.next() != ']' { |
| p.error("invalid GNUStep base64 data (expected ']')") |
| } |
| |
| if p.next() != '>' { |
| p.error("invalid GNUStep base64 data (expected '>')") |
| } |
| |
| // Emulate NSDataBase64DecodingIgnoreUnknownCharacters |
| filtered := strings.Map(base64ValidChars.Map, v) |
| data, err := base64.StdEncoding.DecodeString(filtered) |
| if err != nil { |
| p.error("invalid GNUStep base64 data: " + err.Error()) |
| } |
| return cfData(data) |
| } |
| |
| // The < has already been consumed |
| func (p *textPlistParser) parseHexData() cfData { |
| buf := make([]byte, 256) |
| i := 0 |
| c := 0 |
| |
| for { |
| r := p.next() |
| switch r { |
| case eof: |
| p.error("unexpected eof in data") |
| case '>': |
| if c&1 == 1 { |
| p.error("uneven number of hex digits in data") |
| } |
| p.ignore() |
| return cfData(buf[:i]) |
| // Apple and GNUstep both want these in pairs. We are a bit more lax. |
| // GS accepts comments too, but that seems like a lot of work. |
| case ' ', '\t', '\n', '\r', '\u2028', '\u2029': |
| continue |
| } |
| |
| buf[i] <<= 4 |
| if r >= 'a' && r <= 'f' { |
| buf[i] |= 10 + byte((r - 'a')) |
| } else if r >= 'A' && r <= 'F' { |
| buf[i] |= 10 + byte((r - 'A')) |
| } else if r >= '0' && r <= '9' { |
| buf[i] |= byte((r - '0')) |
| } else { |
| p.error("unexpected hex digit `%c'", r) |
| } |
| |
| c++ |
| if c&1 == 0 { |
| i++ |
| if i >= len(buf) { |
| realloc := make([]byte, len(buf)*2) |
| copy(realloc, buf) |
| buf = realloc |
| } |
| } |
| } |
| } |
| |
| func (p *textPlistParser) parsePlistValue() cfValue { |
| for { |
| p.skipWhitespaceAndComments() |
| |
| switch p.next() { |
| case eof: |
| return &cfDictionary{} |
| case '<': |
| switch p.next() { |
| case '*': |
| p.format = GNUStepFormat |
| return p.parseGNUStepValue() |
| case '[': |
| p.format = GNUStepFormat |
| return p.parseGNUStepBase64() |
| default: |
| p.backup() |
| return p.parseHexData() |
| } |
| case '"': |
| return p.parseQuotedString() |
| case '{': |
| return p.parseDictionary(false) |
| case '(': |
| return p.parseArray() |
| default: |
| p.backup() |
| return p.parseUnquotedString() |
| } |
| } |
| } |
| |
| func newTextPlistParser(r io.Reader) *textPlistParser { |
| return &textPlistParser{ |
| reader: r, |
| format: OpenStepFormat, |
| } |
| } |