[text] Rewrite the text plist parser to be like text/template/parse

This commit overhauls the text property list parser, reducing the cost
in time and memory and overall sanity required to parse text property
list documents.

Herein, support is also added for:
* UTF-16 text property lists (#26)
* Proper scanning of UTF-8 codepoints
* Encoding conversion (UTF-16{BE,LE}, with or without BOM -> UTF-8)
* Empty data values, <>
* Error messages that include line and column info (#25)
* Legacy strings file format (dictionary without { }) (#27)
* Shortcut strings file format (dictionaries without values) (#27)
* Short hex/unicode/octal escapes (\x2, \u33, \0)
* Empty documents parsing as empty dictionaries
* Detection of garbage after the end of a document

The character tables have been augmented with their own characterSet
type, which lets them answer their own membership queries. All characters
outside the 0-255 range will be considered "not in set" for now.

In the benchmarks below, *Step(Parse|Decode) operate on a relatively
small synthetic property list that contains every property list type.
BigParse operates on a ~700kb text property list created by converting
the iTunes software update catalog from XML to GNUStep or OpenStep.
Pretty benchmarks include whitespace.

benchmark                             old ns/op     new ns/op     delta
BenchmarkBigGNUStepParse-4            125008990     33544860      -73.17%
BenchmarkBigPrettyGNUStepParse-4      54869160      38049063      -30.65%
BenchmarkBigOpenStepParse-4           124436480     31491614      -74.69%
BenchmarkBigPrettyOpenStepParse-4     54080760      34542446      -36.13%
BenchmarkOpenStepParse-4              20177         13894         -31.14%
BenchmarkGNUStepParse-4               18742         15087         -19.50%

benchmark                             old allocs     new allocs     delta
BenchmarkBigGNUStepParse-4            2248154        120655         -94.63%
BenchmarkBigPrettyGNUStepParse-4      969515         120655         -87.56%
BenchmarkBigOpenStepParse-4           2251448        120655         -94.64%
BenchmarkBigPrettyOpenStepParse-4     969541         120655         -87.56%
BenchmarkOpenStepParse-4              234            44             -81.20%
BenchmarkGNUStepParse-4               186            47             -74.73%

benchmark                             old bytes     new bytes     delta
BenchmarkBigGNUStepParse-4            67633657      24006777      -64.50%
BenchmarkBigPrettyGNUStepParse-4      30100843      24006784      -20.25%
BenchmarkBigOpenStepParse-4           67657126      24023625      -64.49%
BenchmarkBigPrettyOpenStepParse-4     30101001      24023619      -20.19%
BenchmarkOpenStepParse-4              15376         10192         -33.71%
BenchmarkGNUStepParse-4               14992         10320         -31.16%

Fixes #25
Fixes #26
Fixes #27
diff --git a/invalid_text_test.go b/invalid_text_test.go
index c7c3dc5..8b5845e 100644
--- a/invalid_text_test.go
+++ b/invalid_text_test.go
@@ -23,13 +23,19 @@
 	{"Missing Equals in Dictionary", `{"A"A;}`},
 	{"Missing Semicolon in Dictionary", `{"A"=A}`},
 	{"Invalid GNUStep type", "<*F33>"},
-	{"Invalid GNUStep type data", "(<*I>"},
+	{"Invalid GNUStep int", "(<*I>"},
+	{"Invalid GNUStep date", "<*D5>"},
+	{"Truncated GNUStep value", "<*I3"},
 	{"Invalid data", "<EQ>"},
-	{"Truncated unicode escape", `"\u231"`},
-	{"Truncated hex escape", `"\x2"`},
-	{"Truncated octal escape", `"\02"`},
+	{"Truncated unicode escape", `"\u231`},
+	{"Truncated hex escape", `"\x2`},
+	{"Truncated octal escape", `"\02`},
 	{"Truncated data", `<33`},
+	{"Uneven data", `<3>`},
 	{"Truncated block comment", `/* hello`},
+	{"Truncated quoted string", `"hi`},
+	{"Garbage after end of non-string", "<ab> cde"},
+	{"Broken UTF-16", "\xFE\xFF\x01"},
 }
 
 func TestInvalidTextPlists(t *testing.T) {
diff --git a/text_generator.go b/text_generator.go
index 31eb9d6..53078ba 100644
--- a/text_generator.go
+++ b/text_generator.go
@@ -11,7 +11,7 @@
 	writer io.Writer
 	format int
 
-	quotableTable *[4]uint64
+	quotableTable *characterSet
 
 	indent string
 	depth  int
@@ -49,7 +49,7 @@
 			s += us
 		} else {
 			c := uint8(r)
-			if (*p.quotableTable)[c/64]&(1<<(c%64)) > 0 {
+			if p.quotableTable.ContainsByte(c) {
 				quot = true
 			}
 
diff --git a/text_parser.go b/text_parser.go
index 1cf2ad5..7e49d6f 100644
--- a/text_parser.go
+++ b/text_parser.go
@@ -1,26 +1,64 @@
 package plist
 
 import (
-	"bufio"
-	"encoding/hex"
+	"encoding/binary"
 	"errors"
+	"fmt"
 	"io"
+	"io/ioutil"
 	"runtime"
 	"strings"
 	"time"
+	"unicode/utf16"
+	"unicode/utf8"
 )
 
-type byteReader interface {
-	io.Reader
-	io.ByteScanner
-	Peek(n int) ([]byte, error)
-	ReadBytes(delim byte) ([]byte, error)
+type textPlistParser struct {
+	reader io.Reader
+	format int
+
+	input string
+	start int
+	pos   int
+	width int
 }
 
-type textPlistParser struct {
-	reader             byteReader
-	whitespaceReplacer *strings.Replacer
-	format             int
+func convertU16(buffer []byte, bo binary.ByteOrder) (string, error) {
+	if len(buffer)%2 != 0 {
+		return "", errors.New("truncated utf16")
+	}
+
+	tmp := make([]uint16, len(buffer)/2)
+	for i := 0; i < len(buffer); i += 2 {
+		tmp[i/2] = bo.Uint16(buffer[i : i+2])
+	}
+	return string(utf16.Decode(tmp)), nil
+}
+
+func guessEncodingAndConvert(buffer []byte) (string, error) {
+	if len(buffer) >= 3 && buffer[0] == 0xEF && buffer[1] == 0xBB && buffer[2] == 0xBF {
+		// UTF-8 BOM
+		return zeroCopy8BitString(buffer, 3, len(buffer)-3), nil
+	} else if len(buffer) >= 2 {
+		// UTF-16 guesses
+
+		switch {
+		// stream is big-endian (BOM is FE FF or head is 00 XX)
+		case (buffer[0] == 0xFE && buffer[1] == 0xFF):
+			return convertU16(buffer[2:], binary.BigEndian)
+		case (buffer[0] == 0 && buffer[1] != 0):
+			return convertU16(buffer, binary.BigEndian)
+
+		// stream is little-endian (BOM is FF FE or head is XX 00)
+		case (buffer[0] == 0xFF && buffer[1] == 0xFE):
+			return convertU16(buffer[2:], binary.LittleEndian)
+		case (buffer[0] != 0 && buffer[1] == 0):
+			return convertU16(buffer, binary.LittleEndian)
+		}
+	}
+
+	// fallback: assume ASCII (not great!)
+	return zeroCopy8BitString(buffer, 0, len(buffer)), nil
 }
 
 func (p *textPlistParser) parseDocument() (pval cfValue, parseError error) {
@@ -29,209 +67,295 @@
 			if _, ok := r.(runtime.Error); ok {
 				panic(r)
 			}
-			if _, ok := r.(invalidPlistError); ok {
-				parseError = r.(error)
-			} else {
-				// Wrap all non-invalid-plist errors.
-				parseError = plistParseError{"text", r.(error)}
-			}
+			// Wrap every recovered error as a text plist parse error.
+			parseError = plistParseError{"text", r.(error)}
 		}
 	}()
-	pval = p.parsePlistValue()
+
+	buffer, err := ioutil.ReadAll(p.reader)
+	if err != nil {
+		panic(err)
+	}
+
+	p.input, err = guessEncodingAndConvert(buffer)
+	if err != nil {
+		panic(err)
+	}
+
+	val := p.parsePlistValue()
+
+	p.skipWhitespaceAndComments()
+	if p.peek() != eof {
+		if _, ok := val.(cfString); !ok {
+			p.error("garbage after end of document")
+		}
+
+		p.start = 0
+		p.pos = 0
+		val = p.parseDictionary(true)
+	}
+
+	pval = val
+
 	return
 }
 
-func (p *textPlistParser) chugWhitespace() {
-ws:
+const eof rune = -1
+
+func (p *textPlistParser) error(e string, args ...interface{}) {
+	line := strings.Count(p.input[:p.pos], "\n")
+	char := p.pos - strings.LastIndex(p.input[:p.pos], "\n") - 1
+	panic(fmt.Errorf("%s at line %d character %d", fmt.Sprintf(e, args...), line, char))
+}
+
+func (p *textPlistParser) next() rune {
+	if int(p.pos) >= len(p.input) {
+		p.width = 0
+		return eof
+	}
+	r, w := utf8.DecodeRuneInString(p.input[p.pos:])
+	p.width = w
+	p.pos += p.width
+	return r
+}
+
+func (p *textPlistParser) backup() {
+	p.pos -= p.width
+}
+
+func (p *textPlistParser) peek() rune {
+	r := p.next()
+	p.backup()
+	return r
+}
+
+func (p *textPlistParser) emit() string {
+	s := p.input[p.start:p.pos]
+	p.start = p.pos
+	return s
+}
+
+func (p *textPlistParser) ignore() {
+	p.start = p.pos
+}
+
+func (p *textPlistParser) empty() bool {
+	return p.start == p.pos
+}
+
+func (p *textPlistParser) scanUntil(ch rune) {
+	if x := strings.IndexRune(p.input[p.pos:], ch); x >= 0 {
+		p.pos += x
+		return
+	}
+	p.pos = len(p.input)
+}
+
+func (p *textPlistParser) scanUntilAny(chs string) {
+	if x := strings.IndexAny(p.input[p.pos:], chs); x >= 0 {
+		p.pos += x
+		return
+	}
+	p.pos = len(p.input)
+}
+
+func (p *textPlistParser) scanCharactersInSet(ch *characterSet) {
+	for ch.Contains(p.next()) {
+	}
+	p.backup()
+}
+
+func (p *textPlistParser) scanCharactersNotInSet(ch *characterSet) {
+	var r rune
 	for {
-		c, err := p.reader.ReadByte()
-		if err != nil && err != io.EOF {
-			panic(err)
-		}
-		if whitespace[c/64]&(1<<(c%64)) == 0 {
-			if c == '/' && err != io.EOF {
-				// A / at the end of the file is not the begining of a comment.
-				cs, err := p.reader.Peek(1)
-				if err != nil && err != io.EOF {
-					panic(err)
-				}
-				if err == io.EOF {
-					return
-				}
-				c = cs[0]
-				switch c {
-				case '/':
-					for {
-						c, err = p.reader.ReadByte()
-						if err != nil && err != io.EOF {
-							panic(err)
-						} else if err == io.EOF {
-							break
-						}
-						// TODO: UTF-8
-						if c == '\n' || c == '\r' {
-							break
-						}
-					}
-				case '*':
-					// Peek returned a value here, so it is safe to read.
-					_, _ = p.reader.ReadByte()
-					star := false
-					for {
-						c, err = p.reader.ReadByte()
-						if err != nil {
-							panic(err)
-						}
-						if c == '*' {
-							star = true
-						} else if c == '/' && star {
-							break
-						} else {
-							star = false
-						}
-					}
-				default:
-					p.reader.UnreadByte() // Not the beginning of a // or /* comment
-					break ws
-				}
-				continue
-			}
-			p.reader.UnreadByte()
+		r = p.next()
+		if r == eof || ch.Contains(r) {
 			break
 		}
 	}
+	p.backup()
 }
 
-func (p *textPlistParser) parseQuotedString() cfString {
-	escaping := false
-	s := ""
+func (p *textPlistParser) skipWhitespaceAndComments() {
 	for {
-		byt, err := p.reader.ReadByte()
-		// EOF here is an error: we're inside a quoted string!
-		if err != nil {
-			panic(err)
-		}
-		c := rune(byt)
-		if !escaping {
-			if c == '"' {
-				break
-			} else if c == '\\' {
-				escaping = true
-				continue
+		p.scanCharactersInSet(&whitespace)
+		if strings.HasPrefix(p.input[p.pos:], "//") {
+			p.scanCharactersNotInSet(&newlineCharacterSet)
+		} else if strings.HasPrefix(p.input[p.pos:], "/*") {
+			if x := strings.Index(p.input[p.pos:], "*/"); x >= 0 {
+				p.pos += x + 2 // skip the */ as well
+				continue       // consume more whitespace
+			} else {
+				p.error("unexpected eof in block comment")
 			}
 		} else {
-			escaping = false
-			// Everything that is not listed here passes through unharmed.
-			switch c {
-			case 'a':
-				c = '\a'
-			case 'b':
-				c = '\b'
-			case 'v':
-				c = '\v'
-			case 'f':
-				c = '\f'
-			case 't':
-				c = '\t'
-			case 'r':
-				c = '\r'
-			case 'n':
-				c = '\n'
-			case 'x', 'u', 'U': // hex and unicode
-				l := 4
-				if c == 'x' {
-					l = 2
-				}
-				hex := make([]byte, l)
-				p.reader.Read(hex)
-				newc := mustParseInt(string(hex), 16, 16)
-				c = rune(newc)
-			case '0', '1', '2', '3', '4', '5', '6', '7': // octal!
-				oct := make([]byte, 3)
-				oct[0] = uint8(c)
-				p.reader.Read(oct[1:])
-				newc := mustParseInt(string(oct), 8, 16)
-				c = rune(newc)
-			}
+			break
 		}
-		s += string(c)
 	}
-	return cfString(s)
+	p.ignore()
+}
+
+func (p *textPlistParser) parseOctalDigits(max int) uint64 {
+	var val uint64
+
+	for i := 0; i < max; i++ {
+		r := p.next()
+
+		if r >= '0' && r <= '7' {
+			val <<= 3
+			val |= uint64((r - '0'))
+		} else {
+			p.backup()
+			break
+		}
+	}
+	return val
+}
+
+func (p *textPlistParser) parseHexDigits(max int) uint64 {
+	var val uint64
+
+	for i := 0; i < max; i++ {
+		r := p.next()
+
+		if r >= 'a' && r <= 'f' {
+			val <<= 4
+			val |= 10 + uint64((r - 'a'))
+		} else if r >= 'A' && r <= 'F' {
+			val <<= 4
+			val |= 10 + uint64((r - 'A'))
+		} else if r >= '0' && r <= '9' {
+			val <<= 4
+			val |= uint64((r - '0'))
+		} else {
+			p.backup()
+			break
+		}
+	}
+	return val
+}
+
+// the \ has already been consumed
+func (p *textPlistParser) parseEscape() string {
+	var s string
+	switch p.next() {
+	case 'a':
+		s = "\a"
+	case 'b':
+		s = "\b"
+	case 'v':
+		s = "\v"
+	case 'f':
+		s = "\f"
+	case 't':
+		s = "\t"
+	case 'r':
+		s = "\r"
+	case 'n':
+		s = "\n"
+	case '\\':
+		s = `\`
+	case '"':
+		s = `"`
+	case 'x':
+		s = string(rune(p.parseHexDigits(2)))
+	case 'u', 'U':
+		s = string(rune(p.parseHexDigits(4)))
+	case '0', '1', '2', '3', '4', '5', '6', '7':
+		p.backup() // we've already consumed one of the digits
+		s = string(rune(p.parseOctalDigits(3)))
+	default:
+		p.backup() // everything else should be accepted
+	}
+	p.ignore() // skip the entire escape sequence
+	return s
+}
+
+// the " has already been consumed
+func (p *textPlistParser) parseQuotedString() cfString {
+	p.ignore() // ignore the "
+
+	slowPath := false
+	s := ""
+
+	for {
+		p.scanUntilAny(`"\`)
+		switch p.peek() {
+		case eof:
+			p.error("unexpected eof in quoted string")
+		case '"':
+			section := p.emit()
+			p.pos++ // skip "
+			if !slowPath {
+				return cfString(section)
+			} else {
+				s += section
+				return cfString(s)
+			}
+		case '\\':
+			slowPath = true
+			s += p.emit()
+			p.next() // consume \
+			s += p.parseEscape()
+		}
+	}
 }
 
 func (p *textPlistParser) parseUnquotedString() cfString {
-	s := ""
-	for {
-		c, err := p.reader.ReadByte()
-		if err != nil {
-			if err == io.EOF {
-				break
-			}
-			panic(err)
-		}
-		// if we encounter a character that must be quoted, we're done.
-		// the GNUStep quote table is more lax here, so we use it instead of the OpenStep one.
-		if gsQuotable[c/64]&(1<<(c%64)) > 0 {
-			p.reader.UnreadByte()
-			break
-		}
-		s += string(c)
-	}
-
+	p.scanCharactersNotInSet(&gsQuotable)
+	s := p.emit()
 	if s == "" {
-		panic(errors.New("invalid unquoted string (found an unquoted character that should be quoted?)"))
+		p.error("invalid unquoted string (found an unquoted character that should be quoted?)")
 	}
 
 	return cfString(s)
 }
 
-func (p *textPlistParser) parseDictionary() *cfDictionary {
+// the { has already been consumed
+func (p *textPlistParser) parseDictionary(ignoreEof bool) *cfDictionary {
+	//p.ignore() // ignore the {
 	var keypv cfValue
 	keys := make([]string, 0, 32)
 	values := make([]cfValue, 0, 32)
+outer:
 	for {
-		p.chugWhitespace()
+		p.skipWhitespaceAndComments()
 
-		c, err := p.reader.ReadByte()
-		// EOF here is an error: we're inside a dictionary!
-		if err != nil {
-			panic(err)
-		}
-
-		if c == '}' {
-			break
-		} else if c == '"' {
+		switch p.next() {
+		case eof:
+			if !ignoreEof {
+				p.error("unexpected eof in dictionary")
+			}
+			fallthrough
+		case '}':
+			break outer
+		case '"':
 			keypv = p.parseQuotedString()
-		} else {
-			p.reader.UnreadByte() // Whoops, ate part of the string
+		default:
+			p.backup()
 			keypv = p.parseUnquotedString()
 		}
-		if keypv == nil {
-			// TODO better error
-			panic(errors.New("missing dictionary key"))
-		}
 
-		p.chugWhitespace()
-		c, err = p.reader.ReadByte()
-		if err != nil {
-			panic(err)
-		}
+		// INVARIANT: key can't be nil; parseQuoted and parseUnquoted
+		// will panic out before they return nil.
 
-		if c != '=' {
-			panic(errors.New("missing = in dictionary"))
-		}
+		p.skipWhitespaceAndComments()
 
-		// whitespace is guzzled within
-		val := p.parsePlistValue()
+		var val cfValue
+		n := p.next()
+		if n == ';' {
+			val = keypv
+		} else if n == '=' {
+			// whitespace is consumed within
+			val = p.parsePlistValue()
 
-		p.chugWhitespace()
-		c, err = p.reader.ReadByte()
-		if err != nil {
-			panic(err)
-		}
+			p.skipWhitespaceAndComments()
 
-		if c != ';' {
-			panic(errors.New("missing ; in dictionary"))
+			if p.next() != ';' {
+				p.error("missing ; in dictionary")
+			}
+		} else {
+			p.error("missing = in dictionary")
 		}
 
 		keys = append(keys, string(keypv.(cfString)))
@@ -241,23 +365,26 @@
 	return &cfDictionary{keys: keys, values: values}
 }
 
+// the ( has already been consumed
 func (p *textPlistParser) parseArray() *cfArray {
+	//p.ignore() // ignore the (
 	values := make([]cfValue, 0, 32)
+outer:
 	for {
-		c, err := p.reader.ReadByte()
-		// EOF here is an error: we're inside an array!
-		if err != nil {
-			panic(err)
+		p.skipWhitespaceAndComments()
+
+		switch p.next() {
+		case eof:
+			p.error("unexpected eof in array")
+		case ')':
+			break outer // done here
+		case ',':
+			continue // restart; ,) is valid and we don't want to blow it
+		default:
+			p.backup()
 		}
 
-		if c == ')' {
-			break
-		} else if c == ',' {
-			continue
-		}
-
-		p.reader.UnreadByte()
-		pval := p.parsePlistValue()
+		pval := p.parsePlistValue() // whitespace is consumed within
 		if str, ok := pval.(cfString); ok && string(str) == "" {
 			// Empty strings in arrays are apparently skipped?
 			// TODO: Figure out why this was implemented.
@@ -268,95 +395,121 @@
 	return &cfArray{values}
 }
 
-func (p *textPlistParser) parseGNUStepValue(v []byte) cfValue {
-	if len(v) < 3 {
-		panic(errors.New("invalid GNUStep extended value"))
+// the <* have already been consumed
+func (p *textPlistParser) parseGNUStepValue() cfValue {
+	typ := p.next()
+	p.ignore()
+	p.scanUntil('>')
+
+	if typ == eof || typ == '>' || p.empty() || p.peek() == eof {
+		p.error("invalid GNUStep extended value")
 	}
-	typ := v[1]
-	v = v[2:]
+
+	v := p.emit()
+	p.next() // consume the >
+
 	switch typ {
 	case 'I':
 		if v[0] == '-' {
-			n := mustParseInt(string(v), 10, 64)
+			n := mustParseInt(v, 10, 64)
 			return &cfNumber{signed: true, value: uint64(n)}
 		} else {
-			n := mustParseUint(string(v), 10, 64)
+			n := mustParseUint(v, 10, 64)
 			return &cfNumber{signed: false, value: n}
 		}
 	case 'R':
-		n := mustParseFloat(string(v), 64)
+		n := mustParseFloat(v, 64)
 		return &cfReal{wide: true, value: n} // TODO(DH) 32/64
 	case 'B':
 		b := v[0] == 'Y'
 		return cfBoolean(b)
 	case 'D':
-		t, err := time.Parse(textPlistTimeLayout, string(v))
+		t, err := time.Parse(textPlistTimeLayout, v)
 		if err != nil {
-			panic(err)
+			p.error(err.Error())
 		}
 
 		return cfDate(t.In(time.UTC))
 	}
-	panic(errors.New("invalid GNUStep type " + string(typ)))
+	p.error("invalid GNUStep type " + string(typ))
+	return nil
+}
+
+// The < has already been consumed
+func (p *textPlistParser) parseHexData() cfData {
+	buf := make([]byte, 256)
+	i := 0
+	c := 0
+
+	for {
+		r := p.next()
+		switch r {
+		case eof:
+			p.error("unexpected eof in data")
+		case '>':
+			if c&1 == 1 {
+				p.error("uneven number of hex digits in data")
+			}
+			p.ignore()
+			return cfData(buf[:i])
+		case ' ', '\t', '\n', '\r', '\u2028', '\u2029': // more lax than apple here: skip spaces
+			continue
+		}
+
+		buf[i] <<= 4
+		if r >= 'a' && r <= 'f' {
+			buf[i] |= 10 + byte((r - 'a'))
+		} else if r >= 'A' && r <= 'F' {
+			buf[i] |= 10 + byte((r - 'A'))
+		} else if r >= '0' && r <= '9' {
+			buf[i] |= byte((r - '0'))
+		} else {
+			p.error("unexpected hex digit `%c'", r)
+		}
+
+		c++
+		if c&1 == 0 {
+			i++
+			if i >= len(buf) {
+				realloc := make([]byte, len(buf)*2)
+				copy(realloc, buf)
+				buf = realloc
+			}
+		}
+	}
 }
 
 func (p *textPlistParser) parsePlistValue() cfValue {
 	for {
-		p.chugWhitespace()
+		p.skipWhitespaceAndComments()
 
-		c, err := p.reader.ReadByte()
-		if err != nil && err != io.EOF {
-			panic(err)
-		}
-		switch c {
+		switch p.next() {
+		case eof:
+			return &cfDictionary{}
 		case '<':
-			bytes, err := p.reader.ReadBytes('>')
-			if err != nil {
-				panic(err)
-			}
-			bytes = bytes[:len(bytes)-1]
-
-			if len(bytes) == 0 {
-				panic(errors.New("invalid empty angle-bracketed element"))
-			}
-
-			if bytes[0] == '*' {
+			if p.next() == '*' {
 				p.format = GNUStepFormat
-				return p.parseGNUStepValue(bytes)
-			} else {
-				s := p.whitespaceReplacer.Replace(string(bytes))
-				data, err := hex.DecodeString(s)
-				if err != nil {
-					panic(err)
-				}
-				return cfData(data)
+				return p.parseGNUStepValue()
 			}
+
+			p.backup()
+			return p.parseHexData()
 		case '"':
 			return p.parseQuotedString()
 		case '{':
-			return p.parseDictionary()
+			return p.parseDictionary(false)
 		case '(':
 			return p.parseArray()
 		default:
-			if gsQuotable[c/64]&(1<<(c%64)) > 0 {
-				panic(errors.New("unexpected non-quotable character at root level"))
-			}
-			p.reader.UnreadByte() // Place back in buffer for parseUnquotedString
+			p.backup()
 			return p.parseUnquotedString()
 		}
 	}
 }
 
 func newTextPlistParser(r io.Reader) *textPlistParser {
-	var reader byteReader
-	if rd, ok := r.(byteReader); ok {
-		reader = rd
-	} else {
-		reader = bufio.NewReader(r)
-	}
 	return &textPlistParser{
-		reader:             reader,
-		whitespaceReplacer: strings.NewReplacer("\t", "", "\n", "", " ", "", "\r", ""),
-		format:             OpenStepFormat,
+		reader: r,
+		format: OpenStepFormat,
 	}
 }
diff --git a/text_tables.go b/text_tables.go
index ec6586b..319c55c 100644
--- a/text_tables.go
+++ b/text_tables.go
@@ -1,9 +1,19 @@
 package plist
 
+type characterSet [4]uint64
+
+func (s *characterSet) Contains(ch rune) bool {
+	return ch >= 0 && ch <= 255 && s.ContainsByte(byte(ch))
+}
+
+func (s *characterSet) ContainsByte(ch byte) bool {
+	return (s[ch/64]&(1<<(ch%64)) > 0)
+}
+
 // Bitmap of characters that must be inside a quoted string
 // when written to an old-style property list
 // Low bits represent lower characters, and each uint64 represents 64 characters.
-var gsQuotable = [4]uint64{
+var gsQuotable = characterSet{
 	0x78001385ffffffff,
 	0xa800000138000000,
 	0xffffffffffffffff,
@@ -11,16 +21,23 @@
 }
 
 // 7f instead of 3f in the top line: CFOldStylePlist.c says . is valid, but they quote it.
-var osQuotable = [4]uint64{
+var osQuotable = characterSet{
 	0xf4007f6fffffffff,
 	0xf8000001f8000001,
 	0xffffffffffffffff,
 	0xffffffffffffffff,
 }
 
-var whitespace = [4]uint64{
+var whitespace = characterSet{
 	0x0000000100003f00,
 	0x0000000000000000,
 	0x0000000000000000,
 	0x0000000000000000,
 }
+
+var newlineCharacterSet = characterSet{
+	0x0000000000002400,
+	0x0000000000000000,
+	0x0000000000000000,
+	0x0000000000000000,
+}
diff --git a/text_test.go b/text_test.go
index c654037..95e87cf 100644
--- a/text_test.go
+++ b/text_test.go
@@ -61,12 +61,13 @@
 	{
 		Name: "Escapes",
 		Data: struct {
-			A, B, V, F, T, R, N, Hex1, Unicode1, Unicode2, Octal1 string
+			W, A, B, V, F, T, R, N, Hex1, Unicode1, Unicode2, Octal1 string
 		}{
-			"\a", "\b", "\v", "\f", "\t", "\r", "\n", "\u00ab", "\u00ac", "\u00ad", "\033",
+			"w", "\a", "\b", "\v", "\f", "\t", "\r", "\n", "\u00ab", "\u00ac", "\u00ad", "\033",
 		},
 		Expected: map[int][]byte{
 			OpenStepFormat: []byte(`{
+				W="\w";
 				A="\a";
 				B="\b";
 				V="\v";
@@ -168,6 +169,22 @@
 		SkipEncode: map[int]bool{OpenStepFormat: true},
 	},
 	{
+		Name: "Various Truncated Escapes",
+		Data: "\x01\x02\x03\x04\x057",
+		Expected: map[int][]byte{
+			OpenStepFormat: []byte(`"\x1\u02\U003\4\0057"`),
+		},
+		SkipEncode: map[int]bool{OpenStepFormat: true},
+	},
+	{
+		Name: "Various Case-Insensitive Escapes",
+		Data: "\u00AB\uCDEF",
+		Expected: map[int][]byte{
+			OpenStepFormat: []byte(`"\xaB\uCdEf"`),
+		},
+		SkipEncode: map[int]bool{OpenStepFormat: true},
+	},
+	{
 		Name: "Data long enough to trigger implementation-specific reallocation", // this is for coverage :(
 		Data: []byte{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
 		Expected: map[int][]byte{
@@ -175,6 +192,22 @@
 		},
 		SkipEncode: map[int]bool{OpenStepFormat: true},
 	},
+	{
+		Name: "Empty Document",
+		Data: map[string]interface{}{}, // Defined to be an empty dictionary
+		Expected: map[int][]byte{
+			OpenStepFormat: []byte{},
+		},
+		SkipEncode: map[int]bool{OpenStepFormat: true},
+	},
+	{
+		Name: "Document consisting of only whitespace",
+		Data: map[string]interface{}{}, // Defined to be an empty dictionary
+		Expected: map[int][]byte{
+			OpenStepFormat: []byte(" \n\t"),
+		},
+		SkipEncode: map[int]bool{OpenStepFormat: true},
+	},
 }
 
 func TestTextDecode(t *testing.T) {
diff --git a/zerocopy.go b/zerocopy.go
index 025c908..999f401 100644
--- a/zerocopy.go
+++ b/zerocopy.go
@@ -8,6 +8,10 @@
 )
 
 func zeroCopy8BitString(buf []byte, off int, len int) string {
+	if len == 0 {
+		return ""
+	}
+
 	var s string
 	hdr := (*reflect.StringHeader)(unsafe.Pointer(&s))
 	hdr.Data = uintptr(unsafe.Pointer(&buf[off]))