Support the latest pkg:csslib, prepare to release 0.14.0+1

commit: e0622ba6722f729df69d8cf1e8604915c08989d5 [log] [tgz]
author: Kevin Moore <kevmoo@google.com> Thu Apr 11 01:13:28 2019
committer: Kevin Moore <kevmoo@google.com> Thu Apr 11 01:14:34 2019
tree: babe036cf324bf75d1e6daef35f76931a031418b
parent: 2b88ac76d43756019749d1c598a0fab41914420c [diff]
parent: 01b99128f478373139fae9fe93fc90c23c934d2f [diff]
diff --git a/.travis.yml b/.travis.yml
index 353f809..337ceca 100644
--- a/.travis.yml
+++ b/.travis.yml

@@ -1,11 +1,12 @@
 language: dart
+
 dart:
-  - 2.0.0
   - dev
+  - 2.0.0
 
 dart_task:
   - test: -p vm
-  - test: -p chrome,firefox
+  - test: -p chrome
   - dartanalyzer: --fatal-warnings --fatal-infos .
 
 matrix:
@@ -15,7 +16,7 @@
 
 # Only building master means that we don't run two builds for each pull request.
 branches:
-  only: [master, v0_13]
+  only: [master]
 
 cache:
  directories:

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 350ac50..139ec41 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md

@@ -1,7 +1,14 @@
-## 0.13.4+2
+## 0.14.0+1
 
 - Support `package:css` `>=0.13.2 <0.16.0`.
 
+## 0.14.0
+
+*BREAKING CHANGES*
+
+- Drop support for encodings other than UTF-8 and ASCII.
+- Removed `parser_console.dart` library.
+
 ## 0.13.4+1
 
 * Fixes to readme and pubspec.

diff --git a/lib/parser_console.dart b/lib/parser_console.dart
deleted file mode 100644
index 28dee14..0000000
--- a/lib/parser_console.dart
+++ /dev/null

@@ -1,42 +0,0 @@
-/// This library adds `dart:io` support to the HTML5 parser. Call
-/// [initDartIOSupport] before calling the [parse] methods and they will accept
-/// a [RandomAccessFile] as input, in addition to the other input types.
-library parser_console;
-
-import 'dart:io';
-import 'parser.dart';
-import 'src/inputstream.dart' as inputstream;
-
-/// Adds support to the [HtmlParser] for running on a console VM. In particular
-/// this means it will be able to handle `dart:io` and [RandomAccessFile]s as
-/// input to the various [parse] methods.
-void useConsole() {
-  inputstream.consoleSupport = _ConsoleSupport();
-}
-
-class _ConsoleSupport extends inputstream.ConsoleSupport {
-  List<int> bytesFromFile(source) {
-    if (source is! RandomAccessFile) return null;
-    return readAllBytesFromFile(source);
-  }
-}
-
-// TODO(jmesserly): this should be `RandomAccessFile.readAllBytes`.
-/// Synchronously reads all bytes from the [file].
-List<int> readAllBytesFromFile(RandomAccessFile file) {
-  int length = file.lengthSync();
-  var bytes = List<int>(length);
-
-  int bytesRead = 0;
-  while (bytesRead < length) {
-    int read = file.readIntoSync(bytes, bytesRead, length - bytesRead);
-    if (read <= 0) {
-      // This could happen if, for example, the file was resized while
-      // we're reading. Just shrink the bytes array and move on.
-      bytes = bytes.sublist(0, bytesRead);
-      break;
-    }
-    bytesRead += read;
-  }
-  return bytes;
-}

diff --git a/lib/src/char_encodings.dart b/lib/src/char_encodings.dart
deleted file mode 100644
index ba10a4a..0000000
--- a/lib/src/char_encodings.dart
+++ /dev/null

@@ -1,228 +0,0 @@
-/// Decodes bytes using the correct name. See [decodeBytes].
-library char_encodings;
-
-import 'dart:collection';
-import 'package:utf/utf.dart';
-
-// TODO(jmesserly): this function is conspicuously absent from dart:utf.
-/// Returns true if the [bytes] starts with a UTF-8 byte order mark.
-/// Since UTF-8 doesn't have byte order, it's somewhat of a misnomer, but it is
-/// used in HTML to detect the UTF-
-bool hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {
-  int end = length != null ? offset + length : bytes.length;
-  return (offset + 3) <= end &&
-      bytes[offset] == 0xEF &&
-      bytes[offset + 1] == 0xBB &&
-      bytes[offset + 2] == 0xBF;
-}
-
-// TODO(jmesserly): it's unfortunate that this has to be one-shot on the entire
-// file, but dart:utf does not expose stream-based decoders yet.
-/// Decodes the [bytes] with the provided [encoding] and returns an iterable for
-/// the codepoints. Supports the major unicode encodings as well as ascii and
-/// and windows-1252 encodings.
-Iterable<int> decodeBytes(String encoding, List<int> bytes,
-    [int offset = 0,
-    int length,
-    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
-  if (length == null) length = bytes.length;
-  final replace = replacementCodepoint;
-  switch (encoding) {
-    case 'ascii':
-      bytes = bytes.sublist(offset, offset + length);
-      // TODO(jmesserly): this was taken from runtime/bin/string_stream.dart
-      for (int byte in bytes) {
-        if (byte > 127) {
-          // TODO(jmesserly): ideally this would be DecoderException, like the
-          // one thrown in runtime/bin/string_stream.dart, but we don't want to
-          // depend on dart:io.
-          throw FormatException("Illegal ASCII character $byte");
-        }
-      }
-      return bytes;
-
-    case 'windows-1252':
-    case 'cp1252':
-      return decodeWindows1252AsIterable(bytes, offset, length, replace);
-
-    case 'utf-8':
-      // NOTE: to match the behavior of the other decode functions, we eat the
-      // utf-8 BOM here.
-      if (hasUtf8Bom(bytes, offset, length)) {
-        offset += 3;
-        length -= 3;
-      }
-      return decodeUtf8AsIterable(bytes, offset, length, replace);
-
-    case 'utf-16':
-      return decodeUtf16AsIterable(bytes, offset, length, replace);
-    case 'utf-16-be':
-      return decodeUtf16beAsIterable(bytes, offset, length, true, replace);
-    case 'utf-16-le':
-      return decodeUtf16leAsIterable(bytes, offset, length, true, replace);
-
-    case 'utf-32':
-      return decodeUtf32AsIterable(bytes, offset, length, replace);
-    case 'utf-32-be':
-      return decodeUtf32beAsIterable(bytes, offset, length, true, replace);
-    case 'utf-32-le':
-      return decodeUtf32leAsIterable(bytes, offset, length, true, replace);
-
-    default:
-      throw ArgumentError('Encoding $encoding not supported');
-  }
-}
-
-// TODO(jmesserly): use dart:utf once http://dartbug.com/6476 is fixed.
-/// Returns the code points for the [input]. This works like [String.charCodes]
-/// but it decodes UTF-16 surrogate pairs.
-List<int> toCodepoints(String input) {
-  var newCodes = <int>[];
-  for (int i = 0; i < input.length; i++) {
-    var c = input.codeUnitAt(i);
-    if (0xD800 <= c && c <= 0xDBFF) {
-      int next = i + 1;
-      if (next < input.length) {
-        var d = input.codeUnitAt(next);
-        if (0xDC00 <= d && d <= 0xDFFF) {
-          c = 0x10000 + ((c - 0xD800) << 10) + (d - 0xDC00);
-          i = next;
-        }
-      }
-    }
-    newCodes.add(c);
-  }
-  return newCodes;
-}
-
-/// Decodes [windows-1252](http://en.wikipedia.org/wiki/Windows-1252) bytes as
-/// an iterable. Thus, the consumer can only convert as much of the input as
-/// needed. Set the [replacementCharacter] to null to throw an [ArgumentError]
-/// rather than replace the bad value.
-IterableWindows1252Decoder decodeWindows1252AsIterable(List<int> bytes,
-    [int offset = 0,
-    int length,
-    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
-  return IterableWindows1252Decoder(
-      bytes, offset, length, replacementCodepoint);
-}
-
-/// Return type of [decodeWindows1252AsIterable] and variants. The Iterable type
-/// provides an iterator on demand and the iterator will only translate bytes
-/// as requested by the user of the iterator. (Note: results are not cached.)
-class IterableWindows1252Decoder extends IterableBase<int> {
-  final List<int> bytes;
-  final int offset;
-  final int length;
-  final int replacementCodepoint;
-
-  IterableWindows1252Decoder(this.bytes,
-      [this.offset = 0,
-      this.length,
-      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
-
-  Windows1252Decoder get iterator =>
-      Windows1252Decoder(bytes, offset, length, replacementCodepoint);
-}
-
-/// Provides an iterator of Unicode codepoints from windows-1252 encoded bytes.
-/// The parameters can set an offset into a list of bytes (as int), limit the
-/// length of the values to be decoded, and override the default Unicode
-/// replacement character. Set the replacementCharacter to null to throw an
-/// ArgumentError rather than replace the bad value. The return value
-/// from this method can be used as an Iterable (e.g. in a for-loop).
-class Windows1252Decoder implements Iterator<int> {
-  final int replacementCodepoint;
-  final List<int> _bytes;
-  int _offset;
-  final int _length;
-
-  Windows1252Decoder(List<int> bytes,
-      [int offset = 0,
-      int length,
-      this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
-      : _bytes = bytes,
-        _offset = offset - 1,
-        _length = length == null ? bytes.length : length;
-
-  bool get _inRange => _offset >= 0 && _offset < _length;
-  int get current => _inRange ? _mapChar(_bytes[_offset]) : null;
-
-  bool moveNext() {
-    _offset++;
-    return _inRange;
-  }
-
-  int _mapChar(int char) {
-    // TODO(jmesserly): this is duplicating entitiesWindows1252 and
-    // replacementCharacters from constants.dart
-    switch (char) {
-      case 0x80:
-        return 0x20AC; // EURO SIGN
-      case 0x82:
-        return 0x201A; // SINGLE LOW-9 QUOTATION MARK
-      case 0x83:
-        return 0x0192; // LATIN SMALL LETTER F WITH HOOK
-      case 0x84:
-        return 0x201E; // DOUBLE LOW-9 QUOTATION MARK
-      case 0x85:
-        return 0x2026; // HORIZONTAL ELLIPSIS
-      case 0x86:
-        return 0x2020; // DAGGER
-      case 0x87:
-        return 0x2021; // DOUBLE DAGGER
-      case 0x88:
-        return 0x02C6; // MODIFIER LETTER CIRCUMFLEX ACCENT
-      case 0x89:
-        return 0x2030; // PER MILLE SIGN
-      case 0x8A:
-        return 0x0160; // LATIN CAPITAL LETTER S WITH CARON
-      case 0x8B:
-        return 0x2039; // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-      case 0x8C:
-        return 0x0152; // LATIN CAPITAL LIGATURE OE
-      case 0x8E:
-        return 0x017D; // LATIN CAPITAL LETTER Z WITH CARON
-      case 0x91:
-        return 0x2018; // LEFT SINGLE QUOTATION MARK
-      case 0x92:
-        return 0x2019; // RIGHT SINGLE QUOTATION MARK
-      case 0x93:
-        return 0x201C; // LEFT DOUBLE QUOTATION MARK
-      case 0x94:
-        return 0x201D; // RIGHT DOUBLE QUOTATION MARK
-      case 0x95:
-        return 0x2022; // BULLET
-      case 0x96:
-        return 0x2013; // EN DASH
-      case 0x97:
-        return 0x2014; // EM DASH
-      case 0x98:
-        return 0x02DC; // SMALL TILDE
-      case 0x99:
-        return 0x2122; // TRADE MARK SIGN
-      case 0x9A:
-        return 0x0161; // LATIN SMALL LETTER S WITH CARON
-      case 0x9B:
-        return 0x203A; // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-      case 0x9C:
-        return 0x0153; // LATIN SMALL LIGATURE OE
-      case 0x9E:
-        return 0x017E; // LATIN SMALL LETTER Z WITH CARON
-      case 0x9F:
-        return 0x0178; // LATIN CAPITAL LETTER Y WITH DIAERESIS
-
-      case 0x81:
-      case 0x8D:
-      case 0x8F:
-      case 0x90:
-      case 0x9D:
-        if (replacementCodepoint == null) {
-          throw ArgumentError(
-              "Invalid windows-1252 code point $char at $_offset");
-        }
-        return replacementCodepoint;
-    }
-    return char;
-  }
-}

diff --git a/lib/src/encoding_parser.dart b/lib/src/encoding_parser.dart
index d61e76a..d0f40d6 100644
--- a/lib/src/encoding_parser.dart
+++ b/lib/src/encoding_parser.dart

@@ -1,7 +1,5 @@
-library encoding_parser;
-
 import 'constants.dart';
-import 'inputstream.dart';
+import 'html_input_stream.dart';
 
 // TODO(jmesserly): I converted StopIteration to StateError("No more elements").
 // Seems strange to throw this from outside of an iterator though.
@@ -10,15 +8,15 @@
 /// raised.
 class EncodingBytes {
   final String _bytes;
-  int _position = -1;
+  int __position = -1;
 
   EncodingBytes(this._bytes);
 
-  int get length => _bytes.length;
+  int get _length => _bytes.length;
 
-  String next() {
-    var p = _position = _position + 1;
-    if (p >= length) {
+  String _next() {
+    var p = __position = __position + 1;
+    if (p >= _length) {
       throw StateError("No more elements");
     } else if (p < 0) {
       throw RangeError(p);
@@ -26,59 +24,59 @@
     return _bytes[p];
   }
 
-  String previous() {
-    var p = _position;
-    if (p >= length) {
+  String _previous() {
+    var p = __position;
+    if (p >= _length) {
       throw StateError("No more elements");
     } else if (p < 0) {
       throw RangeError(p);
     }
-    _position = p = p - 1;
+    __position = p = p - 1;
     return _bytes[p];
   }
 
-  set position(int value) {
-    if (_position >= length) {
+  set _position(int value) {
+    if (__position >= _length) {
       throw StateError("No more elements");
     }
-    _position = value;
+    __position = value;
   }
 
-  int get position {
-    if (_position >= length) {
+  int get _position {
+    if (__position >= _length) {
       throw StateError("No more elements");
     }
-    if (_position >= 0) {
-      return _position;
+    if (__position >= 0) {
+      return __position;
     } else {
       return 0;
     }
   }
 
-  String get currentByte => _bytes[position];
+  String get _currentByte => _bytes[_position];
 
   /// Skip past a list of characters. Defaults to skipping [isWhitespace].
-  String skipChars([CharPreciate skipChars]) {
+  String _skipChars([_CharPredicate skipChars]) {
     if (skipChars == null) skipChars = isWhitespace;
-    var p = position; // use property for the error-checking
-    while (p < length) {
+    var p = _position; // use property for the error-checking
+    while (p < _length) {
       var c = _bytes[p];
       if (!skipChars(c)) {
-        _position = p;
+        __position = p;
         return c;
       }
       p += 1;
     }
-    _position = p;
+    __position = p;
     return null;
   }
 
-  String skipUntil(CharPreciate untilChars) {
-    var p = position;
-    while (p < length) {
+  String _skipUntil(_CharPredicate untilChars) {
+    var p = _position;
+    while (p < _length) {
       var c = _bytes[p];
       if (untilChars(c)) {
-        _position = p;
+        __position = p;
         return c;
       }
       p += 1;
@@ -89,14 +87,14 @@
   /// Look for a sequence of bytes at the start of a string. If the bytes
   /// are found return true and advance the position to the byte after the
   /// match. Otherwise return false and leave the position alone.
-  bool matchBytes(String bytes) {
-    var p = position;
+  bool _matchBytes(String bytes) {
+    var p = _position;
     if (_bytes.length < p + bytes.length) {
       return false;
     }
     var data = _bytes.substring(p, p + bytes.length);
     if (data == bytes) {
-      position += bytes.length;
+      _position += bytes.length;
       return true;
     }
     return false;
@@ -104,19 +102,19 @@
 
   /// Look for the next sequence of bytes matching a given sequence. If
   /// a match is found advance the position to the last byte of the match
-  bool jumpTo(String bytes) {
-    var newPosition = _bytes.indexOf(bytes, position);
+  bool _jumpTo(String bytes) {
+    var newPosition = _bytes.indexOf(bytes, _position);
     if (newPosition >= 0) {
-      _position = newPosition + bytes.length - 1;
+      __position = newPosition + bytes.length - 1;
       return true;
     } else {
       throw StateError("No more elements");
     }
   }
 
-  String slice(int start, [int end]) {
-    if (end == null) end = length;
-    if (end < 0) end += length;
+  String _slice(int start, [int end]) {
+    if (end == null) end = _length;
+    if (end < 0) end += _length;
     return _bytes.substring(start, end);
   }
 }
@@ -126,68 +124,69 @@
 class _DispatchEntry {
   final String pattern;
   final _MethodHandler handler;
+
   _DispatchEntry(this.pattern, this.handler);
 }
 
 /// Mini parser for detecting character encoding from meta elements.
 class EncodingParser {
-  final EncodingBytes data;
-  String encoding;
+  final EncodingBytes _data;
+  String _encoding;
 
   /// [bytes] - the data to work on for encoding detection.
   EncodingParser(List<int> bytes)
       // Note: this is intentionally interpreting bytes as codepoints.
-      : data = EncodingBytes(String.fromCharCodes(bytes).toLowerCase());
+      : _data = EncodingBytes(String.fromCharCodes(bytes).toLowerCase());
 
   String getEncoding() {
     final methodDispatch = [
-      _DispatchEntry("<!--", handleComment),
-      _DispatchEntry("<meta", handleMeta),
-      _DispatchEntry("</", handlePossibleEndTag),
-      _DispatchEntry("<!", handleOther),
-      _DispatchEntry("<?", handleOther),
-      _DispatchEntry("<", handlePossibleStartTag),
+      _DispatchEntry("<!--", _handleComment),
+      _DispatchEntry("<meta", _handleMeta),
+      _DispatchEntry("</", _handlePossibleEndTag),
+      _DispatchEntry("<!", _handleOther),
+      _DispatchEntry("<?", _handleOther),
+      _DispatchEntry("<", _handlePossibleStartTag),
     ];
 
     try {
       for (;;) {
         for (var dispatch in methodDispatch) {
-          if (data.matchBytes(dispatch.pattern)) {
+          if (_data._matchBytes(dispatch.pattern)) {
             var keepParsing = dispatch.handler();
             if (keepParsing) break;
 
             // We found an encoding. Stop.
-            return encoding;
+            return _encoding;
           }
         }
-        data.position += 1;
+        _data._position += 1;
       }
     } on StateError catch (_) {
       // Catch this here to match behavior of Python's StopIteration
       // TODO(jmesserly): refactor to not use exceptions
     }
-    return encoding;
+    return _encoding;
   }
 
   /// Skip over comments.
-  bool handleComment() => data.jumpTo("-->");
+  bool _handleComment() => _data._jumpTo("-->");
 
-  bool handleMeta() {
-    if (!isWhitespace(data.currentByte)) {
+  bool _handleMeta() {
+    if (!isWhitespace(_data._currentByte)) {
       // if we have <meta not followed by a space so just keep going
       return true;
     }
     // We have a valid meta element we want to search for attributes
     while (true) {
       // Try to find the next attribute after the current position
-      var attr = getAttribute();
+      var attr = _getAttribute();
       if (attr == null) return true;
 
       if (attr[0] == "charset") {
         var tentativeEncoding = attr[1];
         var codec = codecName(tentativeEncoding);
         if (codec != null) {
-          encoding = codec;
+          _encoding = codec;
           return false;
         }
       } else if (attr[0] == "content") {
@@ -195,54 +194,54 @@
         var tentativeEncoding = contentParser.parse();
         var codec = codecName(tentativeEncoding);
         if (codec != null) {
-          encoding = codec;
+          _encoding = codec;
           return false;
         }
       }
     }
   }
 
-  bool handlePossibleStartTag() => handlePossibleTag(false);
+  bool _handlePossibleStartTag() => _handlePossibleTag(false);
 
-  bool handlePossibleEndTag() {
-    data.next();
-    return handlePossibleTag(true);
+  bool _handlePossibleEndTag() {
+    _data._next();
+    return _handlePossibleTag(true);
   }
 
-  bool handlePossibleTag(bool endTag) {
-    if (!isLetter(data.currentByte)) {
+  bool _handlePossibleTag(bool endTag) {
+    if (!isLetter(_data._currentByte)) {
       //If the next byte is not an ascii letter either ignore this
       //fragment (possible start tag case) or treat it according to
       //handleOther
       if (endTag) {
-        data.previous();
-        handleOther();
+        _data._previous();
+        _handleOther();
       }
       return true;
     }
 
-    var c = data.skipUntil(isSpaceOrAngleBracket);
+    var c = _data._skipUntil(_isSpaceOrAngleBracket);
     if (c == "<") {
       // return to the first step in the overall "two step" algorithm
       // reprocessing the < byte
-      data.previous();
+      _data._previous();
     } else {
       //Read all attributes
-      var attr = getAttribute();
+      var attr = _getAttribute();
       while (attr != null) {
-        attr = getAttribute();
+        attr = _getAttribute();
       }
     }
     return true;
   }
 
-  bool handleOther() => data.jumpTo(">");
+  bool _handleOther() => _data._jumpTo(">");
 
   /// Return a name,value pair for the next attribute in the stream,
   /// if one is found, or null
-  List<String> getAttribute() {
+  List<String> _getAttribute() {
     // Step 1 (skip chars)
-    var c = data.skipChars((x) => x == "/" || isWhitespace(x));
+    var c = _data._skipChars((x) => x == "/" || isWhitespace(x));
     // Step 2
     if (c == ">" || c == null) {
       return null;
@@ -258,8 +257,8 @@
         break;
       } else if (isWhitespace(c)) {
         // Step 6!
-        c = data.skipChars();
-        c = data.next();
+        c = _data._skipChars();
+        c = _data._next();
         break;
       } else if (c == "/" || c == ">") {
         return [attrName.join(), ""];
@@ -269,27 +268,27 @@
         attrName.add(c);
       }
       // Step 5
-      c = data.next();
+      c = _data._next();
     }
     // Step 7
     if (c != "=") {
-      data.previous();
+      _data._previous();
       return [attrName.join(), ""];
     }
     // Step 8
-    data.next();
+    _data._next();
     // Step 9
-    c = data.skipChars();
+    c = _data._skipChars();
     // Step 10
     if (c == "'" || c == '"') {
       // 10.1
       var quoteChar = c;
       while (true) {
         // 10.2
-        c = data.next();
+        c = _data._next();
         if (c == quoteChar) {
           // 10.3
-          data.next();
+          _data._next();
           return [attrName.join(), attrValue.join()];
         } else if (isLetter(c)) {
           // 10.4
@@ -310,8 +309,8 @@
     }
     // Step 11
     while (true) {
-      c = data.next();
-      if (isSpaceOrAngleBracket(c)) {
+      c = _data._next();
+      if (_isSpaceOrAngleBracket(c)) {
         return [attrName.join(), attrValue.join()];
       } else if (c == null) {
         return null;
@@ -333,34 +332,34 @@
     try {
       // Check if the attr name is charset
       // otherwise return
-      data.jumpTo("charset");
-      data.position += 1;
-      data.skipChars();
-      if (data.currentByte != "=") {
+      data._jumpTo("charset");
+      data._position += 1;
+      data._skipChars();
+      if (data._currentByte != "=") {
         // If there is no = sign keep looking for attrs
         return null;
       }
-      data.position += 1;
-      data.skipChars();
+      data._position += 1;
+      data._skipChars();
       // Look for an encoding between matching quote marks
-      if (data.currentByte == '"' || data.currentByte == "'") {
-        var quoteMark = data.currentByte;
-        data.position += 1;
-        var oldPosition = data.position;
-        if (data.jumpTo(quoteMark)) {
-          return data.slice(oldPosition, data.position);
+      if (data._currentByte == '"' || data._currentByte == "'") {
+        var quoteMark = data._currentByte;
+        data._position += 1;
+        var oldPosition = data._position;
+        if (data._jumpTo(quoteMark)) {
+          return data._slice(oldPosition, data._position);
         } else {
           return null;
         }
       } else {
         // Unquoted value
-        var oldPosition = data.position;
+        var oldPosition = data._position;
         try {
-          data.skipUntil(isWhitespace);
-          return data.slice(oldPosition, data.position);
+          data._skipUntil(isWhitespace);
+          return data._slice(oldPosition, data._position);
         } on StateError catch (_) {
           //Return the whole remaining value
-          return data.slice(oldPosition);
+          return data._slice(oldPosition);
         }
       }
     } on StateError catch (_) {
@@ -369,8 +368,8 @@
   }
 }
 
-bool isSpaceOrAngleBracket(String char) {
+bool _isSpaceOrAngleBracket(String char) {
   return char == ">" || char == "<" || isWhitespace(char);
 }
 
-typedef CharPreciate = bool Function(String char);
+typedef _CharPredicate = bool Function(String char);

diff --git a/lib/src/inputstream.dart b/lib/src/html_input_stream.dart
similarity index 83%
rename from lib/src/inputstream.dart
rename to lib/src/html_input_stream.dart
index dbcf98b..42b1741 100644
--- a/lib/src/inputstream.dart
+++ b/lib/src/html_input_stream.dart

@@ -1,21 +1,12 @@
-library inputstream;
-
 import 'dart:collection';
-import 'package:utf/utf.dart';
+import 'dart:convert' show ascii, utf8;
+
 import 'package:source_span/source_span.dart';
-import 'char_encodings.dart';
+
 import 'constants.dart';
 import 'encoding_parser.dart';
 import 'utils.dart';
 
-/// Hooks to call into dart:io without directly referencing it.
-class ConsoleSupport {
-  List<int> bytesFromFile(source) => null;
-}
-
-// TODO(jmesserly): use lazy init here when supported.
-ConsoleSupport consoleSupport = ConsoleSupport();
-
 /// Provides a unicode stream of characters to the HtmlTokenizer.
 ///
 /// This class takes care of character encoding and removing or replacing
@@ -26,7 +17,7 @@
   static const int numBytesMeta = 512;
 
   /// Encoding to use if no other information can be found.
-  static const String defaultEncoding = 'windows-1252';
+  static const String defaultEncoding = 'utf-8';
 
   /// The name of the character encoding.
   String charEncodingName;
@@ -75,24 +66,14 @@
       this.sourceUrl])
       : charEncodingName = codecName(encoding) {
     if (source is String) {
-      _rawChars = toCodepoints(source);
+      _rawChars = source.runes.toList();
       charEncodingName = 'utf-8';
       charEncodingCertain = true;
     } else if (source is List<int>) {
       _rawBytes = source;
     } else {
-      // TODO(jmesserly): it's unfortunate we need to read all bytes in advance,
-      // but it's necessary because of how the UTF decoders work.
-      _rawBytes = consoleSupport.bytesFromFile(source);
-
-      if (_rawBytes == null) {
-        // TODO(jmesserly): we should accept some kind of stream API too.
-        // Unfortunately dart:io InputStream is async only, which won't work.
-        throw ArgumentError("'source' must be a String or "
-            "List<int> (of bytes). You can also pass a RandomAccessFile if you"
-            "`import 'package:html/parser_console.dart'` and call "
-            "`useConsole()`.");
-      }
+      throw ArgumentError.value(
+          source, 'source', 'Must be a String or List<int>.');
     }
 
     // Detect encoding iff no explicit "transport level" encoding is supplied
@@ -111,7 +92,7 @@
     _chars = <int>[];
 
     if (_rawChars == null) {
-      _rawChars = decodeBytes(charEncodingName, _rawBytes);
+      _rawChars = _decodeBytes(charEncodingName, _rawBytes);
     }
 
     bool skipNewline = false;
@@ -121,7 +102,7 @@
         if (c == NEWLINE) continue;
       }
 
-      if (invalidUnicode(c)) errors.add('invalid-codepoint');
+      if (_invalidUnicode(c)) errors.add('invalid-codepoint');
 
       if (0xD800 <= c && c <= 0xDFFF) {
         c = 0xFFFD;
@@ -196,17 +177,9 @@
   /// encoding otherwise return null.
   String detectBOM() {
     // Try detecting the BOM using bytes from the string
-    if (hasUtf8Bom(_rawBytes)) {
+    if (_hasUtf8Bom(_rawBytes)) {
       return 'utf-8';
     }
-    // Note: we don't need to remember whether it was big or little endian
-    // because the decoder will do that later. It will also eat the BOM for us.
-    if (hasUtf16Bom(_rawBytes)) {
-      return 'utf-16';
-    }
-    if (hasUtf32Bom(_rawBytes)) {
-      return 'utf-32';
-    }
     return null;
   }
 
@@ -262,7 +235,7 @@
 
 // TODO(jmesserly): the Python code used a regex to check for this. But
 // Dart doesn't let you create a regexp with invalid characters.
-bool invalidUnicode(int c) {
+bool _invalidUnicode(int c) {
   if (0x0001 <= c && c <= 0x0008) return true;
   if (0x000E <= c && c <= 0x001F) return true;
   if (0x007F <= c && c <= 0x009F) return true;
@@ -319,3 +292,32 @@
   var canonicalName = encoding.replaceAll(asciiPunctuation, '').toLowerCase();
   return encodings[canonicalName];
 }
+
+/// Returns true if the [bytes] starts with a UTF-8 byte order mark.
+/// Since UTF-8 doesn't have byte order, it's somewhat of a misnomer, but it is
+/// used in HTML to detect the UTF-8 encoding.
+bool _hasUtf8Bom(List<int> bytes, [int offset = 0, int length]) {
+  int end = length != null ? offset + length : bytes.length;
+  return (offset + 3) <= end &&
+      bytes[offset] == 0xEF &&
+      bytes[offset + 1] == 0xBB &&
+      bytes[offset + 2] == 0xBF;
+}
+
+/// Decodes the [bytes] with the provided [encoding] and returns an iterable for
+/// the codepoints. Only the `ascii` and `utf-8` encodings are supported; any
+/// other [encoding] throws an [ArgumentError].
+Iterable<int> _decodeBytes(String encoding, List<int> bytes) {
+  switch (encoding) {
+    case 'ascii':
+      return ascii.decode(bytes).runes;
+
+    case 'utf-8':
+      // NOTE: To match the behavior of the other decode functions, we eat the
+      // UTF-8 BOM here. This is the default behavior of `utf8.decode`.
+      return utf8.decode(bytes).runes;
+
+    default:
+      throw ArgumentError('Encoding $encoding not supported');
+  }
+}

diff --git a/lib/src/tokenizer.dart b/lib/src/tokenizer.dart
index 48d6365..638663e 100644
--- a/lib/src/tokenizer.dart
+++ b/lib/src/tokenizer.dart

@@ -3,7 +3,7 @@
 import 'dart:collection';
 import 'package:html/parser.dart' show HtmlParser;
 import 'constants.dart';
-import 'inputstream.dart';
+import 'html_input_stream.dart';
 import 'token.dart';
 import 'utils.dart';
 

diff --git a/pubspec.yaml b/pubspec.yaml
index e13c01a..4480d41 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml

@@ -1,5 +1,5 @@
 name: html
-version: 0.13.4+2
+version: 0.14.0+1
 
 description: APIs for parsing and manipulating HTML content outside the browser.
 author: Dart Team <misc@dartlang.org>
@@ -11,7 +11,6 @@
 dependencies:
   csslib: '>=0.13.2 <0.16.0'
   source_span: '>=1.0.0 <2.0.0'
-  utf: '>=0.9.0 <0.10.0'
 
 dev_dependencies:
   path: ^1.6.2

diff --git a/test/data/parser_feature/raw_file.html b/test/data/parser_feature/raw_file.html
deleted file mode 100644
index bcdbf76..0000000
--- a/test/data/parser_feature/raw_file.html
+++ /dev/null

@@ -1,6 +0,0 @@
-<!doctype html>
-<html>
-<body>
-Hello world!
-</body>
-</html>

diff --git a/test/parser_feature_test.dart b/test/parser_feature_test.dart
index 2591a2d..0889f44 100644
--- a/test/parser_feature_test.dart
+++ b/test/parser_feature_test.dart

@@ -1,13 +1,13 @@
 /// Additional feature tests that aren't based on test data.
 library parser_feature_test;
 
-import 'package:test/test.dart';
 import 'package:html/dom.dart';
 import 'package:html/parser.dart';
 import 'package:html/src/constants.dart';
 import 'package:html/src/encoding_parser.dart';
 import 'package:html/src/treebuilder.dart';
 import 'package:source_span/source_span.dart';
+import 'package:test/test.dart';
 
 main() {
   _testElementSpans();

diff --git a/test/parser_test.dart b/test/parser_test.dart
index 1289f61..1db1586 100644
--- a/test/parser_test.dart
+++ b/test/parser_test.dart

@@ -2,13 +2,12 @@
 library parser_test;
 
 import 'dart:convert';
-import 'dart:io';
-import 'package:path/path.dart' as pathos;
-import 'package:test/test.dart';
+
 import 'package:html/dom.dart';
 import 'package:html/parser.dart';
-import 'package:html/parser_console.dart' as parser_console;
-import 'package:html/src/inputstream.dart' as inputstream;
+import 'package:path/path.dart' as pathos;
+import 'package:test/test.dart';
+
 import 'support.dart';
 
 // Run the parse error checks
@@ -71,16 +70,6 @@
 }
 
 void main() {
-  test('dart:io', () {
-    // ensure IO support is unregistered
-    expect(inputstream.consoleSupport,
-        const TypeMatcher<inputstream.ConsoleSupport>());
-    var file = File('$testDataDir/parser_feature/raw_file.html').openSync();
-    expect(() => parse(file), throwsA(const TypeMatcher<ArgumentError>()));
-    parser_console.useConsole();
-    expect(parse(file).body.innerHtml.trim(), 'Hello world!');
-  });
-
   for (var path in getDataFiles('tree-construction')) {
     if (!path.endsWith('.dat')) continue;
 

diff --git a/test/tokenizer_test.dart b/test/tokenizer_test.dart
index 744a496..59dd2aa 100644
--- a/test/tokenizer_test.dart
+++ b/test/tokenizer_test.dart

@@ -7,10 +7,8 @@
 import 'dart:mirrors';
 import 'package:path/path.dart' as pathos;
 import 'package:test/test.dart';
-import 'package:html/src/char_encodings.dart';
 import 'package:html/src/token.dart';
 import 'package:html/src/tokenizer.dart';
-import 'package:utf/utf.dart';
 import 'support.dart';
 
 class TokenizerTestParser {
@@ -24,7 +22,7 @@
 
   List parse(String str) {
     // Note: we need to pass bytes to the tokenizer if we want it to handle BOM.
-    var bytes = codepointsToUtf8(toCodepoints(str));
+    var bytes = utf8.encode(str);
     var tokenizer = HtmlTokenizer(bytes, encoding: 'utf-8');
     outputTokens = [];
commit	e0622ba6722f729df69d8cf1e8604915c08989d5	[log] [tgz]
author	Kevin Moore <kevmoo@google.com>	Thu Apr 11 01:13:28 2019
committer	Kevin Moore <kevmoo@google.com>	Thu Apr 11 01:14:34 2019
tree	babe036cf324bf75d1e6daef35f76931a031418b
parent	2b88ac76d43756019749d1c598a0fab41914420c [diff]
parent	01b99128f478373139fae9fe93fc90c23c934d2f [diff]