Merge pull request #3 from kevmoo/cleanup
Cleanup
diff --git a/.gitignore b/.gitignore
index 89f7747..4232a2f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,4 @@
-.buildlog
-.DS_Store
-.idea
+.packages
.pub/
-.settings/
-build/
packages
pubspec.lock
diff --git a/.test_config b/.test_config
new file mode 100644
index 0000000..20d6a22
--- /dev/null
+++ b/.test_config
@@ -0,0 +1,3 @@
+{
+ test_package: true
+}
diff --git a/lib/src/list_range.dart b/lib/src/list_range.dart
index 2f3b34d..159512d 100644
--- a/lib/src/list_range.dart
+++ b/lib/src/list_range.dart
@@ -13,15 +13,15 @@
*/
// TODO(floitsch): Consider removing the extend and switch to implements since
// that's cheaper to allocate.
-class ListRange extends IterableBase {
- final List _source;
+class ListRange extends IterableBase<int> {
+ final List<int> _source;
final int _offset;
final int _length;
- ListRange(source, [offset = 0, length]) :
- this._source = source,
- this._offset = offset,
- this._length = (length == null ? source.length - offset : length) {
+ ListRange(List<int> source, [offset = 0, length])
+ : this._source = source,
+ this._offset = offset,
+ this._length = (length == null ? source.length - offset : length) {
if (_offset < 0 || _offset > _source.length) {
throw new RangeError.value(_offset);
}
diff --git a/lib/src/shared.dart b/lib/src/shared.dart
new file mode 100644
index 0000000..257def3
--- /dev/null
+++ b/lib/src/shared.dart
@@ -0,0 +1,15 @@
+// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+import 'util.dart';
+
+// TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
+/**
+ * Provide a list of Unicode codepoints for a given string.
+ */
+List<int> stringToCodepoints(String str) {
+ // Note: str.codeUnits gives us 16-bit code units on all Dart implementations.
+ // So we need to convert.
+ return utf16CodeUnitsToCodepoints(str.codeUnits);
+}
diff --git a/lib/src/utf/utf16.dart b/lib/src/utf16.dart
similarity index 79%
rename from lib/src/utf/utf16.dart
rename to lib/src/utf16.dart
index 8ddd4dd..1fcb482 100644
--- a/lib/src/utf/utf16.dart
+++ b/lib/src/utf16.dart
@@ -2,17 +2,14 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-part of utf;
+library utf.utf16;
-// TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
-/**
- * Provide a list of Unicode codepoints for a given string.
- */
-List<int> stringToCodepoints(String str) {
- // Note: str.codeUnits gives us 16-bit code units on all Dart implementations.
- // So we need to convert.
- return utf16CodeUnitsToCodepoints(str.codeUnits);
-}
+import "dart:collection";
+
+import 'constants.dart';
+import 'list_range.dart';
+import 'utf_16_code_unit_decoder.dart';
+import 'util.dart';
/**
* Generate a string from the provided Unicode codepoints.
@@ -23,6 +20,7 @@
String codepointsToString(List<int> codepoints) {
return new String.fromCharCodes(codepoints);
}
+
/**
* Decodes the UTF-16 bytes as an iterable. Thus, the consumer can only convert
* as much of the input as needed. Determines the byte order from the BOM,
@@ -31,12 +29,14 @@
* rather than replace the bad value. The default value for
* [replacementCodepoint] is U+FFFD.
*/
-IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0,
- int length, int replacementCodepoint =
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes,
+ [int offset = 0,
+ int length,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
return new IterableUtf16Decoder._(
- () => new Utf16BytesToCodeUnitsDecoder(bytes, offset, length,
- replacementCodepoint), replacementCodepoint);
+ () => new Utf16BytesToCodeUnitsDecoder(
+ bytes, offset, length, replacementCodepoint),
+ replacementCodepoint);
}
/**
@@ -47,12 +47,15 @@
* ArgumentError rather than replace the bad value. The default
* value for the [replacementCodepoint] is U+FFFD.
*/
-IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes, [int offset = 0,
- int length, bool stripBom = true, int replacementCodepoint =
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
return new IterableUtf16Decoder._(
- () => new Utf16beBytesToCodeUnitsDecoder(bytes, offset, length, stripBom,
- replacementCodepoint), replacementCodepoint);
+ () => new Utf16beBytesToCodeUnitsDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint),
+ replacementCodepoint);
}
/**
@@ -63,12 +66,15 @@
* ArgumentError rather than replace the bad value. The default
* value for the [replacementCodepoint] is U+FFFD.
*/
-IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes, [int offset = 0,
- int length, bool stripBom = true, int replacementCodepoint =
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
+IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
return new IterableUtf16Decoder._(
- () => new Utf16leBytesToCodeUnitsDecoder(bytes, offset, length, stripBom,
- replacementCodepoint), replacementCodepoint);
+ () => new Utf16leBytesToCodeUnitsDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint),
+ replacementCodepoint);
}
/**
@@ -77,10 +83,12 @@
* ArgumentError rather than replace the bad value. The default
* value for the [replacementCodepoint] is U+FFFD.
*/
-String decodeUtf16(List<int> bytes, [int offset = 0, int length,
+String decodeUtf16(List<int> bytes,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(bytes,
- offset, length, replacementCodepoint);
+ Utf16BytesToCodeUnitsDecoder decoder = new Utf16BytesToCodeUnitsDecoder(
+ bytes, offset, length, replacementCodepoint);
List<int> codeunits = decoder.decodeRest();
return new String.fromCharCodes(
utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
@@ -93,11 +101,13 @@
* null to throw an ArgumentError rather than replace the bad value.
* The default value for the [replacementCodepoint] is U+FFFD.
*/
-String decodeUtf16be(List<int> bytes, [int offset = 0, int length,
+String decodeUtf16be(List<int> bytes,
+ [int offset = 0,
+ int length,
bool stripBom = true,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(bytes, offset,
- length, stripBom, replacementCodepoint)).decodeRest();
+ List<int> codeunits = (new Utf16beBytesToCodeUnitsDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint)).decodeRest();
return new String.fromCharCodes(
utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
}
@@ -109,11 +119,13 @@
* null to throw an ArgumentError rather than replace the bad value.
* The default value for the [replacementCodepoint] is U+FFFD.
*/
-String decodeUtf16le(List<int> bytes, [int offset = 0, int length,
+String decodeUtf16le(List<int> bytes,
+ [int offset = 0,
+ int length,
bool stripBom = true,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(bytes, offset,
- length, stripBom, replacementCodepoint)).decodeRest();
+ List<int> codeunits = (new Utf16leBytesToCodeUnitsDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint)).decodeRest();
return new String.fromCharCodes(
utf16CodeUnitsToCodepoints(codeunits, 0, null, replacementCodepoint));
}
@@ -122,8 +134,7 @@
* Produce a list of UTF-16 encoded bytes. This method prefixes the resulting
* bytes with a big-endian byte-order-marker.
*/
-List<int> encodeUtf16(String str) =>
- encodeUtf16be(str, true);
+List<int> encodeUtf16(String str) => encodeUtf16be(str, true);
/**
* Produce a list of UTF-16BE encoded bytes. By default, this method produces
@@ -216,8 +227,8 @@
IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);
Utf16CodeUnitDecoder get iterator =>
- new Utf16CodeUnitDecoder.fromListRangeIterator(codeunitsProvider(),
- replacementCodepoint);
+ new Utf16CodeUnitDecoder.fromListRangeIterator(
+ codeunitsProvider(), replacementCodepoint);
}
/**
@@ -234,8 +245,9 @@
Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(
this.utf16EncodedBytesIterator, this.replacementCodepoint);
- factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
- int offset = 0, int length,
+ factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
if (length == null) {
length = utf16EncodedBytes.length - offset;
@@ -247,8 +259,8 @@
return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
length - 2, false, replacementCodepoint);
} else {
- return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset,
- length, false, replacementCodepoint);
+ return new Utf16beBytesToCodeUnitsDecoder(
+ utf16EncodedBytes, offset, length, false, replacementCodepoint);
}
}
@@ -315,12 +327,14 @@
* to produce the code unit (0-(2^16)-1).
*/
class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
- Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
- int offset = 0, int length, bool stripBom = true,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- super._fromListRangeIterator(
- (new ListRange(utf16EncodedBytes, offset, length)).iterator,
- replacementCodepoint) {
+ Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+ : super._fromListRangeIterator(
+ (new ListRange(utf16EncodedBytes, offset, length)).iterator,
+ replacementCodepoint) {
if (stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length)) {
skip();
}
@@ -340,12 +354,14 @@
* to produce the code unit (0-(2^16)-1).
*/
class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
- Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
- int offset = 0, int length, bool stripBom = true,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- super._fromListRangeIterator(
- (new ListRange(utf16EncodedBytes, offset, length)).iterator,
- replacementCodepoint) {
+ Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+ : super._fromListRangeIterator(
+ (new ListRange(utf16EncodedBytes, offset, length)).iterator,
+ replacementCodepoint) {
if (stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length)) {
skip();
}
diff --git a/lib/src/utf/utf32.dart b/lib/src/utf32.dart
similarity index 76%
rename from lib/src/utf/utf32.dart
rename to lib/src/utf32.dart
index e51009d..9dfc9fe 100644
--- a/lib/src/utf/utf32.dart
+++ b/lib/src/utf32.dart
@@ -2,7 +2,13 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-part of utf;
+library utf.utf32;
+
+import "dart:collection";
+
+import 'constants.dart';
+import 'list_range.dart';
+import 'shared.dart';
/**
* Decodes the UTF-32 bytes as an iterable. Thus, the consumer can only convert
@@ -11,8 +17,9 @@
* Set the replacementCharacter to null to throw an ArgumentError
* rather than replace the bad value.
*/
-IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes, [
- int offset = 0, int length,
+IterableUtf32Decoder decodeUtf32AsIterable(List<int> bytes,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
return new IterableUtf32Decoder._(
() => new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint));
@@ -25,12 +32,13 @@
* Set the replacementCharacter to null to throw an ArgumentError
* rather than replace the bad value.
*/
-IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes, [
- int offset = 0, int length, bool stripBom = true,
+IterableUtf32Decoder decodeUtf32beAsIterable(List<int> bytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- return new IterableUtf32Decoder._(
- () => new Utf32beBytesDecoder(bytes, offset, length, stripBom,
- replacementCodepoint));
+ return new IterableUtf32Decoder._(() => new Utf32beBytesDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint));
}
/**
@@ -40,12 +48,13 @@
* Set the replacementCharacter to null to throw an ArgumentError
* rather than replace the bad value.
*/
-IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes, [
- int offset = 0, int length, bool stripBom = true,
+IterableUtf32Decoder decodeUtf32leAsIterable(List<int> bytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- return new IterableUtf32Decoder._(
- () => new Utf32leBytesDecoder(bytes, offset, length, stripBom,
- replacementCodepoint));
+ return new IterableUtf32Decoder._(() => new Utf32leBytesDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint));
}
/**
@@ -55,11 +64,15 @@
* replacement character. Set the replacementCharacter to null to throw an
* ArgumentError rather than replace the bad value.
*/
-String decodeUtf32(List<int> bytes, [int offset = 0, int length,
+String decodeUtf32(List<int> bytes,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- return new String.fromCharCodes((new Utf32BytesDecoder(bytes, offset, length,
- replacementCodepoint)).decodeRest());
+ return new String.fromCharCodes(
+ (new Utf32BytesDecoder(bytes, offset, length, replacementCodepoint))
+ .decodeRest());
}
+
/**
* Produce a String from a sequence of UTF-32BE encoded bytes. The parameters
* allow an offset into a list of bytes (as int), limiting the length of the
@@ -67,11 +80,13 @@
* replacement character. Set the replacementCharacter to null to throw an
* ArgumentError rather than replace the bad value.
*/
-String decodeUtf32be(
- List<int> bytes, [int offset = 0, int length, bool stripBom = true,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) =>
- new String.fromCharCodes((new Utf32beBytesDecoder(bytes, offset, length,
- stripBom, replacementCodepoint)).decodeRest());
+String decodeUtf32be(List<int> bytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) =>
+ new String.fromCharCodes((new Utf32beBytesDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint)).decodeRest());
/**
* Produce a String from a sequence of UTF-32LE encoded bytes. The parameters
@@ -80,18 +95,19 @@
* replacement character. Set the replacementCharacter to null to throw an
* ArgumentError rather than replace the bad value.
*/
-String decodeUtf32le(
- List<int> bytes, [int offset = 0, int length, bool stripBom = true,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) =>
- new String.fromCharCodes((new Utf32leBytesDecoder(bytes, offset, length,
- stripBom, replacementCodepoint)).decodeRest());
+String decodeUtf32le(List<int> bytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) =>
+ new String.fromCharCodes((new Utf32leBytesDecoder(
+ bytes, offset, length, stripBom, replacementCodepoint)).decodeRest());
/**
* Produce a list of UTF-32 encoded bytes. This method prefixes the resulting
* bytes with a big-endian byte-order-marker.
*/
-List<int> encodeUtf32(String str) =>
- encodeUtf32be(str, true);
+List<int> encodeUtf32(String str) => encodeUtf32be(str, true);
/**
* Produce a list of UTF-32BE encoded bytes. By default, this method produces
@@ -99,8 +115,8 @@
*/
List<int> encodeUtf32be(String str, [bool writeBOM = false]) {
List<int> utf32CodeUnits = stringToCodepoints(str);
- List<int> encoding = new List<int>(4 * utf32CodeUnits.length +
- (writeBOM ? 4 : 0));
+ List<int> encoding =
+ new List<int>(4 * utf32CodeUnits.length + (writeBOM ? 4 : 0));
int i = 0;
if (writeBOM) {
encoding[i++] = 0;
@@ -123,8 +139,8 @@
*/
List<int> encodeUtf32le(String str, [bool writeBOM = false]) {
List<int> utf32CodeUnits = stringToCodepoints(str);
- List<int> encoding = new List<int>(4 * utf32CodeUnits.length +
- (writeBOM ? 4 : 0));
+ List<int> encoding =
+ new List<int>(4 * utf32CodeUnits.length + (writeBOM ? 4 : 0));
int i = 0;
if (writeBOM) {
encoding[i++] = UNICODE_UTF_BOM_LO;
@@ -145,8 +161,7 @@
* Identifies whether a List of bytes starts (based on offset) with a
* byte-order marker (BOM).
*/
-bool hasUtf32Bom(
- List<int> utf32EncodedBytes, [int offset = 0, int length]) {
+bool hasUtf32Bom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
return hasUtf32beBom(utf32EncodedBytes, offset, length) ||
hasUtf32leBom(utf32EncodedBytes, offset, length);
}
@@ -158,7 +173,8 @@
bool hasUtf32beBom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
int end = length != null ? offset + length : utf32EncodedBytes.length;
return (offset + 4) <= end &&
- utf32EncodedBytes[offset] == 0 && utf32EncodedBytes[offset + 1] == 0 &&
+ utf32EncodedBytes[offset] == 0 &&
+ utf32EncodedBytes[offset + 1] == 0 &&
utf32EncodedBytes[offset + 2] == UNICODE_UTF_BOM_HI &&
utf32EncodedBytes[offset + 3] == UNICODE_UTF_BOM_LO;
}
@@ -172,7 +188,8 @@
return (offset + 4) <= end &&
utf32EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&
utf32EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI &&
- utf32EncodedBytes[offset + 2] == 0 && utf32EncodedBytes[offset + 3] == 0;
+ utf32EncodedBytes[offset + 2] == 0 &&
+ utf32EncodedBytes[offset + 3] == 0;
}
typedef Utf32BytesDecoder Utf32BytesDecoderProvider();
@@ -204,8 +221,9 @@
Utf32BytesDecoder._fromListRangeIterator(
this.utf32EncodedBytesIterator, this.replacementCodepoint);
- factory Utf32BytesDecoder(List<int> utf32EncodedBytes, [
- int offset = 0, int length,
+ factory Utf32BytesDecoder(List<int> utf32EncodedBytes,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
if (length == null) {
length = utf32EncodedBytes.length - offset;
@@ -217,8 +235,8 @@
return new Utf32leBytesDecoder(utf32EncodedBytes, offset + 4, length - 4,
false, replacementCodepoint);
} else {
- return new Utf32beBytesDecoder(utf32EncodedBytes, offset, length, false,
- replacementCodepoint);
+ return new Utf32beBytesDecoder(
+ utf32EncodedBytes, offset, length, false, replacementCodepoint);
}
}
@@ -243,8 +261,8 @@
if (remaining < 4) {
utf32EncodedBytesIterator.skip(utf32EncodedBytesIterator.remaining);
if (replacementCodepoint != null) {
- _current = replacementCodepoint;
- return true;
+ _current = replacementCodepoint;
+ return true;
} else {
throw new ArgumentError(
"Invalid UTF32 at ${utf32EncodedBytesIterator.position}");
@@ -283,12 +301,14 @@
* to produce the unicode codepoint.
*/
class Utf32beBytesDecoder extends Utf32BytesDecoder {
- Utf32beBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
- int length, bool stripBom = true,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- super._fromListRangeIterator(
- (new ListRange(utf32EncodedBytes, offset, length)).iterator,
- replacementCodepoint) {
+ Utf32beBytesDecoder(List<int> utf32EncodedBytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+ : super._fromListRangeIterator(
+ (new ListRange(utf32EncodedBytes, offset, length)).iterator,
+ replacementCodepoint) {
if (stripBom && hasUtf32beBom(utf32EncodedBytes, offset, length)) {
skip();
}
@@ -312,12 +332,14 @@
* to produce the unicode codepoint.
*/
class Utf32leBytesDecoder extends Utf32BytesDecoder {
- Utf32leBytesDecoder(List<int> utf32EncodedBytes, [int offset = 0,
- int length, bool stripBom = true,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- super._fromListRangeIterator(
- (new ListRange(utf32EncodedBytes, offset, length)).iterator,
- replacementCodepoint) {
+ Utf32leBytesDecoder(List<int> utf32EncodedBytes,
+ [int offset = 0,
+ int length,
+ bool stripBom = true,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+ : super._fromListRangeIterator(
+ (new ListRange(utf32EncodedBytes, offset, length)).iterator,
+ replacementCodepoint) {
if (stripBom && hasUtf32leBom(utf32EncodedBytes, offset, length)) {
skip();
}
@@ -339,5 +361,5 @@
bool _validCodepoint(int codepoint) {
return (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
(codepoint > UNICODE_UTF16_RESERVED_HI &&
- codepoint < UNICODE_VALID_RANGE_MAX);
+ codepoint < UNICODE_VALID_RANGE_MAX);
}
diff --git a/lib/src/utf/utf8.dart b/lib/src/utf8.dart
similarity index 79%
rename from lib/src/utf/utf8.dart
rename to lib/src/utf8.dart
index ff1b1ed..ecf8707 100644
--- a/lib/src/utf/utf8.dart
+++ b/lib/src/utf8.dart
@@ -2,7 +2,13 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-part of utf;
+library utf.utf8;
+
+import "dart:collection";
+
+import 'constants.dart';
+import 'list_range.dart';
+import 'shared.dart';
const int _UTF8_ONE_BYTE_MAX = 0x7f;
const int _UTF8_TWO_BYTE_MAX = 0x7ff;
@@ -28,7 +34,8 @@
* as much of the input as needed. Set the replacementCharacter to null to
* throw an ArgumentError rather than replace the bad value.
*/
-IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes, [int offset = 0,
+IterableUtf8Decoder decodeUtf8AsIterable(List<int> bytes,
+ [int offset = 0,
int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
return new IterableUtf8Decoder(bytes, offset, length, replacementCodepoint);
@@ -41,23 +48,24 @@
* Set the replacementCharacter to null to throw an ArgumentError
* rather than replace the bad value.
*/
-String decodeUtf8(List<int> bytes, [int offset = 0, int length,
+String decodeUtf8(List<int> bytes,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
return new String.fromCharCodes(
(new Utf8Decoder(bytes, offset, length, replacementCodepoint))
- .decodeRest());
+ .decodeRest());
}
/**
* Produce a sequence of UTF-8 encoded bytes from the provided string.
*/
-List<int> encodeUtf8(String str) =>
- codepointsToUtf8(stringToCodepoints(str));
+List<int> encodeUtf8(String str) => codepointsToUtf8(stringToCodepoints(str));
int _addToEncoding(int offset, int bytes, int value, List<int> buffer) {
while (bytes > 0) {
- buffer[offset + bytes] = _UTF8_SUBSEQUENT_BYTE_BASE |
- (value & _UTF8_LO_SIX_BIT_MASK);
+ buffer[offset + bytes] =
+ _UTF8_SUBSEQUENT_BYTE_BASE | (value & _UTF8_LO_SIX_BIT_MASK);
value = value >> 6;
bytes--;
}
@@ -67,8 +75,7 @@
/**
* Encode code points as UTF-8 code units.
*/
-List<int> codepointsToUtf8(
- List<int> codepoints, [int offset = 0, int length]) {
+List<int> codepointsToUtf8(List<int> codepoints, [int offset = 0, int length]) {
ListRange source = new ListRange(codepoints, offset, length);
int encodedLength = 0;
@@ -96,19 +103,19 @@
encoded[insertAt] = value;
insertAt++;
} else if (value <= _UTF8_TWO_BYTE_MAX) {
- encoded[insertAt] = _UTF8_FIRST_BYTE_OF_TWO_BASE | (
- _UTF8_FIRST_BYTE_OF_TWO_MASK &
- _addToEncoding(insertAt, 1, value, encoded));
+ encoded[insertAt] = _UTF8_FIRST_BYTE_OF_TWO_BASE |
+ (_UTF8_FIRST_BYTE_OF_TWO_MASK &
+ _addToEncoding(insertAt, 1, value, encoded));
insertAt += 2;
} else if (value <= _UTF8_THREE_BYTE_MAX) {
- encoded[insertAt] = _UTF8_FIRST_BYTE_OF_THREE_BASE | (
- _UTF8_FIRST_BYTE_OF_THREE_MASK &
- _addToEncoding(insertAt, 2, value, encoded));
+ encoded[insertAt] = _UTF8_FIRST_BYTE_OF_THREE_BASE |
+ (_UTF8_FIRST_BYTE_OF_THREE_MASK &
+ _addToEncoding(insertAt, 2, value, encoded));
insertAt += 3;
} else if (value <= UNICODE_VALID_RANGE_MAX) {
- encoded[insertAt] = _UTF8_FIRST_BYTE_OF_FOUR_BASE | (
- _UTF8_FIRST_BYTE_OF_FOUR_MASK &
- _addToEncoding(insertAt, 3, value, encoded));
+ encoded[insertAt] = _UTF8_FIRST_BYTE_OF_FOUR_BASE |
+ (_UTF8_FIRST_BYTE_OF_FOUR_MASK &
+ _addToEncoding(insertAt, 3, value, encoded));
insertAt += 4;
}
}
@@ -117,11 +124,12 @@
// Because UTF-8 specifies byte order, we do not have to follow the pattern
// used by UTF-16 & UTF-32 regarding byte order.
-List<int> utf8ToCodepoints(
- List<int> utf8EncodedBytes, [int offset = 0, int length,
+List<int> utf8ToCodepoints(List<int> utf8EncodedBytes,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
- return new Utf8Decoder(utf8EncodedBytes, offset, length,
- replacementCodepoint).decodeRest();
+ return new Utf8Decoder(utf8EncodedBytes, offset, length, replacementCodepoint)
+ .decodeRest();
}
/**
@@ -137,7 +145,9 @@
final int length;
final int replacementCodepoint;
- IterableUtf8Decoder(this.bytes, [this.offset = 0, this.length = null,
+ IterableUtf8Decoder(this.bytes,
+ [this.offset = 0,
+ this.length = null,
this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
Utf8Decoder get iterator =>
@@ -158,17 +168,16 @@
final int replacementCodepoint;
int _current = null;
- Utf8Decoder(List<int> utf8EncodedBytes, [int offset = 0, int length,
- this.replacementCodepoint =
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- utf8EncodedBytesIterator =
- (new ListRange(utf8EncodedBytes, offset, length)).iterator;
+ Utf8Decoder(List<int> utf8EncodedBytes,
+ [int offset = 0,
+ int length,
+ this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+ : utf8EncodedBytesIterator =
+ (new ListRange(utf8EncodedBytes, offset, length)).iterator;
-
- Utf8Decoder._fromListRangeIterator(ListRange source, [
- this.replacementCodepoint =
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- utf8EncodedBytesIterator = source.iterator;
+ Utf8Decoder._fromListRangeIterator(ListRange source,
+ [this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+ : utf8EncodedBytesIterator = source.iterator;
/** Decode the remaininder of the characters in this decoder
* into a [List<int>].
@@ -254,11 +263,10 @@
}
j++;
}
- bool validSequence = (j == additionalBytes && (
- value < UNICODE_UTF16_RESERVED_LO ||
- value > UNICODE_UTF16_RESERVED_HI));
- bool nonOverlong =
- (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
+ bool validSequence = (j == additionalBytes &&
+ (value < UNICODE_UTF16_RESERVED_LO ||
+ value > UNICODE_UTF16_RESERVED_HI));
+ bool nonOverlong = (additionalBytes == 1 && value > _UTF8_ONE_BYTE_MAX) ||
(additionalBytes == 2 && value > _UTF8_TWO_BYTE_MAX) ||
(additionalBytes == 3 && value > _UTF8_THREE_BYTE_MAX);
bool inRange = value <= UNICODE_VALID_RANGE_MAX;
diff --git a/lib/src/utf_16_code_unit_decoder.dart b/lib/src/utf_16_code_unit_decoder.dart
index a0a4b3c..7d9e98f 100644
--- a/lib/src/utf_16_code_unit_decoder.dart
+++ b/lib/src/utf_16_code_unit_decoder.dart
@@ -19,11 +19,12 @@
final int replacementCodepoint;
int _current = null;
- Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
- int this.replacementCodepoint =
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
- utf16CodeUnitIterator =
- (new ListRange(utf16CodeUnits, offset, length)).iterator;
+ Utf16CodeUnitDecoder(List<int> utf16CodeUnits,
+ [int offset = 0,
+ int length,
+ int this.replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
+ : utf16CodeUnitIterator =
+ (new ListRange(utf16CodeUnits, offset, length)).iterator;
Utf16CodeUnitDecoder.fromListRangeIterator(
ListRangeIterator this.utf16CodeUnitIterator,
@@ -61,7 +62,7 @@
_current = value;
} else {
if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_0_BASE &&
- nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
+ nextValue < UNICODE_UTF16_SURROGATE_UNIT_1_BASE) {
utf16CodeUnitIterator.backup();
}
if (replacementCodepoint != null) {
@@ -80,4 +81,3 @@
return true;
}
}
-
diff --git a/lib/src/utf/utf_stream.dart b/lib/src/utf_stream.dart
similarity index 84%
rename from lib/src/utf/utf_stream.dart
rename to lib/src/utf_stream.dart
index 0936616..83e442f 100644
--- a/lib/src/utf/utf_stream.dart
+++ b/lib/src/utf_stream.dart
@@ -2,7 +2,12 @@
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
-part of utf;
+library utf.utf_stream;
+
+import 'dart:async';
+
+import 'constants.dart';
+import 'util.dart';
// TODO(floitsch): make this transformer reusable.
abstract class _StringDecoder
@@ -16,15 +21,14 @@
_StringDecoder(int this._replacementChar);
Stream<String> bind(Stream<List<int>> stream) {
- return new Stream.eventTransformed(
- stream,
+ return new Stream<String>.eventTransformed(stream,
(EventSink<String> sink) {
- if (_outSink != null) {
- throw new StateError("String decoder already used");
- }
- _outSink = sink;
- return this;
- });
+ if (_outSink != null) {
+ throw new StateError("String decoder already used");
+ }
+ _outSink = sink;
+ return this;
+ });
}
void add(List<int> bytes) {
@@ -117,31 +121,36 @@
class Utf8DecoderTransformer extends _StringDecoder {
Utf8DecoderTransformer(
[int replacementChar = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT])
- : super(replacementChar);
+ : super(replacementChar);
int _processBytes(int getNext()) {
int value = getNext();
- if ((value & 0xFF) != value) return -1; // Not a byte.
+ if ((value & 0xFF) != value) return -1; // Not a byte.
if ((value & 0x80) == 0x80) {
int additionalBytes;
int min;
- if ((value & 0xe0) == 0xc0) { // 110xxxxx
+ if ((value & 0xe0) == 0xc0) {
+ // 110xxxxx
value = value & 0x1F;
additionalBytes = 1;
min = 0x80;
- } else if ((value & 0xf0) == 0xe0) { // 1110xxxx
+ } else if ((value & 0xf0) == 0xe0) {
+ // 1110xxxx
value = value & 0x0F;
additionalBytes = 2;
min = 0x800;
- } else if ((value & 0xf8) == 0xf0) { // 11110xxx
+ } else if ((value & 0xf8) == 0xf0) {
+ // 11110xxx
value = value & 0x07;
additionalBytes = 3;
min = 0x10000;
- } else if ((value & 0xfc) == 0xf8) { // 111110xx
+ } else if ((value & 0xfc) == 0xf8) {
+ // 111110xx
value = value & 0x03;
additionalBytes = 4;
min = 0x200000;
- } else if ((value & 0xfe) == 0xfc) { // 1111110x
+ } else if ((value & 0xfe) == 0xfc) {
+ // 1111110x
value = value & 0x01;
additionalBytes = 5;
min = 0x4000000;
@@ -150,7 +159,7 @@
}
for (int i = 0; i < additionalBytes; i++) {
int next = getNext();
- if (next == null) return 0; // Not enough chars, reset.
+ if (next == null) return 0; // Not enough chars, reset.
if ((next & 0xc0) != 0x80 || (next & 0xff) != next) return -1;
value = value << 6 | (next & 0x3f);
if (additionalBytes >= 3 && i == 0 && value << 12 > 0x10FFFF) {
@@ -167,22 +176,19 @@
}
}
-
abstract class _StringEncoder
implements StreamTransformer<String, List<int>>, EventSink<String> {
-
EventSink<List<int>> _outSink;
Stream<List<int>> bind(Stream<String> stream) {
- return new Stream.eventTransformed(
- stream,
+ return new Stream<List<int>>.eventTransformed(stream,
(EventSink<List<int>> sink) {
- if (_outSink != null) {
- throw new StateError("String encoder already used");
- }
- _outSink = sink;
- return this;
- });
+ if (_outSink != null) {
+ throw new StateError("String encoder already used");
+ }
+ _outSink = sink;
+ return this;
+ });
}
void add(String data) {
@@ -193,7 +199,9 @@
_outSink.addError(error, stackTrace);
}
- void close() { _outSink.close(); }
+ void close() {
+ _outSink.close();
+ }
List<int> _processString(String string);
}
@@ -204,7 +212,6 @@
class Utf8EncoderTransformer extends _StringEncoder {
List<int> _processString(String string) {
var bytes = <int>[];
- int pos = 0;
List<int> codepoints = utf16CodeUnitsToCodepoints(string.codeUnits);
int length = codepoints.length;
for (int i = 0; i < length; i++) {
@@ -219,7 +226,7 @@
additionalBytes = 1;
} else if (charCode <= 0xFFFF) {
// 1110xxxx (xxxx is top 4 bits)
- bytes.add(((charCode >> 12) & 0x0F)| 0xE0);
+ bytes.add(((charCode >> 12) & 0x0F) | 0xE0);
additionalBytes = 2;
} else {
// 11110xxx (xxx is top 3 bits)
@@ -230,7 +237,6 @@
// 10xxxxxx (xxxxxx is next 6 bits from the top).
bytes.add(((charCode >> (6 * (i - 1))) & 0x3F) | 0x80);
}
- pos += additionalBytes + 1;
}
return bytes;
}
diff --git a/lib/src/util.dart b/lib/src/util.dart
index 17427d5..f94f576 100644
--- a/lib/src/util.dart
+++ b/lib/src/util.dart
@@ -11,13 +11,14 @@
/**
* Decodes the utf16 codeunits to codepoints.
*/
-List<int> utf16CodeUnitsToCodepoints(
- List<int> utf16CodeUnits, [int offset = 0, int length,
+List<int> utf16CodeUnitsToCodepoints(List<int> utf16CodeUnits,
+ [int offset = 0,
+ int length,
int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
ListRangeIterator source =
(new ListRange(utf16CodeUnits, offset, length)).iterator;
- Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder
- .fromListRangeIterator(source, replacementCodepoint);
+ Utf16CodeUnitDecoder decoder = new Utf16CodeUnitDecoder.fromListRangeIterator(
+ source, replacementCodepoint);
List<int> codepoints = new List<int>(source.remaining);
int i = 0;
while (decoder.moveNext()) {
@@ -35,12 +36,10 @@
/**
* Encode code points as UTF16 code units.
*/
-List<int> codepointsToUtf16CodeUnits(
- List<int> codepoints,
+List<int> codepointsToUtf16CodeUnits(List<int> codepoints,
[int offset = 0,
- int length,
- int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
-
+ int length,
+ int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
ListRange listRange = new ListRange(codepoints, offset, length);
int encodedLength = 0;
for (int value in listRange) {
@@ -66,8 +65,8 @@
int base = value - UNICODE_UTF16_OFFSET;
codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
((base & UNICODE_UTF16_HI_MASK) >> 10);
- codeUnitsBuffer[j++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
- (base & UNICODE_UTF16_LO_MASK);
+ codeUnitsBuffer[j++] =
+ UNICODE_UTF16_SURROGATE_UNIT_1_BASE + (base & UNICODE_UTF16_LO_MASK);
} else if (replacementCodepoint != null) {
codeUnitsBuffer[j++] = replacementCodepoint;
} else {
diff --git a/lib/utf.dart b/lib/utf.dart
index 30d5db5..40ffcfe 100644
--- a/lib/utf.dart
+++ b/lib/utf.dart
@@ -8,18 +8,10 @@
*/
library utf;
-import "dart:async";
-import "dart:collection";
-
-import "src/constants.dart";
-import 'src/utf_16_code_unit_decoder.dart';
-import 'src/list_range.dart';
-import 'src/util.dart';
-
export 'src/constants.dart';
+export 'src/shared.dart';
export 'src/utf_16_code_unit_decoder.dart';
-
-part "src/utf/utf_stream.dart";
-part "src/utf/utf8.dart";
-part "src/utf/utf16.dart";
-part "src/utf/utf32.dart";
+export 'src/utf_stream.dart';
+export 'src/utf16.dart';
+export 'src/utf32.dart';
+export 'src/utf8.dart';
diff --git a/pubspec.yaml b/pubspec.yaml
index 1f91eac..0a37979 100644
--- a/pubspec.yaml
+++ b/pubspec.yaml
@@ -1,5 +1,5 @@
name: utf
-version: 0.9.0+2
+version: 0.9.1-dev
author: Dart Team <misc@dartlang.org>
description: >
A Unicode library. Intended for advanced use where the built-in facilities
@@ -7,3 +7,5 @@
homepage: https://www.github.com/dart-lang/utf
environment:
sdk: '>=1.0.0 <2.0.0'
+dev_dependencies:
+ test: ^0.12.0
diff --git a/test/expect.dart b/test/expect.dart
new file mode 100644
index 0000000..6945497
--- /dev/null
+++ b/test/expect.dart
@@ -0,0 +1,19 @@
+import 'package:test/test.dart' as ut;
+
+class Expect {
+ static void listEquals(a, b, [String message]) {
+ ut.expect(b, ut.orderedEquals(a), reason: message);
+ }
+
+ static void equals(a, b) {
+ ut.expect(b, ut.equals(a));
+ }
+
+ static void stringEquals(String a, String b, [String message]) {
+ ut.expect(b, ut.equals(a), reason: message);
+ }
+
+ static void isFalse(value) {
+ ut.expect(value, ut.isFalse);
+ }
+}
diff --git a/test/unicode_core_test.dart b/test/unicode_core_test.dart
index 6e13e96..0ee13f0 100755
--- a/test/unicode_core_test.dart
+++ b/test/unicode_core_test.dart
@@ -4,32 +4,30 @@
library utf.unicode_core_test;
-import 'package:expect/expect.dart';
-
+import 'package:test/test.dart';
import 'package:utf/utf.dart';
import 'package:utf/src/util.dart';
+import 'expect.dart';
+
void main() {
- testCodepointsToUtf16CodeUnits();
- testUtf16bytesToCodepoints();
+ test('codepoints to utf16 codepoints', testCodepointsToUtf16CodeUnits);
+ test('utf16 bytes to codepoints', testUtf16bytesToCodepoints);
}
void testCodepointsToUtf16CodeUnits() {
// boundary conditions
Expect.listEquals([], codepointsToUtf16CodeUnits([]), "no input");
Expect.listEquals([0x0], codepointsToUtf16CodeUnits([0x0]), "0");
- Expect.listEquals([0xd800, 0xdc00],
- codepointsToUtf16CodeUnits([0x10000]), "10000");
+ Expect.listEquals(
+ [0xd800, 0xdc00], codepointsToUtf16CodeUnits([0x10000]), "10000");
- Expect.listEquals([0xffff],
- codepointsToUtf16CodeUnits([0xffff]), "ffff");
- Expect.listEquals([0xdbff, 0xdfff],
- codepointsToUtf16CodeUnits([0x10ffff]), "10ffff");
+ Expect.listEquals([0xffff], codepointsToUtf16CodeUnits([0xffff]), "ffff");
+ Expect.listEquals(
+ [0xdbff, 0xdfff], codepointsToUtf16CodeUnits([0x10ffff]), "10ffff");
- Expect.listEquals([0xd7ff],
- codepointsToUtf16CodeUnits([0xd7ff]), "d7ff");
- Expect.listEquals([0xe000],
- codepointsToUtf16CodeUnits([0xe000]), "e000");
+ Expect.listEquals([0xd7ff], codepointsToUtf16CodeUnits([0xd7ff]), "d7ff");
+ Expect.listEquals([0xe000], codepointsToUtf16CodeUnits([0xe000]), "e000");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
codepointsToUtf16CodeUnits([0xd800]), "d800");
@@ -41,52 +39,49 @@
// boundary conditions: First possible values
Expect.listEquals([], utf16CodeUnitsToCodepoints([]), "no input");
Expect.listEquals([0x0], utf16CodeUnitsToCodepoints([0x0]), "0");
- Expect.listEquals([0x10000],
- utf16CodeUnitsToCodepoints([0xd800, 0xdc00]), "10000");
+ Expect.listEquals(
+ [0x10000], utf16CodeUnitsToCodepoints([0xd800, 0xdc00]), "10000");
// boundary conditions: Last possible sequence of a certain length
- Expect.listEquals([0xffff],
- utf16CodeUnitsToCodepoints([0xffff]), "ffff");
- Expect.listEquals([0x10ffff],
- utf16CodeUnitsToCodepoints([0xdbff, 0xdfff]), "10ffff");
+ Expect.listEquals([0xffff], utf16CodeUnitsToCodepoints([0xffff]), "ffff");
+ Expect.listEquals(
+ [0x10ffff], utf16CodeUnitsToCodepoints([0xdbff, 0xdfff]), "10ffff");
// other boundary conditions
- Expect.listEquals([0xd7ff],
- utf16CodeUnitsToCodepoints([0xd7ff]), "d7ff");
- Expect.listEquals([0xe000],
- utf16CodeUnitsToCodepoints([0xe000]), "e000");
+ Expect.listEquals([0xd7ff], utf16CodeUnitsToCodepoints([0xd7ff]), "d7ff");
+ Expect.listEquals([0xe000], utf16CodeUnitsToCodepoints([0xe000]), "e000");
// unexpected continuation bytes
- Expect.listEquals([0xfffd],
- utf16CodeUnitsToCodepoints([0xdc00]),
+ Expect.listEquals([0xfffd], utf16CodeUnitsToCodepoints([0xdc00]),
"dc00 first unexpected continuation byte");
- Expect.listEquals([0xfffd],
- utf16CodeUnitsToCodepoints([0xdfff]),
+ Expect.listEquals([0xfffd], utf16CodeUnitsToCodepoints([0xdfff]),
"dfff last unexpected continuation byte");
- Expect.listEquals([0xfffd],
- utf16CodeUnitsToCodepoints([0xdc00]),
+ Expect.listEquals([0xfffd], utf16CodeUnitsToCodepoints([0xdc00]),
"1 unexpected continuation bytes");
- Expect.listEquals([0xfffd, 0xfffd],
+ Expect.listEquals(
+ [0xfffd, 0xfffd],
utf16CodeUnitsToCodepoints([0xdc00, 0xdc00]),
"2 unexpected continuation bytes");
- Expect.listEquals([0xfffd, 0xfffd ,0xfffd],
+ Expect.listEquals(
+ [0xfffd, 0xfffd, 0xfffd],
utf16CodeUnitsToCodepoints([0xdc00, 0xdc00, 0xdc00]),
"3 unexpected continuation bytes");
// incomplete sequences
- Expect.listEquals([0xfffd], utf16CodeUnitsToCodepoints([0xd800]),
- "d800 last byte missing");
- Expect.listEquals([0xfffd], utf16CodeUnitsToCodepoints([0xdbff]),
- "dbff last byte missing");
+ Expect.listEquals(
+ [0xfffd], utf16CodeUnitsToCodepoints([0xd800]), "d800 last byte missing");
+ Expect.listEquals(
+ [0xfffd], utf16CodeUnitsToCodepoints([0xdbff]), "dbff last byte missing");
// concatenation of incomplete sequences
- Expect.listEquals([0xfffd, 0xfffd],
+ Expect.listEquals(
+ [0xfffd, 0xfffd],
utf16CodeUnitsToCodepoints([0xd800, 0xdbff]),
"d800 dbff last byte missing");
// impossible bytes
- Expect.listEquals([0xfffd], utf16CodeUnitsToCodepoints([0x110000]),
- "110000 out of bounds");
+ Expect.listEquals(
+ [0xfffd], utf16CodeUnitsToCodepoints([0x110000]), "110000 out of bounds");
// overlong sequences not possible in utf16 (nothing < x10000)
// illegal code positions d800-dfff not encodable (< x10000)
diff --git a/test/utf16_test.dart b/test/utf16_test.dart
index 43971ca..cf4a7c8 100755
--- a/test/utf16_test.dart
+++ b/test/utf16_test.dart
@@ -4,9 +4,11 @@
library utf.utf16_test;
-import 'package:expect/expect.dart';
+import 'package:test/test.dart';
import 'package:utf/utf.dart';
+import 'expect.dart';
+
const String testKoreanCharSubset = """
가각갂갃간갅갆갇갈갉갊갋갌갍갎갏감갑값갓갔강갖갗갘같갚갛
개객갞갟갠갡갢갣갤갥갦갧갨갩갪갫갬갭갮갯갰갱갲갳갴갵갶갷
@@ -14,90 +16,89 @@
const String testHanWater = "水";
-const List<int> testKoreanCharSubsetUtf16beBom = const<int>[
- 0xfe, 0xff, 0xac, 0x00, 0xac, 0x01, 0xac, 0x02,
- 0xac, 0x03, 0xac, 0x04, 0xac, 0x05, 0xac, 0x06,
- 0xac, 0x07, 0xac, 0x08, 0xac, 0x09, 0xac, 0x0a,
- 0xac, 0x0b, 0xac, 0x0c, 0xac, 0x0d, 0xac, 0x0e,
- 0xac, 0x0f, 0xac, 0x10, 0xac, 0x11, 0xac, 0x12,
- 0xac, 0x13, 0xac, 0x14, 0xac, 0x15, 0xac, 0x16,
- 0xac, 0x17, 0xac, 0x18, 0xac, 0x19, 0xac, 0x1a,
- 0xac, 0x1b, 0x00, 0x0a, 0xac, 0x1c, 0xac, 0x1d,
- 0xac, 0x1e, 0xac, 0x1f, 0xac, 0x20, 0xac, 0x21,
- 0xac, 0x22, 0xac, 0x23, 0xac, 0x24, 0xac, 0x25,
- 0xac, 0x26, 0xac, 0x27, 0xac, 0x28, 0xac, 0x29,
- 0xac, 0x2a, 0xac, 0x2b, 0xac, 0x2c, 0xac, 0x2d,
- 0xac, 0x2e, 0xac, 0x2f, 0xac, 0x30, 0xac, 0x31,
- 0xac, 0x32, 0xac, 0x33, 0xac, 0x34, 0xac, 0x35,
- 0xac, 0x36, 0xac, 0x37, 0x00, 0x0a, 0xac, 0x38,
- 0xac, 0x39, 0xac, 0x3a, 0xac, 0x3b, 0xac, 0x3c,
- 0xac, 0x3d, 0xac, 0x3e, 0xac, 0x3f, 0xac, 0x40,
- 0xac, 0x41, 0xac, 0x42, 0xac, 0x43, 0xac, 0x44,
- 0xac, 0x45, 0xac, 0x46, 0xac, 0x47, 0xac, 0x48,
- 0xac, 0x49, 0xac, 0x4a, 0xac, 0x4b, 0xac, 0x4c,
- 0xac, 0x4d, 0xac, 0x4e, 0xac, 0x4f, 0xac, 0x50,
- 0xac, 0x51, 0xac, 0x52, 0xac, 0x53];
+const List<int> testKoreanCharSubsetUtf16beBom = const <int>[
+ 0xfe, 0xff, 0xac, 0x00, 0xac, 0x01, 0xac, 0x02, // 8
+ 0xac, 0x03, 0xac, 0x04, 0xac, 0x05, 0xac, 0x06,
+ 0xac, 0x07, 0xac, 0x08, 0xac, 0x09, 0xac, 0x0a,
+ 0xac, 0x0b, 0xac, 0x0c, 0xac, 0x0d, 0xac, 0x0e,
+ 0xac, 0x0f, 0xac, 0x10, 0xac, 0x11, 0xac, 0x12,
+ 0xac, 0x13, 0xac, 0x14, 0xac, 0x15, 0xac, 0x16,
+ 0xac, 0x17, 0xac, 0x18, 0xac, 0x19, 0xac, 0x1a,
+ 0xac, 0x1b, 0x00, 0x0a, 0xac, 0x1c, 0xac, 0x1d,
+ 0xac, 0x1e, 0xac, 0x1f, 0xac, 0x20, 0xac, 0x21,
+ 0xac, 0x22, 0xac, 0x23, 0xac, 0x24, 0xac, 0x25,
+ 0xac, 0x26, 0xac, 0x27, 0xac, 0x28, 0xac, 0x29,
+ 0xac, 0x2a, 0xac, 0x2b, 0xac, 0x2c, 0xac, 0x2d,
+ 0xac, 0x2e, 0xac, 0x2f, 0xac, 0x30, 0xac, 0x31,
+ 0xac, 0x32, 0xac, 0x33, 0xac, 0x34, 0xac, 0x35,
+ 0xac, 0x36, 0xac, 0x37, 0x00, 0x0a, 0xac, 0x38,
+ 0xac, 0x39, 0xac, 0x3a, 0xac, 0x3b, 0xac, 0x3c,
+ 0xac, 0x3d, 0xac, 0x3e, 0xac, 0x3f, 0xac, 0x40,
+ 0xac, 0x41, 0xac, 0x42, 0xac, 0x43, 0xac, 0x44,
+ 0xac, 0x45, 0xac, 0x46, 0xac, 0x47, 0xac, 0x48,
+ 0xac, 0x49, 0xac, 0x4a, 0xac, 0x4b, 0xac, 0x4c,
+ 0xac, 0x4d, 0xac, 0x4e, 0xac, 0x4f, 0xac, 0x50,
+ 0xac, 0x51, 0xac, 0x52, 0xac, 0x53
+];
-const List<int> testKoreanCharSubsetUtf16le = const<int> [
- 0x00, 0xac, 0x01, 0xac, 0x02, 0xac, 0x03, 0xac,
- 0x04, 0xac, 0x05, 0xac, 0x06, 0xac, 0x07, 0xac,
- 0x08, 0xac, 0x09, 0xac, 0x0a, 0xac, 0x0b, 0xac,
- 0x0c, 0xac, 0x0d, 0xac, 0x0e, 0xac, 0x0f, 0xac,
- 0x10, 0xac, 0x11, 0xac, 0x12, 0xac, 0x13, 0xac,
- 0x14, 0xac, 0x15, 0xac, 0x16, 0xac, 0x17, 0xac,
- 0x18, 0xac, 0x19, 0xac, 0x1a, 0xac, 0x1b, 0xac,
- 0x0a, 0x00, 0x1c, 0xac, 0x1d, 0xac, 0x1e, 0xac,
- 0x1f, 0xac, 0x20, 0xac, 0x21, 0xac, 0x22, 0xac,
- 0x23, 0xac, 0x24, 0xac, 0x25, 0xac, 0x26, 0xac,
- 0x27, 0xac, 0x28, 0xac, 0x29, 0xac, 0x2a, 0xac,
- 0x2b, 0xac, 0x2c, 0xac, 0x2d, 0xac, 0x2e, 0xac,
- 0x2f, 0xac, 0x30, 0xac, 0x31, 0xac, 0x32, 0xac,
- 0x33, 0xac, 0x34, 0xac, 0x35, 0xac, 0x36, 0xac,
- 0x37, 0xac, 0x0a, 0x00, 0x38, 0xac, 0x39, 0xac,
- 0x3a, 0xac, 0x3b, 0xac, 0x3c, 0xac, 0x3d, 0xac,
- 0x3e, 0xac, 0x3f, 0xac, 0x40, 0xac, 0x41, 0xac,
- 0x42, 0xac, 0x43, 0xac, 0x44, 0xac, 0x45, 0xac,
- 0x46, 0xac, 0x47, 0xac, 0x48, 0xac, 0x49, 0xac,
- 0x4a, 0xac, 0x4b, 0xac, 0x4c, 0xac, 0x4d, 0xac,
- 0x4e, 0xac, 0x4f, 0xac, 0x50, 0xac, 0x51, 0xac,
- 0x52, 0xac, 0x53, 0xac];
+const List<int> testKoreanCharSubsetUtf16le = const <int>[
+ 0x00, 0xac, 0x01, 0xac, 0x02, 0xac, 0x03, 0xac, // 8
+ 0x04, 0xac, 0x05, 0xac, 0x06, 0xac, 0x07, 0xac,
+ 0x08, 0xac, 0x09, 0xac, 0x0a, 0xac, 0x0b, 0xac,
+ 0x0c, 0xac, 0x0d, 0xac, 0x0e, 0xac, 0x0f, 0xac,
+ 0x10, 0xac, 0x11, 0xac, 0x12, 0xac, 0x13, 0xac,
+ 0x14, 0xac, 0x15, 0xac, 0x16, 0xac, 0x17, 0xac,
+ 0x18, 0xac, 0x19, 0xac, 0x1a, 0xac, 0x1b, 0xac,
+ 0x0a, 0x00, 0x1c, 0xac, 0x1d, 0xac, 0x1e, 0xac,
+ 0x1f, 0xac, 0x20, 0xac, 0x21, 0xac, 0x22, 0xac,
+ 0x23, 0xac, 0x24, 0xac, 0x25, 0xac, 0x26, 0xac,
+ 0x27, 0xac, 0x28, 0xac, 0x29, 0xac, 0x2a, 0xac,
+ 0x2b, 0xac, 0x2c, 0xac, 0x2d, 0xac, 0x2e, 0xac,
+ 0x2f, 0xac, 0x30, 0xac, 0x31, 0xac, 0x32, 0xac,
+ 0x33, 0xac, 0x34, 0xac, 0x35, 0xac, 0x36, 0xac,
+ 0x37, 0xac, 0x0a, 0x00, 0x38, 0xac, 0x39, 0xac,
+ 0x3a, 0xac, 0x3b, 0xac, 0x3c, 0xac, 0x3d, 0xac,
+ 0x3e, 0xac, 0x3f, 0xac, 0x40, 0xac, 0x41, 0xac,
+ 0x42, 0xac, 0x43, 0xac, 0x44, 0xac, 0x45, 0xac,
+ 0x46, 0xac, 0x47, 0xac, 0x48, 0xac, 0x49, 0xac,
+ 0x4a, 0xac, 0x4b, 0xac, 0x4c, 0xac, 0x4d, 0xac,
+ 0x4e, 0xac, 0x4f, 0xac, 0x50, 0xac, 0x51, 0xac,
+ 0x52, 0xac, 0x53, 0xac
+];
void main() {
- testEncodeToUtf16();
- testUtf16BytesToString();
- testIterableMethods();
+ test('encode to utf16', testEncodeToUtf16);
+ test('utf16 bytes to string', testUtf16BytesToString);
+ test('iterable methods', testIterableMethods);
}
void testEncodeToUtf16() {
- Expect.listEquals([], encodeUtf16be("")); // TODO(dcarlson) should we skip bom if empty?
+ Expect.listEquals(
+ [], encodeUtf16be("")); // TODO(dcarlson) should we skip bom if empty?
Expect.listEquals(testKoreanCharSubsetUtf16beBom,
- encodeUtf16(testKoreanCharSubset),
- "encode UTF-16(BE by default) Korean");
+ encodeUtf16(testKoreanCharSubset), "encode UTF-16(BE by default) Korean");
Expect.listEquals(testKoreanCharSubsetUtf16le,
- encodeUtf16le(testKoreanCharSubset),
- "encode UTF-16LE Korean");
+ encodeUtf16le(testKoreanCharSubset), "encode UTF-16LE Korean");
}
void testUtf16BytesToString() {
Expect.stringEquals("", decodeUtf16([]));
- Expect.stringEquals(testHanWater, decodeUtf16([0x6C, 0x34]),
- "Water variation 1");
- Expect.stringEquals(testHanWater, decodeUtf16([0xFE, 0xFF, 0x6C, 0x34]),
- "Water variation 2");
- Expect.stringEquals(testHanWater, decodeUtf16([0xFF, 0xFE, 0x34, 0x6C]),
- "Water variation 3");
+ Expect.stringEquals(
+ testHanWater, decodeUtf16([0x6C, 0x34]), "Water variation 1");
+ Expect.stringEquals(
+ testHanWater, decodeUtf16([0xFE, 0xFF, 0x6C, 0x34]), "Water variation 2");
+ Expect.stringEquals(
+ testHanWater, decodeUtf16([0xFF, 0xFE, 0x34, 0x6C]), "Water variation 3");
- Expect.stringEquals(testHanWater, decodeUtf16be([0x6C, 0x34]),
- "Water variation 4");
- Expect.stringEquals(testHanWater,
- decodeUtf16be([0xFE, 0xFF, 0x6C, 0x34]),
+ Expect.stringEquals(
+ testHanWater, decodeUtf16be([0x6C, 0x34]), "Water variation 4");
+ Expect.stringEquals(testHanWater, decodeUtf16be([0xFE, 0xFF, 0x6C, 0x34]),
"Water variation 5");
- Expect.stringEquals(testHanWater, decodeUtf16le([0x34, 0x6C]),
- "Water variation 6");
- Expect.stringEquals(testHanWater,
- decodeUtf16le([0xFF, 0xFE, 0x34, 0x6C]),
+ Expect.stringEquals(
+ testHanWater, decodeUtf16le([0x34, 0x6C]), "Water variation 6");
+ Expect.stringEquals(testHanWater, decodeUtf16le([0xFF, 0xFE, 0x34, 0x6C]),
"Water variation 7");
Expect.stringEquals(testKoreanCharSubset,
@@ -109,7 +110,7 @@
Expect.isFalse(decodeUtf16AsIterable([]).iterator.moveNext());
IterableUtf16Decoder koreanDecoder =
- decodeUtf16AsIterable(testKoreanCharSubsetUtf16beBom);
+ decodeUtf16AsIterable(testKoreanCharSubsetUtf16beBom);
// get the first character
Expect.equals(testKoreanCharSubset.codeUnits[0], koreanDecoder.first);
// get the whole translation using the Iterable interface
@@ -117,12 +118,17 @@
new String.fromCharCodes(new List<int>.from(koreanDecoder)));
// specify types
- Expect.equals(44032, (new List<int>
- .from(decodeUtf16beAsIterable(testKoreanCharSubsetUtf16beBom)))[0]);
- Expect.equals(44032, (new List<int>
- .from(decodeUtf16leAsIterable(testKoreanCharSubsetUtf16le)))[0]);
+ Expect.equals(
+ 44032,
+ (new List<int>.from(
+ decodeUtf16beAsIterable(testKoreanCharSubsetUtf16beBom)))[0]);
+ Expect.equals(
+ 44032,
+ (new List<int>.from(
+ decodeUtf16leAsIterable(testKoreanCharSubsetUtf16le)))[0]);
bool stripBom = false;
- Expect.equals(UNICODE_BOM, (new List<int>
- .from(decodeUtf16beAsIterable(testKoreanCharSubsetUtf16beBom,
- 0, null, stripBom)))[0]);
+ Expect.equals(
+ UNICODE_BOM,
+ (new List<int>.from(decodeUtf16beAsIterable(
+ testKoreanCharSubsetUtf16beBom, 0, null, stripBom)))[0]);
}
diff --git a/test/utf32_test.dart b/test/utf32_test.dart
index 1a60a6f..0da0ecf 100755
--- a/test/utf32_test.dart
+++ b/test/utf32_test.dart
@@ -4,9 +4,11 @@
library utf.utf32_test;
-import 'package:expect/expect.dart';
+import 'package:test/test.dart';
import 'package:utf/utf.dart';
+import 'expect.dart';
+
const String testKoreanCharSubset = """
가각갂갃간갅갆갇갈갉갊갋갌갍갎갏감갑값갓갔강갖갗갘같갚갛
개객갞갟갠갡갢갣갤갥갦갧갨갩갪갫갬갭갮갯갰갱갲갳갴갵갶갷
@@ -14,109 +16,112 @@
const String testHanTwice = "二";
-const List<int> testKoreanCharSubsetUtf32beBom = const<int>[
- 0x00, 0x00, 0xfe, 0xff, 0x00, 0x00, 0xac, 0x00,
- 0x00, 0x00, 0xac, 0x01, 0x00, 0x00, 0xac, 0x02,
- 0x00, 0x00, 0xac, 0x03, 0x00, 0x00, 0xac, 0x04,
- 0x00, 0x00, 0xac, 0x05, 0x00, 0x00, 0xac, 0x06,
- 0x00, 0x00, 0xac, 0x07, 0x00, 0x00, 0xac, 0x08,
- 0x00, 0x00, 0xac, 0x09, 0x00, 0x00, 0xac, 0x0a,
- 0x00, 0x00, 0xac, 0x0b, 0x00, 0x00, 0xac, 0x0c,
- 0x00, 0x00, 0xac, 0x0d, 0x00, 0x00, 0xac, 0x0e,
- 0x00, 0x00, 0xac, 0x0f, 0x00, 0x00, 0xac, 0x10,
- 0x00, 0x00, 0xac, 0x11, 0x00, 0x00, 0xac, 0x12,
- 0x00, 0x00, 0xac, 0x13, 0x00, 0x00, 0xac, 0x14,
- 0x00, 0x00, 0xac, 0x15, 0x00, 0x00, 0xac, 0x16,
- 0x00, 0x00, 0xac, 0x17, 0x00, 0x00, 0xac, 0x18,
- 0x00, 0x00, 0xac, 0x19, 0x00, 0x00, 0xac, 0x1a,
- 0x00, 0x00, 0xac, 0x1b, 0x00, 0x00, 0x00, 0x0a,
- 0x00, 0x00, 0xac, 0x1c, 0x00, 0x00, 0xac, 0x1d,
- 0x00, 0x00, 0xac, 0x1e, 0x00, 0x00, 0xac, 0x1f,
- 0x00, 0x00, 0xac, 0x20, 0x00, 0x00, 0xac, 0x21,
- 0x00, 0x00, 0xac, 0x22, 0x00, 0x00, 0xac, 0x23,
- 0x00, 0x00, 0xac, 0x24, 0x00, 0x00, 0xac, 0x25,
- 0x00, 0x00, 0xac, 0x26, 0x00, 0x00, 0xac, 0x27,
- 0x00, 0x00, 0xac, 0x28, 0x00, 0x00, 0xac, 0x29,
- 0x00, 0x00, 0xac, 0x2a, 0x00, 0x00, 0xac, 0x2b,
- 0x00, 0x00, 0xac, 0x2c, 0x00, 0x00, 0xac, 0x2d,
- 0x00, 0x00, 0xac, 0x2e, 0x00, 0x00, 0xac, 0x2f,
- 0x00, 0x00, 0xac, 0x30, 0x00, 0x00, 0xac, 0x31,
- 0x00, 0x00, 0xac, 0x32, 0x00, 0x00, 0xac, 0x33,
- 0x00, 0x00, 0xac, 0x34, 0x00, 0x00, 0xac, 0x35,
- 0x00, 0x00, 0xac, 0x36, 0x00, 0x00, 0xac, 0x37,
- 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0xac, 0x38,
- 0x00, 0x00, 0xac, 0x39, 0x00, 0x00, 0xac, 0x3a,
- 0x00, 0x00, 0xac, 0x3b, 0x00, 0x00, 0xac, 0x3c,
- 0x00, 0x00, 0xac, 0x3d, 0x00, 0x00, 0xac, 0x3e,
- 0x00, 0x00, 0xac, 0x3f, 0x00, 0x00, 0xac, 0x40,
- 0x00, 0x00, 0xac, 0x41, 0x00, 0x00, 0xac, 0x42,
- 0x00, 0x00, 0xac, 0x43, 0x00, 0x00, 0xac, 0x44,
- 0x00, 0x00, 0xac, 0x45, 0x00, 0x00, 0xac, 0x46,
- 0x00, 0x00, 0xac, 0x47, 0x00, 0x00, 0xac, 0x48,
- 0x00, 0x00, 0xac, 0x49, 0x00, 0x00, 0xac, 0x4a,
- 0x00, 0x00, 0xac, 0x4b, 0x00, 0x00, 0xac, 0x4c,
- 0x00, 0x00, 0xac, 0x4d, 0x00, 0x00, 0xac, 0x4e,
- 0x00, 0x00, 0xac, 0x4f, 0x00, 0x00, 0xac, 0x50,
- 0x00, 0x00, 0xac, 0x51, 0x00, 0x00, 0xac, 0x52,
- 0x00, 0x00, 0xac, 0x53];
+const List<int> testKoreanCharSubsetUtf32beBom = const <int>[
+ 0x00, 0x00, 0xfe, 0xff, 0x00, 0x00, 0xac, 0x00, // 8
+ 0x00, 0x00, 0xac, 0x01, 0x00, 0x00, 0xac, 0x02,
+ 0x00, 0x00, 0xac, 0x03, 0x00, 0x00, 0xac, 0x04,
+ 0x00, 0x00, 0xac, 0x05, 0x00, 0x00, 0xac, 0x06,
+ 0x00, 0x00, 0xac, 0x07, 0x00, 0x00, 0xac, 0x08,
+ 0x00, 0x00, 0xac, 0x09, 0x00, 0x00, 0xac, 0x0a,
+ 0x00, 0x00, 0xac, 0x0b, 0x00, 0x00, 0xac, 0x0c,
+ 0x00, 0x00, 0xac, 0x0d, 0x00, 0x00, 0xac, 0x0e,
+ 0x00, 0x00, 0xac, 0x0f, 0x00, 0x00, 0xac, 0x10,
+ 0x00, 0x00, 0xac, 0x11, 0x00, 0x00, 0xac, 0x12,
+ 0x00, 0x00, 0xac, 0x13, 0x00, 0x00, 0xac, 0x14,
+ 0x00, 0x00, 0xac, 0x15, 0x00, 0x00, 0xac, 0x16,
+ 0x00, 0x00, 0xac, 0x17, 0x00, 0x00, 0xac, 0x18,
+ 0x00, 0x00, 0xac, 0x19, 0x00, 0x00, 0xac, 0x1a,
+ 0x00, 0x00, 0xac, 0x1b, 0x00, 0x00, 0x00, 0x0a,
+ 0x00, 0x00, 0xac, 0x1c, 0x00, 0x00, 0xac, 0x1d,
+ 0x00, 0x00, 0xac, 0x1e, 0x00, 0x00, 0xac, 0x1f,
+ 0x00, 0x00, 0xac, 0x20, 0x00, 0x00, 0xac, 0x21,
+ 0x00, 0x00, 0xac, 0x22, 0x00, 0x00, 0xac, 0x23,
+ 0x00, 0x00, 0xac, 0x24, 0x00, 0x00, 0xac, 0x25,
+ 0x00, 0x00, 0xac, 0x26, 0x00, 0x00, 0xac, 0x27,
+ 0x00, 0x00, 0xac, 0x28, 0x00, 0x00, 0xac, 0x29,
+ 0x00, 0x00, 0xac, 0x2a, 0x00, 0x00, 0xac, 0x2b,
+ 0x00, 0x00, 0xac, 0x2c, 0x00, 0x00, 0xac, 0x2d,
+ 0x00, 0x00, 0xac, 0x2e, 0x00, 0x00, 0xac, 0x2f,
+ 0x00, 0x00, 0xac, 0x30, 0x00, 0x00, 0xac, 0x31,
+ 0x00, 0x00, 0xac, 0x32, 0x00, 0x00, 0xac, 0x33,
+ 0x00, 0x00, 0xac, 0x34, 0x00, 0x00, 0xac, 0x35,
+ 0x00, 0x00, 0xac, 0x36, 0x00, 0x00, 0xac, 0x37,
+ 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0xac, 0x38,
+ 0x00, 0x00, 0xac, 0x39, 0x00, 0x00, 0xac, 0x3a,
+ 0x00, 0x00, 0xac, 0x3b, 0x00, 0x00, 0xac, 0x3c,
+ 0x00, 0x00, 0xac, 0x3d, 0x00, 0x00, 0xac, 0x3e,
+ 0x00, 0x00, 0xac, 0x3f, 0x00, 0x00, 0xac, 0x40,
+ 0x00, 0x00, 0xac, 0x41, 0x00, 0x00, 0xac, 0x42,
+ 0x00, 0x00, 0xac, 0x43, 0x00, 0x00, 0xac, 0x44,
+ 0x00, 0x00, 0xac, 0x45, 0x00, 0x00, 0xac, 0x46,
+ 0x00, 0x00, 0xac, 0x47, 0x00, 0x00, 0xac, 0x48,
+ 0x00, 0x00, 0xac, 0x49, 0x00, 0x00, 0xac, 0x4a,
+ 0x00, 0x00, 0xac, 0x4b, 0x00, 0x00, 0xac, 0x4c,
+ 0x00, 0x00, 0xac, 0x4d, 0x00, 0x00, 0xac, 0x4e,
+ 0x00, 0x00, 0xac, 0x4f, 0x00, 0x00, 0xac, 0x50,
+ 0x00, 0x00, 0xac, 0x51, 0x00, 0x00, 0xac, 0x52,
+ 0x00, 0x00, 0xac, 0x53
+];
-const List<int> testKoreanCharSubsetUtf32le = const<int>[
- 0x00, 0xac, 0x00, 0x00, 0x01, 0xac, 0x00, 0x00,
- 0x02, 0xac, 0x00, 0x00, 0x03, 0xac, 0x00, 0x00,
- 0x04, 0xac, 0x00, 0x00, 0x05, 0xac, 0x00, 0x00,
- 0x06, 0xac, 0x00, 0x00, 0x07, 0xac, 0x00, 0x00,
- 0x08, 0xac, 0x00, 0x00, 0x09, 0xac, 0x00, 0x00,
- 0x0a, 0xac, 0x00, 0x00, 0x0b, 0xac, 0x00, 0x00,
- 0x0c, 0xac, 0x00, 0x00, 0x0d, 0xac, 0x00, 0x00,
- 0x0e, 0xac, 0x00, 0x00, 0x0f, 0xac, 0x00, 0x00,
- 0x10, 0xac, 0x00, 0x00, 0x11, 0xac, 0x00, 0x00,
- 0x12, 0xac, 0x00, 0x00, 0x13, 0xac, 0x00, 0x00,
- 0x14, 0xac, 0x00, 0x00, 0x15, 0xac, 0x00, 0x00,
- 0x16, 0xac, 0x00, 0x00, 0x17, 0xac, 0x00, 0x00,
- 0x18, 0xac, 0x00, 0x00, 0x19, 0xac, 0x00, 0x00,
- 0x1a, 0xac, 0x00, 0x00, 0x1b, 0xac, 0x00, 0x00,
- 0x0a, 0x00, 0x00, 0x00, 0x1c, 0xac, 0x00, 0x00,
- 0x1d, 0xac, 0x00, 0x00, 0x1e, 0xac, 0x00, 0x00,
- 0x1f, 0xac, 0x00, 0x00, 0x20, 0xac, 0x00, 0x00,
- 0x21, 0xac, 0x00, 0x00, 0x22, 0xac, 0x00, 0x00,
- 0x23, 0xac, 0x00, 0x00, 0x24, 0xac, 0x00, 0x00,
- 0x25, 0xac, 0x00, 0x00, 0x26, 0xac, 0x00, 0x00,
- 0x27, 0xac, 0x00, 0x00, 0x28, 0xac, 0x00, 0x00,
- 0x29, 0xac, 0x00, 0x00, 0x2a, 0xac, 0x00, 0x00,
- 0x2b, 0xac, 0x00, 0x00, 0x2c, 0xac, 0x00, 0x00,
- 0x2d, 0xac, 0x00, 0x00, 0x2e, 0xac, 0x00, 0x00,
- 0x2f, 0xac, 0x00, 0x00, 0x30, 0xac, 0x00, 0x00,
- 0x31, 0xac, 0x00, 0x00, 0x32, 0xac, 0x00, 0x00,
- 0x33, 0xac, 0x00, 0x00, 0x34, 0xac, 0x00, 0x00,
- 0x35, 0xac, 0x00, 0x00, 0x36, 0xac, 0x00, 0x00,
- 0x37, 0xac, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
- 0x38, 0xac, 0x00, 0x00, 0x39, 0xac, 0x00, 0x00,
- 0x3a, 0xac, 0x00, 0x00, 0x3b, 0xac, 0x00, 0x00,
- 0x3c, 0xac, 0x00, 0x00, 0x3d, 0xac, 0x00, 0x00,
- 0x3e, 0xac, 0x00, 0x00, 0x3f, 0xac, 0x00, 0x00,
- 0x40, 0xac, 0x00, 0x00, 0x41, 0xac, 0x00, 0x00,
- 0x42, 0xac, 0x00, 0x00, 0x43, 0xac, 0x00, 0x00,
- 0x44, 0xac, 0x00, 0x00, 0x45, 0xac, 0x00, 0x00,
- 0x46, 0xac, 0x00, 0x00, 0x47, 0xac, 0x00, 0x00,
- 0x48, 0xac, 0x00, 0x00, 0x49, 0xac, 0x00, 0x00,
- 0x4a, 0xac, 0x00, 0x00, 0x4b, 0xac, 0x00, 0x00,
- 0x4c, 0xac, 0x00, 0x00, 0x4d, 0xac, 0x00, 0x00,
- 0x4e, 0xac, 0x00, 0x00, 0x4f, 0xac, 0x00, 0x00,
- 0x50, 0xac, 0x00, 0x00, 0x51, 0xac, 0x00, 0x00,
- 0x52, 0xac, 0x00, 0x00, 0x53, 0xac, 0x00, 0x00];
+const List<int> testKoreanCharSubsetUtf32le = const <int>[
+ 0x00, 0xac, 0x00, 0x00, 0x01, 0xac, 0x00, 0x00, // 8
+ 0x02, 0xac, 0x00, 0x00, 0x03, 0xac, 0x00, 0x00,
+ 0x04, 0xac, 0x00, 0x00, 0x05, 0xac, 0x00, 0x00,
+ 0x06, 0xac, 0x00, 0x00, 0x07, 0xac, 0x00, 0x00,
+ 0x08, 0xac, 0x00, 0x00, 0x09, 0xac, 0x00, 0x00,
+ 0x0a, 0xac, 0x00, 0x00, 0x0b, 0xac, 0x00, 0x00,
+ 0x0c, 0xac, 0x00, 0x00, 0x0d, 0xac, 0x00, 0x00,
+ 0x0e, 0xac, 0x00, 0x00, 0x0f, 0xac, 0x00, 0x00,
+ 0x10, 0xac, 0x00, 0x00, 0x11, 0xac, 0x00, 0x00,
+ 0x12, 0xac, 0x00, 0x00, 0x13, 0xac, 0x00, 0x00,
+ 0x14, 0xac, 0x00, 0x00, 0x15, 0xac, 0x00, 0x00,
+ 0x16, 0xac, 0x00, 0x00, 0x17, 0xac, 0x00, 0x00,
+ 0x18, 0xac, 0x00, 0x00, 0x19, 0xac, 0x00, 0x00,
+ 0x1a, 0xac, 0x00, 0x00, 0x1b, 0xac, 0x00, 0x00,
+ 0x0a, 0x00, 0x00, 0x00, 0x1c, 0xac, 0x00, 0x00,
+ 0x1d, 0xac, 0x00, 0x00, 0x1e, 0xac, 0x00, 0x00,
+ 0x1f, 0xac, 0x00, 0x00, 0x20, 0xac, 0x00, 0x00,
+ 0x21, 0xac, 0x00, 0x00, 0x22, 0xac, 0x00, 0x00,
+ 0x23, 0xac, 0x00, 0x00, 0x24, 0xac, 0x00, 0x00,
+ 0x25, 0xac, 0x00, 0x00, 0x26, 0xac, 0x00, 0x00,
+ 0x27, 0xac, 0x00, 0x00, 0x28, 0xac, 0x00, 0x00,
+ 0x29, 0xac, 0x00, 0x00, 0x2a, 0xac, 0x00, 0x00,
+ 0x2b, 0xac, 0x00, 0x00, 0x2c, 0xac, 0x00, 0x00,
+ 0x2d, 0xac, 0x00, 0x00, 0x2e, 0xac, 0x00, 0x00,
+ 0x2f, 0xac, 0x00, 0x00, 0x30, 0xac, 0x00, 0x00,
+ 0x31, 0xac, 0x00, 0x00, 0x32, 0xac, 0x00, 0x00,
+ 0x33, 0xac, 0x00, 0x00, 0x34, 0xac, 0x00, 0x00,
+ 0x35, 0xac, 0x00, 0x00, 0x36, 0xac, 0x00, 0x00,
+ 0x37, 0xac, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00,
+ 0x38, 0xac, 0x00, 0x00, 0x39, 0xac, 0x00, 0x00,
+ 0x3a, 0xac, 0x00, 0x00, 0x3b, 0xac, 0x00, 0x00,
+ 0x3c, 0xac, 0x00, 0x00, 0x3d, 0xac, 0x00, 0x00,
+ 0x3e, 0xac, 0x00, 0x00, 0x3f, 0xac, 0x00, 0x00,
+ 0x40, 0xac, 0x00, 0x00, 0x41, 0xac, 0x00, 0x00,
+ 0x42, 0xac, 0x00, 0x00, 0x43, 0xac, 0x00, 0x00,
+ 0x44, 0xac, 0x00, 0x00, 0x45, 0xac, 0x00, 0x00,
+ 0x46, 0xac, 0x00, 0x00, 0x47, 0xac, 0x00, 0x00,
+ 0x48, 0xac, 0x00, 0x00, 0x49, 0xac, 0x00, 0x00,
+ 0x4a, 0xac, 0x00, 0x00, 0x4b, 0xac, 0x00, 0x00,
+ 0x4c, 0xac, 0x00, 0x00, 0x4d, 0xac, 0x00, 0x00,
+ 0x4e, 0xac, 0x00, 0x00, 0x4f, 0xac, 0x00, 0x00,
+ 0x50, 0xac, 0x00, 0x00, 0x51, 0xac, 0x00, 0x00,
+ 0x52, 0xac, 0x00, 0x00, 0x53, 0xac, 0x00, 0x00
+];
void main() {
- testUtf32BytesToString();
- testEncodeToUtf32();
- testIterableMethods();
+ test('utf32 bytes to string', testUtf32BytesToString);
+ test('encode to utf32', testEncodeToUtf32);
+ test('iterable methods', testIterableMethods);
}
void testEncodeToUtf32() {
- Expect.listEquals([], encodeUtf32le(""), "no input"); // TODO(dcarlson) skip bom on empty?
+ Expect.listEquals(
+ [], encodeUtf32le(""), "no input"); // TODO(dcarlson) skip bom on empty?
Expect.listEquals(testKoreanCharSubsetUtf32beBom,
- encodeUtf32(testKoreanCharSubset),
- "encode UTF-32(BE by default) Korean");
- Expect.listEquals(testKoreanCharSubsetUtf32le,
+ encodeUtf32(testKoreanCharSubset), "encode UTF-32(BE by default) Korean");
+ Expect.listEquals(
+ testKoreanCharSubsetUtf32le,
encodeUtf32le(testKoreanCharSubset),
"encode UTF-32(LE by default) Korean");
}
@@ -124,35 +129,29 @@
void testUtf32BytesToString() {
Expect.stringEquals("", decodeUtf32([]), "no input");
Expect.stringEquals("\ufffd", decodeUtf32([0]), "single byte");
- Expect.stringEquals("\ufffd", decodeUtf32([0, 0, 0x4e]),
- "short a byte");
- Expect.stringEquals("\u4e8c\ufffd", decodeUtf32([0, 0, 0x4e, 0x8c, 0]),
- "extra byte");
+ Expect.stringEquals("\ufffd", decodeUtf32([0, 0, 0x4e]), "short a byte");
+ Expect.stringEquals(
+ "\u4e8c\ufffd", decodeUtf32([0, 0, 0x4e, 0x8c, 0]), "extra byte");
- Expect.stringEquals(testHanTwice, decodeUtf32([0, 0, 0x4e, 0x8c]),
- "twice variation 1");
+ Expect.stringEquals(
+ testHanTwice, decodeUtf32([0, 0, 0x4e, 0x8c]), "twice variation 1");
Expect.stringEquals(testHanTwice,
- decodeUtf32([0, 0, 0xfe, 0xff, 0, 0, 0x4e, 0x8c]),
- "twice variation 2");
+ decodeUtf32([0, 0, 0xfe, 0xff, 0, 0, 0x4e, 0x8c]), "twice variation 2");
Expect.stringEquals(testHanTwice,
- decodeUtf32([0xff, 0xfe, 0, 0, 0x8c, 0x4e, 0, 0]),
- "twice variation 3");
+ decodeUtf32([0xff, 0xfe, 0, 0, 0x8c, 0x4e, 0, 0]), "twice variation 3");
- Expect.stringEquals(testHanTwice, decodeUtf32be([0, 0, 0x4e, 0x8c]),
- "twice variation 4");
+ Expect.stringEquals(
+ testHanTwice, decodeUtf32be([0, 0, 0x4e, 0x8c]), "twice variation 4");
Expect.stringEquals(testHanTwice,
- decodeUtf32be([0, 0, 0xfe, 0xff, 0, 0, 0x4e, 0x8c]),
- "twice variation 5");
+ decodeUtf32be([0, 0, 0xfe, 0xff, 0, 0, 0x4e, 0x8c]), "twice variation 5");
- Expect.stringEquals(testHanTwice, decodeUtf32le([0x8c, 0x4e, 0, 0]),
- "twice variation 6");
+ Expect.stringEquals(
+ testHanTwice, decodeUtf32le([0x8c, 0x4e, 0, 0]), "twice variation 6");
Expect.stringEquals(testHanTwice,
- decodeUtf32le([0xff, 0xfe, 0, 0, 0x8c, 0x4e, 0, 0]),
- "twice variation 7");
+ decodeUtf32le([0xff, 0xfe, 0, 0, 0x8c, 0x4e, 0, 0]), "twice variation 7");
Expect.stringEquals(testKoreanCharSubset,
- decodeUtf32(testKoreanCharSubsetUtf32beBom),
- "UTF-32BE Korean");
+ decodeUtf32(testKoreanCharSubsetUtf32beBom), "UTF-32BE Korean");
}
void testIterableMethods() {
@@ -160,7 +159,7 @@
Expect.isFalse(decodeUtf32AsIterable([]).iterator.moveNext());
IterableUtf32Decoder koreanDecoder =
- decodeUtf32AsIterable(testKoreanCharSubsetUtf32beBom);
+ decodeUtf32AsIterable(testKoreanCharSubsetUtf32beBom);
// get the first character
Expect.equals(testKoreanCharSubset.codeUnits[0], koreanDecoder.first);
// get the whole translation using the Iterable interface
@@ -168,12 +167,17 @@
new String.fromCharCodes(new List<int>.from(koreanDecoder)));
// specify types
- Expect.equals(44032, (new List<int>
- .from(decodeUtf32beAsIterable(testKoreanCharSubsetUtf32beBom)))[0]);
- Expect.equals(44032, (new List<int>
- .from(decodeUtf32leAsIterable(testKoreanCharSubsetUtf32le)))[0]);
+ Expect.equals(
+ 44032,
+ (new List<int>.from(
+ decodeUtf32beAsIterable(testKoreanCharSubsetUtf32beBom)))[0]);
+ Expect.equals(
+ 44032,
+ (new List<int>.from(
+ decodeUtf32leAsIterable(testKoreanCharSubsetUtf32le)))[0]);
bool stripBom = false;
- Expect.equals(UNICODE_BOM, (new List<int>
- .from(decodeUtf32beAsIterable(testKoreanCharSubsetUtf32beBom,
- 0, null, stripBom)))[0]);
+ Expect.equals(
+ UNICODE_BOM,
+ (new List<int>.from(decodeUtf32beAsIterable(
+ testKoreanCharSubsetUtf32beBom, 0, null, stripBom)))[0]);
}
diff --git a/test/utf82_test.dart b/test/utf82_test.dart
index 7f8cec4..db91a70 100755
--- a/test/utf82_test.dart
+++ b/test/utf82_test.dart
@@ -4,184 +4,187 @@
library utf.utf82_test;
-import 'package:expect/expect.dart';
+import 'package:test/test.dart';
import 'package:utf/utf.dart';
-const String testEnglishPhrase =
- "The quick brown fox jumps over the lazy dog.";
+import 'expect.dart';
-const List<int> testEnglishUtf8 = const<int> [
- 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63,
- 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
- 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
- 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
- 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
- 0x64, 0x6f, 0x67, 0x2e];
+const String testEnglishPhrase = "The quick brown fox jumps over the lazy dog.";
+
+const List<int> testEnglishUtf8 = const <int>[
+ 0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, // 8
+ 0x6b, 0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20,
+ 0x66, 0x6f, 0x78, 0x20, 0x6a, 0x75, 0x6d, 0x70,
+ 0x73, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x74,
+ 0x68, 0x65, 0x20, 0x6c, 0x61, 0x7a, 0x79, 0x20,
+ 0x64, 0x6f, 0x67, 0x2e
+];
const String testDanishPhrase = "Quizdeltagerne spiste jordbær med "
"fløde mens cirkusklovnen Wolther spillede på xylofon.";
-const List<int> testDanishUtf8 = const<int>[
- 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74,
- 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
- 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
- 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
- 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
- 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
- 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
- 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
- 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
- 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
- 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
- 0x6e, 0x2e];
+const List<int> testDanishUtf8 = const <int>[
+ 0x51, 0x75, 0x69, 0x7a, 0x64, 0x65, 0x6c, 0x74, // 8
+ 0x61, 0x67, 0x65, 0x72, 0x6e, 0x65, 0x20, 0x73,
+ 0x70, 0x69, 0x73, 0x74, 0x65, 0x20, 0x6a, 0x6f,
+ 0x72, 0x64, 0x62, 0xc3, 0xa6, 0x72, 0x20, 0x6d,
+ 0x65, 0x64, 0x20, 0x66, 0x6c, 0xc3, 0xb8, 0x64,
+ 0x65, 0x20, 0x6d, 0x65, 0x6e, 0x73, 0x20, 0x63,
+ 0x69, 0x72, 0x6b, 0x75, 0x73, 0x6b, 0x6c, 0x6f,
+ 0x76, 0x6e, 0x65, 0x6e, 0x20, 0x57, 0x6f, 0x6c,
+ 0x74, 0x68, 0x65, 0x72, 0x20, 0x73, 0x70, 0x69,
+ 0x6c, 0x6c, 0x65, 0x64, 0x65, 0x20, 0x70, 0xc3,
+ 0xa5, 0x20, 0x78, 0x79, 0x6c, 0x6f, 0x66, 0x6f,
+ 0x6e, 0x2e
+];
// unusual formatting due to strange editor interaction w/ text direction.
-const String
- testHebrewPhrase = "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";
+const String testHebrewPhrase =
+ "דג סקרן שט בים מאוכזב ולפתע מצא לו חברה איך הקליטה";
-const List<int> testHebrewUtf8 = const<int>[
- 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7,
- 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
- 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
- 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
- 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
- 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
- 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
- 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
- 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
- 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
- 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
- 0xd7, 0x94];
+const List<int> testHebrewUtf8 = const <int>[
+ 0xd7, 0x93, 0xd7, 0x92, 0x20, 0xd7, 0xa1, 0xd7, // 8
+ 0xa7, 0xd7, 0xa8, 0xd7, 0x9f, 0x20, 0xd7, 0xa9,
+ 0xd7, 0x98, 0x20, 0xd7, 0x91, 0xd7, 0x99, 0xd7,
+ 0x9d, 0x20, 0xd7, 0x9e, 0xd7, 0x90, 0xd7, 0x95,
+ 0xd7, 0x9b, 0xd7, 0x96, 0xd7, 0x91, 0x20, 0xd7,
+ 0x95, 0xd7, 0x9c, 0xd7, 0xa4, 0xd7, 0xaa, 0xd7,
+ 0xa2, 0x20, 0xd7, 0x9e, 0xd7, 0xa6, 0xd7, 0x90,
+ 0x20, 0xd7, 0x9c, 0xd7, 0x95, 0x20, 0xd7, 0x97,
+ 0xd7, 0x91, 0xd7, 0xa8, 0xd7, 0x94, 0x20, 0xd7,
+ 0x90, 0xd7, 0x99, 0xd7, 0x9a, 0x20, 0xd7, 0x94,
+ 0xd7, 0xa7, 0xd7, 0x9c, 0xd7, 0x99, 0xd7, 0x98,
+ 0xd7, 0x94
+];
const String testRussianPhrase = "Съешь же ещё этих мягких "
"французских булок да выпей чаю";
-const List<int> testRussianUtf8 = const<int>[
- 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88,
- 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
- 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
- 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
- 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
- 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
- 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
- 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
- 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
- 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
- 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
- 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
- 0x87, 0xd0, 0xb0, 0xd1, 0x8e];
+const List<int> testRussianUtf8 = const <int>[
+ 0xd0, 0xa1, 0xd1, 0x8a, 0xd0, 0xb5, 0xd1, 0x88, // 8
+ 0xd1, 0x8c, 0x20, 0xd0, 0xb6, 0xd0, 0xb5, 0x20,
+ 0xd0, 0xb5, 0xd1, 0x89, 0xd1, 0x91, 0x20, 0xd1,
+ 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0xd1, 0x85, 0x20,
+ 0xd0, 0xbc, 0xd1, 0x8f, 0xd0, 0xb3, 0xd0, 0xba,
+ 0xd0, 0xb8, 0xd1, 0x85, 0x20, 0xd1, 0x84, 0xd1,
+ 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd1, 0x86, 0xd1,
+ 0x83, 0xd0, 0xb7, 0xd1, 0x81, 0xd0, 0xba, 0xd0,
+ 0xb8, 0xd1, 0x85, 0x20, 0xd0, 0xb1, 0xd1, 0x83,
+ 0xd0, 0xbb, 0xd0, 0xbe, 0xd0, 0xba, 0x20, 0xd0,
+ 0xb4, 0xd0, 0xb0, 0x20, 0xd0, 0xb2, 0xd1, 0x8b,
+ 0xd0, 0xbf, 0xd0, 0xb5, 0xd0, 0xb9, 0x20, 0xd1,
+ 0x87, 0xd0, 0xb0, 0xd1, 0x8e
+];
const String testGreekPhrase = "Γαζέες καὶ μυρτιὲς δὲν θὰ βρῶ πιὰ "
"στὸ χρυσαφὶ ξέφωτο";
-const List<int> testGreekUtf8 = const<int>[
- 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad,
- 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
- 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
- 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
- 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
- 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
- 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
- 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
- 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
- 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
- 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
- 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
- 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf];
+const List<int> testGreekUtf8 = const <int>[
+ 0xce, 0x93, 0xce, 0xb1, 0xce, 0xb6, 0xce, 0xad, // 8
+ 0xce, 0xb5, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce,
+ 0xb1, 0xe1, 0xbd, 0xb6, 0x20, 0xce, 0xbc, 0xcf,
+ 0x85, 0xcf, 0x81, 0xcf, 0x84, 0xce, 0xb9, 0xe1,
+ 0xbd, 0xb2, 0xcf, 0x82, 0x20, 0xce, 0xb4, 0xe1,
+ 0xbd, 0xb2, 0xce, 0xbd, 0x20, 0xce, 0xb8, 0xe1,
+ 0xbd, 0xb0, 0x20, 0xce, 0xb2, 0xcf, 0x81, 0xe1,
+ 0xbf, 0xb6, 0x20, 0xcf, 0x80, 0xce, 0xb9, 0xe1,
+ 0xbd, 0xb0, 0x20, 0xcf, 0x83, 0xcf, 0x84, 0xe1,
+ 0xbd, 0xb8, 0x20, 0xcf, 0x87, 0xcf, 0x81, 0xcf,
+ 0x85, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x86, 0xe1,
+ 0xbd, 0xb6, 0x20, 0xce, 0xbe, 0xce, 0xad, 0xcf,
+ 0x86, 0xcf, 0x89, 0xcf, 0x84, 0xce, 0xbf
+];
const String testKatakanaPhrase = """
イロハニホヘト チリヌルヲ ワカヨタレソ ツネナラム
ウヰノオクヤマ ケフコエテ アサキユメミシ ヱヒモセスン""";
-const List<int> testKatakanaUtf8 = const<int>[
- 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83,
- 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
- 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
- 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
- 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
- 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
- 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
- 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
- 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
- 0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
- 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
- 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
- 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
- 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
- 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
- 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
- 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
- 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
- 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3];
+const List<int> testKatakanaUtf8 = const <int>[
+ 0xe3, 0x82, 0xa4, 0xe3, 0x83, 0xad, 0xe3, 0x83, // 8
+ 0x8f, 0xe3, 0x83, 0x8b, 0xe3, 0x83, 0x9b, 0xe3,
+ 0x83, 0x98, 0xe3, 0x83, 0x88, 0x20, 0xe3, 0x83,
+ 0x81, 0xe3, 0x83, 0xaa, 0xe3, 0x83, 0x8c, 0xe3,
+ 0x83, 0xab, 0xe3, 0x83, 0xb2, 0x20, 0xe3, 0x83,
+ 0xaf, 0xe3, 0x82, 0xab, 0xe3, 0x83, 0xa8, 0xe3,
+ 0x82, 0xbf, 0xe3, 0x83, 0xac, 0xe3, 0x82, 0xbd,
+ 0x20, 0xe3, 0x83, 0x84, 0xe3, 0x83, 0x8d, 0xe3,
+ 0x83, 0x8a, 0xe3, 0x83, 0xa9, 0xe3, 0x83, 0xa0,
+ 0x0a, 0xe3, 0x82, 0xa6, 0xe3, 0x83, 0xb0, 0xe3,
+ 0x83, 0x8e, 0xe3, 0x82, 0xaa, 0xe3, 0x82, 0xaf,
+ 0xe3, 0x83, 0xa4, 0xe3, 0x83, 0x9e, 0x20, 0xe3,
+ 0x82, 0xb1, 0xe3, 0x83, 0x95, 0xe3, 0x82, 0xb3,
+ 0xe3, 0x82, 0xa8, 0xe3, 0x83, 0x86, 0x20, 0xe3,
+ 0x82, 0xa2, 0xe3, 0x82, 0xb5, 0xe3, 0x82, 0xad,
+ 0xe3, 0x83, 0xa6, 0xe3, 0x83, 0xa1, 0xe3, 0x83,
+ 0x9f, 0xe3, 0x82, 0xb7, 0x20, 0xe3, 0x83, 0xb1,
+ 0xe3, 0x83, 0x92, 0xe3, 0x83, 0xa2, 0xe3, 0x82,
+ 0xbb, 0xe3, 0x82, 0xb9, 0xe3, 0x83, 0xb3
+];
void main() {
- testUtf8bytesToCodepoints();
- testUtf8BytesToString();
- testEncodeToUtf8();
- testIterableMethods();
+ test('utf8 bytes to codepoints', testUtf8bytesToCodepoints);
+ test('utf8 bytes to string', testUtf8BytesToString);
+ test('encode to utf8', testEncodeToUtf8);
+ test('iterable methods', testIterableMethods);
}
void testEncodeToUtf8() {
- Expect.listEquals(testEnglishUtf8, encodeUtf8(testEnglishPhrase),
- "english to utf8");
+ Expect.listEquals(
+ testEnglishUtf8, encodeUtf8(testEnglishPhrase), "english to utf8");
- Expect.listEquals(testDanishUtf8, encodeUtf8(testDanishPhrase),
- "encode danish to utf8");
+ Expect.listEquals(
+ testDanishUtf8, encodeUtf8(testDanishPhrase), "encode danish to utf8");
- Expect.listEquals(testHebrewUtf8, encodeUtf8(testHebrewPhrase),
- "Hebrew to utf8");
+ Expect.listEquals(
+ testHebrewUtf8, encodeUtf8(testHebrewPhrase), "Hebrew to utf8");
- Expect.listEquals(testRussianUtf8, encodeUtf8(testRussianPhrase),
- "Russian to utf8");
+ Expect.listEquals(
+ testRussianUtf8, encodeUtf8(testRussianPhrase), "Russian to utf8");
- Expect.listEquals(testGreekUtf8, encodeUtf8(testGreekPhrase),
- "Greek to utf8");
+ Expect.listEquals(
+ testGreekUtf8, encodeUtf8(testGreekPhrase), "Greek to utf8");
- Expect.listEquals(testKatakanaUtf8, encodeUtf8(testKatakanaPhrase),
- "Katakana to utf8");
+ Expect.listEquals(
+ testKatakanaUtf8, encodeUtf8(testKatakanaPhrase), "Katakana to utf8");
}
void testUtf8bytesToCodepoints() {
- Expect.listEquals([954, 972, 963, 956, 949],
- utf8ToCodepoints([0xce, 0xba, 0xcf, 0x8c, 0xcf,
- 0x83, 0xce, 0xbc, 0xce, 0xb5]), "κόσμε");
+ Expect.listEquals(
+ [954, 972, 963, 956, 949],
+ utf8ToCodepoints(
+ [0xce, 0xba, 0xcf, 0x8c, 0xcf, 0x83, 0xce, 0xbc, 0xce, 0xb5]),
+ "κόσμε");
// boundary conditions: First possible sequence of a certain length
Expect.listEquals([], utf8ToCodepoints([]), "no input");
Expect.listEquals([0x0], utf8ToCodepoints([0x0]), "0");
Expect.listEquals([0x80], utf8ToCodepoints([0xc2, 0x80]), "80");
- Expect.listEquals([0x800],
- utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
- Expect.listEquals([0x10000],
- utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
+ Expect.listEquals([0x800], utf8ToCodepoints([0xe0, 0xa0, 0x80]), "800");
+ Expect.listEquals(
+ [0x10000], utf8ToCodepoints([0xf0, 0x90, 0x80, 0x80]), "10000");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80, 0x80]), "200000");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]),
- "4000000");
+ utf8ToCodepoints([0xfc, 0x84, 0x80, 0x80, 0x80, 0x80]), "4000000");
// boundary conditions: Last possible sequence of a certain length
Expect.listEquals([0x7f], utf8ToCodepoints([0x7f]), "7f");
Expect.listEquals([0x7ff], utf8ToCodepoints([0xdf, 0xbf]), "7ff");
- Expect.listEquals([0xffff],
- utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
+ Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "ffff");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf7, 0xbf, 0xbf, 0xbf]), "1fffff");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf, 0xbf]), "3ffffff");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]),
- "4000000");
+ utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf]), "4000000");
// other boundary conditions
- Expect.listEquals([0xd7ff],
- utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
- Expect.listEquals([0xe000],
- utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
+ Expect.listEquals([0xd7ff], utf8ToCodepoints([0xed, 0x9f, 0xbf]), "d7ff");
+ Expect.listEquals([0xe000], utf8ToCodepoints([0xee, 0x80, 0x80]), "e000");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xef, 0xbf, 0xbd]), "fffd");
- Expect.listEquals([0x10ffff],
- utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
+ Expect.listEquals(
+ [0x10ffff], utf8ToCodepoints([0xf4, 0x8f, 0xbf, 0xbf]), "10ffff");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf4, 0x90, 0x80, 0x80]), "110000");
@@ -197,7 +200,8 @@
allContinuationBytes.add(i);
matchingReplacementChars.add(UNICODE_REPLACEMENT_CHARACTER_CODEPOINT);
}
- Expect.listEquals(matchingReplacementChars,
+ Expect.listEquals(
+ matchingReplacementChars,
utf8ToCodepoints(allContinuationBytes),
"80 - bf => replacement character x 64");
@@ -205,10 +209,10 @@
matchingReplacementChars = <int>[];
for (int i = 0xc0; i < 0xe0; i++) {
allFirstTwoByteSeq.addAll([i, 0x20]);
- matchingReplacementChars.addAll(
- [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
+ matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
}
- Expect.listEquals(matchingReplacementChars,
+ Expect.listEquals(
+ matchingReplacementChars,
utf8ToCodepoints(allFirstTwoByteSeq),
"c0 - df + space => replacement character + space x 32");
@@ -216,10 +220,10 @@
matchingReplacementChars = <int>[];
for (int i = 0xe0; i < 0xf0; i++) {
allFirstThreeByteSeq.addAll([i, 0x20]);
- matchingReplacementChars.addAll(
- [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
+ matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
}
- Expect.listEquals(matchingReplacementChars,
+ Expect.listEquals(
+ matchingReplacementChars,
utf8ToCodepoints(allFirstThreeByteSeq),
"e0 - ef + space => replacement character x 16");
@@ -227,10 +231,10 @@
matchingReplacementChars = <int>[];
for (int i = 0xf0; i < 0xf8; i++) {
allFirstFourByteSeq.addAll([i, 0x20]);
- matchingReplacementChars.addAll(
- [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
+ matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
}
- Expect.listEquals(matchingReplacementChars,
+ Expect.listEquals(
+ matchingReplacementChars,
utf8ToCodepoints(allFirstFourByteSeq),
"f0 - f7 + space => replacement character x 8");
@@ -238,10 +242,10 @@
matchingReplacementChars = <int>[];
for (int i = 0xf8; i < 0xfc; i++) {
allFirstFiveByteSeq.addAll([i, 0x20]);
- matchingReplacementChars.addAll(
- [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
+ matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
}
- Expect.listEquals(matchingReplacementChars,
+ Expect.listEquals(
+ matchingReplacementChars,
utf8ToCodepoints(allFirstFiveByteSeq),
"f8 - fb + space => replacement character x 4");
@@ -249,70 +253,77 @@
matchingReplacementChars = <int>[];
for (int i = 0xfc; i < 0xfe; i++) {
allFirstSixByteSeq.addAll([i, 0x20]);
- matchingReplacementChars.addAll(
- [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
+ matchingReplacementChars.addAll([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]);
}
- Expect.listEquals(matchingReplacementChars,
+ Expect.listEquals(
+ matchingReplacementChars,
utf8ToCodepoints(allFirstSixByteSeq),
"fc - fd + space => replacement character x 2");
// Sequences with last continuation byte missing
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xc2]),
- "2-byte sequence with last byte missing");
+ utf8ToCodepoints([0xc2]), "2-byte sequence with last byte missing");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xe0, 0x80]),
- "3-byte sequence with last byte missing");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ utf8ToCodepoints([0xe0, 0x80]), "3-byte sequence with last byte missing");
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf0, 0x80, 0x80]),
"4-byte sequence with last byte missing");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf8, 0x88, 0x80, 0x80]),
"5-byte sequence with last byte missing");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80]),
"6-byte sequence with last byte missing");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xdf]),
- "2-byte sequence with last byte missing (hi)");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ utf8ToCodepoints([0xdf]), "2-byte sequence with last byte missing (hi)");
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xef, 0xbf]),
"3-byte sequence with last byte missing (hi)");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf7, 0xbf, 0xbf]),
"4-byte sequence with last byte missing (hi)");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xfb, 0xbf, 0xbf, 0xbf]),
"5-byte sequence with last byte missing (hi)");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xfd, 0xbf, 0xbf, 0xbf, 0xbf]),
"6-byte sequence with last byte missing (hi)");
// Concatenation of incomplete sequences
Expect.listEquals(
- [ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT ],
- utf8ToCodepoints(
- [ 0xc2,
- 0xe0, 0x80,
- 0xf0, 0x80, 0x80,
- 0xf8, 0x88, 0x80, 0x80,
- 0xfc, 0x80, 0x80, 0x80, 0x80,
- 0xdf,
- 0xef, 0xbf,
- 0xf7, 0xbf, 0xbf,
- 0xfb, 0xbf, 0xbf, 0xbf,
- 0xfd, 0xbf, 0xbf, 0xbf, 0xbf ]),
- "Concatenation of incomplete sequences");
+ [
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ],
+ utf8ToCodepoints([
+ 0xc2, // 1
+ 0xe0, 0x80,
+ 0xf0, 0x80, 0x80,
+ 0xf8, 0x88, 0x80, 0x80,
+ 0xfc, 0x80, 0x80, 0x80, 0x80,
+ 0xdf,
+ 0xef, 0xbf,
+ 0xf7, 0xbf, 0xbf,
+ 0xfb, 0xbf, 0xbf, 0xbf,
+ 0xfd, 0xbf, 0xbf, 0xbf, 0xbf
+ ]),
+ "Concatenation of incomplete sequences");
// Impossible bytes
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
@@ -320,11 +331,11 @@
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xff]), "ff");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xfe, 0xfe, 0xff, 0xff]), "fe fe ff ff");
// Overlong sequences
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
@@ -335,7 +346,8 @@
utf8ToCodepoints([0xf0, 0x80, 0x80, 0xaf]), "f0 80 80 af");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0xaf]), "f8 80 80 80 af");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0xaf]),
"fc 80 80 80 80 af");
@@ -347,7 +359,8 @@
utf8ToCodepoints([0xf0, 0x8f, 0xbf, 0xbf]), "f0 8f bf bf");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf8, 0x87, 0xbf, 0xbf, 0xbf]), "f8 87 bf bf bf");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xfc, 0x83, 0xbf, 0xbf, 0xbf, 0xbf]),
"fc 83 bf bf bf bf");
@@ -359,7 +372,8 @@
utf8ToCodepoints([0xf0, 0x80, 0x80, 0x80]), "f0 80 80 80");
Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xf8, 0x80, 0x80, 0x80, 0x80]), "f8 80 80 80 80");
- Expect.listEquals([UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
+ Expect.listEquals(
+ [UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
utf8ToCodepoints([0xfc, 0x80, 0x80, 0x80, 0x80, 0x80]),
"fc 80 80 80 80 80");
@@ -381,71 +395,58 @@
// Paired UTF-16 surrogates
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]),
- "U+D800 U+DC00");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80]), "U+D800 U+DC00");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf]),
- "U+D800 U+DFFF");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xa0, 0x80, 0xed, 0xbf, 0xbf]), "U+D800 U+DFFF");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80]),
- "U+DB7F U+DC00");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xb0, 0x80]), "U+DB7F U+DC00");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf]),
- "U+DB7F U+DFFF");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xad, 0xbf, 0xed, 0xbf, 0xbf]), "U+DB7F U+DFFF");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80]),
- "U+DB80 U+DC00");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xb0, 0x80]), "U+DB80 U+DC00");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf]),
- "U+DB80 U+DFFF");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xae, 0x80, 0xed, 0xbf, 0xbf]), "U+DB80 U+DFFF");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80]),
- "U+DBFF U+DC00");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xb0, 0x80]), "U+DBFF U+DC00");
Expect.listEquals([
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
- UNICODE_REPLACEMENT_CHARACTER_CODEPOINT],
- utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf]),
- "U+DBFF U+DFFF");
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT,
+ UNICODE_REPLACEMENT_CHARACTER_CODEPOINT
+ ], utf8ToCodepoints([0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf]), "U+DBFF U+DFFF");
// Other illegal code positions (???)
- Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]),
- "U+FFFE");
- Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]),
- "U+FFFF");
+ Expect.listEquals([0xfffe], utf8ToCodepoints([0xef, 0xbf, 0xbe]), "U+FFFE");
+ Expect.listEquals([0xffff], utf8ToCodepoints([0xef, 0xbf, 0xbf]), "U+FFFF");
}
void testUtf8BytesToString() {
- Expect.stringEquals(testEnglishPhrase,
- decodeUtf8(testEnglishUtf8), "English");
+ Expect.stringEquals(
+ testEnglishPhrase, decodeUtf8(testEnglishUtf8), "English");
- Expect.stringEquals(testDanishPhrase,
- decodeUtf8(testDanishUtf8), "Danish");
+ Expect.stringEquals(testDanishPhrase, decodeUtf8(testDanishUtf8), "Danish");
- Expect.stringEquals(testHebrewPhrase,
- decodeUtf8(testHebrewUtf8), "Hebrew");
+ Expect.stringEquals(testHebrewPhrase, decodeUtf8(testHebrewUtf8), "Hebrew");
- Expect.stringEquals(testRussianPhrase,
- decodeUtf8(testRussianUtf8), "Russian");
+ Expect.stringEquals(
+ testRussianPhrase, decodeUtf8(testRussianUtf8), "Russian");
- Expect.stringEquals(testGreekPhrase,
- decodeUtf8(testGreekUtf8), "Greek");
+ Expect.stringEquals(testGreekPhrase, decodeUtf8(testGreekUtf8), "Greek");
- Expect.stringEquals(testKatakanaPhrase,
- decodeUtf8(testKatakanaUtf8), "Katakana");
+ Expect.stringEquals(
+ testKatakanaPhrase, decodeUtf8(testKatakanaUtf8), "Katakana");
}
void testIterableMethods() {
diff --git a/test/utf8_test.dart b/test/utf8_test.dart
index 3e8c87e..2ca451c 100644
--- a/test/utf8_test.dart
+++ b/test/utf8_test.dart
@@ -4,45 +4,59 @@
library utf.utf8_test;
-import "package:expect/expect.dart";
+import 'package:test/test.dart';
import "package:utf/utf.dart";
+import 'expect.dart';
+
String decode(List<int> bytes) => decodeUtf8(bytes);
-main() {
- // Google favorite: "Îñţérñåţîöñåļîžåţîờñ".
- String string = decode([0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72,
- 0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3,
- 0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae,
- 0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1,
- 0xbb, 0x9d, 0xc3, 0xb1]);
- Expect.stringEquals("Îñţérñåţîöñåļîžåţîờñ", string);
+void main() {
+ test('Google favorite: "Îñţérñåţîöñåļîžåţîờñ"', () {
+ String string = decode([
+ 0xc3, 0x8e, 0xc3, 0xb1, 0xc5, 0xa3, 0xc3, 0xa9, 0x72, // 8
+ 0xc3, 0xb1, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xc3,
+ 0xb6, 0xc3, 0xb1, 0xc3, 0xa5, 0xc4, 0xbc, 0xc3, 0xae,
+ 0xc5, 0xbe, 0xc3, 0xa5, 0xc5, 0xa3, 0xc3, 0xae, 0xe1,
+ 0xbb, 0x9d, 0xc3, 0xb1
+ ]);
+ Expect.stringEquals("Îñţérñåţîöñåļîžåţîờñ", string);
+ });
- // Blueberry porridge in Danish: "blåbærgrød".
- string = decode([0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72,
- 0xc3, 0xb8, 0x64]);
- Expect.stringEquals("blåbærgrød", string);
+ test('Blueberry porridge in Danish: "blåbærgrød"', () {
+ var string = decode([
+ 0x62, 0x6c, 0xc3, 0xa5, 0x62, 0xc3, 0xa6, 0x72, 0x67, 0x72, // 8
+ 0xc3, 0xb8, 0x64
+ ]);
+ Expect.stringEquals("blåbærgrød", string);
+ });
- // "சிவா அணாமாைல", that is "Siva Annamalai" in Tamil.
- string = decode([0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0,
- 0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0,
- 0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf,
- 0x88, 0xe0, 0xae, 0xb2]);
- Expect.stringEquals("சிவா அணாமாைல", string);
+ test('"சிவா அணாமாைல", that is "Siva Annamalai" in Tamil.', () {
+ var string = decode([
+ 0xe0, 0xae, 0x9a, 0xe0, 0xae, 0xbf, 0xe0, 0xae, 0xb5, 0xe0, // 8
+ 0xae, 0xbe, 0x20, 0xe0, 0xae, 0x85, 0xe0, 0xae, 0xa3, 0xe0,
+ 0xae, 0xbe, 0xe0, 0xae, 0xae, 0xe0, 0xae, 0xbe, 0xe0, 0xaf,
+ 0x88, 0xe0, 0xae, 0xb2
+ ]);
+ Expect.stringEquals("சிவா அணாமாைல", string);
+ });
- // "िसवा अणामालै", that is "Siva Annamalai" in Devanagari.
- string = decode([0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0,
- 0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0,
- 0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4,
- 0xb2, 0xe0, 0xa5, 0x88]);
- Expect.stringEquals("िसवा अणामालै", string);
+ test('"िसवा अणामालै", that is "Siva Annamalai" in Devanagari', () {
+ var string = decode([
+ 0xe0, 0xa4, 0xbf, 0xe0, 0xa4, 0xb8, 0xe0, 0xa4, 0xb5, 0xe0, // 8
+ 0xa4, 0xbe, 0x20, 0xe0, 0xa4, 0x85, 0xe0, 0xa4, 0xa3, 0xe0,
+ 0xa4, 0xbe, 0xe0, 0xa4, 0xae, 0xe0, 0xa4, 0xbe, 0xe0, 0xa4,
+ 0xb2, 0xe0, 0xa5, 0x88
+ ]);
+ Expect.stringEquals("िसवा अणामालै", string);
+ });
- // DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12)
- // UTF-8: F0 90 90 92
- string = decode([0xf0, 0x90, 0x90, 0x92]);
- Expect.equals(string.length, 2);
- Expect.equals("𐐒".length, 2);
- Expect.stringEquals("𐐒", string);
-
+ test('DESERET CAPITAL LETTER BEE, unicode 0x10412(0xD801+0xDC12', () {
+ // UTF-8: F0 90 90 92
+ var string = decode([0xf0, 0x90, 0x90, 0x92]);
+ Expect.equals(string.length, 2);
+ Expect.equals("𐐒".length, 2);
+ Expect.stringEquals("𐐒", string);
+ });
// TODO(ahe): Add tests of bad input.
}
diff --git a/test/utf_test.dart b/test/utf_test.dart
index 86d08e4..65035d7 100644
--- a/test/utf_test.dart
+++ b/test/utf_test.dart
@@ -4,13 +4,17 @@
library utf.utf_test;
-import "package:expect/expect.dart";
+import 'package:test/test.dart';
import "package:utf/utf.dart";
+import "expect.dart";
+
main() {
- String str = new String.fromCharCodes([0x1d537]);
- // String.codeUnits gives 16-bit code units, but stringToCodepoints gives
- // back the original code points.
- Expect.listEquals([0xd835, 0xdd37], str.codeUnits);
- Expect.listEquals([0x1d537], stringToCodepoints(str));
+ test('utf', () {
+ String str = new String.fromCharCodes([0x1d537]);
+ // String.codeUnits gives 16-bit code units, but stringToCodepoints gives
+ // back the original code points.
+ Expect.listEquals([0xd835, 0xdd37], str.codeUnits);
+ Expect.listEquals([0x1d537], stringToCodepoints(str));
+ });
}