Reland "Switching SkText to UTF16"

This is a reland of a3262aa1d25eb31f118ee395a0ecae83fd0e27b6

Original change's description:
> Switching SkText to UTF16
>
> Required some changes in SkUnicode to support it
> SkShaper still works on UTF8
>
> Change-Id: I76645668e1d9bf95eb4539a066deea2b24ecf5e9
> Reviewed-on: https://skia-review.googlesource.com/c/skia/+/406360
> Reviewed-by: Julia Lavrova <jlavrova@google.com>
> Commit-Queue: Julia Lavrova <jlavrova@google.com>

Change-Id: Ib21898d4acda8ab78e5e6fd1c7b9b0da41c3fa83
Reviewed-on: https://skia-review.googlesource.com/c/skia/+/407396
Reviewed-by: Julia Lavrova <jlavrova@google.com>
Commit-Queue: Julia Lavrova <jlavrova@google.com>
diff --git a/experimental/sktext/include/Processor.h b/experimental/sktext/include/Processor.h
index 521f9a7..72b5ad6 100644
--- a/experimental/sktext/include/Processor.h
+++ b/experimental/sktext/include/Processor.h
@@ -89,8 +89,8 @@
 
 public:
 
-    Processor(const SkString& text)
-        : fText(text)
+    Processor(std::u16string text)
+        : fText(std::move(text))
         , fUnicode(nullptr) {}
 
     ~Processor() = default;
@@ -157,10 +157,10 @@
     TextRun& run(const size_t index) { return fRuns[index]; }
 
     // Simplification (using default font manager, default font family and default everything possible)
-    static bool drawText(const char* text, SkCanvas* canvas, SkScalar x, SkScalar y);
-    static bool drawText(const char* text, SkCanvas* canvas, SkScalar width);
-    static bool drawText(const char* text, SkCanvas* canvas, TextFormatStyle textFormat, SkColor foreground, SkColor background, const SkString& fontFamily, SkScalar fontSize, SkFontStyle fontStyle, SkScalar x, SkScalar y);
-    static bool drawText(const char* text, SkCanvas* canvas,
+    static bool drawText(std::u16string text, SkCanvas* canvas, SkScalar x, SkScalar y);
+    static bool drawText(std::u16string text, SkCanvas* canvas, SkScalar width);
+    static bool drawText(std::u16string text, SkCanvas* canvas, TextFormatStyle textFormat, SkColor foreground, SkColor background, const SkString& fontFamily, SkScalar fontSize, SkFontStyle fontStyle, SkScalar x, SkScalar y);
+    static bool drawText(std::u16string text, SkCanvas* canvas,
                          TextFormatStyle textFormat, SkColor foreground, SkColor background, const SkString& fontFamily, SkScalar fontSize, SkFontStyle fontStyle,
                          SkSize reqSize, SkScalar x, SkScalar y);
 
@@ -181,7 +181,7 @@
     friend class Shaper;
     friend class Wrapper;
 
-    SkString fText;
+    std::u16string fText;
     SkTArray<FontBlock, true> fFontBlocks;
     //TextFormatStyle fTextFormatStyle;
     //TextFontStyle fTextFontStyle;
@@ -191,6 +191,7 @@
 
     std::unique_ptr<SkUnicode> fUnicode;
     SkTArray<CodeUnitFlags, true> fCodeUnitProperties;
+    SkTArray<size_t, true> fUTF16FromUTF8;
 };
 
 }  // namespace text
diff --git a/experimental/sktext/samples/Text.cpp b/experimental/sktext/samples/Text.cpp
index af59cd63a..64032c7 100644
--- a/experimental/sktext/samples/Text.cpp
+++ b/experimental/sktext/samples/Text.cpp
@@ -27,7 +27,7 @@
 
     void onDrawContent(SkCanvas* canvas) override {
         canvas->drawColor(SK_ColorWHITE);
-        Processor::drawText("Hello word", canvas, 0, 0);
+        Processor::drawText(u"Hello word", canvas, 0, 0);
     }
 
 private:
@@ -42,7 +42,7 @@
     SkString name() override { return SkString("TextSample_Align_Dir"); }
 
     void drawLine(SkCanvas* canvas, SkScalar w, SkScalar h,
-                  const std::string& text,
+                  const std::u16string& text,
                   TextAlign align,
                   TextDirection direction = TextDirection::kLtr) {
         SkColor background = SK_ColorGRAY;
@@ -54,7 +54,7 @@
         canvas->clipRect(SkRect::MakeWH(w, h));
         canvas->drawColor(SK_ColorWHITE);
 
-        Processor::drawText(direction == TextDirection::kRtl ? mirror(text).c_str() : normal(text).c_str(),
+        Processor::drawText(direction == TextDirection::kRtl ? mirror(text) : normal(text),
                             canvas,
                             TextFormatStyle(align, direction),
                             SK_ColorBLACK, SK_ColorLTGRAY,
@@ -62,27 +62,27 @@
                             0, 0);
     }
 
-    SkString mirror(const std::string& text) {
+    std::u16string mirror(const std::u16string& text) {
         std::u16string result;
         result += u"\u202E";
-        //for (auto i = text.size(); i > 0; --i) {
-        //  result += text[i - 1];
-        //}
-        for (auto ch : text) {
-            result += ch;
+        for (auto i = text.size(); i > 0; --i) {
+            result += text[i - 1];
         }
+        //for (auto ch : text) {
+        //    result += ch;
+        //}
         result += u"\u202C";
-        return fUnicode->convertUtf16ToUtf8(result);
+        return result;
     }
 
-    SkString normal(const std::string& text) {
+    std::u16string normal(const std::u16string& text) {
         std::u16string result;
-        result += u"\u202D";
+        //result += u"\u202D";
         for (auto ch : text) {
             result += ch;
         }
-        result += u"\u202C";
-        return fUnicode->convertUtf16ToUtf8(result);
+        //result += u"\u202C";
+        return result;
     }
 
     void onDrawContent(SkCanvas* canvas) override {
@@ -91,7 +91,7 @@
         SkScalar width = this->width() / 4;
         SkScalar height = this->height() / 2;
 
-        const std::string line = "One line of text";
+        const std::u16string line = u"One line of text";
 
         drawLine(canvas, width, height, line, TextAlign::kLeft, TextDirection::kLtr);
         canvas->translate(width, 0);
@@ -110,6 +110,7 @@
         canvas->translate(width, 0);
         drawLine(canvas, width, height, line, TextAlign::kJustify, TextDirection::kRtl);
         canvas->translate(width, 0);
+
     }
 
 private:
@@ -123,7 +124,7 @@
 
     void onDrawContent(SkCanvas* canvas) override {
         canvas->drawColor(SK_ColorWHITE);
-        Processor::drawText("A very_very_very_very_very_very_very_very_very_very "
+        Processor::drawText(u"A very_very_very_very_very_very_very_very_very_very "
                 "very_very_very_very_very_very_very_very_very_very very very very very very very "
                 "very very very very very very very very very very very very very very very very "
                 "very very very very very very very very very very very very very long text", canvas, this->width());
@@ -154,7 +155,7 @@
 
     void onDrawContent(SkCanvas* canvas) override {
         canvas->drawColor(SK_ColorWHITE);
-        Processor::drawText("LONG MIRRORED TEXT SHOULD SHOW RIGHT TO LEFT (AS NORMAL)", canvas, 0, 0);
+        Processor::drawText(u"LONG MIRRORED TEXT SHOULD SHOW RIGHT TO LEFT (AS NORMAL)", canvas, 0, 0);
     }
 
 private:
diff --git a/experimental/sktext/src/Processor.cpp b/experimental/sktext/src/Processor.cpp
index 676a612..fcca22d 100644
--- a/experimental/sktext/src/Processor.cpp
+++ b/experimental/sktext/src/Processor.cpp
@@ -69,28 +69,28 @@
 }
 
 // All at once
-bool Processor::drawText(const char* text, SkCanvas* canvas, SkScalar x, SkScalar y) {
+bool Processor::drawText(std::u16string text, SkCanvas* canvas, SkScalar x, SkScalar y) {
 
-    return drawText(text, canvas, TextFormatStyle(TextAlign::kLeft, TextDirection::kLtr), SK_ColorBLACK, SK_ColorWHITE, SkString("Roboto"), 14, SkFontStyle::Normal(), x, y);
+    return drawText(std::move(text), canvas, TextFormatStyle(TextAlign::kLeft, TextDirection::kLtr), SK_ColorBLACK, SK_ColorWHITE, SkString("Roboto"), 14, SkFontStyle::Normal(), x, y);
 }
 
-bool Processor::drawText(const char* text, SkCanvas* canvas, SkScalar width) {
-    return drawText(text, canvas,
+bool Processor::drawText(std::u16string text, SkCanvas* canvas, SkScalar width) {
+    return drawText(std::move(text), canvas,
                     TextFormatStyle(TextAlign::kLeft, TextDirection::kLtr), SK_ColorBLACK, SK_ColorWHITE, SkString("Roboto"), 14, SkFontStyle::Normal(),
                     SkSize::Make(width, SK_ScalarInfinity), 0, 0);
 }
 
-bool Processor::drawText(const char* text, SkCanvas* canvas, TextFormatStyle textFormat, SkColor foreground, SkColor background, const SkString& fontFamily, SkScalar fontSize, SkFontStyle fontStyle, SkScalar x, SkScalar y) {
-    return drawText(text, canvas, textFormat, foreground, background, fontFamily, fontSize, fontStyle, SkSize::Make(SK_ScalarInfinity, SK_ScalarInfinity), x, y);
+bool Processor::drawText(std::u16string text, SkCanvas* canvas, TextFormatStyle textFormat, SkColor foreground, SkColor background, const SkString& fontFamily, SkScalar fontSize, SkFontStyle fontStyle, SkScalar x, SkScalar y) {
+    return drawText(std::move(text), canvas, textFormat, foreground, background, fontFamily, fontSize, fontStyle, SkSize::Make(SK_ScalarInfinity, SK_ScalarInfinity), x, y);
 }
 
-bool Processor::drawText(const char* text, SkCanvas* canvas,
+bool Processor::drawText(std::u16string text, SkCanvas* canvas,
                          TextFormatStyle textFormat, SkColor foreground, SkColor background, const SkString& fontFamily, SkScalar fontSize, SkFontStyle fontStyle,
                          SkSize reqSize, SkScalar x, SkScalar y) {
 
-    SkString str(text);
-    TextRange textRange(0, str.size());
-    Processor processor(str);
+    TextRange textRange(0, text.size());
+    Processor processor(std::move(text));
+
     if (!processor.computeCodeUnitProperties()) {
         return false;
     }
@@ -163,6 +163,19 @@
         return false;
     }
 
+    // Create utf8 -> utf16 conversion table: fUTF16FromUTF8[i] is the UTF16 code unit index of
+    // the codepoint that owns UTF8 code unit i; the extra last entry maps the end of the text.
+    auto text8 = fUnicode->convertUtf16ToUtf8(fText);
+    size_t utf16Index = 0;
+    fUTF16FromUTF8.push_back_n(text8.size() + 1, utf16Index);
+    fUnicode->forEachCodepoint(text8.c_str(), text8.size(),
+        [this, &utf16Index](SkUnichar unichar, int32_t start, int32_t end) {
+            for (auto i = start; i < end; ++i) { fUTF16FromUTF8[i] = utf16Index; }
+            // Codepoints above U+FFFF occupy two UTF16 code units (a surrogate pair).
+            utf16Index += unichar > 0xFFFF ? 2 : 1;
+       });
+    fUTF16FromUTF8[text8.size()] = utf16Index;
+
     // Get white spaces
     fUnicode->forEachCodepoint(fText.c_str(), fText.size(),
        [this](SkUnichar unichar, int32_t start, int32_t end) {
@@ -174,24 +187,18 @@
        });
 
     // Get line breaks
-    std::vector<SkUnicode::LineBreakBefore> lineBreaks;
-    if (!fUnicode->getLineBreaks(fText.c_str(), fText.size(), &lineBreaks)) {
-        return false;
-    }
-    for (auto& lineBreak : lineBreaks) {
-        fCodeUnitProperties[lineBreak.pos] |= lineBreak.breakType == SkUnicode::LineBreakType::kHardLineBreak
-                                           ? CodeUnitFlags::kHardLineBreakBefore
-                                           : CodeUnitFlags::kSoftLineBreakBefore;
-    }
+    fUnicode->forEachBreak(fText.c_str(), fText.size(), SkUnicode::BreakType::kLines,
+                           [&](SkBreakIterator::Position pos, SkBreakIterator::Status status){
+                                fCodeUnitProperties[pos] |= (status == (SkBreakIterator::Status)SkUnicode::LineBreakType::kHardLineBreak
+                                                               ? CodeUnitFlags::kHardLineBreakBefore
+                                                               : CodeUnitFlags::kSoftLineBreakBefore);
+                            });
 
     // Get graphemes
-    std::vector<SkUnicode::Position> graphemes;
-    if (!fUnicode->getGraphemes(fText.c_str(), fText.size(), &graphemes)) {
-        return false;
-    }
-    for (auto pos : graphemes) {
-        fCodeUnitProperties[pos] |= CodeUnitFlags::kGraphemeStart;
-    }
+    fUnicode->forEachBreak(fText.c_str(), fText.size(), SkUnicode::BreakType::kGraphemes,
+                           [&](SkBreakIterator::Position pos, SkBreakIterator::Status){
+                                fCodeUnitProperties[pos]|= CodeUnitFlags::kGraphemeStart;
+                            });
 
     return true;
 }
diff --git a/experimental/sktext/src/Shaper.cpp b/experimental/sktext/src/Shaper.cpp
index 2efb47d..82b6728 100644
--- a/experimental/sktext/src/Shaper.cpp
+++ b/experimental/sktext/src/Shaper.cpp
@@ -6,29 +6,30 @@
 namespace skia {
 namespace text {
 
+// TODO: SkShaper operates in UTF8 indexes
 // TODO: calculate intrinsic sizes
 // Shape the text in one line
 bool Shaper::process() {
 
-    auto text(fProcessor->fText);
+    SkString text8 = fProcessor->fUnicode->convertUtf16ToUtf8(fProcessor->fText);
     for (auto& block : fProcessor->fFontBlocks) {
 
         SkFont font(this->createFont(block));
 
-        SkShaper::TrivialFontRunIterator fontIter(font, text.size());
-        SkShaper::TrivialLanguageRunIterator langIter(text.c_str(), text.size());
+        SkShaper::TrivialFontRunIterator fontIter(font, text8.size());
+        SkShaper::TrivialLanguageRunIterator langIter(text8.c_str(), text8.size());
         std::unique_ptr<SkShaper::BiDiRunIterator> bidiIter(
             SkShaper::MakeSkUnicodeBidiRunIterator(
-                fProcessor->fUnicode.get(), text.c_str(), text.size(), fDefaultTextDirection == TextDirection::kLtr ? 0 : 1));
+                fProcessor->fUnicode.get(), text8.c_str(), text8.size(), fDefaultTextDirection == TextDirection::kLtr ? 0 : 1));
         std::unique_ptr<SkShaper::ScriptRunIterator> scriptIter(
-            SkShaper::MakeSkUnicodeHbScriptRunIterator(fProcessor->fUnicode.get(), text.c_str(), text.size()));
+            SkShaper::MakeSkUnicodeHbScriptRunIterator(fProcessor->fUnicode.get(), text8.c_str(), text8.size()));
         auto shaper = SkShaper::MakeShapeDontWrapOrReorder();
         if (shaper == nullptr) {
             // For instance, loadICU does not work. We have to stop the process
             return false;
         }
 
-        shaper->shape(text.c_str(), text.size(),
+        shaper->shape(text8.c_str(), text8.size(),
                 fontIter, *bidiIter, *scriptIter, langIter,
                 std::numeric_limits<SkScalar>::max(), this);
     }
@@ -38,6 +39,12 @@
 
 void Shaper::commitRunBuffer(const RunInfo&) {
     fCurrentRun->commit();
+
+    // Shaping ran on the UTF8 copy of the text, so every cluster holds a UTF8 offset:
+    // remap each one to its UTF16 index through the processor's conversion table.
+    for (auto& cluster : fCurrentRun->fClusters) {
+        cluster = fProcessor->fUTF16FromUTF8[cluster];
+    }
     fProcessor->fRuns.emplace_back(std::move(*fCurrentRun));
 }
 
diff --git a/experimental/sktext/src/TextRun.cpp b/experimental/sktext/src/TextRun.cpp
index 8e2e450..3a53b68 100644
--- a/experimental/sktext/src/TextRun.cpp
+++ b/experimental/sktext/src/TextRun.cpp
@@ -8,19 +8,14 @@
 class Processor;
 
 TextRun::TextRun(const SkShaper::RunHandler::RunInfo& info)
-    : fFont(info.fFont) {
-  fBidiLevel = info.fBidiLevel;
-  fAdvance = info.fAdvance;
-  fUtf8Range = info.utf8Range;
-  fGlyphs.push_back_n(info.glyphCount);
-  fBounds.push_back_n(info.glyphCount);
-  fPositions.push_back_n(info.glyphCount + 1);
-  fClusters.push_back_n(info.glyphCount + 1);
-
-  // To make edge cases easier:
-  fPositions[info.glyphCount] = fAdvance;
-  fClusters[info.glyphCount] =
-      leftToRight() ? info.utf8Range.end() : info.utf8Range.begin();
+    : fFont(info.fFont)
+    , fBidiLevel(info.fBidiLevel)
+    , fAdvance(info.fAdvance)
+    , fUtf8Range(info.utf8Range) {
+    fGlyphs.push_back_n(info.glyphCount);
+    fBounds.push_back_n(info.glyphCount);
+    fPositions.push_back_n(info.glyphCount + 1);
+    fClusters.push_back_n(info.glyphCount + 1);
 }
 
 void TextRun::commit() {
diff --git a/experimental/sktext/src/TextRun.h b/experimental/sktext/src/TextRun.h
index 6bc0e12..c7080ca 100644
--- a/experimental/sktext/src/TextRun.h
+++ b/experimental/sktext/src/TextRun.h
@@ -26,6 +26,7 @@
  private:
   friend class Wrapper;
   friend class Processor;
+  friend class Shaper;
 
   SkFont fFont;
 
diff --git a/modules/skshaper/src/SkUnicode.h b/modules/skshaper/src/SkUnicode.h
index 87134b2..02ab5c9 100644
--- a/modules/skshaper/src/SkUnicode.h
+++ b/modules/skshaper/src/SkUnicode.h
@@ -66,6 +66,7 @@
     virtual Status status() = 0;
     virtual bool isDone() = 0;
     virtual bool setText(const char utftext8[], int utf8Units) = 0;
+    virtual bool setText(const char16_t utftext16[], int utf16Units) = 0;
 };
 
 class SKUNICODE_API SkScriptIterator {
@@ -93,8 +94,8 @@
             BidiLevel level;
         };
         enum class LineBreakType {
-            kSoftLineBreak,
-            kHardLineBreak
+            kSoftLineBreak = 0,
+            kHardLineBreak = 100,
         };
 
         enum class BreakType {
@@ -116,13 +117,14 @@
         virtual bool isSpace(SkUnichar utf8) = 0;
         virtual SkString convertUtf16ToUtf8(const std::u16string& utf16) = 0;
 
-        // Methods used in SkShaper
+        // Methods used in SkShaper and SkText
         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
             (const uint16_t text[], int count, SkBidiIterator::Direction) = 0;
         virtual std::unique_ptr<SkBidiIterator> makeBidiIterator
             (const char text[], int count, SkBidiIterator::Direction) = 0;
         virtual std::unique_ptr<SkBreakIterator> makeBreakIterator
             (const char locale[], BreakType breakType) = 0;
+        virtual std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType type) = 0;
         virtual std::unique_ptr<SkScriptIterator> makeScriptIterator() = 0;
 
         // High level methods (that we actually use somewhere=SkParagraph)
@@ -148,6 +150,55 @@
             }
         }
 
+        // Calls callback(codepoint, startUnit, endUnit) for every codepoint of a UTF16 buffer.
+        template <typename Callback>
+        void forEachCodepoint(const char16_t* utf16, int32_t utf16Units, Callback&& callback) {
+            const auto* const first = reinterpret_cast<const uint16_t*>(utf16);
+            const uint16_t* const last = first + utf16Units;
+            for (const uint16_t* cursor = first; cursor < last;) {
+                const auto before = cursor - first;
+                const SkUnichar unichar = SkUTF::NextUTF16(&cursor, last);
+                callback(unichar, before, cursor - first);
+            }
+        }
+
+        template <typename Callback>  // invoked as callback(pos16, unitOffset, level) on each bidi level change
+        void forEachBidiRegion(const uint16_t utf16[], int utf16Units, SkBidiIterator::Direction dir, Callback&& callback) {
+            auto iter = makeBidiIterator(utf16, utf16Units, dir);  // NOTE(review): not null-checked before use
+            const uint16_t* start16 = utf16;  // read cursor, advanced one codepoint per pass
+            const uint16_t* end16 = utf16 + utf16Units;
+            SkBidiIterator::Level currentLevel = 0;  // level of the run currently being scanned
+            // Walk the text codepoint by codepoint and fire the callback whenever the level changes.
+            SkBidiIterator::Position pos16 = 0;
+            while (pos16 <= iter->getLength()) {
+                auto level = iter->getLevelAt(pos16);
+                if (pos16 == 0) {
+                    currentLevel = level;  // first position only seeds the level, nothing to report
+                } else if (level != currentLevel) {
+                    callback(pos16, start16 - utf16, currentLevel);  // reports the level that just ended
+                    currentLevel = level;
+                }
+                if (start16 == end16) {
+                    break;  // NOTE(review): the last run is never passed to the callback - confirm intended
+                }
+                SkUnichar u = SkUTF::NextUTF16(&start16, end16);
+                pos16 += SkUTF::ToUTF16(u);  // presumably the UTF16 unit count of u; keeps pos16 in step with start16
+            }
+        }
+
+        // Calls callback(position, status) for each break of the given type in a UTF16 buffer.
+        template <typename Callback>
+        void forEachBreak(const char16_t utf16[], int utf16Units, SkUnicode::BreakType type, Callback&& callback) {
+            auto iter = makeBreakIterator(type);
+            // Without an iterator, or if it rejects the text, there is nothing valid to walk.
+            if (!iter || !iter->setText(utf16, utf16Units)) {
+                return;
+            }
+            for (auto pos = iter->next(); !iter->isDone(); pos = iter->next()) {
+                callback(pos, iter->status());
+            }
+        }
+
         virtual void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) = 0;
 
         static std::unique_ptr<SkUnicode> Make();
diff --git a/modules/skshaper/src/SkUnicode_icu.cpp b/modules/skshaper/src/SkUnicode_icu.cpp
index 4a3a2e5..93d4fd7 100644
--- a/modules/skshaper/src/SkUnicode_icu.cpp
+++ b/modules/skshaper/src/SkUnicode_icu.cpp
@@ -184,6 +184,23 @@
         fLastResult = 0;
         return true;
     }
+    bool setText(const char16_t utftext16[], int utf16Units) override {  // UTF16 counterpart of the UTF8 setText
+        UErrorCode status = U_ZERO_ERROR;
+        ICUUText text(utext_openUChars(nullptr, reinterpret_cast<const UChar*>(&utftext16[0]), utf16Units, &status));
+        // Both ICU calls report failure through `status`; either failure logs and returns false.
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        SkASSERT(text);
+        ubrk_setUText(fBreakIterator.get(), text.get(), &status);  // NOTE(review): presumably utftext16 must outlive iteration - confirm
+        if (U_FAILURE(status)) {
+            SkDEBUGF("Break error: %s", u_errorName(status));
+            return false;
+        }
+        fLastResult = 0;
+        return true;
+    }
 };
 
 class SkIcuBreakIteratorCache {
@@ -427,6 +444,9 @@
         }
         return std::unique_ptr<SkBreakIterator>(new SkBreakIterator_icu(std::move(iterator)));
     }
+    std::unique_ptr<SkBreakIterator> makeBreakIterator(BreakType breakType) override {
+        return makeBreakIterator(uloc_getDefault(), breakType);  // locale-less overload: forwards with ICU's default locale
+    }
     std::unique_ptr<SkScriptIterator> makeScriptIterator() override {
         return SkScriptIterator_icu::makeScriptIterator();
     }