Merge pull request #1421 from gennadiycivil/master upstream cl 182543808

commit: 46ab9ecf1f861493e49ade39ef2f794ce34dc0b4 [log] [tgz]
author: Gennadiy Civil <gennadiycivil@users.noreply.github.com> Wed Jan 24 21:31:29 2018
committer: GitHub <noreply@github.com> Wed Jan 24 21:31:29 2018
tree: 6dbd5eed1921e3ee8fb549e0514852d70768482a
parent: 21cf8360036df596467c100561e16563d551191e [diff]
parent: b9651c04ef7bfb1fc87fb17c5d8d960d1437cc56 [diff]
diff --git a/googletest/docs/AdvancedGuide.md b/googletest/docs/AdvancedGuide.md
index ccb087c..e1df20d 100644
--- a/googletest/docs/AdvancedGuide.md
+++ b/googletest/docs/AdvancedGuide.md

@@ -1951,6 +1951,17 @@
 _Availability:_ Linux, Windows, Mac.  (In Google Test 1.3.0 and lower,
 the default behavior is that the elapsed time is **not** printed.)
 
+**Availability**: Linux, Windows, Mac.
+
+#### Suppressing UTF-8 Text Output
+
+In case of assertion failures, gUnit prints expected and actual values of type
+`string` both as hex-encoded strings as well as in readable UTF-8 text if they
+contain valid non-ASCII UTF-8 characters. If you want to suppress the UTF-8 text
+because, for example, you don't have an UTF-8 compatible output medium, run the
+test program with `--gunit_print_utf8=0` or set the `GUNIT_PRINT_UTF8`
+environment variable to `0`.
+
 ### Generating an XML Report ###
 
 Google Test can emit a detailed XML report to a file in addition to its normal

diff --git a/googletest/include/gtest/gtest.h b/googletest/include/gtest/gtest.h
index 93b755f..01994e6 100644
--- a/googletest/include/gtest/gtest.h
+++ b/googletest/include/gtest/gtest.h

@@ -115,6 +115,9 @@
 // test.
 GTEST_DECLARE_bool_(print_time);
 
+// This flags control whether Google Test prints UTF8 characters as text.
+GTEST_DECLARE_bool_(print_utf8);
+
 // This flag specifies the random number seed.
 GTEST_DECLARE_int32_(random_seed);
 

diff --git a/googletest/src/gtest-internal-inl.h b/googletest/src/gtest-internal-inl.h
index 5ec0af9..099761a 100644
--- a/googletest/src/gtest-internal-inl.h
+++ b/googletest/src/gtest-internal-inl.h

@@ -86,6 +86,7 @@
 const char kListTestsFlag[] = "list_tests";
 const char kOutputFlag[] = "output";
 const char kPrintTimeFlag[] = "print_time";
+const char kPrintUTF8Flag[] = "print_utf8";
 const char kRandomSeedFlag[] = "random_seed";
 const char kRepeatFlag[] = "repeat";
 const char kShuffleFlag[] = "shuffle";
@@ -166,6 +167,7 @@
     list_tests_ = GTEST_FLAG(list_tests);
     output_ = GTEST_FLAG(output);
     print_time_ = GTEST_FLAG(print_time);
+    print_utf8_ = GTEST_FLAG(print_utf8);
     random_seed_ = GTEST_FLAG(random_seed);
     repeat_ = GTEST_FLAG(repeat);
     shuffle_ = GTEST_FLAG(shuffle);
@@ -187,6 +189,7 @@
     GTEST_FLAG(list_tests) = list_tests_;
     GTEST_FLAG(output) = output_;
     GTEST_FLAG(print_time) = print_time_;
+    GTEST_FLAG(print_utf8) = print_utf8_;
     GTEST_FLAG(random_seed) = random_seed_;
     GTEST_FLAG(repeat) = repeat_;
     GTEST_FLAG(shuffle) = shuffle_;
@@ -208,6 +211,7 @@
   bool list_tests_;
   std::string output_;
   bool print_time_;
+  bool print_utf8_;
   internal::Int32 random_seed_;
   internal::Int32 repeat_;
   bool shuffle_;

diff --git a/googletest/src/gtest-printers.cc b/googletest/src/gtest-printers.cc
index dd67f64..1bdf243 100644
--- a/googletest/src/gtest-printers.cc
+++ b/googletest/src/gtest-printers.cc

@@ -43,12 +43,13 @@
 // defines Foo.
 
 #include "gtest/gtest-printers.h"
-#include <ctype.h>
 #include <stdio.h>
+#include <cctype>
 #include <cwchar>
 #include <ostream>  // NOLINT
 #include <string>
 #include "gtest/internal/gtest-port.h"
+#include "src/gtest-internal-inl.h"
 
 namespace testing {
 
@@ -262,11 +263,12 @@
 GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_
 GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
 GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_
-static void PrintCharsAsStringTo(
+static CharFormat PrintCharsAsStringTo(
     const CharType* begin, size_t len, ostream* os) {
   const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\"";
   *os << kQuoteBegin;
   bool is_previous_hex = false;
+  CharFormat print_format = kAsIs;
   for (size_t index = 0; index < len; ++index) {
     const CharType cur = begin[index];
     if (is_previous_hex && IsXDigit(cur)) {
@@ -276,8 +278,13 @@
       *os << "\" " << kQuoteBegin;
     }
     is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape;
+    // Remember if any characters required hex escaping.
+    if (is_previous_hex) {
+      print_format = kHexEscape;
+    }
   }
   *os << "\"";
+  return print_format;
 }
 
 // Prints a (const) char/wchar_t array of 'len' elements, starting at address
@@ -347,15 +354,88 @@
 }
 #endif  // wchar_t is native
 
+namespace {
+
+bool ContainsUnprintableControlCodes(const char* str, size_t length) {
+  for (size_t i = 0; i < length; i++) {
+    char ch = *str++;
+    if (std::iscntrl(ch)) {
+        switch (ch) {
+        case '\t':
+        case '\n':
+        case '\r':
+          break;
+        default:
+          return true;
+        }
+      }
+  }
+  return false;
+}
+
+bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; }
+
+bool IsValidUTF8(const char* str, size_t length) {
+  const unsigned char *s = reinterpret_cast<const unsigned char *>(str);
+
+  for (size_t i = 0; i < length;) {
+    unsigned char lead = s[i++];
+
+    if (lead <= 0x7f) {
+      continue;  // single-byte character (ASCII) 0..7F
+    }
+    if (lead < 0xc2) {
+      return false;  // trail byte or non-shortest form
+    } else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) {
+      ++i;  // 2-byte character
+    } else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length &&
+               IsUTF8TrailByte(s[i]) &&
+               IsUTF8TrailByte(s[i + 1]) &&
+               // check for non-shortest form and surrogate
+               (lead != 0xe0 || s[i] >= 0xa0) &&
+               (lead != 0xed || s[i] < 0xa0)) {
+      i += 2;  // 3-byte character
+    } else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length &&
+               IsUTF8TrailByte(s[i]) &&
+               IsUTF8TrailByte(s[i + 1]) &&
+               IsUTF8TrailByte(s[i + 2]) &&
+               // check for non-shortest form
+               (lead != 0xf0 || s[i] >= 0x90) &&
+               (lead != 0xf4 || s[i] < 0x90)) {
+      i += 3;  // 4-byte character
+    } else {
+      return false;
+    }
+  }
+  return true;
+}
+
+void ConditionalPrintAsText(const char* str, size_t length, ostream* os) {
+  if (!ContainsUnprintableControlCodes(str, length) &&
+      IsValidUTF8(str, length)) {
+    *os << "\n    As Text: \"" << str << "\"";
+  }
+}
+
+}  // anonymous namespace
+
 // Prints a ::string object.
 #if GTEST_HAS_GLOBAL_STRING
 void PrintStringTo(const ::string& s, ostream* os) {
-  PrintCharsAsStringTo(s.data(), s.size(), os);
+  if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) {
+    if (GTEST_FLAG(print_utf8)) {
+      ConditionalPrintAsText(s.data(), s.size(), os);
+    }
+  }
 }
 #endif  // GTEST_HAS_GLOBAL_STRING
 
 void PrintStringTo(const ::std::string& s, ostream* os) {
-  PrintCharsAsStringTo(s.data(), s.size(), os);
+  if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) {
+    if (GTEST_FLAG(print_utf8)) {
+      ConditionalPrintAsText(s.data(), s.size(), os);
+    }
+  }
 }
 
 // Prints a ::wstring object.

diff --git a/googletest/src/gtest.cc b/googletest/src/gtest.cc
index 3435f9c..2c25f83 100644
--- a/googletest/src/gtest.cc
+++ b/googletest/src/gtest.cc

@@ -246,6 +246,12 @@
     "True iff " GTEST_NAME_
     " should display elapsed time in text output.");
 
+GTEST_DEFINE_bool_(
+    print_utf8,
+    internal::BoolFromGTestEnv("print_utf8", true),
+    "True iff " GTEST_NAME_
+    " prints UTF8 characters as text.");
+
 GTEST_DEFINE_int32_(
     random_seed,
     internal::Int32FromGTestEnv("random_seed", 0),
@@ -5230,6 +5236,7 @@
       ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
       ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
       ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
+      ParseBoolFlag(arg, kPrintUTF8Flag, &GTEST_FLAG(print_utf8)) ||
       ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
       ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
       ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||

diff --git a/googletest/test/gtest-printers_test.cc b/googletest/test/gtest-printers_test.cc
index aff97a2..e30ce7e 100644
--- a/googletest/test/gtest-printers_test.cc
+++ b/googletest/test/gtest-printers_test.cc

@@ -1552,6 +1552,78 @@
   EXPECT_PRINT_TO_STRING_(mutable_str_with_nul, "\"hello\\0 world\"");
 }
 
+  TEST(PrintToStringTest, ContainsNonLatin) {
+  // Sanity test with valid UTF-8. Prints both in hex and as text.
+  std::string non_ascii_str = ::std::string("오전 4:30");
+  EXPECT_PRINT_TO_STRING_(non_ascii_str,
+                          "\"\\xEC\\x98\\xA4\\xEC\\xA0\\x84 4:30\"\n"
+                          "    As Text: \"오전 4:30\"");
+  non_ascii_str = ::std::string("From ä — ẑ");
+  EXPECT_PRINT_TO_STRING_(non_ascii_str,
+                          "\"From \\xC3\\xA4 \\xE2\\x80\\x94 \\xE1\\xBA\\x91\""
+                          "\n    As Text: \"From ä — ẑ\"");
+}
+
+TEST(IsValidUTF8Test, IllFormedUTF8) {
+  // The following test strings are ill-formed UTF-8 and are printed
+  // as hex only (or ASCII, in case of ASCII bytes) because IsValidUTF8() is
+  // expected to fail, thus output does not contain "As Text:".
+
+  static const char *const kTestdata[][2] = {
+    // 2-byte lead byte followed by a single-byte character.
+    {"\xC3\x74", "\"\\xC3t\""},
+    // Valid 2-byte character followed by an orphan trail byte.
+    {"\xC3\x84\xA4", "\"\\xC3\\x84\\xA4\""},
+    // Lead byte without trail byte.
+    {"abc\xC3", "\"abc\\xC3\""},
+    // 3-byte lead byte, single-byte character, orphan trail byte.
+    {"x\xE2\x70\x94", "\"x\\xE2p\\x94\""},
+    // Truncated 3-byte character.
+    {"\xE2\x80", "\"\\xE2\\x80\""},
+    // Truncated 3-byte character followed by valid 2-byte char.
+    {"\xE2\x80\xC3\x84", "\"\\xE2\\x80\\xC3\\x84\""},
+    // Truncated 3-byte character followed by a single-byte character.
+    {"\xE2\x80\x7A", "\"\\xE2\\x80z\""},
+    // 3-byte lead byte followed by valid 3-byte character.
+    {"\xE2\xE2\x80\x94", "\"\\xE2\\xE2\\x80\\x94\""},
+    // 4-byte lead byte followed by valid 3-byte character.
+    {"\xF0\xE2\x80\x94", "\"\\xF0\\xE2\\x80\\x94\""},
+    // Truncated 4-byte character.
+    {"\xF0\xE2\x80", "\"\\xF0\\xE2\\x80\""},
+     // Invalid UTF-8 byte sequences embedded in other chars.
+    {"abc\xE2\x80\x94\xC3\x74xyc", "\"abc\\xE2\\x80\\x94\\xC3txyc\""},
+    {"abc\xC3\x84\xE2\x80\xC3\x84xyz",
+     "\"abc\\xC3\\x84\\xE2\\x80\\xC3\\x84xyz\""},
+    // Non-shortest UTF-8 byte sequences are also ill-formed.
+    // The classics: xC0, xC1 lead byte.
+    {"\xC0\x80", "\"\\xC0\\x80\""},
+    {"\xC1\x81", "\"\\xC1\\x81\""},
+    // Non-shortest sequences.
+    {"\xE0\x80\x80", "\"\\xE0\\x80\\x80\""},
+    {"\xf0\x80\x80\x80", "\"\\xF0\\x80\\x80\\x80\""},
+    // Last valid code point before surrogate range, should be printed as text,
+    // too.
+    {"\xED\x9F\xBF", "\"\\xED\\x9F\\xBF\"\n    As Text: \"퟿\""},
+    // Start of surrogate lead. Surrogates are not printed as text.
+    {"\xED\xA0\x80", "\"\\xED\\xA0\\x80\""},
+    // Last non-private surrogate lead.
+    {"\xED\xAD\xBF", "\"\\xED\\xAD\\xBF\""},
+    // First private-use surrogate lead.
+    {"\xED\xAE\x80", "\"\\xED\\xAE\\x80\""},
+    // Last private-use surrogate lead.
+    {"\xED\xAF\xBF", "\"\\xED\\xAF\\xBF\""},
+    // Mid-point of surrogate trail.
+    {"\xED\xB3\xBF", "\"\\xED\\xB3\\xBF\""},
+    // First valid code point after surrogate range, should be printed as text,
+    // too.
+    {"\xEE\x80\x80", "\"\\xEE\\x80\\x80\"\n    As Text: \"\""}
+  };
+
+  for (int i = 0; i < int(sizeof(kTestdata)/sizeof(kTestdata[0])); ++i) {
+    EXPECT_PRINT_TO_STRING_(kTestdata[i][0], kTestdata[i][1]);
+  }
+}
+
 #undef EXPECT_PRINT_TO_STRING_
 
 TEST(UniversalTersePrintTest, WorksForNonReference) {
commit	46ab9ecf1f861493e49ade39ef2f794ce34dc0b4	[log] [tgz]
author	Gennadiy Civil <gennadiycivil@users.noreply.github.com>	Wed Jan 24 21:31:29 2018
committer	GitHub <noreply@github.com>	Wed Jan 24 21:31:29 2018
tree	6dbd5eed1921e3ee8fb549e0514852d70768482a
parent	21cf8360036df596467c100561e16563d551191e [diff]
parent	b9651c04ef7bfb1fc87fb17c5d8d960d1437cc56 [diff]