diff --git a/DEPS b/DEPS
index 8e4e7e0..1deff7a 100644
--- a/DEPS
+++ b/DEPS
@@ -300,19 +300,19 @@ # Three lines of non-changing comments so that # the commit queue can handle CLs rolling V8 # and whatever else without interference from each other. - 'src_internal_revision': '89f173641a8b5f919e6729a333c7609cca1197a3', + 'src_internal_revision': '5ad244fa14b6d5e3a5d0cfb44aa0243df579896a', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling Skia # and whatever else without interference from each other. - 'skia_revision': '407f4bb9729566f1081cfcd4f7676d72c6fb6468', + 'skia_revision': '7c9af01962e6fe030b4e617970fc96d293243dea', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling V8 # and whatever else without interference from each other. - 'v8_revision': 'b6ff5b9ce80de8cf26877ffd79165c95a868420f', + 'v8_revision': '22cf2ce70b1350809748b5c40c155dc743c5d97d', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling ANGLE # and whatever else without interference from each other. - 'angle_revision': '43ef50f389e97131dc487081d6c8c02373e4a22c', + 'angle_revision': '4384e49ca9e7d7c8e14f4d7272788352da45a035', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling SwiftShader # and whatever else without interference from each other. @@ -375,7 +375,7 @@ # Three lines of non-changing comments so that # the commit queue can handle CLs rolling catapult # and whatever else without interference from each other. - 'catapult_revision': 'f51324c1c818a663f7d9682f8784c3d3f5a92fe0', + 'catapult_revision': '137943091e19bbf31e5d1b869a479154f1d63c64', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling CrossBench # and whatever else without interference from each other. @@ -455,7 +455,7 @@ # Three lines of non-changing comments so that # the commit queue can handle CLs rolling nearby # and whatever else without interference from each other. - 'nearby_revision': '3a49499de49ca8c3fc5c26a8cee50b0d9cd69e63', + 'nearby_revision': '240159e8722c63f5be342ac8a6c07771478f8bb3', # Three lines of non-changing comments so that # the commit queue can handle CLs rolling securemessage # and whatever else without interference from each other. @@ -807,7 +807,7 @@ 'src/clank': { 'url': Var('chrome_git') + '/clank/internal/apps.git' + '@' + - '95ab4f348c008a69d5ff8bf551892a177d507bb5', + '37fa3ad981bc8d801fff34db986522561cbd97d1', 'condition': 'checkout_android and checkout_src_internal', }, @@ -1208,7 +1208,7 @@ # Tools used when building Chrome for Chrome OS. This affects both the Simple # Chrome workflow, as well as the chromeos-chrome ebuild. 
'src/third_party/chromite': { - 'url': Var('chromium_git') + '/chromiumos/chromite.git' + '@' + 'bfcd32f6018702a3bd8b99c6c4e58906de64155c', + 'url': Var('chromium_git') + '/chromiumos/chromite.git' + '@' + '547201a5e52de27e0842e79a5f71e162814fb474', 'condition': 'checkout_chromeos', }, @@ -1243,13 +1243,13 @@ }, 'src/third_party/depot_tools': - Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '422f4d59a426aaf889dbe8d7534ce3da39db4c49', + Var('chromium_git') + '/chromium/tools/depot_tools.git' + '@' + '14123b76a867be96ac0464459e9ee76bedb7301c', 'src/third_party/devtools-frontend/src': Var('chromium_git') + '/devtools/devtools-frontend' + '@' + Var('devtools_frontend_revision'), 'src/third_party/devtools-frontend-internal': { - 'url': Var('chrome_git') + '/devtools/devtools-internal.git' + '@' + '548ae6e540ef842c9d519bd0381ec5cf243ba525', + 'url': Var('chrome_git') + '/devtools/devtools-internal.git' + '@' + '025292f1e8c00c81bde557b536635fac5539424a', 'condition': 'checkout_src_internal', }, @@ -1257,7 +1257,7 @@ Var('chromium_git') + '/chromium/dom-distiller/dist.git' + '@' + '199de96b345ada7c6e7e6ba3d2fa7a6911b8767d', 'src/third_party/eigen3/src': - Var('chromium_git') + '/external/gitlab.com/libeigen/eigen.git' + '@' + '316eab8deb574d150f9cfc7f8b170156dc0cdd9f', + Var('chromium_git') + '/external/gitlab.com/libeigen/eigen.git' + '@' + '7d7576f3262fa15c34d5575637bd8d7ff4a83f16', 'src/third_party/emoji-metadata/src': { 'url': Var('chromium_git') + '/external/github.com/googlefonts/emoji-metadata' + '@' + '045f146fca682a836e01cd265171312bfb300e06', @@ -1580,7 +1580,7 @@ Var('chromium_git') + '/webm/libwebp.git' + '@' + 'fd7b5d48464475408d32d2611bdb6947d4246b97', 'src/third_party/libyuv': - Var('chromium_git') + '/libyuv/libyuv.git' + '@' + '3df47620346483f07a37a0ecb594ad2f6ec0a1d7', + Var('chromium_git') + '/libyuv/libyuv.git' + '@' + '29bcf021c68e5478e1cd0c1099122dbb10eb474e', 'src/third_party/lighttpd': { 'url': Var('chromium_git') + '/chromium/deps/lighttpd.git' + '@' + Var('lighttpd_revision'), @@ -1705,7 +1705,7 @@ Var('chromium_git') + '/external/github.com/cisco/openh264' + '@' + '09a4f3ec842a8932341b195c5b01e141c8a16eb7', 'src/third_party/openscreen/src': - Var('chromium_git') + '/openscreen' + '@' + 'a5fd01ae77550b18ac16e2c28db793f624c52a98', + Var('chromium_git') + '/openscreen' + '@' + 'fd16b7fba06394410af79cb8f1634a41d4166a9d', 'src/third_party/openxr/src': { 'url': Var('chromium_git') + '/external/github.com/KhronosGroup/OpenXR-SDK' + '@' + '58a00cf85c39ad5ec4dc43a769624e420c06179a', @@ -1716,7 +1716,7 @@ Var('pdfium_git') + '/pdfium.git' + '@' + Var('pdfium_revision'), 'src/third_party/perfetto': - Var('android_git') + '/platform/external/perfetto.git' + '@' + 'd173e0a2c2421139efa7267c9dd94567adc3df6d', + Var('android_git') + '/platform/external/perfetto.git' + '@' + '0b04374e77e306b1138711ae2ff0d19e2e670bfb', 'src/third_party/perl': { 'url': Var('chromium_git') + '/chromium/deps/perl.git' + '@' + '6f3e5028eb65d0b4c5fdd792106ac4c84eee1eb3', @@ -1848,7 +1848,7 @@ Var('chromium_git') + '/external/github.com/GoogleChromeLabs/text-fragments-polyfill.git' + '@' + 'c036420683f672d685e27415de0a5f5e85bdc23f', 'src/third_party/tflite/src': - Var('chromium_git') + '/external/github.com/tensorflow/tensorflow.git' + '@' + '5abeda2494b0eb551b5c4689e47008fe374684dc', + Var('chromium_git') + '/external/github.com/tensorflow/tensorflow.git' + '@' + 'f3e12f6b28647da499edb69794745f14869f0df9', 'src/third_party/turbine': { 'packages': [ @@ -1901,7 +1901,7 @@ Var('chromium_git') + 
'/external/github.com/gpuweb/cts.git' + '@' + 'cf7f6bbb113147b698ad23d06eda62aa091f4ffb', 'src/third_party/webrtc': - Var('webrtc_git') + '/src.git' + '@' + 'de923386a0327e702d3854bd2be2fe9c0b2ccd22', + Var('webrtc_git') + '/src.git' + '@' + 'f6f642d7fa49f40eda902ae8fc4eae8cb7d427b7', # Wuffs' canonical repository is at github.com/google/wuffs, but we use # Skia's mirror of Wuffs, the same as in upstream Skia's DEPS file. @@ -4195,7 +4195,7 @@ 'src/ios_internal': { 'url': Var('chrome_git') + '/chrome/ios_internal.git' + '@' + - '08cd427ccbba2cd30b078e96625ddc1cfd0ab95b', + '2fc7b9699488c291a8305489eb0dcedff61bc448', 'condition': 'checkout_ios and checkout_src_internal', },
diff --git a/ash/accessibility/accessibility_controller_impl.cc b/ash/accessibility/accessibility_controller_impl.cc
index ae4803a..7865cf7 100644
--- a/ash/accessibility/accessibility_controller_impl.cc
+++ b/ash/accessibility/accessibility_controller_impl.cc
@@ -1022,6 +1022,11 @@ registry->RegisterBooleanPref(prefs::kShouldAlwaysShowAccessibilityMenu, false); + if (::features:: + AreExperimentalAccessibilityColorEnhancementSettingsEnabled()) { + registry->RegisterBooleanPref(prefs::kAccessibilityColorFiltering, false); + } + // TODO(b/266816160): Make ChromeVox prefs are syncable, to so that ChromeOS // backs up users' ChromeVox settings and reflects across their devices. registry->RegisterBooleanPref(prefs::kAccessibilityChromeVoxAutoRead, false); @@ -1205,21 +1210,6 @@ if (::features:: AreExperimentalAccessibilityColorEnhancementSettingsEnabled()) { - registry->RegisterBooleanPref( - prefs::kAccessibilityColorFiltering, false, - user_prefs::PrefRegistrySyncable::SYNCABLE_OS_PREF); - registry->RegisterIntegerPref( - prefs::kAccessibilityGreyscaleAmount, 0, - user_prefs::PrefRegistrySyncable::SYNCABLE_OS_PREF); - registry->RegisterIntegerPref( - prefs::kAccessibilitySaturationAmount, 100, - user_prefs::PrefRegistrySyncable::SYNCABLE_OS_PREF); - registry->RegisterIntegerPref( - prefs::kAccessibilitySepiaAmount, 0, - user_prefs::PrefRegistrySyncable::SYNCABLE_OS_PREF); - registry->RegisterIntegerPref( - prefs::kAccessibilityHueRotationAmount, 0, - user_prefs::PrefRegistrySyncable::SYNCABLE_OS_PREF); registry->RegisterIntegerPref( prefs::kAccessibilityColorVisionCorrectionAmount, 100, user_prefs::PrefRegistrySyncable::SYNCABLE_OS_PREF); @@ -2040,26 +2030,6 @@ &AccessibilityControllerImpl::UpdateColorFilteringFromPrefs, base::Unretained(this))); pref_change_registrar_->Add( - prefs::kAccessibilityGreyscaleAmount, - base::BindRepeating( - &AccessibilityControllerImpl::UpdateColorFilteringFromPrefs, - base::Unretained(this))); - pref_change_registrar_->Add( - prefs::kAccessibilitySaturationAmount, - base::BindRepeating( - &AccessibilityControllerImpl::UpdateColorFilteringFromPrefs, - base::Unretained(this))); - pref_change_registrar_->Add( - prefs::kAccessibilitySepiaAmount, - base::BindRepeating( - &AccessibilityControllerImpl::UpdateColorFilteringFromPrefs, - base::Unretained(this))); - pref_change_registrar_->Add( - prefs::kAccessibilityHueRotationAmount, - base::BindRepeating( - &AccessibilityControllerImpl::UpdateColorFilteringFromPrefs, - base::Unretained(this))); - pref_change_registrar_->Add( prefs::kAccessibilityColorVisionCorrectionAmount, base::BindRepeating( &AccessibilityControllerImpl::UpdateColorFilteringFromPrefs, @@ -2272,23 +2242,6 @@ false); return; } - const float greyscale_amount = - active_user_prefs_->GetInteger(prefs::kAccessibilityGreyscaleAmount) / - 100.f; - color_enhancement_controller->SetGreyscaleAmount(greyscale_amount); - - const float saturation_amount = - active_user_prefs_->GetInteger(prefs::kAccessibilitySaturationAmount) / - 100.f; - color_enhancement_controller->SetSaturationAmount(saturation_amount); - - const float sepia_amount = - active_user_prefs_->GetInteger(prefs::kAccessibilitySepiaAmount) / 100.f; - color_enhancement_controller->SetSepiaAmount(sepia_amount); - - const int hue_rotation_amount = - active_user_prefs_->GetInteger(prefs::kAccessibilityHueRotationAmount); - color_enhancement_controller->SetHueRotationAmount(hue_rotation_amount); const float cvd_correction_amount = active_user_prefs_->GetInteger(
diff --git a/ash/accessibility/accessibility_controller_unittest.cc b/ash/accessibility/accessibility_controller_unittest.cc
index 8e77027..f03ede4 100644
--- a/ash/accessibility/accessibility_controller_unittest.cc
+++ b/ash/accessibility/accessibility_controller_unittest.cc
@@ -61,7 +61,8 @@
   void SetUp() override {
     scoped_feature_list_.InitWithFeatures(
         {media::kLiveCaption, media::kLiveCaptionSystemWideOnChromeOS,
-         ash::features::kOnDeviceSpeechRecognition},
+         ash::features::kOnDeviceSpeechRecognition,
+         ::features::kExperimentalAccessibilityColorEnhancementSettings},
        {});
     AshTestBase::SetUp();
   }
@@ -177,9 +178,11 @@
       prefs::kAccessibilityEnhancedNetworkVoicesInSelectToSpeakAllowed));
   if (::features::
           AreExperimentalAccessibilityColorEnhancementSettingsEnabled()) {
-    EXPECT_TRUE(prefs->FindPreference(prefs::kAccessibilitySepiaAmount));
-    EXPECT_TRUE(prefs->FindPreference(prefs::kAccessibilityHueRotationAmount));
-    EXPECT_TRUE(prefs->FindPreference(prefs::kAccessibilityGreyscaleAmount));
+    EXPECT_TRUE(prefs->FindPreference(prefs::kAccessibilityColorFiltering));
+    EXPECT_TRUE(
+        prefs->FindPreference(prefs::kAccessibilityColorVisionDeficiencyType));
+    EXPECT_TRUE(prefs->FindPreference(
+        prefs::kAccessibilityColorVisionCorrectionAmount));
   }
 }
diff --git a/ash/ambient/ambient_controller.cc b/ash/ambient/ambient_controller.cc
index 328836e..9489b2e9 100644
--- a/ash/ambient/ambient_controller.cc
+++ b/ash/ambient/ambient_controller.cc
@@ -254,11 +254,9 @@
       ambient::prefs::kAmbientModeManagedScreensaverImageDisplayIntervalSeconds,
       kManagedScreensaverImageRefreshInterval.InSeconds());

-  if (ash::features::IsScreenSaverDurationEnabled()) {
-    registry->RegisterIntegerPref(
-        ambient::prefs::kAmbientModeRunningDurationMinutes,
-        kDefaultScreenSaverDuration.InMinutes());
-  }
+  registry->RegisterIntegerPref(
+      ambient::prefs::kAmbientModeRunningDurationMinutes,
+      kDefaultScreenSaverDuration.InMinutes());
 }

 AmbientController::AmbientController(
diff --git a/ash/capture_mode/capture_audio_mixing_unittests.cc b/ash/capture_mode/capture_audio_mixing_unittests.cc
index 95a063f..cfd4a41 100644
--- a/ash/capture_mode/capture_audio_mixing_unittests.cc
+++ b/ash/capture_mode/capture_audio_mixing_unittests.cc
@@ -14,6 +14,7 @@ #include "ash/public/cpp/capture_mode/capture_mode_test_api.h" #include "ash/style/icon_button.h" #include "ash/test/ash_test_base.h" +#include "base/test/metrics/histogram_tester.h" #include "base/test/scoped_feature_list.h" namespace ash { @@ -132,6 +133,10 @@ } TEST_F(CaptureAudioMixingTest, ServiceWillRecordAudio) { + constexpr char kHistogramNameBase[] = "AudioRecordingMode"; + const std::string histogram_name = BuildHistogramName( + kHistogramNameBase, /*behavior=*/nullptr, /*append_ui_mode_suffix=*/true); + struct { const char* const scope_name; AudioRecordingMode audio_mode; @@ -149,6 +154,10 @@ for (const auto& test_case : kTestCases) { SCOPED_TRACE(test_case.scope_name); + + base::HistogramTester histogram_tester; + histogram_tester.ExpectBucketCount(histogram_name, test_case.audio_mode, 0); + auto* controller = StartSession(); controller->SetAudioRecordingMode(test_case.audio_mode); @@ -163,6 +172,8 @@ controller->EndVideoRecording(EndRecordingReason::kStopRecordingButton); WaitForCaptureFileToBeSaved(); + + histogram_tester.ExpectBucketCount(histogram_name, test_case.audio_mode, 1); } } @@ -189,6 +200,13 @@ }; TEST_F(ProjectorAudioMixingTest, AudioSettingsMenu) { + constexpr char kHistogramNameBase[] = "AudioRecordingMode"; + const std::string histogram_name = BuildHistogramName( + kHistogramNameBase, + CaptureModeTestApi().GetBehavior(BehaviorType::kProjector), + /*append_ui_mode_suffix=*/true); + base::HistogramTester histogram_tester; + StartProjectorModeSession(); auto* event_generator = GetEventGenerator(); @@ -211,6 +229,25 @@ // Microphone should still be selected by default. EXPECT_TRUE(IsAudioOptionChecked(kAudioMicrophone)); EXPECT_FALSE(IsAudioOptionChecked(kAudioSystemAndMicrophone)); + + // End the session and expect the correct audio mode was recorded. + auto* controller = CaptureModeController::Get(); + controller->Stop(); + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kMicrophone, 1); + histogram_tester.ExpectBucketCount( + histogram_name, AudioRecordingMode::kSystemAndMicrophone, 0); + + // Start a new session and select `kSystemAndMicrophone`, and expect the + // correct metrics will be recorded when the session ends. + StartProjectorModeSession(); + controller->SetAudioRecordingMode(AudioRecordingMode::kSystemAndMicrophone); + controller->Stop(); + + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kMicrophone, 1); + histogram_tester.ExpectBucketCount( + histogram_name, AudioRecordingMode::kSystemAndMicrophone, 1); } } // namespace ash
diff --git a/ash/capture_mode/capture_mode_behavior.cc b/ash/capture_mode/capture_mode_behavior.cc
index 3adb2cc..95a11ea 100644
--- a/ash/capture_mode/capture_mode_behavior.cc
+++ b/ash/capture_mode/capture_mode_behavior.cc
@@ -252,10 +252,9 @@ std::vector<message_center::ButtonInfo> GetNotificationButtonsInfo( bool for_video) const override { - CHECK(for_video); - return {message_center::ButtonInfo{l10n_util::GetStringUTF16( - IDS_ASH_SCREEN_CAPTURE_SHARE_TO_YOUTUBE)}, + for_video ? IDS_ASH_SCREEN_CAPTURE_SHARE_TO_YOUTUBE + : IDS_ASH_SCREEN_CAPTURE_BUTTON_EDIT)}, message_center::ButtonInfo{l10n_util::GetStringUTF16( IDS_ASH_SCREEN_CAPTURE_BUTTON_DELETE)}}; } @@ -420,13 +419,12 @@ if (!for_video && !Shell::Get()->session_controller()->IsUserSessionBlocked()) { - message_center::ButtonInfo edit_button( + buttons_info.emplace_back( l10n_util::GetStringUTF16(IDS_ASH_SCREEN_CAPTURE_BUTTON_EDIT)); - buttons_info.push_back(edit_button); } - message_center::ButtonInfo delete_button( + + buttons_info.emplace_back( l10n_util::GetStringUTF16(IDS_ASH_SCREEN_CAPTURE_BUTTON_DELETE)); - buttons_info.push_back(delete_button); return buttons_info; }
diff --git a/ash/capture_mode/capture_mode_controller.cc b/ash/capture_mode/capture_mode_controller.cc
index 240bb00..0f371ab 100644
--- a/ash/capture_mode/capture_mode_controller.cc
+++ b/ash/capture_mode/capture_mode_controller.cc
@@ -1333,18 +1333,21 @@ } void CaptureModeController::CaptureImage(const CaptureParams& capture_params, - const base::FilePath& path) { + const base::FilePath& path, + const CaptureModeBehavior* behavior) { // Note that |type_| may not necessarily be |kImage| here, since this may be // called to take an instant fullscreen screenshot for the keyboard shortcut, // which doesn't go through the capture mode UI, and doesn't change |type_|. - DCHECK(delegate_->IsCaptureAllowedByPolicy()); + CHECK(delegate_->IsCaptureAllowedByPolicy()); // Stop the capture session now, so as not to take a screenshot of the capture // bar. - if (IsActive()) + if (IsActive()) { + CHECK_EQ(capture_mode_session_->active_behavior(), behavior); Stop(); + } - DCHECK(!capture_params.bounds.IsEmpty()); + CHECK(!capture_params.bounds.IsEmpty()); auto* cursor_manager = Shell::Get()->cursor_manager(); bool was_cursor_originally_blocked = cursor_manager->IsCursorLocked(); @@ -1357,7 +1360,7 @@ capture_params.window, capture_params.bounds, base::BindOnce(&CaptureModeController::OnImageCaptured, weak_ptr_factory_.GetWeakPtr(), path, - was_cursor_originally_blocked)); + was_cursor_originally_blocked, behavior)); ++num_screenshots_taken_in_last_day_; ++num_screenshots_taken_in_last_week_; @@ -1392,6 +1395,7 @@ void CaptureModeController::OnImageCaptured( const base::FilePath& path, bool was_cursor_originally_blocked, + const CaptureModeBehavior* behavior, scoped_refptr<base::RefCountedMemory> png_bytes) { if (!was_cursor_originally_blocked) { auto* shell = Shell::Get(); @@ -1411,25 +1415,24 @@ base::BindOnce(&SaveFile, png_bytes, path, GetFallbackFilePathFromFile(path)), base::BindOnce(&CaptureModeController::OnImageFileSaved, - weak_ptr_factory_.GetWeakPtr(), png_bytes)); + weak_ptr_factory_.GetWeakPtr(), png_bytes, behavior)); } void CaptureModeController::OnImageFileSaved( scoped_refptr<base::RefCountedMemory> png_bytes, + const CaptureModeBehavior* behavior, const base::FilePath& file_saved_path) { if (file_saved_path.empty()) { ShowFailureNotification(); return; } - if (on_file_saved_callback_for_test_) + if (on_file_saved_callback_for_test_) { std::move(on_file_saved_callback_for_test_).Run(file_saved_path); + } DCHECK(png_bytes && png_bytes->size()); const auto image = gfx::Image::CreateFrom1xPNGBytes(png_bytes); CopyImageToClipboard(image); - // TODO(michelefan): Do not hard-code `BehaviorType::kDefault`. Screenshot - // notification should be separated among different behaviors. - CaptureModeBehavior* behavior = GetBehavior(BehaviorType::kDefault); ShowPreviewNotification(file_saved_path, image, CaptureModeType::kImage, behavior); if (Shell::Get()->session_controller()->IsActiveUserSessionStarted()) { @@ -1794,7 +1797,7 @@ } const absl::optional<CaptureParams> capture_params = GetCaptureParams(); - DCHECK(capture_params); + CHECK(capture_params); if (!delegate_->IsCaptureAllowedByPolicy()) { ShowDisabledNotification(CaptureAllowance::kDisallowedByPolicy); @@ -1803,7 +1806,8 @@ } if (type_ == CaptureModeType::kImage) { - CaptureImage(*capture_params, BuildImagePath()); + CaptureImage(*capture_params, BuildImagePath(), + capture_mode_session_->active_behavior()); } else { // HDCP affects only video recording. if (ShouldBlockRecordingForContentProtection(capture_params->window)) { @@ -2045,8 +2049,10 @@ RecordCaptureModeEntryType(entry_type); RecordCaptureModeConfiguration( CaptureModeType::kImage, source, - recording_type_, // This parameter will be ignored. 
- /*audio_on=*/false, GetBehavior(BehaviorType::kDefault)); + // The values of `recording_type_` and `GetEffectiveAudioRecordingMode()` + // will be ignored, since the type is `kImage`. + recording_type_, GetEffectiveAudioRecordingMode(), + GetBehavior(BehaviorType::kDefault)); } void CaptureModeController::PerformScreenshotsOfAllDisplays() { @@ -2061,9 +2067,11 @@ // whether we should localize the display name. const CaptureParams capture_params{controller->GetRootWindow(), controller->GetRootWindow()->bounds()}; - CaptureImage(capture_params, controllers.size() == 1 - ? BuildImagePath() - : BuildImagePathForDisplay(display_index)); + CaptureImage(capture_params, + controllers.size() == 1 + ? BuildImagePath() + : BuildImagePathForDisplay(display_index), + GetBehavior(BehaviorType::kDefault)); ++display_index; } } @@ -2072,7 +2080,10 @@ aura::Window* given_window) { const CaptureParams capture_params{given_window, gfx::Rect(given_window->bounds().size())}; - CaptureImage(capture_params, BuildImagePath()); + // TODO(michelefan): Add behavior type as an input parameter, if this API is + // used for other entry types in future. + CaptureImage(capture_params, BuildImagePath(), + GetBehavior(BehaviorType::kGameDashboard)); } CaptureModeSaveToLocation CaptureModeController::GetSaveToOption(
diff --git a/ash/capture_mode/capture_mode_controller.h b/ash/capture_mode/capture_mode_controller.h
index 666e21ec..55c06c8 100644
--- a/ash/capture_mode/capture_mode_controller.h
+++ b/ash/capture_mode/capture_mode_controller.h
@@ -210,12 +210,12 @@ // individual file. Note: this won't start a capture mode session. void CaptureScreenshotsOfAllDisplays(); - // Performs the instantscreen capture for the `given_window` which bypasses + // Performs the instant screen capture for the `given_window` which bypasses // the capture mode session. void CaptureScreenshotOfGivenWindow(aura::Window* given_window); // Called only while a capture session is in progress to perform the actual - // capture depending on the current selected |source_| and |type_|, and ends + // capture depending on the current selected `source_` and `type_`, and ends // the capture session. void PerformCapture(); @@ -391,16 +391,17 @@ // capture parameters they need. They will end the sessions themselves. // They should never be called if IsCaptureAllowed() returns false. void CaptureImage(const CaptureParams& capture_params, - const base::FilePath& path); + const base::FilePath& path, + const CaptureModeBehavior* behavior); void CaptureVideo(const CaptureParams& capture_params); // Called back when an image has been captured to trigger an attempt to save - // the image as a file. |timestamp| is the time at which the capture was - // triggered. |was_cursor_originally_blocked| is whether the cursor was - // blocked at the time the screenshot capture request was made. |png_bytes| is - // the buffer containing the captured image in a PNG format. + // the image as a file. `was_cursor_originally_blocked` is whether the cursor + // was blocked at the time the screenshot capture request was made. + // `png_bytes` is the buffer containing the captured image in a PNG format. void OnImageCaptured(const base::FilePath& path, bool was_cursor_originally_blocked, + const CaptureModeBehavior* behavior, scoped_refptr<base::RefCountedMemory> png_bytes); // Called back when an attempt to save the image file has been completed, with @@ -411,6 +412,7 @@ // clipboard. If saving was successful, then the image was saved in // `file_saved_path`. void OnImageFileSaved(scoped_refptr<base::RefCountedMemory> png_bytes, + const CaptureModeBehavior* behavior, const base::FilePath& file_saved_path); // Called back when the check for custom folder's availability is done in
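The header and controller changes above thread the active `CaptureModeBehavior*` through the asynchronous capture pipeline by binding it into the image-captured and file-saved callbacks. Below is a minimal, free-standing sketch of that base::BindOnce() pattern; `Logger`, `OnDone`, and `MakeCallback` are hypothetical names used only for illustration, not part of the CL.

#include <string>

#include "base/functional/bind.h"
#include "base/functional/callback.h"
#include "base/memory/weak_ptr.h"

// Hypothetical receiver class standing in for CaptureModeController.
class Logger {
 public:
  void OnDone(int tag, bool flag, const std::string& result) {}
  base::WeakPtr<Logger> GetWeakPtr() { return weak_factory_.GetWeakPtr(); }

 private:
  base::WeakPtrFactory<Logger> weak_factory_{this};
};

base::OnceCallback<void(const std::string&)> MakeCallback(Logger* logger) {
  // `tag` and `flag` are bound now (like `path`, the cursor state, and the
  // behavior pointer above); the std::string argument is supplied when the
  // callback eventually runs.
  return base::BindOnce(&Logger::OnDone, logger->GetWeakPtr(), /*tag=*/1,
                        /*flag=*/true);
}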
diff --git a/ash/capture_mode/capture_mode_game_dashboard_unittests.cc b/ash/capture_mode/capture_mode_game_dashboard_unittests.cc
index 5ad0aad..219caf2 100644
--- a/ash/capture_mode/capture_mode_game_dashboard_unittests.cc
+++ b/ash/capture_mode/capture_mode_game_dashboard_unittests.cc
@@ -649,6 +649,24 @@ } } +TEST_P(GameDashboardCaptureModeHistogramTest, GameAudioRecordingModeHistogram) { + constexpr char kHistogramNameBase[] = "AudioRecordingMode"; + CaptureModeTestApi test_api; + for (const auto audio_mode : + {AudioRecordingMode::kOff, AudioRecordingMode::kMicrophone, + AudioRecordingMode::kSystem, + AudioRecordingMode::kSystemAndMicrophone}) { + const auto histogram_name = BuildHistogramName( + kHistogramNameBase, test_api.GetBehavior(BehaviorType::kGameDashboard), + /*append_ui_mode_suffix=*/true); + histogram_tester_.ExpectBucketCount(histogram_name, audio_mode, 0); + auto* controller = StartGameCaptureModeSession(); + controller->SetAudioRecordingMode(audio_mode); + controller->Stop(); + histogram_tester_.ExpectBucketCount(histogram_name, audio_mode, 1); + } +} + TEST_P(GameDashboardCaptureModeHistogramTest, GameDashboardEndRecordingReasonHistogram) { constexpr char kHistogramNameBase[] = "EndRecordingReason"; @@ -690,6 +708,26 @@ /*expected_count=*/1); } +TEST_P(GameDashboardCaptureModeHistogramTest, + CaptureScreenshotOfGivenWindowMetric) { + constexpr char kHistogramNameBase[] = "SaveLocation"; + const base::FilePath custom_folder = + CreateCustomFolderInUserDownloadsPath("test"); + const auto histogram_name = BuildHistogramName( + kHistogramNameBase, + CaptureModeTestApi().GetBehavior(BehaviorType::kGameDashboard), + /*append_ui_mode_suffix=*/true); + + histogram_tester_.ExpectBucketCount( + histogram_name, CaptureModeSaveToLocation::kCustomizedFolder, 0); + CaptureModeController* controller = CaptureModeController::Get(); + controller->SetCustomCaptureFolder(custom_folder); + controller->CaptureScreenshotOfGivenWindow(game_window()); + const auto file_saved_path = WaitForCaptureFileToBeSaved(); + histogram_tester_.ExpectBucketCount( + histogram_name, CaptureModeSaveToLocation::kCustomizedFolder, 1); +} + INSTANTIATE_TEST_SUITE_P(All, GameDashboardCaptureModeHistogramTest, ::testing::Bool());
diff --git a/ash/capture_mode/capture_mode_metrics.cc b/ash/capture_mode/capture_mode_metrics.cc
index fd5bbd3..1f89269 100644
--- a/ash/capture_mode/capture_mode_metrics.cc
+++ b/ash/capture_mode/capture_mode_metrics.cc
@@ -18,7 +18,8 @@ constexpr char kEndRecordingReasonHistogramRootWord[] = "EndRecordingReason"; constexpr char kBarButtonHistogramRootWord[] = "BarButtons"; -constexpr char kCaptureAudioOnHistogramRootWord[] = "CaptureAudioOnMetric"; +constexpr char kCaptureAudioRecordingModeHistogramRootWord[] = + "AudioRecordingMode"; constexpr char kCaptureConfigurationHistogramRootWord[] = "CaptureConfiguration"; constexpr char kCaptureRegionAdjustmentHistogramRootWord[] = @@ -83,7 +84,7 @@ void RecordCaptureModeConfiguration(CaptureModeType type, CaptureModeSource source, RecordingType recording_type, - bool audio_on, + AudioRecordingMode audio_mode, const CaptureModeBehavior* behavior) { std::string configuration_histogram_name = BuildHistogramName(kCaptureConfigurationHistogramRootWord, behavior, @@ -92,11 +93,11 @@ GetConfiguration(type, source, recording_type)); if (type == CaptureModeType::kVideo && recording_type != RecordingType::kGif) { - base::UmaHistogramBoolean( - BuildHistogramName(kCaptureAudioOnHistogramRootWord, - /*behavior=*/nullptr, + base::UmaHistogramEnumeration( + BuildHistogramName(kCaptureAudioRecordingModeHistogramRootWord, + behavior, /*append_ui_mode_suffix=*/true), - audio_on); + audio_mode); } }
diff --git a/ash/capture_mode/capture_mode_metrics.h b/ash/capture_mode/capture_mode_metrics.h
index 8f6408e..24c271c 100644
--- a/ash/capture_mode/capture_mode_metrics.h
+++ b/ash/capture_mode/capture_mode_metrics.h
@@ -140,7 +140,7 @@
 void RecordCaptureModeConfiguration(CaptureModeType type,
                                     CaptureModeSource source,
                                     RecordingType recording_type,
-                                    bool audio_on,
+                                    AudioRecordingMode audio_mode,
                                     const CaptureModeBehavior* behavior);

 // Records the percent ratio between the area of the user selected region to be
diff --git a/ash/capture_mode/capture_mode_session.cc b/ash/capture_mode/capture_mode_session.cc
index c928e9e5..b3576e74 100644
--- a/ash/capture_mode/capture_mode_session.cc
+++ b/ash/capture_mode/capture_mode_session.cc
@@ -805,11 +805,9 @@
   num_capture_region_adjusted_ = 0;
   RecordCaptureModeSwitchesFromInitialMode(capture_source_changed_);

-  RecordCaptureModeConfiguration(
-      controller_->type(), source, recording_type,
-      /*audio_on=*/controller_->GetEffectiveAudioRecordingMode() !=
-          AudioRecordingMode::kOff,
-      active_behavior_);
+  RecordCaptureModeConfiguration(controller_->type(), source, recording_type,
+                                 controller_->GetEffectiveAudioRecordingMode(),
+                                 active_behavior_);
 }

 void CaptureModeSession::StartCountDown(
diff --git a/ash/capture_mode/capture_mode_types.h b/ash/capture_mode/capture_mode_types.h
index 2e729e8..3a8a832 100644
--- a/ash/capture_mode/capture_mode_types.h
+++ b/ash/capture_mode/capture_mode_types.h
@@ -71,12 +71,15 @@
   kGif,
 };

-// Defines the supported audio recording modes.
+// Defines the supported audio recording modes. Note that these values are
+// persisted to histograms so existing values should remain unchanged and new
+// values should be added to the end.
 enum class AudioRecordingMode {
-  kOff,
+  kOff = 0,
   kSystem,
   kMicrophone,
   kSystemAndMicrophone,
+  kMaxValue = kSystemAndMicrophone,
 };

 // Specifies the capture mode behavior types.
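With `kMaxValue` in place, the enum can be recorded directly through base::UmaHistogramEnumeration(), which is what the capture_mode_metrics.cc change above relies on. A minimal sketch, assuming only //base; the histogram name here is purely illustrative and is not the real capture-mode histogram.

#include "base/metrics/histogram_functions.h"

void RecordAudioModeForIllustration(AudioRecordingMode mode) {
  // Buckets are keyed by the numeric enum values, so values that have shipped
  // must never be renumbered; kMaxValue tells the helper the valid range.
  base::UmaHistogramEnumeration("Illustrative.AudioRecordingMode", mode);
}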
diff --git a/ash/capture_mode/capture_mode_unittests.cc b/ash/capture_mode/capture_mode_unittests.cc
index d9b4ab7..ca4c40c 100644
--- a/ash/capture_mode/capture_mode_unittests.cc
+++ b/ash/capture_mode/capture_mode_unittests.cc
@@ -7158,19 +7158,31 @@ } TEST_P(CaptureModeHistogramTest, VideoRecordingAudioVideoMetrics) { - constexpr char kHistogramNameBase[] = "CaptureAudioOnMetric"; + constexpr char kHistogramNameBase[] = "AudioRecordingMode"; const std::string histogram_name = BuildHistogramName( kHistogramNameBase, /*behavior=*/nullptr, /*append_ui_mode_suffix=*/true); base::HistogramTester histogram_tester; - histogram_tester.ExpectBucketCount(histogram_name, false, 0); - histogram_tester.ExpectBucketCount(histogram_name, true, 0); + histogram_tester.ExpectBucketCount(histogram_name, AudioRecordingMode::kOff, + 0); + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kMicrophone, 0); + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kSystem, 0); + histogram_tester.ExpectBucketCount( + histogram_name, AudioRecordingMode::kSystemAndMicrophone, 0); - // Perform a video recording with audio off. A false should be recorded. + // Perform a video recording with audio off. `kOff` should be recorded. StartSessionForVideo(); CaptureModeTestApi().SetAudioRecordingMode(AudioRecordingMode::kOff); StartRecording(); - histogram_tester.ExpectBucketCount(histogram_name, false, 1); - histogram_tester.ExpectBucketCount(histogram_name, true, 0); + histogram_tester.ExpectBucketCount(histogram_name, AudioRecordingMode::kOff, + 1); + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kMicrophone, 0); + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kSystem, 0); + histogram_tester.ExpectBucketCount( + histogram_name, AudioRecordingMode::kSystemAndMicrophone, 0); WaitForSeconds(1); StopRecording(); WaitForCaptureFileToBeSaved(); @@ -7183,13 +7195,19 @@ /*append_ui_mode_suffix=*/true), /*expected_count=*/1); - // Perform a video recording with microphone audio recording on. A true should - // be recorded. + // Perform a video recording with microphone audio recording on. `kMicrophone` + // should be recorded. StartSessionForVideo(); CaptureModeTestApi().SetAudioRecordingMode(AudioRecordingMode::kMicrophone); StartRecording(); - histogram_tester.ExpectBucketCount(histogram_name, false, 1); - histogram_tester.ExpectBucketCount(histogram_name, true, 1); + histogram_tester.ExpectBucketCount(histogram_name, AudioRecordingMode::kOff, + 1); + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kMicrophone, 1); + histogram_tester.ExpectBucketCount(histogram_name, + AudioRecordingMode::kSystem, 0); + histogram_tester.ExpectBucketCount( + histogram_name, AudioRecordingMode::kSystemAndMicrophone, 0); StopRecording(); }
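The updated expectations follow the standard base::HistogramTester pattern: construct the tester before the code under test records anything, then assert per-bucket counts. A self-contained sketch under that assumption, with an illustrative histogram name rather than the one built by BuildHistogramName():

#include "base/metrics/histogram_functions.h"
#include "base/test/metrics/histogram_tester.h"

void RecordsExactlyOneMicrophoneSample() {
  base::HistogramTester histogram_tester;  // Samples are tracked from here on.
  base::UmaHistogramEnumeration("Illustrative.AudioRecordingMode",
                                AudioRecordingMode::kMicrophone);
  histogram_tester.ExpectBucketCount("Illustrative.AudioRecordingMode",
                                     AudioRecordingMode::kMicrophone, 1);
  histogram_tester.ExpectBucketCount("Illustrative.AudioRecordingMode",
                                     AudioRecordingMode::kOff, 0);
  histogram_tester.ExpectTotalCount("Illustrative.AudioRecordingMode", 1);
}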
diff --git a/ash/clipboard/views/clipboard_history_bitmap_item_view.cc b/ash/clipboard/views/clipboard_history_bitmap_item_view.cc
index 77b5b3e..90ff7059 100644
--- a/ash/clipboard/views/clipboard_history_bitmap_item_view.cc
+++ b/ash/clipboard/views/clipboard_history_bitmap_item_view.cc
@@ -5,8 +5,8 @@ #include "ash/clipboard/views/clipboard_history_bitmap_item_view.h" #include "ash/clipboard/clipboard_history_item.h" -#include "ash/clipboard/views/clipboard_history_delete_button.h" #include "ash/clipboard/views/clipboard_history_view_constants.h" +#include "ash/style/ash_color_id.h" #include "base/callback_list.h" #include "base/containers/contains.h" #include "base/functional/bind.h" @@ -17,6 +17,7 @@ #include "ui/base/l10n/l10n_util.h" #include "ui/base/metadata/metadata_impl_macros.h" #include "ui/base/models/image_model.h" +#include "ui/chromeos/styles/cros_tokens_color_mappings.h" #include "ui/compositor/layer.h" #include "ui/compositor/layer_animation_observer.h" #include "ui/compositor/scoped_layer_animation_settings.h" @@ -27,7 +28,6 @@ #include "ui/views/background.h" #include "ui/views/border.h" #include "ui/views/controls/image_view.h" -#include "ui/views/layout/box_layout.h" #include "ui/views/layout/fill_layout.h" #include "ui/views/view_class_properties.h" @@ -182,8 +182,6 @@ kColorAshHairlineBorderColor))) .BuildChildren(); } - - InstallDeleteButton(); } BitmapContentsView(const BitmapContentsView& rhs) = delete; BitmapContentsView& operator=(const BitmapContentsView& rhs) = delete; @@ -191,28 +189,6 @@ private: // ContentsView: - ClipboardHistoryDeleteButton* CreateDeleteButton() override { - auto delete_button_container = std::make_unique<views::View>(); - auto* layout_manager = delete_button_container->SetLayoutManager( - std::make_unique<views::BoxLayout>( - views::BoxLayout::Orientation::kHorizontal)); - layout_manager->set_main_axis_alignment( - views::BoxLayout::MainAxisAlignment::kEnd); - layout_manager->set_cross_axis_alignment( - views::BoxLayout::CrossAxisAlignment::kStart); - - auto delete_button = - std::make_unique<ClipboardHistoryDeleteButton>(container_); - delete_button->SetProperty( - views::kMarginsKey, - ClipboardHistoryViews::kBitmapItemDeleteButtonMargins); - ClipboardHistoryDeleteButton* delete_button_ptr = - delete_button_container->AddChildView(std::move(delete_button)); - AddChildView(std::move(delete_button_container)); - - return delete_button_ptr; - } - void OnBoundsChanged(const gfx::Rect& previous_bounds) override { // Create rounded corners around the contents area through the clip path // instead of layer clip. Because we have to avoid using any layer here.
diff --git a/ash/clipboard/views/clipboard_history_item_view.cc b/ash/clipboard/views/clipboard_history_item_view.cc
index f61e00d..9d79519 100644
--- a/ash/clipboard/views/clipboard_history_item_view.cc
+++ b/ash/clipboard/views/clipboard_history_item_view.cc
@@ -26,7 +26,11 @@ #include "ui/views/animation/ink_drop.h" #include "ui/views/border.h" #include "ui/views/controls/menu/menu_item_view.h" +#include "ui/views/layout/box_layout.h" +#include "ui/views/layout/box_layout_view.h" #include "ui/views/layout/fill_layout.h" +#include "ui/views/metadata/view_factory_internal.h" +#include "ui/views/view_class_properties.h" namespace ash { namespace { @@ -40,6 +44,20 @@ base::ranges::find(items, item_id, &ClipboardHistoryItem::id); return item_iter == items.cend() ? nullptr : &(*item_iter); } + +const gfx::Insets GetDeleteButtonMargins( + crosapi::mojom::ClipboardHistoryDisplayFormat display_format) { + switch (display_format) { + case crosapi::mojom::ClipboardHistoryDisplayFormat::kUnknown: + NOTREACHED_NORETURN(); + case crosapi::mojom::ClipboardHistoryDisplayFormat::kText: + case crosapi::mojom::ClipboardHistoryDisplayFormat::kFile: + return ClipboardHistoryViews::kDefaultItemDeleteButtonMargins; + case crosapi::mojom::ClipboardHistoryDisplayFormat::kPng: + case crosapi::mojom::ClipboardHistoryDisplayFormat::kHtml: + return ClipboardHistoryViews::kBitmapItemDeleteButtonMargins; + } +} } // namespace ClipboardHistoryItemView::ContentsView::ContentsView( @@ -51,34 +69,18 @@ ClipboardHistoryItemView::ContentsView::~ContentsView() = default; -void ClipboardHistoryItemView::ContentsView::InstallDeleteButton() { - delete_button_ = CreateDeleteButton(); -} - -void ClipboardHistoryItemView::ContentsView::OnHostPseudoFocusUpdated() { - delete_button_->SetVisible(container_->ShouldShowDeleteButton()); - - const bool delete_button_focused = container_->IsDeleteButtonPseudoFocused(); - views::InkDrop::Get(delete_button_) - ->GetInkDrop() - ->SetFocused(delete_button_focused); - if (delete_button_focused) { - delete_button_->NotifyAccessibilityEvent(ax::mojom::Event::kHover, - /*send_native_event*/ true); - } -} - // Accepts the event only when |delete_button_| should be the handler. bool ClipboardHistoryItemView::ContentsView::DoesIntersectRect( const views::View* target, const gfx::Rect& rect) const { - if (!delete_button_->GetVisible()) + const views::View* const delete_button = container_->delete_button_; + if (!delete_button->GetVisible()) { return false; + } gfx::RectF rect_in_delete_button(rect); - ConvertRectToTarget(this, delete_button_, &rect_in_delete_button); - return delete_button_->HitTestRect( - gfx::ToEnclosedRect(rect_in_delete_button)); + ConvertRectToTarget(this, delete_button, &rect_in_delete_button); + return delete_button->HitTestRect(gfx::ToEnclosedRect(rect_in_delete_button)); } // static @@ -175,14 +177,20 @@ } void ClipboardHistoryItemView::Init() { - SetFocusBehavior(views::View::FocusBehavior::ACCESSIBLE_ONLY); - SetLayoutManager(std::make_unique<views::FillLayout>()); - - // Ensures that MainButton is below any other child views. - main_button_ = - AddChildView(std::make_unique<ClipboardHistoryMainButton>(this)); - - contents_view_ = AddChildView(CreateContentsView()); + views::Builder<views::View>(this) + .SetFocusBehavior(views::View::FocusBehavior::ACCESSIBLE_ONLY) + .SetLayoutManager(std::make_unique<views::FillLayout>()) + .AddChildren( + // Add the main button below the delete button in the z-order so that + // hovering over the delete button causes it to be recognized as the + // item view's event handler. 
+ views::Builder<views::View>( + std::make_unique<ClipboardHistoryMainButton>(this)) + .CopyAddressTo(&main_button_), + views::Builder<views::View>(CreateContentsView()) + .CopyAddressTo(&contents_view_) + .AddChild(views::Builder<views::View>(CreateDeleteButton()))) + .BuildChildren(); subscription_ = container_->AddSelectedChangedCallback(base::BindRepeating( &ClipboardHistoryItemView::OnSelectionChanged, base::Unretained(this))); @@ -214,15 +222,14 @@ case PseudoFocus::kMainButton: { // The menu item is already selected so show the delete button if the // button is hidden. - views::View* delete_button = contents_view_->delete_button(); - if (!delete_button->GetVisible()) { - delete_button->SetVisible(true); + if (!delete_button_->GetVisible()) { + delete_button_->SetVisible(true); } break; } case PseudoFocus::kDeleteButton: // The delete button already shows, so do nothing. - DCHECK(contents_view_->delete_button()->GetVisible()); + DCHECK(delete_button_->GetVisible()); break; case PseudoFocus::kMaxValue: NOTREACHED(); @@ -319,6 +326,22 @@ } } +std::unique_ptr<views::View> ClipboardHistoryItemView::CreateDeleteButton() { + const auto* const item = GetClipboardHistoryItem(); + CHECK(item); + + return views::Builder<views::BoxLayoutView>() + .SetOrientation(views::BoxLayout::Orientation::kHorizontal) + .SetMainAxisAlignment(views::BoxLayout::MainAxisAlignment::kEnd) + .SetCrossAxisAlignment(views::BoxLayout::CrossAxisAlignment::kStart) + .AddChild(views::Builder<views::Button>( + std::make_unique<ClipboardHistoryDeleteButton>(this)) + .SetProperty(views::kMarginsKey, + GetDeleteButtonMargins(item->display_format())) + .CopyAddressTo(&delete_button_)) + .Build(); +} + bool ClipboardHistoryItemView::ShouldShowDeleteButton() const { return (IsMainButtonPseudoFocused() && IsMouseHovered()) || IsDeleteButtonPseudoFocused() || under_gesture_long_press_; @@ -335,14 +358,29 @@ if (pseudo_focus_ == new_pseudo_focus) return; + // The main button appears highlighted when it has pseudo focus. The button + // needs to be repainted when transitioning to or from a highlighted state. + const bool repaint_main_button = pseudo_focus_ == PseudoFocus::kMainButton || + new_pseudo_focus == PseudoFocus::kMainButton; + pseudo_focus_ = new_pseudo_focus; if (IsMainButtonPseudoFocused()) { NotifyAccessibilityEvent(ax::mojom::Event::kSelection, /*send_native_event=*/true); } - contents_view_->OnHostPseudoFocusUpdated(); - main_button_->OnHostPseudoFocusUpdated(); + delete_button_->SetVisible(ShouldShowDeleteButton()); + views::InkDrop::Get(delete_button_) + ->GetInkDrop() + ->SetFocused(IsDeleteButtonPseudoFocused()); + if (IsDeleteButtonPseudoFocused()) { + delete_button_->NotifyAccessibilityEvent(ax::mojom::Event::kHover, + /*send_native_event*/ true); + } + + if (repaint_main_button) { + main_button_->SchedulePaint(); + } } BEGIN_METADATA(ClipboardHistoryItemView, ContentsView, views::View)
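The views::Builder calls above replace manual AddChildView() wiring. A condensed sketch of the same pattern, assuming the stock //ui/views builder support; the function name and the `child_out` parameter are illustrative only and not part of the CL.

#include <memory>

#include "ui/views/layout/box_layout.h"
#include "ui/views/layout/box_layout_view.h"
#include "ui/views/metadata/view_factory.h"
#include "ui/views/view.h"

// `child_out` receives a pointer into the built hierarchy, mirroring how
// CopyAddressTo() populates `delete_button_` above; it stays valid only while
// the returned view (and therefore its children) is alive.
std::unique_ptr<views::View> MakeEndAlignedRow(views::View** child_out) {
  return views::Builder<views::BoxLayoutView>()
      .SetOrientation(views::BoxLayout::Orientation::kHorizontal)
      .SetMainAxisAlignment(views::BoxLayout::MainAxisAlignment::kEnd)
      .AddChild(views::Builder<views::View>().CopyAddressTo(child_out))
      .Build();
}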
diff --git a/ash/clipboard/views/clipboard_history_item_view.h b/ash/clipboard/views/clipboard_history_item_view.h
index 9d6fb11d..a330c64f 100644
--- a/ash/clipboard/views/clipboard_history_item_view.h
+++ b/ash/clipboard/views/clipboard_history_item_view.h
@@ -19,9 +19,7 @@ namespace ash { class ClipboardHistory; -class ClipboardHistoryDeleteButton; class ClipboardHistoryItem; -class ClipboardHistoryMainButton; // The base class for menu items of the clipboard history menu. class ASH_EXPORT ClipboardHistoryItemView : public views::View { @@ -80,19 +78,7 @@ ContentsView& operator=(const ContentsView& rhs) = delete; ~ContentsView() override; - // Install DeleteButton on the contents view. - void InstallDeleteButton(); - - void OnHostPseudoFocusUpdated(); - - ClipboardHistoryDeleteButton* delete_button() { return delete_button_; } - const ClipboardHistoryDeleteButton* delete_button() const { - return delete_button_; - } - protected: - virtual ClipboardHistoryDeleteButton* CreateDeleteButton() = 0; - ClipboardHistoryItemView* container() { return container_; } private: @@ -100,10 +86,6 @@ bool DoesIntersectRect(const views::View* target, const gfx::Rect& rect) const override; - // Owned by the view hierarchy. - raw_ptr<ClipboardHistoryDeleteButton, ExperimentalAsh> delete_button_ = - nullptr; - // The parent of ContentsView. const raw_ptr<ClipboardHistoryItemView, ExperimentalAsh> container_; }; @@ -147,6 +129,10 @@ // Calculates the action type when `main_button_` is clicked. clipboard_history_util::Action CalculateActionForMainButtonClick() const; + // Creates the delete button and any necessary containers for its formatting. + // Sets `delete_button_` in the process. + std::unique_ptr<views::View> CreateDeleteButton(); + bool ShouldShowDeleteButton() const; // Called when receiving pseudo focus for the first time. @@ -163,9 +149,10 @@ const raw_ptr<views::MenuItemView, ExperimentalAsh> container_; - raw_ptr<ContentsView, ExperimentalAsh> contents_view_ = nullptr; - - raw_ptr<ClipboardHistoryMainButton, ExperimentalAsh> main_button_ = nullptr; + // Owned by the view hierarchy. + raw_ptr<views::View, ExperimentalAsh> main_button_ = nullptr; + raw_ptr<views::View, ExperimentalAsh> contents_view_ = nullptr; + raw_ptr<views::View, ExperimentalAsh> delete_button_ = nullptr; PseudoFocus pseudo_focus_ = PseudoFocus::kEmpty;
diff --git a/ash/clipboard/views/clipboard_history_main_button.cc b/ash/clipboard/views/clipboard_history_main_button.cc
index 2b49f43..e8530c45 100644
--- a/ash/clipboard/views/clipboard_history_main_button.cc
+++ b/ash/clipboard/views/clipboard_history_main_button.cc
@@ -55,18 +55,6 @@

 ClipboardHistoryMainButton::~ClipboardHistoryMainButton() = default;

-void ClipboardHistoryMainButton::OnHostPseudoFocusUpdated() {
-  SetShouldHighlight(container_->IsMainButtonPseudoFocused());
-}
-
-void ClipboardHistoryMainButton::SetShouldHighlight(bool should_highlight) {
-  if (should_highlight_ == should_highlight)
-    return;
-
-  should_highlight_ = should_highlight;
-  SchedulePaint();
-}
-
 void ClipboardHistoryMainButton::OnClickCanceled(const ui::Event& event) {
   DCHECK(event.IsMouseEvent());

@@ -95,8 +83,10 @@
 }

 void ClipboardHistoryMainButton::PaintButtonContents(gfx::Canvas* canvas) {
-  if (!should_highlight_)
+  // Only paint a highlight when the button has pseudo focus.
+  if (!container_->IsMainButtonPseudoFocused()) {
     return;
+  }

   // Highlight the background when the menu item is selected or pressed.
   cc::PaintFlags flags;
diff --git a/ash/clipboard/views/clipboard_history_main_button.h b/ash/clipboard/views/clipboard_history_main_button.h
index d64b53e..e8914c2 100644
--- a/ash/clipboard/views/clipboard_history_main_button.h
+++ b/ash/clipboard/views/clipboard_history_main_button.h
@@ -22,11 +22,7 @@
       delete;
   ~ClipboardHistoryMainButton() override;

-  void OnHostPseudoFocusUpdated();
-
  private:
-  void SetShouldHighlight(bool should_highlight);
-
   // views::Button:
   void OnClickCanceled(const ui::Event& event) override;
   void OnThemeChanged() override;
@@ -35,9 +31,6 @@
   // The parent view.
   const raw_ptr<ClipboardHistoryItemView, ExperimentalAsh> container_;
-
-  // Indicates whether the view should be highlighted.
-  bool should_highlight_ = false;
 };

 }  // namespace ash
diff --git a/ash/clipboard/views/clipboard_history_text_item_view.cc b/ash/clipboard/views/clipboard_history_text_item_view.cc
index 9e2c627..6ffa356 100644
--- a/ash/clipboard/views/clipboard_history_text_item_view.cc
+++ b/ash/clipboard/views/clipboard_history_text_item_view.cc
@@ -5,9 +5,7 @@ #include "ash/clipboard/views/clipboard_history_text_item_view.h" #include "ash/clipboard/clipboard_history_item.h" -#include "ash/clipboard/views/clipboard_history_delete_button.h" #include "ash/clipboard/views/clipboard_history_label.h" -#include "ash/clipboard/views/clipboard_history_view_constants.h" #include "ui/base/metadata/metadata_impl_macros.h" #include "ui/views/layout/box_layout.h" #include "ui/views/view_class_properties.h" @@ -30,24 +28,11 @@ auto* label = AddChildView(std::make_unique<ClipboardHistoryLabel>(container->text_)); - layout->SetFlexForView(label, /*flex_weight=*/1); - - InstallDeleteButton(); + layout->SetFlexForView(label, /*flex=*/1); } TextContentsView(const TextContentsView& rhs) = delete; TextContentsView& operator=(const TextContentsView& rhs) = delete; ~TextContentsView() override = default; - - private: - // ContentsView: - ClipboardHistoryDeleteButton* CreateDeleteButton() override { - auto delete_button = - std::make_unique<ClipboardHistoryDeleteButton>(container()); - delete_button->SetProperty( - views::kMarginsKey, - ClipboardHistoryViews::kDefaultItemDeleteButtonMargins); - return AddChildView(std::move(delete_button)); - } }; BEGIN_METADATA(ClipboardHistoryTextItemView, TextContentsView, ContentsView)
diff --git a/ash/color_enhancement/color_enhancement_controller.cc b/ash/color_enhancement/color_enhancement_controller.cc
index 6cf7e459..328645b 100644
--- a/ash/color_enhancement/color_enhancement_controller.cc
+++ b/ash/color_enhancement/color_enhancement_controller.cc
@@ -15,12 +15,6 @@ namespace { -// Sepia filter above .3 should enable cursor compositing. Beyond this point, -// users can perceive the mouse is too white if compositing does not occur. -// TODO (crbug.com/1031959): Check this value with UX to see if it can be -// larger. -const float kMinSepiaPerceptableDifference = 0.3f; - // // Parameters for simulating color vision deficiency. // Copied from the Javascript ColorEnhancer extension: @@ -130,6 +124,8 @@ // channels. correction_matrix.set(1.0, 0.0, 0.7, 0.0, 1.0, 0.7, 0.0, 0.0, 0.0); break; + case ash::ColorVisionDeficiencyType::kGrayscale: + NOTREACHED() << "Grayscale should be handled in SetGreyscaleAmount"; } // For Daltonization of an image `original_img`, we would calculate the @@ -200,36 +196,16 @@ // Note: No need to do cursor compositing since cursors are greyscale already. } -void ColorEnhancementController::SetSaturationAmount(float amount) { - if (saturation_amount_ == amount || amount < 0) - return; - - saturation_amount_ = amount; - // Note: No need to do cursor compositing since cursors are greyscale and not - // impacted by saturation. -} - -void ColorEnhancementController::SetSepiaAmount(float amount) { - if (sepia_amount_ == amount || amount < 0 || amount > 1) - return; - - sepia_amount_ = amount; - // The cursor should be tinted sepia as well. Update cursor compositing. - Shell::Get()->UpdateCursorCompositingEnabled(); -} - -void ColorEnhancementController::SetHueRotationAmount(int amount) { - if (hue_rotation_amount_ == amount || amount < 0 || amount > 359) - return; - - hue_rotation_amount_ = amount; - // Note: No need to do cursor compositing since cursors are greyscale and not - // impacted by hue rotation. -} - void ColorEnhancementController::SetColorVisionCorrectionFilter( ColorVisionDeficiencyType type, float amount) { + if (type == ColorVisionDeficiencyType::kGrayscale) { + SetGreyscaleAmount(amount); + cvd_correction_matrix_.reset(); + return; + } + + SetGreyscaleAmount(0); if ((amount <= 0 || amount > 1) && cvd_correction_matrix_) { cvd_correction_matrix_.reset(); return; @@ -254,18 +230,6 @@ } } -bool ColorEnhancementController::ShouldEnableCursorCompositingForSepia() const { - if (!::features:: - AreExperimentalAccessibilityColorEnhancementSettingsEnabled()) { - return false; - } - - // Enable cursor compositing if the sepia filter is on enough that - // the white mouse cursor stands out. Sepia will not be set on the root - // window if the setting value is greater than 1, so ignore that state. - return sepia_amount_ >= kMinSepiaPerceptableDifference && sepia_amount_ <= 1; -} - void ColorEnhancementController::OnRootWindowAdded(aura::Window* root_window) { UpdateDisplay(root_window); } @@ -288,16 +252,11 @@ // Reset layer state to defaults. layer->SetLayerGrayscale(0.0); layer->SetLayerSaturation(1.0); - layer->SetLayerSepia(0); - layer->SetLayerHueRotation(0); layer->ClearLayerCustomColorMatrix(); return; } layer->SetLayerGrayscale(greyscale_amount_); - layer->SetLayerSaturation(saturation_amount_); - layer->SetLayerSepia(sepia_amount_); - layer->SetLayerHueRotation(hue_rotation_amount_); if (cvd_correction_matrix_) { layer->SetLayerCustomColorMatrix(*cvd_correction_matrix_); } else {
diff --git a/ash/color_enhancement/color_enhancement_controller.h b/ash/color_enhancement/color_enhancement_controller.h
index 6e8b0728..85209cf 100644
--- a/ash/color_enhancement/color_enhancement_controller.h
+++ b/ash/color_enhancement/color_enhancement_controller.h
@@ -19,6 +19,7 @@
   kProtanomaly = 0,
   kDeuteranomaly = 1,
   kTritanomaly = 2,
+  kGrayscale = 3,
 };

 // Controls the color enhancement options on all displays. These options
@@ -44,22 +45,11 @@
   // Sets greyscale amount.
   void SetGreyscaleAmount(float amount);

-  // Sets saturation amount.
-  void SetSaturationAmount(float amount);
-
-  // Sets sepia amount.
-  void SetSepiaAmount(float amount);
-
-  // Sets hue rotation amount.
-  void SetHueRotationAmount(int amount);
-
   // Sets the color vision correction filter type and severity.
   // `severity` should be between 0 and 1.0, inclusive.
   void SetColorVisionCorrectionFilter(ColorVisionDeficiencyType type,
                                       float severity);

-  bool ShouldEnableCursorCompositingForSepia() const;
-
   // ShellObserver:
   void OnRootWindowAdded(aura::Window* root_window) override;

@@ -77,19 +67,9 @@
   // Indicates if the color filtering options are enabled or disabled.
   bool color_filtering_enabled_ = false;

-  // Amount of hue rotation, on the scale of 0 to 359.
-  int hue_rotation_amount_ = 0;
-
   // Amount of greyscale, on the scale of 0 to 1.
   float greyscale_amount_ = 0;

-  // Amount of sepia, on the scale of 0 to 1.
-  float sepia_amount_ = 0;
-
-  // Amount of saturation where 1 is normal. Values may range from
-  // 0 to max float.
-  float saturation_amount_ = 1;
-
   // Color correction matrix.
   std::unique_ptr<cc::FilterOperation::Matrix> cvd_correction_matrix_;
 };
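After this change the controller's per-display state reduces to a greyscale amount plus an optional CVD-correction matrix. A simplified sketch (not the controller's exact code path) of how that state maps onto a ui::Layer, using the layer calls seen in color_enhancement_controller.cc above; the function name is illustrative.

#include "cc/paint/filter_operation.h"
#include "ui/compositor/layer.h"

// Either plain grayscale or a custom CVD-correction matrix ends up on the
// layer, never both; passing a null matrix clears any previous one.
void ApplyColorFiltersForIllustration(
    ui::Layer* layer,
    float greyscale_amount,
    const cc::FilterOperation::Matrix* cvd_matrix) {
  layer->SetLayerGrayscale(greyscale_amount);
  if (cvd_matrix) {
    layer->SetLayerCustomColorMatrix(*cvd_matrix);
  } else {
    layer->ClearLayerCustomColorMatrix();
  }
}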
diff --git a/ash/color_enhancement/color_enhancement_controller_unittest.cc b/ash/color_enhancement/color_enhancement_controller_unittest.cc
index 273372d..72fbc30 100644
--- a/ash/color_enhancement/color_enhancement_controller_unittest.cc
+++ b/ash/color_enhancement/color_enhancement_controller_unittest.cc
@@ -69,125 +69,44 @@ TEST_F(ColorEnhancementControllerTest, Greyscale) { PrefService* prefs = GetPrefs(); prefs->SetBoolean(prefs::kAccessibilityColorFiltering, true); - prefs->SetInteger(prefs::kAccessibilityGreyscaleAmount, 0); + prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 0); + prefs->SetInteger(prefs::kAccessibilityColorVisionDeficiencyType, + ColorVisionDeficiencyType::kGrayscale); EXPECT_FALSE(IsCursorCompositingEnabled()); for (auto* root_window : Shell::GetAllRootWindows()) { EXPECT_FLOAT_EQ(0.f, root_window->layer()->layer_grayscale()); + // No other color filters were set. + EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); } - prefs->SetInteger(prefs::kAccessibilityGreyscaleAmount, 100); + prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 100); EXPECT_FALSE(IsCursorCompositingEnabled()); for (auto* root_window : Shell::GetAllRootWindows()) { EXPECT_FLOAT_EQ(1, root_window->layer()->layer_grayscale()); + // No other color filters were set. + EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); } - prefs->SetInteger(prefs::kAccessibilityGreyscaleAmount, 50); + prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 50); for (auto* root_window : Shell::GetAllRootWindows()) { EXPECT_FLOAT_EQ(0.5f, root_window->layer()->layer_grayscale()); + // No other color filters were set. + EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); } // Greyscale larger than 100% or smaller than 0% does nothing. - prefs->SetInteger(prefs::kAccessibilityGreyscaleAmount, 500); + prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 500); for (auto* root_window : Shell::GetAllRootWindows()) { EXPECT_FLOAT_EQ(0.5f, root_window->layer()->layer_grayscale()); + // No other color filters were set. + EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); } - prefs->SetInteger(prefs::kAccessibilityGreyscaleAmount, -10); + prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, -10); for (auto* root_window : Shell::GetAllRootWindows()) { EXPECT_FLOAT_EQ(0.5f, root_window->layer()->layer_grayscale()); - } -} - -TEST_F(ColorEnhancementControllerTest, Saturation) { - PrefService* prefs = GetPrefs(); - prefs->SetBoolean(prefs::kAccessibilityColorFiltering, true); - prefs->SetInteger(prefs::kAccessibilitySaturationAmount, 50); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.5f, root_window->layer()->layer_saturation()); - } - - prefs->SetInteger(prefs::kAccessibilitySaturationAmount, 500); - EXPECT_FALSE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(5.f, root_window->layer()->layer_saturation()); - } - - // Saturation smaller than 0% does nothing. 
- prefs->SetInteger(prefs::kAccessibilityGreyscaleAmount, -100); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(5.f, root_window->layer()->layer_saturation()); - } -} - -TEST_F(ColorEnhancementControllerTest, HueRotation) { - PrefService* prefs = GetPrefs(); - prefs->SetBoolean(prefs::kAccessibilityColorFiltering, true); - prefs->SetInteger(prefs::kAccessibilityHueRotationAmount, 42); - EXPECT_FALSE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(42.f, root_window->layer()->layer_hue_rotation()); - } - - prefs->SetInteger(prefs::kAccessibilityHueRotationAmount, 180); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(180.f, root_window->layer()->layer_hue_rotation()); - } - - // Hue rotation greater than 359 or smaller than 0 does nothing. - prefs->SetInteger(prefs::kAccessibilityHueRotationAmount, 1972); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(180.f, root_window->layer()->layer_hue_rotation()); - } - prefs->SetInteger(prefs::kAccessibilityHueRotationAmount, -10); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(180.f, root_window->layer()->layer_hue_rotation()); - } -} - -TEST_F(ColorEnhancementControllerTest, Sepia) { - PrefService* prefs = GetPrefs(); - prefs->SetBoolean(prefs::kAccessibilityColorFiltering, true); - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, 10); - EXPECT_FALSE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.1f, root_window->layer()->layer_sepia()); - } - - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, 99); - EXPECT_TRUE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.99f, root_window->layer()->layer_sepia()); - } - - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, 100); - EXPECT_TRUE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(1.0f, root_window->layer()->layer_sepia()); - } - - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, 50); - EXPECT_TRUE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.5f, root_window->layer()->layer_sepia()); - } - - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, 0); - EXPECT_FALSE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_sepia()); - } - - // Sepia smaller than 0 or lareger than 100% does nothing. - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, -10); - EXPECT_FALSE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_sepia()); - } - - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, 150); - EXPECT_FALSE(IsCursorCompositingEnabled()); - for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_sepia()); + // No other color filters were set. + EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); } } @@ -203,12 +122,15 @@ prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 0); for (auto* root_window : Shell::GetAllRootWindows()) { EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); + EXPECT_FLOAT_EQ(0.f, root_window->layer()->layer_grayscale()); } // With a non-zero severity, a matrix should be applied. 
prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 50); for (auto* root_window : Shell::GetAllRootWindows()) { EXPECT_TRUE(root_window->layer()->LayerHasCustomColorMatrix()); + // Grayscale was not impacted. + EXPECT_FLOAT_EQ(0.f, root_window->layer()->layer_grayscale()); } prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 100); for (auto* root_window : Shell::GetAllRootWindows()) { @@ -230,41 +152,39 @@ } } -TEST_F(ColorEnhancementControllerTest, ColorFiltersBehindColorFilteringOption) { +TEST_F(ColorEnhancementControllerTest, GrayscaleBehindColorFilteringOption) { PrefService* prefs = GetPrefs(); // Color filtering off. prefs->SetBoolean(prefs::kAccessibilityColorFiltering, false); - prefs->SetInteger(prefs::kAccessibilitySepiaAmount, 10); - prefs->SetInteger(prefs::kAccessibilityGreyscaleAmount, 50); - prefs->SetInteger(prefs::kAccessibilitySaturationAmount, 50); - prefs->SetInteger(prefs::kAccessibilityHueRotationAmount, 42); + prefs->SetInteger(prefs::kAccessibilityColorVisionCorrectionAmount, 50); + prefs->SetInteger(prefs::kAccessibilityColorVisionDeficiencyType, + ColorVisionDeficiencyType::kGrayscale); // Default values. for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_sepia()); EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_grayscale()); - EXPECT_FLOAT_EQ(1.0f, root_window->layer()->layer_saturation()); - EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_hue_rotation()); EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); } // Turn on color filtering, values should now be from prefs. prefs->SetBoolean(prefs::kAccessibilityColorFiltering, true); for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.1f, root_window->layer()->layer_sepia()); EXPECT_FLOAT_EQ(0.5f, root_window->layer()->layer_grayscale()); - EXPECT_FLOAT_EQ(0.5f, root_window->layer()->layer_saturation()); - EXPECT_FLOAT_EQ(42.f, root_window->layer()->layer_hue_rotation()); + EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); + } + + prefs->SetInteger(prefs::kAccessibilityColorVisionDeficiencyType, + ColorVisionDeficiencyType::kDeuteranomaly); + prefs->SetBoolean(prefs::kAccessibilityColorFiltering, true); + for (auto* root_window : Shell::GetAllRootWindows()) { + EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_grayscale()); EXPECT_TRUE(root_window->layer()->LayerHasCustomColorMatrix()); } // Turn it off again, expect defaults to be restored. prefs->SetBoolean(prefs::kAccessibilityColorFiltering, false); for (auto* root_window : Shell::GetAllRootWindows()) { - EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_sepia()); EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_grayscale()); - EXPECT_FLOAT_EQ(1.0f, root_window->layer()->layer_saturation()); - EXPECT_FLOAT_EQ(0.0f, root_window->layer()->layer_hue_rotation()); EXPECT_FALSE(root_window->layer()->LayerHasCustomColorMatrix()); } }
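For reference, a minimal sketch of the behavior the updated test expects, using a hypothetical helper (ApplyGrayscaleAmount is not the controller's real code; only ui::Layer::SetLayerGrayscale is an existing API): the 0-100 pref value maps linearly onto layer grayscale, and out-of-range values are ignored rather than clamped.

#include "ui/compositor/layer.h"

// Hypothetical helper, not part of this CL. Mirrors what the test asserts:
// 0 -> 0.f, 50 -> 0.5f, 100 -> 1.f, while 500 and -10 leave the value as-is.
void ApplyGrayscaleAmount(ui::Layer* layer, int amount) {
  if (amount < 0 || amount > 100) {
    return;  // Out-of-range pref values do nothing.
  }
  layer->SetLayerGrayscale(amount / 100.0f);
}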
diff --git a/ash/constants/ash_features.cc b/ash/constants/ash_features.cc index 6d282c7..53c77b42 100644 --- a/ash/constants/ash_features.cc +++ b/ash/constants/ash_features.cc
@@ -1734,7 +1734,7 @@ // deprecated. BASE_FEATURE(kOsSettingsDeprecateSyncMetricsToggle, "OsSettingsDeprecateSyncMetricsToggle", - base::FEATURE_ENABLED_BY_DEFAULT); + base::FEATURE_DISABLED_BY_DEFAULT); // Enables the wayfinding improvements for the ChromeOS Settings revamp BASE_FEATURE(kOsSettingsRevampWayfinding,
diff --git a/ash/constants/ash_pref_names.cc b/ash/constants/ash_pref_names.cc index 04560b16..6196669 100644 --- a/ash/constants/ash_pref_names.cc +++ b/ash/constants/ash_pref_names.cc
@@ -428,18 +428,6 @@ // Whether to enable color filtering settings. const char kAccessibilityColorFiltering[] = "settings.a11y.color_filtering.enabled"; -// How much to greyscale the display. -const char kAccessibilityGreyscaleAmount[] = - "settings.a11y.color_filtering.greyscale_amount"; -// How much to saturate the display. -const char kAccessibilitySaturationAmount[] = - "settings.a11y.color_filtering.saturation_amount"; -// How much sepia the display. -const char kAccessibilitySepiaAmount[] = - "settings.a11y.color_filtering.sepia_amount"; -// How much to rotate the hue on the display. -const char kAccessibilityHueRotationAmount[] = - "settings.a11y.color_filtering.hue_rotation_amount"; // The amount of a color vision correction filter to apply. const char kAccessibilityColorVisionCorrectionAmount[] = "settings.a11y.color_filtering.color_vision_correction_amount";
diff --git a/ash/constants/ash_pref_names.h b/ash/constants/ash_pref_names.h index 3fcfd77..7f623ce 100644 --- a/ash/constants/ash_pref_names.h +++ b/ash/constants/ash_pref_names.h
@@ -211,14 +211,6 @@ COMPONENT_EXPORT(ASH_CONSTANTS) extern const char kAccessibilityColorFiltering[]; COMPONENT_EXPORT(ASH_CONSTANTS) -extern const char kAccessibilityGreyscaleAmount[]; -COMPONENT_EXPORT(ASH_CONSTANTS) -extern const char kAccessibilitySaturationAmount[]; -COMPONENT_EXPORT(ASH_CONSTANTS) -extern const char kAccessibilitySepiaAmount[]; -COMPONENT_EXPORT(ASH_CONSTANTS) -extern const char kAccessibilityHueRotationAmount[]; -COMPONENT_EXPORT(ASH_CONSTANTS) extern const char kAccessibilityColorVisionCorrectionAmount[]; COMPONENT_EXPORT(ASH_CONSTANTS) extern const char kAccessibilityColorVisionDeficiencyType[];
diff --git a/ash/display/cursor_window_controller.cc b/ash/display/cursor_window_controller.cc index 232c30ab..22ef817 100644 --- a/ash/display/cursor_window_controller.cc +++ b/ash/display/cursor_window_controller.cc
@@ -8,7 +8,6 @@ #include "ash/capture_mode/capture_mode_camera_controller.h" #include "ash/capture_mode/capture_mode_controller.h" #include "ash/capture_mode/capture_mode_session.h" -#include "ash/color_enhancement/color_enhancement_controller.h" #include "ash/constants/ash_constants.h" #include "ash/constants/ash_pref_names.h" #include "ash/constants/ash_switches.h" @@ -298,11 +297,6 @@ return true; } - if (shell->color_enhancement_controller() - ->ShouldEnableCursorCompositingForSepia()) { - return true; - } - return prefs->GetBoolean(prefs::kAccessibilityLargeCursorEnabled) || prefs->GetBoolean(prefs::kAccessibilityHighContrastEnabled) || prefs->GetBoolean(prefs::kDockedMagnifierEnabled);
diff --git a/ash/projector/projector_metrics.cc b/ash/projector/projector_metrics.cc index 7429f1b0..9a21d1f 100644 --- a/ash/projector/projector_metrics.cc +++ b/ash/projector/projector_metrics.cc
@@ -34,9 +34,6 @@ constexpr char kProjectorPendingScreencastChangeIntervalHistogramName[] = "Ash.Projector.PendingScreencastChangeInterval"; -constexpr char kProjectorPolicyChangeHandlingErrorHistogramName[] = - "Ash.Projector.PolicyChangeHandlingError"; - constexpr char kProjectorOnDeviceToServerSpeechRecognitionFallbackReasonHistogramName[] = "Ash.Projector.OnDeviceToServerSpeechRecognitionFallbackReason"; @@ -104,13 +101,6 @@ GetHistogramName(kProjectorCreationFlowErrorHistogramName), error); } -ASH_EXPORT void RecordPolicyChangeHandlingError( - ProjectorPolicyChangeHandlingError error) { - base::UmaHistogramEnumeration( - GetHistogramName(kProjectorPolicyChangeHandlingErrorHistogramName), - error); -} - ASH_EXPORT void RecordPendingScreencastBatchIOTaskDuration( const base::TimeDelta duration) { // We don't normally expect the duration is longer than 10s. If this limit is
diff --git a/ash/projector/projector_metrics.h b/ash/projector/projector_metrics.h index a449a948..06456c7 100644 --- a/ash/projector/projector_metrics.h +++ b/ash/projector/projector_metrics.h
@@ -97,19 +97,6 @@ // These enum values represent potential error that occurs at policy value // change handling and log to UMA. Entries should not be renumbered and numeric // values should never be reused. Please keep in sync with -// "ProjectorPolicyChangeHandlingError" in -// src/tools/metrics/histograms/enums.xml. -enum class ProjectorPolicyChangeHandlingError { - kSwaManager = 0, - kWebAppProvider = 1, - kWebAppProviderOnRegistryReady = 2, - kSyncBridge = 3, - kMaxValue = kSyncBridge -}; - -// These enum values represent potential error that occurs at policy value -// change handling and log to UMA. Entries should not be renumbered and numeric -// values should never be reused. Please keep in sync with // "OnDeviceToServerSpeechRecognitionFallbackReason" in // src/tools/metrics/histograms/enums.xml. // This enum is the smiliar to the `OnDeviceRecognitionAvailability` because @@ -170,11 +157,6 @@ // Records the interval between the UI changes of pending screencasts. void RecordPendingScreencastChangeInterval(const base::TimeDelta interval); -// Records potential error occurs at policy change. -// TODO(b/240497023): remove this once confirmed the nullptr should never -// occurs and the nullptr check is converted to DCEHCK. -void RecordPolicyChangeHandlingError(ProjectorPolicyChangeHandlingError error); - void RecordOnDeviceToServerSpeechRecognitionFallbackReason( OnDeviceToServerSpeechRecognitionFallbackReason reason);
diff --git a/ash/touch/touch_selection_magnifier_runner_ash.cc b/ash/touch/touch_selection_magnifier_runner_ash.cc index 20092d9..1e14170 100644 --- a/ash/touch/touch_selection_magnifier_runner_ash.cc +++ b/ash/touch/touch_selection_magnifier_runner_ash.cc
@@ -63,36 +63,61 @@ // Duration of the animation when updating magnifier bounds. constexpr base::TimeDelta kMagnifierTransitionDuration = base::Milliseconds(50); -// Gets the bounds of the magnifier layer given an anchor point. The magnifier -// layer bounds should be horizontally centered above the anchor point (except -// possibly at the edges of the parent container) and include the magnifier -// border and shadows. `magnifier_anchor_point` and returned bounds are in -// coordinates of the magnifier's parent container. -gfx::Rect GetMagnifierLayerBounds(const gfx::Size& parent_container_size, - const gfx::Point& magnifier_anchor_point) { - const gfx::Point origin( - magnifier_anchor_point.x() - kMagnifierSize.width() / 2, - magnifier_anchor_point.y() - kMagnifierSize.height() + - kMagnifierVerticalBoundsOffset); - gfx::Rect magnifier_layer_bounds(origin, kMagnifierSize); - magnifier_layer_bounds.Outset(kMagnifierShadowOutsets); - // Adjust the magnifier layer to be completely within the parent container - // while keeping the magnifier size fixed. - magnifier_layer_bounds.AdjustToFit(gfx::Rect(parent_container_size)); - return magnifier_layer_bounds; +// Gets the bounds of the content that will be magnified, relative to the parent +// (`parent_bounds` should be the parent's bounds in its own coordinate space, +// e.g. {0,0,w,h}). The magnified bounds will be in the same coordinate space as +// `parent_bounds` and are adjusted to be contained within them. +gfx::Rect GetMagnifiedBounds(const gfx::Rect& parent_bounds, + const gfx::Point& focus_center) { + gfx::SizeF magnified_size(kMagnifierSize.width() / kMagnifierScale, + kMagnifierSize.height() / kMagnifierScale); + gfx::PointF origin(focus_center.x() - magnified_size.width() / 2, + focus_center.y() - magnified_size.height() / 2); + + gfx::RectF magnified_bounds(origin, magnified_size); + magnified_bounds.AdjustToFit(gfx::RectF(parent_bounds)); + + // Transform the adjusted magnified_bounds to the layer's scale. It's okay if + // these bounds go outside the container, since they will be offset and then + // fit to the parent. + magnified_size = {kMagnifierScale * magnified_bounds.width(), + kMagnifierScale * magnified_bounds.height()}; + origin = {magnified_bounds.CenterPoint().x() - magnified_size.width() / 2, + magnified_bounds.CenterPoint().y() - magnified_size.height() / 2}; + return gfx::ToEnclosingRect(gfx::RectF(origin, magnified_size)); } -// Gets the zoom layer background offset needed to center `focus_center` in the -// magnified area. `magnifier_layer_bounds` and `focus_center` are in -// coordinates of the magnifier's parent container. -// TODO(b/275014115): Currently the magnifier doesn't show the very edge of the -// screen. Figure out correct background offset to fix this while keeping the -// magnified area completely inside the parent container. -gfx::Point GetZoomLayerBackgroundOffset(const gfx::Rect& magnifier_layer_bounds, - const gfx::Point& focus_center) { - return gfx::Point(0, magnifier_layer_bounds.y() + - kZoomLayerBounds.CenterPoint().y() - - focus_center.y()); +std::pair<gfx::Rect, gfx::Point> GetMagnifierLayerBoundsAndOffset( + const gfx::Size& parent_size, + const gfx::Rect& focus_rect) { + // The parent-relative bounding box of the parent container, which is the + // coordinate space that the magnifier layer's bounds need to be in. 
+ const gfx::Rect parent_bounds(gfx::Point(0, 0), parent_size); + // `magnified_bounds` holds the bounds of the content that will be magnified. + // Because these bounds are centered on `focus_center`, the user's finger + // would block the magnified content if it were shown in place. + gfx::Rect magnified_bounds = + GetMagnifiedBounds(parent_bounds, focus_rect.CenterPoint()); + // To avoid being blocked, offset the bounds (and the background so it + // remains visually consistent) along the Y axis. This must be clamped to + // `parent_bounds` so that it's not drawn off the top edge of the screen. + gfx::Rect layer_bounds = magnified_bounds; + layer_bounds.Offset(0, kMagnifierVerticalBoundsOffset - + magnified_bounds.height() / 2 - + focus_rect.height() / 2); + + layer_bounds.Outset(kMagnifierShadowOutsets); + layer_bounds.AdjustToFit(parent_bounds); + + // `zoom_layer_center` is the center of the zoom layer relative to the + // magnifier layer's parent. Since the magnifier layer has non-uniform outsets + // for the shadows, its center (layer_bounds.CenterPoint()) is not exactly + // the same as the center of the zoom layer. + gfx::Point zoom_layer_center = + kZoomLayerBounds.CenterPoint() + layer_bounds.OffsetFromOrigin(); + gfx::Point offset = gfx::PointAtOffsetFromOrigin( + zoom_layer_center - magnified_bounds.CenterPoint()); + return {layer_bounds, offset}; } // Gets the color to use for the border based on the default native theme. @@ -198,11 +223,13 @@ gfx::BoundingRect(focus_bound.edge_start(), focus_bound.edge_end())); aura::Window* parent_container = GetParentContainer(); aura::Window::ConvertRectToTarget(context, parent_container, &focus_rect); - const gfx::Rect magnifier_layer_bounds = GetMagnifierLayerBounds( - parent_container->bounds().size(), focus_rect.top_center()); + + auto [magnifier_layer_bounds, background_offset] = + GetMagnifierLayerBoundsAndOffset(parent_container->bounds().size(), + focus_rect); + + zoom_layer_->SetBackgroundOffset(background_offset); magnifier_layer_->SetBounds(magnifier_layer_bounds); - zoom_layer_->SetBackgroundOffset(GetZoomLayerBackgroundOffset( - magnifier_layer_bounds, focus_rect.CenterPoint())); // Add magnifier layer to parent container if needed. if (created_new_magnifier_layer) {
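The zoom geometry can be hard to follow from the diff alone. Below is a simplified standalone sketch of what the new GetMagnifiedBounds() computes; the function and parameter names here are invented, and kMagnifierSize / kMagnifierScale are passed in as parameters because their values are not shown in this hunk. The idea: the source content that fills a magnifier of a given size at a given scale covers size / scale, centered on the focus point and fitted to the parent so the magnifier never samples off-screen.

#include "ui/gfx/geometry/point.h"
#include "ui/gfx/geometry/rect.h"
#include "ui/gfx/geometry/rect_conversions.h"
#include "ui/gfx/geometry/rect_f.h"
#include "ui/gfx/geometry/size.h"

// Illustrative sketch only; the real logic lives in GetMagnifiedBounds().
gfx::Rect SketchMagnifiedBounds(const gfx::Rect& parent_bounds,
                                const gfx::Point& focus_center,
                                const gfx::Size& magnifier_size,
                                float scale) {
  const gfx::SizeF magnified_size(magnifier_size.width() / scale,
                                  magnifier_size.height() / scale);
  gfx::RectF bounds(
      gfx::PointF(focus_center.x() - magnified_size.width() / 2,
                  focus_center.y() - magnified_size.height() / 2),
      magnified_size);
  // Keep the sampled region on screen while preserving its size where
  // possible, exactly like the AdjustToFit() call in the CL.
  bounds.AdjustToFit(gfx::RectF(parent_bounds));
  return gfx::ToEnclosingRect(bounds);
}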
diff --git a/ash/wallpaper/wallpaper_controller_impl.cc b/ash/wallpaper/wallpaper_controller_impl.cc index d724899c..e6701be 100644 --- a/ash/wallpaper/wallpaper_controller_impl.cc +++ b/ash/wallpaper/wallpaper_controller_impl.cc
@@ -79,8 +79,6 @@ #include "url/gurl.h" using color_utils::ColorProfile; -using color_utils::LumaRange; -using color_utils::SaturationRange; using FilePathCallback = base::OnceCallback<void(const base::FilePath&)>; @@ -198,39 +196,6 @@ return gfx::ImageSkia::CreateFrom1xBitmap(bitmap); } -// Gets the color profiles for extracting wallpaper prominent colors. -std::vector<ColorProfile> GetProminentColorProfiles() { - return {ColorProfile(LumaRange::DARK, SaturationRange::VIBRANT), - ColorProfile(LumaRange::NORMAL, SaturationRange::VIBRANT), - ColorProfile(LumaRange::LIGHT, SaturationRange::VIBRANT), - ColorProfile(LumaRange::DARK, SaturationRange::MUTED), - ColorProfile(LumaRange::NORMAL, SaturationRange::MUTED), - ColorProfile(LumaRange::LIGHT, SaturationRange::MUTED)}; -} - -// Gets the corresponding color profile type based on the given -// |color_profile|. -ColorProfileType GetColorProfileType(ColorProfile color_profile) { - bool vibrant = color_profile.saturation == SaturationRange::VIBRANT; - switch (color_profile.luma) { - case LumaRange::ANY: - // There should be no color profiles with the ANY luma range. - NOTREACHED(); - break; - case LumaRange::DARK: - return vibrant ? ColorProfileType::DARK_VIBRANT - : ColorProfileType::DARK_MUTED; - case LumaRange::NORMAL: - return vibrant ? ColorProfileType::NORMAL_VIBRANT - : ColorProfileType::NORMAL_MUTED; - case LumaRange::LIGHT: - return vibrant ? ColorProfileType::LIGHT_VIBRANT - : ColorProfileType::LIGHT_MUTED; - } - NOTREACHED(); - return ColorProfileType::DARK_MUTED; -} - // Deletes a list of wallpaper files in |file_list|. void DeleteWallpaperInList(std::vector<base::FilePath> file_list) { for (const base::FilePath& path : file_list) { @@ -532,13 +497,11 @@ : pref_manager_(std::move(pref_manager)), variant_info_fetcher_(std::move(online_fetcher)), blur_manager_(std::make_unique<WallpaperBlurManager>()), - color_profiles_(GetProminentColorProfiles()), wallpaper_reload_delay_(kWallpaperReloadDelay), wallpaper_image_downloader_(std::move(image_downloader)), sequenced_task_runner_(base::ThreadPool::CreateSequencedTaskRunner( {base::MayBlock(), base::TaskPriority::USER_VISIBLE, base::TaskShutdownBehavior::CONTINUE_ON_SHUTDOWN})) { - DCHECK(!color_profiles_.empty()); Shell::Get()->window_tree_host_manager()->AddObserver(this); Shell::Get()->AddShellObserver(this); theme_observation_.Observe(ui::NativeTheme::GetInstanceForNativeUi()); @@ -571,11 +534,7 @@ if (!calculated_colors_) { return kInvalidWallpaperColor; } - - ColorProfileType type = GetColorProfileType(color_profile); - size_t index = static_cast<size_t>(type); - DCHECK_LT(index, calculated_colors_->prominent_colors.size()); - return calculated_colors_->prominent_colors[index]; + return calculated_colors_->GetProminentColor(color_profile); } SkColor WallpaperControllerImpl::GetKMeanColor() const { @@ -2759,8 +2718,8 @@ return; } - color_calculator_ = std::make_unique<WallpaperColorCalculator>( - GetWallpaper(), color_profiles_); + color_calculator_ = + std::make_unique<WallpaperColorCalculator>(GetWallpaper()); if (!color_calculator_->StartCalculation(base::BindOnce( &WallpaperControllerImpl::OnColorCalculationComplete, weak_factory_.GetWeakPtr(), current_wallpaper_->wallpaper_info()))) {
diff --git a/ash/wallpaper/wallpaper_controller_impl.h b/ash/wallpaper/wallpaper_controller_impl.h index a113853..ec92349 100644 --- a/ash/wallpaper/wallpaper_controller_impl.h +++ b/ash/wallpaper/wallpaper_controller_impl.h
@@ -827,9 +827,6 @@ // Empty state is used to denote when colors have not yet been calculated. absl::optional<WallpaperCalculatedColors> calculated_colors_; - // Caches the color profiles that need to do wallpaper color extracting. - const std::vector<color_utils::ColorProfile> color_profiles_; - // Account id of the current user. AccountId current_user_;
diff --git a/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.cc b/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.cc index d668523..f440c0af 100644 --- a/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.cc +++ b/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.cc
@@ -4,8 +4,42 @@ #include "ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.h" +#include "ash/public/cpp/wallpaper/wallpaper_types.h" +#include "base/notreached.h" +#include "ui/gfx/color_analysis.h" + namespace ash { +namespace { + +using ColorProfile = color_utils::ColorProfile; +using LumaRange = color_utils::LumaRange; +using SaturationRange = color_utils::SaturationRange; + +// Gets the corresponding color profile type based on the given |color_profile|. +ColorProfileType GetColorProfileType(ColorProfile color_profile) { + bool vibrant = color_profile.saturation == SaturationRange::VIBRANT; + switch (color_profile.luma) { + case LumaRange::ANY: + // There should be no color profiles with the ANY luma range. + NOTREACHED(); + break; + case LumaRange::DARK: + return vibrant ? ColorProfileType::DARK_VIBRANT + : ColorProfileType::DARK_MUTED; + case LumaRange::NORMAL: + return vibrant ? ColorProfileType::NORMAL_VIBRANT + : ColorProfileType::NORMAL_MUTED; + case LumaRange::LIGHT: + return vibrant ? ColorProfileType::LIGHT_VIBRANT + : ColorProfileType::LIGHT_MUTED; + } + NOTREACHED(); + return ColorProfileType::DARK_MUTED; +} + +} // namespace + WallpaperCalculatedColors::WallpaperCalculatedColors() = default; WallpaperCalculatedColors::WallpaperCalculatedColors( @@ -40,4 +74,12 @@ WallpaperCalculatedColors::~WallpaperCalculatedColors() = default; +SkColor WallpaperCalculatedColors::GetProminentColor( + ColorProfile color_profile) const { + ColorProfileType type = GetColorProfileType(color_profile); + size_t index = static_cast<size_t>(type); + DCHECK_LT(index, prominent_colors.size()); + return prominent_colors[index]; +} + } // namespace ash
diff --git a/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.h b/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.h index 1d7c2f0..53dfa45 100644 --- a/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.h +++ b/ash/wallpaper/wallpaper_utils/wallpaper_calculated_colors.h
@@ -10,6 +10,10 @@ #include "ash/ash_export.h" #include "third_party/skia/include/core/SkColor.h" +namespace color_utils { +struct ColorProfile; +} // namespace color_utils + namespace ash { // Captures the calculated prominent colors and k mean color of a wallpaper. The @@ -34,6 +38,8 @@ ~WallpaperCalculatedColors(); + SkColor GetProminentColor(color_utils::ColorProfile color_profile) const; + std::vector<SkColor> prominent_colors; SkColor k_mean_color = SK_ColorTRANSPARENT; // Result of image sampling algorithm as described in
diff --git a/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.cc b/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.cc index 93a8d27f..c84dc2f 100644 --- a/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.cc +++ b/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.cc
@@ -25,13 +25,14 @@ #include "ui/gfx/image/image_skia.h" #include "ui/gfx/image/image_skia_operations.h" -using LumaRange = color_utils::LumaRange; -using SaturationRange = color_utils::SaturationRange; - namespace ash { namespace { +using ColorProfile = color_utils::ColorProfile; +using LumaRange = color_utils::LumaRange; +using SaturationRange = color_utils::SaturationRange; + // The largest image size, in pixels, to synchronously calculate the prominent // color. This is a simple heuristic optimization because extraction on images // smaller than this should run very quickly, and offloading the task to another @@ -42,6 +43,16 @@ // colors. constexpr int kWallpaperSizeForColorCalculation = 256; +// Gets the color profiles for extracting wallpaper prominent colors. +std::vector<ColorProfile> GetProminentColorProfiles() { + return {ColorProfile(LumaRange::DARK, SaturationRange::VIBRANT), + ColorProfile(LumaRange::NORMAL, SaturationRange::VIBRANT), + ColorProfile(LumaRange::LIGHT, SaturationRange::VIBRANT), + ColorProfile(LumaRange::DARK, SaturationRange::MUTED), + ColorProfile(LumaRange::NORMAL, SaturationRange::MUTED), + ColorProfile(LumaRange::LIGHT, SaturationRange::MUTED)}; +} + const gfx::ImageSkia GetResizedImage(const gfx::ImageSkia& image) { if (std::max(image.width(), image.height()) < kWallpaperSizeForColorCalculation) { @@ -149,10 +160,8 @@ } // namespace -WallpaperColorCalculator::WallpaperColorCalculator( - const gfx::ImageSkia& image, - const std::vector<color_utils::ColorProfile>& color_profiles) - : image_(image), color_profiles_(color_profiles) { +WallpaperColorCalculator::WallpaperColorCalculator(const gfx::ImageSkia& image) + : image_(image), color_profiles_(GetProminentColorProfiles()) { // The task runner is used to compute the wallpaper colors on a thread // that doesn't block the UI. The user may or may not be waiting for it. // If we need to shutdown, we can just re-compute the value next time. @@ -192,6 +201,11 @@ task_runner_ = task_runner; } +void WallpaperColorCalculator::SetColorProfiles( + const std::vector<ColorProfile>& color_profiles) { + color_profiles_ = color_profiles; +} + void WallpaperColorCalculator::OnAsyncCalculationComplete( base::TimeTicks async_start_time, WallpaperColorCallback callback,
diff --git a/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.h b/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.h index 82dfe83f..42ef0023 100644 --- a/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.h +++ b/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator.h
@@ -26,11 +26,9 @@ // Calculates colors based on a wallpaper image. class ASH_EXPORT WallpaperColorCalculator { public: - // |image|, |color_profiles| are the input parameters to the color calculation - // that is executed on the |task_runner|. - WallpaperColorCalculator( - const gfx::ImageSkia& image, - const std::vector<color_utils::ColorProfile>& color_profiles); + // Passes `image` as a param to the color calculation. Uses the default color + // profiles provided by GetProminentColorProfiles(). + explicit WallpaperColorCalculator(const gfx::ImageSkia& image); WallpaperColorCalculator(const WallpaperColorCalculator&) = delete; WallpaperColorCalculator& operator=(const WallpaperColorCalculator&) = delete; @@ -56,6 +54,10 @@ // Explicitly sets the |task_runner_| for testing. void SetTaskRunnerForTest(scoped_refptr<base::TaskRunner> task_runner); + // Overrides the default color profiles. + void SetColorProfiles( + const std::vector<color_utils::ColorProfile>& color_profiles); + private: // Handles asynchronous calculation results. |async_start_time| is used to // record duration metrics.
diff --git a/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator_unittest.cc b/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator_unittest.cc index 3be2ad3..6c4dc1f 100644 --- a/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator_unittest.cc +++ b/ash/wallpaper/wallpaper_utils/wallpaper_color_calculator_unittest.cc
@@ -116,11 +116,11 @@ void WallpaperColorCalculatorTest::CreateCalculator( const gfx::ImageSkia& image) { + calculator_ = std::make_unique<WallpaperColorCalculator>(image); std::vector<color_utils::ColorProfile> color_profiles; color_profiles.emplace_back(color_utils::LumaRange::NORMAL, color_utils::SaturationRange::VIBRANT); - calculator_ = - std::make_unique<WallpaperColorCalculator>(image, color_profiles); + calculator_->SetColorProfiles(color_profiles); } // Used to group the asynchronous calculation tests.
diff --git a/ash/wallpaper/wallpaper_view.cc b/ash/wallpaper/wallpaper_view.cc index 3a31f90..a41e1da 100644 --- a/ash/wallpaper/wallpaper_view.cc +++ b/ash/wallpaper/wallpaper_view.cc
@@ -225,8 +225,8 @@ cc::FilterOperations operations; operations.Append( cc::FilterOperation::CreateBlurFilter(blur, SkTileMode::kClamp)); - sk_sp<cc::PaintFilter> filter = cc::RenderSurfaceFilters::BuildImageFilter( - operations, gfx::SizeF(dst.size()), gfx::Vector2dF()); + sk_sp<cc::PaintFilter> filter = + cc::RenderSurfaceFilters::BuildImageFilter(operations); // If the wallpaper can't fill the desktop, paint it directly to the // canvas so that it can blend the image with the rest of background
diff --git a/ash/webui/common/resources/network/apn_list_item.html b/ash/webui/common/resources/network/apn_list_item.html index ac6910c4..82f1c3c 100644 --- a/ash/webui/common/resources/network/apn_list_item.html +++ b/ash/webui/common/resources/network/apn_list_item.html
@@ -44,7 +44,7 @@ <!-- TODO(b/162365553): Implement accessibility attributes. --> <div id="labelWrapper"> <div id="label" aria-hidden="true"> - <div id="apnName">[[apn.accessPointName]]</div> + <div id="apnName">[[getApnDisplayName_(apn)]]</div> <div id="autoDetected" hidden="[[apn.id]]"> [[i18n('apnAutoDetected')]] </div>
diff --git a/ash/webui/common/resources/network/apn_list_item.js b/ash/webui/common/resources/network/apn_list_item.js index f0cf8a5..0f206e4 100644 --- a/ash/webui/common/resources/network/apn_list_item.js +++ b/ash/webui/common/resources/network/apn_list_item.js
@@ -12,7 +12,7 @@ import {I18nBehavior, I18nBehaviorInterface} from '//resources/ash/common/i18n_behavior.js'; import {mixinBehaviors, PolymerElement} from '//resources/polymer/v3_0/polymer/polymer_bundled.min.js'; import {assert} from 'chrome://resources/ash/common/assert.js'; -import {ApnDetailDialogMode, ApnEventData} from 'chrome://resources/ash/common/network/cellular_utils.js'; +import {ApnDetailDialogMode, ApnEventData, getApnDisplayName} from 'chrome://resources/ash/common/network/cellular_utils.js'; import {MojoInterfaceProviderImpl} from 'chrome://resources/ash/common/network/mojo_interface_provider.js'; import {ApnProperties, ApnState, CrosNetworkConfigInterface} from 'chrome://resources/mojo/chromeos/services/network_config/public/mojom/cros_network_config.mojom-webui.js'; @@ -75,6 +75,14 @@ } /** + * @param {!ApnProperties} apn + * @private + */ + getApnDisplayName_(apn) { + return getApnDisplayName(apn); + } + + /** * Opens the three dots menu. * @private */
diff --git a/ash/webui/common/resources/network/cellular_utils.js b/ash/webui/common/resources/network/cellular_utils.js index 08f2e86..676207c 100644 --- a/ash/webui/common/resources/network/cellular_utils.js +++ b/ash/webui/common/resources/network/cellular_utils.js
@@ -169,4 +169,12 @@ shouldGetNetworkDetails = true; } return {deviceState, shouldGetNetworkDetails}; -} \ No newline at end of file +} + +/** + * Returns the display name for |apn|. + * @param {!ApnProperties} apn + */ +export function getApnDisplayName(apn) { + return apn.name || apn.accessPointName; +}
diff --git a/ash/webui/diagnostics_ui/resources/keyboard_tester.ts b/ash/webui/diagnostics_ui/resources/keyboard_tester.ts index 48e7bb8..a509572a 100644 --- a/ash/webui/diagnostics_ui/resources/keyboard_tester.ts +++ b/ash/webui/diagnostics_ui/resources/keyboard_tester.ts
@@ -40,6 +40,12 @@ } } +export interface KeyboardDiagramTopRowKey { + icon?: string; + ariaNameI18n?: string; + text?: string; +} + /** * @fileoverview * 'keyboard-tester' displays a tester UI for a keyboard. @@ -49,37 +55,33 @@ * Map from Mojo TopRowKey constants to keyboard diagram top row key * definitions. */ -const topRowKeyMap: - {[index: number]: {icon?: string, ariaNameI18n?: string, text?: string}} = { - [TopRowKey.kNone]: DiagramTopRowKey['kNone'], - [TopRowKey.kBack]: DiagramTopRowKey['kBack'], - [TopRowKey.kForward]: DiagramTopRowKey['kForward'], - [TopRowKey.kRefresh]: DiagramTopRowKey['kRefresh'], - [TopRowKey.kFullscreen]: DiagramTopRowKey['kFullscreen'], - [TopRowKey.kOverview]: DiagramTopRowKey['kOverview'], - [TopRowKey.kScreenshot]: DiagramTopRowKey['kScreenshot'], - [TopRowKey.kScreenBrightnessDown]: - DiagramTopRowKey['kScreenBrightnessDown'], - [TopRowKey.kScreenBrightnessUp]: DiagramTopRowKey['kScreenBrightnessUp'], - [TopRowKey.kPrivacyScreenToggle]: - DiagramTopRowKey['kPrivacyScreenToggle'], - [TopRowKey.kMicrophoneMute]: DiagramTopRowKey['kMicrophoneMute'], - [TopRowKey.kVolumeMute]: DiagramTopRowKey['kVolumeMute'], - [TopRowKey.kVolumeDown]: DiagramTopRowKey['kVolumeDown'], - [TopRowKey.kVolumeUp]: DiagramTopRowKey['kVolumeUp'], - [TopRowKey.kKeyboardBacklightToggle]: - DiagramTopRowKey['kKeyboardBacklightToggle'], - [TopRowKey.kKeyboardBacklightDown]: - DiagramTopRowKey['kKeyboardBacklightDown'], - [TopRowKey.kKeyboardBacklightUp]: - DiagramTopRowKey['kKeyboardBacklightUp'], - [TopRowKey.kNextTrack]: DiagramTopRowKey['kNextTrack'], - [TopRowKey.kPreviousTrack]: DiagramTopRowKey['kPreviousTrack'], - [TopRowKey.kPlayPause]: DiagramTopRowKey['kPlayPause'], - [TopRowKey.kScreenMirror]: DiagramTopRowKey['kScreenMirror'], - [TopRowKey.kDelete]: DiagramTopRowKey['kDelete'], - [TopRowKey.kUnknown]: DiagramTopRowKey['kUnknown'], - }; +const topRowKeyMap: {[index: number]: KeyboardDiagramTopRowKey} = { + [TopRowKey.kNone]: DiagramTopRowKey['kNone'], + [TopRowKey.kBack]: DiagramTopRowKey['kBack'], + [TopRowKey.kForward]: DiagramTopRowKey['kForward'], + [TopRowKey.kRefresh]: DiagramTopRowKey['kRefresh'], + [TopRowKey.kFullscreen]: DiagramTopRowKey['kFullscreen'], + [TopRowKey.kOverview]: DiagramTopRowKey['kOverview'], + [TopRowKey.kScreenshot]: DiagramTopRowKey['kScreenshot'], + [TopRowKey.kScreenBrightnessDown]: DiagramTopRowKey['kScreenBrightnessDown'], + [TopRowKey.kScreenBrightnessUp]: DiagramTopRowKey['kScreenBrightnessUp'], + [TopRowKey.kPrivacyScreenToggle]: DiagramTopRowKey['kPrivacyScreenToggle'], + [TopRowKey.kMicrophoneMute]: DiagramTopRowKey['kMicrophoneMute'], + [TopRowKey.kVolumeMute]: DiagramTopRowKey['kVolumeMute'], + [TopRowKey.kVolumeDown]: DiagramTopRowKey['kVolumeDown'], + [TopRowKey.kVolumeUp]: DiagramTopRowKey['kVolumeUp'], + [TopRowKey.kKeyboardBacklightToggle]: + DiagramTopRowKey['kKeyboardBacklightToggle'], + [TopRowKey.kKeyboardBacklightDown]: + DiagramTopRowKey['kKeyboardBacklightDown'], + [TopRowKey.kKeyboardBacklightUp]: DiagramTopRowKey['kKeyboardBacklightUp'], + [TopRowKey.kNextTrack]: DiagramTopRowKey['kNextTrack'], + [TopRowKey.kPreviousTrack]: DiagramTopRowKey['kPreviousTrack'], + [TopRowKey.kPlayPause]: DiagramTopRowKey['kPlayPause'], + [TopRowKey.kScreenMirror]: DiagramTopRowKey['kScreenMirror'], + [TopRowKey.kDelete]: DiagramTopRowKey['kDelete'], + [TopRowKey.kUnknown]: DiagramTopRowKey['kUnknown'], +}; /** Maps top-right key evdev codes to the corresponding DiagramTopRightKey. 
*/ const topRightKeyByCode: Map<number, DiagramTopRightKey> = new Map([ @@ -186,22 +188,14 @@ } keyboard: KeyboardInfo; - // TODO(crbug.com/1257138): use the proper type annotation instead of - // string. protected isLoggedIn: boolean; - protected diagramTopRightKey: string; + protected diagramTopRightKey: DiagramTopRightKey|null; private lostFocusToastLingerMs: number; private layoutIsKnown: boolean; - // TODO(crbug.com/1257138): use the proper type annotation instead of - // string. - private diagramMechanicalLayout: string; - // TODO(crbug.com/1257138): use the proper type annotation instead of - // string. - private diagramPhysicalLayout: string; + private diagramMechanicalLayout: DiagramMechanicalLayout|null; + private diagramPhysicalLayout: DiagramPhysicalLayout|null; private showNumberPad: boolean; - // TODO(crbug.com/1257138): use the proper type annotation instead of - // Object. - private topRowKeys: Object[]; + private topRowKeys: KeyboardDiagramTopRowKey[]; private receiver: KeyboardObserverReceiver|null = null; private inputDataProvider: InputDataProviderInterface = getInputDataProvider(); @@ -232,11 +226,8 @@ // a number pad event we weren't expecting. } - /** - * TODO(crbug.com/1257138): use the proper type annotation instead of string. - */ - private computeDiagramMechanicalLayout(keyboardInfo?: KeyboardInfo): string - |null { + private computeDiagramMechanicalLayout(keyboardInfo?: KeyboardInfo): + DiagramMechanicalLayout|null { if (!keyboardInfo) { return null; } @@ -248,8 +239,8 @@ }[keyboardInfo.mechanicalLayout]; } - private computeDiagramPhysicalLayout(keyboardInfo?: KeyboardInfo): string - |null { + private computeDiagramPhysicalLayout(keyboardInfo?: KeyboardInfo): + DiagramPhysicalLayout|null { if (!keyboardInfo) { return null; } @@ -263,10 +254,8 @@ }[keyboardInfo.physicalLayout]; } - /** - * TODO(crbug.com/1257138): use the proper type annotation instead of string. - */ - private computeDiagramTopRightKey(keyboardInfo?: KeyboardInfo): string|null { + private computeDiagramTopRightKey(keyboardInfo?: KeyboardInfo): + DiagramTopRightKey|null { if (!keyboardInfo) { return null; } @@ -284,7 +273,8 @@ } - private computeTopRowKeys(keyboard?: KeyboardInfo): Object[] { + private computeTopRowKeys(keyboard?: KeyboardInfo): + KeyboardDiagramTopRowKey[] { if (!keyboard) { return []; }
diff --git a/ash/webui/shortcut_customization_ui/backend/search/search_concept_registry.cc b/ash/webui/shortcut_customization_ui/backend/search/search_concept_registry.cc index 54c4f11..e57345e 100644 --- a/ash/webui/shortcut_customization_ui/backend/search/search_concept_registry.cc +++ b/ash/webui/shortcut_customization_ui/backend/search/search_concept_registry.cc
@@ -113,12 +113,15 @@ /*id=*/base::StrCat({search_concept.id, "-description"}), /*content=*/search_concept.accelerator_layout_info->description); - // Only text accelerators should become searchable LSS Content. + // All SearchConcepts should contain at least one AcceleratorInfo. DCHECK(search_concept.accelerator_infos.size() > 0); - // Text accelerators should only have one entry in accelerator_infos. + + // Get the first AcceleratorInfo to check if it's a text accelerator. Note + // that text accelerators should only have one entry in accelerator_infos. const mojom::AcceleratorInfoPtr& first_accelerator_info = search_concept.accelerator_infos.at(0); + // Only text accelerators should become searchable LSS Content. if (first_accelerator_info->layout_properties->is_text_accelerator()) { // Content->id needs to be unique across the entire index, // so we prefix it with the SearchConcept's id.
diff --git a/ash/webui/shortcut_customization_ui/backend/search/search_handler.cc b/ash/webui/shortcut_customization_ui/backend/search/search_handler.cc index ebb3b44..c470a16b 100644 --- a/ash/webui/shortcut_customization_ui/backend/search/search_handler.cc +++ b/ash/webui/shortcut_customization_ui/backend/search/search_handler.cc
@@ -17,6 +17,15 @@ #include "third_party/abseil-cpp/absl/types/optional.h" #include "ui/base/accelerators/accelerator.h" +// Sets the relevance_threshold low enough for single-character queries to +// produce results, but high enough to avoid too many irrelevant results. +// At the default value of 0.64, we observed that single-character queries +// produced few or no results. In testing, 0.4 proved to be too low a +// threshold and reduced the quality of search results. We arrived at the +// current value by testing various combinations of queries. This value may +// need to be adjusted in the future. +constexpr double kSearchServiceRelevanceThreshold = 0.52; + namespace ash::shortcut_ui { SearchHandler::SearchHandler( @@ -31,6 +40,10 @@ DCHECK(index_remote_.is_bound()); search_concept_registry_->AddObserver(this); + + index_remote_->SetSearchParams( + {/*relevance_threshold=*/kSearchServiceRelevanceThreshold}, + base::OnceCallback<void()>()); } SearchHandler::~SearchHandler() {
diff --git a/ash/webui/shortcut_customization_ui/backend/search/search_handler_unittest.cc b/ash/webui/shortcut_customization_ui/backend/search/search_handler_unittest.cc index 2bb25e18..9038c3b3 100644 --- a/ash/webui/shortcut_customization_ui/backend/search/search_handler_unittest.cc +++ b/ash/webui/shortcut_customization_ui/backend/search/search_handler_unittest.cc
@@ -247,7 +247,8 @@ // The descriptions for the fake shortcuts are "Open launcher", "Open new // tab", "Open the Foo app", and "Select all text content". - // The query "Open" should match the first three Concepts. + // The query "Open" matches the first three shortcuts because they contain the + // word "open". shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) .Search(u"Open", /*max_num_results=*/5u, &search_results); @@ -262,8 +263,8 @@ // Checking again that the observer was not called after the previous search. EXPECT_EQ(1u, results_availability_observer_.num_calls()); - // The query "open" should also match the first three Concepts (query case - // doesn't matter). + // The query "open" should also match the same concepts (query case doesn't + // matter). shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) .Search(u"open", /*max_num_results=*/5u, &search_results); @@ -275,8 +276,7 @@ VerifySearchResultIsPresent(/*description=*/u"Open the Foo app", /*search_results=*/search_results); - // For completeness, the query "OpEn" should also match the first three - // Concepts. + // For completeness, the query "OpEn" should also match the same concepts. shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) .Search(u"OpEn", /*max_num_results=*/5u, &search_results); @@ -288,29 +288,33 @@ VerifySearchResultIsPresent(/*description=*/u"Open the Foo app", /*search_results=*/search_results); - // Searching for a specific shortcut should only include those results. + // Searching for a specific shortcut matches only that concept. shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) .Search(u"Open new tab", /*max_num_results=*/5u, &search_results); EXPECT_EQ(search_results.size(), 1u); - EXPECT_EQ(search_results.at(0)->accelerator_layout_info->description, - u"Open new tab"); + VerifySearchResultIsPresent(/*description=*/u"Open new tab", + /*search_results=*/search_results); // Searching for a specific shortcut should work even if the query is a // "fuzzy" match. shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) .Search(u"Open tab", /*max_num_results=*/5u, &search_results); - // In this case, the search service also returns the result for "Open the Foo - // app", but with a lower relevance_score. - EXPECT_EQ(search_results.size(), 2u); + // In this case, the search service also returns the other results, but with + // lower relevance scores. + EXPECT_EQ(search_results.size(), 3u); EXPECT_EQ(search_results.at(0)->accelerator_layout_info->description, u"Open new tab"); EXPECT_EQ(search_results.at(1)->accelerator_layout_info->description, u"Open the Foo app"); + EXPECT_EQ(search_results.at(2)->accelerator_layout_info->description, + u"Open launcher"); // Expect that earlier search results have a higher relevance score. EXPECT_GT(search_results.at(0)->relevance_score, search_results.at(1)->relevance_score); + EXPECT_GT(search_results.at(1)->relevance_score, + search_results.at(2)->relevance_score); // Clear the index and verify that searches return no results, and that the // observer was called an additional time. @@ -335,22 +339,71 @@ .Search(u"this search matches nothing!", /*max_num_results=*/5u, &search_results); EXPECT_EQ(search_results.size(), 0u); +} - // Testing some edge cases: searching with spaces on either side and in the - // middle. 
- shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) - .Search(u" Open new tab ", - /*max_num_results=*/5u, &search_results); - // Turns out the search service doesn't match any shortcuts with a malformed - // query like this. - EXPECT_EQ(search_results.size(), 0u); +TEST_F(SearchHandlerTest, SearchResultsSingleCharacter) { + search_concept_registry_.SetSearchConcepts(GetTestSearchConcepts()); + handler_remote_.FlushForTesting(); + task_environment_.RunUntilIdle(); + std::vector<shortcut_customization::mojom::SearchResultPtr> search_results; - // Searching with spaces on either side, but not in the middle. + // Searching for "o" returns all results since they each contain an "o". shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) - .Search(u" Open new tab ", + .Search(u"o", /*max_num_results=*/5u, &search_results); - // Turns out the search service doesn't match any shortcuts with a malformed - // query like this. + EXPECT_EQ(search_results.size(), 4u); + VerifySearchResultIsPresent(/*description=*/u"Open launcher", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Open new tab", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Open the Foo app", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Select all text content", + /*search_results=*/search_results); + + // Searching for "O" returns all results since they each contain an "o", + // regardless of capitalization. + shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) + .Search(u"O", + /*max_num_results=*/5u, &search_results); + EXPECT_EQ(search_results.size(), 4u); + VerifySearchResultIsPresent(/*description=*/u"Open launcher", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Open new tab", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Open the Foo app", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Select all text content", + /*search_results=*/search_results); + + // Searching for "p" returns all results that contain the letter "p". + // In this case, "Select all text content" is included because its text + // accelerator is "Press Ctrl+A". + shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) + .Search(u"p", + /*max_num_results=*/5u, &search_results); + EXPECT_EQ(search_results.size(), 4u); + VerifySearchResultIsPresent(/*description=*/u"Open launcher", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Open new tab", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Open the Foo app", + /*search_results=*/search_results); + VerifySearchResultIsPresent(/*description=*/u"Select all text content", + /*search_results=*/search_results); + + // Searching for "l" returns all results that contain the letter "l". + shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) + .Search(u"l", + /*max_num_results=*/5u, &search_results); + EXPECT_EQ(search_results.size(), 1u); + VerifySearchResultIsPresent(/*description=*/u"Open launcher", + /*search_results=*/search_results); + + // Searching for "z" should return no results. + shortcut_customization::mojom::SearchHandlerAsyncWaiter(handler_remote_.get()) + .Search(u"z", + /*max_num_results=*/5u, &search_results); EXPECT_EQ(search_results.size(), 0u); }
diff --git a/ash/webui/shortcut_customization_ui/resources/js/search/search_box.ts b/ash/webui/shortcut_customization_ui/resources/js/search/search_box.ts index 1f95efa..2ebe78a0 100644 --- a/ash/webui/shortcut_customization_ui/resources/js/search/search_box.ts +++ b/ash/webui/shortcut_customization_ui/resources/js/search/search_box.ts
@@ -32,8 +32,6 @@ * results. */ -// TODO(longbowei): This value is temporary. Update it once more information is -// provided. const MAX_NUM_RESULTS = 5; // This number was chosen arbitrarily to be a reasonable limit. Most // searches will not be anywhere close to this. @@ -388,8 +386,15 @@ this.spinnerActive = true; + // In some cases, the backend will return search results that are later + // filtered out by `this.filterSearchResults`. When that happens, the UI + // should still show MAX_NUM_RESULTS results if there are other matching + // results. To achieve this, we request more results than we need, and then + // cap the number of search results to MAX_NUM_RESULTS. + const maxNumberOfSearchResults = MAX_NUM_RESULTS * 3; + this.shortcutSearchHandler - .search(stringToMojoString16(query), MAX_NUM_RESULTS) + .search(stringToMojoString16(query), maxNumberOfSearchResults) .then((response) => { this.onSearchResultsReceived(query, response.results); this.dispatchEvent(new CustomEvent( @@ -407,6 +412,10 @@ this.spinnerActive = false; this.searchResults = this.filterSearchResults(results); + // In `this.fetchSearchResults`, we queried for a multiple of + // MAX_NUM_RESULTS, so cap the size of the results here after filtering. + this.searchResults = this.searchResults.slice(0, MAX_NUM_RESULTS); + // This invalidates whatever SearchResultRow element was previously focused, // since it's likely that the element has been removed after the search. this.lastFocused = null;
diff --git a/ash/wm/default_state.cc b/ash/wm/default_state.cc index 221c39b..5d4ee59 100644 --- a/ash/wm/default_state.cc +++ b/ash/wm/default_state.cc
@@ -261,12 +261,9 @@ if (in_fullscreen && window_state->IsMaximized()) return; - // TODO(b/272091660): Consider having a more graceful algorithm for - // floated windows as they may have been purposefully placed semi - // offscreen. UpdateBoundsForDisplayOrWorkAreaBoundsChange( window_state, - /*ensure_full_window_visibility=*/window_state->IsFloated()); + /*ensure_full_window_visibility=*/false); return; } case WM_EVENT_SYSTEM_UI_AREA_CHANGED:
diff --git a/ash/wm/float/float_controller.cc b/ash/wm/float/float_controller.cc index c167a41..d0a93c0 100644 --- a/ash/wm/float/float_controller.cc +++ b/ash/wm/float/float_controller.cc
@@ -733,8 +733,10 @@ return; } - if ((display::DisplayObserver::DISPLAY_METRIC_WORK_AREA & metrics) == 0) + const uint32_t filter = DISPLAY_METRIC_BOUNDS | DISPLAY_METRIC_WORK_AREA; + if ((filter & metrics) == 0) { return; + } DCHECK(!floated_window_info_map_.empty()); std::vector<aura::Window*> windows_need_reset; @@ -742,11 +744,18 @@ if (!chromeos::wm::CanFloatWindow(window)) { windows_need_reset.push_back(window); } else { - // Let the state object handle the work area change. This is normally + // Let the state object handle the display change. This is normally // handled by the `WorkspaceLayoutManager`, but the float container does // not have one attached. - const WMEvent event(WM_EVENT_WORKAREA_BOUNDS_CHANGED); - WindowState::Get(window)->OnWMEvent(&event); + if (metrics & display::DisplayObserver::DISPLAY_METRIC_BOUNDS) { + const DisplayMetricsChangedWMEvent wm_event(metrics); + WindowState::Get(window)->OnWMEvent(&wm_event); + } + + if (metrics & display::DisplayObserver::DISPLAY_METRIC_WORK_AREA) { + const WMEvent wm_event(WM_EVENT_WORKAREA_BOUNDS_CHANGED); + WindowState::Get(window)->OnWMEvent(&wm_event); + } } } for (auto* window : windows_need_reset)
diff --git a/ash/wm/float/float_controller_unittest.cc b/ash/wm/float/float_controller_unittest.cc index 181b0005..0c1cdca 100644 --- a/ash/wm/float/float_controller_unittest.cc +++ b/ash/wm/float/float_controller_unittest.cc
@@ -432,6 +432,23 @@ .Contains(window->GetBoundsInScreen())); } +TEST_F(WindowFloatTest, FloatWindowBoundsWithShelfChange) { + UpdateDisplay("1600x1000"); + + // This test assumes that the shelf starts out bottom aligned. + ASSERT_EQ(ShelfAlignment::kBottom, GetPrimaryShelf()->alignment()); + + // Create a floated window and position it so that it is semi offscreen. + std::unique_ptr<aura::Window> window = CreateFloatedWindow(); + const gfx::Rect ideal_bounds(1400, 0, 400, 300); + const SetBoundsWMEvent set_bounds_event(ideal_bounds); + WindowState::Get(window.get())->OnWMEvent(&set_bounds_event); + + // Changing the shelf alignment should not alter the floated window bounds. + GetPrimaryShelf()->SetAlignment(ShelfAlignment::kLeft); + EXPECT_EQ(ideal_bounds, window->GetBoundsInScreen()); +} + // Test float window per desk logic. TEST_F(WindowFloatTest, OneFloatWindowPerDeskLogic) { // Test one float window per desk is allowed.
diff --git a/ash/wm/tablet_mode/tablet_mode_multitask_cue.cc b/ash/wm/tablet_mode/tablet_mode_multitask_cue.cc index ad311e2..d996e02 100644 --- a/ash/wm/tablet_mode/tablet_mode_multitask_cue.cc +++ b/ash/wm/tablet_mode/tablet_mode_multitask_cue.cc
@@ -166,6 +166,17 @@ return; } + // Do not show the cue if the window losing activation is not a normal window + // (a popup, for example) and is in the same transient tree as the window + // gaining activation. For example, when activation goes from the browser's + // extension bubble to the browser, the cue will not appear. + if (lost_active && + lost_active->GetType() != aura::client::WINDOW_TYPE_NORMAL && + wm::GetTransientRoot(lost_active) == + wm::GetTransientRoot(gained_active)) { + return; + } + auto* window_manager = Shell::Get()->tablet_mode_controller()->tablet_mode_window_manager(); DCHECK(window_manager); @@ -174,9 +185,6 @@ window_manager->tablet_mode_multitask_menu_controller(); DCHECK(multitask_menu_controller); - // TODO(b/263519133): Stop the cue from reappearing after using non-app - // windows like popups. - // The cue should not reappear when tapping off of the menu onto `window_` // or selecting a new layout. In the case where the menu is open, the cue is // active, and we tap onto another window (e.g., split view), we still want to
diff --git a/ash/wm/tablet_mode/tablet_mode_multitask_cue_unittest.cc b/ash/wm/tablet_mode/tablet_mode_multitask_cue_unittest.cc index a36df3c..25017f1 100644 --- a/ash/wm/tablet_mode/tablet_mode_multitask_cue_unittest.cc +++ b/ash/wm/tablet_mode/tablet_mode_multitask_cue_unittest.cc
@@ -13,6 +13,7 @@ #include "chromeos/ui/wm/features.h" #include "ui/compositor/scoped_animation_duration_scale_mode.h" #include "ui/compositor/test/layer_animation_stopped_waiter.h" +#include "ui/wm/core/window_util.h" namespace ash { @@ -142,4 +143,35 @@ EXPECT_FALSE(multitask_cue->cue_layer()); } -} // namespace ash \ No newline at end of file +TEST_F(TabletModeMultitaskCueTest, TransientChildFocus) { + auto window1 = CreateAppWindow(); + + // Create a second window with a transient child. + auto window2 = CreateAppWindow(); + auto transient_child2 = + CreateTestWindow(gfx::Rect(100, 10), aura::client::WINDOW_TYPE_POPUP); + wm::AddTransientChild(window2.get(), transient_child2.get()); + wm::ActivateWindow(transient_child2.get()); + + // Creating an app window shows the cue. Hide it before testing. + auto* multitask_cue = GetMultitaskCue(); + ASSERT_TRUE(multitask_cue->cue_layer()); + multitask_cue->DismissCue(); + + // Activate `window2`. The cue should not show up, since the previously + // active window was a transient child of `window2`. + wm::ActivateWindow(window2.get()); + EXPECT_FALSE(multitask_cue->cue_layer()); + + // Reactivate the transient child. The cue should not show up, since the + // transient window is a popup and cannot change window states. + wm::ActivateWindow(transient_child2.get()); + EXPECT_FALSE(multitask_cue->cue_layer()); + + // Activate `window1`. The cue should show up, since the previously + // activated window was not associated with it. + wm::ActivateWindow(window1.get()); + EXPECT_TRUE(multitask_cue->cue_layer()); +} + +} // namespace ash
diff --git a/base/allocator/partition_allocator/shim/allocator_interception_mac.mm b/base/allocator/partition_allocator/shim/allocator_interception_mac.mm index ee3125a..02c0740 100644 --- a/base/allocator/partition_allocator/shim/allocator_interception_mac.mm +++ b/base/allocator/partition_allocator/shim/allocator_interception_mac.mm
@@ -248,7 +248,7 @@ #else // As of macOS 14, the allocators seem to be in read-only memory? See // https://crbug.com/1454013. - return !partition_alloc::internal::base::mac::IsAtMostOS13(); + return partition_alloc::internal::base::mac::IsAtMostOS13(); #endif }
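The one-line change above flips an inverted condition. The enclosing predicate (its name falls outside this hunk) should report whether the allocators can still be intercepted, which per the comment and crbug.com/1454013 is only true on macOS 13 and earlier. A minimal sketch of the intended logic, with a hypothetical function name:

// Hypothetical name for illustration; the real function is not shown in the
// hunk. Interception is possible only while the allocator structures are
// still writable, i.e. on macOS 13 or earlier (macOS 14 appears to place
// them in read-only memory).
bool CanInterceptAllocators() {
  return partition_alloc::internal::base::mac::IsAtMostOS13();
}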
diff --git a/base/test/test_suite.cc b/base/test/test_suite.cc index 5d95331..4b9f1cc 100644 --- a/base/test/test_suite.cc +++ b/base/test/test_suite.cc
@@ -217,6 +217,7 @@ DCHECK_EQ(thread_pool_set_before_test_, ThreadPoolInstance::Get()) << " in test " << test.test_case_name() << "." << test.name(); feature_list_set_before_test_ = nullptr; + thread_pool_set_before_test_ = nullptr; } // Check for leaks in test cases (consisting of one or more tests). @@ -230,6 +231,7 @@ DCHECK_EQ(thread_pool_set_before_case_, ThreadPoolInstance::Get()) << " in case " << test_case.name(); feature_list_set_before_case_ = nullptr; + thread_pool_set_before_case_ = nullptr; } private:
diff --git a/build/android/apk_operations.py b/build/android/apk_operations.py index 2838240e..f0c2958d 100755 --- a/build/android/apk_operations.py +++ b/build/android/apk_operations.py
@@ -12,7 +12,6 @@ import json import logging import os -import pipes import posixpath import random import re @@ -359,7 +358,7 @@ cmd.append('--verbose') if target_cpu: cmd.append('--target-arch=%s' % _TargetCpuToTargetArch(target_cpu)) - logging.warning('Running: %s', ' '.join(pipes.quote(x) for x in cmd)) + logging.warning('Running: %s', ' '.join(shlex.quote(x) for x in cmd)) print(_Colorize('All subsequent output is from adb_gdb script.', colorama.Fore.YELLOW)) os.execv(gdb_script_path, cmd) @@ -567,7 +566,7 @@ odex_paths.append('/data/dalvik-cache/%s@classes%s.dex' % ( mangled_apk_path, suffix)) - odex_sizes = _DuHelper(d, ' '.join(pipes.quote(p) for p in odex_paths)) + odex_sizes = _DuHelper(d, ' '.join(shlex.quote(p) for p in odex_paths)) return (data_dir_sizes, code_cache_sizes, apk_sizes, lib_sizes, odex_sizes, compilation_filter) @@ -1098,7 +1097,7 @@ print('Existing flags per-device (via /data/local/tmp/{}):'.format( command_line_flags_file)) for flags in _PrintPerDeviceOutput(devices, outputs, single_line=True): - quoted_flags = ' '.join(pipes.quote(f) for f in flags) + quoted_flags = ' '.join(shlex.quote(f) for f in flags) print(quoted_flags or 'No flags set.')
diff --git a/build/android/pylib/local/machine/local_machine_junit_test_run.py b/build/android/pylib/local/machine/local_machine_junit_test_run.py index b9942085..28b084c2 100644 --- a/build/android/pylib/local/machine/local_machine_junit_test_run.py +++ b/build/android/pylib/local/machine/local_machine_junit_test_run.py
@@ -406,7 +406,9 @@ # Output any remaining output from a timed-out first shard. shard_0_pump.join() while not shard_0_q.empty(): - yield shard_0_q.get() + line = shard_0_q.get() + if line: + yield f'{first_shard:2}| {line}' for i, shard in enumerate(shard_list[1:]): f = temp_files[i + 1]
diff --git a/build/chromeos/test_runner.py b/build/chromeos/test_runner.py index d4c1d23..bc755a5 100755 --- a/build/chromeos/test_runner.py +++ b/build/chromeos/test_runner.py
@@ -328,7 +328,7 @@ self._attr_expr = '(' + ' || '.join(names) + ')' if self._attr_expr: - # Don't use pipes.quote() here. Something funky happens with the arg + # Don't use shlex.quote() here. Something funky happens with the arg # as it gets passed down from cros_run_test to tast. (Tast picks up the # escaping single quotes and complains that the attribute expression # "must be within parentheses".)
diff --git a/build/env_dump.py b/build/env_dump.py index 1eaf8dc..17d30a74 100755 --- a/build/env_dump.py +++ b/build/env_dump.py
@@ -9,7 +9,7 @@ import json import optparse import os -import pipes +import shlex import subprocess import sys @@ -32,7 +32,7 @@ if not options.output_json: parser.error('Requires --output-json option.') - envsetup_cmd = ' '.join(map(pipes.quote, args)) + envsetup_cmd = ' '.join(map(shlex.quote, args)) full_cmd = [ 'bash', '-c', '. %s > /dev/null; %s -d' % (envsetup_cmd, os.path.abspath(__file__))
diff --git a/build/fuchsia/linux_internal.sdk.sha1 b/build/fuchsia/linux_internal.sdk.sha1 index a2baf38..a532479 100644 --- a/build/fuchsia/linux_internal.sdk.sha1 +++ b/build/fuchsia/linux_internal.sdk.sha1
@@ -1 +1 @@ -12.20230612.1.1 +12.20230612.2.1
diff --git a/build/print_python_deps.py b/build/print_python_deps.py index 07f988a..415e7b0 100755 --- a/build/print_python_deps.py +++ b/build/print_python_deps.py
@@ -11,7 +11,7 @@ import argparse import os -import pipes +import shlex import sys # Don't use any helper modules, or else they will end up in the results. @@ -68,7 +68,7 @@ if os.name == 'nt': return ' '.join(quote(x) for x in args).replace('\\', '/') else: - return ' '.join(pipes.quote(x) for x in args) + return ' '.join(shlex.quote(x) for x in args) def _FindPythonInDirectory(directory, allow_test):
diff --git a/cc/paint/display_item_list_unittest.cc b/cc/paint/display_item_list_unittest.cc index ca875db..8d01ab4 100644 --- a/cc/paint/display_item_list_unittest.cc +++ b/cc/paint/display_item_list_unittest.cc
@@ -384,8 +384,7 @@ list->push<TranslateOp>(filter_bounds.x(), filter_bounds.y()); PaintFlags flags; - flags.setImageFilter( - RenderSurfaceFilters::BuildImageFilter(filters, filter_bounds.size())); + flags.setImageFilter(RenderSurfaceFilters::BuildImageFilter(filters)); SkRect layer_bounds = gfx::RectFToSkRect(filter_bounds); layer_bounds.offset(-filter_bounds.x(), -filter_bounds.y());
diff --git a/cc/paint/paint_filter.cc b/cc/paint/paint_filter.cc index 0d905b3..93c1a43c 100644 --- a/cc/paint/paint_filter.cc +++ b/cc/paint/paint_filter.cc
@@ -348,23 +348,35 @@ AreValuesEqualForTesting(input_, other.input_); // IN-TEST } -MagnifierPaintFilter::MagnifierPaintFilter(const SkRect& src_rect, +MagnifierPaintFilter::MagnifierPaintFilter(const SkRect& lens_bounds, + SkScalar zoom_amount, SkScalar inset, sk_sp<PaintFilter> input, const CropRect* crop_rect) : PaintFilter(kType, crop_rect, HasDiscardableImages(input)), - src_rect_(src_rect), + lens_bounds_(lens_bounds), + zoom_amount_(zoom_amount), inset_(inset), input_(std::move(input)) { - cached_sk_filter_ = SkImageFilters::Magnifier( - src_rect_, inset_, GetSkFilter(input_.get()), crop_rect); + // Historically the Skia Magnifier filter always used nearest-neighbor + // sampling internally, when it was only used for the accessibility + // magnifier widgets (where NN was preferred and always had an integer zoom + // amount). However, when the zoom amount is not an integer NN severely + // degrades visual quality. If more refined control is required, the + // sampling mode can be exposed and plumbed up to FilterOperation. + SkFilterMode filter_mode = SkScalarIsInt(zoom_amount) ? SkFilterMode::kNearest + : SkFilterMode::kLinear; + cached_sk_filter_ = + SkImageFilters::Magnifier(lens_bounds_, zoom_amount_, inset_, filter_mode, + GetSkFilter(input_.get()), crop_rect); } MagnifierPaintFilter::~MagnifierPaintFilter() = default; size_t MagnifierPaintFilter::SerializedSize() const { base::CheckedNumeric<size_t> total_size = - BaseSerializedSize() + PaintOpWriter::SerializedSize(src_rect_) + + BaseSerializedSize() + PaintOpWriter::SerializedSize(lens_bounds_) + + PaintOpWriter::SerializedSize(zoom_amount_) + PaintOpWriter::SerializedSize(inset_); total_size += PaintOpWriter::SerializedSize(input_.get()); return total_size.ValueOrDefault(0u); @@ -372,13 +384,15 @@ sk_sp<PaintFilter> MagnifierPaintFilter::SnapshotWithImagesInternal( ImageProvider* image_provider) const { - return sk_make_sp<MagnifierPaintFilter>( - src_rect_, inset_, Snapshot(input_, image_provider), GetCropRect()); + return sk_make_sp<MagnifierPaintFilter>(lens_bounds_, zoom_amount_, inset_, + Snapshot(input_, image_provider), + GetCropRect()); } bool MagnifierPaintFilter::EqualsForTesting( const MagnifierPaintFilter& other) const { - return src_rect_ == other.src_rect_ && inset_ == other.inset_ && + return lens_bounds_ == other.lens_bounds_ && + zoom_amount_ == other.zoom_amount_ && inset_ == other.inset_ && AreValuesEqualForTesting(input_, other.input_); // IN-TEST }
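The sampling-mode choice introduced in the constructor above boils down to an integer check on the zoom amount. A rough Python restatement for illustration only (the actual code uses SkScalarIsInt and SkFilterMode):

def pick_sampling_mode(zoom_amount: float) -> str:
    # Nearest-neighbor is only acceptable for exact integer zoom factors,
    # e.g. the accessibility magnifier; fractional zoom gets linear
    # filtering to avoid the quality degradation described in the comment.
    return 'nearest' if float(zoom_amount).is_integer() else 'linear'

assert pick_sampling_mode(2.0) == 'nearest'
assert pick_sampling_mode(1.5) == 'linear'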
diff --git a/cc/paint/paint_filter.h b/cc/paint/paint_filter.h index 08c13aa..534b93e 100644 --- a/cc/paint/paint_filter.h +++ b/cc/paint/paint_filter.h
@@ -249,13 +249,15 @@ class CC_PAINT_EXPORT MagnifierPaintFilter final : public PaintFilter { public: static constexpr Type kType = Type::kMagnifier; - MagnifierPaintFilter(const SkRect& src_rect, + MagnifierPaintFilter(const SkRect& lens_bounds, + SkScalar zoom_amount, SkScalar inset, sk_sp<PaintFilter> input, const CropRect* crop_rect = nullptr); ~MagnifierPaintFilter() override; - const SkRect& src_rect() const { return src_rect_; } + const SkRect& lens_bounds() const { return lens_bounds_; } + SkScalar zoom_amount() const { return zoom_amount_; } SkScalar inset() const { return inset_; } const sk_sp<PaintFilter>& input() const { return input_; } @@ -267,7 +269,8 @@ ImageProvider* image_provider) const override; private: - SkRect src_rect_; + SkRect lens_bounds_; + SkScalar zoom_amount_; SkScalar inset_; sk_sp<PaintFilter> input_; };
diff --git a/cc/paint/paint_filter_unittest.cc b/cc/paint/paint_filter_unittest.cc index 59c8b29..9da971d 100644 --- a/cc/paint/paint_filter_unittest.cc +++ b/cc/paint/paint_filter_unittest.cc
@@ -63,7 +63,7 @@ DropShadowPaintFilter::ShadowMode::kDrawShadowOnly, image_filter, &crop_rect); case PaintFilter::Type::kMagnifier: - return sk_make_sp<MagnifierPaintFilter>(SkRect::MakeWH(100.f, 100.f), + return sk_make_sp<MagnifierPaintFilter>(SkRect::MakeWH(100.f, 100.f), 2.f, 0.1f, record_filter, &crop_rect); case PaintFilter::Type::kCompose: return sk_make_sp<ComposePaintFilter>(image_filter, record_filter);
diff --git a/cc/paint/paint_op_buffer_unittest.cc b/cc/paint/paint_op_buffer_unittest.cc index aee728e..4185ae3 100644 --- a/cc/paint/paint_op_buffer_unittest.cc +++ b/cc/paint/paint_op_buffer_unittest.cc
@@ -3206,7 +3206,7 @@ 5.f, 10.f, 0.1f, 0.3f, SkColors::kBlue, DropShadowPaintFilter::ShadowMode::kDrawShadowOnly, nullptr)}, sk_sp<PaintFilter>{new MagnifierPaintFilter(SkRect::MakeXYWH(5, 6, 7, 8), - 10.5f, nullptr)}, + 2.f, 10.5f, nullptr)}, sk_sp<PaintFilter>{new AlphaThresholdPaintFilter( SkRegion(SkIRect::MakeXYWH(0, 0, 100, 200)), nullptr)}, sk_sp<PaintFilter>{new MatrixConvolutionPaintFilter(
diff --git a/cc/paint/paint_op_helper_unittest.cc b/cc/paint/paint_op_helper_unittest.cc index 755d47c..192f0c5 100644 --- a/cc/paint/paint_op_helper_unittest.cc +++ b/cc/paint/paint_op_helper_unittest.cc
@@ -367,11 +367,11 @@ TEST(PaintOpHelperFilters, MagnifierPaintFilter) { PaintFilter::CropRect crop_rect(SkRect::MakeWH(100.f, 100.f)); - MagnifierPaintFilter filter(SkRect::MakeWH(100.f, 100.f), /*inset=*/0.1f, - /*input=*/nullptr, &crop_rect); + MagnifierPaintFilter filter(SkRect::MakeWH(100.f, 100.f), /*zoom_amount=*/2.f, + /*inset=*/0.1f, /*input=*/nullptr, &crop_rect); EXPECT_EQ(PaintOpHelper::ToString(filter), - "MagnifierPaintFilter(src_rect=[0.000,0.000 100.000x100.000], " - "inset=0.100, input=(nil), " + "MagnifierPaintFilter(lens_bounds=[0.000,0.000 100.000x100.000], " + "zoom_amount=2.000, inset=0.100, input=(nil), " "crop_rect=[0.000,0.000 100.000x100.000])"); }
diff --git a/cc/paint/paint_op_reader.cc b/cc/paint/paint_op_reader.cc index 5c73977..935dbb0 100644 --- a/cc/paint/paint_op_reader.cc +++ b/cc/paint/paint_op_reader.cc
@@ -1034,16 +1034,19 @@ void PaintOpReader::ReadMagnifierPaintFilter( sk_sp<PaintFilter>* filter, const absl::optional<PaintFilter::CropRect>& crop_rect) { - SkRect src_rect = SkRect::MakeEmpty(); + SkRect lens_bounds = SkRect::MakeEmpty(); + SkScalar zoom_amount = 1.f; SkScalar inset = 0.f; sk_sp<PaintFilter> input; - Read(&src_rect); + Read(&lens_bounds); + Read(&zoom_amount); Read(&inset); Read(&input); if (!valid_) return; - filter->reset(new MagnifierPaintFilter(src_rect, inset, std::move(input), + filter->reset(new MagnifierPaintFilter(lens_bounds, zoom_amount, inset, + std::move(input), base::OptionalToPtr(crop_rect))); }
diff --git a/cc/paint/paint_op_writer.cc b/cc/paint/paint_op_writer.cc index 07a0413..647530e 100644 --- a/cc/paint/paint_op_writer.cc +++ b/cc/paint/paint_op_writer.cc
@@ -856,7 +856,8 @@ void PaintOpWriter::Write(const MagnifierPaintFilter& filter, const SkM44& current_ctm) { - WriteSimple(filter.src_rect()); + WriteSimple(filter.lens_bounds()); + WriteSimple(filter.zoom_amount()); WriteSimple(filter.inset()); Write(filter.input().get(), current_ctm); }
diff --git a/cc/paint/render_surface_filters.cc b/cc/paint/render_surface_filters.cc index 61f353c..160837b 100644 --- a/cc/paint/render_surface_filters.cc +++ b/cc/paint/render_surface_filters.cc
@@ -14,7 +14,7 @@ #include "cc/paint/paint_filter.h" #include "third_party/skia/include/core/SkColorFilter.h" #include "third_party/skia/include/core/SkRegion.h" -#include "ui/gfx/geometry/size_f.h" +#include "ui/gfx/geometry/rect.h" #include "ui/gfx/geometry/skia_conversions.h" namespace cc { @@ -149,8 +149,7 @@ sk_sp<PaintFilter> RenderSurfaceFilters::BuildImageFilter( const FilterOperations& filters, - const gfx::SizeF& size, - const gfx::Vector2dF& offset) { + const gfx::Rect& layer_bounds) { sk_sp<PaintFilter> image_filter; float matrix[20]; for (size_t i = 0; i < filters.size(); ++i) { @@ -208,31 +207,9 @@ case FilterOperation::ZOOM: { DCHECK_GE(op.amount(), 1.0); - // Compute the zoom center, from which we apply a scale transformation - // to get the zoom filter source rectangle. Usually the zoom center is - // the center of the unclipped rectangle, but this can sometimes be - // clipped when the magnifier is past the edge of the screen. When that - // happens, take the closest point inside the clipped rectangle instead. - gfx::PointF unclipped_rect_center = gfx::PointF( - (size.width() + offset.x()) / 2, (size.height() + offset.y()) / 2); - const gfx::PointF zoom_center = - gfx::RectF(size).ClosestPoint(unclipped_rect_center); - - sk_sp<PaintFilter> zoom_filter = sk_make_sp<MagnifierPaintFilter>( - SkRect::MakeXYWH(zoom_center.x() - zoom_center.x() / op.amount(), - zoom_center.y() - zoom_center.y() / op.amount(), - size.width() / op.amount(), - size.height() / op.amount()), - op.zoom_inset(), nullptr); - if (image_filter) { - // TODO(ajuma): When there's a 1-input version of - // SkMagnifierImageFilter, use that to handle the input filter - // instead of using an SkComposeImageFilter. - image_filter = sk_make_sp<ComposePaintFilter>( - std::move(zoom_filter), std::move(image_filter)); - } else { - image_filter = std::move(zoom_filter); - } + image_filter = sk_make_sp<MagnifierPaintFilter>( + gfx::RectToSkRect(layer_bounds), op.amount(), op.zoom_inset(), + std::move(image_filter)); break; } case FilterOperation::SATURATING_BRIGHTNESS:
diff --git a/cc/paint/render_surface_filters.h b/cc/paint/render_surface_filters.h index 57572b3..a9188587 100644 --- a/cc/paint/render_surface_filters.h +++ b/cc/paint/render_surface_filters.h
@@ -7,11 +7,7 @@ #include "cc/paint/paint_export.h" #include "third_party/skia/include/core/SkRefCnt.h" -#include "ui/gfx/geometry/vector2d_f.h" - -namespace gfx { -class SizeF; -} +#include "ui/gfx/geometry/rect.h" namespace cc { class PaintFilter; @@ -21,10 +17,10 @@ public: RenderSurfaceFilters() = delete; + // `layer_bounds` is only used for backdrop filters that reference ZOOM static sk_sp<PaintFilter> BuildImageFilter( const FilterOperations& filters, - const gfx::SizeF& size, - const gfx::Vector2dF& offset = gfx::Vector2dF(0, 0)); + const gfx::Rect& layer_bounds = {}); }; } // namespace cc
diff --git a/cc/raster/raster_buffer_provider_unittest.cc b/cc/raster/raster_buffer_provider_unittest.cc index ffbef25..012bbc5 100644 --- a/cc/raster/raster_buffer_provider_unittest.cc +++ b/cc/raster/raster_buffer_provider_unittest.cc
@@ -390,21 +390,6 @@ EXPECT_FALSE(completed_tasks()[1].canceled); } -TEST_P(RasterBufferProviderTest, FailedMapResource) { - if (GetParam() == RASTER_BUFFER_PROVIDER_TYPE_BITMAP) - return; - - viz::TestGLES2Interface* gl = context_provider_->TestContextGL(); - gl->set_times_map_buffer_chromium_succeeds(0); - AppendTask(0u); - ScheduleTasks(); - - RunMessageLoopUntilAllTasksHaveCompleted(); - - ASSERT_EQ(1u, completed_tasks().size()); - EXPECT_FALSE(completed_tasks()[0].canceled); -} - // This test checks that replacing a pending raster task with another does // not prevent the DidFinishRunningTileTasks notification from being sent. TEST_P(RasterBufferProviderTest, FalseThrottling) {
diff --git a/cc/scheduler/scheduler.cc b/cc/scheduler/scheduler.cc index 60fa3447..02883cab 100644 --- a/cc/scheduler/scheduler.cc +++ b/cc/scheduler/scheduler.cc
@@ -211,7 +211,6 @@ state_machine_.NotifyReadyToCommit(); next_commit_origin_frame_args_ = last_dispatched_begin_main_frame_args_; } - trace_actions_ = true; ProcessScheduledActions(); } @@ -869,10 +868,6 @@ SchedulerStateMachine::Action action; do { action = state_machine_.NextAction(); - - if (trace_actions_ && action != SchedulerStateMachine::Action::NONE && - commit_debug_action_sequence_.size() < 40) - commit_debug_action_sequence_.push_back(action); TRACE_EVENT(TRACE_DISABLED_BY_DEFAULT("cc.debug.scheduler"), "SchedulerStateMachine", [this](perfetto::EventContext ctx) { this->AsProtozeroInto(ctx, @@ -906,8 +901,6 @@ state_machine_.WillCommit(/*commit_had_no_updates=*/false); compositor_timing_history_->WillCommit(); compositor_frame_reporting_controller_->WillCommit(); - commit_debug_action_sequence_.clear(); - trace_actions_ = false; client_->ScheduledActionCommit(); compositor_timing_history_->DidCommit(); compositor_frame_reporting_controller_->DidCommit(); @@ -1058,47 +1051,4 @@ } } -std::string Scheduler::GetHungCommitDebugInfo() const { - // Convert the stored actions into a debug string. - std::string sequence; - // We convert each action to a char 'a' plus the enum value. So we only need - // the number of actions we're outputting. - sequence.reserve(commit_debug_action_sequence_.size()); - for (auto action : commit_debug_action_sequence_) { - sequence.push_back('a' + static_cast<int>(action)); - } - return base::StringPrintf( - "a[a%s] bmfs%d hpt%d atnfd%d pw%d aw%d rfa%d", sequence.c_str(), - static_cast<int>(state_machine_.begin_main_frame_state()), - static_cast<int>(state_machine_.has_pending_tree()), - static_cast<int>(state_machine_.active_tree_needs_first_draw()), - static_cast<int>( - state_machine_.processing_paint_worklets_for_pending_tree()), - static_cast<int>( - state_machine_.processing_animation_worklets_for_pending_tree()), - static_cast<int>(state_machine_.pending_tree_is_ready_for_activation())); -} - -void Scheduler::TraceHungCommitDebugInfo() const { - // First output a series of events which have the old actions. - for (auto action : commit_debug_action_sequence_) { - TRACE_EVENT_INSTANT( - "cc", "ProxyImpl::OnHungCommit OldAction", - [action](perfetto::EventContext ctx) { - ctx.event() - ->set_cc_scheduler_state() - ->set_state_machine() - ->set_major_state() - ->set_next_action( - SchedulerStateMachine::ActionToProtozeroEnum(action)); - }); - } - // Finally dump the complete state of the scheduler. - TRACE_EVENT_INSTANT("cc", "ProxyImpl::OnHungCommit CurrentState", - [this](perfetto::EventContext ctx) { - this->AsProtozeroInto( - ctx, ctx.event()->set_cc_scheduler_state()); - }); -} - } // namespace cc
diff --git a/cc/scheduler/scheduler.h b/cc/scheduler/scheduler.h index 902cf389..eb645bf38 100644 --- a/cc/scheduler/scheduler.h +++ b/cc/scheduler/scheduler.h
@@ -280,9 +280,6 @@ size_t CommitDurationSampleCountForTesting() const; - std::string GetHungCommitDebugInfo() const; - void TraceHungCommitDebugInfo() const; - protected: // Virtual for testing. virtual base::TimeTicks Now() const; @@ -407,10 +404,6 @@ } void UpdatePowerModeVote(); - - // Temporary for production debugging of renderer hang (crbug.com/1159366). - std::vector<SchedulerStateMachine::Action> commit_debug_action_sequence_; - bool trace_actions_ = false; }; } // namespace cc
diff --git a/cc/test/paint_op_helper.h b/cc/test/paint_op_helper.h index 65fca12..b917a22 100644 --- a/cc/test/paint_op_helper.h +++ b/cc/test/paint_op_helper.h
@@ -707,7 +707,8 @@ const auto& filter = static_cast<const MagnifierPaintFilter&>(base_filter); str << "MagnifierPaintFilter(" - << "src_rect=" << ToString(filter.src_rect()) + << "lens_bounds=" << ToString(filter.lens_bounds()) + << ", zoom_amount=" << ToString(filter.zoom_amount()) << ", inset=" << ToString(filter.inset()) << ", input=" << ToString(filter.input()) << ", crop_rect=" << ToString(filter.GetCropRect()) << ")";
diff --git a/cc/tiles/tile_manager.cc b/cc/tiles/tile_manager.cc index c5a51c1..3ebce5d6 100644 --- a/cc/tiles/tile_manager.cc +++ b/cc/tiles/tile_manager.cc
@@ -1881,12 +1881,6 @@ global_state_.memory_limit_policy != ALLOW_ABSOLUTE_MINIMUM); } -std::string TileManager::GetHungCommitDebugInfo() const { - base::trace_event::TracedValueJSON value; - ActivationStateAsValueInto(&value); - return value.ToJSON(); -} - TileManager::MemoryUsage::MemoryUsage() : memory_bytes_(0), resource_count_(0) {}
diff --git a/cc/tiles/tile_manager.h b/cc/tiles/tile_manager.h index 89b8ddb5..55a0d27 100644 --- a/cc/tiles/tile_manager.h +++ b/cc/tiles/tile_manager.h
@@ -10,7 +10,6 @@ #include <memory> #include <set> -#include <string> #include <unordered_map> #include <unordered_set> #include <utility> @@ -325,8 +324,6 @@ void set_active_url(const GURL& url) { active_url_ = url; } - std::string GetHungCommitDebugInfo() const; - protected: friend class Tile; // Must be called by tile during destruction.
diff --git a/cc/trees/layer_tree_frame_sink.cc b/cc/trees/layer_tree_frame_sink.cc index 7d08cfe3..f47c3720 100644 --- a/cc/trees/layer_tree_frame_sink.cc +++ b/cc/trees/layer_tree_frame_sink.cc
@@ -86,10 +86,6 @@ context_provider_ = nullptr; return false; } - - // The compositor context still has some GLES2Interface usage so make sure - // it provides that. - CHECK(context_provider_->ContextGL()); } if (auto* worker_context_provider_ptr = worker_context_provider()) {
diff --git a/cc/trees/layer_tree_frame_sink_unittest.cc b/cc/trees/layer_tree_frame_sink_unittest.cc index 81a365e94..ab6ba6f 100644 --- a/cc/trees/layer_tree_frame_sink_unittest.cc +++ b/cc/trees/layer_tree_frame_sink_unittest.cc
@@ -66,9 +66,11 @@ // Verify DidLoseLayerTreeFrameSink callback is hooked up correctly. EXPECT_FALSE(client.did_lose_layer_tree_frame_sink_called()); - layer_tree_frame_sink.context_provider()->ContextGL()->LoseContextCHROMIUM( - GL_GUILTY_CONTEXT_RESET_ARB, GL_INNOCENT_CONTEXT_RESET_ARB); - layer_tree_frame_sink.context_provider()->ContextGL()->Flush(); + layer_tree_frame_sink.context_provider() + ->RasterInterface() + ->LoseContextCHROMIUM(GL_GUILTY_CONTEXT_RESET_ARB, + GL_INNOCENT_CONTEXT_RESET_ARB); + layer_tree_frame_sink.context_provider()->RasterInterface()->Flush(); EXPECT_TRUE(client.did_lose_layer_tree_frame_sink_called()); }
diff --git a/cc/trees/layer_tree_host_impl.cc b/cc/trees/layer_tree_host_impl.cc index 8ebf2dc..943e3217 100644 --- a/cc/trees/layer_tree_host_impl.cc +++ b/cc/trees/layer_tree_host_impl.cc
@@ -3845,10 +3845,11 @@ #endif if (should_finish && layer_tree_frame_sink_->context_provider()) { - // TODO(ericrk): Remove this once all uses of ContextGL from LTFS are - // removed. - auto* gl = layer_tree_frame_sink_->context_provider()->ContextGL(); - gl->Finish(); + // TODO(kylechar): Exactly where this finish call is still required is not + // obvious. Attempts have been made to remove it which caused problems, eg. + // https://crbug.com/846709. We should test removing it via finch to find + // out if this is still needed on any platforms. + layer_tree_frame_sink_->context_provider()->RasterInterface()->Finish(); } // Release any context visibility before we destroy the LayerTreeFrameSink. @@ -5276,21 +5277,4 @@ frame_token, std::move(callbacks)); } -std::string LayerTreeHostImpl::GetHungCommitDebugInfo() const { - return base::StringPrintf( - "ptfp%d pwpd%d tmrta%d as%d gpur%d%d ltfs%d%d zc%d ", - static_cast<int>(pending_tree_fully_painted_), - static_cast<int>(paint_worklet_painter_ && - paint_worklet_painter_->HasOngoingDispatch()), - static_cast<int>(tile_manager_.IsReadyToActivate()), - static_cast<int>(GetActivelyScrollingType()), - static_cast<int>(raster_caps().use_gpu_rasterization), - static_cast<int>(raster_caps().gpu_rasterization_status), - static_cast<int>(has_valid_layer_tree_frame_sink_), - static_cast<int>(layer_tree_frame_sink_ && - layer_tree_frame_sink_->context_provider()), - static_cast<int>(settings_.use_zero_copy)) + - tile_manager_.GetHungCommitDebugInfo(); -} - } // namespace cc
diff --git a/cc/trees/layer_tree_host_impl.h b/cc/trees/layer_tree_host_impl.h index 15d27cc..27866cc 100644 --- a/cc/trees/layer_tree_host_impl.h +++ b/cc/trees/layer_tree_host_impl.h
@@ -914,8 +914,6 @@ downsample_metrics_ = value; } - std::string GetHungCommitDebugInfo() const; - protected: LayerTreeHostImpl( const LayerTreeSettings& settings,
diff --git a/cc/trees/layer_tree_host_pixeltest_blending.cc b/cc/trees/layer_tree_host_pixeltest_blending.cc index f2d27ac4..7843c31c 100644 --- a/cc/trees/layer_tree_host_pixeltest_blending.cc +++ b/cc/trees/layer_tree_host_pixeltest_blending.cc
@@ -322,8 +322,8 @@ PaintFlags grayscale; grayscale.setColor(kCSSOrange); - sk_sp<PaintFilter> paint_filter = RenderSurfaceFilters::BuildImageFilter( - filters, gfx::SizeF(kRootWidth, kRootHeight)); + sk_sp<PaintFilter> paint_filter = + RenderSurfaceFilters::BuildImageFilter(filters); grayscale.setImageFilter(paint_filter); paint_canvas.drawRect(SkRect::MakeWH(kRootWidth, kRootHeight), grayscale);
diff --git a/cc/trees/layer_tree_host_pixeltest_filters.cc b/cc/trees/layer_tree_host_pixeltest_filters.cc index 52aac96..f3391fa 100644 --- a/cc/trees/layer_tree_host_pixeltest_filters.cc +++ b/cc/trees/layer_tree_host_pixeltest_filters.cc
@@ -755,6 +755,10 @@ #define MAYBE_ZoomFilter ZoomFilter #endif // BUILDFLAG(IS_IOS) TEST_P(LayerTreeHostFiltersPixelTest, MAYBE_ZoomFilter) { + // ZOOM_FILTER is unsupported by software_renderer (crbug.com/1451898) + if (use_software_renderer()) { + return; + } scoped_refptr<SolidColorLayer> root = CreateSolidColorLayer(gfx::Rect(300, 300), SK_ColorWHITE);
diff --git a/cc/trees/layer_tree_host_unittest_context.cc b/cc/trees/layer_tree_host_unittest_context.cc index 3851981..6ec2cc77 100644 --- a/cc/trees/layer_tree_host_unittest_context.cc +++ b/cc/trees/layer_tree_host_unittest_context.cc
@@ -828,12 +828,12 @@ bool lost) {} void SetupTree() override { - gpu::gles2::GLES2Interface* gl = child_context_provider_->ContextGL(); + auto* ri = child_context_provider_->RasterInterface(); gpu::Mailbox mailbox = gpu::Mailbox::GenerateForSharedImage(); gpu::SyncToken sync_token; - gl->GenSyncTokenCHROMIUM(sync_token.GetData()); + ri->GenSyncTokenCHROMIUM(sync_token.GetData()); scoped_refptr<Layer> root = Layer::Create(); root->SetBounds(gfx::Size(10, 10));
diff --git a/cc/trees/proxy_impl.cc b/cc/trees/proxy_impl.cc index 87b638f..59e3aa3 100644 --- a/cc/trees/proxy_impl.cc +++ b/cc/trees/proxy_impl.cc
@@ -13,7 +13,6 @@ #include <vector> #include "base/auto_reset.h" -#include "base/debug/crash_logging.h" #include "base/functional/bind.h" #include "base/memory/raw_ptr.h" #include "base/notreached.h" @@ -49,9 +48,6 @@ constexpr auto kSmoothnessTakesPriorityExpirationDelay = base::Milliseconds(250); -// Make this less than kHungRendererDelay (15 sec). -constexpr base::TimeDelta kHungCommitTimeout = base::Seconds(14); - } // namespace // Ensures that a CompletionEvent for commit is always signaled. @@ -285,10 +281,11 @@ TRACE_EVENT0("cc", "ProxyImpl::FinishGLOnImplThread"); DCHECK(IsImplThread()); if (host_impl_->layer_tree_frame_sink()) { - viz::RasterContextProvider* context_provider = + auto* context_provider = host_impl_->layer_tree_frame_sink()->context_provider(); - if (context_provider) - context_provider->ContextGL()->Finish(); + if (context_provider) { + context_provider->RasterInterface()->Finish(); + } } completion->Signal(); } @@ -378,9 +375,6 @@ completion_event, start_time, MainThreadTaskRunner(), proxy_main_weak_ptr_), std::move(commit_state), unsafe_state, commit_timestamps); - hung_commit_timer_.Start( - FROM_HERE, kHungCommitTimeout, - base::BindOnce(&ProxyImpl::OnHungCommit, base::Unretained(this))); // Extract metrics data from the layer tree host and send them to the // scheduler to pass them to the compositor_timing_history object. @@ -392,17 +386,6 @@ scheduler_->SetNeedsBeginMainFrame(); } -void ProxyImpl::OnHungCommit() { - UMA_HISTOGRAM_BOOLEAN("Compositing.Renderer.CommitHung", true); - static auto* hung_commit_data = base::debug::AllocateCrashKeyString( - "hung_commit", base::debug::CrashKeySize::Size1024); - std::string debug_info = host_impl_->GetHungCommitDebugInfo() + - scheduler_->GetHungCommitDebugInfo(); - LOG(ERROR) << "commit hung: " << debug_info; - base::debug::SetCrashKeyString(hung_commit_data, debug_info); - scheduler_->TraceHungCommitDebugInfo(); -} - void ProxyImpl::DidLoseLayerTreeFrameSinkOnImplThread() { TRACE_EVENT0("cc", "ProxyImpl::DidLoseLayerTreeFrameSinkOnImplThread"); DCHECK(IsImplThread()); @@ -816,7 +799,6 @@ } data_for_commit_.reset(); - hung_commit_timer_.Stop(); } void ProxyImpl::ScheduledActionPostCommit() {
diff --git a/cc/trees/proxy_impl.h b/cc/trees/proxy_impl.h index 1bd8393..c27b306 100644 --- a/cc/trees/proxy_impl.h +++ b/cc/trees/proxy_impl.h
@@ -166,8 +166,6 @@ base::SingleThreadTaskRunner* MainThreadTaskRunner(); bool ShouldDeferBeginMainFrame() const; - void OnHungCommit(); - const int layer_tree_host_id_; std::unique_ptr<Scheduler> scheduler_; @@ -223,9 +221,6 @@ // Either thread can request deferring BeginMainFrame; keep track of both. bool main_wants_defer_begin_main_frame_ = false; bool impl_wants_defer_begin_main_frame_ = false; - - // Temporary for production debugging of renderer hang (crbug.com/1159366). - base::OneShotTimer hung_commit_timer_; }; } // namespace cc
diff --git a/chrome/PRESUBMIT.py b/chrome/PRESUBMIT.py index 52cd4e40a..4c693d0 100644 --- a/chrome/PRESUBMIT.py +++ b/chrome/PRESUBMIT.py
@@ -119,12 +119,48 @@ 'IS_IOS is not used in chrome/ but found in:\n', ios_buildflags)] +def _CheckBreakingInstallerVersionBumpNeeded(input_api, output_api): + files = [] + breaking_version_installer_updated = False + + def _FilterFile(affected_file): + return input_api.FilterSourceFile( + affected_file, + files_to_check=input_api.DEFAULT_FILES_TO_CHECK + (r'.*\.release',)) + for f in input_api.AffectedSourceFiles(_FilterFile): + # Normalize the local path to Linux-style path separators so that the path + # comparisons work on Windows as well. + local_path = f.LocalPath().replace('\\', '/') + breaking_version_installer_updated |= (local_path == + 'chrome/installer/setup/last_breaking_installer_version.cc') + if (local_path == 'chrome/installer/mini_installer/chrome.release' or + local_path.startswith('chrome/test/mini_installer')): + files.append(local_path) + + if files and not breaking_version_installer_updated: + return [output_api.PresubmitPromptWarning(''' +Update chrome/installer/setup/last_breaking_installer_version.cc if the changes +found in the following files might make downgrades not possible beyond +this browser's version.''', items=files)] + + if not files and breaking_version_installer_updated: + return [output_api.PresubmitPromptWarning(''' +No installer breaking changes detected but +chrome/installer/setup/last_breaking_installer_version.cc was updated. Please +update chrome/PRESUBMIT.py if more files need to be watched for +breaking installer changes.''')] + + return [] + + def _CommonChecks(input_api, output_api): """Checks common to both upload and commit.""" results = [] results.extend(_CheckNoContentUnitTestsInChrome(input_api, output_api)) results.extend(_CheckNoIsAppleBuildFlagsInChrome(input_api, output_api)) results.extend(_CheckNoIsIOSBuildFlagsInChrome(input_api, output_api)) + results.extend(_CheckBreakingInstallerVersionBumpNeeded(input_api, + output_api)) return results
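The new check hinges on comparing normalized repository paths; per the hunk's own comment, LocalPath() is OS-native on Windows. A short sketch (hypothetical path value, not from the CL) of why the normalization matters:

# On Windows, AffectedSourceFiles() yields backslash-separated local paths,
# so without normalization neither the equality check nor the startswith
# prefix match against the POSIX-style watched paths would ever succeed.
win_path = r'chrome\installer\mini_installer\chrome.release'
local_path = win_path.replace('\\', '/')
assert local_path == 'chrome/installer/mini_installer/chrome.release'
assert local_path.startswith('chrome/installer/')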
diff --git a/chrome/android/BUILD.gn b/chrome/android/BUILD.gn index fec52f5f..850088f 100644 --- a/chrome/android/BUILD.gn +++ b/chrome/android/BUILD.gn
@@ -617,6 +617,7 @@ "//third_party/metrics_proto:metrics_proto_java", "//ui/accessibility:ax_base_java", "//ui/android:ui_java", + "//ui/android:ui_no_recycler_view_java", "//ui/base/ime/mojom:mojom_java", "//ui/base/mojom:mojom_java", "//ui/gfx/geometry/mojom:mojom_java",
diff --git a/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticator.java b/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticator.java index 698a3bb..7bb2a65 100644 --- a/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticator.java +++ b/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticator.java
@@ -42,8 +42,6 @@ */ class CableAuthenticator { private static final String TAG = "CableAuthenticator"; - private static final String FIDO2_KEY_CREDENTIAL_EXTRA = "FIDO2_CREDENTIAL_EXTRA"; - private static final long TIMEOUT_SECONDS = 20; private static final int REGISTER_REQUEST_CODE = 1; private static final int SIGN_REQUEST_CODE = 2;
diff --git a/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticatorUI.java b/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticatorUI.java index 4f3b7c53..fb0fa2d 100644 --- a/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticatorUI.java +++ b/chrome/android/features/cablev2_authenticator/java/src/org/chromium/chrome/browser/webauth/authenticator/CableAuthenticatorUI.java
@@ -91,9 +91,6 @@ // These entries duplicate some of the enum values from // `CableV2MobileEvent`. The C++ enum is the source of truth for these // values. - private static final int EVENT_BLUETOOTH_ADVERTISE_PERMISSION_REQUESTED = 23; - private static final int EVENT_BLUETOOTH_ADVERTISE_PERMISSION_GRANTED = 24; - private static final int EVENT_BLUETOOTH_ADVERTISE_PERMISSION_REJECTED = 25; private enum Mode { QR, // QR code scanned by external app.
diff --git a/chrome/android/features/keyboard_accessory/junit/src/org/chromium/chrome/browser/keyboard_accessory/ManualFillingControllerTest.java b/chrome/android/features/keyboard_accessory/junit/src/org/chromium/chrome/browser/keyboard_accessory/ManualFillingControllerTest.java index 94bd676..8f8b6de 100644 --- a/chrome/android/features/keyboard_accessory/junit/src/org/chromium/chrome/browser/keyboard_accessory/ManualFillingControllerTest.java +++ b/chrome/android/features/keyboard_accessory/junit/src/org/chromium/chrome/browser/keyboard_accessory/ManualFillingControllerTest.java
@@ -112,7 +112,6 @@ public class ManualFillingControllerTest { private static final int sKeyboardHeightDp = 100; private static final int sAccessoryHeightDp = 48; - private static final int sKeyboardAndAccessoryDp = sKeyboardHeightDp + sAccessoryHeightDp; @Mock private ChromeWindow mMockWindow;
diff --git a/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/ExploreSurfaceNavigationDelegate.java b/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/ExploreSurfaceNavigationDelegate.java index ff00962..950f5d59 100644 --- a/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/ExploreSurfaceNavigationDelegate.java +++ b/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/ExploreSurfaceNavigationDelegate.java
@@ -16,8 +16,6 @@ /** Implementation of the {@link NativePageNavigationDelegate} for the explore surface. */ class ExploreSurfaceNavigationDelegate implements NativePageNavigationDelegate { - private static final String NEW_TAB_URL_HELP = "https://support.google.com/chrome/?p=new_tab"; - private final Supplier<Tab> mParentTabSupplier; ExploreSurfaceNavigationDelegate(Supplier<Tab> parentTabSupplier) {
diff --git a/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceConfiguration.java b/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceConfiguration.java index 653a09e..1ee87e4f 100644 --- a/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceConfiguration.java +++ b/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceConfiguration.java
@@ -85,6 +85,14 @@ new BooleanCachedFieldTrialParameter(ChromeFeatureList.START_SURFACE_RETURN_TIME, START_SURFACE_RETURN_TIME_USE_MODEL_PARAM, false); + public static final BooleanCachedFieldTrialParameter SURFACE_POLISH_OMNIBOX_SIZE = + new BooleanCachedFieldTrialParameter( + ChromeFeatureList.SURFACE_POLISH, "polish_omnibox_size", false); + + public static final BooleanCachedFieldTrialParameter SURFACE_POLISH_OMNIBOX_COLOR = + new BooleanCachedFieldTrialParameter( + ChromeFeatureList.SURFACE_POLISH, "polish_omnibox_color", false); + private static final String STARTUP_UMA_PREFIX = "Startup.Android."; private static final String INSTANT_START_SUBFIX = ".Instant"; private static final String REGULAR_START_SUBFIX = ".NoInstant";
diff --git a/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceHomeLayout.java b/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceHomeLayout.java index a1cbcf0c..11b1baf 100644 --- a/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceHomeLayout.java +++ b/chrome/android/features/start_surface/java/src/org/chromium/chrome/features/start_surface/StartSurfaceHomeLayout.java
@@ -22,8 +22,6 @@ * A {@link Layout} that shows Start Surface home view. */ public class StartSurfaceHomeLayout extends Layout { - private static final String TAG = "SSHomeLayout"; - private static final String TRACE_SHOW_START_SURFACE = "StartSurfaceHomeLayout.Show.StartSurface"; private static final String TRACE_HIDE_START_SURFACE =
diff --git a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/InstantStartTabSwitcherTest.java b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/InstantStartTabSwitcherTest.java index 58ab3fe..ad647a8 100644 --- a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/InstantStartTabSwitcherTest.java +++ b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/InstantStartTabSwitcherTest.java
@@ -107,7 +107,6 @@ public class InstantStartTabSwitcherTest { // clang-format on private static final String SHADOW_VIEW_TAG = "TabListViewShadow"; - private static final long MAX_TIMEOUT_MS = 30000L; @Rule public ChromeTabbedActivityTestRule mActivityTestRule = new ChromeTabbedActivityTestRule();
diff --git a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceBackButtonTest.java b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceBackButtonTest.java index 1b3b0c0..a505057 100644 --- a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceBackButtonTest.java +++ b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceBackButtonTest.java
@@ -86,8 +86,6 @@ @ParameterAnnotations.ClassParameter private static List<ParameterSet> sClassParams = sClassParamsForStartSurfaceTest; - private static final long MAX_TIMEOUT_MS = 40000L; - @Rule public ChromeTabbedActivityTestRule mActivityTestRule = new ChromeTabbedActivityTestRule(); @Rule
diff --git a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceOnTabletTest.java b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceOnTabletTest.java index 411ec8e..d21c159 100644 --- a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceOnTabletTest.java +++ b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceOnTabletTest.java
@@ -33,7 +33,6 @@ import org.chromium.base.test.util.Criteria; import org.chromium.base.test.util.CriteriaHelper; import org.chromium.base.test.util.CriteriaNotSatisfiedException; -import org.chromium.base.test.util.DisabledTest; import org.chromium.base.test.util.DoNotBatch; import org.chromium.base.test.util.Feature; import org.chromium.base.test.util.HistogramWatcher; @@ -448,7 +447,6 @@ @CommandLineFlags.Add({START_SURFACE_ON_TABLET_TEST_PARAMS}) @EnableFeatures({ChromeFeatureList.SHOW_SCROLLABLE_MVT_ON_NTP_ANDROID, ChromeFeatureList.FEED_MULTI_COLUMN, ChromeFeatureList.START_SURFACE_ON_TABLET}) - @DisabledTest(message = "https://crbug.com/1446043") // clang-format off public void test1RowMvtMarginWithMultiColumnFeedsOnNtpHomePage() throws IOException{ // clang-format on @@ -523,7 +521,6 @@ @EnableFeatures( {ChromeFeatureList.FEED_MULTI_COLUMN, ChromeFeatureList.START_SURFACE_ON_TABLET}) @DisableFeatures(ChromeFeatureList.SHOW_SCROLLABLE_MVT_ON_NTP_ANDROID) - @DisabledTest(message = "https://crbug.com/1446043") // clang-format off public void test2RowMvtMarginWithMultiColumnFeedsOnNtpHomePage() throws IOException { // clang-format on
diff --git a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceTest.java b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceTest.java index d84e796..f13f1b2f 100644 --- a/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceTest.java +++ b/chrome/android/features/start_surface/javatests/src/org/chromium/chrome/features/start_surface/StartSurfaceTest.java
@@ -57,7 +57,6 @@ import org.junit.runner.RunWith; import org.chromium.base.ThreadUtils; -import org.chromium.base.TimeUtils; import org.chromium.base.metrics.RecordHistogram; import org.chromium.base.test.params.ParameterAnnotations; import org.chromium.base.test.params.ParameterAnnotations.UseRunnerDelegate; @@ -124,8 +123,6 @@ @ParameterAnnotations.ClassParameter private static List<ParameterSet> sClassParams = sClassParamsForStartSurfaceTest; - private static final long MAX_TIMEOUT_MS = 40000L; - private static final long MILLISECONDS_PER_MINUTE = TimeUtils.SECONDS_PER_MINUTE * 1000; private static final String HISTOGRAM_START_SURFACE_MODULE_CLICK = "StartSurface.Module.Click"; private static final String HISTOGRAM_SPARE_TAB_FINAL_STATUS = "Android.SpareTab.FinalStatus"; private static final String HISTOGRAM_START_SURFACE_SPARE_TAB_SHOW_AND_CREATE =
diff --git a/chrome/android/features/start_surface/junit/src/org/chromium/chrome/features/start_surface/StartSurfaceCoordinatorUnitTest.java b/chrome/android/features/start_surface/junit/src/org/chromium/chrome/features/start_surface/StartSurfaceCoordinatorUnitTest.java index 4adf45c..1e59d1d3 100644 --- a/chrome/android/features/start_surface/junit/src/org/chromium/chrome/features/start_surface/StartSurfaceCoordinatorUnitTest.java +++ b/chrome/android/features/start_surface/junit/src/org/chromium/chrome/features/start_surface/StartSurfaceCoordinatorUnitTest.java
@@ -25,7 +25,6 @@ import org.robolectric.annotation.Config; import org.chromium.base.Callback; -import org.chromium.base.TimeUtils; import org.chromium.base.metrics.RecordHistogram; import org.chromium.base.test.BaseRobolectricTestRunner; import org.chromium.base.test.util.HistogramWatcher; @@ -54,7 +53,6 @@ @Features.EnableFeatures(ChromeFeatureList.START_SURFACE_ANDROID) @Features.DisableFeatures({ChromeFeatureList.WEB_FEED, ChromeFeatureList.SHOPPING_LIST}) public class StartSurfaceCoordinatorUnitTest { - private static final long MILLISECONDS_PER_MINUTE = TimeUtils.SECONDS_PER_MINUTE * 1000; private static final String START_SURFACE_TIME_SPENT = "StartSurface.TimeSpent"; private static final String HISTOGRAM_START_SURFACE_MODULE_CLICK = "StartSurface.Module.Click"; private static final String USER_ACTION_START_SURFACE_MVT_CLICK =
diff --git a/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_groups/TabGroupUtils.java b/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_groups/TabGroupUtils.java index 8de0b34e..5c86c0c 100644 --- a/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_groups/TabGroupUtils.java +++ b/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_groups/TabGroupUtils.java
@@ -39,7 +39,6 @@ */ public class TabGroupUtils { private static TabModelSelectorTabObserver sTabModelSelectorTabObserver; - private static final String TAB_GROUP_TITLES_FILE_NAME = "tab_group_titles"; public static void maybeShowIPH(@FeatureConstants String featureName, View view, @Nullable BottomSheetController bottomSheetController) {
diff --git a/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_management/TabSwitcherMediator.java b/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_management/TabSwitcherMediator.java index 396663b..033f529 100644 --- a/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_management/TabSwitcherMediator.java +++ b/chrome/android/features/tab_ui/java/src/org/chromium/chrome/browser/tasks/tab_management/TabSwitcherMediator.java
@@ -82,8 +82,6 @@ TabSwitcherCustomViewManager.Delegate, BackPressHandler { private static final String TAG = "TabSwitcherMediator"; - private static final int DEFAULT_TOP_PADDING = 0; - /** Field trial parameter for the {@link TabListRecyclerView} cleanup delay. */ private static final String SOFT_CLEANUP_DELAY_PARAM = "soft-cleanup-delay"; private static final int DEFAULT_SOFT_CLEANUP_DELAY_MS = 3_000;
diff --git a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridDialogMediatorUnitTest.java b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridDialogMediatorUnitTest.java index 451de638..dae0119f 100644 --- a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridDialogMediatorUnitTest.java +++ b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridDialogMediatorUnitTest.java
@@ -96,7 +96,6 @@ private static final String TAB3_TITLE = "Tab3"; private static final String DIALOG_TITLE1 = "1 tab"; private static final String DIALOG_TITLE2 = "2 tabs"; - private static final String REMOVE_BUTTON_STRING = "Remove"; private static final String CUSTOMIZED_DIALOG_TITLE = "Cool Tabs"; private static final int TAB1_ID = 456; private static final int TAB2_ID = 789;
diff --git a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridViewBinderUnitTest.java b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridViewBinderUnitTest.java index b228929b..4ffa4212 100644 --- a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridViewBinderUnitTest.java +++ b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/TabGridViewBinderUnitTest.java
@@ -77,7 +77,6 @@ private PropertyModel mModel; private LayoutParams mLayoutParams; private Bitmap mBitmap; - private static final float RESOURCE_DIMEN = 10; @Before public void setUp() {
diff --git a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabContextTest.java b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabContextTest.java index 097e0964..4f04f9e 100644 --- a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabContextTest.java +++ b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabContextTest.java
@@ -49,7 +49,6 @@ private static final int RELATED_TAB_1_ID = 2; private static final int NEW_TAB_1_ID = 3; private static final int NEW_TAB_2_ID = 4; - private static final int LAST_COMMITTED_INDEX = 1; @Rule public TestRule mProcessor = new Features.JUnitProcessor();
diff --git a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabSuggestionTest.java b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabSuggestionTest.java index c59e170..7b2b1d9c 100644 --- a/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabSuggestionTest.java +++ b/chrome/android/features/tab_ui/junit/src/org/chromium/chrome/browser/tasks/tab_management/suggestions/TabSuggestionTest.java
@@ -32,7 +32,6 @@ private static final String TITLE = "title"; private static final String TAB_URL = "url"; private static final String ORIGINAL_URL = "original_url"; - private static final String REFERRER_URL = "referrer_url"; private static final long TIMESTAMP = 4352345L; private static final String VISIBLE_URL = "visible_url"; private static final TabContext.TabInfo TAB_INFO =
diff --git a/chrome/android/features/tab_ui/public/android/java/src/org/chromium/chrome/browser/tasks/tab_management/TabUiThemeUtil.java b/chrome/android/features/tab_ui/public/android/java/src/org/chromium/chrome/browser/tasks/tab_management/TabUiThemeUtil.java index 3f05c73..504d2f60 100644 --- a/chrome/android/features/tab_ui/public/android/java/src/org/chromium/chrome/browser/tasks/tab_management/TabUiThemeUtil.java +++ b/chrome/android/features/tab_ui/public/android/java/src/org/chromium/chrome/browser/tasks/tab_management/TabUiThemeUtil.java
@@ -19,7 +19,6 @@ * Internal themes are provided via @{@link TabUiThemeProvider} */ public class TabUiThemeUtil { - private static final String TAG = "TabUiThemeProvider"; private static final float MAX_TAB_STRIP_TAB_WIDTH_DP = 265.f; private static final float DETACHED_TAB_OVERLAY_ALPHA = 0.85f; private static final float DETACHED_TAB_OVERLAY_ALPHA_EDIT_MODE = 0.2f;
diff --git a/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrDelegateFallback.java b/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrDelegateFallback.java index f2b33a5..cc1aae12 100644 --- a/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrDelegateFallback.java +++ b/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrDelegateFallback.java
@@ -21,11 +21,9 @@ */ /* package */ class VrDelegateFallback extends VrDelegate { private static final String TAG = "VrDelegateFallback"; - private static final boolean DEBUG_LOGS = false; private static final String DEFAULT_VR_MODE_PACKAGE = "com.google.vr.vrcore"; private static final String DEFAULT_VR_MODE_CLASS = "com.google.vr.vrcore.common.VrCoreListenerService"; - private static final int WAITING_FOR_MODULE_TIMEOUT_MS = 1500; @Override public void forceExitVrImmediately() {}
diff --git a/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrShell.java b/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrShell.java index 73021b36..c5fa6959 100644 --- a/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrShell.java +++ b/chrome/android/features/vr/java/src/org/chromium/chrome/browser/vr/VrShell.java
@@ -40,7 +40,6 @@ @JNINamespace("vr") public class VrShell extends GvrLayout implements SurfaceHolder.Callback { private static final String TAG = "VrShellImpl"; - private static final float INCHES_TO_METERS = 0.0254f; private final Activity mActivity; private final VrShellDelegate mDelegate;
diff --git a/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceCoordinator.java b/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceCoordinator.java index 5694dbc..f41a3b4 100644 --- a/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceCoordinator.java +++ b/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceCoordinator.java
@@ -83,7 +83,6 @@ implements FeedSurfaceProvider, FeedBubbleDelegate, SwipeRefreshLayout.OnRefreshListener, BackToTopBubbleScrollListener.ResultHandler, SurfaceCoordinator, FeedAutoplaySettingsDelegate, HasContentListener, FeedContentFirstLoadWatcher { - private static final String TAG = "FeedSurfaceCoordinator"; private static final long DELAY_FEED_HEADER_IPH_MS = 50; protected final Activity mActivity;
diff --git a/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceMediator.java b/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceMediator.java index d56f114..7b5367d 100644 --- a/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceMediator.java +++ b/chrome/android/feed/core/java/src/org/chromium/chrome/browser/feed/FeedSurfaceMediator.java
@@ -77,7 +77,6 @@ public class FeedSurfaceMediator implements FeedSurfaceScrollDelegate, TouchEnabledDelegate, TemplateUrlServiceObserver, ListMenu.Delegate, IdentityManager.Observer, OptionChangedListener { - private static final String TAG = "FeedSurfaceMediator"; private static final int INTEREST_FEED_HEADER_POSITION = 0; private class FeedSurfaceHeaderSelectedCallback implements OnSectionHeaderSelectedListener {
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivity.java b/chrome/android/java/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivity.java index b0d67de..b0335dc 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivity.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivity.java
@@ -50,6 +50,7 @@ import org.chromium.chrome.browser.night_mode.GlobalNightModeStateProviderHolder; import org.chromium.chrome.browser.night_mode.NightModeStateProvider; import org.chromium.chrome.browser.night_mode.NightModeUtils; +import org.chromium.ui.display.DisplayUtil; import org.chromium.ui.modaldialog.ModalDialogManager; import org.chromium.ui.modaldialog.ModalDialogManagerHolder; @@ -63,8 +64,6 @@ */ public class ChromeBaseAppCompatActivity extends AppCompatActivity implements NightModeStateProvider.Observer, ModalDialogManagerHolder { - protected static final float UI_SCALING_FACTOR_FOR_AUTO = 1.34f; - /** * Chrome in automotive needs a persistent back button toolbar above all activities because * AAOS/cars do not have a built in back button. This is implemented differently in each @@ -269,7 +268,7 @@ @VisibleForTesting static void applyOverridesForAutomotive(Context baseContext, Configuration overrideConfig) { if (BuildInfo.getInstance().isAutomotive) { - scaleUpUI(baseContext, overrideConfig, UI_SCALING_FACTOR_FOR_AUTO); + scaleUpUI(baseContext, overrideConfig, DisplayUtil.UI_SCALING_FACTOR_FOR_AUTO); } } @@ -287,11 +286,6 @@ config.smallestScreenWidthDp = Math.min(config.screenWidthDp, config.screenHeightDp); } - @VisibleForTesting - static float getDensityOverrideFactorForAutomotiveDevices() { - return UI_SCALING_FACTOR_FOR_AUTO; - } - /** * @return The {@link NightModeStateProvider} that provides the state of night mode. */
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/ChromeTabbedActivity.java b/chrome/android/java/src/org/chromium/chrome/browser/ChromeTabbedActivity.java index 2321fa9..cad07d2 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/ChromeTabbedActivity.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/ChromeTabbedActivity.java
@@ -236,8 +236,6 @@ implements ChromeAccessibilityUtil.Observer { private static final String TAG = "ChromeTabbedActivity"; - private static final String HELP_URL_PREFIX = "https://support.google.com/chrome/"; - protected static final String WINDOW_INDEX = "window_index"; private static final int INVALID_WINDOW_ID = TabWindowManager.INVALID_WINDOW_INDEX;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/ShortcutHelper.java b/chrome/android/java/src/org/chromium/chrome/browser/ShortcutHelper.java index aa092ce0..e4b90d5 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/ShortcutHelper.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/ShortcutHelper.java
@@ -37,8 +37,6 @@ * or open a web app. */ public class ShortcutHelper { - private static final String TAG = "ShortcutHelper"; - // Holds splash images for web apps that are currently being installed. After installation is // complete, the image associated with the web app will be moved to the appropriate {@link // WebappDataStorage}.
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/app/ChromeActivity.java b/chrome/android/java/src/org/chromium/chrome/browser/app/ChromeActivity.java index 847305a5..1372b79 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/app/ChromeActivity.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/app/ChromeActivity.java
@@ -222,6 +222,7 @@ import org.chromium.components.webxr.XrDelegate; import org.chromium.components.webxr.XrDelegateProvider; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.DeviceUtils; import org.chromium.content_public.browser.LoadUrlParams; import org.chromium.content_public.browser.ScreenOrientationProvider; @@ -2529,7 +2530,7 @@ boolean usingDesktopUserAgent = currentTab.getWebContents().getNavigationController().getUseDesktopUserAgent(); usingDesktopUserAgent = !usingDesktopUserAgent; - if (ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS)) { + if (ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS)) { Profile profile = getCurrentTabModel().getProfile(); RequestDesktopUtils.setRequestDesktopSiteContentSettingsForUrl( profile, currentTab.getUrl(), usingDesktopUserAgent);
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/app/appmenu/AppMenuPropertiesDelegateImpl.java b/chrome/android/java/src/org/chromium/chrome/browser/app/appmenu/AppMenuPropertiesDelegateImpl.java index 693c5f7..2746f45 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/app/appmenu/AppMenuPropertiesDelegateImpl.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/app/appmenu/AppMenuPropertiesDelegateImpl.java
@@ -91,6 +91,7 @@ import org.chromium.components.webapps.AppBannerManager; import org.chromium.components.webapps.WebappsUtils; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.net.ConnectionType; import org.chromium.ui.base.DeviceFormFactor; import org.chromium.ui.modelutil.MVCListAdapter; @@ -1111,7 +1112,7 @@ // REQUEST_DESKTOP_SITE_EXCEPTIONS is enabled, hide the entry for all native pages. boolean itemVisible = currentTab != null && canShowRequestDesktopSite && (!isChromeScheme - || (!ContentFeatureList.isEnabled( + || (!ContentFeatureMap.isEnabled( ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) && currentTab.isNativePage())) && !shouldShowReaderModePrefs(currentTab) && currentTab.getWebContents() != null;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/app/flags/ChromeCachedFlags.java b/chrome/android/java/src/org/chromium/chrome/browser/app/flags/ChromeCachedFlags.java index d9c28376..0f0c70f 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/app/flags/ChromeCachedFlags.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/app/flags/ChromeCachedFlags.java
@@ -102,6 +102,8 @@ StartSurfaceConfiguration.START_SURFACE_LAST_ACTIVE_TAB_ONLY, StartSurfaceConfiguration.START_SURFACE_OPEN_NTP_INSTEAD_OF_START, StartSurfaceConfiguration.START_SURFACE_OPEN_START_AS_HOMEPAGE, + StartSurfaceConfiguration.SURFACE_POLISH_OMNIBOX_COLOR, + StartSurfaceConfiguration.SURFACE_POLISH_OMNIBOX_SIZE, TabPersistentStore.CRITICAL_PERSISTED_TAB_DATA_SAVE_ONLY_PARAM, TabUiFeatureUtilities.ENABLE_TAB_GROUP_AUTO_CREATION, TabUiFeatureUtilities.GTS_ACCESSIBILITY_LIST_MODE,
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/app/usb/UsbNotificationServiceImpl.java b/chrome/android/java/src/org/chromium/chrome/browser/app/usb/UsbNotificationServiceImpl.java index aa1a5da..939054fb 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/app/usb/UsbNotificationServiceImpl.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/app/usb/UsbNotificationServiceImpl.java
@@ -18,8 +18,6 @@ * to a USB device. */ public class UsbNotificationServiceImpl extends UsbNotificationService.Impl { - private static final String TAG = "UsbNotificationServiceImpl"; - private UsbNotificationManagerDelegate mManagerDelegate = new UsbNotificationManagerDelegate() { @Override public Intent createTrustedBringTabToFrontIntent(int tabId) {
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePrompt.java b/chrome/android/java/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePrompt.java index 989658b5..68dfee0 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePrompt.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePrompt.java
@@ -4,9 +4,9 @@ package org.chromium.chrome.browser.autofill; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.SAVE_NEW_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.SAVE_NEW_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; import android.app.Activity; import android.text.TextUtils; @@ -23,8 +23,9 @@ import org.chromium.base.annotations.CalledByNative; import org.chromium.base.annotations.JNINamespace; import org.chromium.chrome.R; -import org.chromium.chrome.browser.autofill.editors.AddressEditor; -import org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.Delegate; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow; import org.chromium.chrome.browser.autofill.editors.EditorDialogView; import org.chromium.chrome.browser.feedback.HelpAndFeedbackLauncherImpl; import org.chromium.chrome.browser.flags.ChromeFeatureList; @@ -48,7 +49,7 @@ private final PropertyModel mDialogModel; private final View mDialogView; private final EditorDialogView mEditorDialog; - private AddressEditor mAddressEditor; + private AddressEditorCoordinator mAddressEditor; private boolean mEditorClosingPending; /** @@ -89,13 +90,13 @@ mEditorDialog = new EditorDialogView(activity, /*deleteRunnable=*/null, HelpAndFeedbackLauncherImpl.getForProfile(browserProfile)); mEditorDialog.setShouldTriggerDoneCallbackBeforeCloseAnimation(true); - AddressEditor.Delegate delegate = new AddressEditor.Delegate() { + Delegate delegate = new Delegate() { @Override public void onDone(AutofillAddress address) { onEdited(address); } }; - mAddressEditor = new AddressEditor(mEditorDialog, delegate, browserProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, delegate, browserProfile, new AutofillAddress(activity, autofillProfile), userFlow, /*saveToDisk=*/false); mDialogView.findViewById(R.id.edit_button).setOnClickListener(v -> { @@ -272,7 +273,7 @@ }); } - void setAddressEditorForTesting(AddressEditor addressEditor) { + void setAddressEditorForTesting(AddressEditorCoordinator addressEditor) { mAddressEditor = addressEditor; }
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/autofill/settings/AutofillProfilesFragment.java b/chrome/android/java/src/org/chromium/chrome/browser/autofill/settings/AutofillProfilesFragment.java index ae735e4..500d7ba 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/autofill/settings/AutofillProfilesFragment.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/autofill/settings/AutofillProfilesFragment.java
@@ -4,7 +4,7 @@ package org.chromium.chrome.browser.autofill.settings; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; import android.content.Context; import android.content.res.Configuration; @@ -30,7 +30,9 @@ import org.chromium.chrome.browser.autofill.PersonalDataManager; import org.chromium.chrome.browser.autofill.PersonalDataManager.AutofillProfile; import org.chromium.chrome.browser.autofill.Source; -import org.chromium.chrome.browser.autofill.editors.AddressEditor; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.Delegate; +; import org.chromium.chrome.browser.autofill.editors.EditorDialogView; import org.chromium.chrome.browser.autofill.editors.EditorObserverForTest; import org.chromium.chrome.browser.feedback.FragmentHelpAndFeedbackLauncher; @@ -54,7 +56,7 @@ public class AutofillProfilesFragment extends PreferenceFragmentCompat implements PersonalDataManager.PersonalDataManagerObserver, FragmentHelpAndFeedbackLauncher, ProfileDependentSetting { - private static AddressEditor.Delegate sAddressEditorDelegate = new AddressEditor.Delegate() { + private static Delegate sAddressEditorDelegate = new Delegate() { // User has either created a new address, or edited an existing address. // We should save changes in any case. @Override @@ -229,14 +231,15 @@ mEditorDialog = prepareEditorDialog(editorPreference.getGUID()); AutofillAddress autofillAddress = getAutofillAddress(editorPreference); if (autofillAddress == null) { - AddressEditor addressEditor = - new AddressEditor(mEditorDialog, sAddressEditorDelegate, mProfile, + AddressEditorCoordinator addressEditor = + new AddressEditorCoordinator(mEditorDialog, sAddressEditorDelegate, mProfile, /*saveToDisk=*/true); addressEditor.showEditorDialog(); } else { - AddressEditor addressEditor = new AddressEditor(mEditorDialog, sAddressEditorDelegate, - mProfile, autofillAddress, UPDATE_EXISTING_ADDRESS_PROFILE, - /*saveToDisk=*/true); + AddressEditorCoordinator addressEditor = + new AddressEditorCoordinator(mEditorDialog, sAddressEditorDelegate, mProfile, + autofillAddress, UPDATE_EXISTING_ADDRESS_PROFILE, + /*saveToDisk=*/true); addressEditor.showEditorDialog(); } }
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/base/SplitChromeApplication.java b/chrome/android/java/src/org/chromium/chrome/browser/base/SplitChromeApplication.java index 2f37a52..1603f60 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/base/SplitChromeApplication.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/base/SplitChromeApplication.java
@@ -28,8 +28,6 @@ * This class will be used when isolated splits are enabled. */ public class SplitChromeApplication extends SplitCompatApplication { - private static final String TAG = "SplitChromeApp"; - @IdentifierNameString private static String sImplClassName = "org.chromium.chrome.browser.ChromeApplicationImpl";
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/splashscreen/SplashController.java b/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/splashscreen/SplashController.java index 429704a..2cadf63 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/splashscreen/SplashController.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/splashscreen/SplashController.java
@@ -81,8 +81,6 @@ int ON_SPLASH_HIDDEN = 2; } - private static final String TAG = "SplashController"; - private final Activity mActivity; private final ActivityLifecycleDispatcher mLifecycleDispatcher; private final TabObserverRegistrar mTabObserverRegistrar;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/view/DisclosureInfobar.java b/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/view/DisclosureInfobar.java index a6aac51..9a8193c 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/view/DisclosureInfobar.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/browserservices/ui/view/DisclosureInfobar.java
@@ -35,7 +35,6 @@ @ActivityScope public class DisclosureInfobar implements PropertyObservable.PropertyObserver<PropertyKey>, StartStopWithNativeObserver { - private static final String TAG = "RunningInChrome"; private final Resources mResources; private final Lazy<SnackbarManager> mSnackbarManager; private final TrustedWebActivityModel mModel;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/OverlayPanelBase.java b/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/OverlayPanelBase.java index 0a0e7e19..85e9d17 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/OverlayPanelBase.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/OverlayPanelBase.java
@@ -67,13 +67,10 @@ // interface. // /** The opacity of the Open-Tab icon when the Panel is peeking. */ - private static final float OPEN_TAB_ICON_OPACITY_STATE_PEEKED = 1.f; /** The opacity of the Open-Tab icon when the Panel is expanded. */ - private static final float OPEN_TAB_ICON_OPACITY_STATE_EXPANDED = 0.f; /** The opacity of the Open-Tab icon when the Panel is maximized. */ - private static final float OPEN_TAB_ICON_OPACITY_STATE_MAXIMIZED = 0.f; /** The opacity of the close icon when the Panel is peeking. */ private static final float CLOSE_ICON_OPACITY_STATE_PEEKED = 0.f;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/ephemeraltab/EphemeralTabSheetContent.java b/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/ephemeraltab/EphemeralTabSheetContent.java index 3346084..74833d52 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/ephemeraltab/EphemeralTabSheetContent.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/compositor/bottombar/ephemeraltab/EphemeralTabSheetContent.java
@@ -52,8 +52,6 @@ */ private static final int BASE_ANIMATION_DURATION_MS = 218; - private static final float PEEK_TOOLBAR_HEIGHT_MULTIPLE = 2.f; - /** Ratio of the height when in full mode. Used in half-open variation. */ private static final float FULL_HEIGHT_RATIO = 0.9f;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ChromeContextMenuPopulator.java b/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ChromeContextMenuPopulator.java index 6afdb35..11bf5c7 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ChromeContextMenuPopulator.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ChromeContextMenuPopulator.java
@@ -88,9 +88,6 @@ private final ContextMenuParams mParams; private @Nullable UkmRecorder.Bridge mUkmRecorderBridge; private ContextMenuNativeDelegate mNativeDelegate; - private static final String LENS_SEARCH_MENU_ITEM_KEY = "searchWithGoogleLensMenuItem"; - private static final String LENS_SHOP_MENU_ITEM_KEY = "shopWithGoogleLensMenuItem"; - private static final String SEARCH_BY_IMAGE_MENU_ITEM_KEY = "searchByImageMenuItem"; private static final String LENS_SUPPORT_STATUS_HISTOGRAM_NAME = "ContextMenu.LensSupportStatus";
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ContextMenuCoordinator.java b/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ContextMenuCoordinator.java index cedc902..55e2ba6 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ContextMenuCoordinator.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/contextmenu/ContextMenuCoordinator.java
@@ -27,7 +27,7 @@ import org.chromium.chrome.browser.util.ChromeAccessibilityUtil; import org.chromium.components.browser_ui.widget.ContextMenuDialog; import org.chromium.components.embedder_support.contextmenu.ContextMenuParams; -import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.LoadCommittedDetails; import org.chromium.content_public.browser.WebContents; import org.chromium.content_public.browser.WebContentsObserver; @@ -109,7 +109,7 @@ mOnMenuClosed = onMenuClosed; Activity activity = window.getActivity().get(); final boolean isDragDropEnabled = - ContentFeatureList.isEnabled(ContentFeatures.TOUCH_DRAG_AND_CONTEXT_MENU) + ContentFeatureMap.isEnabled(ContentFeatures.TOUCH_DRAG_AND_CONTEXT_MENU) && ContextMenuUtils.usePopupContextMenuForContext(activity); final boolean isPopup = isDragDropEnabled || params.getSourceType() == MenuSourceType.MENU_SOURCE_MOUSE
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java b/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java index 6c21ef5d..9a8727a 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContext.java
@@ -59,7 +59,6 @@ // The offset of the tapped word within the surrounding text or {@code INVALID_OFFSET} if not // yet analyzed. - private int mWordTappedStartOffset = INVALID_OFFSET; // The offset of the tap within the tapped word, or {@code INVALID_OFFSET} if not yet analyzed. private int mTapWithinWordOffset = INVALID_OFFSET;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchManager.java b/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchManager.java index 0688fbf..da1281b 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchManager.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchManager.java
@@ -100,8 +100,6 @@ // having the manager itself implement the interface because that exposes all the public methods // of that interface at the manager level. - private static final String TAG = "ContextualSearch"; - private static final String INTENT_URL_PREFIX = "intent:"; // We denylist this URL because malformed URLs may bring up this page.
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabActivity.java b/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabActivity.java index e41dfc4..b6e5b43 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabActivity.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabActivity.java
@@ -56,8 +56,6 @@ * The activity for custom tabs. It will be launched on top of a client's task. */ public class CustomTabActivity extends BaseCustomTabActivity { - private static final String TAG = "CustomTabActivity"; - private CustomTabsSessionToken mSession; private final CustomTabsConnection mConnection = CustomTabsConnection.getInstance();
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabIncognitoManager.java b/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabIncognitoManager.java index ff6ae08..44274bf 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabIncognitoManager.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabIncognitoManager.java
@@ -30,8 +30,6 @@ */ @ActivityScope public class CustomTabIncognitoManager implements NativeInitObserver, DestroyObserver { - private static final String TAG = "CctIncognito"; - private final Activity mActivity; private final CustomTabActivityNavigationController mNavigationController; private final BrowserServicesIntentDataProvider mIntentDataProvider;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabsConnection.java b/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabsConnection.java index fabd721..e3748bc 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabsConnection.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/customtabs/CustomTabsConnection.java
@@ -126,17 +126,14 @@ private static final int SPECULATION_STATUS_ON_START_ALLOWED = 0; // What kind of speculation was started, counted in addition to // SPECULATION_STATUS_ALLOWED. - private static final int SPECULATION_STATUS_ON_START_PREFETCH = 1; private static final int SPECULATION_STATUS_ON_START_PRERENDER = 2; private static final int SPECULATION_STATUS_ON_START_BACKGROUND_TAB = 3; - private static final int SPECULATION_STATUS_ON_START_PRERENDER_NOT_STARTED = 4; // The following describe reasons why a speculation was not allowed, and are // counted instead of SPECULATION_STATUS_ALLOWED. private static final int SPECULATION_STATUS_ON_START_NOT_ALLOWED_DEVICE_CLASS = 5; private static final int SPECULATION_STATUS_ON_START_NOT_ALLOWED_BLOCK_3RD_PARTY_COOKIES = 6; private static final int SPECULATION_STATUS_ON_START_NOT_ALLOWED_NETWORK_PREDICTION_DISABLED = 7; - private static final int SPECULATION_STATUS_ON_START_NOT_ALLOWED_DATA_REDUCTION_ENABLED = 8; // Obsolete due to no longer running the experiment // "PredictivePrefetchingAllowedOnAllConnectionTypes". // private static final int SPECULATION_STATUS_ON_START_NOT_ALLOWED_NETWORK_METERED = 9; @@ -145,8 +142,6 @@ // For CustomTabs.SpeculationStatusOnSwap, see tools/metrics/enums.xml. Append only. private static final int SPECULATION_STATUS_ON_SWAP_BACKGROUND_TAB_TAKEN = 0; private static final int SPECULATION_STATUS_ON_SWAP_BACKGROUND_TAB_NOT_MATCHED = 1; - private static final int SPECULATION_STATUS_ON_SWAP_PRERENDER_TAKEN = 2; - private static final int SPECULATION_STATUS_ON_SWAP_PRERENDER_NOT_MATCHED = 3; private static final int SPECULATION_STATUS_ON_SWAP_MAX = 4; // Constants for sending connection characteristics.
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadBroadcastManagerImpl.java b/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadBroadcastManagerImpl.java index ab0589d..e04c79ad 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadBroadcastManagerImpl.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadBroadcastManagerImpl.java
@@ -50,7 +50,6 @@ * relevant information on to native. */ public class DownloadBroadcastManagerImpl extends DownloadBroadcastManager.Impl { - private static final String TAG = "DLBroadcastManager"; private static final int WAIT_TIME_MS = 5000; private final DownloadSharedPreferenceHelper mDownloadSharedPreferenceHelper =
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadMetrics.java b/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadMetrics.java index d4795fd6..d872b3c1 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadMetrics.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/download/DownloadMetrics.java
@@ -15,9 +15,6 @@ * Records download related metrics on Android. */ public class DownloadMetrics { - private static final String TAG = "DownloadMetrics"; - private static final int MAX_VIEW_RETENTION_MINUTES = 30 * 24 * 60; - /** * Records download open source. * @param source The source where the user opened the download media file.
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/download/OMADownloadHandler.java b/chrome/android/java/src/org/chromium/chrome/browser/download/OMADownloadHandler.java index cb43549..7a5cb3b 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/download/OMADownloadHandler.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/download/OMADownloadHandler.java
@@ -120,7 +120,6 @@ "901 insufficient memory \n\r"; private static final String DOWNLOAD_STATUS_USER_CANCELLED = "902 User Cancelled \n\r"; private static final String DOWNLOAD_STATUS_LOSS_OF_SERVICE = "903 Loss of Service \n\r"; - private static final String DOWNLOAD_STATUS_ATTRIBUTE_MISMATCH = "905 Attribute mismatch \n\r"; private static final String DOWNLOAD_STATUS_INVALID_DESCRIPTOR = "906 Invalid descriptor \n\r"; private static final String DOWNLOAD_STATUS_INVALID_DDVERSION = "951 Invalid DDVersion \n\r"; private static final String DOWNLOAD_STATUS_DEVICE_ABORTED = "952 Device Aborted \n\r";
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/dragdrop/ChromeDragAndDropBrowserDelegate.java b/chrome/android/java/src/org/chromium/chrome/browser/dragdrop/ChromeDragAndDropBrowserDelegate.java index 8d5c1a0..ddb3068 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/dragdrop/ChromeDragAndDropBrowserDelegate.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/dragdrop/ChromeDragAndDropBrowserDelegate.java
@@ -16,7 +16,7 @@ import org.chromium.chrome.browser.DragAndDropLauncherActivity; import org.chromium.chrome.browser.flags.ChromeFeatureList; import org.chromium.chrome.browser.multiwindow.MultiWindowUtils; -import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.common.ContentFeatures; import org.chromium.ui.dragdrop.DragAndDropBrowserDelegate; import org.chromium.ui.dragdrop.DropDataProviderImpl; @@ -39,12 +39,12 @@ */ public ChromeDragAndDropBrowserDelegate(Context context) { mContext = context; - mSupportDropInChrome = ContentFeatureList.getFieldTrialParamByFeatureAsBoolean( + mSupportDropInChrome = ContentFeatureMap.getInstance().getFieldTrialParamByFeatureAsBoolean( ContentFeatures.TOUCH_DRAG_AND_CONTEXT_MENU, PARAM_DROP_IN_CHROME, false); mSupportAnimatedImageDragShadow = ChromeFeatureList.isEnabled(ChromeFeatureList.ANIMATED_IMAGE_DRAG_SHADOW); - int delay = ContentFeatureList.getFieldTrialParamByFeatureAsInt( + int delay = ContentFeatureMap.getInstance().getFieldTrialParamByFeatureAsInt( ContentFeatures.TOUCH_DRAG_AND_CONTEXT_MENU, PARAM_CLEAR_CACHE_DELAYED_MS, DropDataProviderImpl.DEFAULT_CLEAR_CACHED_DATA_INTERVAL_MS); DropDataProviderUtils.setClearCachedDataIntervalMs(delay);
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/externalnav/IntentWithRequestMetadataHandler.java b/chrome/android/java/src/org/chromium/chrome/browser/externalnav/IntentWithRequestMetadataHandler.java index 35c04424..c02f039 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/externalnav/IntentWithRequestMetadataHandler.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/externalnav/IntentWithRequestMetadataHandler.java
@@ -30,8 +30,6 @@ public static final String EXTRA_REQUEST_METADATA_TOKEN = "org.chromium.chrome.browser.request_metadata_token"; - private static final String TAG = "MetadataHandler"; - private static final Object INSTANCE_LOCK = new Object(); private static IntentWithRequestMetadataHandler sIntentWithRequestMetadataHandler; private SecureRandom mSecureRandom = new SecureRandom();
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/feedback/HelpAndFeedbackLauncherImpl.java b/chrome/android/java/src/org/chromium/chrome/browser/feedback/HelpAndFeedbackLauncherImpl.java index 62ee757b..cf2e3ae 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/feedback/HelpAndFeedbackLauncherImpl.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/feedback/HelpAndFeedbackLauncherImpl.java
@@ -31,7 +31,6 @@ public class HelpAndFeedbackLauncherImpl implements HelpAndFeedbackLauncher { protected static final String FALLBACK_SUPPORT_URL = "https://support.google.com/chrome/topic/6069782"; - private static final String TAG = "HelpAndFeedback"; private static ProfileKeyedMap<HelpAndFeedbackLauncher> sProfileToLauncherMap; private final HelpAndFeedbackLauncherDelegate mDelegate;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/findinpage/FindToolbar.java b/chrome/android/java/src/org/chromium/chrome/browser/findinpage/FindToolbar.java index f9a7d39..d56dcae 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/findinpage/FindToolbar.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/findinpage/FindToolbar.java
@@ -63,8 +63,6 @@ /** A toolbar providing find in page functionality. */ public class FindToolbar extends LinearLayout implements BackPressHandler { - private static final String TAG = "FindInPage"; - private static final long ACCESSIBLE_ANNOUNCEMENT_DELAY_MILLIS = 500; @IntDef({FindLocationBarState.SHOWN, FindLocationBarState.SHOWING, FindLocationBarState.HIDDEN,
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/history/HistoryManager.java b/chrome/android/java/src/org/chromium/chrome/browser/history/HistoryManager.java index 16838413..2814b02 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/history/HistoryManager.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/history/HistoryManager.java
@@ -95,7 +95,6 @@ // dividing by 10 until it gets under 100, reaching 10 for both // UMA_MAX_BUCKET_VALUE and UMA_MAX_SUBSET_BUCKET_VALUE, and adds +1 // for overflow. How do we keep that in sync with this code? - private static final int UMA_BUCKET_COUNT = 11; private static final int HISTORY_TAB_INDEX = 0; private static final int JOURNEYS_TAB_INDEX = 1;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/infobar/DuplicateDownloadInfoBar.java b/chrome/android/java/src/org/chromium/chrome/browser/infobar/DuplicateDownloadInfoBar.java index 450a1b8..3425f65 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/infobar/DuplicateDownloadInfoBar.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/infobar/DuplicateDownloadInfoBar.java
@@ -25,7 +25,6 @@ * being downloaded. */ public class DuplicateDownloadInfoBar extends ConfirmInfoBar { - private static final String TAG = "DuplicateDownloadInfoBar"; private final String mFilePath; private final boolean mIsOfflinePage; private final String mPageUrl;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/infobar/InfoBarContainer.java b/chrome/android/java/src/org/chromium/chrome/browser/infobar/InfoBarContainer.java index 4e87df3..95084f5d 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/infobar/InfoBarContainer.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/infobar/InfoBarContainer.java
@@ -47,8 +47,6 @@ * sync, see NativeInfoBar. */ public class InfoBarContainer implements UserData, KeyboardVisibilityListener, InfoBar.Container { - private static final String TAG = "InfoBarContainer"; - private static final Class<InfoBarContainer> USER_DATA_KEY = InfoBarContainer.class; private static final ChromeAccessibilityUtil.Observer sAccessibilityObserver;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/infobar/KnownInterceptionDisclosureInfoBar.java b/chrome/android/java/src/org/chromium/chrome/browser/infobar/KnownInterceptionDisclosureInfoBar.java index 10590e3..adc0119b 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/infobar/KnownInterceptionDisclosureInfoBar.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/infobar/KnownInterceptionDisclosureInfoBar.java
@@ -17,7 +17,6 @@ * standard ConfirmInfoBar to provide a description as well as a title. */ public class KnownInterceptionDisclosureInfoBar extends ConfirmInfoBar { - private static final String TAG = "KnownInterceptionDisclosureInfoBar"; private String mDescription; /**
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/infobar/SafetyTipInfoBar.java b/chrome/android/java/src/org/chromium/chrome/browser/infobar/SafetyTipInfoBar.java index 720149de..7974a1d 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/infobar/SafetyTipInfoBar.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/infobar/SafetyTipInfoBar.java
@@ -21,7 +21,6 @@ * a description as well as a title. */ public class SafetyTipInfoBar extends ConfirmInfoBar { - private static final String TAG = "SafetyTipInfoBar"; private String mDescription; private String mLearnMoreLinkText;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/init/AsyncInitializationActivity.java b/chrome/android/java/src/org/chromium/chrome/browser/init/AsyncInitializationActivity.java index 0eccb9b..a448b2f7 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/init/AsyncInitializationActivity.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/init/AsyncInitializationActivity.java
@@ -65,7 +65,6 @@ extends ChromeBaseAppCompatActivity implements ChromeActivityNativeDelegate, BrowserParts { @VisibleForTesting public static final String FIRST_DRAW_COMPLETED_TIME_MS_UMA = "FirstDrawCompletedTime"; - private static final String TAG = "AsyncInitActivity"; static Boolean sOverrideNativeLibraryCannotBeLoadedForTesting; protected final Handler mHandler; @@ -769,7 +768,8 @@ display, Math.min(bounds.right - bounds.left, bounds.bottom - bounds.top)); if (BuildInfo.getInstance().isAutomotive) { - smallestScreenWidth = (int) (smallestScreenWidth / UI_SCALING_FACTOR_FOR_AUTO); + smallestScreenWidth = + (int) (smallestScreenWidth / DisplayUtil.UI_SCALING_FACTOR_FOR_AUTO); } return smallestScreenWidth; }
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/init/NativeStartupBridge.java b/chrome/android/java/src/org/chromium/chrome/browser/init/NativeStartupBridge.java index 441d86c..c65c1ac 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/init/NativeStartupBridge.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/init/NativeStartupBridge.java
@@ -13,8 +13,6 @@ * A class for native code to request full browser start when running in minimal browser mode. */ public class NativeStartupBridge { - private static final String TAG = "NativeStartupBridge"; - @CalledByNative private static void loadFullBrowser() { if (BrowserStartupController.getInstance().isFullBrowserStarted()) return;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/instantapps/InstantAppsHandler.java b/chrome/android/java/src/org/chromium/chrome/browser/instantapps/InstantAppsHandler.java index 70765c6..6497d08 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/instantapps/InstantAppsHandler.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/instantapps/InstantAppsHandler.java
@@ -8,8 +8,6 @@ /** A launcher for Instant Apps. */ public class InstantAppsHandler { - private static final String TAG = "InstantAppsHandler"; - private static final Object INSTANCE_LOCK = new Object(); private static InstantAppsHandler sInstance;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/media/MediaCaptureNotificationServiceImpl.java b/chrome/android/java/src/org/chromium/chrome/browser/media/MediaCaptureNotificationServiceImpl.java index 806cbdf..796394a 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/media/MediaCaptureNotificationServiceImpl.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/media/MediaCaptureNotificationServiceImpl.java
@@ -55,8 +55,6 @@ private static final String NOTIFICATION_MEDIA_TYPE_EXTRA = "NotificationMediaType"; private static final String NOTIFICATION_MEDIA_URL_EXTRA = "NotificationMediaUrl"; - private static final String TAG = "MediaCapture"; - private NotificationManagerProxy mNotificationManager; private SharedPreferencesManager mSharedPreferences; private final SparseIntArray mNotifications = new SparseIntArray();
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/metrics/UmaSessionStats.java b/chrome/android/java/src/org/chromium/chrome/browser/metrics/UmaSessionStats.java index 346fecfed..f8d9254 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/metrics/UmaSessionStats.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/metrics/UmaSessionStats.java
@@ -34,7 +34,6 @@ */ public class UmaSessionStats { private static final String TAG = "UmaSessionStats"; - private static final String SAMSUNG_MULTWINDOW_PACKAGE = "com.sec.feature.multiwindow"; private static long sNativeUmaSessionStats;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/native_page/NativePageNavigationDelegateImpl.java b/chrome/android/java/src/org/chromium/chrome/browser/native_page/NativePageNavigationDelegateImpl.java index 1178ac9..6e91d57 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/native_page/NativePageNavigationDelegateImpl.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/native_page/NativePageNavigationDelegateImpl.java
@@ -25,7 +25,6 @@ * {@link NativePageNavigationDelegate} implementation. */ public class NativePageNavigationDelegateImpl implements NativePageNavigationDelegate { - private static final String TAG = "PageNavDelegate"; private final Profile mProfile; private final TabModelSelector mTabModelSelector; private final Tab mTab;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPage.java b/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPage.java index 89b69c0..a1250ec 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPage.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPage.java
@@ -120,7 +120,6 @@ private static final String TAG = "NewTabPage"; // Key for the scroll position data that may be stored in a navigation entry. - private static final String NAVIGATION_ENTRY_SCROLL_POSITION_KEY = "NewTabPageScrollPosition"; public static final String CONTEXT_MENU_USER_ACTION_PREFIX = "Suggestions"; protected final Tab mTab;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPageLayout.java b/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPageLayout.java index 4451443..8182f5be 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPageLayout.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/ntp/NewTabPageLayout.java
@@ -80,7 +80,6 @@ private final int mTileGridLayoutBleed; private int mSearchBoxTwoSideMargin; private final Context mContext; - private int mSearchBoxEndPadding = UNSET_RESOURCE_FLAG; private final int mMvtLandscapeLateralMarginTablet; private final int mMvtExtraRightMarginTablet;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/offlinepages/PublishPageCallback.java b/chrome/android/java/src/org/chromium/chrome/browser/offlinepages/PublishPageCallback.java index 21c7610d..863b6b9 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/offlinepages/PublishPageCallback.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/offlinepages/PublishPageCallback.java
@@ -17,7 +17,6 @@ private Callback<ShareParams> mShareCallback; OfflinePageItem mPage; private WindowAndroid mWindow; - private static final String TAG = "PublishPageCallback"; /** Create a callback for use when page publishing is completed. */ public PublishPageCallback(
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/omaha/RequestGenerator.java b/chrome/android/java/src/org/chromium/chrome/browser/omaha/RequestGenerator.java index c7a1f2d59..8671800 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/omaha/RequestGenerator.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/omaha/RequestGenerator.java
@@ -25,8 +25,6 @@ * Generates XML requests to send to the Omaha server. */ public abstract class RequestGenerator { - private static final String TAG = "RequestGenerator"; - // The Omaha specs say that new installs should use "-1". public static final int INSTALL_AGE_IMMEDIATELY_AFTER_INSTALLING = -1;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateConfigs.java b/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateConfigs.java index f8d646b8..a411dc2 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateConfigs.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateConfigs.java
@@ -28,7 +28,6 @@ public class UpdateConfigs { // VariationsAssociatedData configs private static final String FIELD_TRIAL_NAME = "UpdateMenuItem"; - private static final String ENABLED_VALUE = "true"; private static final String CUSTOM_SUMMARY = "custom_summary"; private static final String MIN_REQUIRED_STORAGE_MB = "min_required_storage_for_update_mb";
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateStatusProvider.java b/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateStatusProvider.java index 73ced0c..f1c6d39 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateStatusProvider.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/omaha/UpdateStatusProvider.java
@@ -20,7 +20,6 @@ import org.chromium.base.BuildInfo; import org.chromium.base.Callback; -import org.chromium.base.ContextUtils; import org.chromium.base.ObserverList; import org.chromium.base.PackageUtils; import org.chromium.base.ThreadUtils; @@ -231,7 +230,6 @@ } private static final class UpdateQuery extends AsyncTask<UpdateStatus> { - private final Context mContext = ContextUtils.getApplicationContext(); private final Runnable mCallback; private @Nullable UpdateStatus mStatus;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/partnerbookmarks/PartnerBookmarksShim.java b/chrome/android/java/src/org/chromium/chrome/browser/partnerbookmarks/PartnerBookmarksShim.java index 8877a8f..fd2e836 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/partnerbookmarks/PartnerBookmarksShim.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/partnerbookmarks/PartnerBookmarksShim.java
@@ -19,8 +19,6 @@ * - reporting that all partner bookmarks were read to the C++ side. */ public class PartnerBookmarksShim { - private static final String TAG = "PartnerBookmarksShim"; - private static boolean sIsReadingAttempted; /** @@ -44,5 +42,4 @@ reader.readBookmarks(); } - }
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/password_manager/settings/PasswordSettings.java b/chrome/android/java/src/org/chromium/chrome/browser/password_manager/settings/PasswordSettings.java index f47f74f6..7a87414 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/password_manager/settings/PasswordSettings.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/password_manager/settings/PasswordSettings.java
@@ -108,7 +108,6 @@ private static final int ORDER_CHECK_PASSWORDS = 2; private static final int ORDER_TRUSTED_VAULT_BANNER = 3; private static final int ORDER_MANAGE_ACCOUNT_LINK = 4; - private static final int ORDER_SECURITY_KEY = 5; private static final int ORDER_SAVED_PASSWORDS = 6; private static final int ORDER_EXCEPTIONS = 7; private static final int ORDER_SAVED_PASSWORDS_NO_TEXT = 8;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/payments/ServiceWorkerPaymentAppBridge.java b/chrome/android/java/src/org/chromium/chrome/browser/payments/ServiceWorkerPaymentAppBridge.java index f543501..09fbed5 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/payments/ServiceWorkerPaymentAppBridge.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/payments/ServiceWorkerPaymentAppBridge.java
@@ -26,8 +26,6 @@ * Native bridge for interacting with service worker based payment apps. */ public class ServiceWorkerPaymentAppBridge { - private static final String TAG = "SWPaymentApp"; - /** The interface for checking whether there is an installed SW payment app. */ public static interface HasServiceWorkerPaymentAppsCallback { /**
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/privacy/settings/PrivacySettings.java b/chrome/android/java/src/org/chromium/chrome/browser/privacy/settings/PrivacySettings.java index 48e67a69..4362099 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/privacy/settings/PrivacySettings.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/privacy/settings/PrivacySettings.java
@@ -71,7 +71,6 @@ private static final String PREF_DO_NOT_TRACK = "do_not_track"; private static final String PREF_SAFE_BROWSING = "safe_browsing"; private static final String PREF_SYNC_AND_SERVICES_LINK = "sync_and_services_link"; - private static final String PREF_CLEAR_BROWSING_DATA = "clear_browsing_data"; private static final String PREF_PRIVACY_SANDBOX = "privacy_sandbox"; private static final String PREF_PRIVACY_GUIDE = "privacy_guide"; private static final String PREF_INCOGNITO_LOCK = "incognito_lock";
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/sharing/SharingJNIBridge.java b/chrome/android/java/src/org/chromium/chrome/browser/sharing/SharingJNIBridge.java index adca9f7..46f94e5 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/sharing/SharingJNIBridge.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/sharing/SharingJNIBridge.java
@@ -14,8 +14,6 @@ * JNI bridge for SharingService. */ public class SharingJNIBridge { - private static final String TAG = "SharingJNIBridge"; - // Returns if device supports telephony capability. @CalledByNative public static boolean isTelephonySupported() {
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/site_settings/ChromeSiteSettingsDelegate.java b/chrome/android/java/src/org/chromium/chrome/browser/site_settings/ChromeSiteSettingsDelegate.java index dfb370d..81325f5 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/site_settings/ChromeSiteSettingsDelegate.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/site_settings/ChromeSiteSettingsDelegate.java
@@ -40,6 +40,7 @@ import org.chromium.components.user_prefs.UserPrefs; import org.chromium.content_public.browser.BrowserContextHandle; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.common.ContentFeatures; import org.chromium.content_public.common.ContentSwitches; import org.chromium.url.GURL; @@ -118,15 +119,15 @@ return ChromeFeatureList.isEnabled( ChromeFeatureList.DARKEN_WEBSITES_CHECKBOX_IN_THEMES_SETTING); case SiteSettingsCategory.Type.BLUETOOTH: - return ContentFeatureList.isEnabled( + return ContentFeatureMap.isEnabled( ContentFeatureList.WEB_BLUETOOTH_NEW_PERMISSIONS_BACKEND); case SiteSettingsCategory.Type.BLUETOOTH_SCANNING: return CommandLine.getInstance().hasSwitch( ContentSwitches.ENABLE_EXPERIMENTAL_WEB_PLATFORM_FEATURES); case SiteSettingsCategory.Type.FEDERATED_IDENTITY_API: - return ContentFeatureList.isEnabled(ContentFeatures.FED_CM); + return ContentFeatureMap.isEnabled(ContentFeatures.FED_CM); case SiteSettingsCategory.Type.NFC: - return ContentFeatureList.isEnabled(ContentFeatureList.WEB_NFC); + return ContentFeatureMap.isEnabled(ContentFeatureList.WEB_NFC); default: return true; }
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/suggestions/SuggestionsNavigationDelegate.java b/chrome/android/java/src/org/chromium/chrome/browser/suggestions/SuggestionsNavigationDelegate.java index bb886ec7..ef9e5271 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/suggestions/SuggestionsNavigationDelegate.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/suggestions/SuggestionsNavigationDelegate.java
@@ -18,8 +18,6 @@ * Extension of {@link NativePageNavigationDelegate} with suggestions-specific methods. */ public class SuggestionsNavigationDelegate extends NativePageNavigationDelegateImpl { - private static final String NEW_TAB_URL_HELP = "https://support.google.com/chrome/?p=new_tab"; - public SuggestionsNavigationDelegate(Activity activity, Profile profile, NativePageHost host, TabModelSelector tabModelSelector, Tab tab) { super(activity, profile, host, tabModelSelector, tab);
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/MostVisitedTilesCarouselLayout.java b/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/MostVisitedTilesCarouselLayout.java index d2967260..a2a71e1 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/MostVisitedTilesCarouselLayout.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/MostVisitedTilesCarouselLayout.java
@@ -21,7 +21,6 @@ */ public class MostVisitedTilesCarouselLayout extends LinearLayout implements MostVisitedTilesLayout { // There's a minimum limit of 4. - private static final int MIN_RESULTS = 4; private int mTileViewWidth; private int mTileViewMinIntervalPaddingTablet;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/TileRenderer.java b/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/TileRenderer.java index eb7b74a..6e6dac6c 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/TileRenderer.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/suggestions/tile/TileRenderer.java
@@ -50,8 +50,6 @@ * manipulating the views as needed. */ public class TileRenderer { - private static final String TAG = "TileRenderer"; - private final Context mContext; private final Resources.Theme mTheme; private RoundedIconGenerator mIconGenerator;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/sync/settings/AccountManagementFragment.java b/chrome/android/java/src/org/chromium/chrome/browser/sync/settings/AccountManagementFragment.java index 2a79a0e..b6473d0c 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/sync/settings/AccountManagementFragment.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/sync/settings/AccountManagementFragment.java
@@ -73,9 +73,6 @@ private static final String PREF_ACCOUNTS_CATEGORY = "accounts_category"; private static final String PREF_PARENT_ACCOUNT_CATEGORY = "parent_account_category"; - private static final String PREF_PARENTAL_SETTINGS = "parental_settings"; - private static final String PREF_PARENT_ACCOUNTS = "parent_accounts"; - private static final String PREF_CHILD_CONTENT = "child_content"; private static final String PREF_SIGN_OUT = "sign_out"; private static final String PREF_SIGN_OUT_DIVIDER = "sign_out_divider";
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tab/RequestDesktopUtils.java b/chrome/android/java/src/org/chromium/chrome/browser/tab/RequestDesktopUtils.java index f6d40aa9..d3ab305 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tab/RequestDesktopUtils.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tab/RequestDesktopUtils.java
@@ -52,6 +52,7 @@ import org.chromium.components.ukm.UkmRecorder; import org.chromium.components.variations.SyntheticTrialAnnotationMode; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.ui.display.DisplayAndroid; import org.chromium.ui.display.DisplayAndroidManager; import org.chromium.ui.modaldialog.DialogDismissalCause; @@ -228,7 +229,7 @@ */ public static void maybeDowngradeSiteSettings(TabModelSelector tabModelSelector) { SharedPreferencesManager sharedPreferencesManager = SharedPreferencesManager.getInstance(); - if (ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS)) { + if (ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS)) { // Remove the SharedPreferences keys if they exist when desktop site exceptions are // re-enabled. SharedPreferencesManager.getInstance().removeKey( @@ -324,7 +325,7 @@ */ public static void maybeUpgradeTabLevelDesktopSiteSetting( Tab tab, Profile profile, @TabUserAgent int tabUserAgent, @Nullable GURL url) { - if (!ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) + if (!ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) || url == null) { return; } @@ -971,7 +972,7 @@ } private static boolean isDesktopSiteExceptionsDowngradeEnabledForTab(int tabId) { - if (ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) + if (ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) || !SiteSettingsFeatureList.isEnabled( SiteSettingsFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS_DOWNGRADE)) { return false;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabImpl.java b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabImpl.java index 514e6d1..b4b243ad 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabImpl.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabImpl.java
@@ -65,6 +65,7 @@ import org.chromium.components.version_info.VersionInfo; import org.chromium.content_public.browser.ChildProcessImportance; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.LoadUrlParams; import org.chromium.content_public.browser.WebContents; import org.chromium.content_public.browser.WebContentsAccessibility; @@ -87,8 +88,6 @@ private static final String PRODUCT_VERSION = VersionInfo.getProductVersion(); - private static final String REQUEST_DESKTOP_ENABLED_PARAM = "enabled"; - private long mNativeTabAndroid; /** Unique id of this tab (within its container). */ @@ -1738,8 +1737,7 @@ TabUtils.readRequestDesktopSiteContentSettings(profile, url) || alwaysRequestDesktopSite; if (!shouldRequestDesktopSite - && ContentFeatureList.isEnabled( - ContentFeatureList.REQUEST_DESKTOP_SITE_ADDITIONS)) { + && ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_ADDITIONS)) { // TODO(shuyng): Make additional setting compatible with site level setting. PrefService prefService = UserPrefs.get(profile); boolean peripheralPref =
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUma.java b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUma.java index ba0f4c9e..d4ae70c1 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUma.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUma.java
@@ -38,9 +38,7 @@ private static final int TAB_STATE_INITIAL = 0; private static final int TAB_STATE_ACTIVE = 1; private static final int TAB_STATE_INACTIVE = 2; - private static final int TAB_STATE_DETACHED = 3; private static final int TAB_STATE_CLOSED = 4; - private static final int TAB_STATE_MAX = TAB_STATE_CLOSED; // Counter of tab shows (as per onShow()) for all tabs. private static long sAllTabsShowCount;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUtils.java b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUtils.java index feb9a62..c6739929 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUtils.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabUtils.java
@@ -28,6 +28,7 @@ import org.chromium.components.content_settings.ContentSettingValues; import org.chromium.components.content_settings.ContentSettingsType; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.WebContents; import org.chromium.ui.base.DeviceFormFactor; import org.chromium.ui.base.WindowAndroid; @@ -193,7 +194,7 @@ */ public static boolean readRequestDesktopSiteContentSettings( Profile profile, @Nullable GURL url) { - if (ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS)) { + if (ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS)) { return url != null && TabUtils.isDesktopSiteEnabled(profile, url); } else { return TabUtils.isDesktopSiteGlobalEnabled(profile);
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabViewAndroidDelegate.java b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabViewAndroidDelegate.java index aeb890d..fb96aa1 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabViewAndroidDelegate.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabViewAndroidDelegate.java
@@ -12,7 +12,7 @@ import org.chromium.base.Callback; import org.chromium.chrome.browser.dragdrop.ChromeDragAndDropBrowserDelegate; import org.chromium.components.embedder_support.view.ContentView; -import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.common.ContentFeatures; import org.chromium.ui.base.ApplicationViewportInsetSupplier; import org.chromium.ui.base.ViewAndroidDelegate; @@ -45,7 +45,7 @@ mTab = (TabImpl) tab; containerView.addOnDragListener(getDragStateTracker()); - if (ContentFeatureList.isEnabled(ContentFeatures.TOUCH_DRAG_AND_CONTEXT_MENU) + if (ContentFeatureMap.isEnabled(ContentFeatures.TOUCH_DRAG_AND_CONTEXT_MENU) && DragAndDropDelegate.isDragAndDropSupportedForOs()) { mDragAndDropBrowserDelegate = new ChromeDragAndDropBrowserDelegate(tab.getContext()); getDragAndDropDelegate().setDragAndDropBrowserDelegate(mDragAndDropBrowserDelegate);
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabWebContentsObserver.java b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabWebContentsObserver.java index dc29c1d..169d074 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tab/TabWebContentsObserver.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tab/TabWebContentsObserver.java
@@ -54,7 +54,6 @@ private final TabImpl mTab; private final ObserverList<Callback<WebContents>> mInitObservers = new ObserverList<>(); - private final Handler mHandler = new Handler(); private WebContentsObserver mObserver; private GURL mLastUrl;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tabmodel/TabModelSelectorProfileSupplier.java b/chrome/android/java/src/org/chromium/chrome/browser/tabmodel/TabModelSelectorProfileSupplier.java index 130ece5..b0d3eae 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tabmodel/TabModelSelectorProfileSupplier.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tabmodel/TabModelSelectorProfileSupplier.java
@@ -26,7 +26,6 @@ private final Callback<TabModelSelector> mSelectorSupplierCallback; private TabModelSelector mSelector; - private boolean mIsTabStateInitialized; private boolean mHasProfile; public TabModelSelectorProfileSupplier(ObservableSupplier<TabModelSelector> selectorSupplier) { @@ -34,15 +33,18 @@ @Override public void onTabModelSelected(TabModel newModel, TabModel oldModel) { Profile newProfile = newModel.getProfile(); - assert !mIsTabStateInitialized || newProfile != null; - // Postpone setting the profile until tab state is initialized. if (newProfile == null) return; set(newProfile); } @Override - public void onChange() {} + public void onChange() { + if (mSelector.getCurrentModel() == null) return; + Profile profile = mSelector.getCurrentModel().getProfile(); + if (profile == null) return; + set(profile); + } @Override public void onNewTabCreated(Tab tab, int creationState) {} @@ -52,7 +54,6 @@ @Override public void onTabStateInitialized() { - mIsTabStateInitialized = true; set(mSelector.getCurrentModel().getProfile()); } }; @@ -73,7 +74,7 @@ mSelector = selector; mSelector.addObserver(mSelectorObserver); - if (selector.isTabStateInitialized() && selector.getCurrentModel() != null) { + if (selector.getCurrentModel() != null) { mSelectorObserver.onTabModelSelected(selector.getCurrentModel(), null); } }
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tasks/JourneyManager.java b/chrome/android/java/src/org/chromium/chrome/browser/tasks/JourneyManager.java index 995bd4c6..4a829a0 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tasks/JourneyManager.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tasks/JourneyManager.java
@@ -34,7 +34,6 @@ import java.util.HashMap; import java.util.Map; -import java.util.concurrent.TimeUnit; /** * Manages Journey related signals, specifically those related to tab engagement. @@ -47,7 +46,6 @@ // We track this in seconds because UMA can only handle 32-bit signed integers, which 45 days // will overflow. - private static final int MAX_ENGAGEMENT_TIME_S = (int) TimeUnit.DAYS.toSeconds(45); private final TabModelSelectorTabObserver mTabModelSelectorTabObserver; private final TabModelSelectorTabModelObserver mTabModelSelectorTabModelObserver;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/tasks/ReturnToChromeUtil.java b/chrome/android/java/src/org/chromium/chrome/browser/tasks/ReturnToChromeUtil.java index b79a8d7..6d9ff3d 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/tasks/ReturnToChromeUtil.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/tasks/ReturnToChromeUtil.java
@@ -84,8 +84,6 @@ * Chrome for a while. */ public final class ReturnToChromeUtil { - private static final String TAG = "TabSwitcherOnReturn"; - /** * The reasons of failing to show the home surface UI on a NTP. * @@ -124,7 +122,6 @@ public static final String FAIL_TO_SHOW_HOME_SURFACE_UI_UMA = "NewTabPage.FailToShowHomeSurfaceUI"; - private static final String START_SEGMENTATION_PLATFORM_KEY = "chrome_start_android"; private static final String START_V2_SEGMENTATION_PLATFORM_KEY = "chrome_start_android_v2"; private static boolean sIsHomepagePolicyManagerInitializedRecorded;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/toolbar/ToolbarManager.java b/chrome/android/java/src/org/chromium/chrome/browser/toolbar/ToolbarManager.java index 4685b4c9..777937e 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/toolbar/ToolbarManager.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/toolbar/ToolbarManager.java
@@ -266,8 +266,6 @@ private BrowserStateBrowserControlsVisibilityDelegate mControlsVisibilityDelegate; private int mFullscreenFocusToken = TokenHolder.INVALID_TOKEN; private int mFullscreenFindInPageToken = TokenHolder.INVALID_TOKEN; - private int mFullscreenMenuToken = TokenHolder.INVALID_TOKEN; - private int mFullscreenHighlightToken = TokenHolder.INVALID_TOKEN; private boolean mTabRestoreCompleted;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebApkShareTargetUtil.java b/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebApkShareTargetUtil.java index 99facf1..03f4949 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebApkShareTargetUtil.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebApkShareTargetUtil.java
@@ -27,8 +27,6 @@ * Computes data for Post Share Target. */ public class WebApkShareTargetUtil { - private static final String TAG = "WebApkShareTargetUtil"; - // A class containing data required to generate a share target post request. protected static class PostData { public boolean isMultipartEncoding;
diff --git a/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebappActivityCoordinator.java b/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebappActivityCoordinator.java index 37585c5..2c43f24 100644 --- a/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebappActivityCoordinator.java +++ b/chrome/android/java/src/org/chromium/chrome/browser/webapps/WebappActivityCoordinator.java
@@ -38,7 +38,6 @@ private final WebappDeferredStartupWithStorageHandler mDeferredStartupWithStorageHandler; // Whether the current page is within the webapp's scope. - private boolean mInScope = true; @Inject public WebappActivityCoordinator(SharedActivityCoordinator sharedActivityCoordinator,
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/TabsTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/TabsTest.java index ff860c4..479ece9 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/TabsTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/TabsTest.java
@@ -125,9 +125,6 @@ private boolean mNotifyChangedCalled; - private static final int SWIPE_TO_RIGHT_DIRECTION = 1; - private static final int SWIPE_TO_LEFT_DIRECTION = -1; - private static final long WAIT_RESIZE_TIMEOUT_MS = 3000; private static final int STRESSFUL_TAB_COUNT = 100;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/app/appmenu/TabbedAppMenuTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/app/appmenu/TabbedAppMenuTest.java index 194075d..a81c57ed 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/app/appmenu/TabbedAppMenuTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/app/appmenu/TabbedAppMenuTest.java
@@ -80,7 +80,6 @@ .build(); private static final String TEST_URL = UrlUtils.encodeHtmlDataUri("<html>foo</html>"); - private static final String TEST_URL2 = UrlUtils.encodeHtmlDataUri("<html>bar</html>"); private AppMenuHandler mAppMenuHandler;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/app/bookmarks/ReadingListTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/app/bookmarks/ReadingListTest.java index e59498b..f62d23d 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/app/bookmarks/ReadingListTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/app/bookmarks/ReadingListTest.java
@@ -95,9 +95,7 @@ @Rule public final DisableAnimationsTestRule mDisableAnimationsRule = new DisableAnimationsTestRule(); - private static final String TEST_PAGE_URL_GOOGLE = "/chrome/test/data/android/google.html"; private static final String TEST_PAGE_TITLE_GOOGLE = "The Google"; - private static final String TEST_PAGE_URL_FOO = "/chrome/test/data/android/test.html"; private static final int TEST_PORT = 12345; private BookmarkManagerCoordinator mBookmarkManagerCoordinator;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/autofill/settings/AutofillPaymentMethodsFragmentTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/autofill/settings/AutofillPaymentMethodsFragmentTest.java index 0f55a70..5632af0 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/autofill/settings/AutofillPaymentMethodsFragmentTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/autofill/settings/AutofillPaymentMethodsFragmentTest.java
@@ -74,7 +74,6 @@ // Card Issuer values that map to the browser CreditCard.Issuer enum. private static final int CARD_ISSUER_UNKNOWN = 0; - private static final int CARD_ISSUER_GOOGLE = 1; private static final CreditCard SAMPLE_CARD_VISA = new CreditCard(/* guid= */ "", /* origin= */ "",
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/background_sync/PeriodicBackgroundSyncTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/background_sync/PeriodicBackgroundSyncTest.java index 19774f2..aa3963c 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/background_sync/PeriodicBackgroundSyncTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/background_sync/PeriodicBackgroundSyncTest.java
@@ -67,7 +67,6 @@ "/chrome/test/data/background_sync/background_sync_test.html"; private static final int TITLE_UPDATE_TIMEOUT_SECONDS = (int) scaleTimeout(10); private static final long WAIT_TIME_MS = scaleTimeout(100); - private static final long MIN_INTERVAL_MS = 1000; private CountDownLatch mScheduleLatch; private CountDownLatch mCancelLatch;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/bookmarks/PowerBookmarkShoppingItemRowTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/bookmarks/PowerBookmarkShoppingItemRowTest.java index e5d48130..d3a1f08 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/bookmarks/PowerBookmarkShoppingItemRowTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/bookmarks/PowerBookmarkShoppingItemRowTest.java
@@ -60,7 +60,6 @@ @RunWith(ChromeJUnit4ClassRunner.class) @EnableFeatures(ChromeFeatureList.BOOKMARKS_REFRESH) public class PowerBookmarkShoppingItemRowTest extends BlankUiTestActivityTestCase { - private static final long CURRENCY_MUTLIPLIER = 1000000; private static final String TITLE = "PowerBookmarkShoppingItemRow"; @Rule
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/download/DownloadTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/download/DownloadTest.java index 1e92c1c..808e3fb5 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/download/DownloadTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/download/DownloadTest.java
@@ -68,7 +68,6 @@ @Rule public DownloadTestRule mDownloadTestRule = new DownloadTestRule(this); - private static final String TAG = "DownloadTest"; private static final String SUPERBO_CONTENTS = "plain text response from a POST";
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/externalnav/ExternalNavigationDelegateImplTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/externalnav/ExternalNavigationDelegateImplTest.java index f6b04a0..2d3444f2 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/externalnav/ExternalNavigationDelegateImplTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/externalnav/ExternalNavigationDelegateImplTest.java
@@ -42,8 +42,6 @@ @Batch(Batch.PER_CLASS) @CommandLineFlags.Add({ChromeSwitches.DISABLE_FIRST_RUN_EXPERIENCE}) public class ExternalNavigationDelegateImplTest { - private static final boolean IS_GOOGLE_REFERRER = true; - @Rule public TestRule mProcessor = new Features.JUnitProcessor();
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/feature_engagement/ScreenshotMonitorTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/feature_engagement/ScreenshotMonitorTest.java index 50b35bf..a2d1232 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/feature_engagement/ScreenshotMonitorTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/feature_engagement/ScreenshotMonitorTest.java
@@ -49,7 +49,6 @@ @RunWith(ChromeJUnit4ClassRunner.class) @Batch(Batch.PER_CLASS) public class ScreenshotMonitorTest { - private static final String FILENAME = "image.jpeg"; private static final String TAG = "ScreenshotTest"; private static final Uri TEST_URI = Uri.parse("content://media/external/images/media/101");
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/gesturenav/NavigationHandlerTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/gesturenav/NavigationHandlerTest.java index 49c4e69..fee1281a 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/gesturenav/NavigationHandlerTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/gesturenav/NavigationHandlerTest.java
@@ -28,6 +28,7 @@ import org.chromium.base.test.util.Restriction; import org.chromium.chrome.browser.flags.ChromeFeatureList; import org.chromium.chrome.browser.flags.ChromeSwitches; +import org.chromium.chrome.browser.init.AsyncInitializationActivity; import org.chromium.chrome.browser.layouts.LayoutManager; import org.chromium.chrome.browser.layouts.LayoutTestUtils; import org.chromium.chrome.browser.layouts.LayoutType; @@ -37,7 +38,6 @@ import org.chromium.chrome.browser.tabmodel.TabCreator; import org.chromium.chrome.test.ChromeJUnit4ClassRunner; import org.chromium.chrome.test.ChromeTabbedActivityTestRule; -import org.chromium.chrome.test.util.ChromeApplicationTestUtils; import org.chromium.chrome.test.util.ChromeTabUtils; import org.chromium.components.embedder_support.util.UrlConstants; import org.chromium.content_public.browser.LoadUrlParams; @@ -59,7 +59,6 @@ private static final String RENDERED_PAGE = "/chrome/test/data/android/navigate/simple.html"; private static final boolean LEFT_EDGE = true; private static final boolean RIGHT_EDGE = false; - private static final int PAGELOAD_TIMEOUT_MS = 4000; private EmbeddedTestServer mTestServer; private HistoryNavigationLayout mNavigationLayout; @@ -122,11 +121,12 @@ @Test @SmallTest - @DisabledTest(message = "https://crbug.com/1376200") public void testCloseChromeAtHistoryStackHead() { loadNewTabPage(); + AsyncInitializationActivity.interceptMoveTaskToBackForTesting(); mNavUtils.swipeFromLeftEdge(); - ChromeApplicationTestUtils.waitUntilChromeInBackground(); + CriteriaHelper.pollUiThread( + AsyncInitializationActivity::wasMoveTaskToBackInterceptedForTesting); } @Test
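The re-enabled testCloseChromeAtHistoryStackHead no longer waits for Chrome to actually reach the background; it intercepts moveTaskToBack() and polls a test-only flag. A condensed sketch of that pattern, assuming CriteriaHelper is the base test utility this test already imports (the wrapper class is illustrative):

    import org.chromium.base.test.util.CriteriaHelper;
    import org.chromium.chrome.browser.init.AsyncInitializationActivity;

    /** Illustrative wrapper around the intercept-and-poll pattern used above. */
    final class MoveTaskToBackTestSketch {
        private MoveTaskToBackTestSketch() {}

        /** Call before the gesture under test: routes moveTaskToBack() to a flag. */
        static void armInterception() {
            AsyncInitializationActivity.interceptMoveTaskToBackForTesting();
        }

        /** Call after the gesture: polls until the intercepted call is observed. */
        static void waitForMoveTaskToBack() {
            CriteriaHelper.pollUiThread(
                    AsyncInitializationActivity::wasMoveTaskToBackInterceptedForTesting);
        }
    }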
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/infobar/InfoBarTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/infobar/InfoBarTest.java index 7bba1a4..ba409d88 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/infobar/InfoBarTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/infobar/InfoBarTest.java
@@ -64,8 +64,6 @@ public BlankCTATabInitialStateRule mInitialStateRule = new BlankCTATabInitialStateRule(sActivityTestRule, false); - private static final long MAX_TIMEOUT = 2000L; - private static final int CHECK_INTERVAL = 500; private static final String POPUP_PAGE = "/chrome/test/data/popup_blocker/popup-window-open.html"; private static final String HELLO_WORLD_URL = UrlUtils.encodeHtmlDataUri("<html>" @@ -74,7 +72,6 @@ + "</html>"); private static final String SHARED_PREF_DISPLAYED_FRE_OR_SECOND_PROMO_VERSION = "displayed_data_reduction_promo_version"; - private static final String M51_VERSION = "Chrome 51.0.2704.0"; private static EmbeddedTestServer sTestServer = sActivityTestRule.getTestServer(); private InfoBarTestAnimationListener mListener;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/javascript/CloseWatcherTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/javascript/CloseWatcherTest.java index a70b3de..a98f20a 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/javascript/CloseWatcherTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/javascript/CloseWatcherTest.java
@@ -32,7 +32,6 @@ @Rule public ChromeTabbedActivityTestRule mActivityTestRule = new ChromeTabbedActivityTestRule(); - private static final String TAG = "CloseWatcherTest"; private static final String TEST_URL = UrlUtils.encodeHtmlDataUri("<body><script>let watcher = new CloseWatcher(); " + "watcher.onclose = () => window.document.title = 'SUCCESS';</script></body>");
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/jsdialog/JavascriptAppModalDialogTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/jsdialog/JavascriptAppModalDialogTest.java index ae55323d..b939266 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/jsdialog/JavascriptAppModalDialogTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/jsdialog/JavascriptAppModalDialogTest.java
@@ -65,7 +65,6 @@ public BlankCTATabInitialStateRule mBlankCTATabInitialStateRule = new BlankCTATabInitialStateRule(sActivityTestRule, true); - private static final String TAG = "JSAppModalDialogTest"; private static final String EMPTY_PAGE = UrlUtils.encodeHtmlDataUri( "<html><title>Modal Dialog Test</title><p>Testcase.</p></title></html>"); private static final String BEFORE_UNLOAD_URL = UrlUtils.encodeHtmlDataUri("<html>"
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/metrics/ChromiumAndroidLinkerMetricsTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/metrics/ChromiumAndroidLinkerMetricsTest.java index 97e61ea..46d9a26 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/metrics/ChromiumAndroidLinkerMetricsTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/metrics/ChromiumAndroidLinkerMetricsTest.java
@@ -32,8 +32,6 @@ @Batch(Batch.PER_CLASS) @CommandLineFlags.Add(ChromeSwitches.DISABLE_FIRST_RUN_EXPERIENCE) public class ChromiumAndroidLinkerMetricsTest { - private static final String TAG = "ChromiumAndroidLinkerMetricsTest"; - private static final String BROWSER_HISTOGRAM = "ChromiumAndroidLinker.BrowserLoadTime2"; private static final String PAGE_PREFIX = "/chrome/test/data/android/google.html";
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/ntp/NewTabPageTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/ntp/NewTabPageTest.java index d3270563..2723b8b4 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/ntp/NewTabPageTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/ntp/NewTabPageTest.java
@@ -148,7 +148,6 @@ } private static final int ARTICLE_SECTION_HEADER_POSITION = 1; - private static final int SIGNIN_PROMO_POSITION = 2; private static final int RENDER_TEST_REVISION = 5;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageSavePageLaterEvaluationTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageSavePageLaterEvaluationTest.java index d50b49a..d3474e0 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageSavePageLaterEvaluationTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageSavePageLaterEvaluationTest.java
@@ -102,7 +102,6 @@ private static final String INPUT_FILE_PATH = "paquete/offline_eval_urls.txt"; private static final String LOG_OUTPUT_FILE_PATH = "paquete/offline_eval_logs.txt"; private static final String RESULT_OUTPUT_FILE_PATH = "paquete/offline_eval_results.txt"; - private static final int GET_PAGES_TIMEOUT_MS = 30000; private static final int PAGE_MODEL_LOAD_TIMEOUT_MS = 30000; private static final int REMOVE_REQUESTS_TIMEOUT_MS = 30000;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageUtilsTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageUtilsTest.java index 529d0131..0ac54ee 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageUtilsTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/OfflinePageUtilsTest.java
@@ -64,7 +64,6 @@ public BlankCTATabInitialStateRule mInitialStateRule = new BlankCTATabInitialStateRule(sActivityTestRule, true); - private static final String TAG = "OfflinePageUtilsTest"; private static final String TEST_PAGE = "/chrome/test/data/android/about.html"; private static final int TIMEOUT_MS = 5000; private static final ClientId BOOKMARK_ID =
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/RecentTabsTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/RecentTabsTest.java index 9a6f142..ffc47962 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/RecentTabsTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/offlinepages/RecentTabsTest.java
@@ -46,7 +46,6 @@ new BlankCTATabInitialStateRule(sActivityTestRule, false); private static final String TEST_PAGE = "/chrome/test/data/android/about.html"; - private static final int TIMEOUT_MS = 5000; private OfflinePageBridge mOfflinePageBridge; private EmbeddedTestServer mTestServer;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/LocationBarLayoutTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/LocationBarLayoutTest.java index 603a4c7a..bf23ecd 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/LocationBarLayoutTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/LocationBarLayoutTest.java
@@ -67,7 +67,6 @@ public class LocationBarLayoutTest { private static final String SEARCH_TERMS = "machine learning"; private static final String SEARCH_TERMS_URL = "testing.com"; - private static final String GOOGLE_SRP_URL = "https://www.google.com/search?q=machine+learning"; @Rule public ChromeTabbedActivityTestRule mActivityTestRule = new ChromeTabbedActivityTestRule();
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/geo/GeolocationHeaderTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/geo/GeolocationHeaderTest.java index 6195f177..7a64439 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/geo/GeolocationHeaderTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/omnibox/geo/GeolocationHeaderTest.java
@@ -55,13 +55,10 @@ private static final String SEARCH_URL_1 = "https://www.google.com/search?q=potatoes"; private static final String SEARCH_URL_2 = "https://www.google.co.jp/webhp?#q=dinosaurs"; - private static final String DISABLE_FEATURES = "disable-features="; - private static final String ENABLE_FEATURES = "enable-features="; private static final String GOOGLE_BASE_URL_SWITCH = "google-base-url=https://www.google.com"; private static final double LOCATION_LAT = 20.3; private static final double LOCATION_LONG = 155.8; private static final float LOCATION_ACCURACY = 20f; - private static final String TAG = "GeolocationHeaderTst"; @Before public void setUp() throws InterruptedException {
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoDiscoverabilityTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoDiscoverabilityTest.java index 9d09b90..676a0d0 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoDiscoverabilityTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoDiscoverabilityTest.java
@@ -53,6 +53,7 @@ import org.chromium.components.permissions.PermissionDialogController; import org.chromium.components.search_engines.TemplateUrlService; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.test.util.TestThreadUtils; import org.chromium.device.geolocation.LocationProviderOverrider; import org.chromium.ui.modelutil.PropertyModel; @@ -290,7 +291,7 @@ public void testPermissionRequestTypes( @ContentSettingsType int contentSettingsType, boolean isInSiteSettings) { if (contentSettingsType == ContentSettingsType.BLUETOOTH_CHOOSER_DATA) { - isInSiteSettings = ContentFeatureList.isEnabled( + isInSiteSettings = ContentFeatureMap.isEnabled( ContentFeatureList.WEB_BLUETOOTH_NEW_PERMISSIONS_BACKEND); } Assert.assertEquals(ContentSettingsType.DEFAULT, mMediator.getLastPermission());
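Same migration as in TabViewAndroidDelegate above: the enabled-state query moves to ContentFeatureMap while the feature name constant stays on ContentFeatureList, which is why both imports now coexist in this test. A minimal sketch (the wrapper class is illustrative):

    import org.chromium.content_public.browser.ContentFeatureList;
    import org.chromium.content_public.browser.ContentFeatureMap;

    /** Illustrative wrapper; only the isEnabled() call comes from the CL. */
    final class WebBluetoothBackendCheck {
        private WebBluetoothBackendCheck() {}

        static boolean isNewPermissionsBackendEnabled() {
            // The query goes through ContentFeatureMap; the feature string
            // constant is still defined on ContentFeatureList.
            return ContentFeatureMap.isEnabled(
                    ContentFeatureList.WEB_BLUETOOTH_NEW_PERMISSIONS_BACKEND);
        }
    }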
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoViewTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoViewTest.java index 3f4b3dc4..28479601 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoViewTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/page_info/PageInfoViewTest.java
@@ -117,8 +117,6 @@ ContentSwitches.HOST_RESOLVER_RULES + "=MAP * 127.0.0.1"}) @Batch(PER_CLASS) public class PageInfoViewTest { - private static final String TAG = "PageInfoViewTest"; - private static final String sSimpleHtml = "/chrome/test/data/android/simple.html"; private static final String sSiteDataHtml = "/content/test/data/browsing_data/site_data.html";
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/permissions/PermissionUpdateMessageTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/permissions/PermissionUpdateMessageTest.java index f9f4167a..0036fac 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/permissions/PermissionUpdateMessageTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/permissions/PermissionUpdateMessageTest.java
@@ -64,7 +64,6 @@ private static final String GEOLOCATION_PAGE = "/chrome/test/data/geolocation/geolocation_on_load.html"; private static final String MEDIASTREAM_PAGE = "/content/test/data/media/getusermedia.html"; - private static final String DOWNLOAD_PAGE = "/chrome/test/data/android/download/get.html"; private EmbeddedTestServer mTestServer; @Rule
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/provider/ProviderBookmarksUriTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/provider/ProviderBookmarksUriTest.java index c7dd7a86..6ecddfa 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/provider/ProviderBookmarksUriTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/provider/ProviderBookmarksUriTest.java
@@ -33,7 +33,6 @@ @Rule public ProviderTestRule mProviderTestRule = new ProviderTestRule(); - private static final String TAG = "ProviderBookmarkUriTest"; private static final byte[] FAVICON_DATA = { 1, 2, 3 }; private Uri mBookmarksUri;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/push_messaging/PushMessagingTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/push_messaging/PushMessagingTest.java index 5bf0218..0784f06 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/push_messaging/PushMessagingTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/push_messaging/PushMessagingTest.java
@@ -66,7 +66,6 @@ "/chrome/test/data/push_messaging/push_messaging_test_android.html"; private static final String ABOUT_BLANK = "about:blank"; private static final int TITLE_UPDATE_TIMEOUT_SECONDS = 5; - private static final String PRIVATE_DATA_DIRECTORY_SUFFIX = "chrome"; private final CallbackHelper mMessageHandledHelper; private String mPushTestPage;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/searchwidget/SearchActivityTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/searchwidget/SearchActivityTest.java index b70c3dc..f7494c5 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/searchwidget/SearchActivityTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/searchwidget/SearchActivityTest.java
@@ -117,7 +117,6 @@ @CommandLineFlags.Add({ChromeSwitches.DISABLE_FIRST_RUN_EXPERIENCE}) @DoNotBatch(reason = "Test start up behaviors.") public class SearchActivityTest { - private static final long OMNIBOX_SHOW_TIMEOUT_MS = 5000L; private static final String TEST_PNG_IMAGE_FILE_EXTENSION = ".png"; private static class TestDelegate
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/signin/SyncConsentFragmentTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/signin/SyncConsentFragmentTest.java index e9e11c12..8b4b9e2d 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/signin/SyncConsentFragmentTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/signin/SyncConsentFragmentTest.java
@@ -164,7 +164,6 @@ when(mExternalAuthUtilsMock.canUseGooglePlayServices(any())).thenReturn(true); ExternalAuthUtils.setInstanceForTesting(mExternalAuthUtilsMock); mActivityTestRule.setFinishActivity(true); - mChromeActivityTestRule.startMainActivityOnBlankPage(); } @After @@ -181,6 +180,7 @@ @Feature("RenderTest") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentDefaultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -198,6 +198,7 @@ @Feature("RenderTest") @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testTangibleSyncConsentFragmentDefaultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -215,6 +216,7 @@ @Feature("RenderTest") @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC + ":group_id/2"}) public void testTangibleSyncConsentFragmentVariantBDefaultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -232,6 +234,7 @@ @Feature("RenderTest") @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC + ":group_id/3"}) public void testTangibleSyncConsentFragmentVariantCDefaultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -249,6 +252,7 @@ @Feature("RenderTest") @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC + ":group_id/4"}) public void testTangibleSyncConsentFragmentVariantDDefaultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -266,6 +270,7 @@ @Feature("RenderTest") @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC + ":group_id/5"}) public void testTangibleSyncConsentFragmentVariantEDefaultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -283,6 +288,7 @@ @Feature("RenderTest") @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC + ":group_id/6"}) public void testTangibleSyncConsentFragmentVariantFDefaultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -300,6 +306,7 @@ @Feature("RenderTest") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentNewAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.setResultForNextAddAccountFlow(Activity.RESULT_CANCELED, null); mSyncConsentActivity = ActivityTestUtils.waitForActivity( InstrumentationRegistry.getInstrumentation(), 
SyncConsentActivity.class, () -> { @@ -316,6 +323,7 @@ @Feature("RenderTest") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentNotDefaultAccountWithPrimaryAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSigninTestRule.addAccount("test.second.account@gmail.com"); @@ -335,6 +343,7 @@ @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testTangibleSyncConsentFragmentNotDefaultAccountWithSecondaryAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); String secondAccountName = "test.second.account@gmail.com"; mSigninTestRule.addAccount(secondAccountName); @@ -354,6 +363,7 @@ @DisabledTest(message = "crbug.com/1304737") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentWithChildAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addChildTestAccountThenWaitForSignin(); mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -371,6 +381,7 @@ @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentWithChildAccountWithNonDisplayableAccountEmail() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount( SigninTestRule.generateChildEmail(AccountManagerTestRule.TEST_ACCOUNT_EMAIL), SigninTestRule.NON_DISPLAYABLE_EMAIL_ACCOUNT_CAPABILITIES); @@ -391,6 +402,7 @@ public void testSyncConsentFragmentWithChildAccountWithNonDisplayableAccountEmailWithEmptyDisplayName() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount( SigninTestRule.generateChildEmail(AccountManagerTestRule.TEST_ACCOUNT_EMAIL), "", "", null, SigninTestRule.NON_DISPLAYABLE_EMAIL_ACCOUNT_CAPABILITIES); @@ -411,6 +423,7 @@ @Feature("RenderTest") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRESyncConsentFragmentWithNoAccountsOnDevice() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); var startPageHistogram = HistogramWatcher.newSingleRecordWatcher( "Signin.SigninStartedAccessPoint", SigninAccessPoint.START_PAGE); CustomSyncConsentFirstRunFragment fragment = new CustomSyncConsentFirstRunFragment(); @@ -430,6 +443,7 @@ @Feature("RenderTest") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRESyncConsentFragmentWithAdultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); var startPageHistogram = HistogramWatcher.newSingleRecordWatcher( "Signin.SigninStartedAccessPoint", SigninAccessPoint.START_PAGE); mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); @@ -450,6 +464,7 @@ @Feature("RenderTest") @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRETangibleSyncConsentFragmentWithAdultAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); var startPageHistogram = HistogramWatcher.newSingleRecordWatcher( "Signin.SigninStartedAccessPoint", SigninAccessPoint.START_PAGE); mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); @@ -470,6 +485,7 @@ @Feature("RenderTest") 
@DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRESyncConsentFragmentWithChildAccount() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); var startPageHistogram = HistogramWatcher.newSingleRecordWatcher( "Signin.SigninStartedAccessPoint", SigninAccessPoint.START_PAGE); mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); @@ -495,6 +511,7 @@ @Feature("RenderTest") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRESyncConsentFragmentWhenSignedInWithoutSync() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.addTestAccountThenSignin(); CustomSyncConsentFirstRunFragment fragment = new CustomSyncConsentFirstRunFragment(); Bundle bundle = new Bundle(); @@ -511,6 +528,7 @@ @MediumTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRESyncConsentFragmentWhenSelectedAccountIsRemoved() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); final CoreAccountInfo defaultAccount = mSigninTestRule.addAccount("test.default.account@gmail.com"); final CoreAccountInfo primaryAccount = mSigninTestRule.addTestAccountThenSignin(); @@ -534,6 +552,7 @@ @Feature("RenderTest") @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRESyncConsentFragmentWhenSignedInWithoutSyncDynamically() throws IOException { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CustomSyncConsentFirstRunFragment fragment = new CustomSyncConsentFirstRunFragment(); Bundle bundle = new Bundle(); bundle.putBoolean(SyncConsentFirstRunFragment.IS_CHILD_ACCOUNT, false); @@ -556,6 +575,7 @@ @LargeTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testClickingSettingsDoesNotSetInitialSyncFeatureSetupComplete() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -588,6 +608,7 @@ @LargeTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testClickingSettingsDoesNotSetInitialSyncFeatureSetupCompleteWithTangibleSync() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -618,6 +639,7 @@ @Test @LargeTest public void testClickingSettingsThenCancelForChildIsNoOp() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addChildTestAccountThenWaitForSignin(); // Check the user is not consented to sync. 
CriteriaHelper.pollUiThread(() -> { @@ -662,6 +684,7 @@ @LargeTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testTangibleSyncConsentFragmentWhenSelectedAccountIsRemoved() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.addAccount("test.default.account@gmail.com"); CoreAccountInfo selectedAccountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); @@ -681,6 +704,7 @@ @LargeTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testFRESyncConsentFragmentWithoutSelectedAccount() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CustomSyncConsentFirstRunFragment fragment = new CustomSyncConsentFirstRunFragment(); Bundle bundle = new Bundle(); bundle.putBoolean(SyncConsentFirstRunFragment.IS_CHILD_ACCOUNT, false); @@ -697,6 +721,7 @@ @MediumTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentWithDefaultFlow() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); var settingsHistogram = HistogramWatcher.newSingleRecordWatcher( "Signin.SigninStartedAccessPoint", SigninAccessPoint.SETTINGS); mSigninTestRule.setResultForNextAddAccountFlow(Activity.RESULT_CANCELED, null); @@ -720,6 +745,7 @@ @MediumTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSelectNonDefaultAccountInAccountPickerDialog() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); var bookmarkHistogram = HistogramWatcher.newSingleRecordWatcher( "Signin.SigninStartedAccessPoint", SigninAccessPoint.BOOKMARK_MANAGER); CoreAccountInfo defaultAccountInfo = @@ -747,6 +773,7 @@ @LargeTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentAddAccountFlowSucceeded() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.setResultForNextAddAccountFlow(Activity.RESULT_OK, NEW_ACCOUNT_NAME); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() @@ -770,6 +797,7 @@ @LargeTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testTangibleSyncConsentFragmentAddAccountFlowSucceeded() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.setResultForNextAddAccountFlow(Activity.RESULT_OK, NEW_ACCOUNT_NAME); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() @@ -794,6 +822,7 @@ @MediumTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testTangibleSyncConsentFragmentOnlyEnablesSpecificDataTypes() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -821,6 +850,7 @@ @MediumTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC + ":group_id/6"}) public void testTangibleSyncConsentFragmentGroupFEnablesAllDataTypes() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -849,6 +879,7 @@ @LargeTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentAddAccountFlowCancelled() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.setResultForNextAddAccountFlow(Activity.RESULT_CANCELED, null); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() @@ -871,6 +902,7 @@ @LargeTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public 
void testTangibleSyncConsentFragmentAddAccountFlowCancelled() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.setResultForNextAddAccountFlow(Activity.RESULT_CANCELED, null); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() @@ -895,6 +927,7 @@ @LargeTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentAddAccountFlowFailed() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() .expectIntRecords("Signin.AddAccountState", State.REQUESTED, State.FAILED) @@ -917,6 +950,7 @@ @LargeTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testTangibleSyncConsentFragmentAddAccountFlowFailed() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() .expectIntRecords("Signin.AddAccountState", State.REQUESTED, State.FAILED) @@ -939,6 +973,7 @@ @LargeTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testSyncConsentFragmentAddAccountFlowReturnedNullAccountName() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); mSigninTestRule.setResultForNextAddAccountFlow(Activity.RESULT_OK, null); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() @@ -961,6 +996,7 @@ @LargeTest @EnableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testTangibleSyncConsentFragmentAddAccountFlowReturnedNullAccountName() { + mChromeActivityTestRule.startMainActivityOnBlankPage(); HistogramWatcher addAccountStateHistogram = HistogramWatcher.newBuilder() .expectIntRecords("Signin.AddAccountState", State.REQUESTED, State.STARTED, @@ -986,6 +1022,8 @@ @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testAutomotiveDevice_deviceLockCreated_syncAcceptedSuccessfully() throws IOException { + mAutoTestRule.setIsAutomotive(true); + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -994,9 +1032,11 @@ mChromeActivityTestRule.getActivity(), SigninAccessPoint.BOOKMARK_MANAGER, accountInfo.getEmail()); }); - mAutoTestRule.setIsAutomotive(true); - // Should display the sync page. + // Should display the sync page, clicking the 'more' button to scroll down if needed. + if (mSyncConsentActivity.findViewById(R.id.more_button).isShown()) { + onView(withId(R.id.more_button)).perform(click()); + } onView(withText(R.string.signin_accept_button)).check(matches(isDisplayed())); onView(withText(R.string.signin_accept_button)).perform(click()); @@ -1029,6 +1069,8 @@ @LargeTest @DisableFeatures({ChromeFeatureList.TANGIBLE_SYNC}) public void testAutomotiveDevice_deviceLockRefused_syncRefused() throws IOException { + mAutoTestRule.setIsAutomotive(true); + mChromeActivityTestRule.startMainActivityOnBlankPage(); CoreAccountInfo accountInfo = mSigninTestRule.addAccount(AccountManagerTestRule.TEST_ACCOUNT_EMAIL); mSyncConsentActivity = ActivityTestUtils.waitForActivity( @@ -1037,9 +1079,11 @@ mChromeActivityTestRule.getActivity(), SigninAccessPoint.BOOKMARK_MANAGER, accountInfo.getEmail()); }); - mAutoTestRule.setIsAutomotive(true); - // Should display the sync page. + // Should display the sync page, clicking the 'more' button to scroll down if needed. 
+ if (mSyncConsentActivity.findViewById(R.id.more_button).isShown()) { + onView(withId(R.id.more_button)).perform(click()); + } onView(withText(R.string.signin_accept_button)).check(matches(isDisplayed())); onView(withText(R.string.signin_accept_button)).perform(click());
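In SyncConsentFragmentTest, startMainActivityOnBlankPage() moves out of setUp() and into each test so the automotive cases can call mAutoTestRule.setIsAutomotive(true) before the activity is created, and the accept button is now reached through the 'more' button when the consent text does not fit on screen. A condensed sketch of the resulting per-test ordering, assuming the rules and fields declared by the test class; the sync-screen launch step is abbreviated:

    import static androidx.test.espresso.Espresso.onView;
    import static androidx.test.espresso.action.ViewActions.click;
    import static androidx.test.espresso.assertion.ViewAssertions.matches;
    import static androidx.test.espresso.matcher.ViewMatchers.isDisplayed;
    import static androidx.test.espresso.matcher.ViewMatchers.withId;
    import static androidx.test.espresso.matcher.ViewMatchers.withText;

    import org.junit.Test;

    // Sketch only: mAutoTestRule, mChromeActivityTestRule, mSyncConsentActivity
    // and the R references are members of the enclosing test class.
    @Test
    public void automotiveSyncConsentOrderingSketch() {
        // 1. Enable automotive emulation before the activity exists so the
        //    override is applied during activity creation.
        mAutoTestRule.setIsAutomotive(true);

        // 2. Only now launch the tabbed activity on a blank page; previously
        //    this happened in setUp() for every test.
        mChromeActivityTestRule.startMainActivityOnBlankPage();

        // 3. Open the sync consent screen (abbreviated; see the tests above).

        // 4. On small displays the consent text may not fit, so tap the 'more'
        //    button to bring the accept button into view before asserting on it.
        if (mSyncConsentActivity.findViewById(R.id.more_button).isShown()) {
            onView(withId(R.id.more_button)).perform(click());
        }
        onView(withText(R.string.signin_accept_button)).check(matches(isDisplayed()));
        onView(withText(R.string.signin_accept_button)).perform(click());
    }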
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/sync/TypedUrlsTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/sync/TypedUrlsTest.java index 18dee9fb..f4e7afa8 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/sync/TypedUrlsTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/sync/TypedUrlsTest.java
@@ -50,8 +50,6 @@ @Rule public SyncTestRule mSyncTestRule = new SyncTestRule(); - private static final String TAG = "TypedUrlsTest"; - private static final String TYPED_URLS_TYPE = "Typed URLs"; // EmbeddedTestServer is preferred here but it can't be used. The test server
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/tab/InterceptNavigationDelegateTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/tab/InterceptNavigationDelegateTest.java index a42b93a..3ad3aec 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/tab/InterceptNavigationDelegateTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/tab/InterceptNavigationDelegateTest.java
@@ -77,8 +77,6 @@ BASE_PAGE + "navigation_from_user_gesture_to_iframe_page.html"; private static final String NAVIGATION_FROM_PRERENDERING_PAGE = BASE_PAGE + "navigation_from_prerender.html"; - private static final String IFRAME_CONTAINER_PAGE = BASE_PAGE + "iframe_container_page.html"; - private static final String HELLO_PAGE = BASE_PAGE + "hello.html"; private static final long DEFAULT_MAX_TIME_TO_WAIT_IN_MS = 3000; private static final long LONG_MAX_TIME_TO_WAIT_IN_MS = 20000;
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/tab/state/FilePersistedTabDataStorageTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/tab/state/FilePersistedTabDataStorageTest.java index a10e7d7..8c9a9bd 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/tab/state/FilePersistedTabDataStorageTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/tab/state/FilePersistedTabDataStorageTest.java
@@ -55,8 +55,6 @@ private static final byte[] DATA_A = {13, 14}; private static final byte[] DATA_B = {9, 10}; - private static final byte[] DATA_C = {15, 1}; - private static final byte[] DATA_D = {16, 31}; @Rule public TestRule mProcessor = new Features.InstrumentationProcessor();
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerPhoneTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerPhoneTest.java index e5684fd..e755c5f 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerPhoneTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerPhoneTest.java
@@ -37,8 +37,8 @@ import org.chromium.base.test.util.Batch; import org.chromium.base.test.util.CommandLineFlags; -import org.chromium.base.test.util.DisableIf; import org.chromium.base.test.util.DisabledTest; +import org.chromium.base.test.util.Restriction; import org.chromium.base.test.util.UserActionTester; import org.chromium.chrome.browser.flags.ChromeFeatureList; import org.chromium.chrome.browser.flags.ChromeSwitches; @@ -49,7 +49,7 @@ import org.chromium.chrome.test.util.ActivityTestUtils; import org.chromium.components.embedder_support.util.UrlConstants; import org.chromium.content_public.browser.test.util.TestThreadUtils; -import org.chromium.ui.test.util.UiDisableIf; +import org.chromium.ui.test.util.UiRestriction; import org.chromium.ui.test.util.ViewUtils; /** @@ -62,7 +62,7 @@ "enable-features=" + ChromeFeatureList.ADAPTIVE_BUTTON_IN_TOP_TOOLBAR_CUSTOMIZATION_V2 + "<Study", "force-fieldtrials=Study/Group", "force-fieldtrial-params=Study.Group:mode/always-new-tab"}) -@DisableIf.Device(type = {UiDisableIf.TABLET}) +@Restriction({UiRestriction.RESTRICTION_TYPE_PHONE}) public class OptionalNewTabButtonControllerPhoneTest { private static final String TEST_PAGE = "/chrome/test/data/android/navigate/simple.html";
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerTabletTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerTabletTest.java index 6b0b1e53..77a64831 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerTabletTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/toolbar/adaptive/OptionalNewTabButtonControllerTabletTest.java
@@ -25,7 +25,7 @@ import org.chromium.base.test.util.Batch; import org.chromium.base.test.util.CommandLineFlags; -import org.chromium.base.test.util.DisableIf; +import org.chromium.base.test.util.Restriction; import org.chromium.chrome.browser.flags.ChromeFeatureList; import org.chromium.chrome.browser.flags.ChromeSwitches; import org.chromium.chrome.test.ChromeJUnit4ClassRunner; @@ -34,7 +34,7 @@ import org.chromium.chrome.test.batch.BlankCTATabInitialStateRule; import org.chromium.chrome.test.util.ActivityTestUtils; import org.chromium.chrome.test.util.browser.Features.EnableFeatures; -import org.chromium.ui.test.util.UiDisableIf; +import org.chromium.ui.test.util.UiRestriction; import org.chromium.ui.test.util.ViewUtils; /** @@ -47,7 +47,7 @@ @CommandLineFlags.Add({ChromeSwitches.DISABLE_FIRST_RUN_EXPERIENCE, "enable-features=" + ChromeFeatureList.ADAPTIVE_BUTTON_IN_TOP_TOOLBAR + "<Study", "force-fieldtrials=Study/Group", "force-fieldtrial-params=Study.Group:mode/always-new-tab"}) -@DisableIf.Device(type = {UiDisableIf.PHONE}) +@Restriction({UiRestriction.RESTRICTION_TYPE_TABLET}) public class OptionalNewTabButtonControllerTabletTest { private static final String TEST_PAGE = "/chrome/test/data/android/navigate/simple.html";
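Both OptionalNewTabButtonController test classes switch from excluding a form factor with @DisableIf.Device to positively declaring the required one with @Restriction. A minimal sketch of the two class-level annotations after the change (the empty classes are placeholders):

    import org.chromium.base.test.util.Restriction;
    import org.chromium.ui.test.util.UiRestriction;

    // The test runner skips a restricted class on devices that do not satisfy
    // the declared restriction.
    @Restriction({UiRestriction.RESTRICTION_TYPE_PHONE})
    class PhoneOnlyExampleTest {}

    @Restriction({UiRestriction.RESTRICTION_TYPE_TABLET})
    class TabletOnlyExampleTest {}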
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/vr/util/NativeUiUtils.java b/chrome/android/javatests/src/org/chromium/chrome/browser/vr/util/NativeUiUtils.java index 145c914..8d01c1d 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/vr/util/NativeUiUtils.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/vr/util/NativeUiUtils.java
@@ -60,7 +60,6 @@ // Arbitrary but reasonable amount of time to expect the UI to stop updating after interacting // with an element. - private static final int DEFAULT_UI_QUIESCENCE_TIMEOUT_MS = 2000; /** * Blocks until the specified number of frames have been triggered by the Choreographer.
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/webapps/WebApkUpdateIntegrationTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/webapps/WebApkUpdateIntegrationTest.java index 151f20c..3b5530e 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/webapps/WebApkUpdateIntegrationTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/webapps/WebApkUpdateIntegrationTest.java
@@ -63,20 +63,16 @@ private static final String TAG = "WebApkIntegratTest"; - private static final long STARTUP_TIMEOUT = 15000L; private static final String WEBAPK_PACKAGE_NAME = "org.chromium.webapk.test"; // Android Manifest meta data for {@link WEBAPK_PACKAGE_NAME}. // TODO(eirage): change all to use mTestServer. - private static final String WEBAPK_HOST_NAME = "pwa-directory.appspot.com"; private static final String WEBAPK_MANIFEST_URL = "/chrome/test/data/banners/manifest.json"; private static final String WEBAPK_START_URL = "/chrome/test/data/banners/manifest_test_page.html"; private static final String WEBAPK_SCOPE_URL = "/chrome/test/data/banners/"; private static final String WEBAPK_NAME = "Manifest test app"; private static final String WEBAPK_SHORT_NAME = "Manifest test app"; - private static final String WEBAPK_MANIFEST_ID = "/id"; - private static final String WEBAPK_APP_KEY = "/key"; private static final String ICON_URL = "/chrome/test/data/banners/image-512px.png"; private static final String ICON_MURMUR2_HASH = "7742433188808797392"; private static final String DISPLAY_MODE = "standalone";
diff --git a/chrome/android/javatests/src/org/chromium/chrome/browser/webshare/WebShareTest.java b/chrome/android/javatests/src/org/chromium/chrome/browser/webshare/WebShareTest.java index cc637c7..369d147 100644 --- a/chrome/android/javatests/src/org/chromium/chrome/browser/webshare/WebShareTest.java +++ b/chrome/android/javatests/src/org/chromium/chrome/browser/webshare/WebShareTest.java
@@ -51,10 +51,7 @@ private static final String TEST_FILE = "/content/test/data/android/webshare.html"; private static final String TEST_FILE_APK = "/content/test/data/android/webshare-apk.html"; - private static final String TEST_FILE_BMP = "/content/test/data/android/webshare-bmp.html"; - private static final String TEST_FILE_CSV = "/content/test/data/android/webshare-csv.html"; private static final String TEST_FILE_DEX = "/content/test/data/android/webshare-dex.html"; - private static final String TEST_FILE_OGG = "/content/test/data/android/webshare-ogg.html"; private static final String TEST_FILE_MANY = "/content/test/data/android/webshare-many.html"; private static final String TEST_FILE_LARGE = "/content/test/data/android/webshare-large.html"; private static final String TEST_FILE_SEPARATOR =
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivityUnitTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivityUnitTest.java index ffc806cc..1a16c0d 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivityUnitTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/ChromeBaseAppCompatActivityUnitTest.java
@@ -26,6 +26,7 @@ import org.chromium.base.test.BaseRobolectricTestRunner; import org.chromium.chrome.test.AutomotiveContextWrapperTestRule; +import org.chromium.ui.display.DisplayUtil; /** * Unit tests for {@link ChromeBaseAppCompatActivity}. @@ -77,8 +78,7 @@ config.smallestScreenWidthDp = 0; ChromeBaseAppCompatActivity.applyOverridesForAutomotive(mContext, config); - float automotiveScaleUpFactor = - ChromeBaseAppCompatActivity.getDensityOverrideFactorForAutomotiveDevices(); + float automotiveScaleUpFactor = DisplayUtil.UI_SCALING_FACTOR_FOR_AUTO; assertEquals("Density dpi should be scaled up from the real display metric " + "on automotive.", (int) (MOCK_REAL_DISPLAY_DENSITY_DPI * automotiveScaleUpFactor), config.densityDpi);
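The automotive density override factor is now read from the shared DisplayUtil.UI_SCALING_FACTOR_FOR_AUTO constant instead of a ChromeBaseAppCompatActivity-local accessor, so the unit test asserts against the same value production code uses. A small sketch of the expected-value computation (the helper class and sample dpi are illustrative):

    import org.chromium.ui.display.DisplayUtil;

    /** Illustrative helper mirroring the assertion in the updated unit test. */
    final class AutomotiveDensityExample {
        private AutomotiveDensityExample() {}

        /** Returns the densityDpi expected after the automotive override runs. */
        static int expectedAutomotiveDensityDpi(int realDisplayDensityDpi) {
            return (int) (realDisplayDensityDpi * DisplayUtil.UI_SCALING_FACTOR_FOR_AUTO);
        }
    }

For example, expectedAutomotiveDensityDpi(160) gives the densityDpi the test expects when the mocked real display reports 160 dpi.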
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePromptTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePromptTest.java index d01bb78e..8366b1c 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePromptTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/autofill/SaveUpdateAddressProfilePromptTest.java
@@ -34,7 +34,7 @@ import org.chromium.base.test.BaseRobolectricTestRunner; import org.chromium.base.test.util.JniMocker; import org.chromium.chrome.browser.autofill.PersonalDataManager.AutofillProfile; -import org.chromium.chrome.browser.autofill.editors.AddressEditor; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator; import org.chromium.chrome.browser.flags.ChromeFeatureList; import org.chromium.chrome.browser.profiles.Profile; import org.chromium.chrome.test.util.browser.Features; @@ -68,7 +68,7 @@ @Mock private Profile mProfile; @Mock - private AddressEditor mAddressEditor; + private AddressEditorCoordinator mAddressEditor; @Captor private ArgumentCaptor<Callback<AutofillAddress>> mCallbackCaptor;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/bookmarks/BookmarkImageFetcherTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/bookmarks/BookmarkImageFetcherTest.java index db854e8..5d80a98 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/bookmarks/BookmarkImageFetcherTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/bookmarks/BookmarkImageFetcherTest.java
@@ -105,7 +105,6 @@ private final BookmarkId mBookmarkId2 = new BookmarkId(/*id=*/3, BookmarkType.NORMAL); private final BookmarkId mReadingListFolderId = new BookmarkId(/*id=*/5, BookmarkType.READING_LIST); - private final BookmarkId mReadingListId = new BookmarkId(/*id=*/6, BookmarkType.READING_LIST); private final BookmarkItem mFolderItem = new BookmarkItem(mFolderId, "Folder", null, true, null, true, false, 0, false);
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/InstalledWebappGeolocationBridgeTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/InstalledWebappGeolocationBridgeTest.java index 904fe53..b8f1cd2 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/InstalledWebappGeolocationBridgeTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/InstalledWebappGeolocationBridgeTest.java
@@ -44,8 +44,6 @@ @Config(manifest = Config.NONE, shadows = {ShadowGURL.class}) @LooperMode(LooperMode.Mode.LEGACY) public class InstalledWebappGeolocationBridgeTest { - private static final String EXTRA_CALLBACK = "extraCallback"; - private static final long NATIVE_POINTER = 12; private GURL mScope;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/PermissionUpdaterTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/PermissionUpdaterTest.java index 912d0c4f..11a1544a 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/PermissionUpdaterTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/permissiondelegation/PermissionUpdaterTest.java
@@ -40,7 +40,6 @@ private static final Origin ORIGIN = Origin.create("https://www.website.com"); private static final String URL = "https://www.website.com"; private static final String PACKAGE_NAME = "com.package.name"; - private static final String OTHER_PACKAGE_NAME = "com.other.package.name"; @Rule public TestRule mProcessor = new Features.JUnitProcessor();
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/ui/controller/trustedwebactivity/TwaVerifierTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/ui/controller/trustedwebactivity/TwaVerifierTest.java index 320ad74..f80c8d0 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/ui/controller/trustedwebactivity/TwaVerifierTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/browserservices/ui/controller/trustedwebactivity/TwaVerifierTest.java
@@ -52,7 +52,6 @@ private static final String INITIAL_URL = "https://www.initialurl.com/page.html"; private static final String ADDITIONAL_ORIGIN = "https://www.otherverifiedorigin.com"; private static final String OTHER_URL = "https://www.notverifiedurl.com/page2.html"; - private static final String PACKAGE_NAME = "some.package.name"; @Rule public TestRule mFeaturesProcessor = new Features.JUnitProcessor();
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/ScrollingStripStackerUnitTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/ScrollingStripStackerUnitTest.java index 7941c75..a117820c 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/ScrollingStripStackerUnitTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/ScrollingStripStackerUnitTest.java
@@ -22,10 +22,6 @@ private static final float TAB_OFFSET_Y = 2; private static final float TAB_WIDTH = 25; private static final float CACHED_TAB_WIDTH = 30; - private static final float STRIP_WIDTH = 200; - private static final float TAB_OVERLAP = 5; - private static final float STRIP_MARGIN = 2; - private static final float BUTTON_WIDTH = 10; private ScrollingStripStacker mTarget = new ScrollingStripStacker(); @Mock
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/StripLayoutHelperTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/StripLayoutHelperTest.java index fe92a489..1e525c2 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/StripLayoutHelperTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/compositor/overlays/strip/StripLayoutHelperTest.java
@@ -120,7 +120,6 @@ private static final float SCREEN_WIDTH_LANDSCAPE = 1200.f; private static final float SCREEN_HEIGHT = 1600.f; private static final float TAB_WIDTH_1 = 140.f; - private static final float TAB_WIDTH_2 = 160.f; private static final float TAB_WIDTH_SMALL = 108.f; private static final float TAB_OVERLAP_WIDTH = 28.f; private static final float TAB_WIDTH_MEDIUM = 156.f; @@ -130,14 +129,12 @@ private static final float NEW_TAB_BTN_Y = 1400.f; private static final float NEW_TAB_BTN_WIDTH = 100.f; private static final float NEW_TAB_BTN_HEIGHT = 100.f; - private static final float NEW_TAB_BUTTON_WITH_MODEL_SELECTOR_BUTTON_PADDING = 8.f; private static final float BUTTON_END_PADDING_FOLIO = 10.f; private static final float BUTTON_END_PADDING_DETACHED = 9.f; private static final float MODEL_SELECTOR_BUTTON_BG_WIDTH_FOLIO = 36.f; private static final float MODEL_SELECTOR_BUTTON_BG_WIDTH_DETACHED = 38.f; private static final float CLOSE_BTN_VISIBILITY_THRESHOLD_END = 72; - private static final float CLOSE_BTN_VISIBILITY_THRESHOLD_END_MODEL_SELECTOR = 120; private static final float EPSILON = 0.001f;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContextTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContextTest.java index 174a471..84b7b7f0 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContextTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/ContextualSearchContextTest.java
@@ -32,7 +32,6 @@ @RunWith(BaseRobolectricTestRunner.class) public class ContextualSearchContextTest { private static final int INVALID = ContextualSearchContext.INVALID_OFFSET; - private static final String UTF_8 = "UTF-8"; private static final String SAMPLE_TEXT = "Now Barack Obama is not the best example. And Clinton is ambiguous."; private static final String HOME_COUNTRY = "unused";
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/RelatedSearchesStampTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/RelatedSearchesStampTest.java index 39c558d..b3fbe8ad 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/RelatedSearchesStampTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/contextualsearch/RelatedSearchesStampTest.java
@@ -47,7 +47,6 @@ private static final String RELATED_SEARCHES_CONTENT_EXPERIMENT = "c"; private static final String RELATED_SEARCHES_BOTH_EXPERIMENT = "b"; private static final String RELATED_SEARCHES_LANGUAGE_RESTRICTION = "l"; - private static final String RELATED_SEARCHES_DARK_LAUNCH = "d"; private static final String RELATED_SEARCHES_USER_INTERACTION = "U"; private static final String RELATED_SEARCHES_SELECTED_POSITION = "p";
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabStatusBarColorProviderTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabStatusBarColorProviderTest.java index 752e69c8..862fc6c 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabStatusBarColorProviderTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabStatusBarColorProviderTest.java
@@ -32,8 +32,6 @@ @RunWith(BaseRobolectricTestRunner.class) @Config(manifest = Config.NONE) public class CustomTabStatusBarColorProviderTest { - private static final int DEFAULT_COLOR = 0x11223344; - private static final int FALLBACK_COLOR = 0x55667788; private static final int USER_PROVIDED_COLOR = 0x99aabbcc; @Mock public CustomTabIntentDataProvider mCustomTabIntentDataProvider;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabsConnectionUnitTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabsConnectionUnitTest.java index 7297bab..6bfe6a4 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabsConnectionUnitTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/customtabs/CustomTabsConnectionUnitTest.java
@@ -27,7 +27,6 @@ import android.app.PendingIntent; import android.content.Intent; import android.os.Bundle; -import android.os.Process; import androidx.browser.customtabs.CustomTabsCallback; import androidx.browser.customtabs.CustomTabsSessionToken; @@ -85,7 +84,6 @@ new ArrayList<String>(List.of(ChromeFeatureList.CCT_REAL_TIME_ENGAGEMENT_SIGNALS, ChromeFeatureList.CCT_BRAND_TRANSPARENCY)); private CustomTabsConnection mConnection; - private int mUid = Process.myUid(); @Implements(UmaSessionStats.class) public static class ShadowUmaSessionStats {
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/init/AsyncInitTaskRunnerTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/init/AsyncInitTaskRunnerTest.java index 51e348d..88e4fa3c 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/init/AsyncInitTaskRunnerTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/init/AsyncInitTaskRunnerTest.java
@@ -42,8 +42,6 @@ @Config(manifest = Config.NONE, shadows = {ShadowAsyncTask.class}) @LooperMode(LooperMode.Mode.LEGACY) public class AsyncInitTaskRunnerTest { - private static final int THREAD_WAIT_TIME_MS = 1000; - private LibraryLoader mLoader; private AsyncInitTaskRunner mRunner; private CountDownLatch mLatch;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/media/ui/MediaNotificationActionsUpdatedTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/media/ui/MediaNotificationActionsUpdatedTest.java index cdb3624..ba81ec3 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/media/ui/MediaNotificationActionsUpdatedTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/media/ui/MediaNotificationActionsUpdatedTest.java
@@ -35,7 +35,6 @@ sdk = Build.VERSION_CODES.N_MR1, shadows = {MediaNotificationTestShadowResources.class}) public class MediaNotificationActionsUpdatedTest extends MediaNotificationTestBase { private static final int TAB_ID_1 = 1; - private static final int TAB_ID_2 = 2; private static final int THROTTLE_MILLIS = MediaNotificationController.Throttler.THROTTLE_MILLIS;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/multiwindow/MultiInstanceManagerApi31UnitTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/multiwindow/MultiInstanceManagerApi31UnitTest.java index 10b213e..b1d5c83 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/multiwindow/MultiInstanceManagerApi31UnitTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/multiwindow/MultiInstanceManagerApi31UnitTest.java
@@ -101,12 +101,8 @@ private static final int INVALID_INSTANCE_ID = MultiInstanceManagerApi31.INVALID_INSTANCE_ID; private static final int INSTANCE_ID_1 = 1; - private static final int PASSED_ID_1 = 1; private static final int PASSED_ID_2 = 2; - private static final int PASSED_ID_3 = 3; - private static final int PASSED_ID_4 = 4; private static final int PASSED_ID_INVALID = INVALID_INSTANCE_ID; - private static final int SAVED_ID_INVALID = INVALID_INSTANCE_ID; private static final int TASK_ID_56 = 56; private static final int TASK_ID_57 = 57; private static final int TASK_ID_58 = 58;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/omaha/metrics/UpdateSuccessMetricsTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/omaha/metrics/UpdateSuccessMetricsTest.java index f7a36a4fa..83539f9 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/omaha/metrics/UpdateSuccessMetricsTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/omaha/metrics/UpdateSuccessMetricsTest.java
@@ -42,12 +42,8 @@ @RunWith(BaseRobolectricTestRunner.class) @Config(manifest = Config.NONE) public class UpdateSuccessMetricsTest { - private static final int FAILED = 0; - private static final int SUCCESS = 1; - private static final int NOT_UPDATING = 0; private static final int UPDATING = 1; - private static final String NOT_CURRENT_VERSION = "---"; @Mock private TrackingProvider mProvider;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/suggestions/tile/TileRendererTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/suggestions/tile/TileRendererTest.java index d0f8e05..2c41ade 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/suggestions/tile/TileRendererTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/suggestions/tile/TileRendererTest.java
@@ -15,7 +15,6 @@ import android.content.res.ColorStateList; import android.graphics.Bitmap; import android.graphics.drawable.BitmapDrawable; -import android.graphics.drawable.Drawable; import android.widget.LinearLayout; import androidx.test.filters.SmallTest; @@ -119,7 +118,6 @@ private ShadowPostTaskImpl mPostTaskRunner; private Activity mActivity; private LinearLayout mSharedParent; - private final ArgumentCaptor<Drawable> mIconCaptor = ArgumentCaptor.forClass(Drawable.class); private final ArgumentCaptor<LargeIconCallback> mImageFetcherCallbackCaptor = ArgumentCaptor.forClass(LargeIconCallback.class);
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/tasks/JourneyManagerTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/tasks/JourneyManagerTest.java index 34d2ddf..598f32e 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/tasks/JourneyManagerTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/tasks/JourneyManagerTest.java
@@ -48,11 +48,9 @@ @LooperMode(LooperMode.Mode.LEGACY) public final class JourneyManagerTest { private static final int LAST_ENGAGEMENT_ELAPSED_MS = 5000; - private static final int LAST_ENGAGEMENT_ELAPSED_S = 5; private static final int TAB_ID = 123; private static final long BASE_TIME_MS = 1000000L; private static final long NO_TIME_MS = 0L; - private static final long DEFER_TIME_MS = 10L; @Mock private TabModel mTabModel;
diff --git a/chrome/android/junit/src/org/chromium/chrome/browser/webapps/WebApkInstallBroadcastReceiverTest.java b/chrome/android/junit/src/org/chromium/chrome/browser/webapps/WebApkInstallBroadcastReceiverTest.java index 4bf4f10..1761e90 100644 --- a/chrome/android/junit/src/org/chromium/chrome/browser/webapps/WebApkInstallBroadcastReceiverTest.java +++ b/chrome/android/junit/src/org/chromium/chrome/browser/webapps/WebApkInstallBroadcastReceiverTest.java
@@ -51,7 +51,6 @@ @Features.EnableFeatures({ChromeFeatureList.WEB_APK_INSTALL_FAILURE_NOTIFICATION, ChromeFeatureList.WEB_APK_INSTALL_RETRY}) public class WebApkInstallBroadcastReceiverTest { - private static final String PACKAGE_NAME = "org.chromium.webapk.for.testing"; private static final String MANIFEST_URL = "https://test.com/manifest.json"; private static final String SHORT_NAME = "webapk"; private static final String URL = "https://test.com";
diff --git a/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/HostBrowserUtils.java b/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/HostBrowserUtils.java index 333dde6..d7df4af 100644 --- a/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/HostBrowserUtils.java +++ b/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/HostBrowserUtils.java
@@ -32,8 +32,6 @@ private static final String VERSION_NAME_DEVELOPER_BUILD = "Developer Build"; - private static final String TAG = "cr_HostBrowserUtils"; - public static String ARC_INTENT_HELPER_BROWSER = "org.chromium.arc.intent_helper"; public static String ARC_WEBAPK_BROWSER = "org.chromium.arc.webapk";
diff --git a/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/WebApkUtils.java b/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/WebApkUtils.java index 0f9612d..18b5cea 100644 --- a/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/WebApkUtils.java +++ b/chrome/android/webapk/shell_apk/src/org/chromium/webapk/shell_apk/WebApkUtils.java
@@ -41,8 +41,6 @@ * Contains utility methods for interacting with WebAPKs. */ public class WebApkUtils { - private static final String TAG = "cr_WebApkUtils"; - private static final float CONTRAST_LIGHT_ITEM_THRESHOLD = 3f; /** Returns whether the application is installed and enabled. */
diff --git a/chrome/app/chrome_command_ids.h b/chrome/app/chrome_command_ids.h index 6f079b6..b8cc4be0 100644 --- a/chrome/app/chrome_command_ids.h +++ b/chrome/app/chrome_command_ids.h
@@ -163,6 +163,11 @@ // Save/Share sub menu #define IDC_SAVE_AND_SHARE_MENU 37300 +// Profile sub menu +#define IDC_CUSTOMIZE_CHROME 37350 +#define IDC_CLOSE_PROFILE 37351 +#define IDC_MANAGE_GOOGLE_ACCOUNT 37352 + // Zoom #define IDC_ZOOM_MENU 38000 #define IDC_ZOOM_PLUS 38001
diff --git a/chrome/app/generated_resources.grd b/chrome/app/generated_resources.grd index 6fbca56..a0f5b79 100644 --- a/chrome/app/generated_resources.grd +++ b/chrome/app/generated_resources.grd
@@ -1249,6 +1249,15 @@ <message name="IDS_SEARCH_TABS" desc="The text label of the Search Tabs... menu item"> Search tabs... </message> + <message name="IDS_CUSTOMIZE_CHROME" desc="The text label of the Customize Chrome menu item"> + C&ustomize your Chrome + </message> + <message name="IDS_CLOSE_PROFILE" desc="The text label of the Close profile menu item with one or more windows currently opened [ICU Syntax]"> + {NUM_PROFILES, plural, =1 {&Close this profile} other {&Close this profile (# windows)}} + </message> + <message name="IDS_MANAGE_GOOGLE_ACCOUNT" desc="The text label of the Manage Google account item"> + Manage your &Google account + </message> </if> <if expr="use_titlecase"> <message name="IDS_ADDRESSES_AND_MORE_SUBMENU_OPTION" desc="In Title Case: The text label of the addresses and more item for the passwords and autofill submenu"> @@ -1353,6 +1362,15 @@ <message name="IDS_SEARCH_TABS" desc="In Title Case:The text label of the Search Tabs... menu item"> Search Tabs... </message> + <message name="IDS_CUSTOMIZE_CHROME" desc="In Title Case: The text label of the Customize Chrome menu item"> + C&ustomize Your Chrome + </message> + <message name="IDS_CLOSE_PROFILE" desc="In Title Case: The text label of the Close profile menu item with one or more windows currently opened [ICU Syntax]"> + {NUM_PROFILES, plural, =1 {&Close This Profile} other {&Close This Profile (# Windows)}} + </message> + <message name="IDS_MANAGE_GOOGLE_ACCOUNT" desc="In Title Case: The text label of the Manage Google account item"> + Manage Your &Google Account + </message> </if> </if> @@ -7843,7 +7861,7 @@ There are no bookmarks that match your search. </message> <message name="IDS_BOOKMARK_FOLDER_CHILD_COUNT" desc="Text description for the number of children of a bookmark folder"> - {NUM_BOOKMARKS, plural, =1 {1 bookmark} other {# bookmarks}} + (<ph name="COUNT">$1<ex>1</ex></ph>) </message> <message name="IDS_BOOKMARKS_EDIT_BOOKMARK" desc="Title for the bookmarks side panel edit dialog when full editing is available"> Edit Bookmark @@ -14905,7 +14923,7 @@ Use your <ph name="IDENTITY_PROVIDER_ETLD_PLUS_ONE">$1<ex>idp.example</ex></ph> account to sign in to <ph name="SITE_ETLD_PLUS_ONE">$2<ex>rp.example</ex></ph> </message> <message name="IDS_IDP_SIGNIN_STATUS_MISMATCH_DIALOG_DESCRIPTION" desc="Describes the sign in process." translateable="false"> - No more passwords to remember. Signing in is fast, simple and secure. + Signing in is fast, simple and secure. </message> <message name="IDS_MULTI_IDP_ACCOUNT_SELECTION_SHEET_TITLE_EXPLICIT" desc="Header for sign in sheet when there are multiple identity providers. Sheet is shown to prompt user for sign in consent." translateable="false"> Sign in to <ph name="SITE_ETLD_PLUS_ONE">$1<ex>rp.example</ex></ph>
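The new IDS_CLOSE_PROFILE string uses ICU plural syntax, so callers resolve it through the plural-aware l10n helpers rather than plain string substitution. A minimal sketch of how such a message is typically formatted from C++ (the caller and the window-count variable are illustrative assumptions; this change only adds the string):

#include <string>

#include "chrome/grit/generated_resources.h"
#include "ui/base/l10n/l10n_util.h"

// Returns the menu label, e.g. "Close this profile (3 windows)".
// `open_window_count` is an assumed input supplied by the menu code.
std::u16string GetCloseProfileLabel(int open_window_count) {
  // GetPluralStringFUTF16 selects the =1 / other branch of the ICU message
  // and substitutes the count for the '#' placeholder.
  return l10n_util::GetPluralStringFUTF16(IDS_CLOSE_PROFILE,
                                          open_window_count);
}
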
diff --git a/chrome/app/generated_resources_grd/IDS_BOOKMARK_FOLDER_CHILD_COUNT.png.sha1 b/chrome/app/generated_resources_grd/IDS_BOOKMARK_FOLDER_CHILD_COUNT.png.sha1 index 9f51b0c..97bd4facb 100644 --- a/chrome/app/generated_resources_grd/IDS_BOOKMARK_FOLDER_CHILD_COUNT.png.sha1 +++ b/chrome/app/generated_resources_grd/IDS_BOOKMARK_FOLDER_CHILD_COUNT.png.sha1
@@ -1 +1 @@ -083f75c3e30a99d2f8f4f1f0465bd555d9ecced2 \ No newline at end of file +c0ea87f6f19152f5d2367061b60d83d1fffcd975 \ No newline at end of file
diff --git a/chrome/app/generated_resources_grd/IDS_CLOSE_PROFILE.png.sha1 b/chrome/app/generated_resources_grd/IDS_CLOSE_PROFILE.png.sha1 new file mode 100644 index 0000000..c75bfcad --- /dev/null +++ b/chrome/app/generated_resources_grd/IDS_CLOSE_PROFILE.png.sha1
@@ -0,0 +1 @@ +c2e89d610faa57d2a40b73660af353114a82db21 \ No newline at end of file
diff --git a/chrome/app/generated_resources_grd/IDS_CUSTOMIZE_CHROME.png.sha1 b/chrome/app/generated_resources_grd/IDS_CUSTOMIZE_CHROME.png.sha1 new file mode 100644 index 0000000..4c4ff18 --- /dev/null +++ b/chrome/app/generated_resources_grd/IDS_CUSTOMIZE_CHROME.png.sha1
@@ -0,0 +1 @@ +8b375ced377c641a49d14d4c9f4624fab1e2e664 \ No newline at end of file
diff --git a/chrome/app/generated_resources_grd/IDS_MANAGE_GOOGLE_ACCOUNT.png.sha1 b/chrome/app/generated_resources_grd/IDS_MANAGE_GOOGLE_ACCOUNT.png.sha1 new file mode 100644 index 0000000..4c4ff18 --- /dev/null +++ b/chrome/app/generated_resources_grd/IDS_MANAGE_GOOGLE_ACCOUNT.png.sha1
@@ -0,0 +1 @@ +8b375ced377c641a49d14d4c9f4624fab1e2e664 \ No newline at end of file
diff --git a/chrome/app/os_settings_strings.grdp b/chrome/app/os_settings_strings.grdp index 6bdc321b..423099a 100644 --- a/chrome/app/os_settings_strings.grdp +++ b/chrome/app/os_settings_strings.grdp
@@ -1109,15 +1109,6 @@ <message name="IDS_SETTINGS_GREYSCALE_LABEL" desc="Label for slider which controls greyscale UI filter."> Greyscale </message> - <message name="IDS_SETTINGS_SEPIA_LABEL" desc="Label for slider which controls sepia UI filter."> - Sepia - </message> - <message name="IDS_SETTINGS_SATURATION_LABEL" desc="Label for slider which controls saturation filter UI."> - Saturation - </message> - <message name="IDS_SETTINGS_HUE_ROTATION_LABEL" desc="Label for slider which controls hue rotation filter UI."> - Hue rotation - </message> <message name="IDS_SETTINGS_PROTANOMALY_FILTER" desc="Label for a setting drop-down menu option for a color filter that helps people with protanomaly."> Red-green filter (red weak, protanomaly) </message>
diff --git a/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_HUE_ROTATION_LABEL.png.sha1 b/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_HUE_ROTATION_LABEL.png.sha1 deleted file mode 100644 index 3a978cf..0000000 --- a/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_HUE_ROTATION_LABEL.png.sha1 +++ /dev/null
@@ -1 +0,0 @@ -4cf145c9db63c72a2822a1f441779df1f27b0696 \ No newline at end of file
diff --git a/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_SATURATION_LABEL.png.sha1 b/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_SATURATION_LABEL.png.sha1 deleted file mode 100644 index 3a978cf..0000000 --- a/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_SATURATION_LABEL.png.sha1 +++ /dev/null
@@ -1 +0,0 @@ -4cf145c9db63c72a2822a1f441779df1f27b0696 \ No newline at end of file
diff --git a/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_SEPIA_LABEL.png.sha1 b/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_SEPIA_LABEL.png.sha1 deleted file mode 100644 index 3a978cf..0000000 --- a/chrome/app/os_settings_strings_grdp/IDS_SETTINGS_SEPIA_LABEL.png.sha1 +++ /dev/null
@@ -1 +0,0 @@ -4cf145c9db63c72a2822a1f441779df1f27b0696 \ No newline at end of file
diff --git a/chrome/app/vector_icons/BUILD.gn b/chrome/app/vector_icons/BUILD.gn index 1257c09..919626372 100644 --- a/chrome/app/vector_icons/BUILD.gn +++ b/chrome/app/vector_icons/BUILD.gn
@@ -17,6 +17,7 @@ "account_child_circle.icon", "account_circle.icon", "account_circle_chrome_refresh.icon", + "account_manage_chrome_refresh.icon", "add.icon", "apps.icon", "autofill/local_offer_flipped.icon",
diff --git a/chrome/app/vector_icons/account_manage_chrome_refresh.icon b/chrome/app/vector_icons/account_manage_chrome_refresh.icon new file mode 100644 index 0000000..1c5c34b2 --- /dev/null +++ b/chrome/app/vector_icons/account_manage_chrome_refresh.icon
@@ -0,0 +1,246 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +CANVAS_DIMENSIONS, 24, +MOVE_TO, 10, 11.9f, +R_CUBIC_TO, -1.16f, 0, -2.15f, -0.41f, -2.97f, -1.23f, +R_CUBIC_TO, -0.82f, -0.82f, -1.23f, -1.81f, -1.23f, -2.97f, +R_CUBIC_TO, 0, -1.16f, 0.41f, -2.15f, 1.23f, -2.97f, +R_CUBIC_TO, 0.83f, -0.82f, 1.81f, -1.23f, 2.97f, -1.23f, +R_CUBIC_TO, 1.16f, 0, 2.15f, 0.41f, 2.97f, 1.23f, +R_CUBIC_TO, 0.82f, 0.82f, 1.23f, 1.81f, 1.23f, 2.97f, +R_CUBIC_TO, 0, 1.16f, -0.41f, 2.15f, -1.23f, 2.97f, +R_CUBIC_TO, -0.82f, 0.82f, -1.81f, 1.23f, -2.97f, 1.23f, +CLOSE, +R_MOVE_TO, -8.2f, 8.41f, +R_V_LINE_TO, -3.02f, +R_CUBIC_TO, 0, -0.61f, 0.16f, -1.17f, 0.48f, -1.68f, +R_CUBIC_TO, 0.33f, -0.51f, 0.74f, -0.89f, 1.24f, -1.14f, +R_CUBIC_TO, 0.85f, -0.43f, 1.82f, -0.8f, 2.92f, -1.11f, +R_CUBIC_TO, 1.1f, -0.3f, 2.29f, -0.46f, 3.57f, -0.46f, +R_H_LINE_TO, 0.4f, +R_CUBIC_TO, 0.13f, 0, 0.26f, 0.02f, 0.38f, 0.05f, +R_CUBIC_TO, -0.14f, 0.3f, -0.27f, 0.65f, -0.39f, 1.06f, +R_CUBIC_TO, -0.12f, 0.41f, -0.21f, 0.8f, -0.27f, 1.17f, +R_H_LINE_TO, -0.1f, +R_CUBIC_TO, -1.15f, 0, -2.2f, 0.15f, -3.14f, 0.44f, +R_CUBIC_TO, -0.94f, 0.29f, -1.71f, 0.58f, -2.31f, 0.88f, +R_ARC_TO, 0.97f, 0.97f, 0, 0, 0, -0.36f, 0.34f, +R_ARC_TO, 0.89f, 0.89f, 0, 0, 0, -0.14f, 0.49f, +R_V_LINE_TO, 0.71f, +R_H_LINE_TO, 6.22f, +R_CUBIC_TO, 0.11f, 0.39f, 0.26f, 0.79f, 0.47f, 1.19f, +R_CUBIC_TO, 0.2f, 0.4f, 0.42f, 0.76f, 0.65f, 1.08f, +CLOSE, +R_MOVE_TO, 14.29f, 0.9f, +R_LINE_TO, -0.3f, -1.53f, +R_ARC_TO, 4.88f, 4.88f, 0, 0, 1, -1.03f, -0.56f, +R_LINE_TO, -1.48f, 0.46f, +R_LINE_TO, -1.07f, -1.83f, +R_LINE_TO, 1.17f, -1.01f, +R_ARC_TO, 3.39f, 3.39f, 0, 0, 1, -0.05f, -0.61f, +R_CUBIC_TO, 0, -0.19f, 0.02f, -0.4f, 0.05f, -0.61f, +LINE_TO, 12.2f, 14.48f, +R_LINE_TO, 1.07f, -1.83f, +R_LINE_TO, 1.48f, 0.47f, +R_ARC_TO, 4.88f, 4.88f, 0, 0, 1, 0.51f, -0.32f, +R_CUBIC_TO, 0.17f, -0.09f, 0.34f, -0.17f, 0.53f, -0.25f, +R_LINE_TO, 0.31f, -1.53f, +R_H_LINE_TO, 2.15f, +R_LINE_TO, 0.31f, 1.53f, +R_ARC_TO, 3.92f, 3.92f, 0, 0, 1, 1.04f, 0.61f, +R_LINE_TO, 1.47f, -0.51f, +R_LINE_TO, 1.08f, 1.88f, +R_LINE_TO, -1.18f, 1.02f, +R_CUBIC_TO, 0.04f, 0.18f, 0.06f, 0.38f, 0.06f, 0.59f, +R_CUBIC_TO, 0, 0.21f, -0.02f, 0.4f, -0.05f, 0.59f, +R_LINE_TO, 1.18f, 1.01f, +R_LINE_TO, -1.08f, 1.83f, +R_LINE_TO, -1.47f, -0.46f, +R_ARC_TO, 4.92f, 4.92f, 0, 0, 1, -1.04f, 0.56f, +R_LINE_TO, -0.31f, 1.53f, +CLOSE, +R_MOVE_TO, 1.08f, -3.14f, +R_CUBIC_TO, 0.54f, 0, 0.99f, -0.19f, 1.38f, -0.58f, +R_CUBIC_TO, 0.38f, -0.39f, 0.58f, -0.85f, 0.58f, -1.38f, +R_CUBIC_TO, 0, -0.53f, -0.19f, -1, -0.58f, -1.38f, +R_ARC_TO, 1.87f, 1.87f, 0, 0, 0, -1.37f, -0.57f, +R_CUBIC_TO, -0.53f, 0, -1, 0.19f, -1.38f, 0.57f, +R_ARC_TO, 1.87f, 1.87f, 0, 0, 0, -0.58f, 1.38f, +R_CUBIC_TO, 0, 0.53f, 0.19f, 0.99f, 0.58f, 1.38f, +R_CUBIC_TO, 0.38f, 0.38f, 0.84f, 0.58f, 1.38f, 0.58f, +CLOSE, +MOVE_TO, 10, 9.63f, +R_ARC_TO, 1.87f, 1.87f, 0, 0, 0, 1.36f, -0.57f, +R_ARC_TO, 1.87f, 1.87f, 0, 0, 0, 0.57f, -1.36f, +R_CUBIC_TO, 0, -0.53f, -0.19f, -0.98f, -0.57f, -1.36f, +ARC_TO, 1.87f, 1.87f, 0, 0, 0, 10, 5.77f, +R_CUBIC_TO, -0.53f, 0, -0.98f, 0.19f, -1.36f, 0.56f, +R_ARC_TO, 1.87f, 1.87f, 0, 0, 0, -0.57f, 1.36f, +R_CUBIC_TO, 0, 0.53f, 0.19f, 0.98f, 0.57f, 1.36f, +ARC_TO, 1.87f, 1.87f, 0, 0, 0, 10, 9.63f, +CLOSE + +CANVAS_DIMENSIONS, 20, +MOVE_TO, 8, 9.91f, +R_CUBIC_TO, -0.88f, 0, -1.63f, -0.3f, -2.25f, -0.92f, +R_CUBIC_TO, -0.62f, -0.61f, -0.92f, -1.36f, -0.92f, -2.25f, +R_CUBIC_TO, 0, -0.89f, 0.31f, -1.63f, 0.92f, -2.25f, 
+R_CUBIC_TO, 0.61f, -0.61f, 1.36f, -0.92f, 2.25f, -0.92f, +R_CUBIC_TO, 0.89f, 0, 1.63f, 0.31f, 2.25f, 0.92f, +R_CUBIC_TO, 0.62f, 0.61f, 0.92f, 1.36f, 0.92f, 2.25f, +R_CUBIC_TO, 0, 0.89f, -0.3f, 1.63f, -0.92f, 2.25f, +R_CUBIC_TO, -0.61f, 0.62f, -1.36f, 0.92f, -2.25f, 0.92f, +CLOSE, +R_MOVE_TO, -6.17f, 6.34f, +R_V_LINE_TO, -2.1f, +R_CUBIC_TO, 0, -0.41f, 0.1f, -0.78f, 0.31f, -1.1f, +R_CUBIC_TO, 0.21f, -0.32f, 0.46f, -0.57f, 0.77f, -0.75f, +R_ARC_TO, 10.31f, 10.31f, 0, 0, 1, 2.45f, -1.03f, +R_ARC_TO, 9.81f, 9.81f, 0, 0, 1, 3.11f, -0.35f, +R_CUBIC_TO, 0.16f, 0.01f, 0.32f, 0.02f, 0.49f, 0.03f, +R_ARC_TO, 4.93f, 4.93f, 0, 0, 0, -0.27f, 0.83f, +R_CUBIC_TO, -0.07f, 0.32f, -0.12f, 0.61f, -0.16f, 0.88f, +R_LINE_TO, -0.52f, -0.02f, +R_CUBIC_TO, -0.72f, 0, -1.44f, 0.09f, -2.16f, 0.28f, +R_ARC_TO, 7.96f, 7.96f, 0, 0, 0, -2.06f, 0.86f, +R_ARC_TO, 0.48f, 0.48f, 0, 0, 0, -0.17f, 0.16f, +R_ARC_TO, 0.48f, 0.48f, 0, 0, 0, -0.06f, 0.24f, +R_V_LINE_TO, 0.34f, +R_H_LINE_TO, 5.16f, +R_CUBIC_TO, 0.07f, 0.3f, 0.18f, 0.59f, 0.33f, 0.9f, +R_CUBIC_TO, 0.14f, 0.3f, 0.3f, 0.58f, 0.48f, 0.83f, +CLOSE, +R_MOVE_TO, 11.56f, 0.91f, +R_LINE_TO, -0.25f, -1.19f, +R_ARC_TO, 3.15f, 3.15f, 0, 0, 1, -0.99f, -0.55f, +R_LINE_TO, -1.17f, 0.36f, +R_LINE_TO, -0.73f, -1.26f, +R_LINE_TO, 0.87f, -0.85f, +R_ARC_TO, 1.6f, 1.6f, 0, 0, 1, -0.07f, -0.57f, +R_CUBIC_TO, 0.01f, -0.2f, 0.04f, -0.39f, 0.07f, -0.57f, +R_LINE_TO, -0.87f, -0.84f, +R_LINE_TO, 0.73f, -1.27f, +R_LINE_TO, 1.15f, 0.34f, +R_CUBIC_TO, 0.14f, -0.13f, 0.3f, -0.25f, 0.48f, -0.35f, +R_CUBIC_TO, 0.17f, -0.1f, 0.35f, -0.18f, 0.53f, -0.23f, +R_LINE_TO, 0.27f, -1.19f, +R_H_LINE_TO, 1.46f, +R_LINE_TO, 0.27f, 1.19f, +R_CUBIC_TO, 0.18f, 0.06f, 0.36f, 0.14f, 0.54f, 0.23f, +R_CUBIC_TO, 0.18f, 0.09f, 0.34f, 0.21f, 0.48f, 0.34f, +R_LINE_TO, 1.15f, -0.32f, +R_LINE_TO, 0.73f, 1.25f, +R_LINE_TO, -0.86f, 0.81f, +R_CUBIC_TO, 0.03f, 0.2f, 0.05f, 0.39f, 0.04f, 0.59f, +R_CUBIC_TO, 0, 0.2f, -0.02f, 0.39f, -0.06f, 0.57f, +R_LINE_TO, 0.88f, 0.83f, +R_LINE_TO, -0.73f, 1.26f, +R_LINE_TO, -1.17f, -0.34f, +R_CUBIC_TO, -0.14f, 0.11f, -0.3f, 0.22f, -0.46f, 0.32f, +R_CUBIC_TO, -0.17f, 0.1f, -0.34f, 0.18f, -0.53f, 0.23f, +R_LINE_TO, -0.3f, 1.19f, +CLOSE, +R_MOVE_TO, 0.76f, -2.61f, +R_CUBIC_TO, 0.4f, 0, 0.74f, -0.14f, 1.03f, -0.43f, +R_CUBIC_TO, 0.28f, -0.29f, 0.43f, -0.64f, 0.43f, -1.03f, +R_CUBIC_TO, 0, -0.4f, -0.14f, -0.74f, -0.43f, -1.03f, +R_ARC_TO, 1.42f, 1.42f, 0, 0, 0, -1.03f, -0.43f, +R_CUBIC_TO, -0.4f, 0, -0.75f, 0.14f, -1.03f, 0.43f, +R_CUBIC_TO, -0.29f, 0.28f, -0.43f, 0.63f, -0.43f, 1.03f, +R_CUBIC_TO, 0, 0.4f, 0.14f, 0.75f, 0.43f, 1.03f, +R_CUBIC_TO, 0.29f, 0.28f, 0.63f, 0.43f, 1.04f, 0.43f, +CLOSE, +MOVE_TO, 8, 8.19f, +R_CUBIC_TO, 0.4f, 0, 0.73f, -0.14f, 1.02f, -0.43f, +R_CUBIC_TO, 0.28f, -0.28f, 0.42f, -0.62f, 0.42f, -1.02f, +R_CUBIC_TO, 0, -0.4f, -0.14f, -0.73f, -0.42f, -1.01f, +R_ARC_TO, 1.4f, 1.4f, 0, 0, 0, -1.02f, -0.41f, +R_CUBIC_TO, -0.4f, 0, -0.73f, 0.14f, -1.02f, 0.42f, +ARC_TO, 1.39f, 1.39f, 0, 0, 0, 6.56f, 6.75f, +R_CUBIC_TO, 0, 0.4f, 0.14f, 0.73f, 0.43f, 1.02f, +R_CUBIC_TO, 0.28f, 0.28f, 0.62f, 0.42f, 1.02f, 0.42f, +CLOSE + +CANVAS_DIMENSIONS, 16, +MOVE_TO, 6.4f, 7.84f, +R_CUBIC_TO, -0.76f, 0, -1.4f, -0.26f, -1.93f, -0.79f, +R_CUBIC_TO, -0.53f, -0.53f, -0.79f, -1.17f, -0.79f, -1.93f, +R_CUBIC_TO, 0, -0.76f, 0.26f, -1.41f, 0.79f, -1.93f, +R_CUBIC_TO, 0.53f, -0.52f, 1.17f, -0.78f, 1.93f, -0.78f, +R_CUBIC_TO, 0.76f, 0, 1.41f, 0.26f, 1.93f, 0.79f, +R_CUBIC_TO, 0.52f, 0.52f, 0.79f, 1.16f, 0.79f, 1.93f, +R_CUBIC_TO, 0, 0.76f, -0.27f, 1.4f, -0.79f, 1.93f, +R_CUBIC_TO, -0.53f, 0.53f, -1.17f, 
0.79f, -1.93f, 0.79f, +CLOSE, +MOVE_TO, 1.28f, 13.28f, +R_V_LINE_TO, -1.87f, +R_CUBIC_TO, 0, -0.38f, 0.1f, -0.71f, 0.3f, -1, +R_CUBIC_TO, 0.2f, -0.28f, 0.42f, -0.5f, 0.66f, -0.64f, +R_ARC_TO, 8.33f, 8.33f, 0, 0, 1, 2, -0.83f, +R_ARC_TO, 8.1f, 8.1f, 0, 0, 1, 2.15f, -0.3f, +R_CUBIC_TO, 0.13f, 0, 0.27f, 0, 0.42f, 0.01f, +R_CUBIC_TO, 0.16f, 0, 0.31f, 0.01f, 0.46f, 0.02f, +R_CUBIC_TO, -0.09f, 0.19f, -0.18f, 0.45f, -0.26f, 0.77f, +R_ARC_TO, 7.76f, 7.76f, 0, 0, 0, -0.18f, 0.84f, +R_LINE_TO, -0.41f, -0.02f, +R_CUBIC_TO, -0.55f, 0, -1.11f, 0.07f, -1.68f, 0.21f, +R_ARC_TO, 5.97f, 5.97f, 0, 0, 0, -1.64f, 0.67f, +R_ARC_TO, 0.36f, 0.36f, 0, 0, 0, -0.13f, 0.12f, +R_ARC_TO, 0.31f, 0.31f, 0, 0, 0, -0.05f, 0.18f, +R_V_LINE_TO, 0.19f, +R_H_LINE_TO, 4.05f, +R_CUBIC_TO, 0.07f, 0.27f, 0.18f, 0.56f, 0.32f, 0.86f, +R_CUBIC_TO, 0.15f, 0.3f, 0.31f, 0.56f, 0.47f, 0.78f, +CLOSE, +R_MOVE_TO, 9.51f, 0.64f, +R_LINE_TO, -0.21f, -0.98f, +R_ARC_TO, 1.91f, 1.91f, 0, 0, 1, -0.39f, -0.18f, +R_ARC_TO, 4.04f, 4.04f, 0, 0, 1, -0.34f, -0.23f, +R_LINE_TO, -0.96f, 0.3f, +R_LINE_TO, -0.64f, -1.12f, +R_LINE_TO, 0.72f, -0.69f, +R_ARC_TO, 0.96f, 0.96f, 0, 0, 1, -0.07f, -0.42f, +R_ARC_TO, 2.17f, 2.17f, 0, 0, 1, 0.07f, -0.42f, +R_LINE_TO, -0.72f, -0.69f, +R_LINE_TO, 0.65f, -1.13f, +R_LINE_TO, 0.95f, 0.28f, +R_CUBIC_TO, 0.1f, -0.09f, 0.22f, -0.18f, 0.35f, -0.26f, +R_CUBIC_TO, 0.13f, -0.08f, 0.27f, -0.13f, 0.4f, -0.17f, +R_LINE_TO, 0.23f, -0.98f, +R_H_LINE_TO, 1.3f, +R_LINE_TO, 0.23f, 0.98f, +R_CUBIC_TO, 0.13f, 0.04f, 0.26f, 0.1f, 0.4f, 0.17f, +R_CUBIC_TO, 0.14f, 0.07f, 0.25f, 0.16f, 0.36f, 0.26f, +R_LINE_TO, 0.94f, -0.27f, +R_LINE_TO, 0.65f, 1.11f, +R_LINE_TO, -0.71f, 0.67f, +R_ARC_TO, 2, 2, 0, 0, 1, -0.02f, 0.86f, +R_LINE_TO, 0.73f, 0.68f, +R_LINE_TO, -0.65f, 1.12f, +R_LINE_TO, -0.96f, -0.28f, +R_ARC_TO, 4.81f, 4.81f, 0, 0, 1, -0.35f, 0.24f, +R_ARC_TO, 1.53f, 1.53f, 0, 0, 1, -0.39f, 0.17f, +R_LINE_TO, -0.24f, 0.98f, +CLOSE, +R_MOVE_TO, 0.68f, -2.22f, +R_CUBIC_TO, 0.3f, 0, 0.57f, -0.11f, 0.79f, -0.34f, +R_CUBIC_TO, 0.22f, -0.23f, 0.33f, -0.49f, 0.33f, -0.8f, +R_CUBIC_TO, 0, -0.3f, -0.11f, -0.57f, -0.34f, -0.79f, +R_ARC_TO, 1.08f, 1.08f, 0, 0, 0, -0.79f, -0.33f, +R_CUBIC_TO, -0.3f, 0, -0.57f, 0.11f, -0.8f, 0.34f, +R_ARC_TO, 1.08f, 1.08f, 0, 0, 0, -0.34f, 0.79f, +R_CUBIC_TO, 0, 0.31f, 0.11f, 0.57f, 0.34f, 0.79f, +R_CUBIC_TO, 0.23f, 0.22f, 0.49f, 0.34f, 0.8f, 0.34f, +CLOSE, +MOVE_TO, 6.4f, 6.21f, +R_CUBIC_TO, 0.3f, 0, 0.56f, -0.1f, 0.77f, -0.32f, +R_CUBIC_TO, 0.22f, -0.21f, 0.32f, -0.47f, 0.32f, -0.77f, +R_CUBIC_TO, 0, -0.3f, -0.11f, -0.55f, -0.32f, -0.76f, +R_ARC_TO, 1.06f, 1.06f, 0, 0, 0, -0.77f, -0.31f, +R_CUBIC_TO, -0.3f, 0, -0.56f, 0.11f, -0.77f, 0.32f, +R_ARC_TO, 1.04f, 1.04f, 0, 0, 0, -0.32f, 0.77f, +R_CUBIC_TO, 0, 0.3f, 0.11f, 0.56f, 0.32f, 0.77f, +R_CUBIC_TO, 0.22f, 0.21f, 0.47f, 0.32f, 0.77f, 0.32f, +CLOSE \ No newline at end of file
diff --git a/chrome/browser/about_flags.cc b/chrome/browser/about_flags.cc index 49a601c..182a234 100644 --- a/chrome/browser/about_flags.cc +++ b/chrome/browser/about_flags.cc
@@ -2449,6 +2449,23 @@ std::size(kStartSurfaceAndroid_SingleSurface), nullptr}, }; +const FeatureEntry::FeatureParam kSurfacePolish_polish_omnibox_size[] = { + {"polish_omnibox_size", "true"}, + {"polish_omnibox_color", "false"}}; + +const FeatureEntry::FeatureParam + kSurfacePolish_polish_omnibox_size_and_color[] = { + {"polish_omnibox_size", "true"}, + {"polish_omnibox_color", "true"}}; + +const FeatureEntry::FeatureVariation kSurfacePolishVariations[] = { + {"Polish omnibox size", kSurfacePolish_polish_omnibox_size, + std::size(kSurfacePolish_polish_omnibox_size), nullptr}, + {"Polish omnibox size and color", + kSurfacePolish_polish_omnibox_size_and_color, + std::size(kSurfacePolish_polish_omnibox_size_and_color), nullptr}, +}; + const FeatureEntry::FeatureParam kFeedPositionAndroid_push_down_feed_small[] = { {"push_down_feed_small", "true"}}; @@ -4107,6 +4124,11 @@ flag_descriptions::kOsSettingsAppBadgingToggleName, flag_descriptions::kOsSettingsAppBadgingToggleDescription, kOsCrOS, FEATURE_VALUE_TYPE(ash::features::kOsSettingsAppBadgingToggle)}, + {"os-settings-deprecate-sync-metrics-toggle", + flag_descriptions::kOsSettingsDeprecateSyncMetricsToggleName, + flag_descriptions::kOsSettingsDeprecateSyncMetricsToggleDescription, + kOsCrOS, + FEATURE_VALUE_TYPE(ash::features::kOsSettingsDeprecateSyncMetricsToggle)}, {"os-settings-revamp-wayfinding", flag_descriptions::kOsSettingsRevampWayfindingName, flag_descriptions::kOsSettingsRevampWayfindingDescription, kOsCrOS, @@ -6723,7 +6745,9 @@ {"enable-surface-polish", flag_descriptions::kSurfacePolishName, flag_descriptions::kSurfacePolishDescription, kOsAndroid, - FEATURE_VALUE_TYPE(chrome::android::kSurfacePolish)}, + FEATURE_WITH_PARAMS_VALUE_TYPE(chrome::android::kSurfacePolish, + kSurfacePolishVariations, + "SurfacePolish")}, {"enable-show-scrollable-mvt-on-ntp", flag_descriptions::kShowScrollableMVTOnNTPAndroidName,
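The SurfacePolish variations above only register the params with about://flags; feature code reads them back through the field-trial param helpers. A hedged sketch of reading one of these booleans from C++ (the feature object below is a stand-in declared for illustration; the real chrome::android::kSurfacePolish is declared elsewhere and its Android-side consumers are not part of this diff):

#include "base/feature_list.h"
#include "base/metrics/field_trial_params.h"

// Stand-in declaration for illustration; it mirrors the "SurfacePolish"
// trial name used by FEATURE_WITH_PARAMS_VALUE_TYPE above.
BASE_FEATURE(kSurfacePolishForIllustration,
             "SurfacePolish",
             base::FEATURE_DISABLED_BY_DEFAULT);

bool ShouldPolishOmniboxSize() {
  // Returns false when the feature is disabled or the param is absent;
  // the "Polish omnibox size" variation sets it to true.
  return base::GetFieldTrialParamByFeatureAsBool(
      kSurfacePolishForIllustration, "polish_omnibox_size",
      /*default_value=*/false);
}
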
diff --git a/chrome/browser/about_flags_browsertest.cc b/chrome/browser/about_flags_browsertest.cc index 8252c2f7..b0c084df 100644 --- a/chrome/browser/about_flags_browsertest.cc +++ b/chrome/browser/about_flags_browsertest.cc
@@ -76,13 +76,14 @@ bool enable) { EXPECT_TRUE(content::ExecJs( contents, - base::StringPrintf( - "var k = document.getElementById('%s');" - "var s = k.getElementsByClassName('experiment-enable-disable')[0];" - "s.focus();" - "s.selectedIndex = %d;" - "s.onchange();", - experiment_id, enable ? 1 : 0))); + base::StringPrintf("var k = document.getElementById('%s');" + "var s = " + "k.querySelector('flags-experiment').shadowRoot." + "querySelector('.experiment-enable-disable');" + "s.focus();" + "s.selectedIndex = %d;" + "s.onchange();", + experiment_id, enable ? 1 : 0))); } std::string GetOriginListText(content::WebContents* contents, @@ -380,14 +381,15 @@ // See https://crbug.com/1038638 for more details. EXPECT_TRUE(content::ExecJs( contents, - base::StringPrintf( - "var k = document.getElementById('%s');" - "var s = k.getElementsByClassName('experiment-enable-disable')[0];" - "delete s.internal_name;" - "const e = document.createEvent('HTMLEvents');" - "e.initEvent('change', true, true);" - "s.dispatchEvent(e);", - kFlagWithOptionSelectorName), + base::StringPrintf("var k = document.getElementById('%s');" + "var s = " + "k.querySelector('flags-experiment').shadowRoot." + "querySelector('.experiment-enable-disable');" + "delete s.internal_name;" + "const e = document.createEvent('HTMLEvents');" + "e.initEvent('change', true, true);" + "s.dispatchEvent(e);", + kFlagWithOptionSelectorName), // Execute script in an isolated world to avoid causing a Trusted Types // violation due to eval. content::EXECUTE_SCRIPT_DEFAULT_OPTIONS, /*world_id=*/1));
diff --git a/chrome/browser/android/browserservices/metrics/java/src/org/chromium/chrome/browser/browserservices/metrics/WebApkUmaRecorder.java b/chrome/browser/android/browserservices/metrics/java/src/org/chromium/chrome/browser/browserservices/metrics/WebApkUmaRecorder.java index 2d71d45d..7931eed 100644 --- a/chrome/browser/android/browserservices/metrics/java/src/org/chromium/chrome/browser/browserservices/metrics/WebApkUmaRecorder.java +++ b/chrome/browser/android/browserservices/metrics/java/src/org/chromium/chrome/browser/browserservices/metrics/WebApkUmaRecorder.java
@@ -97,7 +97,6 @@ private static final String HISTOGRAM_NEW_STYLE_LAUNCH_TO_SPLASHSCREEN_VISIBLE = "WebApk.Startup.Cold.NewStyle.ShellLaunchToSplashscreenVisible"; - private static final int WEBAPK_OPEN_MAX = 3; public static final int WEBAPK_OPEN_LAUNCH_SUCCESS = 0; // Obsolete: WEBAPK_OPEN_NO_LAUNCH_INTENT = 1; public static final int WEBAPK_OPEN_ACTIVITY_NOT_FOUND = 2;
diff --git a/chrome/browser/android/browserservices/verification/java/src/org/chromium/chrome/browser/browserservices/verification/ChromeOriginVerifierJunitTest.java b/chrome/browser/android/browserservices/verification/java/src/org/chromium/chrome/browser/browserservices/verification/ChromeOriginVerifierJunitTest.java index 8844b87..0f3f197c 100644 --- a/chrome/browser/android/browserservices/verification/java/src/org/chromium/chrome/browser/browserservices/verification/ChromeOriginVerifierJunitTest.java +++ b/chrome/browser/android/browserservices/verification/java/src/org/chromium/chrome/browser/browserservices/verification/ChromeOriginVerifierJunitTest.java
@@ -65,7 +65,6 @@ private ChromeOriginVerifier.Natives mMockChromeOriginVerifierJni; private CountDownLatch mVerificationResultLatch = new CountDownLatch(1); - private CountDownLatch mVerificationResultLatch2 = new CountDownLatch(1); private static class TestOriginVerificationListener implements OriginVerificationListener { private CountDownLatch mLatch;
diff --git a/chrome/browser/ash/BUILD.gn b/chrome/browser/ash/BUILD.gn index 19d70c0..4ce04ab 100644 --- a/chrome/browser/ash/BUILD.gn +++ b/chrome/browser/ash/BUILD.gn
@@ -3414,7 +3414,6 @@ "//chrome/browser/ash/login/oobe_quick_start:oobe_quick_start", "//chrome/browser/ash/login/oobe_quick_start:oobe_quick_start_pref_names", "//chrome/browser/ash/login/oobe_quick_start/connectivity:connectivity", - "//chrome/browser/ash/login/oobe_quick_start/logging", "//chrome/browser/ash/power/ml/smart_dim", "//chrome/browser/ash/system_web_apps/types", "//chrome/browser/ash/video_conference", @@ -3516,6 +3515,7 @@ "//chromeos/ash/components/phonehub/proto", "//chromeos/ash/components/policy", "//chromeos/ash/components/proximity_auth", + "//chromeos/ash/components/quick_start:quick_start", "//chromeos/ash/components/scanning", "//chromeos/ash/components/settings", "//chromeos/ash/components/smbfs",
diff --git a/chrome/browser/ash/accessibility/accessibility_highlights_browsertest.cc b/chrome/browser/ash/accessibility/accessibility_highlights_browsertest.cc index 0242b72..374f8b2 100644 --- a/chrome/browser/ash/accessibility/accessibility_highlights_browsertest.cc +++ b/chrome/browser/ash/accessibility/accessibility_highlights_browsertest.cc
@@ -191,10 +191,19 @@ ->GetViewByID(VIEW_ID_OMNIBOX) ->GetBoundsInScreen(); - // -1 presumably because of rounding. Visually, it looks fine (8px between the - // top of the cursor to the top of the omnibox; 7px at the bottom). Moving the - // cursor down 1px would only make it less balanced (9 & 6px). - EXPECT_EQ(bounds.CenterPoint().y(), omnibox_bounds.CenterPoint().y() - 1); + // TODO(crbug.com/1453711): Investigate why ChromeOS miscalculates the + // focus ring position in tests only. When running Chromium normally, the + // focus ring looks correct and is centered in the omnibox, but when running + // tests it sits 1px higher than where the code (and a visual check) + // says it should be. + int expected_offset = +#if BUILDFLAG(IS_CHROMEOS) + 1; +#else + 0; +#endif + EXPECT_EQ(bounds.CenterPoint().y(), + omnibox_bounds.CenterPoint().y() - expected_offset); // On the left edge of the omnibox. EXPECT_LT(bounds.x(), omnibox_bounds.x());
diff --git a/chrome/browser/ash/crosapi/wallpaper_ash.cc b/chrome/browser/ash/crosapi/wallpaper_ash.cc index e2b4531..efc4aea 100644 --- a/chrome/browser/ash/crosapi/wallpaper_ash.cc +++ b/chrome/browser/ash/crosapi/wallpaper_ash.cc
@@ -18,6 +18,7 @@ #include "components/account_id/account_id.h" #include "components/user_manager/user.h" #include "content/public/browser/browser_thread.h" +#include "extensions/browser/extension_function_crash_keys.h" #include "services/data_decoder/public/cpp/decode_image.h" #include "ui/gfx/codec/jpeg_codec.h" #include "ui/gfx/image/image_skia.h" @@ -108,26 +109,27 @@ SendErrorResult( "Received a new SetWallpaper request that overrides this one."); } + extension_id_ = extension_id; + extensions::extension_function_crash_keys::StartExtensionFunctionCall( + extension_id_); pending_callback_ = std::move(callback); const std::vector<uint8_t>& data = wallpaper_settings->data; data_decoder::DecodeImage( &data_decoder_, data, data_decoder::mojom::ImageCodec::kDefault, /*shrink_to_fit=*/true, data_decoder::kDefaultMaxSizeInBytes, /*desired_image_frame_size=*/gfx::Size(), - base::BindOnce( - &WallpaperAsh::OnWallpaperDecoded, weak_ptr_factory_.GetWeakPtr(), - std::move(wallpaper_settings), extension_id, extension_name)); + base::BindOnce(&WallpaperAsh::OnWallpaperDecoded, + weak_ptr_factory_.GetWeakPtr(), + std::move(wallpaper_settings))); } void WallpaperAsh::OnWallpaperDecoded( mojom::WallpaperSettingsPtr wallpaper_settings, - const std::string& extension_id, - const std::string& extension_name, const SkBitmap& bitmap) { DCHECK_CURRENTLY_ON(BrowserThread::UI); if (bitmap.isNull()) { LOG(ERROR) << "Decoding wallpaper data failed from extension_id '" - << extension_id << "'"; + << extension_id_ << "'"; SendErrorResult("Decoding wallpaper data failed."); return; } @@ -172,6 +174,9 @@ void WallpaperAsh::SendErrorResult(const std::string& response) { std::move(pending_callback_) .Run(crosapi::mojom::SetWallpaperResult::NewErrorMessage(response)); + extensions::extension_function_crash_keys::EndExtensionFunctionCall( + extension_id_); + extension_id_.clear(); } void WallpaperAsh::SendSuccessResult( @@ -179,6 +184,9 @@ std::move(pending_callback_) .Run( crosapi::mojom::SetWallpaperResult::NewThumbnailData(thumbnail_data)); + extensions::extension_function_crash_keys::EndExtensionFunctionCall( + extension_id_); + extension_id_.clear(); } } // namespace crosapi
diff --git a/chrome/browser/ash/crosapi/wallpaper_ash.h b/chrome/browser/ash/crosapi/wallpaper_ash.h index 2c6be70c..7e6b0c0c 100644 --- a/chrome/browser/ash/crosapi/wallpaper_ash.h +++ b/chrome/browser/ash/crosapi/wallpaper_ash.h
@@ -17,6 +17,7 @@ namespace crosapi { +// Ash-side implementation of the wallpaper extension API, called from Lacros over crosapi. class WallpaperAsh : public mojom::Wallpaper { public: WallpaperAsh(); @@ -39,13 +40,13 @@ private: void OnWallpaperDecoded(mojom::WallpaperSettingsPtr wallpaper_settings, - const std::string& extension_id, - const std::string& extension_name, const SkBitmap& bitmap); void SendErrorResult(const std::string& response); void SendSuccessResult(const std::vector<uint8_t>& thumbnail_data); mojo::ReceiverSet<mojom::Wallpaper> receivers_; + // The ID of the extension making the current SetWallpaper() call. + std::string extension_id_; SetWallpaperCallback pending_callback_; data_decoder::DataDecoder data_decoder_; base::WeakPtrFactory<WallpaperAsh> weak_ptr_factory_{this};
diff --git a/chrome/browser/ash/crosapi/wallpaper_ash_unittest.cc b/chrome/browser/ash/crosapi/wallpaper_ash_unittest.cc index 9d45518..01d5d2be 100644 --- a/chrome/browser/ash/crosapi/wallpaper_ash_unittest.cc +++ b/chrome/browser/ash/crosapi/wallpaper_ash_unittest.cc
@@ -20,6 +20,7 @@ #include "chrome/test/base/testing_profile_manager.h" #include "chromeos/ash/components/login/login_state/login_state.h" #include "chromeos/crosapi/mojom/wallpaper.mojom.h" +#include "components/crash/core/common/crash_key.h" #include "components/user_manager/scoped_user_manager.h" #include "components/user_manager/user_manager.h" #include "components/user_manager/user_names.h" @@ -76,9 +77,12 @@ WallpaperControllerClientImpl>( std::make_unique<wallpaper_handlers::TestWallpaperFetcherDelegate>()); wallpaper_controller_client_->InitForTesting(&test_wallpaper_controller_); + + crash_reporter::InitializeCrashKeysForTesting(); } void TearDown() override { + crash_reporter::ResetCrashKeysForTesting(); ash::LoginState::Shutdown(); wallpaper_controller_client_.reset(); } @@ -157,4 +161,58 @@ ASSERT_EQ(0, test_wallpaper_controller_.get_third_party_wallpaper_count()); } + +TEST_F(WallpaperAshTest, SetWallpaper_CrashKeys_OnSuccess) { + test_wallpaper_controller_.SetCurrentUser(user_manager::StubAccountId()); + + // Create valid settings. + crosapi::mojom::WallpaperSettingsPtr settings = + crosapi::mojom::WallpaperSettings::New(); + settings->data = CreateJpeg(); + + // Invoke SetWallpaper(). It will respond with success. + base::RunLoop loop; + wallpaper_ash_.SetWallpaper( + std::move(settings), "extension_id", "extension_name", + base::BindLambdaForTesting( + [&loop](const crosapi::mojom::SetWallpaperResultPtr result) { + ASSERT_FALSE(result->is_error_message()); + loop.Quit(); + })); + + // Crash key is set when function starts running. + using crash_reporter::GetCrashKeyValue; + EXPECT_EQ(GetCrashKeyValue("extension-function-caller-1"), "extension_id"); + + // Crash key is cleared after function completes. + loop.Run(); + EXPECT_EQ(GetCrashKeyValue("extension-function-caller-1"), ""); +} + +TEST_F(WallpaperAshTest, SetWallpaper_CrashKeys_OnError) { + test_wallpaper_controller_.SetCurrentUser(user_manager::StubAccountId()); + + // Create invalid data by not adding a wallpaper image to the settings data. + crosapi::mojom::WallpaperSettingsPtr settings = + crosapi::mojom::WallpaperSettings::New(); + + // Invoke SetWallpaper(). It will respond with an error. + base::RunLoop loop; + wallpaper_ash_.SetWallpaper( + std::move(settings), "extension_id", "extension_name", + base::BindLambdaForTesting( + [&loop](const crosapi::mojom::SetWallpaperResultPtr result) { + ASSERT_TRUE(result->is_error_message()); + loop.Quit(); + })); + + // Crash key is set when function starts running. + using crash_reporter::GetCrashKeyValue; + EXPECT_EQ(GetCrashKeyValue("extension-function-caller-1"), "extension_id"); + + // Crash key is cleared after function completes. + loop.Run(); + EXPECT_EQ(GetCrashKeyValue("extension-function-caller-1"), ""); +} + } // namespace crosapi
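The wallpaper changes bracket each crosapi SetWallpaper() call with the extension-function crash keys, so a crash during wallpaper decoding can be attributed to the calling extension. A minimal sketch of the same bracketing pattern in isolation, assuming only the helper header used above (the function and extension ID below are placeholders):

#include <string>

#include "extensions/browser/extension_function_crash_keys.h"

void DoWorkForExtension(const std::string& extension_id) {
  // Record the caller in the extension-function-caller-* crash keys for the
  // duration of the call, mirroring what WallpaperAsh::SetWallpaper now does.
  extensions::extension_function_crash_keys::StartExtensionFunctionCall(
      extension_id);

  // ... work that might crash runs here ...

  extensions::extension_function_crash_keys::EndExtensionFunctionCall(
      extension_id);
}
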
diff --git a/chrome/browser/ash/login/oobe_quick_start/BUILD.gn b/chrome/browser/ash/login/oobe_quick_start/BUILD.gn index 0285a7d4..cd3a05c 100644 --- a/chrome/browser/ash/login/oobe_quick_start/BUILD.gn +++ b/chrome/browser/ash/login/oobe_quick_start/BUILD.gn
@@ -17,13 +17,13 @@ deps = [ ":oobe_quick_start_pref_names", "connectivity", - "logging", "//base", "//chrome/browser:browser_process", "//chrome/common:channel_info", "//chromeos/ash/components/attestation:attestation", "//chromeos/ash/components/dbus/attestation:attestation_proto", "//chromeos/ash/components/dbus/constants:constants", + "//chromeos/ash/components/quick_start:quick_start", "//chromeos/ash/services/nearby/public/mojom", "//chromeos/dbus/power:power", "//components/account_id:account_id", @@ -53,7 +53,6 @@ ":oobe_quick_start", "connectivity:test_support", "connectivity:unit_tests", - "logging:unit_tests", "//base", "//base/test:test_support", "//chrome/test:test_support",
diff --git a/chrome/browser/ash/login/oobe_quick_start/connectivity/BUILD.gn b/chrome/browser/ash/login/oobe_quick_start/connectivity/BUILD.gn index fa19f7b..a55142fb 100644 --- a/chrome/browser/ash/login/oobe_quick_start/connectivity/BUILD.gn +++ b/chrome/browser/ash/login/oobe_quick_start/connectivity/BUILD.gn
@@ -12,7 +12,6 @@ "//base", "//chrome/browser:browser_process", "//chrome/browser/ash/login/oobe_quick_start:oobe_quick_start_pref_names", - "//chrome/browser/ash/login/oobe_quick_start/logging", "//chrome/browser/nearby_sharing/public/cpp", "//chrome/browser/nearby_sharing/public/cpp", "//chromeos/ash/components/quick_start",
diff --git a/chrome/browser/ash/login/oobe_quick_start/connectivity/connection.cc b/chrome/browser/ash/login/oobe_quick_start/connectivity/connection.cc index c67dee6..df14100 100644 --- a/chrome/browser/ash/login/oobe_quick_start/connectivity/connection.cc +++ b/chrome/browser/ash/login/oobe_quick_start/connectivity/connection.cc
@@ -16,8 +16,8 @@ #include "chrome/browser/ash/login/oobe_quick_start/connectivity/handshake_helpers.h" #include "chrome/browser/ash/login/oobe_quick_start/connectivity/random_session_id.h" #include "chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker.h" -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" #include "chrome/browser/nearby_sharing/public/cpp/nearby_connection.h" +#include "chromeos/ash/components/quick_start/logging.h" #include "chromeos/ash/components/quick_start/quick_start_message.h" #include "chromeos/ash/components/quick_start/quick_start_requests.h" #include "chromeos/ash/services/nearby/public/mojom/quick_start_decoder.mojom.h" @@ -236,7 +236,12 @@ mojom::GetAssertionResponse::GetAssertionStatus::kSuccess; if (response->status != success) { - std::move(callback).Run(absl::nullopt); + // TODO (b/286877412): Update this logic once we've aligned on an unknown + // message strategy. + QS_LOG(INFO) << "Ignoring message and re-reading"; + nearby_connection_->Read( + base::BindOnce(&Connection::OnRequestAccountTransferAssertionResponse, + weak_ptr_factory_.GetWeakPtr(), std::move(callback))); return; }
diff --git a/chrome/browser/ash/login/oobe_quick_start/connectivity/handshake_helpers.cc b/chrome/browser/ash/login/oobe_quick_start/connectivity/handshake_helpers.cc index c2c1643..fbf91856 100644 --- a/chrome/browser/ash/login/oobe_quick_start/connectivity/handshake_helpers.cc +++ b/chrome/browser/ash/login/oobe_quick_start/connectivity/handshake_helpers.cc
@@ -6,7 +6,7 @@ #include "base/containers/span.h" #include "chrome/browser/ash/login/oobe_quick_start/connectivity/proto/aes_gcm_authentication_message.pb.h" -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" +#include "chromeos/ash/components/quick_start/logging.h" #include "crypto/aead.h" #include "crypto/random.h"
diff --git a/chrome/browser/ash/login/oobe_quick_start/connectivity/random_session_id.cc b/chrome/browser/ash/login/oobe_quick_start/connectivity/random_session_id.cc index 41b5ccc..7c409b6 100644 --- a/chrome/browser/ash/login/oobe_quick_start/connectivity/random_session_id.cc +++ b/chrome/browser/ash/login/oobe_quick_start/connectivity/random_session_id.cc
@@ -7,7 +7,7 @@ #include "base/base64url.h" #include "base/ranges/algorithm.h" #include "base/strings/stringprintf.h" -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" +#include "chromeos/ash/components/quick_start/logging.h" #include "crypto/random.h" namespace ash::quick_start {
diff --git a/chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker_impl.cc b/chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker_impl.cc index 561c244..1e127a5 100644 --- a/chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker_impl.cc +++ b/chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker_impl.cc
@@ -17,9 +17,9 @@ #include "chrome/browser/ash/login/oobe_quick_start/connectivity/fast_pair_advertiser.h" #include "chrome/browser/ash/login/oobe_quick_start/connectivity/random_session_id.h" #include "chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker.h" -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" #include "chrome/browser/ash/login/oobe_quick_start/oobe_quick_start_pref_names.h" #include "chrome/browser/browser_process.h" +#include "chromeos/ash/components/quick_start/logging.h" #include "components/prefs/pref_service.h" #include "crypto/random.h" #include "device/bluetooth/bluetooth_adapter.h"
diff --git a/chrome/browser/ash/login/oobe_quick_start/logging/BUILD.gn b/chrome/browser/ash/login/oobe_quick_start/logging/BUILD.gn deleted file mode 100644 index 313d393..0000000 --- a/chrome/browser/ash/login/oobe_quick_start/logging/BUILD.gn +++ /dev/null
@@ -1,26 +0,0 @@ -# Copyright 2022 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -import("//build/config/chromeos/ui_mode.gni") - -assert(is_chromeos_ash) - -source_set("logging") { - sources = [ - "logging.cc", - "logging.h", - ] - deps = [ "//base" ] -} - -source_set("unit_tests") { - testonly = true - sources = [ "logging_unittest.cc" ] - - deps = [ - ":logging", - "//base/test:test_support", - "//testing/gtest", - ] -}
diff --git a/chrome/browser/ash/login/oobe_quick_start/logging/OWNERS b/chrome/browser/ash/login/oobe_quick_start/logging/OWNERS deleted file mode 100644 index 8221a66..0000000 --- a/chrome/browser/ash/login/oobe_quick_start/logging/OWNERS +++ /dev/null
@@ -1 +0,0 @@ -file://chrome/browser/ash/login/oobe_quick_start/connectivity/OWNERS
diff --git a/chrome/browser/ash/login/oobe_quick_start/logging/README.md b/chrome/browser/ash/login/oobe_quick_start/logging/README.md deleted file mode 100644 index bf1a486..0000000 --- a/chrome/browser/ash/login/oobe_quick_start/logging/README.md +++ /dev/null
@@ -1,7 +0,0 @@ -This directory implements several logging macros to be used with Quick Start. - -Use `QS_LOG(severity)` for general-purpose logging, and will emit logs to the -standard logging system. VERBOSE messages logged in this manner can be emitted -to the logs by using the `--quick-start-verbose-logging` command-line flag. - -See go/cros-quickstart-logging for more info.
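The removed README documented the QS_LOG macro; the macro itself now lives in the shared quick_start component, as the include swaps above show. A minimal usage sketch at the new location (the function and log message are illustrative):

#include "chromeos/ash/components/quick_start/logging.h"

void OnAdvertisingStarted() {
  // QS_LOG mirrors the standard LOG(severity) macros; per the removed README,
  // VERBOSE output is surfaced with the --quick-start-verbose-logging flag.
  QS_LOG(INFO) << "Fast Pair advertising started.";
}
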
diff --git a/chrome/browser/ash/login/oobe_quick_start/target_device_bootstrap_controller.cc b/chrome/browser/ash/login/oobe_quick_start/target_device_bootstrap_controller.cc index ce949731..95b0b12 100644 --- a/chrome/browser/ash/login/oobe_quick_start/target_device_bootstrap_controller.cc +++ b/chrome/browser/ash/login/oobe_quick_start/target_device_bootstrap_controller.cc
@@ -16,9 +16,9 @@ #include "chrome/browser/ash/login/oobe_quick_start/connectivity/fido_assertion_info.h" #include "chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker.h" #include "chrome/browser/ash/login/oobe_quick_start/connectivity/target_device_connection_broker_factory.h" -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" #include "chrome/browser/ash/login/oobe_quick_start/oobe_quick_start_pref_names.h" #include "chrome/browser/browser_process.h" +#include "chromeos/ash/components/quick_start/logging.h" #include "chromeos/ash/services/nearby/public/mojom/quick_start_decoder_types.mojom.h" #include "chromeos/dbus/power/power_manager_client.h" #include "components/prefs/pref_service.h"
diff --git a/chrome/browser/ash/login/screens/quick_start_screen.cc b/chrome/browser/ash/login/screens/quick_start_screen.cc index c45c54d4..2cafdcc 100644 --- a/chrome/browser/ash/login/screens/quick_start_screen.cc +++ b/chrome/browser/ash/login/screens/quick_start_screen.cc
@@ -10,12 +10,12 @@ #include "base/notreached.h" #include "base/strings/utf_string_conversions.h" #include "base/time/time.h" -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" #include "chrome/browser/ash/login/oobe_quick_start/target_device_bootstrap_controller.h" #include "chrome/browser/ash/login/oobe_quick_start/verification_shapes.h" #include "chrome/browser/ash/login/ui/login_display_host.h" #include "chrome/browser/ash/login/wizard_context.h" #include "chrome/browser/ui/webui/ash/login/quick_start_screen_handler.h" +#include "chromeos/ash/components/quick_start/logging.h" #include "third_party/abseil-cpp/absl/types/variant.h" namespace ash {
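Note: the include updates above track the move of the Quick Start logging target out of chrome/browser/ash/login/oobe_quick_start/logging/ and into chromeos/ash/components/quick_start/. A minimal call-site sketch, assuming the QS_LOG macro keeps the interface described in the deleted README (the function name and severity token below are illustrative, not part of this CL):

    #include "chromeos/ash/components/quick_start/logging.h"

    // Hypothetical caller; only the include path and the QS_LOG(severity)
    // macro are taken from this CL and the removed README.
    void LogAdvertisingStart() {
      QS_LOG(INFO) << "Starting Quick Start advertising";
      // Per the removed README, VERBOSE output additionally requires the
      // --quick-start-verbose-logging command-line flag.
    }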
diff --git a/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller.cc b/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller.cc index 8a4897d..36c6a0d1 100644 --- a/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller.cc +++ b/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller.cc
@@ -226,6 +226,7 @@ .curtain_local_user_session = parameters_.curtain_local_user_session, .allow_troubleshooting_tools = parameters_.allow_troubleshooting_tools, .allow_reconnections = parameters_.allow_reconnections, + .allow_file_transfer = parameters_.allow_file_transfer, }; }
diff --git a/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller_unittest.cc b/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller_unittest.cc index 817418da..466c6fe 100644 --- a/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller_unittest.cc +++ b/chrome/browser/ash/policy/remote_commands/crd_admin_session_controller_unittest.cc
@@ -387,6 +387,22 @@ EXPECT_EQ(actual_parameters.allow_troubleshooting_tools, GetParam()); } +TEST_P(CrdAdminSessionControllerTest, + ShouldPassAllowFileTransferToRemotingService) { + SessionParameters parameters; + parameters.allow_file_transfer = GetParam(); + + remoting::ChromeOsEnterpriseParams actual_parameters; + EXPECT_CALL(remoting_service(), StartSession) + .WillOnce(SaveParamAndInvokeCallback(&actual_parameters)); + + session_controller().StartCrdHostAndGetCode(parameters, success_callback(), + error_callback(), + session_finished_callback()); + + EXPECT_EQ(actual_parameters.allow_file_transfer, GetParam()); +} + TEST_F(CrdAdminSessionControllerTest, ShouldReportErrorIfStartSessionReturnsError) { EXPECT_CALL(remoting_service(), StartSession)
diff --git a/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.cc b/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.cc index 1ec4a1827..482fb75c 100644 --- a/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.cc +++ b/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.cc
@@ -159,6 +159,11 @@ .LogSessionDuration(session_duration); } +bool IsKioskSession(UserSessionType session_type) { + return session_type == UserSessionType::AUTO_LAUNCHED_KIOSK_SESSION || + session_type == UserSessionType::MANUALLY_LAUNCHED_KIOSK_SESSION; +} + } // namespace //////////////////////////////////////////////////////////////////////////////// @@ -382,6 +387,7 @@ parameters.admin_email = admin_email_; parameters.allow_troubleshooting_tools = ShouldAllowTroubleshootingTools(); parameters.allow_reconnections = ShouldAllowReconnections(); + parameters.allow_file_transfer = ShouldAllowFileTransfer(); delegate_->StartCrdHostAndGetCode( parameters, @@ -556,16 +562,23 @@ } bool DeviceCommandStartCrdSessionJob::ShouldAllowTroubleshootingTools() const { - if (GetCurrentUserSessionType() != - UserSessionType::AUTO_LAUNCHED_KIOSK_SESSION && - GetCurrentUserSessionType() != - UserSessionType::MANUALLY_LAUNCHED_KIOSK_SESSION) { + if (!IsKioskSession(GetCurrentUserSessionType())) { return false; } return CHECK_DEREF(ProfileManager::GetActiveUserProfile()->GetPrefs()) .GetBoolean(prefs::kKioskTroubleshootingToolsEnabled); } +bool DeviceCommandStartCrdSessionJob::ShouldAllowFileTransfer() const { + if (!IsKioskSession(GetCurrentUserSessionType())) { + return false; + } + + // TODO(b/284944528): Add check here for policy. + return base::FeatureList::IsEnabled( + remoting::features::kEnableCrdFileTransferForKiosk); +} + DeviceCommandStartCrdSessionJob::ErrorCallback DeviceCommandStartCrdSessionJob::GetErrorCallback() { return base::BindOnce(&DeviceCommandStartCrdSessionJob::FinishWithError,
diff --git a/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.h b/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.h index e8a18ef..cf24e65b 100644 --- a/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.h +++ b/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_job.h
@@ -50,6 +50,7 @@ bool curtain_local_user_session = false; bool allow_troubleshooting_tools = false; bool allow_reconnections = false; + bool allow_file_transfer = false; }; virtual ~Delegate() = default; @@ -122,6 +123,7 @@ bool ShouldTerminateUponInput() const; bool ShouldAllowReconnections() const; bool ShouldAllowTroubleshootingTools() const; + bool ShouldAllowFileTransfer() const; ErrorCallback GetErrorCallback();
diff --git a/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_unittest.cc b/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_unittest.cc index 94be1e36..e8a75b8 100644 --- a/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_unittest.cc +++ b/chrome/browser/ash/policy/remote_commands/device_command_start_crd_session_unittest.cc
@@ -54,6 +54,7 @@ using chromeos::network_config::mojom::OncSource; using remoting::features::kEnableCrdAdminRemoteAccess; using remoting::features::kEnableCrdAdminRemoteAccessV2; +using remoting::features::kEnableCrdFileTransferForKiosk; using Payload = base::Value::Dict; @@ -248,6 +249,25 @@ } } +// Returns true if the given session type is a kiosk session. +bool IsKioskSession(TestSessionType user_session_type) { + switch (user_session_type) { + case TestSessionType::kManuallyLaunchedArcKioskSession: + case TestSessionType::kManuallyLaunchedWebKioskSession: + case TestSessionType::kManuallyLaunchedKioskSession: + case TestSessionType::kAutoLaunchedArcKioskSession: + case TestSessionType::kAutoLaunchedWebKioskSession: + case TestSessionType::kAutoLaunchedKioskSession: + return true; + case TestSessionType::kNoSession: + case TestSessionType::kManagedGuestSession: + case TestSessionType::kAffiliatedUserSession: + case TestSessionType::kGuestSession: + case TestSessionType::kUnaffiliatedUserSession: + return false; + } +} + struct Result { RemoteCommandJob::Status status; std::string payload; @@ -822,6 +842,40 @@ duration, /*expected_bucket_count=*/1); } +TEST_P(DeviceCommandStartCrdSessionJobTestParameterized, + ShouldAllowFileTransferForKioskSessionsWhenFeatureIsEnabled) { + TestSessionType user_session_type = GetParam(); + SCOPED_TRACE(base::StringPrintf("Testing session type %s", + SessionTypeToString(user_session_type))); + if (!SupportsRemoteSupport(user_session_type)) { + return; + } + base::test::ScopedFeatureList feature_list; + feature_list.InitAndEnableFeature(kEnableCrdFileTransferForKiosk); + StartSessionOfType(user_session_type); + RunJobAndWaitForResult(); + bool supports_file_transfer = IsKioskSession(user_session_type); + + EXPECT_EQ(session_controller().session_parameters().allow_file_transfer, + supports_file_transfer); +} + +TEST_P(DeviceCommandStartCrdSessionJobTestParameterized, + ShouldNotAllowFileTransferForAnySessionWhenFeatureIsNotEnabled) { + TestSessionType user_session_type = GetParam(); + SCOPED_TRACE(base::StringPrintf("Testing session type %s", + SessionTypeToString(user_session_type))); + if (!SupportsRemoteSupport(user_session_type)) { + return; + } + + StartSessionOfType(user_session_type); + RunJobAndWaitForResult(); + + EXPECT_EQ(session_controller().session_parameters().allow_file_transfer, + false); +} + TEST_F(DeviceCommandStartCrdSessionJobTest, ShouldSendErrorUmaLogWhenUserTypeIsNotSupported) { base::HistogramTester histogram_tester; @@ -1074,6 +1128,26 @@ } } +TEST_P(DeviceCommandStartCrdSessionJobRemoteAccessTestParameterized, + ShouldNeverAllowFileTransferForRemoteAccessWhenFeatureIsEnabled) { + TestSessionType user_session_type = GetParam(); + SCOPED_TRACE(base::StringPrintf("Testing session type %s", + SessionTypeToString(user_session_type))); + if (!SupportsRemoteAccess(user_session_type)) { + return; + } + + base::test::ScopedFeatureList feature_list; + feature_list.InitAndEnableFeature(kEnableCrdFileTransferForKiosk); + StartSessionOfType(user_session_type); + AddActiveManagedNetwork(); + RunJobAndWaitForResult( + Payload().Set("crdSessionType", CrdSessionType::REMOTE_ACCESS_SESSION)); + + EXPECT_EQ(session_controller().session_parameters().allow_file_transfer, + false); +} + TEST_P( DeviceCommandStartCrdSessionJobRemoteAccessTestParameterized, ShouldNotAllowReconnectionsForRemoteAccessSessionsIfV2FeatureIsDisabled) {
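Note: taken together, the CRD hunks above plumb one new boolean, allow_file_transfer, from the start-CRD-session remote command into remoting::ChromeOsEnterpriseParams. A condensed sketch of the gating introduced here, using the names from the hunks (the enclosing class is omitted):

    #include "base/feature_list.h"

    // File transfer is only offered in kiosk sessions, and only while the
    // launch feature is enabled; the hunk above leaves a TODO for a dedicated
    // policy check.
    bool ShouldAllowFileTransfer(UserSessionType session_type) {
      if (!IsKioskSession(session_type)) {
        return false;
      }
      return base::FeatureList::IsEnabled(
          remoting::features::kEnableCrdFileTransferForKiosk);
    }

The resulting value travels as SessionParameters::allow_file_transfer and is copied into ChromeOsEnterpriseParams::allow_file_transfer when the session is started.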
diff --git a/chrome/browser/autofill/android/BUILD.gn b/chrome/browser/autofill/android/BUILD.gn index e1b3a4e..ec3cbf6 100644 --- a/chrome/browser/autofill/android/BUILD.gn +++ b/chrome/browser/autofill/android/BUILD.gn
@@ -21,7 +21,7 @@ "java/src/org/chromium/chrome/browser/autofill/LegalMessageLine.java", "java/src/org/chromium/chrome/browser/autofill/PersonalDataManager.java", "java/src/org/chromium/chrome/browser/autofill/PhoneNumberUtil.java", - "java/src/org/chromium/chrome/browser/autofill/editors/AddressEditor.java", + "java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorCoordinator.java", "java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorMediator.java", "java/src/org/chromium/chrome/browser/autofill/editors/DropdownFieldAdapter.java", "java/src/org/chromium/chrome/browser/autofill/editors/DropdownFieldView.java",
diff --git a/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditor.java b/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorCoordinator.java similarity index 92% rename from chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditor.java rename to chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorCoordinator.java index b146920..aaa29586 100644 --- a/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditor.java +++ b/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorCoordinator.java
@@ -17,7 +17,7 @@ /** * An address editor. Can be used for either shipping or billing address editing. */ -public class AddressEditor { +public class AddressEditorCoordinator { private final AddressEditorMediator mMediator; private final EditorDialogView mEditorDialog; @@ -57,7 +57,7 @@ * @param profile Current user's profile. * @param saveToDisk Whether to save changes to disk after editing. */ - public AddressEditor( + public AddressEditorCoordinator( EditorDialogView editorDialog, Delegate delegate, Profile profile, boolean saveToDisk) { this(editorDialog, delegate, profile, new AutofillAddress(editorDialog.getContext(), AutofillProfile.builder().build()), @@ -74,8 +74,9 @@ * @param userFlow * @param saveToDisk Whether to save changes to disk after editing. */ - public AddressEditor(EditorDialogView editorDialog, Delegate delegate, Profile profile, - AutofillAddress addressToEdit, @UserFlow int userFlow, boolean saveToDisk) { + public AddressEditorCoordinator(EditorDialogView editorDialog, Delegate delegate, + Profile profile, AutofillAddress addressToEdit, @UserFlow int userFlow, + boolean saveToDisk) { mMediator = new AddressEditorMediator( editorDialog.getContext(), delegate, profile, addressToEdit, userFlow, saveToDisk); mEditorDialog = editorDialog;
diff --git a/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorMediator.java b/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorMediator.java index 3479e16f..ee89f357 100644 --- a/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorMediator.java +++ b/chrome/browser/autofill/android/java/src/org/chromium/chrome/browser/autofill/editors/AddressEditorMediator.java
@@ -4,10 +4,10 @@ package org.chromium.chrome.browser.autofill.editors; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.CREATE_NEW_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.SAVE_NEW_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.CREATE_NEW_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.SAVE_NEW_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; import static org.chromium.chrome.browser.autofill.editors.EditorProperties.ALL_KEYS; import static org.chromium.chrome.browser.autofill.editors.EditorProperties.CANCEL_RUNNABLE; import static org.chromium.chrome.browser.autofill.editors.EditorProperties.CUSTOM_DONE_BUTTON_TEXT; @@ -61,7 +61,8 @@ import org.chromium.chrome.browser.autofill.PhoneNumberUtil; import org.chromium.chrome.browser.autofill.R; import org.chromium.chrome.browser.autofill.Source; -import org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.Delegate; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow; import org.chromium.chrome.browser.autofill.editors.EditorProperties.DropdownKeyValue; import org.chromium.chrome.browser.autofill.editors.EditorProperties.EditorFieldValidator; import org.chromium.chrome.browser.flags.ChromeFeatureList; @@ -87,8 +88,8 @@ import java.util.UUID; /** - * Contains the logic for the AddressEditor component. It sets the state of the model and reacts to - * events like address country selection. + * Contains the logic for the autofill address editor component. It sets the state of the model and + * reacts to events like address country selection. */ class AddressEditorMediator { private final Handler mHandler = new Handler(); @@ -99,7 +100,7 @@ new CountryAwarePhoneNumberValidator(true); private final AutofillProfileBridge mAutofillProfileBridge = new AutofillProfileBridge(); private final Context mContext; - private final AddressEditor.Delegate mDelegate; + private final Delegate mDelegate; private final Profile mProfile; private final AutofillProfile mProfileToEdit; private final AutofillAddress mAddressToEdit; @@ -183,7 +184,7 @@ return supportedCountries; } - AddressEditorMediator(Context context, AddressEditor.Delegate delegate, Profile profile, + AddressEditorMediator(Context context, Delegate delegate, Profile profile, AutofillAddress addressToEdit, @UserFlow int userFlow, boolean saveToDisk) { mContext = context; mDelegate = delegate;
diff --git a/chrome/browser/autofill/android/javatest/src/org/chromium/chrome/browser/autofill/editors/AddressEditorRenderTest.java b/chrome/browser/autofill/android/javatest/src/org/chromium/chrome/browser/autofill/editors/AddressEditorRenderTest.java index 8db8450..1dfd8ddc 100644 --- a/chrome/browser/autofill/android/javatest/src/org/chromium/chrome/browser/autofill/editors/AddressEditorRenderTest.java +++ b/chrome/browser/autofill/android/javatest/src/org/chromium/chrome/browser/autofill/editors/AddressEditorRenderTest.java
@@ -11,9 +11,9 @@ import static org.mockito.Mockito.when; import static org.chromium.base.test.util.Restriction.RESTRICTION_TYPE_NON_LOW_END_DEVICE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.SAVE_NEW_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.SAVE_NEW_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; import static org.chromium.content_public.browser.test.util.TestThreadUtils.runOnUiThreadBlocking; import static org.chromium.ui.test.util.UiRestriction.RESTRICTION_TYPE_PHONE; @@ -48,6 +48,7 @@ import org.chromium.chrome.browser.autofill.PhoneNumberUtil; import org.chromium.chrome.browser.autofill.PhoneNumberUtilJni; import org.chromium.chrome.browser.autofill.Source; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.Delegate; import org.chromium.chrome.browser.feedback.HelpAndFeedbackLauncher; import org.chromium.chrome.browser.flags.ChromeFeatureList; import org.chromium.chrome.browser.profiles.Profile; @@ -67,7 +68,8 @@ import java.util.List; /** - * These tests render screenshots of the {@link AddressEditor} and compare them to a gold standard. + * These tests render screenshots of the autofill address editor and compare them to a gold + * standard. */ @DoNotBatch(reason = "The tests can't be batched because they run for different set-ups.") @RunWith(ParameterizedRunner.class) @@ -144,7 +146,7 @@ @Mock private HelpAndFeedbackLauncher mLauncher; @Mock - private AddressEditor.Delegate mDelegate; + private Delegate mDelegate; private final CoreAccountInfo mAccountInfo = CoreAccountInfo.createFromEmailAndGaiaId(USER_EMAIL, "gaia_id"); @@ -237,8 +239,8 @@ View editor = runOnUiThreadBlocking(() -> { EditorDialogView dialog = new EditorDialogView(getActivity(), /*deleteRunnable=*/null, mLauncher); - AddressEditor addressEditor = - new AddressEditor(dialog, mDelegate, mProfile, /*saveToDisk=*/false); + AddressEditorCoordinator addressEditor = + new AddressEditorCoordinator(dialog, mDelegate, mProfile, /*saveToDisk=*/false); addressEditor.showEditorDialog(); return dialog.getDataViewForTest(); }); @@ -253,8 +255,8 @@ when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); EditorDialogView dialog = new EditorDialogView(getActivity(), /*deleteRunnable=*/null, mLauncher); - AddressEditor addressEditor = - new AddressEditor(dialog, mDelegate, mProfile, /*saveToDisk=*/false); + AddressEditorCoordinator addressEditor = + new AddressEditorCoordinator(dialog, mDelegate, mProfile, /*saveToDisk=*/false); addressEditor.showEditorDialog(); return dialog.getDataViewForTest(); }); @@ -269,8 +271,8 @@ when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); EditorDialogView dialog = new EditorDialogView(getActivity(), /*deleteRunnable=*/null, mLauncher); - AddressEditor addressEditor = new AddressEditor(dialog, mDelegate, mProfile, - new AutofillAddress(getActivity(), sLocalProfile), + AddressEditorCoordinator addressEditor = new AddressEditorCoordinator(dialog, mDelegate, + 
mProfile, new AutofillAddress(getActivity(), sLocalProfile), UPDATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); addressEditor.showEditorDialog(); return dialog.getDataViewForTest(); @@ -286,8 +288,9 @@ when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); EditorDialogView dialog = new EditorDialogView(getActivity(), /*deleteRunnable=*/null, mLauncher); - AddressEditor addressEditor = new AddressEditor(dialog, mDelegate, mProfile, - new AutofillAddress(getActivity(), sAccountProfile), SAVE_NEW_ADDRESS_PROFILE, + AddressEditorCoordinator addressEditor = new AddressEditorCoordinator(dialog, mDelegate, + mProfile, new AutofillAddress(getActivity(), sAccountProfile), + SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); addressEditor.showEditorDialog(); return dialog.getDataViewForTest(); @@ -303,8 +306,8 @@ when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); EditorDialogView dialog = new EditorDialogView(getActivity(), /*deleteRunnable=*/null, mLauncher); - AddressEditor addressEditor = new AddressEditor(dialog, mDelegate, mProfile, - new AutofillAddress(getActivity(), sLocalProfile), + AddressEditorCoordinator addressEditor = new AddressEditorCoordinator(dialog, mDelegate, + mProfile, new AutofillAddress(getActivity(), sLocalProfile), MIGRATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); addressEditor.showEditorDialog(); return dialog.getDataViewForTest();
diff --git a/chrome/browser/autofill/android/junit/src/org/chromium/chrome/browser/autofill/editors/AddressEditorTest.java b/chrome/browser/autofill/android/junit/src/org/chromium/chrome/browser/autofill/editors/AddressEditorTest.java index 1a8098c5..e6cf7d7 100644 --- a/chrome/browser/autofill/android/junit/src/org/chromium/chrome/browser/autofill/editors/AddressEditorTest.java +++ b/chrome/browser/autofill/android/junit/src/org/chromium/chrome/browser/autofill/editors/AddressEditorTest.java
@@ -18,9 +18,9 @@ import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.SAVE_NEW_ADDRESS_PROFILE; -import static org.chromium.chrome.browser.autofill.editors.AddressEditor.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.MIGRATE_EXISTING_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.SAVE_NEW_ADDRESS_PROFILE; +import static org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.UserFlow.UPDATE_EXISTING_ADDRESS_PROFILE; import static org.chromium.chrome.browser.autofill.editors.EditorProperties.CANCEL_RUNNABLE; import static org.chromium.chrome.browser.autofill.editors.EditorProperties.CUSTOM_DONE_BUTTON_TEXT; import static org.chromium.chrome.browser.autofill.editors.EditorProperties.DELETE_CONFIRMATION_TEXT; @@ -71,6 +71,7 @@ import org.chromium.chrome.browser.autofill.PersonalDataManager; import org.chromium.chrome.browser.autofill.PersonalDataManager.AutofillProfile; import org.chromium.chrome.browser.autofill.Source; +import org.chromium.chrome.browser.autofill.editors.AddressEditorCoordinator.Delegate; import org.chromium.chrome.browser.autofill.editors.EditorProperties.DropdownKeyValue; import org.chromium.chrome.browser.autofill.editors.EditorProperties.TextInputType; import org.chromium.chrome.browser.flags.ChromeFeatureList; @@ -99,7 +100,7 @@ import java.util.stream.Collectors; import java.util.stream.StreamSupport; -/** Unit tests for {@link AddressEditor}. */ +/** Unit tests for autofill address editor. 
*/ @RunWith(BaseRobolectricTestRunner.class) @Config(manifest = Config.NONE) @EnableFeatures({ChromeFeatureList.AUTOFILL_ADDRESS_PROFILE_SAVE_PROMPT_NICKNAME_SUPPORT, @@ -170,7 +171,7 @@ @Mock private Profile mProfile; @Mock - private AddressEditor.Delegate mDelegate; + private Delegate mDelegate; @Captor private ArgumentCaptor<PropertyModel> mPropertyModelCapture; @@ -189,7 +190,7 @@ private AutofillAddress mEditedAutofillAddress; private Activity mActivity; - private AddressEditor mAddressEditor; + private AddressEditorCoordinator mAddressEditor; @Before public void setUp() { @@ -365,8 +366,8 @@ @Test @SmallTest public void validateCustomDoneButtonText() { - mAddressEditor = - new AddressEditor(mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); + mAddressEditor = new AddressEditorCoordinator( + mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); mAddressEditor.setCustomDoneButtonText("Custom done"); mAddressEditor.showEditorDialog(); @@ -380,8 +381,8 @@ @Test @SmallTest public void validateUIStrings_NewAddressProfile() { - mAddressEditor = - new AddressEditor(mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); + mAddressEditor = new AddressEditorCoordinator( + mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); mAddressEditor.showEditorDialog(); @@ -399,8 +400,8 @@ @SmallTest public void validateUIStrings_NewAddressProfile_EligibleForAddressAccountStorage() { when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); - mAddressEditor = - new AddressEditor(mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); + mAddressEditor = new AddressEditorCoordinator( + mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); mAddressEditor.showEditorDialog(); @@ -421,7 +422,7 @@ @Test @SmallTest public void validateUIStrings_LocalOrSyncAddressProfile_AddressSyncDisabled() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); @@ -440,7 +441,7 @@ @Test @SmallTest public void validateUIStrings_LocalOrSyncAddressProfile_AddressSyncEnabled() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); when(mSyncService.isSyncFeatureEnabled()).thenReturn(true); @@ -463,7 +464,7 @@ @Test @SmallTest public void validateUIStrings_UpdateLocalOrSyncAddressProfile_AddressSyncDisabled() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), UPDATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); @@ -482,7 +483,7 @@ @Test @SmallTest public void validateUIStrings_UpdateLocalOrSyncAddressProfile_AddressSyncEnabled() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), UPDATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); 
when(mSyncService.isSyncFeatureEnabled()).thenReturn(true); @@ -505,7 +506,7 @@ @Test @SmallTest public void validateUIStrings_LocalAddressProfile_MigrationToAccount() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), MIGRATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -529,7 +530,7 @@ @Test @SmallTest public void validateUIStrings_SyncAddressProfile_MigrationToAccount() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), MIGRATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); when(mSyncService.isSyncFeatureEnabled()).thenReturn(true); @@ -556,7 +557,7 @@ @Test @SmallTest public void validateUIStrings_AccountAddressProfile_SaveInAccountFlow() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sAccountProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); @@ -579,7 +580,7 @@ @Test @SmallTest public void validateUIStrings_AccountAddressProfile_UpdateAccountProfileFlow() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sAccountProfile), UPDATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); @@ -605,7 +606,7 @@ ChromeFeatureList.AUTOFILL_ENABLE_SUPPORT_FOR_HONORIFIC_PREFIXES}) public void validateDefaultFields_NicknamesDisabled_HonorificDisabled() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); @@ -647,7 +648,7 @@ @Test @SmallTest public void validateDefaultFields() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(new ArrayList()); @@ -671,8 +672,8 @@ @SmallTest public void validateShownFields_NewAddressProfile() { setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); - mAddressEditor = - new AddressEditor(mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); + mAddressEditor = new AddressEditorCoordinator( + mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); mAddressEditor.showEditorDialog(); validateShownFields(mPropertyModelCapture.getValue(), AutofillProfile.builder().build(), @@ -684,8 +685,8 @@ public void validateShownFields_NewAddressProfile_EligibleForAddressAccountStorage() { when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); - mAddressEditor = - new AddressEditor(mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); + mAddressEditor = new AddressEditorCoordinator( + mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); mAddressEditor.showEditorDialog(); validateShownFields(mPropertyModelCapture.getValue(), 
AutofillProfile.builder().build(), @@ -697,7 +698,7 @@ @SmallTest public void validateShownFields_LocalOrSyncAddressProfile_SaveLocally() { setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -710,7 +711,7 @@ @SmallTest public void validateShownFields_LocalOrSyncAddressProfile_UpdateLocally() { setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), UPDATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -723,7 +724,7 @@ @SmallTest public void validateShownFields_LocalOrSyncAddressProfile_MigrationToAccount() { setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), MIGRATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -736,7 +737,7 @@ @SmallTest public void validateShownFields_AccountProfile_SaveInAccountFlow() { setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sAccountProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -749,7 +750,7 @@ @SmallTest public void validateShownFields_AccountProfile_UpdateAlreadySaved() { setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sAccountProfile), UPDATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -761,7 +762,7 @@ @Test @SmallTest public void edit_ChangeCountry_FieldsSetChanges() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, sLocalProfile), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(List.of(new AddressUiComponent(AddressField.SORTING_CODE, @@ -816,8 +817,8 @@ @SmallTest public void edit_NewAddressProfile_EligibleForAddressAccountStorage() { when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); - mAddressEditor = - new AddressEditor(mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); + mAddressEditor = new AddressEditorCoordinator( + mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); mAddressEditor.showEditorDialog(); @@ -844,7 +845,7 @@ @Test @SmallTest public void edit_AlterAddressProfile_Cancel() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, new AutofillProfile(sLocalProfile)), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS); @@ -868,7 +869,7 @@ @Test @SmallTest public void edit_AlterAddressProfile_CommitChanges() { - mAddressEditor = new 
AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, new AutofillProfile(sLocalProfile)), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -898,7 +899,7 @@ @Test @SmallTest public void edit_AlterAddressProfile_CommitChanges_InvisibleFieldsGetReset() { - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, new AutofillProfile(sLocalProfile)), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -939,8 +940,8 @@ public void accountSavingDisallowedForUnsupportedCountry() { when(mPersonalDataManager.isEligibleForAddressAccountStorage()).thenReturn(true); when(mPersonalDataManager.isCountryEligibleForAccountStorage(eq("CU"))).thenReturn(false); - mAddressEditor = - new AddressEditor(mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); + mAddressEditor = new AddressEditorCoordinator( + mEditorDialog, mDelegate, mProfile, /*saveToDisk=*/false); setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS, "US"); setUpAddressUiComponents(SUPPORTED_ADDRESS_FIELDS, "CU"); mAddressEditor.showEditorDialog(); @@ -972,7 +973,7 @@ @SmallTest public void countryDropDownExcludesUnsupportedCountries_saveInAccountFlow() { when(mPersonalDataManager.isCountryEligibleForAccountStorage(eq("CU"))).thenReturn(false); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, new AutofillProfile(sAccountProfile)), SAVE_NEW_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -996,7 +997,7 @@ @SmallTest public void countryDropDownExcludesUnsupportedCountries_MigrationFlow() { when(mPersonalDataManager.isCountryEligibleForAccountStorage(eq("CU"))).thenReturn(false); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, new AutofillProfile(sLocalProfile)), MIGRATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false); @@ -1020,7 +1021,7 @@ @SmallTest public void countryDropDownExcludesUnsupportedCountries_editExistingAccountProfile() { when(mPersonalDataManager.isCountryEligibleForAccountStorage(eq("CU"))).thenReturn(false); - mAddressEditor = new AddressEditor(mEditorDialog, mDelegate, mProfile, + mAddressEditor = new AddressEditorCoordinator(mEditorDialog, mDelegate, mProfile, new AutofillAddress(mActivity, new AutofillProfile(sAccountProfile)), UPDATE_EXISTING_ADDRESS_PROFILE, /*saveToDisk=*/false);
diff --git a/chrome/browser/autofill/test/android/java/src/org/chromium/chrome/browser/autofill/PersonalDataManagerTest.java b/chrome/browser/autofill/test/android/java/src/org/chromium/chrome/browser/autofill/PersonalDataManagerTest.java index 5946d2b9..465d2df 100644 --- a/chrome/browser/autofill/test/android/java/src/org/chromium/chrome/browser/autofill/PersonalDataManagerTest.java +++ b/chrome/browser/autofill/test/android/java/src/org/chromium/chrome/browser/autofill/PersonalDataManagerTest.java
@@ -29,6 +29,7 @@ import org.chromium.base.ContextUtils; import org.chromium.base.test.BaseJUnit4ClassRunner; import org.chromium.base.test.util.Batch; +import org.chromium.base.test.util.DisabledTest; import org.chromium.base.test.util.Feature; import org.chromium.chrome.browser.autofill.PersonalDataManager.AutofillProfile; import org.chromium.chrome.browser.autofill.PersonalDataManager.CreditCard; @@ -528,6 +529,7 @@ @Test @SmallTest + @DisabledTest(message = "https://crbug.com/1454222") @Feature({"Autofill"}) public void testProfilesFrecency() throws TimeoutException { // Create 3 profiles.
diff --git a/chrome/browser/bluetooth/android/java/src/org/chromium/chrome/browser/bluetooth/BluetoothNotificationManager.java b/chrome/browser/bluetooth/android/java/src/org/chromium/chrome/browser/bluetooth/BluetoothNotificationManager.java index ee3b9a1..d780055 100644 --- a/chrome/browser/bluetooth/android/java/src/org/chromium/chrome/browser/bluetooth/BluetoothNotificationManager.java +++ b/chrome/browser/bluetooth/android/java/src/org/chromium/chrome/browser/bluetooth/BluetoothNotificationManager.java
@@ -26,6 +26,7 @@ import org.chromium.components.url_formatter.SchemeDisplay; import org.chromium.components.url_formatter.UrlFormatter; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.WebContents; import org.chromium.url.GURL; @@ -38,8 +39,6 @@ * to a Bluetooth device or scanning for nearby Bluetooth devices. */ public class BluetoothNotificationManager { - private static final String TAG = "BluetoothNotificationManager"; - private static final String NOTIFICATION_NAMESPACE = "BluetoothNotificationManager"; public static final String ACTION_BLUETOOTH_UPDATE = @@ -257,7 +256,7 @@ private static boolean shouldStartService( Context context, @BluetoothType int bluetoothType, int notificationTabId) { - if (!ContentFeatureList.isEnabled( + if (!ContentFeatureMap.isEnabled( ContentFeatureList.WEB_BLUETOOTH_NEW_PERMISSIONS_BACKEND)) { return false; } @@ -300,7 +299,7 @@ * @param service The bluetooth notification service class. */ public static void clearBluetoothNotifications(Class service) { - if (!ContentFeatureList.isEnabled( + if (!ContentFeatureMap.isEnabled( ContentFeatureList.WEB_BLUETOOTH_NEW_PERMISSIONS_BACKEND)) { return; }
diff --git a/chrome/browser/browser_resources.grd b/chrome/browser/browser_resources.grd index 7ca8657b..7e6ffd7 100644 --- a/chrome/browser/browser_resources.grd +++ b/chrome/browser/browser_resources.grd
@@ -225,9 +225,6 @@ <include name="IDR_PARENT_ACCESS_REQUEST_APPROVAL_SVG" file="resources\chromeos\parent_access\images\request_approval.svg" type="BINDATA" /> <include name="IDR_PARENT_ACCESS_REQUEST_APPROVAL_DARK_SVG" file="resources\chromeos\parent_access\images\request_approval_dark.svg" type="BINDATA" /> <include name="IDR_SMART_DIM_20190521_EXAMPLE_PREPROCESSOR_CONFIG_PB" file="ash\power\ml\smart_dim\20190521_example_preprocessor_config.pb" type="BINDATA" /> - <include name="IDR_ARC_GRAPHICS_TRACING_HTML" file="resources\chromeos\arc_graphics_tracing\arc_graphics_tracing.html" type="BINDATA"/> - <include name="IDR_ARC_GRAPHICS_TRACING_JS" file="resources\chromeos\arc_graphics_tracing\arc_graphics_tracing.js" type="BINDATA" /> - <include name="IDR_ARC_GRAPHICS_TRACING_UI_JS" file="resources\chromeos\arc_graphics_tracing\arc_graphics_tracing_ui.js" type="BINDATA" /> <include name="IDR_ARC_OVERVIEW_TRACING_HTML" file="resources\chromeos\arc_graphics_tracing\arc_overview_tracing.html" type="BINDATA"/> <include name="IDR_ARC_OVERVIEW_TRACING_JS" file="resources\chromeos\arc_graphics_tracing\arc_overview_tracing.js" type="BINDATA" /> <include name="IDR_ARC_OVERVIEW_TRACING_UI_JS" file="resources\chromeos\arc_graphics_tracing\arc_overview_tracing_ui.js" type="BINDATA" />
diff --git a/chrome/browser/commerce/price_tracking/android/java/src/org/chromium/chrome/browser/price_tracking/PriceTrackingNotificationBridge.java b/chrome/browser/commerce/price_tracking/android/java/src/org/chromium/chrome/browser/price_tracking/PriceTrackingNotificationBridge.java index fb5763f..e8a7942 100644 --- a/chrome/browser/commerce/price_tracking/android/java/src/org/chromium/chrome/browser/price_tracking/PriceTrackingNotificationBridge.java +++ b/chrome/browser/commerce/price_tracking/android/java/src/org/chromium/chrome/browser/price_tracking/PriceTrackingNotificationBridge.java
@@ -40,7 +40,6 @@ */ public class PriceTrackingNotificationBridge { private static final String TAG = "PriceTrackNotif"; - private static final long UNITS_TO_MICROS = 1000000L; private final long mNativePriceTrackingNotificationBridge; private final PriceDropNotifier mNotifier; private final PriceDropNotificationManager mPriceDropNotificationManager;
diff --git a/chrome/browser/companion/core/BUILD.gn b/chrome/browser/companion/core/BUILD.gn index ba32d1ba..dc32e4e 100644 --- a/chrome/browser/companion/core/BUILD.gn +++ b/chrome/browser/companion/core/BUILD.gn
@@ -40,6 +40,8 @@ sources = [ "companion_metrics_logger_unittest.cc", "companion_url_builder_unittest.cc", + "mock_signin_delegate.cc", + "mock_signin_delegate.h", "promo_handler_unittest.cc", "utils_unittest.cc", ]
diff --git a/chrome/browser/companion/core/companion_url_builder_unittest.cc b/chrome/browser/companion/core/companion_url_builder_unittest.cc index af5c2ce..6d26e750 100644 --- a/chrome/browser/companion/core/companion_url_builder_unittest.cc +++ b/chrome/browser/companion/core/companion_url_builder_unittest.cc
@@ -7,9 +7,9 @@ #include "base/base64.h" #include "base/logging.h" #include "chrome/browser/companion/core/constants.h" +#include "chrome/browser/companion/core/mock_signin_delegate.h" #include "chrome/browser/companion/core/promo_handler.h" #include "chrome/browser/companion/core/proto/companion_url_params.pb.h" -#include "chrome/browser/companion/core/signin_delegate.h" #include "components/prefs/pref_registry_simple.h" #include "components/prefs/testing_pref_service.h" #include "components/unified_consent/pref_names.h" @@ -27,16 +27,6 @@ constexpr char kTextQuery[] = "Apples"; constexpr char kOrigin[] = "chrome-untrusted://companion-side-panel.top-chrome"; -class MockSigninDelegate : public SigninDelegate { - public: - MOCK_METHOD0(AllowedSignin, bool()); - MOCK_METHOD0(IsSignedIn, bool()); - MOCK_METHOD0(StartSigninFlow, void()); - MOCK_METHOD1(EnableMsbb, void(bool)); - MOCK_METHOD1(LoadUrlInNewTab, void(const GURL&)); - MOCK_METHOD0(ShouldShowRegionSearchIPH, bool()); -}; - } // namespace class CompanionUrlBuilderTest : public testing::Test {
diff --git a/chrome/browser/companion/core/mock_signin_delegate.cc b/chrome/browser/companion/core/mock_signin_delegate.cc new file mode 100644 index 0000000..12a730d --- /dev/null +++ b/chrome/browser/companion/core/mock_signin_delegate.cc
@@ -0,0 +1,13 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/companion/core/mock_signin_delegate.h" + +namespace companion { + +MockSigninDelegate::MockSigninDelegate() = default; + +MockSigninDelegate::~MockSigninDelegate() = default; + +} // namespace companion
diff --git a/chrome/browser/companion/core/mock_signin_delegate.h b/chrome/browser/companion/core/mock_signin_delegate.h new file mode 100644 index 0000000..bdd0d5a --- /dev/null +++ b/chrome/browser/companion/core/mock_signin_delegate.h
@@ -0,0 +1,33 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_COMPANION_CORE_MOCK_SIGNIN_DELEGATE_H_ +#define CHROME_BROWSER_COMPANION_CORE_MOCK_SIGNIN_DELEGATE_H_ + +#include "chrome/browser/companion/core/signin_delegate.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "url/gurl.h" + +namespace companion { + +class MockSigninDelegate : public SigninDelegate { + public: + MockSigninDelegate(); + ~MockSigninDelegate() override; + + // Disallow copy/assign. + MockSigninDelegate(const MockSigninDelegate&) = delete; + MockSigninDelegate& operator=(const MockSigninDelegate&) = delete; + + MOCK_METHOD0(AllowedSignin, bool()); + MOCK_METHOD0(IsSignedIn, bool()); + MOCK_METHOD0(StartSigninFlow, void()); + MOCK_METHOD1(EnableMsbb, void(bool)); + MOCK_METHOD2(OpenUrlInBrowser, void(const GURL&, bool)); + MOCK_METHOD0(ShouldShowRegionSearchIPH, bool()); +}; + +} // namespace companion + +#endif // CHROME_BROWSER_COMPANION_CORE_MOCK_SIGNIN_DELEGATE_H_
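Note: hoisting the mock into its own files lets the URL-builder and promo-handler tests above share one definition instead of each declaring a local MockSigninDelegate. A minimal gmock usage sketch; the test name and expectation are illustrative only:

    #include "chrome/browser/companion/core/mock_signin_delegate.h"
    #include "testing/gmock/include/gmock/gmock.h"
    #include "testing/gtest/include/gtest/gtest.h"
    #include "url/gurl.h"

    TEST(MockSigninDelegateExampleTest, ForwardsUrlOpens) {
      companion::MockSigninDelegate delegate;
      EXPECT_CALL(delegate, OpenUrlInBrowser(GURL("https://example.com/"),
                                             /*use_new_tab=*/true));
      // Production code would hold the mock through the SigninDelegate
      // interface; calling it directly here just satisfies the expectation.
      delegate.OpenUrlInBrowser(GURL("https://example.com/"),
                                /*use_new_tab=*/true);
    }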
diff --git a/chrome/browser/companion/core/mojom/companion.mojom b/chrome/browser/companion/core/mojom/companion.mojom index 04f68ded..c1ae22b 100644 --- a/chrome/browser/companion/core/mojom/companion.mojom +++ b/chrome/browser/companion/core/mojom/companion.mojom
@@ -39,6 +39,9 @@ // Method corresponding to `CompanionPageHandler.OnCqJumptagClicked`. kOnCqJumptagClicked = 9, + // Method corresponding to `CompanionPageHandler.OpenUrlInBrowser`. + kOpenUrlInBrowser = 10, + // Methods used in browser -> renderer communication. // Method corresponding to `CompanionPage.UpdateCompanionPage`. kUpdateCompanionPage = 31, @@ -143,10 +146,7 @@ // Called to notify the browser about user action of type `promo_action` on a // promo of type `promo_type`. - // `exps_promo_url` is optionally used to pass the URL to be loaded for exps - // opt-in. - OnPromoAction(PromoType promo_type, PromoAction promo_action, - url.mojom.Url? exps_promo_url); + OnPromoAction(PromoType promo_type, PromoAction promo_action); // Called to notify the browser that user has clicked on region search button. OnRegionSearchClicked(); @@ -184,14 +184,16 @@ OnCqCandidatesAvailable(array<string> text_directives); // Called in response to user feedback action on the PH surface. - // `reporting_url` is optionally used to pass the URL to use for reporting - // content. - OnPhFeedback(PhFeedback ph_feedback, url.mojom.Url? reporting_url); + OnPhFeedback(PhFeedback ph_feedback); // Called to notify the browser that the user has clicked on a jumptag. // `text_directive` is the URL fragment for highlighting and scrolling-to. // Ref: https://wicg.github.io/scroll-to-text-fragment/#syntax. OnCqJumptagClicked(string text_directive); + + // Generic method that can be used to open a URL in the browser. Ignored if + // `url_to_open` is invalid. + OpenUrlInBrowser(url.mojom.Url? url_to_open, bool use_new_tab); }; // WebUI page handler for request from Browser side. (C++ -> TypeScript)
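Note: the new kOpenUrlInBrowser entry gives the side panel one generic way to ask the browser to navigate, which is why the per-message optional URLs are dropped from OnPromoAction and OnPhFeedback above. A hypothetical browser-side handler sketch; the real CompanionPageHandler implementation is not part of this hunk, and the signin_delegate_ member is an assumption:

    // A nullable url.mojom.Url arrives as absl::optional<GURL> in the C++
    // bindings.
    void CompanionPageHandler::OpenUrlInBrowser(
        const absl::optional<GURL>& url_to_open, bool use_new_tab) {
      // Per the mojom comment, invalid (or missing) URLs are ignored.
      if (!url_to_open.has_value() || !url_to_open->is_valid()) {
        return;
      }
      signin_delegate_->OpenUrlInBrowser(*url_to_open, use_new_tab);
    }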
diff --git a/chrome/browser/companion/core/promo_handler.cc b/chrome/browser/companion/core/promo_handler.cc index 8d351ad..4a17fe5 100644 --- a/chrome/browser/companion/core/promo_handler.cc +++ b/chrome/browser/companion/core/promo_handler.cc
@@ -30,8 +30,7 @@ } void PromoHandler::OnPromoAction(PromoType promo_type, - PromoAction promo_action, - const absl::optional<GURL>& exps_promo_url) { + PromoAction promo_action) { switch (promo_type) { case PromoType::kSignin: OnSigninPromo(promo_action); @@ -40,7 +39,7 @@ OnMsbbPromo(promo_action); return; case PromoType::kExps: - OnExpsPromo(promo_action, exps_promo_url); + OnExpsPromo(promo_action); return; default: return; @@ -69,16 +68,11 @@ } } -void PromoHandler::OnExpsPromo(PromoAction promo_action, - const absl::optional<GURL>& exps_promo_url) { +void PromoHandler::OnExpsPromo(PromoAction promo_action) { if (promo_action == PromoAction::kShown) { IncrementPref(kExpsPromoShownCountPref); } else if (promo_action == PromoAction::kRejected) { IncrementPref(kExpsPromoDeclinedCountPref); - } else if (promo_action == PromoAction::kAccepted) { - if (exps_promo_url.has_value()) { - signin_delegate_->LoadUrlInNewTab(exps_promo_url.value()); - } } }
diff --git a/chrome/browser/companion/core/promo_handler.h b/chrome/browser/companion/core/promo_handler.h index b4195939..8212d74 100644 --- a/chrome/browser/companion/core/promo_handler.h +++ b/chrome/browser/companion/core/promo_handler.h
@@ -11,7 +11,6 @@ #include "base/functional/callback.h" #include "base/memory/raw_ptr.h" #include "chrome/browser/companion/core/mojom/companion.mojom.h" -#include "url/gurl.h" class PrefRegistrySimple; class PrefService; @@ -39,15 +38,12 @@ // Called in response to the mojo call from renderer. Takes necessary action // to handle the user action on the promo. - void OnPromoAction(PromoType promo_type, - PromoAction promo_action, - const absl::optional<GURL>& exps_promo_url); + void OnPromoAction(PromoType promo_type, PromoAction promo_action); private: void OnSigninPromo(PromoAction promo_action); void OnMsbbPromo(PromoAction promo_action); - void OnExpsPromo(PromoAction promo_action, - const absl::optional<GURL>& exps_promo_url); + void OnExpsPromo(PromoAction promo_action); void IncrementPref(const std::string& pref_name); // Lifetime of the PrefService is bound to profile which outlives the lifetime
diff --git a/chrome/browser/companion/core/promo_handler_unittest.cc b/chrome/browser/companion/core/promo_handler_unittest.cc index 7908a66..bdbcf8d 100644 --- a/chrome/browser/companion/core/promo_handler_unittest.cc +++ b/chrome/browser/companion/core/promo_handler_unittest.cc
@@ -5,28 +5,14 @@ #include "chrome/browser/companion/core/promo_handler.h" #include "chrome/browser/companion/core/constants.h" +#include "chrome/browser/companion/core/mock_signin_delegate.h" #include "chrome/browser/companion/core/mojom/companion.mojom.h" -#include "chrome/browser/companion/core/signin_delegate.h" #include "components/prefs/pref_registry_simple.h" #include "components/prefs/testing_pref_service.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" namespace companion { -namespace { - -class MockSigninDelegate : public SigninDelegate { - public: - MOCK_METHOD0(AllowedSignin, bool()); - MOCK_METHOD0(IsSignedIn, bool()); - MOCK_METHOD0(StartSigninFlow, void()); - MOCK_METHOD1(EnableMsbb, void(bool)); - MOCK_METHOD1(LoadUrlInNewTab, void(const GURL&)); - MOCK_METHOD0(ShouldShowRegionSearchIPH, bool()); -}; - -} // namespace class PromoHandlerTest : public testing::Test { public: @@ -46,41 +32,27 @@ }; TEST_F(PromoHandlerTest, MsbbPromo) { - promo_handler_->OnPromoAction(PromoType::kMsbb, PromoAction::kRejected, - absl::nullopt); + promo_handler_->OnPromoAction(PromoType::kMsbb, PromoAction::kRejected); EXPECT_EQ(1, pref_service_.GetInteger(kMsbbPromoDeclinedCountPref)); EXPECT_CALL(signin_delegate_, EnableMsbb(true)).Times(1); - promo_handler_->OnPromoAction(PromoType::kMsbb, PromoAction::kAccepted, - absl::nullopt); + promo_handler_->OnPromoAction(PromoType::kMsbb, PromoAction::kAccepted); } TEST_F(PromoHandlerTest, SigninPromo) { - promo_handler_->OnPromoAction(PromoType::kSignin, PromoAction::kRejected, - absl::nullopt); + promo_handler_->OnPromoAction(PromoType::kSignin, PromoAction::kRejected); EXPECT_EQ(1, pref_service_.GetInteger(kSigninPromoDeclinedCountPref)); EXPECT_CALL(signin_delegate_, StartSigninFlow()).Times(1); - promo_handler_->OnPromoAction(PromoType::kSignin, PromoAction::kAccepted, - absl::nullopt); + promo_handler_->OnPromoAction(PromoType::kSignin, PromoAction::kAccepted); } TEST_F(PromoHandlerTest, ExpsPromo) { - promo_handler_->OnPromoAction(PromoType::kExps, PromoAction::kShown, - absl::nullopt); + promo_handler_->OnPromoAction(PromoType::kExps, PromoAction::kShown); EXPECT_EQ(1, pref_service_.GetInteger(kExpsPromoShownCountPref)); - promo_handler_->OnPromoAction(PromoType::kExps, PromoAction::kRejected, - absl::nullopt); + promo_handler_->OnPromoAction(PromoType::kExps, PromoAction::kRejected); EXPECT_EQ(1, pref_service_.GetInteger(kExpsPromoDeclinedCountPref)); - - EXPECT_CALL(signin_delegate_, LoadUrlInNewTab(testing::_)).Times(0); - promo_handler_->OnPromoAction(PromoType::kExps, PromoAction::kAccepted, - absl::nullopt); - - EXPECT_CALL(signin_delegate_, LoadUrlInNewTab(testing::_)).Times(1); - promo_handler_->OnPromoAction(PromoType::kExps, PromoAction::kAccepted, - GURL()); } } // namespace companion
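Note: with the optional URL gone, accepting the exps promo becomes a no-op inside the promo handler; the URL load that used to go through SigninDelegate::LoadUrlInNewTab is removed, presumably replaced by the renderer calling the new OpenUrlInBrowser mojom method directly. Illustrative call shape after this change (the handler pointer name is assumed):

    // After this CL the accepted case neither loads a URL nor touches prefs;
    // only kShown and kRejected still increment their counters.
    promo_handler->OnPromoAction(PromoType::kExps, PromoAction::kAccepted);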
diff --git a/chrome/browser/companion/core/signin_delegate.h b/chrome/browser/companion/core/signin_delegate.h index 8b80a509..f277608 100644 --- a/chrome/browser/companion/core/signin_delegate.h +++ b/chrome/browser/companion/core/signin_delegate.h
@@ -32,8 +32,8 @@ // Enable the setting for make searches and browsing better. virtual void EnableMsbb(bool enable_msbb) = 0; - // Loads URL in the browser in a new tab. - virtual void LoadUrlInNewTab(const GURL& url) = 0; + // Opens URL in the browser. + virtual void OpenUrlInBrowser(const GURL& url, bool use_new_tab) = 0; // Returns whether region search IPH should be shown. virtual bool ShouldShowRegionSearchIPH() = 0;
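Note: for SigninDelegate implementers and callers the change above is largely mechanical; the old single-argument method maps onto the new one with the tab choice made explicit. An illustrative before/after (the equivalence of the two calls is an assumption based on the naming):

    // Before this CL:
    //   delegate->LoadUrlInNewTab(url);
    // After this CL:
    delegate->OpenUrlInBrowser(url, /*use_new_tab=*/true);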
diff --git a/chrome/browser/content_creation/notes/internal/android/java/src/org/chromium/chrome/browser/content_creation/notes/NoteCreationMetrics.java b/chrome/browser/content_creation/notes/internal/android/java/src/org/chromium/chrome/browser/content_creation/notes/NoteCreationMetrics.java index 90105a54..b90fd72 100644 --- a/chrome/browser/content_creation/notes/internal/android/java/src/org/chromium/chrome/browser/content_creation/notes/NoteCreationMetrics.java +++ b/chrome/browser/content_creation/notes/internal/android/java/src/org/chromium/chrome/browser/content_creation/notes/NoteCreationMetrics.java
@@ -57,7 +57,6 @@ } // Max expected number of dynamically loaded templates. - private static final int MAX_NUMBER_OF_TEMPLATES = 50; /** * Records metrics related to the user starting the creation flow.
diff --git a/chrome/browser/content_settings/content_settings_origin_identifier_value_map_unittest.cc b/chrome/browser/content_settings/content_settings_origin_identifier_value_map_unittest.cc index 40da6f7..279cf33 100644 --- a/chrome/browser/content_settings/content_settings_origin_identifier_value_map_unittest.cc +++ b/chrome/browser/content_settings/content_settings_origin_identifier_value_map_unittest.cc
@@ -207,8 +207,7 @@ map.SetValue(sub_pattern, ContentSettingsPattern::Wildcard(), ContentSettingsType::COOKIES, base::Value(2), {.last_modified = t1, - .expiration = content_settings::GetConstraintExpiration( - base::Seconds(100)), + .expiration = base::Time::Now() + base::Seconds(100), .session_model = content_settings::SessionModel::UserSession}); map.GetLock().Release();
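Note: this hunk and the pref-provider hunks that follow move test code off the content_settings::GetConstraintExpiration() helper and the brace-initialized {expiration, session_model} pairs. A minimal sketch of the setter-based ContentSettingConstraints style used after this CL, with namespace qualification written out and the provider/pattern variables assumed from the surrounding tests:

    #include "base/time/time.h"
    #include "components/content_settings/core/common/content_settings_constraints.h"

    // A Durable setting that expires one day after creation: the lifetime is
    // given as a delta and the session model is set by name rather than
    // positionally.
    content_settings::ContentSettingConstraints constraints;
    constraints.set_lifetime(base::Days(1));
    constraints.set_session_model(content_settings::SessionModel::Durable);
    provider.SetWebsiteSetting(primary_pattern, primary_pattern,
                               ContentSettingsType::STORAGE_ACCESS,
                               base::Value(CONTENT_SETTING_BLOCK), constraints);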
diff --git a/chrome/browser/content_settings/content_settings_pref_provider_unittest.cc b/chrome/browser/content_settings/content_settings_pref_provider_unittest.cc index b868d9d..047eeae 100644 --- a/chrome/browser/content_settings/content_settings_pref_provider_unittest.cc +++ b/chrome/browser/content_settings/content_settings_pref_provider_unittest.cc
@@ -27,6 +27,7 @@ #include "components/content_settings/core/browser/website_settings_info.h" #include "components/content_settings/core/browser/website_settings_registry.h" #include "components/content_settings/core/common/content_settings.h" +#include "components/content_settings/core/common/content_settings_constraints.h" #include "components/content_settings/core/common/content_settings_metadata.h" #include "components/content_settings/core/common/content_settings_pattern.h" #include "components/content_settings/core/common/content_settings_types.h" @@ -425,23 +426,35 @@ /*store_last_modified=*/true, /*restore_session=*/false); - normal_provider.SetWebsiteSetting(pattern_1, wildcard, - ContentSettingsType::COOKIES, - base::Value(CONTENT_SETTING_ALLOW), {}); - normal_provider.SetWebsiteSetting(pattern_3, pattern_3, - ContentSettingsType::COOKIES, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::UserSession}); - // Durable and not expired - normal_provider.SetWebsiteSetting( - pattern_4, pattern_4, ContentSettingsType::COOKIES, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time::Now() + base::Days(1), SessionModel::Durable}); - // Durable but expired - normal_provider.SetWebsiteSetting( - pattern_5, pattern_5, ContentSettingsType::COOKIES, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time::Now() - base::Days(1), SessionModel::Durable}); + { + ContentSettingConstraints constraints; + constraints.set_session_model(SessionModel::UserSession); + + normal_provider.SetWebsiteSetting(pattern_1, wildcard, + ContentSettingsType::COOKIES, + base::Value(CONTENT_SETTING_ALLOW), {}); + normal_provider.SetWebsiteSetting( + pattern_3, pattern_3, ContentSettingsType::COOKIES, + base::Value(CONTENT_SETTING_BLOCK), constraints); + } + { + // Durable and not expired + ContentSettingConstraints constraints; + constraints.set_lifetime(base::Days(1)); + constraints.set_session_model(SessionModel::Durable); + normal_provider.SetWebsiteSetting( + pattern_4, pattern_4, ContentSettingsType::COOKIES, + base::Value(CONTENT_SETTING_BLOCK), constraints); + } + { + // Durable but expired + ContentSettingConstraints constraints(base::Time::Now() - base::Days(2)); + constraints.set_lifetime(base::Days(1)); + constraints.set_session_model(SessionModel::Durable); + normal_provider.SetWebsiteSetting( + pattern_5, pattern_5, ContentSettingsType::COOKIES, + base::Value(CONTENT_SETTING_BLOCK), constraints); + } // Non-OTR provider, Non-OTR iterator has one setting (pattern 1) using // default params and one scoped to a UserSession lifetime model. { @@ -654,10 +667,12 @@ TestUtils::GetContentSetting(&provider, primary_url, primary_url, ContentSettingsType::STORAGE_ACCESS, false)); + ContentSettingConstraints constraints; + constraints.set_session_model(SessionModel::UserSession); + provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::UserSession}); + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -672,7 +687,8 @@ // back. 
provider.ShutdownOnUIThread(); - PrefProvider provider2(testing_profile.GetPrefs(), /*off_the_record=*/false, + PrefProvider provider2(testing_profile.GetPrefs(), + /*off_the_record=*/false, /*store_last_modified=*/true, /*restore_session=*/false); EXPECT_EQ( @@ -702,10 +718,12 @@ TestUtils::GetContentSetting(&provider, primary_url, primary_url, ContentSettingsType::STORAGE_ACCESS, false)); + ContentSettingConstraints constraints; + constraints.set_session_model(SessionModel::UserSession); + provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::UserSession}); + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -748,10 +766,12 @@ TestUtils::GetContentSetting(&provider, primary_url, primary_url, ContentSettingsType::STORAGE_ACCESS, false)); - provider.SetWebsiteSetting( - primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::NonRestorableUserSession}); + ContentSettingConstraints constraints; + constraints.set_session_model(SessionModel::NonRestorableUserSession); + + provider.SetWebsiteSetting(primary_pattern, primary_pattern, + ContentSettingsType::STORAGE_ACCESS, + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -787,12 +807,13 @@ GURL primary_url("http://example.com/"); ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_lifetime(base::Seconds(123)); + constraints.set_session_model(SessionModel::Durable); - provider.SetWebsiteSetting( - primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {content_settings::GetConstraintExpiration(base::Seconds(123)), - SessionModel::Durable}); + provider.SetWebsiteSetting(primary_pattern, primary_pattern, + ContentSettingsType::STORAGE_ACCESS, + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -827,12 +848,13 @@ GURL primary_url("http://example.com/"); ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_lifetime(base::Seconds(123)); + constraints.set_session_model(SessionModel::Durable); - provider.SetWebsiteSetting( - primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {content_settings::GetConstraintExpiration(base::Seconds(123)), - SessionModel::Durable}); + provider.SetWebsiteSetting(primary_pattern, primary_pattern, + ContentSettingsType::STORAGE_ACCESS, + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -878,12 +900,13 @@ GURL primary_url("http://example.com/"); ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_lifetime(base::Seconds(123)); + constraints.set_session_model(SessionModel::Durable); - provider.SetWebsiteSetting( - primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - 
base::Value(CONTENT_SETTING_BLOCK), - {content_settings::GetConstraintExpiration(base::Seconds(123)), - SessionModel::Durable}); + provider.SetWebsiteSetting(primary_pattern, primary_pattern, + ContentSettingsType::STORAGE_ACCESS, + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -924,21 +947,22 @@ GURL primary_url("http://example.com/"); ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_session_model(SessionModel::UserSession); provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::UserSession}); + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, ContentSettingsType::STORAGE_ACCESS, false)); // Update to Durable and expect that the setting is still there. + constraints.set_session_model(SessionModel::Durable); provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::Durable}); + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -968,21 +992,22 @@ GURL primary_url("http://example.com/"); ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_session_model(SessionModel::Durable); provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::Durable}); + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, ContentSettingsType::STORAGE_ACCESS, false)); // Update to Durable and expect that the setting is still there. + constraints.set_session_model(SessionModel::UserSession); provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::STORAGE_ACCESS, - base::Value(CONTENT_SETTING_BLOCK), - {base::Time(), SessionModel::UserSession}); + base::Value(CONTENT_SETTING_BLOCK), constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -1014,17 +1039,19 @@ ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_track_last_visit_for_autoexpiration(false); + // Set one setting with track_last_visit_for_autoexpiration enabled and one // disabled. 
provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::MEDIASTREAM_CAMERA, - base::Value(CONTENT_SETTING_ALLOW), - {.track_last_visit_for_autoexpiration = false}); + base::Value(CONTENT_SETTING_ALLOW), constraints); + constraints.set_track_last_visit_for_autoexpiration(true); provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::GEOLOCATION, - base::Value(CONTENT_SETTING_ALLOW), - {.track_last_visit_for_autoexpiration = true}); + base::Value(CONTENT_SETTING_ALLOW), constraints); RuleMetaData metadata; EXPECT_EQ(CONTENT_SETTING_ALLOW, TestUtils::GetContentSetting( @@ -1051,11 +1078,12 @@ GURL primary_url("http://example.com/"); ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_track_last_visit_for_autoexpiration(true); provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::GEOLOCATION, - base::Value(CONTENT_SETTING_ALLOW), - {.track_last_visit_for_autoexpiration = true}); + base::Value(CONTENT_SETTING_ALLOW), constraints); RuleMetaData metadata; EXPECT_EQ(CONTENT_SETTING_ALLOW, TestUtils::GetContentSetting(&provider, primary_url, primary_url, @@ -1091,11 +1119,12 @@ GURL primary_url("http://example.com/"); ContentSettingsPattern primary_pattern = ContentSettingsPattern::FromString("[*.]example.com"); + ContentSettingConstraints constraints; + constraints.set_track_last_visit_for_autoexpiration(true); provider.SetWebsiteSetting(primary_pattern, primary_pattern, ContentSettingsType::GEOLOCATION, - base::Value(CONTENT_SETTING_ALLOW), - {.track_last_visit_for_autoexpiration = true}); + base::Value(CONTENT_SETTING_ALLOW), constraints); RuleMetaData metadata; EXPECT_EQ(CONTENT_SETTING_ALLOW, TestUtils::GetContentSetting(&provider, primary_url, primary_url,
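The pref-provider test hunks above all apply the same migration: ContentSettingConstraints is no longer brace-initialized with an explicit expiration time and session model, but default-constructed and then configured through setters, with expirations expressed as a lifetime duration. A minimal sketch of the resulting pattern follows; the helper name is illustrative and the include list is an assumption, while the setter and SetWebsiteSetting calls mirror the diff.

#include "base/time/time.h"
#include "base/values.h"
#include "components/content_settings/core/browser/content_settings_pref_provider.h"
#include "components/content_settings/core/common/content_settings.h"
#include "components/content_settings/core/common/content_settings_constraints.h"
#include "components/content_settings/core/common/content_settings_pattern.h"
#include "components/content_settings/core/common/content_settings_types.h"

// Sets a BLOCK that lasts one day: the setter-based equivalent of the removed
// brace-initialized form {base::Time::Now() + base::Days(1), SessionModel::Durable}.
void SetBlockForOneDay(content_settings::PrefProvider* provider,
                       const ContentSettingsPattern& pattern) {
  content_settings::ContentSettingConstraints constraints;
  constraints.set_lifetime(base::Days(1));  // expiration is derived from the lifetime
  constraints.set_session_model(content_settings::SessionModel::Durable);
  provider->SetWebsiteSetting(pattern, pattern, ContentSettingsType::COOKIES,
                              base::Value(CONTENT_SETTING_BLOCK), constraints);
}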
diff --git a/chrome/browser/content_settings/host_content_settings_map_unittest.cc b/chrome/browser/content_settings/host_content_settings_map_unittest.cc index 4381529..4c5f3720 100644 --- a/chrome/browser/content_settings/host_content_settings_map_unittest.cc +++ b/chrome/browser/content_settings/host_content_settings_map_unittest.cc
@@ -1934,9 +1934,11 @@ persistent_type, CONTENT_SETTING_BLOCK); // Set a Session only permission for our second url and we expect it should // co-exist with the other permission just fine. - map->SetContentSettingDefaultScope( - example_url2, example_url2, persistent_type, CONTENT_SETTING_ALLOW, - {base::Time(), content_settings::SessionModel::UserSession}); + content_settings::ContentSettingConstraints constraints; + constraints.set_session_model(content_settings::SessionModel::UserSession); + map->SetContentSettingDefaultScope(example_url2, example_url2, + persistent_type, CONTENT_SETTING_ALLOW, + constraints); EXPECT_EQ( CONTENT_SETTING_BLOCK, @@ -1987,12 +1989,15 @@ map->GetContentSetting(example_url2, example_url2, persistent_type)); // Set permissions in two different scopes. - map->SetContentSettingDefaultScope( - example_url1, example_url1, persistent_type, CONTENT_SETTING_BLOCK, - {base::Time(), content_settings::SessionModel::Durable}); - map->SetContentSettingDefaultScope( - example_url2, example_url2, persistent_type, CONTENT_SETTING_ALLOW, - {base::Time(), content_settings::SessionModel::UserSession}); + content_settings::ContentSettingConstraints constraints; + constraints.set_session_model(content_settings::SessionModel::Durable); + map->SetContentSettingDefaultScope(example_url1, example_url1, + persistent_type, CONTENT_SETTING_BLOCK, + constraints); + constraints.set_session_model(content_settings::SessionModel::UserSession); + map->SetContentSettingDefaultScope(example_url2, example_url2, + persistent_type, CONTENT_SETTING_ALLOW, + constraints); // Validate that if we retrieve all our settings we should see both settings // and the default values returned. @@ -2059,17 +2064,20 @@ // Set permissions with our first two urls with different expiry times and our // third with no expiration. - map->SetContentSettingDefaultScope( - example_url1, example_url1, persistent_type, CONTENT_SETTING_BLOCK, - {content_settings::GetConstraintExpiration(base::Seconds(100)), - content_settings::SessionModel::UserSession}); - map->SetContentSettingDefaultScope( - example_url2, example_url2, persistent_type, CONTENT_SETTING_ALLOW, - {content_settings::GetConstraintExpiration(base::Seconds(200)), - content_settings::SessionModel::UserSession}); - map->SetContentSettingDefaultScope( - example_url3, example_url3, persistent_type, CONTENT_SETTING_ALLOW, - {base::Time(), content_settings::SessionModel::UserSession}); + content_settings::ContentSettingConstraints constraints; + constraints.set_lifetime(base::Seconds(100)); + constraints.set_session_model(content_settings::SessionModel::UserSession); + map->SetContentSettingDefaultScope(example_url1, example_url1, + persistent_type, CONTENT_SETTING_BLOCK, + constraints); + constraints.set_lifetime(base::Seconds(200)); + map->SetContentSettingDefaultScope(example_url2, example_url2, + persistent_type, CONTENT_SETTING_ALLOW, + constraints); + constraints.set_lifetime(base::TimeDelta()); + map->SetContentSettingDefaultScope(example_url3, example_url3, + persistent_type, CONTENT_SETTING_ALLOW, + constraints); // Validate that we can retrieve all our settings and none of them are // expired.
diff --git a/chrome/browser/content_settings/one_time_permission_provider.cc b/chrome/browser/content_settings/one_time_permission_provider.cc index 9434bc6..2cc5129 100644 --- a/chrome/browser/content_settings/one_time_permission_provider.cc +++ b/chrome/browser/content_settings/one_time_permission_provider.cc
@@ -52,7 +52,7 @@ // This block handles transitions from Allow Once to Ask/Block by clearing // the one time grant and letting the pref provider handle the permission as // usual. - if (constraints.session_model != content_settings::SessionModel::OneTime) { + if (constraints.session_model() != content_settings::SessionModel::OneTime) { value_map_.DeleteValue(primary_pattern, secondary_pattern, content_settings_type);
diff --git a/chrome/browser/content_settings/one_time_permission_provider_unittest.cc b/chrome/browser/content_settings/one_time_permission_provider_unittest.cc index e956fe4..2c977fc 100644 --- a/chrome/browser/content_settings/one_time_permission_provider_unittest.cc +++ b/chrome/browser/content_settings/one_time_permission_provider_unittest.cc
@@ -47,8 +47,9 @@ protected: content_settings::ContentSettingConstraints one_time_constraints() { - return content_settings::ContentSettingConstraints{ - .session_model = content_settings::SessionModel::OneTime}; + content_settings::ContentSettingConstraints constraints; + constraints.set_session_model(content_settings::SessionModel::OneTime); + return constraints; } GURL primary_url = GURL("http://example.com/");
diff --git a/chrome/browser/creator/android/java/src/org/chromium/chrome/browser/creator/CreatorTabSheetContent.java b/chrome/browser/creator/android/java/src/org/chromium/chrome/browser/creator/CreatorTabSheetContent.java index b0866c8..8ebdec8 100644 --- a/chrome/browser/creator/android/java/src/org/chromium/chrome/browser/creator/CreatorTabSheetContent.java +++ b/chrome/browser/creator/android/java/src/org/chromium/chrome/browser/creator/CreatorTabSheetContent.java
@@ -50,8 +50,6 @@ */ private static final int BASE_ANIMATION_DURATION_MS = 218; - private static final float PEEK_TOOLBAR_HEIGHT_MULTIPLE = 2.f; - /** Ratio of the height when in full mode. Used in half-open variation. */ private static final float FULL_HEIGHT_RATIO = 0.9f;
diff --git a/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridge.java b/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridge.java index 7b9120a..c25bbe2 100644 --- a/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridge.java +++ b/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridge.java
@@ -30,7 +30,6 @@ * Glues download dialogs UI code and handles the communication to download native backend. */ public class DownloadDialogBridge implements DownloadLocationDialogController { - private static final long INVALID_START_TIME = -1; private long mNativeDownloadDialogBridge; private final DownloadLocationDialogCoordinator mLocationDialog;
diff --git a/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridgeUnitTest.java b/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridgeUnitTest.java index 7bcd14c..1d8dcdb4 100644 --- a/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridgeUnitTest.java +++ b/chrome/browser/download/android/java/src/org/chromium/chrome/browser/download/DownloadDialogBridgeUnitTest.java
@@ -43,7 +43,6 @@ @Config(manifest = Config.NONE) public class DownloadDialogBridgeUnitTest { private static final int FAKE_NATIVE_HOLDER = 1; - private static final long INVALID_START_TIME = -1; private static final long START_TIME = 1000; private static final long TOTAL_BYTES = 100; private static final @ConnectionType int CONNECTION_TYPE = ConnectionType.CONNECTION_3G;
diff --git a/chrome/browser/download/internal/android/java/src/org/chromium/chrome/browser/download/home/filter/FilterChipsProvider.java b/chrome/browser/download/internal/android/java/src/org/chromium/chrome/browser/download/home/filter/FilterChipsProvider.java index cc435ed..4b16fc3 100644 --- a/chrome/browser/download/internal/android/java/src/org/chromium/chrome/browser/download/home/filter/FilterChipsProvider.java +++ b/chrome/browser/download/internal/android/java/src/org/chromium/chrome/browser/download/home/filter/FilterChipsProvider.java
@@ -27,8 +27,6 @@ * downloads. */ public class FilterChipsProvider implements OfflineItemFilterObserver { - private static final int INVALID_INDEX = -1; - /** A delegate responsible for handling UI actions like selecting filters. */ public interface Delegate { /** Called when the selected filter has changed. */ @@ -191,4 +189,4 @@ setFilterSelected(id); mDelegate.onFilterSelected(id); } -} \ No newline at end of file +}
diff --git a/chrome/browser/extensions/api/braille_display_private/braille_display_private_api.cc b/chrome/browser/extensions/api/braille_display_private/braille_display_private_api.cc index fcb04e7..e80a099 100644 --- a/chrome/browser/extensions/api/braille_display_private/braille_display_private_api.cc +++ b/chrome/browser/extensions/api/braille_display_private/braille_display_private_api.cc
@@ -160,12 +160,10 @@ } BrailleDisplayPrivateWriteDotsFunction:: -BrailleDisplayPrivateWriteDotsFunction() { -} + BrailleDisplayPrivateWriteDotsFunction() = default; BrailleDisplayPrivateWriteDotsFunction:: -~BrailleDisplayPrivateWriteDotsFunction() { -} + ~BrailleDisplayPrivateWriteDotsFunction() = default; ExtensionFunction::ResponseAction BrailleDisplayPrivateWriteDotsFunction::Run() {
diff --git a/chrome/browser/extensions/api/settings_private/prefs_util.cc b/chrome/browser/extensions/api/settings_private/prefs_util.cc index 0f17572..4cac5974 100644 --- a/chrome/browser/extensions/api/settings_private/prefs_util.cc +++ b/chrome/browser/extensions/api/settings_private/prefs_util.cc
@@ -551,14 +551,6 @@ settings_api::PrefType::PREF_TYPE_NUMBER; (*s_allowlist)[ash::prefs::kAccessibilityColorFiltering] = settings_api::PrefType::PREF_TYPE_BOOLEAN; - (*s_allowlist)[ash::prefs::kAccessibilityGreyscaleAmount] = - settings_api::PrefType::PREF_TYPE_NUMBER; - (*s_allowlist)[ash::prefs::kAccessibilitySaturationAmount] = - settings_api::PrefType::PREF_TYPE_NUMBER; - (*s_allowlist)[ash::prefs::kAccessibilitySepiaAmount] = - settings_api::PrefType::PREF_TYPE_NUMBER; - (*s_allowlist)[ash::prefs::kAccessibilityHueRotationAmount] = - settings_api::PrefType::PREF_TYPE_NUMBER; (*s_allowlist)[ash::prefs::kAccessibilityColorVisionCorrectionAmount] = settings_api::PrefType::PREF_TYPE_NUMBER; (*s_allowlist)[ash::prefs::kAccessibilityColorVisionDeficiencyType] =
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedFeatures.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedFeatures.java index 939120a..8df8dd45 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedFeatures.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedFeatures.java
@@ -25,8 +25,6 @@ * Helper methods covering more complex Feed related feature checks and states. */ public final class FeedFeatures { - private static final String TAG = "FeedFeatures"; - // Finch param constants for controlling the feed tab stickiness logic to use. private static final String FEED_TAB_STICKYNESS_LOGIC_PARAM = "feed_tab_stickiness_logic"; private static final String RESET_UPON_CHROME_RESTART = "reset_upon_chrome_restart";
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedSliceViewTracker.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedSliceViewTracker.java index b91c934..ed4a886 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedSliceViewTracker.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/FeedSliceViewTracker.java
@@ -28,7 +28,6 @@ * the observer is notified. */ public class FeedSliceViewTracker implements ViewTreeObserver.OnPreDrawListener { - private static final String TAG = "FeedSliceViewTracker"; private static final float DEFAULT_VIEW_LOG_THRESHOLD = .66f; private static final float GOOD_VISITS_EXPOSURE_THRESHOLD = 0.5f; private static final float GOOD_VISITS_COVERAGE_THRESHOLD = 0.25f;
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/SingleWebFeedStreamTest.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/SingleWebFeedStreamTest.java index cb46f14..3f857be8 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/SingleWebFeedStreamTest.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/SingleWebFeedStreamTest.java
@@ -81,9 +81,7 @@ public class SingleWebFeedStreamTest { private static final int LOAD_MORE_TRIGGER_LOOKAHEAD = 5; private static final int LOAD_MORE_TRIGGER_SCROLL_DISTANCE_DP = 100; - private static final String TEST_DATA = "test"; private static final String TEST_URL = JUnitTestGURLs.EXAMPLE_URL; - private static final String HEADER_PREFIX = "header"; private static final OpenUrlOptions DEFAULT_OPEN_URL_OPTIONS = new OpenUrlOptions() {}; private Activity mActivity;
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementCoordinator.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementCoordinator.java index 9f6a0faa..b0c386b 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementCoordinator.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementCoordinator.java
@@ -25,7 +25,6 @@ * https://chromium.googlesource.com/chromium/src/+/HEAD/docs/ui/android/mvc_simple_list_tutorial.md */ public class FollowManagementCoordinator { - private static final String TAG = "FollowMMCoordinator"; private FollowManagementMediator mMediator; private AppCompatActivity mActivity; private final View mView;
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementItemViewBinder.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementItemViewBinder.java index 474e582..c7fad33 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementItemViewBinder.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/followmanagement/FollowManagementItemViewBinder.java
@@ -8,7 +8,6 @@ import org.chromium.ui.modelutil.PropertyModel; class FollowManagementItemViewBinder { - private static final String TAG = "FMItemViewBinder"; public static void bind( PropertyModel model, FollowManagementItemView view, PropertyKey propertyKey) { if (FollowManagementItemProperties.TITLE_KEY == propertyKey) {
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderView.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderView.java index 2952b2d..a2502a3 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderView.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderView.java
@@ -48,8 +48,6 @@ * This view can be inflated from one of two layouts, hence many @Nullables. */ public class SectionHeaderView extends LinearLayout { - private static final String TAG = "SectionHeaderView"; - /** OnTabSelectedListener that delegates calls to the SectionHeadSelectedListener. */ private class SectionHeaderTabListener implements TabLayout.OnTabSelectedListener { private @Nullable OnSectionHeaderSelectedListener mListener; @@ -132,7 +130,6 @@ private @Px int mToolbarHeight; private @Px int mTouchSize; // Action ID for accessibility. - private int mActionId = -1; public SectionHeaderView(Context context, @Nullable AttributeSet attrs) { super(context, attrs);
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderViewTest.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderViewTest.java index e0d99dc..e43dda7 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderViewTest.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/sections/SectionHeaderViewTest.java
@@ -37,7 +37,6 @@ /** Test for the WebFeedFollowIntroView class. */ @RunWith(BaseRobolectricTestRunner.class) public final class SectionHeaderViewTest { - private static final String TAG = "SectionHeaderViewTst"; private SectionHeaderView mSectionHeaderView; private Activity mActivity;
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedFollowIntroView.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedFollowIntroView.java index bb7bd12..020f710 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedFollowIntroView.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedFollowIntroView.java
@@ -14,7 +14,6 @@ import org.chromium.chrome.browser.feed.R; import org.chromium.chrome.browser.flags.ChromeFeatureList; -import org.chromium.chrome.browser.profiles.Profile; import org.chromium.chrome.browser.ui.appmenu.AppMenuHandler; import org.chromium.chrome.browser.user_education.IPHCommandBuilder; import org.chromium.chrome.browser.user_education.UserEducationHelper; @@ -23,8 +22,6 @@ import org.chromium.components.browser_ui.widget.textbubble.TextBubble; import org.chromium.components.feature_engagement.FeatureConstants; import org.chromium.components.feature_engagement.Tracker; -import org.chromium.components.prefs.PrefService; -import org.chromium.components.user_prefs.UserPrefs; import org.chromium.ui.widget.LoadingView; import org.chromium.ui.widget.ViewRectProvider; @@ -35,15 +32,12 @@ * they can follow. */ class WebFeedFollowIntroView { - private static final String TAG = "WFFollowIntroView"; - private static final int DEFAULT_SHOW_TIMEOUT_MILLIS = 8 * 1000; private static final String PARAM_SHOW_TIMEOUT_MILLIS = "intro-show-timeout-millis"; private final Activity mActivity; private final AppMenuHandler mAppMenuHandler; private final Handler mHandler = new Handler(); - private final PrefService mPrefService = UserPrefs.get(Profile.getLastUsedRegularProfile()); private final View mMenuButtonAnchorView; @Nullable private final Tracker mFeatureEngagementTracker;
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedMainMenuItemTest.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedMainMenuItemTest.java index 4ff77b9..69bbf20 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedMainMenuItemTest.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedMainMenuItemTest.java
@@ -75,7 +75,6 @@ @SmallTest public final class WebFeedMainMenuItemTest { private static final GURL TEST_URL = JUnitTestGURLs.getGURL(JUnitTestGURLs.EXAMPLE_URL); - private static final GURL FAVICON_URL = JUnitTestGURLs.getGURL(JUnitTestGURLs.RED_1); @Rule public JniMocker mJniMocker = new JniMocker();
diff --git a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedRecommendationFollowAcceleratorController.java b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedRecommendationFollowAcceleratorController.java index 0de6856..aadd3eb 100644 --- a/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedRecommendationFollowAcceleratorController.java +++ b/chrome/browser/feed/android/java/src/org/chromium/chrome/browser/feed/webfeed/WebFeedRecommendationFollowAcceleratorController.java
@@ -34,8 +34,6 @@ * recommendation (a recommendation card within the feed). */ public class WebFeedRecommendationFollowAcceleratorController { - private static final String TAG = "WebFeedAccCon"; - /** We use UserData to put the web feed name into the tab and the NavigationHandle. */ @VisibleForTesting private static class AssociatedWebFeedData implements UserData {
diff --git a/chrome/browser/feedback/android/java/src/org/chromium/chrome/browser/feedback/InterestFeedFeedbackSource.java b/chrome/browser/feedback/android/java/src/org/chromium/chrome/browser/feedback/InterestFeedFeedbackSource.java index 886cb51..87f790c 100644 --- a/chrome/browser/feedback/android/java/src/org/chromium/chrome/browser/feedback/InterestFeedFeedbackSource.java +++ b/chrome/browser/feedback/android/java/src/org/chromium/chrome/browser/feedback/InterestFeedFeedbackSource.java
@@ -15,7 +15,6 @@ public class InterestFeedFeedbackSource implements FeedbackSource { private static final String KEY = "Interest Feed"; private static final String ENABLED_VALUE = "Enabled"; - private static final String DISABLED_VALUE = "Disabled"; private final HashMap<String, String> mMap;
diff --git a/chrome/browser/flag-metadata.json b/chrome/browser/flag-metadata.json index 5d08c14..1064c8a 100644 --- a/chrome/browser/flag-metadata.json +++ b/chrome/browser/flag-metadata.json
@@ -6000,6 +6000,11 @@ "expiry_milestone": 99 }, { + "name": "os-settings-deprecate-sync-metrics-toggle", + "owners": ["xiaohuic"], + "expiry_milestone": 120 + }, + { "name": "os-settings-revamp-wayfinding", "owners": [ "wesokuhara", "cros-settings@google.com" ], "expiry_milestone": 128 @@ -6271,6 +6276,11 @@ "expiry_milestone": 120 }, { + "name": "post-restore-default-browser-promo", + "owners": [ "hiramahmood@google.com", "bling-get-set-up@google.com" ], + "expiry_milestone": 120 + }, + { "name": "power-bookmark-backend", "owners": [ "wylieb", "skym" ], "expiry_milestone": 118
diff --git a/chrome/browser/flag_descriptions.cc b/chrome/browser/flag_descriptions.cc index f728a35..1ebe001 100644 --- a/chrome/browser/flag_descriptions.cc +++ b/chrome/browser/flag_descriptions.cc
@@ -4,6 +4,7 @@ #include "chrome/browser/flag_descriptions.h" +#include "ash/constants/ash_features.h" #include "build/build_config.h" #include "build/chromeos_buildflags.h" #include "components/supervised_user/core/common/buildflags.h" @@ -6246,6 +6247,12 @@ "Enables app badging toggle to be displayed in app notification page in" "ChromeOS Settings."; +const char kOsSettingsDeprecateSyncMetricsToggleName[] = + "ChromeOS Settings Deprecate Sync Metrics Toggle"; +const char kOsSettingsDeprecateSyncMetricsToggleDescription[] = + "If enabled, deprecate the metrics in sync settings page in " + "ChromeOS Settings."; + const char kOsSettingsRevampWayfindingName[] = "ChromeOS Settings Revamp: Wayfinding Improvements"; const char kOsSettingsRevampWayfindingDescription[] =
diff --git a/chrome/browser/flag_descriptions.h b/chrome/browser/flag_descriptions.h index 6c10d58..b51f026c 100644 --- a/chrome/browser/flag_descriptions.h +++ b/chrome/browser/flag_descriptions.h
@@ -1399,6 +1399,9 @@ extern const char kOsSettingsAppNotificationsPageName[]; extern const char kOsSettingsAppNotificationsPageDescription[]; +extern const char kOsSettingsDeprecateSyncMetricsToggleName[]; +extern const char kOsSettingsDeprecateSyncMetricsToggleDescription[]; + extern const char kOverviewButtonName[]; extern const char kOverviewButtonDescription[];
diff --git a/chrome/browser/installable/installable_manager_browsertest.cc b/chrome/browser/installable/installable_manager_browsertest.cc index 9cd9295..216811f 100644 --- a/chrome/browser/installable/installable_manager_browsertest.cc +++ b/chrome/browser/installable/installable_manager_browsertest.cc
@@ -2,6 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include "base/memory/scoped_refptr.h" +#include "base/task/sequenced_task_runner.h" #include "components/webapps/browser/features.h" #include "components/webapps/browser/installable/installable_data.h" #include "components/webapps/browser/installable/installable_manager.h" @@ -16,9 +18,9 @@ #include "base/memory/raw_ptr.h" #include "base/run_loop.h" #include "base/strings/utf_string_conversions.h" -#include "base/task/sequenced_task_runner.h" #include "base/test/bind.h" #include "base/test/metrics/histogram_tester.h" +#include "base/test/test_simple_task_runner.h" #include "chrome/browser/banners/app_banner_manager_desktop.h" #include "chrome/browser/profiles/profile.h" #include "chrome/browser/ui/browser.h" @@ -144,8 +146,10 @@ class CallbackTester { public: - explicit CallbackTester(base::RepeatingClosure quit_closure) - : quit_closure_(quit_closure) {} + CallbackTester(base::RepeatingClosure quit_closure, + scoped_refptr<base::SequencedTaskRunner> test_task_runner = + base::SequencedTaskRunner::GetCurrentDefault()) + : quit_closure_(quit_closure), test_task_runner_(test_task_runner) {} void OnDidFinishInstallableCheck(const InstallableData& data) { errors_ = data.errors; @@ -162,8 +166,7 @@ valid_manifest_ = data.valid_manifest; worker_check_passed_ = data.worker_check_passed; screenshots_ = *data.screenshots; - base::SequencedTaskRunner::GetCurrentDefault()->PostTask(FROM_HERE, - quit_closure_); + test_task_runner_->PostTask(FROM_HERE, quit_closure_); } const std::vector<InstallableStatusCode>& errors() const { return errors_; } @@ -183,7 +186,6 @@ bool worker_check_passed() const { return worker_check_passed_; } private: - base::RepeatingClosure quit_closure_; std::vector<InstallableStatusCode> errors_; GURL manifest_url_; blink::mojom::ManifestPtr manifest_ = blink::mojom::Manifest::New(); @@ -196,6 +198,8 @@ bool has_maskable_splash_icon_; bool valid_manifest_; bool worker_check_passed_; + base::RepeatingClosure quit_closure_; + scoped_refptr<base::SequencedTaskRunner> test_task_runner_; }; class NestedCallbackTester { @@ -2051,18 +2055,28 @@ IN_PROC_BROWSER_TEST_F(InstallableManagerBrowserTest, ManifestLinkChangeReportsError) { InstallableManager* manager = GetManager(browser()); + scoped_refptr<base::TestSimpleTaskRunner> test_task_runner = + base::MakeRefCounted<base::TestSimpleTaskRunner>(); + manager->SetSequencedTaskRunnerForTesting(test_task_runner); base::RunLoop run_loop; std::unique_ptr<CallbackTester> tester( - new CallbackTester(run_loop.QuitClosure())); + new CallbackTester(run_loop.QuitClosure(), test_task_runner)); NavigateAndRunInstallableManager(browser(), tester.get(), GetManifestParams(), "/banners/manifest_test_page.html"); + // Simulate a manifest URL update by just calling the observer function. content::WebContents* web_contents = browser()->tab_strip_model()->GetActiveWebContents(); static_cast<content::WebContentsObserver*>(manager)->DidUpdateWebManifestURL( web_contents->GetPrimaryMainFrame(), GURL()); + + // This will run all tasks currently pending on the task runner. This includes + // any changes that could have been caused by calling DidUpdateWebManifestURL, + // which should synchronously modify the data to be passed to the tester + // callback. + test_task_runner->RunPendingTasks(); run_loop.Run(); ASSERT_EQ(tester->errors().size(), 1u);
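The installable-manager change above swaps the tester's implicit current-sequence task runner for an injected base::TestSimpleTaskRunner, so the test decides when the completion callback actually runs. A small sketch of that pattern, using only base utilities (BindLambdaForTesting comes from base/test/bind.h, which the file already includes); the function name is illustrative:

#include "base/check.h"
#include "base/location.h"
#include "base/memory/scoped_refptr.h"
#include "base/test/bind.h"
#include "base/test/test_simple_task_runner.h"

void RunHeldTaskExplicitly() {
  auto runner = base::MakeRefCounted<base::TestSimpleTaskRunner>();
  bool ran = false;
  // Tasks posted to a TestSimpleTaskRunner are queued, not executed.
  runner->PostTask(FROM_HERE, base::BindLambdaForTesting([&] { ran = true; }));
  CHECK(!ran);
  // The test picks the moment the queue is flushed, mirroring the
  // RunPendingTasks() call before run_loop.Run() in the hunk above.
  runner->RunPendingTasks();
  CHECK(ran);
}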
diff --git a/chrome/browser/language/android/java/src/org/chromium/chrome/browser/language/LanguageAskPrompt.java b/chrome/browser/language/android/java/src/org/chromium/chrome/browser/language/LanguageAskPrompt.java index 80cf2d7..e0f74f2 100644 --- a/chrome/browser/language/android/java/src/org/chromium/chrome/browser/language/LanguageAskPrompt.java +++ b/chrome/browser/language/android/java/src/org/chromium/chrome/browser/language/LanguageAskPrompt.java
@@ -45,10 +45,7 @@ */ public class LanguageAskPrompt implements ModalDialogProperties.Controller { // Enum values for the Translate.ExplicitLanguageAsk.Event histogram. - private static final int PROMPT_EVENT_SHOWN = 0; - private static final int PROMPT_EVENT_SAVED = 1; private static final int PROMPT_EVENT_CANCELLED = 2; - private static final int PROMPT_EVENT_MAX = PROMPT_EVENT_CANCELLED; private class SeparatorViewHolder extends ViewHolder { SeparatorViewHolder(View view) {
diff --git a/chrome/browser/media/media_engagement_score.cc b/chrome/browser/media/media_engagement_score.cc index f44a76b..7e097eb 100644 --- a/chrome/browser/media/media_engagement_score.cc +++ b/chrome/browser/media/media_engagement_score.cc
@@ -154,8 +154,8 @@ if (!UpdateScoreDict(force_update)) return; - content_settings::ContentSettingConstraints constraints = { - base::Time::Now() + kScoreExpirationDuration}; + content_settings::ContentSettingConstraints constraints; + constraints.set_lifetime(kScoreExpirationDuration); settings_map_->SetWebsiteSettingDefaultScope( origin_.GetURL(), GURL(), ContentSettingsType::MEDIA_ENGAGEMENT, base::Value(std::move(score_dict_)), constraints);
diff --git a/chrome/browser/notifications/android/java/src/org/chromium/chrome/browser/notifications/NotificationUmaTracker.java b/chrome/browser/notifications/android/java/src/org/chromium/chrome/browser/notifications/NotificationUmaTracker.java index 74b08f9..0021e47 100644 --- a/chrome/browser/notifications/android/java/src/org/chromium/chrome/browser/notifications/NotificationUmaTracker.java +++ b/chrome/browser/notifications/android/java/src/org/chromium/chrome/browser/notifications/NotificationUmaTracker.java
@@ -33,8 +33,6 @@ * single entry point here to make more complex tracking easier to add in the future. */ public class NotificationUmaTracker { - private static final String TAG = "NotifsUMATracker"; - /* * A list of notification types. To add a type to this list please update * SystemNotificationType in enums.xml and make sure to keep this list in sync. Additions
diff --git a/chrome/browser/notifications/notification_channels_provider_android.cc b/chrome/browser/notifications/notification_channels_provider_android.cc index 552d340..a8a0ae3 100644 --- a/chrome/browser/notifications/notification_channels_provider_android.cc +++ b/chrome/browser/notifications/notification_channels_provider_android.cc
@@ -316,9 +316,10 @@ } // These constraints are not supported for notifications on Android. - DCHECK_EQ(constraints.expiration, base::Time()); - DCHECK_EQ(constraints.session_model, content_settings::SessionModel::Durable); - DCHECK_EQ(constraints.track_last_visit_for_autoexpiration, false); + DCHECK_EQ(constraints.expiration(), base::Time()); + DCHECK_EQ(constraints.session_model(), + content_settings::SessionModel::Durable); + DCHECK_EQ(constraints.track_last_visit_for_autoexpiration(), false); InitCachedChannels();
diff --git a/chrome/browser/optimization_guide/android/java/src/org/chromium/chrome/browser/optimization_guide/OptimizationGuidePushNotificationManagerUnitTest.java b/chrome/browser/optimization_guide/android/java/src/org/chromium/chrome/browser/optimization_guide/OptimizationGuidePushNotificationManagerUnitTest.java index 6109871..458100f 100644 --- a/chrome/browser/optimization_guide/android/java/src/org/chromium/chrome/browser/optimization_guide/OptimizationGuidePushNotificationManagerUnitTest.java +++ b/chrome/browser/optimization_guide/android/java/src/org/chromium/chrome/browser/optimization_guide/OptimizationGuidePushNotificationManagerUnitTest.java
@@ -63,8 +63,6 @@ @Mock OptimizationGuideBridge.Natives mOptimizationGuideBridgeJniMock; - private static final String TEST_URL = "https://testurl.com/"; - private static final HintNotificationPayload NOTIFICATION_WITH_PAYLOAD = HintNotificationPayload.newBuilder() .setOptimizationType(OptimizationType.PERFORMANCE_HINTS)
diff --git a/chrome/browser/partnercustomizations/java/src/org/chromium/chrome/browser/partnercustomizations/PartnerBrowserCustomizations.java b/chrome/browser/partnercustomizations/java/src/org/chromium/chrome/browser/partnercustomizations/PartnerBrowserCustomizations.java index 0535028..6275146 100644 --- a/chrome/browser/partnercustomizations/java/src/org/chromium/chrome/browser/partnercustomizations/PartnerBrowserCustomizations.java +++ b/chrome/browser/partnercustomizations/java/src/org/chromium/chrome/browser/partnercustomizations/PartnerBrowserCustomizations.java
@@ -50,7 +50,6 @@ static final String PARTNER_DISABLE_BOOKMARKS_EDITING_PATH = "disablebookmarksediting"; @VisibleForTesting static final String PARTNER_DISABLE_INCOGNITO_MODE_PATH = "disableincognitomode"; - private static String sProviderAuthority = PROVIDER_AUTHORITY; private static Boolean sIgnoreSystemPackageCheck; private static Boolean sValid;
diff --git a/chrome/browser/password_check/android/junit/src/org/chromium/chrome/browser/password_check/PasswordCheckControllerTest.java b/chrome/browser/password_check/android/junit/src/org/chromium/chrome/browser/password_check/PasswordCheckControllerTest.java index 10beffd3..edad6dd 100644 --- a/chrome/browser/password_check/android/junit/src/org/chromium/chrome/browser/password_check/PasswordCheckControllerTest.java +++ b/chrome/browser/password_check/android/junit/src/org/chromium/chrome/browser/password_check/PasswordCheckControllerTest.java
@@ -95,7 +95,6 @@ "PasswordManager.BulkCheck.UserActionAndroid"; private static final String PASSWORD_CHECK_COMPROMISED_CREDENTIALS_AFTER_CHECK_HISTOGRAM = "PasswordManager.BulkCheck.CompromisedCredentialsCountAfterCheckAndroid"; - private static final boolean USE_LAST_VALID_AUTH = true; @Rule public Features.JUnitProcessor mFeaturesProcessor = new Features.JUnitProcessor();
diff --git a/chrome/browser/password_manager/android/pwd_migration/java/src/org/chromium/chrome/browser/pwd_migration/PasswordMigrationWarningOptionsFragment.java b/chrome/browser/password_manager/android/pwd_migration/java/src/org/chromium/chrome/browser/pwd_migration/PasswordMigrationWarningOptionsFragment.java index 4cf115d..6122bf7 100644 --- a/chrome/browser/password_manager/android/pwd_migration/java/src/org/chromium/chrome/browser/pwd_migration/PasswordMigrationWarningOptionsFragment.java +++ b/chrome/browser/password_manager/android/pwd_migration/java/src/org/chromium/chrome/browser/pwd_migration/PasswordMigrationWarningOptionsFragment.java
@@ -21,7 +21,6 @@ */ public class PasswordMigrationWarningOptionsFragment extends Fragment { private Context mContext; - private boolean mShouldSignIn = true; private Runnable mNextCallback; private Runnable mCancelCallback; private String mChannelString;
diff --git a/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.cc b/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.cc index 1136216..7a513a7 100644 --- a/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.cc +++ b/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.cc
@@ -96,7 +96,7 @@ graph->AddProcessNodeObserver(this); CHECK(last_measurement_time_.is_null()); - last_measurement_time_ = base::LiveTicks::Now(); + last_measurement_time_ = base::TimeTicks::Now(); // Start monitoring CPU usage for all existing processes. Can't read their CPU // usage until they have a pid assigned. @@ -112,7 +112,7 @@ DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); cpu_measurement_map_.clear(); CHECK(!last_measurement_time_.is_null()); - last_measurement_time_ = base::LiveTicks(); + last_measurement_time_ = base::TimeTicks(); graph->RemoveProcessNodeObserver(this); } @@ -123,7 +123,7 @@ // frames and workers. CHECK(!last_measurement_time_.is_null()); CPUUsageMap cpu_usage_map; - const base::LiveTicks now = base::LiveTicks::Now(); + const base::TimeTicks now = base::TimeTicks::Now(); for (auto& [process_node, cpu_measurement] : cpu_measurement_map_) { cpu_measurement.MeasureAndDistributeCPUUsage( process_node, last_measurement_time_, now, cpu_usage_map); @@ -220,8 +220,8 @@ void PageTimelineCPUMonitor::CPUMeasurement::MeasureAndDistributeCPUUsage( const ProcessNode* process_node, - base::LiveTicks measurement_interval_start, - base::LiveTicks measurement_interval_end, + base::TimeTicks measurement_interval_start, + base::TimeTicks measurement_interval_end, CPUUsageMap& cpu_usage_map) { // TODO(crbug.com/1410503): There isn't a good way to get the process CPU // usage after it exits here:
diff --git a/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.h b/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.h index e9183bc..87a5488 100644 --- a/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.h +++ b/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor.h
@@ -113,8 +113,8 @@ // workers in the process. void MeasureAndDistributeCPUUsage( const ProcessNode* process_node, - base::LiveTicks measurement_interval_start, - base::LiveTicks measurement_interval_end, + base::TimeTicks measurement_interval_start, + base::TimeTicks measurement_interval_end, CPUUsageMap& cpu_usage_map); private: @@ -134,7 +134,7 @@ // Last time CPU measurements were taken (for calculating the total length of // a measurement interval). - base::LiveTicks last_measurement_time_ GUARDED_BY_CONTEXT(sequence_checker_); + base::TimeTicks last_measurement_time_ GUARDED_BY_CONTEXT(sequence_checker_); // Callback that will be invoked to create CPUMeasurementDelegate objects for // each ProcessNode being measured.
diff --git a/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor_unittest.cc b/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor_unittest.cc index a1d6b9f2..b5d749c 100644 --- a/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor_unittest.cc +++ b/chrome/browser/performance_manager/metrics/page_timeline_cpu_monitor_unittest.cc
@@ -71,8 +71,8 @@ : public PageTimelineCPUMonitor::CPUMeasurementDelegate { public: struct CPUUsagePeriod { - base::LiveTicks start_time; - base::LiveTicks end_time; + base::TimeTicks start_time; + base::TimeTicks end_time; double cpu_usage; }; @@ -107,8 +107,8 @@ for (const auto& usage_period : cpu_usage_periods) { CHECK(!usage_period.start_time.is_null()); // The last interval in the list will have no end time. - const base::LiveTicks end_time = usage_period.end_time.is_null() - ? base::LiveTicks::Now() + const base::TimeTicks end_time = usage_period.end_time.is_null() + ? base::TimeTicks::Now() : usage_period.end_time; CHECK(end_time >= usage_period.start_time); cumulative_usage += @@ -221,7 +221,7 @@ void SetProcessCPUUsage(const ProcessNodeImpl* process_node, double usage) { SimulatedCPUMeasurementDelegate::CPUUsagePeriod usage_period{ - .start_time = base::LiveTicks::Now(), + .start_time = base::TimeTicks::Now(), .cpu_usage = usage, }; auto& delegate = GetOrCreateCPUMeasurementDelegate(process_node);
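These three files move the page-timeline CPU monitor from base::LiveTicks back to base::TimeTicks, so a measurement interval is simply the difference of two monotonic TimeTicks samples. A minimal sketch of that arithmetic, with names that are illustrative rather than the monitor's actual members:

#include "base/check.h"
#include "base/time/time.h"

// Returns the length of the interval since the previous measurement and
// advances the bookkeeping, mirroring how last_measurement_time_ is used above.
base::TimeDelta AdvanceMeasurementInterval(base::TimeTicks& last_measurement_time) {
  CHECK(!last_measurement_time.is_null());
  const base::TimeTicks now = base::TimeTicks::Now();
  const base::TimeDelta interval = now - last_measurement_time;
  last_measurement_time = now;
  return interval;
}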
diff --git a/chrome/browser/permissions/unused_site_permissions_service_browsertest.cc b/chrome/browser/permissions/unused_site_permissions_service_browsertest.cc index 368a83a..9c0984d 100644 --- a/chrome/browser/permissions/unused_site_permissions_service_browsertest.cc +++ b/chrome/browser/permissions/unused_site_permissions_service_browsertest.cc
@@ -65,9 +65,10 @@ clock.SetNow(past); map->SetClockForTesting(&clock); service->SetClockForTesting(&clock); - map->SetContentSettingDefaultScope( - url, url, ContentSettingsType::GEOLOCATION, CONTENT_SETTING_ALLOW, - {.track_last_visit_for_autoexpiration = true}); + content_settings::ContentSettingConstraints constraints; + constraints.set_track_last_visit_for_autoexpiration(true); + map->SetContentSettingDefaultScope(url, url, ContentSettingsType::GEOLOCATION, + CONTENT_SETTING_ALLOW, constraints); clock.SetNow(now); service->UpdateUnusedPermissionsForTesting(); ASSERT_EQ(service->GetTrackedUnusedPermissionsForTesting().size(), 1u); @@ -117,9 +118,10 @@ clock.SetNow(past); map->SetClockForTesting(&clock); service->SetClockForTesting(&clock); - map->SetContentSettingDefaultScope( - url, url, ContentSettingsType::GEOLOCATION, CONTENT_SETTING_ALLOW, - {.track_last_visit_for_autoexpiration = true}); + content_settings::ContentSettingConstraints constraints; + constraints.set_track_last_visit_for_autoexpiration(true); + map->SetContentSettingDefaultScope(url, url, ContentSettingsType::GEOLOCATION, + CONTENT_SETTING_ALLOW, constraints); clock.SetNow(now); // Check if the content setting is still ALLOW, before auto-revocation.
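The browser-test change above adopts the same setter API for last-visit tracking: the grant is made while a test clock points into the past, so the unused-site-permissions service can later treat it as long unused. A short sketch of that setup, assuming base::SimpleTestClock is the clock type used by the fixture and with the include list approximate:

#include "base/test/simple_test_clock.h"
#include "base/time/time.h"
#include "components/content_settings/core/browser/host_content_settings_map.h"
#include "components/content_settings/core/common/content_settings.h"
#include "components/content_settings/core/common/content_settings_constraints.h"
#include "components/content_settings/core/common/content_settings_types.h"
#include "url/gurl.h"

// Grants geolocation with last-visit tracking enabled, back-dated via the
// injected clock so auto-revocation logic sees it as long unused.
void GrantTrackedGeolocation(HostContentSettingsMap* map,
                             base::SimpleTestClock* clock,
                             const GURL& url) {
  clock->SetNow(base::Time::Now() - base::Days(80));  // pretend the grant is old
  map->SetClockForTesting(clock);

  content_settings::ContentSettingConstraints constraints;
  constraints.set_track_last_visit_for_autoexpiration(true);
  map->SetContentSettingDefaultScope(url, url, ContentSettingsType::GEOLOCATION,
                                     CONTENT_SETTING_ALLOW, constraints);
}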
diff --git a/chrome/browser/privacy_sandbox/android/java/src/org/chromium/chrome/browser/privacy_sandbox/FledgePreference.java b/chrome/browser/privacy_sandbox/android/java/src/org/chromium/chrome/browser/privacy_sandbox/FledgePreference.java index 0c0f2dd..f1cfe67 100644 --- a/chrome/browser/privacy_sandbox/android/java/src/org/chromium/chrome/browser/privacy_sandbox/FledgePreference.java +++ b/chrome/browser/privacy_sandbox/android/java/src/org/chromium/chrome/browser/privacy_sandbox/FledgePreference.java
@@ -21,8 +21,6 @@ * A Preference to represent a site using FLEDGE. */ public class FledgePreference extends ImageButtonPreference { - private static final int FAVICON_PADDING_DP = 4; - // The ETLD+1 that used Fledge. private final @NonNull String mSite; private final LargeIconBridge mLargeIconBridge;
diff --git a/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing.html b/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing.html deleted file mode 100644 index 2826aa2..0000000 --- a/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing.html +++ /dev/null
@@ -1,42 +0,0 @@ -<!-- -Copyright 2019 The Chromium Authors -Use of this source code is governed by a BSD-style license that can be -found in the LICENSE file. ---> - -<!doctype html> -<html> - <head> - <meta charset=utf-8> - <link rel="stylesheet" href="arc_tracing.css"> - <script src="chrome://resources/ash/common/cr.js"></script> - <script src="chrome://resources/js/util_deprecated.js"></script> - <script src="arc_tracing_ui.js"></script> - <script src="arc_graphics_tracing_ui.js"></script> - <script src="arc_graphics_tracing.js"></script> - <title>ARC graphics tracing</title> - </head> - <body> - <div> - <h3>ARC graphics tracing</h3> - Use <b>Ctrl+Shift+G</b> in active Android app to start/stop tracing. - <input type="checkbox" id="arc-graphics-tracing-stop-on-jank" checked> - Stop on jank. Status: <i id="arc-tracing-status">Idle</i> - <div id="arc-tracing-control-buttons"> - <button type="button" id="arc-graphics-tracing-save" disabled>Save - </button> - <button type="button" id="arc-tracing-load">Load</button> - </div> - </div> - <hr> - <div> - <img id="arc-graphics-tracing-icon" class="arc-tracing-app-icon"> - <span id="arc-graphics-tracing-title" class="arc-tracing-app-title"> - </span> - </div> - <hr> - <div id='arc-event-bands'></div> - <div id='arc-detailed-view-overlay'></div> - <div id='arc-event-band-tooltip'></div> - </body> -</html>
diff --git a/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing.js b/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing.js deleted file mode 100644 index 4d79f150..0000000 --- a/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing.js +++ /dev/null
@@ -1,37 +0,0 @@ -// Copyright 2019 The Chromium Authors -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -/** - * @fileoverview ARC Graphics Tracing UI root element. - */ - -cr.define('cr.ArcGraphicsTracing', function() { - return { - /** - * Initializes internal structures. - */ - initialize() { - const stopOnJank = $('arc-graphics-tracing-stop-on-jank'); - stopOnJank.addEventListener('click', function(event) { - chrome.send('setStopOnJank', [stopOnJank.checked]); - }, false); - chrome.send('ready'); - chrome.send('setStopOnJank', [stopOnJank.checked]); - initializeGraphicsUi(); - }, - - setStatus: setStatus, - - setModel(model) { - setGraphicBuffersModel(model); - }, - }; -}); - -/** - * Initializes UI. - */ -window.onload = function() { - cr.ArcGraphicsTracing.initialize(); -};
diff --git a/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing_ui.js b/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing_ui.js deleted file mode 100644 index 724d767d..0000000 --- a/chrome/browser/resources/chromeos/arc_graphics_tracing/arc_graphics_tracing_ui.js +++ /dev/null
@@ -1,342 +0,0 @@ -// Copyright 2019 The Chromium Authors -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -/** - * @fileoverview ARC Graphics Tracing UI. - */ - -/** - * @type {Object}. - * Currently loaded model. - */ -let activeModel = null; - -/** - * Initialises graphic tracing UI. It calls initialization of base tracing UI - * and additionally sets handler for the model saving. - */ -function initializeGraphicsUi() { - initializeUi(5 /* zoomLevel */, function() { - // Update function. - if (activeModel) { - setGraphicBuffersModel(activeModel); - } - }); - - $('arc-graphics-tracing-save').onclick = function(event) { - const linkElement = document.createElement('a'); - const file = new Blob([JSON.stringify(activeModel)], {type: 'text/plain'}); - linkElement.href = URL.createObjectURL(file); - linkElement.download = 'tracing_model.json'; - linkElement.click(); - }; -} - -function setModelHeader(model) { - $('arc-graphics-tracing-icon').src = ''; - $('arc-graphics-tracing-title').textContent = ''; - - if (!model.information) { - return; - } - - if (model.information.icon) { - $('arc-graphics-tracing-icon').src = - 'data:image/png;base64,' + model.information.icon; - } - if (model.information.title) { - let title = model.information.title; - if (model.information.timestamp) { - title += ' '; - title += new Date(model.information.timestamp).toLocaleString(); - } - title += ' '; - title += (model.information.duration * 0.000001).toFixed(2); - title += 's'; - if (model.information.platform) { - title += ' on '; - title += model.information.platform; - } - title += '.'; - $('arc-graphics-tracing-title').textContent = title; - } -} - -/** - * Creates visual representation of graphic buffers event model. - * - * @param {Object} model object produced by |ArcTracingGraphicsModel|. - */ -function setGraphicBuffersModel(model) { - // Clear previous content. - $('arc-event-bands').textContent = ''; - activeModel = model; - - setModelHeader(model); - - const duration = model.information.duration; - - // Microseconds per pixel. 100% zoom corresponds to 100 mcs per pixel. 
- const resolution = zooms[zoomLevel]; - const parent = $('arc-event-bands'); - - const topBandHeight = 16; - const topBandPadding = 4; - const innerBandHeight = 12; - const innerBandPadding = 2; - const innerLastBandPadding = 12; - const chartHeight = 48; - - const vsyncEvents = new Events( - model.android.global_events, 406 /* kVsyncTimestamp */, - 406 /* kVsyncTimestamp */); - - const cpusTitle = new EventBandTitle(parent, 'CPUs', 'arc-events-band-title'); - const cpusBands = - new CpuEventBands(cpusTitle, 'arc-events-band', resolution, 0, duration); - cpusBands.setWidth(cpusBands.timestampToOffset(duration)); - cpusBands.setModel(model); - cpusBands.addChartToExistingArea(0 /* top */, cpusBands.height); - cpusBands.addChartSources( - [new Events( - model.system.memory, 8 /* kCpuTemperature */, - 8 /* kCpuTemperature */)], - true /* smooth */); - cpusBands.addChartSources( - [new Events( - model.system.memory, 9 /* kCpuFrequency */, 9 /* kCpuFrequency */)], - true /* smooth */); - cpusBands.addChartSources( - [new Events(model.system.memory, 10 /* kCpuPower */, 10 /* kCpuPower */)], - true /* smooth */); - cpusBands.setVSync(vsyncEvents); - - const memoryTitle = - new EventBandTitle(parent, 'Memory', 'arc-events-band-title'); - const memoryBands = - new EventBands(memoryTitle, 'arc-events-band', resolution, 0, duration); - memoryBands.setWidth(memoryBands.timestampToOffset(duration)); - memoryBands.addChart(chartHeight, topBandPadding); - // Used memory chart. - memoryBands.addChartSources( - [new Events(model.system.memory, 1 /* kMemUsed */, 1 /* kMemUsed */)], - true /* smooth */); - // Swap memory chart. - memoryBands.addChartSources( - [ - new Events(model.system.memory, 2 /* kSwapRead */, 2 /* kSwapRead */), - new Events(model.system.memory, 3 /* kSwapWrite */, 3 /* kSwapWrite */), - ], - true /* smooth */); - // Geom objects and size. 
- memoryBands.addChartSources( - [new Events( - model.system.memory, 5 /* kGemObjects */, 5 /* kGemObjects */)], - true /* smooth */); - memoryBands.addChartSources( - [new Events(model.system.memory, 6 /* kGemSize */, 6 /* kGemSize */)], - true /* smooth */); - memoryBands.addChartSources( - [new Events( - model.system.memory, 12 /* kMemoryPower */, 12 /* kMemoryPower */)], - true /* smooth */); - memoryBands.setVSync(vsyncEvents); - - const chromeTitle = - new EventBandTitle(parent, 'Chrome graphics', 'arc-events-band-title'); - const chromeBands = - new EventBands(chromeTitle, 'arc-events-band', resolution, 0, duration); - chromeBands.setWidth(chromeBands.timestampToOffset(duration)); - for (let i = 0; i < model.chrome.buffers.length; i++) { - chromeBands.addBand( - new Events(model.chrome.buffers[i], 500, 599), topBandHeight, - topBandPadding); - } - - chromeBands.setVSync(vsyncEvents); - const chromeJanks = new Events( - model.chrome.global_events, 505 /* kChromeOSJank */, - 505 /* kChromeOSJank */); - chromeBands.addGlobal(chromeJanks); - - chromeBands.addChartToExistingArea(0 /* top */, chromeBands.height); - chromeBands.addChartSources( - [new Events( - model.system.memory, 7 /* kGpuFrequency */, 7 /* kGpuFrequency */)], - false /* smooth */); - chromeBands.addChartSources( - [new Events(model.system.memory, 11 /* kGpuPower */, 11 /* kGpuPower */)], - true /* smooth */); - - - const androidTitle = - new EventBandTitle(parent, 'Android graphics', 'arc-events-band-title'); - const androidBands = - new EventBands(androidTitle, 'arc-events-band', resolution, 0, duration); - androidBands.setWidth(androidBands.timestampToOffset(duration)); - androidBands.addBand( - new Events(model.android.buffers[0], 400, 499), topBandHeight, - topBandPadding); - // Add vsync events - androidBands.setVSync(vsyncEvents); - // Add vsync handler events - const androidVsyncHandling = new Events( - model.android.global_events, 400 /* kSurfaceFlingerVsyncHandler */, - 400 /* kSurfaceFlingerVsyncHandler */); - androidBands.addGlobal(androidVsyncHandling); - // Add jank events - const androidJanks = new Events( - model.android.global_events, 405 /* kSurfaceFlingerCompositionJank */, - 405 /* kSurfaceFlingerCompositionJank */); - androidBands.addGlobal(androidJanks); - - const allActivityJanks = []; - const allActivityCustomEvents = []; - for (let i = 0; i < model.views.length; i++) { - const view = model.views[i]; - let activityTitleText; - let icon; - if (model.tasks && view.task_id in model.tasks) { - activityTitleText = - model.tasks[view.task_id].title + ' - ' + view.activity; - icon = model.tasks[view.task_id].icon; - } else { - activityTitleText = 'Task #' + view.task_id + ' - ' + view.activity; - } - const activityTitle = new EventBandTitle( - parent, activityTitleText, 'arc-events-band-title', icon); - const activityBands = new EventBands( - activityTitle, 'arc-events-band', resolution, 0, duration); - activityBands.setWidth(activityBands.timestampToOffset(duration)); - for (let j = 0; j < view.buffers.length; j++) { - // Android buffer events. - activityBands.addBand( - new Events(view.buffers[j], 100, 199), innerBandHeight, - innerBandPadding); - // exo events. - activityBands.addBand( - new Events(view.buffers[j], 200, 299), innerBandHeight, - innerBandPadding /* padding */); - // Chrome buffer events are not displayed at this time. - - // Add separator between buffers. 
- if (j != view.buffers.length - 1) { - activityBands.addBandSeparator(innerBandPadding); - } - } - // Add vsync events - activityBands.setVSync(vsyncEvents); - - const activityJank = new Events( - view.global_events, 106 /* kBufferFillJank */, - 106 /* kBufferFillJank */); - activityBands.addGlobal(activityJank); - allActivityJanks.push(activityJank); - - const activityCustomEvents = new Events( - view.global_events, 600 /* kCustomEvent */, 600 /* kCustomEvent */); - activityBands.addGlobal(activityCustomEvents); - allActivityCustomEvents.push(activityCustomEvents); - } - - // Input section if exists. - if (model.input && model.input.buffers.length > 0) { - const inputTitle = - new EventBandTitle(parent, 'Input', 'arc-events-band-title'); - const inputBands = - new EventBands(inputTitle, 'arc-events-band', resolution, 0, duration); - inputBands.setWidth(inputBands.timestampToOffset(duration)); - for (let i = 0; i < model.input.buffers.length; i++) { - inputBands.addBand( - new Events(model.input.buffers[i], 700, 799), topBandHeight, - topBandPadding); - } - inputBands.setVSync(vsyncEvents); - } - - // Create time ruler. - const timeRulerEventHeight = 16; - const timeRulerLabelHeight = 92; - const timeRulerTitle = - new EventBandTitle(parent, '' /* title */, 'arc-time-ruler-title'); - const timeRulerBands = new EventBands( - timeRulerTitle, 'arc-events-band', resolution, 0, duration); - timeRulerBands.setWidth(timeRulerBands.timestampToOffset(duration)); - // Reseve space for ticks and global events. - timeRulerBands.updateHeight(timeRulerEventHeight, 0 /* padding */); - - const kTimeMark = 10000; - const kTimeMarkSmall = 10001; - const timeEvents = []; - let timeTick = 0; - let timeTickOffset = 20 * resolution; - let timeTickIndex = 0; - while (timeTick < duration) { - if ((timeTickIndex % 10) == 0) { - timeEvents.push([kTimeMark, timeTick]); - } else { - timeEvents.push([kTimeMarkSmall, timeTick]); - } - timeTick += timeTickOffset; - ++timeTickIndex; - } - const timeMarkEvents = new Events(timeEvents, kTimeMark, kTimeMarkSmall); - timeRulerBands.addGlobal(timeMarkEvents); - - // Add all janks - timeRulerBands.addGlobal(chromeJanks, 'circle' /* renderType */); - timeRulerBands.addGlobal(androidJanks, 'circle' /* renderType */); - for (let i = 0; i < allActivityJanks.length; ++i) { - timeRulerBands.addGlobal(allActivityJanks[i], 'circle' /* renderType */); - } - for (let i = 0; i < allActivityCustomEvents.length; ++i) { - timeRulerBands.addGlobal( - allActivityCustomEvents[i], 'circle' /* renderType */); - } - // Add vsync events - timeRulerBands.setVSync(vsyncEvents); - - // Reseve space for labels. - // Add tick labels. - timeRulerBands.updateHeight(timeRulerLabelHeight, 0 /* padding */); - timeTick = 0; - timeTickOffset = 200 * resolution; - while (timeTick < duration) { - SVG.addText( - timeRulerBands.svg, timeRulerBands.timestampToOffset(timeTick), - timeRulerEventHeight, timeRulerBands.fontSize, - timestampToMsText(timeTick)); - timeTick += timeTickOffset; - } - // Add janks and custom events labels. 
- const rotationY = timeRulerEventHeight + timeRulerBands.fontSize; - for (let i = 0; i < timeRulerBands.globalEvents.length; ++i) { - const globalEvents = timeRulerBands.globalEvents[i]; - if (globalEvents == timeMarkEvents || - globalEvents == timeRulerBands.vsyncEvents) { - continue; - } - let index = globalEvents.getFirstEvent(); - while (index >= 0) { - const event = globalEvents.events[index]; - index = globalEvents.getNextEvent(index, 1 /* direction */); - const eventType = event[0]; - const attributes = eventAttributes[eventType]; - let text; - if (eventType == 600 /* kCustomEvent */) { - text = event[2]; - } else { - text = attributes.name; - } - const x = - timeRulerBands.timestampToOffset(event[1]) - timeRulerBands.fontSize; - SVG.addText( - timeRulerBands.svg, x, timeRulerEventHeight, timeRulerBands.fontSize, - text, 'start' /* anchor */, - 'rotate(45 ' + x + ', ' + rotationY + ')' /* transform */); - } - } - - $('arc-graphics-tracing-save').disabled = false; -}
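Note: the deleted ruler code above generates its tick events with a counting loop: ticks are spaced 20 * resolution apart and every tenth tick becomes a major mark. A minimal TypeScript sketch of that loop, pulled out into a standalone function (the function name is ours; the kTimeMark constants match the removed code):

// Sketch of the tick generation used by the removed time ruler.
function buildTimeTicks(duration: number, resolution: number): Array<[number, number]> {
  const kTimeMark = 10000;       // major tick (every 10th step)
  const kTimeMarkSmall = 10001;  // minor tick
  const ticks: Array<[number, number]> = [];
  const step = 20 * resolution;
  for (let tick = 0, i = 0; tick < duration; tick += step, ++i) {
    ticks.push([i % 10 === 0 ? kTimeMark : kTimeMarkSmall, tick]);
  }
  return ticks;
}

The removed code then wraps the resulting [type, timestamp] pairs in an Events object and renders the major-tick labels separately at 200 * resolution spacing.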
diff --git a/chrome/browser/resources/chromeos/internet_detail_dialog/internet_detail_dialog.js b/chrome/browser/resources/chromeos/internet_detail_dialog/internet_detail_dialog.js index 8a787a6..b53d009 100644 --- a/chrome/browser/resources/chromeos/internet_detail_dialog/internet_detail_dialog.js +++ b/chrome/browser/resources/chromeos/internet_detail_dialog/internet_detail_dialog.js
@@ -25,7 +25,7 @@ import {assert} from 'chrome://resources/ash/common/assert.js'; import {I18nBehavior} from 'chrome://resources/ash/common/i18n_behavior.js'; import {loadTimeData} from 'chrome://resources/ash/common/load_time_data.m.js'; -import {isActiveSim} from 'chrome://resources/ash/common/network/cellular_utils.js'; +import {getApnDisplayName, isActiveSim} from 'chrome://resources/ash/common/network/cellular_utils.js'; import {CrPolicyNetworkBehaviorMojo} from 'chrome://resources/ash/common/network/cr_policy_network_behavior_mojo.js'; import {MojoInterfaceProviderImpl} from 'chrome://resources/ash/common/network/mojo_interface_provider.js'; import {NetworkListenerBehavior} from 'chrome://resources/ash/common/network/network_listener_behavior.js'; @@ -495,8 +495,8 @@ if (apnExpanded) { return ''; } - return managedProperties.typeProperties.cellular.connectedApn - .accessPointName; + return getApnDisplayName( + managedProperties.typeProperties.cellular.connectedApn); }, /**
diff --git a/chrome/browser/resources/chromeos/login/oobe.js b/chrome/browser/resources/chromeos/login/oobe.js index 9259720..ccdc6294 100644 --- a/chrome/browser/resources/chromeos/login/oobe.js +++ b/chrome/browser/resources/chromeos/login/oobe.js
@@ -9,20 +9,21 @@ // clang-format on + import {assert} from '//resources/ash/common/assert.js'; import {$} from '//resources/ash/common/util.js'; -import {refreshColorCss, startColorChangeUpdater} from '//resources/cr_components/color_change_listener/colors_css_updater.js'; +import {ColorChangeUpdater} from '//resources/cr_components/color_change_listener/colors_css_updater.js'; import {getTrustedScriptURL} from '//resources/js/static_types.js'; import {Oobe} from './cr_ui.js'; import * as OobeDebugger from './debug/debug.js'; import * as QuickStartDebugger from './debug/quick_start_debugger.js'; -import * as OobeTestApi from './test_api/test_api.js'; import {loadTimeData} from './i18n_setup.js'; import {addScreensToMainContainer} from './login_ui_tools.js'; import {MultiTapDetector} from './multi_tap_detector.js'; import {TraceEvent, traceExecution} from './oobe_trace.js'; import {priorityOobeScreenList} from './priority_screens_oobe_flow.js'; +import * as OobeTestApi from './test_api/test_api.js'; // Everything has been imported at this point. traceExecution(TraceEvent.FIRST_LINE_AFTER_IMPORTS); @@ -144,8 +145,9 @@ // Start listening for color changes in 'chrome://theme/colors.css'. Force // reload it once to account for any missed color change events between // loading oobe.html and here. - startColorChangeUpdater(); - refreshColorCss(); + const updater = ColorChangeUpdater.forDocument(); + updater.start(); + updater.refreshColorsCss(); // TODO(b/268463435): Move include directly to the oobe.html after Jelly // flag will be enabled by default.
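Note: colors_css_updater.js now exposes a per-document ColorChangeUpdater instead of the two free functions removed above. A minimal sketch of the new call pattern, taken from the lines added in oobe.js (only the wrapper function name is ours):

import {ColorChangeUpdater} from '//resources/cr_components/color_change_listener/colors_css_updater.js';

// Start listening for color changes in chrome://theme/colors.css and force one
// refresh so any event missed before the listener attached is picked up.
function wireDynamicColors(): void {
  const updater = ColorChangeUpdater.forDocument();
  updater.start();
  updater.refreshColorsCss();
}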
diff --git a/chrome/browser/resources/new_tab_page/app.html b/chrome/browser/resources/new_tab_page/app.html index b36b2e9c..c99be37 100644 --- a/chrome/browser/resources/new_tab_page/app.html +++ b/chrome/browser/resources/new_tab_page/app.html
@@ -135,8 +135,8 @@ } /* ~ because the dom-if results in a template between the middle-slot-promo - and ntp-modules. */ - ntp-middle-slot-promo:not([hidden]) ~ ntp-modules { + and ntp-modules / ntp-modules-lanchpad. */ + ntp-middle-slot-promo:not([hidden]) ~ #modules { margin-top: 16px; } @@ -150,6 +150,7 @@ #customizeButtonContainer:has(help-bubble) { /* help-bubble parent needs z-index to overlay ntp-iframe */ z-index: 1001; + background-color: var(--color-new-tab-page-button-background) !important; } :host-context([dir='ltr']) #customizeButtonContainer { @@ -168,7 +169,10 @@ background-color: var(--ntp-protected-icon-background-color-hovered); } - #customizeButton { + /* TODO(crbug.com/1454193): Show collapsed Customize Chrome button + for first step of Panorama tutorial after M115 is released to Stable. */ + #customizeButton, + #customizeButtonContainer:has(help-bubble) #customizeButton { --hover-bg-color: var(--color-new-tab-page-button-background-hovered); --text-color: var(--color-new-tab-page-button-foreground); border: none; @@ -176,6 +180,7 @@ box-shadow: 0 3px 6px rgba(0, 0, 0, .16), 0 1px 2px rgba(0, 0, 0, .23); font-weight: 400; min-width: 32px; + padding: 8px 16px; } :host([show-background-image_]) #customizeButton { @@ -187,7 +192,7 @@ box-shadow: var(--ntp-focus-shadow); } - #customizeIcon { + #customizeIcon, #customizeButtonContainer:has(help-bubble) #customizeIcon { -webkit-mask-image: url(icons/icon_pencil.svg); -webkit-mask-repeat: no-repeat; -webkit-mask-size: 100%; @@ -199,7 +204,15 @@ :host([show-background-image_]) #customizeIcon { background-color: white; - margin: 0; + margin-inline-end: 0; + } + + :host([show-background-image_]) #customizeText { + display: none; + } + + #customizeButtonContainer:has(help-bubble) #customizeText { + display: inherit; } @media (max-width: 550px) { @@ -383,13 +396,22 @@ </ntp-middle-slot-promo> </template> <template is="dom-if" if="[[modulesEnabled_]]"> - <ntp-modules id="modules" - modules-fre-shown="{{modulesFreShown}}" - modules-shown-to-user="{{modulesShownToUser}}" - on-customize-module="onCustomizeModule_" - on-modules-loaded="onModulesLoaded_" - hidden="[[!promoAndModulesLoaded_]]"> - </ntp-modules> + <template is="dom-if" if="[[!modulesRedesignedEnabled_]]"> + <ntp-modules id="modules" + modules-fre-shown="{{modulesFreShown}}" + modules-shown-to-user="{{modulesShownToUser}}" + on-customize-module="onCustomizeModule_" + on-modules-loaded="onModulesLoaded_" + hidden="[[!promoAndModulesLoaded_]]"> + </ntp-modules> + </template> + <template is="dom-if" if="[[modulesRedesignedEnabled_]]"> + <ntp-modules-v2 id="modules" + on-customize-module="onCustomizeModule_" + on-modules-loaded="onModulesLoaded_" + hidden="[[!promoAndModulesLoaded_]]"> + </ntp-modules-v2> + </template> </template> <a id="backgroundImageAttribution" href="[[backgroundImageAttributionUrl_]]" @@ -412,7 +434,7 @@ <cr-button id="customizeButton" on-click="onCustomizeClick_" title="$i18n{customizeThisPage}" aria-pressed="[[showCustomize_]]"> <div id="customizeIcon"></div> - <div id="customizeText" hidden$="[[showBackgroundImage_]]"> + <div id="customizeText"> $i18n{customizeButton} </div> </cr-button>
diff --git a/chrome/browser/resources/new_tab_page/app.ts b/chrome/browser/resources/new_tab_page/app.ts index e0f11261..46ea376 100644 --- a/chrome/browser/resources/new_tab_page/app.ts +++ b/chrome/browser/resources/new_tab_page/app.ts
@@ -857,7 +857,7 @@ case $$(this, 'ntp-middle-slot-promo'): recordClick(NtpElement.MIDDLE_SLOT_PROMO); return; - case $$(this, 'ntp-modules'): + case $$(this, '#modules'): recordClick(NtpElement.MODULE); return; case $$(this, '#customizeButton'):
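Note: both <ntp-modules> and <ntp-modules-v2> are stamped with id="modules" (see the app.html hunk above), so keying the metrics case on the id covers whichever element the dom-if produced. As a purely hypothetical illustration of the mechanics, assuming $$ is a shadow-root querySelector wrapper:

// Hypothetical stand-in for the $$ helper used above (assumption: it queries
// the host element's shadow root).
function shadowQuery<E extends Element>(host: Element, selector: string): E|null {
  return host.shadowRoot ? host.shadowRoot.querySelector<E>(selector) : null;
}

// shadowQuery(appElement, '#modules') matches ntp-modules or ntp-modules-v2.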
diff --git a/chrome/browser/resources/new_tab_page/lazy_load.ts b/chrome/browser/resources/new_tab_page/lazy_load.ts index 0135f48..8463d79 100644 --- a/chrome/browser/resources/new_tab_page/lazy_load.ts +++ b/chrome/browser/resources/new_tab_page/lazy_load.ts
@@ -30,11 +30,6 @@ export {chromeCartDescriptor, ChromeCartModuleElement} from './modules/cart/module.js'; export {DriveProxy} from './modules/drive/drive_module_proxy.js'; export {driveDescriptor, DriveModuleElement} from './modules/drive/module.js'; -export {driveDescriptor as driveV2Descriptor, DriveModuleElement as DriveV2ModuleElement} from './modules/drive_v2/module.js'; -// <if expr="not is_official_build"> -export {FooProxy} from './modules/dummy_v2/foo_proxy.js'; -export {DummyModuleElement, dummyV2Descriptor} from './modules/dummy_v2/module.js'; -// </if> export {FeedProxy} from './modules/feed/feed_module_proxy.js'; export {feedDescriptor, FeedModuleElement} from './modules/feed/module.js'; export {CartTileModuleElement} from './modules/history_clusters/cart/cart_tile.js'; @@ -42,7 +37,6 @@ export {HistoryClusterElementType, HistoryClusterImageDisplayState, historyClustersDescriptor, HistoryClustersModuleElement, LAYOUT_1_MIN_IMAGE_VISITS, LAYOUT_1_MIN_VISITS, LAYOUT_2_MIN_IMAGE_VISITS, LAYOUT_2_MIN_VISITS, LAYOUT_3_MIN_IMAGE_VISITS, LAYOUT_3_MIN_VISITS} from './modules/history_clusters/module.js'; export {SuggestTileModuleElement} from './modules/history_clusters/suggest_tile.js'; export {TileModuleElement} from './modules/history_clusters/tile.js'; -export {historyClustersDescriptor as historyClustersV2Descriptor, HistoryClustersModuleElement as HistoryClustersV2ModuleElement} from './modules/history_clusters_v2/module.js'; export {InfoDialogElement} from './modules/info_dialog.js'; export {InitializeModuleCallback, Module, ModuleDescriptor} from './modules/module_descriptor.js'; export {counterfactualLoad} from './modules/module_descriptors.js'; @@ -54,4 +48,11 @@ export {PhotosProxy} from './modules/photos/photos_module_proxy.js'; export {RecipesModuleElement, recipeTasksDescriptor} from './modules/recipes/module.js'; export {RecipesHandlerProxy} from './modules/recipes/recipes_handler_proxy.js'; +export {driveDescriptor as driveV2Descriptor, DriveModuleElement as DriveV2ModuleElement} from './modules/v2/drive/module.js'; +// <if expr="not is_official_build"> +export {FooProxy} from './modules/v2/dummy/foo_proxy.js'; +export {DummyModuleElement, dummyV2Descriptor} from './modules/v2/dummy/module.js'; +// </if> +export {historyClustersDescriptor as historyClustersV2Descriptor, HistoryClustersModuleElement as HistoryClustersV2ModuleElement} from './modules/v2/history_clusters/module.js'; +export {ModulesV2Element} from './modules/v2/modules.js'; export {VoiceSearchOverlayElement} from './voice_search_overlay.js';
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/history_clusters_v2.gni b/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/history_clusters_v2.gni deleted file mode 100644 index 388b92db..0000000 --- a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/history_clusters_v2.gni +++ /dev/null
@@ -1,11 +0,0 @@ -# Copyright 2023 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -# List of files that should be passed to html_to_wrapper(). -history_clusters_v2_web_component_files = [ - "modules/history_clusters_v2/module.ts", - "modules/history_clusters_v2/module_header.ts", - "modules/history_clusters_v2/suggest_tile.ts", - "modules/history_clusters_v2/visit_tile.ts", -]
diff --git a/chrome/browser/resources/new_tab_page/modules/module_descriptors.ts b/chrome/browser/resources/new_tab_page/modules/module_descriptors.ts index e447294..b27e6af 100644 --- a/chrome/browser/resources/new_tab_page/modules/module_descriptors.ts +++ b/chrome/browser/resources/new_tab_page/modules/module_descriptors.ts
@@ -11,18 +11,18 @@ import {chromeCartDescriptor} from './cart/module.js'; import {driveDescriptor} from './drive/module.js'; -import {driveDescriptor as driveV2Descriptor} from './drive_v2/module.js'; // <if expr="not is_official_build"> -import {dummyV2Descriptor, dummyV2Descriptor02, dummyV2Descriptor03, dummyV2Descriptor04, dummyV2Descriptor05, dummyV2Descriptor06, dummyV2Descriptor07, dummyV2Descriptor08, dummyV2Descriptor09, dummyV2Descriptor10, dummyV2Descriptor11, dummyV2Descriptor12} from './dummy_v2/module.js'; +import {dummyV2Descriptor, dummyV2Descriptor02, dummyV2Descriptor03, dummyV2Descriptor04, dummyV2Descriptor05, dummyV2Descriptor06, dummyV2Descriptor07, dummyV2Descriptor08, dummyV2Descriptor09, dummyV2Descriptor10, dummyV2Descriptor11, dummyV2Descriptor12} from './v2/dummy/module.js'; // </if> import {feedDescriptor} from './feed/module.js'; import {HistoryClustersProxyImpl} from './history_clusters/history_clusters_proxy.js'; import {historyClustersDescriptor} from './history_clusters/module.js'; -import {historyClustersDescriptor as historyClustersV2Descriptor} from './history_clusters_v2/module.js'; import {ModuleDescriptor} from './module_descriptor.js'; import {ModuleRegistry} from './module_registry.js'; import {photosDescriptor} from './photos/module.js'; import {recipeTasksDescriptor} from './recipes/module.js'; +import {driveDescriptor as driveV2Descriptor} from './v2/drive/module.js'; +import {historyClustersDescriptor as historyClustersV2Descriptor} from './v2/history_clusters/module.js'; const modulesRedesignedEnabled: boolean = loadTimeData.getBoolean('modulesRedesignedEnabled');
diff --git a/chrome/browser/resources/new_tab_page/modules/module_wrapper.html b/chrome/browser/resources/new_tab_page/modules/module_wrapper.html index 324d904..2573841 100644 --- a/chrome/browser/resources/new_tab_page/modules/module_wrapper.html +++ b/chrome/browser/resources/new_tab_page/modules/module_wrapper.html
@@ -9,11 +9,6 @@ position: relative; } - :host([modules-redesigned-enabled_]) { - background-color: transparent; - border: none; - } - #impressionProbe { height: 27px; pointer-events: none;
diff --git a/chrome/browser/resources/new_tab_page/modules/module_wrapper.ts b/chrome/browser/resources/new_tab_page/modules/module_wrapper.ts index 51c694e..3342b2a10 100644 --- a/chrome/browser/resources/new_tab_page/modules/module_wrapper.ts +++ b/chrome/browser/resources/new_tab_page/modules/module_wrapper.ts
@@ -3,7 +3,6 @@ // found in the LICENSE file. import {assert} from 'chrome://resources/js/assert_ts.js'; -import {loadTimeData} from 'chrome://resources/js/load_time_data.js'; import {microTask, PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; import {recordLoadDuration, recordOccurence, recordPerdecage} from '../metrics_utils.js'; @@ -41,12 +40,6 @@ observer: 'onModuleChange_', type: Object, }, - - modulesRedesignedEnabled_: { - type: Boolean, - value: () => loadTimeData.getBoolean('modulesRedesignedEnabled'), - reflectToAttribute: true, - }, }; }
diff --git a/chrome/browser/resources/new_tab_page/modules/modules.gni b/chrome/browser/resources/new_tab_page/modules/modules.gni index e67408d..a03c53f 100644 --- a/chrome/browser/resources/new_tab_page/modules/modules.gni +++ b/chrome/browser/resources/new_tab_page/modules/modules.gni
@@ -4,15 +4,15 @@ import("./cart/cart.gni") import("./drive/drive.gni") -import("./drive_v2/drive_v2.gni") import("./feed/feed.gni") import("./history_clusters/history_clusters.gni") -import("./history_clusters_v2/history_clusters_v2.gni") import("./photos/photos.gni") import("./recipes/recipes.gni") +import("./v2/drive/drive.gni") +import("./v2/history_clusters/history_clusters.gni") if (!is_official_build) { - import("./dummy_v2/dummy_v2.gni") + import("./v2/dummy/dummy.gni") } # List of files that don't need to be passed to html_to_wrapper(). @@ -36,6 +36,7 @@ "modules/module_header.ts", "modules/modules.ts", "modules/module_wrapper.ts", + "modules/v2/modules.ts", ] + cart_web_component_files + drive_web_component_files + drive_v2_web_component_files + feed_web_component_files + photos_web_component_files + recipes_web_component_files +
diff --git a/chrome/browser/resources/new_tab_page/modules/drive_v2/drive_v2.gni b/chrome/browser/resources/new_tab_page/modules/v2/drive/drive.gni similarity index 75% rename from chrome/browser/resources/new_tab_page/modules/drive_v2/drive_v2.gni rename to chrome/browser/resources/new_tab_page/modules/v2/drive/drive.gni index 3639041..b2591dd 100644 --- a/chrome/browser/resources/new_tab_page/modules/drive_v2/drive_v2.gni +++ b/chrome/browser/resources/new_tab_page/modules/v2/drive/drive.gni
@@ -3,4 +3,4 @@ # found in the LICENSE file. # List of files that should be passed to html_to_wrapper(). -drive_v2_web_component_files = [ "modules/drive_v2/module.ts" ] +drive_v2_web_component_files = [ "modules/v2/drive/module.ts" ]
diff --git a/chrome/browser/resources/new_tab_page/modules/drive_v2/module.html b/chrome/browser/resources/new_tab_page/modules/v2/drive/module.html similarity index 100% rename from chrome/browser/resources/new_tab_page/modules/drive_v2/module.html rename to chrome/browser/resources/new_tab_page/modules/v2/drive/module.html
diff --git a/chrome/browser/resources/new_tab_page/modules/drive_v2/module.ts b/chrome/browser/resources/new_tab_page/modules/v2/drive/module.ts similarity index 87% rename from chrome/browser/resources/new_tab_page/modules/drive_v2/module.ts rename to chrome/browser/resources/new_tab_page/modules/v2/drive/module.ts index 9a0f6df..9f09c16 100644 --- a/chrome/browser/resources/new_tab_page/modules/drive_v2/module.ts +++ b/chrome/browser/resources/new_tab_page/modules/v2/drive/module.ts
@@ -2,17 +2,17 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -import '../module_header.js'; +import '../../module_header.js'; import 'chrome://resources/cr_elements/cr_lazy_render/cr_lazy_render.js'; import {CrLazyRenderElement} from 'chrome://resources/cr_elements/cr_lazy_render/cr_lazy_render.js'; import {DomRepeat, DomRepeatEvent, PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; -import {File} from '../../drive.mojom-webui.js'; -import {I18nMixin, loadTimeData} from '../../i18n_setup.js'; -import {DriveProxy} from '../drive/drive_module_proxy.js'; -import {InfoDialogElement} from '../info_dialog.js'; -import {ModuleDescriptor} from '../module_descriptor.js'; +import {File} from '../../../drive.mojom-webui.js'; +import {I18nMixin, loadTimeData} from '../../../i18n_setup.js'; +import {DriveProxy} from '../../drive/drive_module_proxy.js'; +import {InfoDialogElement} from '../../info_dialog.js'; +import {ModuleDescriptor} from '../../module_descriptor.js'; import {getTemplate} from './module.html.js';
diff --git a/chrome/browser/resources/new_tab_page/modules/dummy_v2/dummy_v2.gni b/chrome/browser/resources/new_tab_page/modules/v2/dummy/dummy.gni similarity index 67% rename from chrome/browser/resources/new_tab_page/modules/dummy_v2/dummy_v2.gni rename to chrome/browser/resources/new_tab_page/modules/v2/dummy/dummy.gni index e2add006..7f2d9ee 100644 --- a/chrome/browser/resources/new_tab_page/modules/dummy_v2/dummy_v2.gni +++ b/chrome/browser/resources/new_tab_page/modules/v2/dummy/dummy.gni
@@ -3,7 +3,7 @@ # found in the LICENSE file. # List of files that don't need to be passed to html_to_wrapper(). -dummy_v2_non_web_component_files = [ "modules/dummy_v2/foo_proxy.ts" ] +dummy_v2_non_web_component_files = [ "modules/v2/dummy/foo_proxy.ts" ] # List of files that should be passed to html_to_wrapper(). -dummy_v2_web_component_files = [ "modules/dummy_v2/module.ts" ] +dummy_v2_web_component_files = [ "modules/v2/dummy/module.ts" ]
diff --git a/chrome/browser/resources/new_tab_page/modules/dummy_v2/foo_proxy.ts b/chrome/browser/resources/new_tab_page/modules/v2/dummy/foo_proxy.ts similarity index 89% rename from chrome/browser/resources/new_tab_page/modules/dummy_v2/foo_proxy.ts rename to chrome/browser/resources/new_tab_page/modules/v2/dummy/foo_proxy.ts index 63ec88e..d508572 100644 --- a/chrome/browser/resources/new_tab_page/modules/dummy_v2/foo_proxy.ts +++ b/chrome/browser/resources/new_tab_page/modules/v2/dummy/foo_proxy.ts
@@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -import {FooHandler, FooHandlerRemote} from '../../foo.mojom-webui.js'; +import {FooHandler, FooHandlerRemote} from '../../../foo.mojom-webui.js'; /** * @fileoverview This file provides a class that exposes the Mojo handler
diff --git a/chrome/browser/resources/new_tab_page/modules/dummy_v2/module.html b/chrome/browser/resources/new_tab_page/modules/v2/dummy/module.html similarity index 100% rename from chrome/browser/resources/new_tab_page/modules/dummy_v2/module.html rename to chrome/browser/resources/new_tab_page/modules/v2/dummy/module.html
diff --git a/chrome/browser/resources/new_tab_page/modules/dummy_v2/module.ts b/chrome/browser/resources/new_tab_page/modules/v2/dummy/module.ts similarity index 93% rename from chrome/browser/resources/new_tab_page/modules/dummy_v2/module.ts rename to chrome/browser/resources/new_tab_page/modules/v2/dummy/module.ts index 63714ed2..2ebeee43 100644 --- a/chrome/browser/resources/new_tab_page/modules/dummy_v2/module.ts +++ b/chrome/browser/resources/new_tab_page/modules/v2/dummy/module.ts
@@ -4,14 +4,14 @@ import 'chrome://resources/cr_elements/cr_grid/cr_grid.js'; import 'chrome://resources/cr_elements/cr_auto_img/cr_auto_img.js'; -import '../../strings.m.js'; -import '../module_header.js'; +import '../../../strings.m.js'; +import '../../module_header.js'; import {DomRepeat, PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; -import {FooDataItem} from '../../foo.mojom-webui.js'; -import {I18nMixin, loadTimeData} from '../../i18n_setup.js'; -import {ModuleDescriptor} from '../module_descriptor.js'; +import {FooDataItem} from '../../../foo.mojom-webui.js'; +import {I18nMixin, loadTimeData} from '../../../i18n_setup.js'; +import {ModuleDescriptor} from '../../module_descriptor.js'; import {FooProxy} from './foo_proxy.js'; import {getTemplate} from './module.html.js';
diff --git a/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/history_clusters.gni b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/history_clusters.gni new file mode 100644 index 0000000..87b5463 --- /dev/null +++ b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/history_clusters.gni
@@ -0,0 +1,11 @@ +# Copyright 2023 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +# List of files that should be passed to html_to_wrapper(). +history_clusters_v2_web_component_files = [ + "modules/v2/history_clusters/module.ts", + "modules/v2/history_clusters/module_header.ts", + "modules/v2/history_clusters/suggest_tile.ts", + "modules/v2/history_clusters/visit_tile.ts", +]
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module.html b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module.html similarity index 97% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module.html rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module.html index c9954b7..f90ad59 100644 --- a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module.html +++ b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module.html
@@ -2,7 +2,6 @@ :host { --grid-gap: 8px; height: 410px; - margin: 8px; width: 100%; } @@ -20,7 +19,7 @@ } #doneButton { - --cr-icon-image: url(chrome://resources/images/icon_checkmark.svg); + --cr-icon-image: url(chrome://resources/images/icon_checkmark.svg); } #layout {
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module.ts b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module.ts similarity index 91% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module.ts rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module.ts index 94dfaba..3a096542 100644 --- a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module.ts +++ b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module.ts
@@ -10,11 +10,11 @@ import {assert} from 'chrome://resources/js/assert_ts.js'; import {PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; -import {Cluster, URLVisit} from '../../history_cluster_types.mojom-webui.js'; -import {I18nMixin, loadTimeData} from '../../i18n_setup.js'; -import {HistoryClustersProxyImpl} from '../history_clusters/history_clusters_proxy.js'; -import {InfoDialogElement} from '../info_dialog'; -import {ModuleDescriptor} from '../module_descriptor.js'; +import {Cluster, URLVisit} from '../../../history_cluster_types.mojom-webui.js'; +import {I18nMixin, loadTimeData} from '../../../i18n_setup.js'; +import {HistoryClustersProxyImpl} from '../../history_clusters/history_clusters_proxy.js'; +import {InfoDialogElement} from '../../info_dialog'; +import {ModuleDescriptor} from '../../module_descriptor.js'; import {getTemplate} from './module.html.js';
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module_header.html b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module_header.html similarity index 100% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module_header.html rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module_header.html
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module_header.ts b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module_header.ts similarity index 97% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module_header.ts rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module_header.ts index ef443b7..0b3dbde1 100644 --- a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/module_header.ts +++ b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/module_header.ts
@@ -7,7 +7,7 @@ import {CrActionMenuElement} from 'chrome://resources/cr_elements/cr_action_menu/cr_action_menu.js'; import {PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; -import {I18nMixin} from '../../i18n_setup.js'; +import {I18nMixin} from '../../../i18n_setup.js'; import {getTemplate} from './module_header.html.js';
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/suggest_tile.html b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/suggest_tile.html similarity index 100% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/suggest_tile.html rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/suggest_tile.html
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/suggest_tile.ts b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/suggest_tile.ts similarity index 89% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/suggest_tile.ts rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/suggest_tile.ts index 4d5ce3a..42c5b33 100644 --- a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/suggest_tile.ts +++ b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/suggest_tile.ts
@@ -6,8 +6,8 @@ import {PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; -import {SearchQuery} from '../../history_cluster_types.mojom-webui.js'; -import {I18nMixin} from '../../i18n_setup.js'; +import {SearchQuery} from '../../../history_cluster_types.mojom-webui.js'; +import {I18nMixin} from '../../../i18n_setup.js'; import {getTemplate} from './suggest_tile.html.js';
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/visit_tile.html b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/visit_tile.html similarity index 100% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/visit_tile.html rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/visit_tile.html
diff --git a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/visit_tile.ts b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/visit_tile.ts similarity index 94% rename from chrome/browser/resources/new_tab_page/modules/history_clusters_v2/visit_tile.ts rename to chrome/browser/resources/new_tab_page/modules/v2/history_clusters/visit_tile.ts index 004ada66..dede120 100644 --- a/chrome/browser/resources/new_tab_page/modules/history_clusters_v2/visit_tile.ts +++ b/chrome/browser/resources/new_tab_page/modules/v2/history_clusters/visit_tile.ts
@@ -4,7 +4,7 @@ import 'chrome://resources/cr_elements/cr_auto_img/cr_auto_img.js'; import 'chrome://resources/cr_components/history_clusters/history_clusters_shared_style.css.js'; -import '../history_clusters/page_favicon.js'; +import '../../history_clusters/page_favicon.js'; import {PageImageServiceBrowserProxy} from 'chrome://resources/cr_components/page_image_service/browser_proxy.js'; import {ClientId as PageImageServiceClientId} from 'chrome://resources/cr_components/page_image_service/page_image_service.mojom-webui.js'; @@ -12,8 +12,8 @@ import {Url} from 'chrome://resources/mojo/url/mojom/url.mojom-webui.js'; import {PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; -import {Annotation, URLVisit} from '../../history_cluster_types.mojom-webui.js'; -import {I18nMixin} from '../../i18n_setup.js'; +import {Annotation, URLVisit} from '../../../history_cluster_types.mojom-webui.js'; +import {I18nMixin} from '../../../i18n_setup.js'; import {getTemplate} from './visit_tile.html.js';
diff --git a/chrome/browser/resources/new_tab_page/modules/v2/modules.html b/chrome/browser/resources/new_tab_page/modules/v2/modules.html new file mode 100644 index 0000000..78bab5a6 --- /dev/null +++ b/chrome/browser/resources/new_tab_page/modules/v2/modules.html
@@ -0,0 +1,4 @@ +<style include="cr-hidden-style"> +</style> +<div id="container"> +</div>
diff --git a/chrome/browser/resources/new_tab_page/modules/v2/modules.ts b/chrome/browser/resources/new_tab_page/modules/v2/modules.ts new file mode 100644 index 0000000..2ab4cc5 --- /dev/null +++ b/chrome/browser/resources/new_tab_page/modules/v2/modules.ts
@@ -0,0 +1,26 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +import 'chrome://resources/cr_elements/cr_hidden_style.css.js'; + +import {PolymerElement} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; + +import {getTemplate} from './modules.html.js'; + +/** Container for the NTP modules. */ +export class ModulesV2Element extends PolymerElement { + static get is() { + return 'ntp-modules-v2'; + } + + static get template() { + return getTemplate(); + } + + static get properties() { + return {}; + } +} + +customElements.define(ModulesV2Element.is, ModulesV2Element);
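Note: ModulesV2Element is deliberately a bare container for now; the dom-if template added to app.html stamps <ntp-modules-v2 id="modules"> when the redesigned-modules flag is on. Purely as an illustration (not part of this CL), it behaves like any other defined custom element once the module has been loaded, for example via the new lazy_load.ts export:

// Illustrative only: app.html normally stamps this element declaratively.
const modules = document.createElement('ntp-modules-v2');
modules.id = 'modules';
document.body.appendChild(modules);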
diff --git a/chrome/browser/resources/omnibox/omnibox.html b/chrome/browser/resources/omnibox/omnibox.html index f7b99844..70e72bc 100644 --- a/chrome/browser/resources/omnibox/omnibox.html +++ b/chrome/browser/resources/omnibox/omnibox.html
@@ -11,7 +11,6 @@ <body> <template id="omnibox-input-template"> - <link rel="stylesheet" href="omnibox_input.css"> <div id="top" class="top drag-container"> <div class="top-column"> <input id="input-text" type="text" @@ -230,7 +229,6 @@ </template> <template id="output-results-group-template"> - <link rel="stylesheet" href="output_results_group.css"> <output-results-details></output-results-details> <table id="table"></table> </template>
diff --git a/chrome/browser/resources/omnibox/omnibox_input.ts b/chrome/browser/resources/omnibox/omnibox_input.ts index 92c06f3..35d902a 100644 --- a/chrome/browser/resources/omnibox/omnibox_input.ts +++ b/chrome/browser/resources/omnibox/omnibox_input.ts
@@ -3,6 +3,8 @@ // found in the LICENSE file. import {OmniboxElement} from './omnibox_element.js'; +// @ts-ignore:next-line +import sheet from './omnibox_input.css' assert {type : 'css'}; export interface QueryInputs { inputText: string; @@ -58,6 +60,7 @@ constructor() { super('omnibox-input-template'); + this.shadowRoot!.adoptedStyleSheets = [sheet]; } connectedCallback() {
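Note: this change (and the matching one in omnibox_output.ts below) swaps the per-template <link rel="stylesheet"> for a CSS module script adopted onto the shadow root. A self-contained sketch of the same technique with hypothetical file and element names; the @ts-ignore mirrors the diff, since the CSS import assertion is not typed in this project yet:

// @ts-ignore:next-line
import sheet from './example.css' assert {type: 'css'};

class ExampleElement extends HTMLElement {
  constructor() {
    super();
    const shadow = this.attachShadow({mode: 'open'});
    // A constructable CSSStyleSheet is parsed once and can be adopted by many
    // shadow roots, instead of re-fetching the stylesheet per instance.
    shadow.adoptedStyleSheets = [sheet];
    shadow.innerHTML = '<div>styled content</div>';
  }
}
customElements.define('example-element', ExampleElement);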
diff --git a/chrome/browser/resources/omnibox/omnibox_output.ts b/chrome/browser/resources/omnibox/omnibox_output.ts index bc58623..4020851 100644 --- a/chrome/browser/resources/omnibox/omnibox_output.ts +++ b/chrome/browser/resources/omnibox/omnibox_output.ts
@@ -7,6 +7,10 @@ import {ACMatchClassification, AutocompleteAdditionalInfo, AutocompleteMatch, OmniboxResponse} from './omnibox.mojom-webui.js'; import {OmniboxElement} from './omnibox_element.js'; import {DisplayInputs, OmniboxInput} from './omnibox_input.js'; +// @ts-ignore:next-line +import outputColumnWidthSheet from './omnibox_output_column_widths.css' assert {type : 'css'}; +// @ts-ignore:next-line +import outputResultsGroupSheet from './output_results_group.css' assert {type : 'css'}; interface ResultsDetails { cursorPosition: number; @@ -178,6 +182,8 @@ constructor() { super('output-results-group-template'); + this.shadowRoot!.adoptedStyleSheets = + [outputColumnWidthSheet, outputResultsGroupSheet]; } setResultsGroup(resultsGroup: OmniboxResponse) {
diff --git a/chrome/browser/resources/omnibox/output_results_group.css b/chrome/browser/resources/omnibox/output_results_group.css index 121b088..f2b2631 100644 --- a/chrome/browser/resources/omnibox/output_results_group.css +++ b/chrome/browser/resources/omnibox/output_results_group.css
@@ -2,8 +2,6 @@ * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. */ -@import url(omnibox_output_column_widths.css); - :host { --header-color: #555; display: inline-block;
diff --git a/chrome/browser/resources/settings/chromeos/internet_page/internet_detail_subpage.ts b/chrome/browser/resources/settings/chromeos/internet_page/internet_detail_subpage.ts index bd198ce..bf7aad2 100644 --- a/chrome/browser/resources/settings/chromeos/internet_page/internet_detail_subpage.ts +++ b/chrome/browser/resources/settings/chromeos/internet_page/internet_detail_subpage.ts
@@ -38,7 +38,7 @@ import {MojoConnectivityProvider} from 'chrome://resources/ash/common/connectivity/mojo_connectivity_provider.js'; import {PasspointServiceInterface, PasspointSubscription} from 'chrome://resources/ash/common/connectivity/passpoint.mojom-webui.js'; -import {isActiveSim, processDeviceState} from 'chrome://resources/ash/common/network/cellular_utils.js'; +import {getApnDisplayName, isActiveSim, processDeviceState} from 'chrome://resources/ash/common/network/cellular_utils.js'; import {CrPolicyNetworkBehaviorMojo, CrPolicyNetworkBehaviorMojoInterface} from 'chrome://resources/ash/common/network/cr_policy_network_behavior_mojo.js'; import {MojoInterfaceProviderImpl} from 'chrome://resources/ash/common/network/mojo_interface_provider.js'; import {NetworkListenerBehavior, NetworkListenerBehaviorInterface} from 'chrome://resources/ash/common/network/network_listener_behavior.js'; @@ -1725,8 +1725,8 @@ return ''; } - return this.managedProperties_!.typeProperties.cellular!.connectedApn - .accessPointName; + return getApnDisplayName( + this.managedProperties_!.typeProperties.cellular!.connectedApn); }
diff --git a/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.html b/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.html index 352d97f1..59718f9d 100644 --- a/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.html +++ b/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.html
@@ -151,54 +151,6 @@ id="colorFilteringIntensitySlider"> </settings-slider> </div> - <div class="settings-box settings-box-row"> - <div class="start settings-box-text" aria-hidden="true"> - $i18n{greyscaleLabel} - </div> - <settings-slider - pref="{{prefs.settings.a11y.color_filtering.greyscale_amount}}" - min="0" max="100" - label-aria="$i18n{greyscaleLabel}" - label-min="$i18n{colorFilterMinLabel}" - label-max="$i18n{colorFilterMaxLabel}"> - </settings-slider> - </div> - <div class="settings-box settings-box-row"> - <div class="start settings-box-text" aria-hidden="true"> - $i18n{saturationLabel} - </div> - <settings-slider - pref="{{prefs.settings.a11y.color_filtering.saturation_amount}}" - min="100" max="1000" - label-aria="$i18n{saturationLabel}" - label-min="$i18n{colorFilterMinLabel}" - label-max="$i18n{colorFilterMaxLabel}"> - </settings-slider> - </div> - <div class="settings-box settings-box-row"> - <div class="start settings-box-text" aria-hidden="true"> - $i18n{sepiaLabel} - </div> - <settings-slider - pref="{{prefs.settings.a11y.color_filtering.sepia_amount}}" - min="0" max="100" - label-aria="$i18n{sepiaLabel}" - label-min="$i18n{colorFilterMinLabel}" - label-max="$i18n{colorFilterMaxLabel}"> - </settings-slider> - </div> - <div class="settings-box settings-box-row"> - <div class="start settings-box-text" aria-hidden="true"> - $i18n{hueRotationLabel} - </div> - <settings-slider - pref="{{prefs.settings.a11y.color_filtering.hue_rotation_amount}}" - min="0" max="359" - label-aria="$i18n{hueRotationLabel}" - label-min="$i18n{colorFilterMinLabel}" - label-max="$i18n{colorFilterMaxLabel}"> - </settings-slider> - </div> </template> </template> <template is="dom-if" if="[[!isKioskModeActive_]]">
diff --git a/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.ts b/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.ts index 8ed7c539..6cc43a4 100644 --- a/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.ts +++ b/chrome/browser/resources/settings/chromeos/os_a11y_page/display_and_magnification_subpage.ts
@@ -88,13 +88,15 @@ readOnly: true, type: Array, value() { - // These values correspond to ColorVisionDeficiencyType enums in + // The first 3 values correspond to ColorVisionDeficiencyType enums in // ash/color_enhancement/color_enhancement_controller.cc. // CVD types are ordered here by how common they are. + // The final value is a greyscale color filter. return [ {value: 1, name: loadTimeData.getString('deuteranomalyFilter')}, {value: 0, name: loadTimeData.getString('protanomalyFilter')}, {value: 2, name: loadTimeData.getString('tritanomalyFilter')}, + {value: 3, name: loadTimeData.getString('greyscaleLabel')}, ]; }, },
diff --git a/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.html b/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.html index 59acdcb9b..1a01622 100644 --- a/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.html +++ b/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.html
@@ -253,9 +253,8 @@ items="[[shownBookmarks_]]" scroll-target="bookmarks"> <template> <power-bookmark-row id="bookmark-[[item.id]]" bookmark="[[item]]" - description="[[getBookmarkDescription_(item, - compactDescriptions_.*, expandedDescriptions_.*, - compact_)]]" + description="[[getBookmarkDescription_( + item, compact_, searchQuery_, item.*)]]" compact="[[compact_]]" trailing-icon="cr:more-vert" trailing-icon-aria-label="[[getBookmarkMenuA11yLabel_( item.url, item.title)]]"
diff --git a/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.ts b/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.ts index 97e3b583..3f95c26 100644 --- a/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.ts +++ b/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_list.ts
@@ -217,8 +217,6 @@ private compact_: boolean; private activeFolderPath_: chrome.bookmarks.BookmarkTreeNode[]; private labels_: Label[]; - private compactDescriptions_ = new Map<string, string>(); - private expandedDescriptions_ = new Map<string, string>(); private imageUrls_ = new Map<string, string>(); private activeSortIndex_: number; private sortTypes_: SortOption[]; @@ -293,16 +291,6 @@ this.currentUrl_ = url; } - setCompactDescription( - bookmark: chrome.bookmarks.BookmarkTreeNode, description: string) { - this.set(`compactDescriptions_.${bookmark.id}`, description); - } - - setExpandedDescription( - bookmark: chrome.bookmarks.BookmarkTreeNode, description: string) { - this.set(`expandedDescriptions_.${bookmark.id}`, description); - } - setImageUrl(bookmark: chrome.bookmarks.BookmarkTreeNode, url: string) { this.set(`imageUrls_.${bookmark.id.toString()}`, url); } @@ -521,18 +509,29 @@ private getBookmarkDescription_(bookmark: chrome.bookmarks.BookmarkTreeNode): string|undefined { if (this.compact_) { - return this.get(`compactDescriptions_.${bookmark.id}`); + if (bookmark.url) { + return undefined; + } + const count = bookmark.children ? bookmark.children.length : 0; + return loadTimeData.getStringF('bookmarkFolderChildCount', count); } else { - const url = this.get(`expandedDescriptions_.${bookmark.id}`); - if (this.searchQuery_ && url && bookmark.parentId) { + let urlString; + if (bookmark.url) { + const url = new URL(bookmark.url); + // Show chrome:// if it's a chrome internal url + if (url.protocol === 'chrome:') { + urlString = 'chrome://' + url.hostname; + } + urlString = url.hostname; + } + if (urlString && this.searchQuery_ && bookmark.parentId) { const parentFolder = this.bookmarksService_.findBookmarkWithId(bookmark.parentId); const folderLabel = this.getFolderLabel_(parentFolder); return loadTimeData.getStringF( - 'urlFolderDescription', url, folderLabel); - } else { - return url; + 'urlFolderDescription', urlString, folderLabel); } + return urlString; } }
diff --git a/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_service.ts b/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_service.ts index a9e3a59..4edcf48 100644 --- a/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_service.ts +++ b/chrome/browser/resources/side_panel/bookmarks/power_bookmarks_service.ts
@@ -7,7 +7,6 @@ import {PageImageServiceBrowserProxy} from '//resources/cr_components/page_image_service/browser_proxy.js'; import {ClientId as PageImageServiceClientId} from '//resources/cr_components/page_image_service/page_image_service.mojom-webui.js'; import {loadTimeData} from '//resources/js/load_time_data.js'; -import {PluralStringProxyImpl} from '//resources/js/plural_string_proxy.js'; import {Url} from '//resources/mojo/url/mojom/url.mojom-webui.js'; import {BookmarksApiProxy, BookmarksApiProxyImpl} from './bookmarks_api_proxy.js'; @@ -26,10 +25,6 @@ interface PowerBookmarksDelegate { setCurrentUrl(url: string|undefined): void; - setCompactDescription( - bookmark: chrome.bookmarks.BookmarkTreeNode, description: string): void; - setExpandedDescription( - bookmark: chrome.bookmarks.BookmarkTreeNode, description: string): void; setImageUrl(bookmark: chrome.bookmarks.BookmarkTreeNode, url: string): void; onBookmarksLoaded(): void; onBookmarkChanged(id: string, changedInfo: chrome.bookmarks.ChangeInfo): void; @@ -175,9 +170,6 @@ url => this.delegate_.setCurrentUrl(url)); this.bookmarksApi_.getFolders().then(folders => { this.folders_ = folders; - this.folders_.forEach(bookmark => { - this.findBookmarkDescriptions_(bookmark, true); - }); this.addListener_( 'onChanged', (id: string, changedInfo: chrome.bookmarks.ChangeInfo) => @@ -386,7 +378,6 @@ private onChanged_(id: string, changedInfo: chrome.bookmarks.ChangeInfo) { const bookmark = this.findBookmarkWithId(id)!; Object.assign(bookmark, changedInfo); - this.findBookmarkDescriptions_(bookmark, false); this.findBookmarkImageUrls_(bookmark, false, true); this.delegate_.onBookmarkChanged(id, changedInfo); } @@ -400,8 +391,6 @@ } parent.children!.splice(node.index!, 0, node); this.delegate_.onBookmarkCreated(node, parent); - this.findBookmarkDescriptions_(parent, false); - this.findBookmarkDescriptions_(node, false); this.findBookmarkImageUrls_(node, false, false); } @@ -420,11 +409,6 @@ } newParent.children!.splice(movedInfo.index, 0, movedNode); this.delegate_.onBookmarkMoved(movedNode, oldParent, newParent); - - if (movedInfo.oldParentId !== movedInfo.parentId) { - this.findBookmarkDescriptions_(oldParent, false); - this.findBookmarkDescriptions_(newParent, false); - } } private onRemoved_(id: string) { @@ -433,7 +417,6 @@ const oldParent = oldPath[oldPath.length - 1]!; oldParent.children!.splice(oldParent.children!.indexOf(removedNode), 1); this.delegate_.onBookmarkRemoved(removedNode); - this.findBookmarkDescriptions_(oldParent, false); } /** @@ -469,37 +452,6 @@ } /** - * Assigns a text description for the given bookmark, to be displayed - * following the bookmark title. Also assigns a description to all - * descendants if recurse is true. - */ - private findBookmarkDescriptions_( - bookmark: chrome.bookmarks.BookmarkTreeNode, recurse: boolean) { - if (bookmark.url) { - const url = new URL(bookmark.url); - // Show chrome:// if it's a chrome internal url - if (url.protocol === 'chrome:') { - this.delegate_.setExpandedDescription( - bookmark, 'chrome://' + url.hostname); - } else { - this.delegate_.setExpandedDescription(bookmark, url.hostname); - } - } else { - PluralStringProxyImpl.getInstance() - .getPluralString( - 'bookmarkFolderChildCount', - bookmark.children ? 
bookmark.children.length : 0) - .then(pluralString => { - this.delegate_.setCompactDescription(bookmark, pluralString); - }); - } - if (recurse && bookmark.children) { - bookmark.children.forEach( - child => this.findBookmarkDescriptions_(child, recurse)); - } - } - - /** * Assigns an image url for the given bookmark. Also assigns an image url to * all children if recurse is true. */
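Note: with findBookmarkDescriptions_ gone, the expanded description is derived on demand in power_bookmarks_list.ts. A standalone sketch of that URL-to-description step, modelled on the removed helper and keeping the chrome:// case on its own branch (the function name is ours):

// Sketch of the hostname-based description, following the removed helper.
function bookmarkUrlDescription(rawUrl: string): string {
  const url = new URL(rawUrl);
  // Keep the scheme for chrome:// pages so internal URLs stay recognizable.
  return url.protocol === 'chrome:' ? 'chrome://' + url.hostname : url.hostname;
}

// bookmarkUrlDescription('chrome://settings/people')    -> 'chrome://settings'
// bookmarkUrlDescription('https://www.example.org/a/b') -> 'www.example.org'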
diff --git a/chrome/browser/resources/side_panel/companion/companion.ts b/chrome/browser/resources/side_panel/companion/companion.ts index 7dfef126..ebed8e6 100644 --- a/chrome/browser/resources/side_panel/companion/companion.ts +++ b/chrome/browser/resources/side_panel/companion/companion.ts
@@ -32,11 +32,9 @@ // Arguments for MethodType.kOnPromoAction. PROMO_ACTION = 'promoAction', PROMO_TYPE = 'promoType', - EXPS_PROMO_URL = 'expsPromoUrl', // Arguments for MethodType.kOnPhFeedback. PH_FEEDBACK = 'phFeedback', - REPORTING_URL = 'reportingUrl', // Arguments for MethodType.kOnOpenInNewTabButtonURLChanged. URL_FOR_OPEN_IN_NEW_TAB = 'urlForOpenInNewTab', @@ -55,6 +53,10 @@ // Arguments for MethodType.kOnCqJamptagClicked. CQ_JUMPTAG_TEXT = 'cqJumptagText', + // Arguments for MethodType.kOpenUrlInBrowser + URL_TO_OPEN = 'urlToOpen', + USE_NEW_TAB = 'useNewTab', + // Arguments for browser -> iframe communication. COMPANION_UPDATE_PARAMS = 'companionUpdateParams', @@ -185,11 +187,8 @@ } else if (methodType === MethodType.kOnPromoAction) { const promoType = data[ParamType.PROMO_TYPE]; const promoAction = data[ParamType.PROMO_ACTION]; - const expsPromoUrl = new Url(); - expsPromoUrl.url = data[ParamType.EXPS_PROMO_URL] || ''; if (validatePromoArguments(promoType, promoAction)) { - companionProxy.handler.onPromoAction( - promoType, promoAction, expsPromoUrl); + companionProxy.handler.onPromoAction(promoType, promoAction); } } else if (methodType === MethodType.kOnExpsOptInStatusAvailable) { companionProxy.handler.onExpsOptInStatusAvailable( @@ -215,12 +214,14 @@ companionProxy.handler.onCqCandidatesAvailable( data[ParamType.CQ_TEXT_DIRECTIVES]); } else if (methodType === MethodType.kOnPhFeedback) { - const reportingUrl = new Url(); - reportingUrl.url = data[ParamType.REPORTING_URL] || ''; - companionProxy.handler.onPhFeedback( - data[ParamType.PH_FEEDBACK], reportingUrl); + companionProxy.handler.onPhFeedback(data[ParamType.PH_FEEDBACK]); } else if (methodType === MethodType.kOnCqJumptagClicked) { companionProxy.handler.onCqJumptagClicked(data[ParamType.CQ_JUMPTAG_TEXT]); + } else if (methodType === MethodType.kOpenUrlInBrowser) { + const urlToOpen = new Url(); + urlToOpen.url = data[ParamType.URL_TO_OPEN] || ''; + companionProxy.handler.openUrlInBrowser( + urlToOpen, data[ParamType.USE_NEW_TAB]); } }
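Note: the new kOpenUrlInBrowser branch reads two parameters from the iframe message. A sketch of the payload shape it consumes, using the ParamType keys added above (the surrounding message envelope and the boolean type for useNewTab are assumptions):

// Field names come from ParamType; the wrapper/envelope is not shown here.
interface OpenUrlInBrowserParams {
  urlToOpen: string;   // ParamType.URL_TO_OPEN, copied into a mojo Url
  useNewTab: boolean;  // ParamType.USE_NEW_TAB
}

const example: OpenUrlInBrowserParams = {
  urlToOpen: 'https://www.example.org/',
  useNewTab: true,
};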
diff --git a/chrome/browser/resources/side_panel/shared/sp_filter_chip.html b/chrome/browser/resources/side_panel/shared/sp_filter_chip.html index 086c9998..3436115a 100644 --- a/chrome/browser/resources/side_panel/shared/sp_filter_chip.html +++ b/chrome/browser/resources/side_panel/shared/sp_filter_chip.html
@@ -9,6 +9,7 @@ border: 1px solid var(--google-grey-300); border-radius: 4px; color: var(--cr-secondary-text-color); + font-family: inherit; display: flex; flex-direction: row; font-size: 13px; @@ -30,7 +31,9 @@ color: var(--color-side-panel-filter-chip-foreground); font-size: 12px; font-weight: 500; - padding: 0 8px; + overflow: hidden; + padding: 0 7px; + position: relative; } button:not(:is([disabled], [selected])):hover { @@ -40,7 +43,7 @@ :host-context([chrome-refresh-2023]) button:not(:is([disabled], [selected])):hover { - background-color: var(--color-side-panel-filter-chip-background-hover); + background-color: transparent; border-color: var(--color-side-panel-filter-chip-border); } @@ -55,8 +58,11 @@ } button:focus-visible { - box-shadow: 0 0 0 2px var(--cr-focus-outline-color); - outline: none; + outline: solid 2px var(--cr-focus-outline-color); + } + + :host-context([chrome-refresh-2023]) button:focus-visible { + outline-offset: 2px; } button[disabled] { @@ -81,12 +87,26 @@ :host-context([chrome-refresh-2023]) button[selected] { --iron-icon-fill-color: var(--color-side-panel-filter-chip-icon-selected); background-color: var(--color-side-panel-filter-chip-background-selected); - border-color: transparent; + border: none; color: var(--color-side-panel-filter-chip-foreground-selected); + padding: 0 8px; + } + + #hoverLayer { + display: none; + } + + :host-context([chrome-refresh-2023]) button:hover #hoverLayer { + background: var(--cr-hover-background-color); + display: block; + inset: 0; + pointer-events: none; + position: absolute; } </style> <button selected$="[[selected]]" disabled$="[[disabled]]" aria-pressed="[[selected]]"> + <div id="hoverLayer"></div> <slot></slot> </button>
diff --git a/chrome/browser/search/background/ntp_custom_background_service.cc b/chrome/browser/search/background/ntp_custom_background_service.cc index a7ec287..c5c8797 100644 --- a/chrome/browser/search/background/ntp_custom_background_service.cc +++ b/chrome/browser/search/background/ntp_custom_background_service.cc
@@ -224,87 +224,6 @@ std::string()); } -void NtpCustomBackgroundService::UpdateCustomBackgroundColorAsync( - const GURL& image_url, - const gfx::Image& fetched_image, - const image_fetcher::RequestMetadata& metadata) { - if (metadata.http_response_code == - image_fetcher::RequestMetadata::ResponseCode::RESPONSE_CODE_INVALID) { - return; - } - // Calculate the bitmap color asynchronously as it is slow (1-2 seconds for - // the thumbnail). However, prefs should be updated on the main thread. - base::ThreadPool::PostTaskAndReplyWithResult( - FROM_HERE, {base::TaskPriority::BEST_EFFORT}, - base::BindOnce(&GetBitmapMainColor, fetched_image.AsBitmap()), - base::BindOnce( - &NtpCustomBackgroundService::UpdateCustomBackgroundPrefsWithColor, - weak_ptr_factory_.GetWeakPtr(), image_url)); -} - -void NtpCustomBackgroundService::FetchCustomBackgroundAndExtractBackgroundColor( - const GURL& image_url, - const GURL& fetch_url) { - net::NetworkTrafficAnnotationTag traffic_annotation = - net::DefineNetworkTrafficAnnotation("ntp_custom_background", - R"( - semantics { - sender: "Desktop Chrome background fetcher" - description: - "Fetch New Tab Page background image for color calculation." - trigger: - "User selects new collection image background on the New Tab " - "Page." - data: "The only data sent is the URL to an image." - destination: GOOGLE_OWNED_SERVICE - internal { - contacts { - email: "chrome-desktop-ntp@google.com" - } - } - user_data { - type: NONE - } - last_reviewed: "2023-01-09" - } - policy { - cookies_allowed: NO - setting: - "Users cannot disable this feature. The feature is enabled by " - "default." - chrome_policy { - NTPCustomBackgroundEnabled { - NTPCustomBackgroundEnabled: true - } - } - })"); - - image_fetcher::ImageFetcherParams params(traffic_annotation, - "NtpCustomBackgrounds"); - image_fetcher_->FetchImage( - fetch_url, - base::BindOnce( - &NtpCustomBackgroundService::UpdateCustomBackgroundColorAsync, - weak_ptr_factory_.GetWeakPtr(), image_url), - std::move(params)); -} - -void NtpCustomBackgroundService::UpdateCustomBackgroundPrefsWithColor( - const GURL& image_url, - SkColor color) { - // Update background color only if the selected background is still the same. 
- const base::Value::Dict& background_info = - pref_service_->GetDict(prefs::kNtpCustomBackgroundDict); - - GURL current_bg_url( - background_info.Find(kNtpCustomBackgroundURL)->GetString()); - if (current_bg_url == image_url) { - pref_service_->SetDict(prefs::kNtpCustomBackgroundDict, - GetBackgroundInfoWithColor(&background_info, color)); - theme_service_->BuildAutogeneratedThemeFromColor(color); - } -} - void NtpCustomBackgroundService::SetCustomBackgroundInfo( const GURL& background_url, const GURL& thumbnail_url, @@ -407,6 +326,23 @@ } } +void NtpCustomBackgroundService::RevertBackgroundChanges() { + if (previous_background_info_.has_value()) { + pref_service_->Set(prefs::kNtpCustomBackgroundDict, + *previous_background_info_); + } + if (previous_local_background_) { + SetBackgroundToLocalResource(); + } + previous_background_info_.reset(); + previous_local_background_ = false; +} + +void NtpCustomBackgroundService::ConfirmBackgroundChanges() { + previous_background_info_.reset(); + previous_local_background_ = false; +} + absl::optional<CustomBackground> NtpCustomBackgroundService::GetCustomBackground() { DCHECK_CURRENTLY_ON(content::BrowserThread::UI); @@ -551,6 +487,24 @@ clock_ = clock; } +void NtpCustomBackgroundService::UpdateCustomBackgroundColorAsync( + const GURL& image_url, + const gfx::Image& fetched_image, + const image_fetcher::RequestMetadata& metadata) { + if (metadata.http_response_code == + image_fetcher::RequestMetadata::ResponseCode::RESPONSE_CODE_INVALID) { + return; + } + // Calculate the bitmap color asynchronously as it is slow (1-2 seconds for + // the thumbnail). However, prefs should be updated on the main thread. + base::ThreadPool::PostTaskAndReplyWithResult( + FROM_HERE, {base::TaskPriority::BEST_EFFORT}, + base::BindOnce(&GetBitmapMainColor, fetched_image.AsBitmap()), + base::BindOnce( + &NtpCustomBackgroundService::UpdateCustomBackgroundPrefsWithColor, + weak_ptr_factory_.GetWeakPtr(), image_url)); +} + void NtpCustomBackgroundService::SetBackgroundToLocalResource() { background_updated_timestamp_ = base::TimeTicks::Now(); pref_service_->SetBoolean(prefs::kNtpCustomBackgroundLocalToDevice, true); @@ -575,19 +529,65 @@ observer.OnCustomBackgroundImageUpdated(); } -void NtpCustomBackgroundService::RevertBackgroundChanges() { - if (previous_background_info_.has_value()) { - pref_service_->Set(prefs::kNtpCustomBackgroundDict, - *previous_background_info_); +void NtpCustomBackgroundService::UpdateCustomBackgroundPrefsWithColor( + const GURL& image_url, + SkColor color) { + // Update background color only if the selected background is still the same. 
+ const base::Value::Dict& background_info = + pref_service_->GetDict(prefs::kNtpCustomBackgroundDict); + + GURL current_bg_url( + background_info.Find(kNtpCustomBackgroundURL)->GetString()); + if (current_bg_url == image_url) { + pref_service_->SetDict(prefs::kNtpCustomBackgroundDict, + GetBackgroundInfoWithColor(&background_info, color)); + theme_service_->BuildAutogeneratedThemeFromColor(color); } - if (previous_local_background_) { - SetBackgroundToLocalResource(); - } - previous_background_info_.reset(); - previous_local_background_ = false; } -void NtpCustomBackgroundService::ConfirmBackgroundChanges() { - previous_background_info_.reset(); - previous_local_background_ = false; +void NtpCustomBackgroundService::FetchCustomBackgroundAndExtractBackgroundColor( + const GURL& image_url, + const GURL& fetch_url) { + net::NetworkTrafficAnnotationTag traffic_annotation = + net::DefineNetworkTrafficAnnotation("ntp_custom_background", + R"( + semantics { + sender: "Desktop Chrome background fetcher" + description: + "Fetch New Tab Page background image for color calculation." + trigger: + "User selects new collection image background on the New Tab " + "Page." + data: "The only data sent is the URL to an image." + destination: GOOGLE_OWNED_SERVICE + internal { + contacts { + email: "chrome-desktop-ntp@google.com" + } + } + user_data { + type: NONE + } + last_reviewed: "2023-01-09" + } + policy { + cookies_allowed: NO + setting: + "Users cannot disable this feature. The feature is enabled by " + "default." + chrome_policy { + NTPCustomBackgroundEnabled { + NTPCustomBackgroundEnabled: true + } + } + })"); + + image_fetcher::ImageFetcherParams params(traffic_annotation, + "NtpCustomBackgrounds"); + image_fetcher_->FetchImage( + fetch_url, + base::BindOnce( + &NtpCustomBackgroundService::UpdateCustomBackgroundColorAsync, + weak_ptr_factory_.GetWeakPtr(), image_url), + std::move(params)); }
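Editor's note: the relocated UpdateCustomBackgroundColorAsync keeps the same threading pattern as before; the slow color extraction runs on a thread-pool worker, and the pref/theme update is replied back to the posting (UI) sequence. Below is a minimal sketch of that base::ThreadPool::PostTaskAndReplyWithResult pattern, using hypothetical ComputeMainColor/ApplyColor helpers in place of the service's real GetBitmapMainColor and UpdateCustomBackgroundPrefsWithColor.

#include "base/functional/bind.h"
#include "base/task/thread_pool.h"
#include "third_party/skia/include/core/SkBitmap.h"
#include "third_party/skia/include/core/SkColor.h"

// Hypothetical stand-ins for the real helpers.
SkColor ComputeMainColor(const SkBitmap& bitmap) {
  // Placeholder for the expensive color sampling done on a worker thread.
  return bitmap.empty() ? SK_ColorTRANSPARENT : bitmap.getColor(0, 0);
}

void ApplyColor(SkColor color) {
  // Placeholder for the prefs/theme update that must run on the UI sequence.
}

void ExtractColorAsync(const SkBitmap& bitmap) {
  // The first callback runs in the thread pool; its return value is handed to
  // the second callback, which is posted back to the calling sequence.
  base::ThreadPool::PostTaskAndReplyWithResult(
      FROM_HERE, {base::TaskPriority::BEST_EFFORT},
      base::BindOnce(&ComputeMainColor, bitmap),
      base::BindOnce(&ApplyColor));
}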
diff --git a/chrome/browser/search_resumption/junit/src/org/chromium/chrome/browser/search_resumption/SearchResumptionTileBuilderUnitTest.java b/chrome/browser/search_resumption/junit/src/org/chromium/chrome/browser/search_resumption/SearchResumptionTileBuilderUnitTest.java index 70df573..8fea2aa02 100644 --- a/chrome/browser/search_resumption/junit/src/org/chromium/chrome/browser/search_resumption/SearchResumptionTileBuilderUnitTest.java +++ b/chrome/browser/search_resumption/junit/src/org/chromium/chrome/browser/search_resumption/SearchResumptionTileBuilderUnitTest.java
@@ -39,7 +39,6 @@ @Config(manifest = Config.NONE) public class SearchResumptionTileBuilderUnitTest { // The search suggestions are meant to be shown on any website. - private static final String URL_TO_TRACK = "/foo.com"; @Rule public JniMocker mJniMocker = new JniMocker();
diff --git a/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/LongScreenshotsMediatorTest.java b/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/LongScreenshotsMediatorTest.java index dbe5772..563138d 100644 --- a/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/LongScreenshotsMediatorTest.java +++ b/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/LongScreenshotsMediatorTest.java
@@ -46,7 +46,6 @@ @Batch(Batch.PER_CLASS) public class LongScreenshotsMediatorTest { /** Some screenshot dimension that's supposed to be reasonable. */ - private static final int NOMINAL_SCREENSHOT_DIMENSION = 1000; /** * The largest screen dimension that will be accepted by Android in a View. * This is evidently due to an Android total bytes limit of 100M bytes.
diff --git a/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/bitmap_generation/ScreenshotBoundsManagerTest.java b/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/bitmap_generation/ScreenshotBoundsManagerTest.java index 820dea8..1fb38fda0 100644 --- a/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/bitmap_generation/ScreenshotBoundsManagerTest.java +++ b/chrome/browser/share/android/javatests/src/org/chromium/chrome/browser/share/long_screenshots/bitmap_generation/ScreenshotBoundsManagerTest.java
@@ -9,7 +9,6 @@ import static org.mockito.Mockito.when; import android.content.Context; -import android.graphics.Bitmap; import android.graphics.Point; import android.graphics.Rect; import android.util.Size; @@ -48,8 +47,6 @@ @Mock private LongScreenshotsTabService mTabService; - private Bitmap mTestBitmap = Bitmap.createBitmap(512, 1024, Bitmap.Config.ARGB_8888); - @Before public void setUp() { MockitoAnnotations.initMocks(this);
diff --git a/chrome/browser/storage_access_api/api_browsertest.cc b/chrome/browser/storage_access_api/api_browsertest.cc index bafad83c3..1b0a79a 100644 --- a/chrome/browser/storage_access_api/api_browsertest.cc +++ b/chrome/browser/storage_access_api/api_browsertest.cc
@@ -875,13 +875,17 @@ // Manually create a pre-expired grant and ensure it doesn't grant access for // HostB. - base::Time expiration_time = base::Time::Now() - base::Minutes(5); + const base::TimeDelta lifetime = base::Days(30); + const base::Time creation_time = + base::Time::Now() - base::Minutes(5) - lifetime; HostContentSettingsMap* settings_map = HostContentSettingsMapFactory::GetForProfile(browser()->profile()); + content_settings::ContentSettingConstraints constraints(creation_time); + constraints.set_lifetime(lifetime); + constraints.set_session_model(content_settings::SessionModel::UserSession); settings_map->SetContentSettingDefaultScope( GetURL(kHostB), GetURL(kHostA), ContentSettingsType::STORAGE_ACCESS, - CONTENT_SETTING_ALLOW, - {expiration_time, content_settings::SessionModel::UserSession}); + CONTENT_SETTING_ALLOW, constraints); settings_map->SetContentSettingDefaultScope( GetURL(kHostC), GetURL(kHostA), ContentSettingsType::STORAGE_ACCESS, CONTENT_SETTING_ALLOW);
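Editor's note: this test and the later request_storage_access_for / top_level_storage_access changes all migrate from the old brace-initialized {expiration, session_model} pair to a ContentSettingConstraints object, which takes a creation time and a lifetime instead of an absolute expiration. A minimal sketch of building a pre-expired grant under the new API, assuming the fixture's HostContentSettingsMap and two URLs are passed in:

#include "base/time/time.h"
#include "components/content_settings/core/browser/host_content_settings_map.h"
#include "components/content_settings/core/common/content_settings_constraints.h"
#include "url/gurl.h"

// A grant whose creation time is `lifetime` + 5 minutes in the past is
// already expired by the time it is read back.
void AddPreExpiredGrant(HostContentSettingsMap* settings_map,
                        const GURL& embedded_url,
                        const GURL& top_level_url) {
  const base::TimeDelta lifetime = base::Days(30);
  const base::Time creation_time =
      base::Time::Now() - base::Minutes(5) - lifetime;

  content_settings::ContentSettingConstraints constraints(creation_time);
  constraints.set_lifetime(lifetime);
  constraints.set_session_model(content_settings::SessionModel::UserSession);

  settings_map->SetContentSettingDefaultScope(
      embedded_url, top_level_url, ContentSettingsType::STORAGE_ACCESS,
      CONTENT_SETTING_ALLOW, constraints);
}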
diff --git a/chrome/browser/storage_access_api/storage_access_grant_permission_context.cc b/chrome/browser/storage_access_api/storage_access_grant_permission_context.cc index 8c78f93..55d0fc1d 100644 --- a/chrome/browser/storage_access_api/storage_access_grant_permission_context.cc +++ b/chrome/browser/storage_access_api/storage_access_grant_permission_context.cc
@@ -81,13 +81,18 @@ content_settings::ContentSettingConstraints ComputeConstraints( RequestOutcome outcome) { + content_settings::ContentSettingConstraints constraints; switch (outcome) { case RequestOutcome::kGrantedByFirstPartySet: - return {content_settings::GetConstraintExpiration(kImplicitGrantDuration), - content_settings::SessionModel::NonRestorableUserSession}; + constraints.set_lifetime(kImplicitGrantDuration); + constraints.set_session_model( + content_settings::SessionModel::NonRestorableUserSession); + return constraints; case RequestOutcome::kGrantedByAllowance: - return {content_settings::GetConstraintExpiration(kImplicitGrantDuration), - content_settings::SessionModel::UserSession}; + constraints.set_lifetime(kImplicitGrantDuration); + constraints.set_session_model( + content_settings::SessionModel::UserSession); + return constraints; case RequestOutcome::kDismissedByUser: case RequestOutcome::kDeniedByFirstPartySet: case RequestOutcome::kDeniedByPrerequisites: @@ -95,8 +100,9 @@ NOTREACHED_NORETURN(); case RequestOutcome::kGrantedByUser: case RequestOutcome::kDeniedByUser: - return {content_settings::GetConstraintExpiration(kExplicitGrantDuration), - content_settings::SessionModel::Durable}; + constraints.set_lifetime(kExplicitGrantDuration); + constraints.set_session_model(content_settings::SessionModel::Durable); + return constraints; } }
diff --git a/chrome/browser/supervised_user/android/java/src/org/chromium/chrome/browser/supervised_user/website_approval/WebsiteApprovalSheetContent.java b/chrome/browser/supervised_user/android/java/src/org/chromium/chrome/browser/supervised_user/website_approval/WebsiteApprovalSheetContent.java index d24515f..6dc994e6 100644 --- a/chrome/browser/supervised_user/android/java/src/org/chromium/chrome/browser/supervised_user/website_approval/WebsiteApprovalSheetContent.java +++ b/chrome/browser/supervised_user/android/java/src/org/chromium/chrome/browser/supervised_user/website_approval/WebsiteApprovalSheetContent.java
@@ -27,7 +27,6 @@ * Bottom sheet content for the screen which allows a parent to approve or deny a website. */ class WebsiteApprovalSheetContent implements BottomSheetContent { - private static final String TAG = "WebsiteApprovalSheetContent"; private static final String ELLIPSIS = "..."; static final int MAX_HOST_SIZE = 256; static final int SUBSTRING_LIMIT = 256;
diff --git a/chrome/browser/sync/prefs/chrome_syncable_prefs_database.cc b/chrome/browser/sync/prefs/chrome_syncable_prefs_database.cc index 90d06cf..f2842cf0 100644 --- a/chrome/browser/sync/prefs/chrome_syncable_prefs_database.cc +++ b/chrome/browser/sync/prefs/chrome_syncable_prefs_database.cc
@@ -85,9 +85,9 @@ kAccessibilityCursorColor = 100035, kAccessibilityEnhancedNetworkVoicesInSelectToSpeakAllowed = 100036, kAccessibilityFloatingMenuPosition = 100037, - kAccessibilityGreyscaleAmount = 100038, - kAccessibilityHueRotationAmount = 100039, - kAccessibilitySaturationAmount = 100040, + // kAccessibilityGreyscaleAmount = 100038, // deprecated + // kAccessibilityHueRotationAmount = 100039, // deprecated + // kAccessibilitySaturationAmount = 100040, // deprecated kAccessibilityScreenMagnifierCenterFocus = 100041, kAccessibilityScreenMagnifierFocusFollowingEnabled = 100042, kAccessibilityScreenMagnifierMouseFollowingMode = 100043, @@ -100,7 +100,7 @@ kAccessibilitySelectToSpeakVoiceName = 100050, kAccessibilitySelectToSpeakVoiceSwitching = 100051, kAccessibilitySelectToSpeakWordHighlight = 100052, - kAccessibilitySepiaAmount = 100053, + // kAccessibilitySepiaAmount = 100053, // deprecated kAccessibilitySwitchAccessAutoScanEnabled = 100054, kAccessibilitySwitchAccessAutoScanKeyboardSpeedMs = 100055, kAccessibilitySwitchAccessAutoScanSpeedMs = 100056, @@ -394,15 +394,6 @@ {ash::prefs::kAccessibilityFloatingMenuPosition, {syncable_prefs_ids::kAccessibilityFloatingMenuPosition, syncer::OS_PREFERENCES, false}}, - {ash::prefs::kAccessibilityGreyscaleAmount, - {syncable_prefs_ids::kAccessibilityGreyscaleAmount, - syncer::OS_PREFERENCES, false}}, - {ash::prefs::kAccessibilityHueRotationAmount, - {syncable_prefs_ids::kAccessibilityHueRotationAmount, - syncer::OS_PREFERENCES, false}}, - {ash::prefs::kAccessibilitySaturationAmount, - {syncable_prefs_ids::kAccessibilitySaturationAmount, - syncer::OS_PREFERENCES, false}}, {ash::prefs::kAccessibilityScreenMagnifierCenterFocus, {syncable_prefs_ids::kAccessibilityScreenMagnifierCenterFocus, syncer::OS_PREFERENCES, false}}, @@ -441,9 +432,6 @@ {ash::prefs::kAccessibilitySelectToSpeakWordHighlight, {syncable_prefs_ids::kAccessibilitySelectToSpeakWordHighlight, syncer::OS_PREFERENCES, false}}, - {ash::prefs::kAccessibilitySepiaAmount, - {syncable_prefs_ids::kAccessibilitySepiaAmount, syncer::OS_PREFERENCES, - false}}, {ash::prefs::kAccessibilitySwitchAccessAutoScanEnabled, {syncable_prefs_ids::kAccessibilitySwitchAccessAutoScanEnabled, syncer::OS_PREFERENCES, false}},
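Editor's note: the syncable-prefs change above follows the retirement convention for these IDs; the enum entries are commented out rather than deleted so their numeric values stay reserved and are never reassigned to a new pref. A tiny sketch of the convention with made-up names:

// Illustrative only; the names and values below are not real prefs.
enum SyncablePrefId {
  kSomeActivePref = 200001,
  // kSomeRetiredPref = 200002,  // deprecated, value stays reserved forever
  kNextActivePref = 200003,
};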
diff --git a/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailDiskStorageTest.java b/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailDiskStorageTest.java index eebc1c1d..cdf945b5 100644 --- a/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailDiskStorageTest.java +++ b/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailDiskStorageTest.java
@@ -32,7 +32,6 @@ @RunWith(ChromeJUnit4ClassRunner.class) @Batch(Batch.UNIT_TESTS) public class ThumbnailDiskStorageTest { - private static final String TAG = "ThumbnailDiskTest"; private static final String CONTENT_ID1 = "contentId1"; private static final String CONTENT_ID2 = "contentId2"; private static final String CONTENT_ID3 = "contentId3";
diff --git a/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailMediaParserTest.java b/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailMediaParserTest.java index 672385c7..0d32076 100644 --- a/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailMediaParserTest.java +++ b/chrome/browser/thumbnail/generator/android/java/src/org/chromium/chrome/browser/thumbnail/generator/ThumbnailMediaParserTest.java
@@ -14,7 +14,6 @@ import org.chromium.base.test.util.Batch; import org.chromium.base.test.util.CriteriaHelper; -import org.chromium.base.test.util.DisabledTest; import org.chromium.base.test.util.Feature; import org.chromium.base.test.util.Restriction; import org.chromium.base.test.util.UrlUtils; @@ -91,7 +90,6 @@ @LargeTest @Feature({"MediaParser"}) @Restriction(UiRestriction.RESTRICTION_TYPE_PHONE) - @DisabledTest(message = "flaky on android-pie-arm64-rel, see crbug.com/1046382") /** * Verify metadata and thumbnail can be retrieved correctly from h264 video file. * @throws InterruptedException
diff --git a/chrome/browser/top_level_storage_access_api/request_storage_access_for_browsertest.cc b/chrome/browser/top_level_storage_access_api/request_storage_access_for_browsertest.cc index 5c443f21..6381115 100644 --- a/chrome/browser/top_level_storage_access_api/request_storage_access_for_browsertest.cc +++ b/chrome/browser/top_level_storage_access_api/request_storage_access_for_browsertest.cc
@@ -273,17 +273,22 @@ NavigateNestedFrameTo(kHostC, "/echoheader?cookie"); // Manually create a pre-expired grant and ensure it doesn't grant access. - base::Time expiration_time = base::Time::Now() - base::Minutes(5); + const base::TimeDelta lifetime = base::Hours(24); + const base::Time creation_time = + base::Time::Now() - base::Minutes(5) - lifetime; HostContentSettingsMap* settings_map = HostContentSettingsMapFactory::GetForProfile(browser()->profile()); + content_settings::ContentSettingConstraints constraints(creation_time); + constraints.set_lifetime(lifetime); + constraints.set_session_model(content_settings::SessionModel::UserSession); settings_map->SetContentSettingDefaultScope( GetURL(kHostB), GetURL(kHostA), ContentSettingsType::TOP_LEVEL_STORAGE_ACCESS, CONTENT_SETTING_ALLOW, - {expiration_time, content_settings::SessionModel::UserSession}); + constraints); settings_map->SetContentSettingDefaultScope( GetURL(kHostC), GetURL(kHostA), ContentSettingsType::TOP_LEVEL_STORAGE_ACCESS, CONTENT_SETTING_ALLOW, - {expiration_time, content_settings::SessionModel::UserSession}); + constraints); // Manually send our expired setting. This needs to be done manually because // normally this expired value would be filtered out before sending and time @@ -293,7 +298,7 @@ ContentSettingsPattern::FromURLNoWildcard(GetURL(kHostB)), ContentSettingsPattern::FromURLNoWildcard(GetURL(kHostA)), base::Value(CONTENT_SETTING_ALLOW), "preference", - /*incognito=*/false, {.expiration = expiration_time})); + /*incognito=*/false, {.expiration = creation_time + lifetime})); settings.emplace_back( ContentSettingsPattern::FromURLNoWildcard(GetURL(kHostC)), ContentSettingsPattern::FromURLNoWildcard(GetURL(kHostA)),
diff --git a/chrome/browser/top_level_storage_access_api/top_level_storage_access_permission_context.cc b/chrome/browser/top_level_storage_access_api/top_level_storage_access_permission_context.cc index 84dfbde..c4d760bf 100644 --- a/chrome/browser/top_level_storage_access_api/top_level_storage_access_permission_context.cc +++ b/chrome/browser/top_level_storage_access_api/top_level_storage_access_permission_context.cc
@@ -238,19 +238,20 @@ ->WithPortWildcard() ->Build(); + content_settings::ContentSettingConstraints constraints; + constraints.set_lifetime(kGrantDuration); + constraints.set_session_model( + content_settings::SessionModel::NonRestorableUserSession); + settings_map->SetContentSettingCustomScope( ContentSettingsPattern::FromURLNoWildcard(requesting_origin), secondary_site_pattern, ContentSettingsType::TOP_LEVEL_STORAGE_ACCESS, - content_setting, - {content_settings::GetConstraintExpiration(kGrantDuration), - content_settings::SessionModel::NonRestorableUserSession}); + content_setting, constraints); settings_map->SetContentSettingCustomScope( ContentSettingsPattern::FromURLNoWildcard(requesting_origin), secondary_site_pattern, ContentSettingsType::STORAGE_ACCESS, - content_setting, - {content_settings::GetConstraintExpiration(kGrantDuration), - content_settings::SessionModel::NonRestorableUserSession}); + content_setting, constraints); ContentSettingsForOneType top_level_grants; settings_map->GetSettingsForOneType(
diff --git a/chrome/browser/ui/BUILD.gn b/chrome/browser/ui/BUILD.gn index 5d78cedd..54f6535 100644 --- a/chrome/browser/ui/BUILD.gn +++ b/chrome/browser/ui/BUILD.gn
@@ -1389,6 +1389,8 @@ "privacy_sandbox/privacy_sandbox_prompt.h", "privacy_sandbox/privacy_sandbox_prompt_helper.cc", "privacy_sandbox/privacy_sandbox_prompt_helper.h", + "profile_view_utils.cc", + "profile_view_utils.h", "sad_tab.cc", "sad_tab.h", "sad_tab_helper.cc", @@ -2798,7 +2800,6 @@ "webui/ash/add_supervision/add_supervision_ui.h", "webui/ash/add_supervision/confirm_signout_dialog.cc", "webui/ash/add_supervision/confirm_signout_dialog.h", - "webui/ash/arc_graphics_tracing/arc_graphics_tracing.h", "webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.cc", "webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.h", "webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.cc",
diff --git a/chrome/browser/ui/android/autofill/internal/java/src/org/chromium/chrome/browser/ui/autofill/AuthenticatorSelectionDialogBridge.java b/chrome/browser/ui/android/autofill/internal/java/src/org/chromium/chrome/browser/ui/autofill/AuthenticatorSelectionDialogBridge.java index 799835da..0682b68c 100644 --- a/chrome/browser/ui/android/autofill/internal/java/src/org/chromium/chrome/browser/ui/autofill/AuthenticatorSelectionDialogBridge.java +++ b/chrome/browser/ui/android/autofill/internal/java/src/org/chromium/chrome/browser/ui/autofill/AuthenticatorSelectionDialogBridge.java
@@ -25,8 +25,6 @@ */ @JNINamespace("autofill") public class AuthenticatorSelectionDialogBridge implements AuthenticatorSelectionDialog.Listener { - private static final String TAG = "AuthSelectionDialog"; - private final long mNativeCardUnmaskAuthenticationSelectionDialogView; private final Context mContext; private AuthenticatorSelectionDialog mAuthenticatorSelectionDialog;
diff --git a/chrome/browser/ui/android/edge_to_edge/internal/java/src/org/chromium/chrome/browser/ui/edge_to_edge/EdgeToEdgeControllerImpl.java b/chrome/browser/ui/android/edge_to_edge/internal/java/src/org/chromium/chrome/browser/ui/edge_to_edge/EdgeToEdgeControllerImpl.java index 63466e5..8cd430d 100644 --- a/chrome/browser/ui/android/edge_to_edge/internal/java/src/org/chromium/chrome/browser/ui/edge_to_edge/EdgeToEdgeControllerImpl.java +++ b/chrome/browser/ui/android/edge_to_edge/internal/java/src/org/chromium/chrome/browser/ui/edge_to_edge/EdgeToEdgeControllerImpl.java
@@ -22,8 +22,6 @@ * Status Bar. */ public class EdgeToEdgeControllerImpl implements EdgeToEdgeController { - private static final String TAG = "EdgeToEdgeController"; - /** The outermost view in our view hierarchy that is identified with a resource ID. */ private static final int ROOT_UI_VIEW_ID = android.R.id.content;
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/geo/VisibleNetworks.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/geo/VisibleNetworks.java index 1a3e85c..1bdd7a34 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/geo/VisibleNetworks.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/geo/VisibleNetworks.java
@@ -17,8 +17,6 @@ * Visible networks. Stores the data of connected and visible networks. */ class VisibleNetworks { - private static final String TAG = "VisibleNetworks"; - @Nullable private final VisibleWifi mConnectedWifi; @Nullable
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediator.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediator.java index 2b1bde4d..33f827c1 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediator.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediator.java
@@ -77,7 +77,6 @@ // Delay triggering the omnibox results upon key press to allow the location bar to repaint // with the new characters. private static final long OMNIBOX_SUGGESTION_START_DELAY_MS = 30; - private static final int OMNIBOX_HISTOGRAMS_MAX_SUGGESTIONS = 10; private final @NonNull Context mContext; private final @NonNull AutocompleteControllerProvider mControllerProvider;
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediatorUnitTest.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediatorUnitTest.java index de0c2d3..831fbe5 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediatorUnitTest.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/AutocompleteMediatorUnitTest.java
@@ -84,7 +84,6 @@ @Config(manifest = Config.NONE, shadows = {ShadowLog.class, ShadowLooper.class, ShadowGURL.class}) @Features.EnableFeatures({ChromeFeatureList.CLEAR_OMNIBOX_FOCUS_AFTER_NAVIGATION}) public class AutocompleteMediatorUnitTest { - private static final int MINIMUM_NUMBER_OF_SUGGESTIONS_TO_SHOW = 5; private static final int SUGGESTION_MIN_HEIGHT = 20; private static final int HEADER_MIN_HEIGHT = 15;
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/DropdownItemViewInfoListManagerUnitTest.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/DropdownItemViewInfoListManagerUnitTest.java index 6473f6e..7d0cd933c 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/DropdownItemViewInfoListManagerUnitTest.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/DropdownItemViewInfoListManagerUnitTest.java
@@ -53,10 +53,6 @@ @Config(manifest = Config.NONE, shadows = {ShadowGURL.class}) @Features.EnableFeatures({ChromeFeatureList.OMNIBOX_MODERNIZE_VISUAL_UPDATE}) public class DropdownItemViewInfoListManagerUnitTest { - private static final int MINIMUM_NUMBER_OF_SUGGESTIONS_TO_SHOW = 5; - private static final int SUGGESTION_MIN_HEIGHT = 20; - private static final int HEADER_MIN_HEIGHT = 15; - public @Rule MockitoRule mockitoRule = MockitoJUnit.rule(); public @Rule TestRule mProcessor = new Features.JUnitProcessor();
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/answer/AnswerTextNewLayout.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/answer/AnswerTextNewLayout.java index 45d7decb..8461913 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/answer/AnswerTextNewLayout.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/answer/AnswerTextNewLayout.java
@@ -22,7 +22,6 @@ * AnswerTextNewLayout builds Omnibox styled Answer suggestion texts for revamped answer layouts. */ class AnswerTextNewLayout extends AnswerText { - private static final String TAG = "AnswerTextNewLayout"; private final boolean mIsAnswer; private final @AnswerType int mAnswerType; private final boolean mStockTextColorReverse;
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/base/ActionChipsBinder.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/base/ActionChipsBinder.java index 3217f712..0e295af 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/base/ActionChipsBinder.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/base/ActionChipsBinder.java
@@ -16,14 +16,11 @@ import org.chromium.components.browser_ui.widget.chips.ChipViewBinder; import org.chromium.ui.modelutil.PropertyKey; import org.chromium.ui.modelutil.PropertyModel; -import org.chromium.ui.modelutil.PropertyModelChangeProcessor.ViewBinder; /** * Binds ActionChipsView properties. */ public final class ActionChipsBinder { - private static final ViewBinder<PropertyModel, View, PropertyKey> NOOP_BINDER = (m, v, p) -> {}; - public static void bind(PropertyModel model, ActionChipsView view, PropertyKey propertyKey) { if (ActionChipsProperties.ACTION_CHIPS == propertyKey) { var isIncognito = model.get(SuggestionCommonProperties.COLOR_SCHEME)
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/basic/BasicSuggestionProcessorUnitTest.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/basic/BasicSuggestionProcessorUnitTest.java index 4785a7c..bcd4946e 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/basic/BasicSuggestionProcessorUnitTest.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/basic/BasicSuggestionProcessorUnitTest.java
@@ -60,8 +60,6 @@ @RunWith(BaseRobolectricTestRunner.class) @Config(manifest = Config.NONE, shadows = {ShadowGURL.class, ShadowUrlBarData.class}) public class BasicSuggestionProcessorUnitTest { - private static final GURL EXTERNAL_URL = JUnitTestGURLs.getGURL(JUnitTestGURLs.URL_1); - private static final GURL INTERNAL_URL = JUnitTestGURLs.getGURL(JUnitTestGURLs.NTP_URL); private static final @DrawableRes int ICON_BOOKMARK = R.drawable.btn_star; private static final @DrawableRes int ICON_GLOBE = R.drawable.ic_globe_24dp; private static final @DrawableRes int ICON_HISTORY = R.drawable.ic_history_googblue_24dp;
diff --git a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/mostvisited/MostVisitedTilesProcessorUnitTest.java b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/mostvisited/MostVisitedTilesProcessorUnitTest.java index fc33e50..7222954 100644 --- a/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/mostvisited/MostVisitedTilesProcessorUnitTest.java +++ b/chrome/browser/ui/android/omnibox/java/src/org/chromium/chrome/browser/omnibox/suggestions/mostvisited/MostVisitedTilesProcessorUnitTest.java
@@ -74,8 +74,6 @@ private static final GURL NAV_URL = JUnitTestGURLs.getGURL(JUnitTestGURLs.URL_1); private static final GURL NAV_URL_2 = JUnitTestGURLs.getGURL(JUnitTestGURLs.URL_2); private static final GURL SEARCH_URL = JUnitTestGURLs.getGURL(JUnitTestGURLs.SEARCH_URL); - private static final int FALLBACK_COLOR = 0xACE0BA5E; - private static final int DESIRED_FAVICON_SIZE_PX = 100; public @Rule MockitoRule mockitoRule = MockitoJUnit.rule(); public @Rule TestRule mFeatures = new Features.JUnitProcessor();
diff --git a/chrome/browser/ui/android/theme/java/src/org/chromium/chrome/browser/theme/ThemeUtils.java b/chrome/browser/ui/android/theme/java/src/org/chromium/chrome/browser/theme/ThemeUtils.java index 16abb3d..f224a99 100644 --- a/chrome/browser/ui/android/theme/java/src/org/chromium/chrome/browser/theme/ThemeUtils.java +++ b/chrome/browser/ui/android/theme/java/src/org/chromium/chrome/browser/theme/ThemeUtils.java
@@ -29,7 +29,6 @@ * Utility methods for theme colors. */ public class ThemeUtils { - private static final String TAG = "ThemeUtils"; private static final float LOCATION_BAR_TRANSPARENT_BACKGROUND_ALPHA = 0.2f; /**
diff --git a/chrome/browser/ui/android/toolbar/java/src/org/chromium/chrome/browser/toolbar/adaptive/AdaptiveToolbarStatePredictor.java b/chrome/browser/ui/android/toolbar/java/src/org/chromium/chrome/browser/toolbar/adaptive/AdaptiveToolbarStatePredictor.java index ac2c08e..e1c7597 100644 --- a/chrome/browser/ui/android/toolbar/java/src/org/chromium/chrome/browser/toolbar/adaptive/AdaptiveToolbarStatePredictor.java +++ b/chrome/browser/ui/android/toolbar/java/src/org/chromium/chrome/browser/toolbar/adaptive/AdaptiveToolbarStatePredictor.java
@@ -25,7 +25,6 @@ * Key used to lookup segmentation results for adaptive toolbar. Must be kept in sync with * components/segmentation_platform/internal/constants.cc. */ - private static final String ADAPTIVE_TOOLBAR_SEGMENTATION_KEY = "adaptive_toolbar"; private static Pair<Boolean, Integer> sSegmentationResultsForTesting; private static Integer sToolbarStateForTesting;
diff --git a/chrome/browser/ui/android/webid/internal/java/src/org/chromium/chrome/browser/ui/android/webid/AccountSelectionIntegrationTest.java b/chrome/browser/ui/android/webid/internal/java/src/org/chromium/chrome/browser/ui/android/webid/AccountSelectionIntegrationTest.java index 990aea7..3406fb6b 100644 --- a/chrome/browser/ui/android/webid/internal/java/src/org/chromium/chrome/browser/ui/android/webid/AccountSelectionIntegrationTest.java +++ b/chrome/browser/ui/android/webid/internal/java/src/org/chromium/chrome/browser/ui/android/webid/AccountSelectionIntegrationTest.java
@@ -46,14 +46,11 @@ import org.chromium.base.test.util.ScalableTimeout; import org.chromium.chrome.browser.customtabs.CustomTabActivity; import org.chromium.chrome.browser.flags.ChromeSwitches; -import org.chromium.chrome.browser.tab.Tab; -import org.chromium.chrome.browser.tab.TabLaunchType; import org.chromium.chrome.browser.ui.android.webid.data.Account; import org.chromium.chrome.browser.ui.android.webid.data.ClientIdMetadata; import org.chromium.chrome.browser.ui.android.webid.data.IdentityProviderMetadata; import org.chromium.chrome.test.ChromeJUnit4ClassRunner; import org.chromium.chrome.test.ChromeTabbedActivityTestRule; -import org.chromium.chrome.test.util.browser.tabmodel.MockTabCreator; import org.chromium.components.browser_ui.bottomsheet.BottomSheetContent; import org.chromium.components.browser_ui.bottomsheet.BottomSheetController; import org.chromium.components.browser_ui.bottomsheet.BottomSheetController.SheetState; @@ -72,22 +69,6 @@ @Batch(Batch.PER_CLASS) @CommandLineFlags.Add({ChromeSwitches.DISABLE_FIRST_RUN_EXPERIENCE}) public class AccountSelectionIntegrationTest { - private static class FakeTabCreator extends MockTabCreator { - FakeTabCreator() { - super(false, null); - } - - @Override - public Tab launchUrl(String url, @TabLaunchType int type) { - mLastLaunchedUrl = url; - return null; - } - - String mLastLaunchedUrl; - }; - - private static final FakeTabCreator sTabCreator = new FakeTabCreator(); - private static final String EXAMPLE_ETLD_PLUS_ONE = "example.com"; private static final String TEST_ETLD_PLUS_ONE_1 = "one.com"; private static final String TEST_ETLD_PLUS_ONE_2 = "two.com";
diff --git a/chrome/browser/ui/ash/projector/projector_client_impl.cc b/chrome/browser/ui/ash/projector/projector_client_impl.cc index 84bcd3e..e388aad0 100644 --- a/chrome/browser/ui/ash/projector/projector_client_impl.cc +++ b/chrome/browser/ui/ash/projector/projector_client_impl.cc
@@ -360,13 +360,7 @@ Profile* profile = ProfileManager::GetActiveUserProfile(); ash::SystemWebAppManager* swa_manager = ash::SystemWebAppManager::Get(profile); - // TODO(b/240497023): convert to dcheck once confirm that the pointer is - // always available at this point. - if (!swa_manager) { - RecordPolicyChangeHandlingError( - ash::ProjectorPolicyChangeHandlingError::kSwaManager); - return; - } + CHECK(swa_manager); const bool is_installed = swa_manager->IsSystemWebApp(ash::kChromeUITrustedProjectorSwaAppId); // We can't enable or disable the app if it's not already installed. @@ -381,13 +375,7 @@ CloseProjectorApp(); auto* web_app_provider = ash::SystemWebAppManager::GetWebAppProvider(profile); - // TODO(b/240497023): convert to dcheck once confirm that the pointer is - // always available at this point. - if (!web_app_provider) { - RecordPolicyChangeHandlingError( - ash::ProjectorPolicyChangeHandlingError::kWebAppProvider); - return; - } + CHECK(web_app_provider); web_app_provider->on_registry_ready().Post( FROM_HERE, base::BindOnce(&ProjectorClientImpl::SetAppIsDisabled, weak_ptr_factory_.GetWeakPtr(), !is_enabled)); @@ -397,13 +385,7 @@ Profile* profile = ProfileManager::GetActiveUserProfile(); auto* web_app_provider = ash::SystemWebAppManager::GetWebAppProvider(profile); - // TODO(b/240497023): convert to dcheck once confirm that the pointer is - // always available at this point. - if (!web_app_provider) { - RecordPolicyChangeHandlingError(ash::ProjectorPolicyChangeHandlingError:: - kWebAppProviderOnRegistryReady); - return; - } + CHECK(web_app_provider); web_app_provider->scheduler().SetAppIsDisabled( ash::kChromeUITrustedProjectorSwaAppId, disabled, base::DoNothing());
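Editor's note: the projector handlers above now treat a missing SystemWebAppManager or WebAppProvider as an invariant violation rather than a recoverable state; the metric-and-return branch becomes CHECK(), which terminates even in release builds (unlike DCHECK, which is compiled out there). A tiny sketch of the difference, using a hypothetical accessor:

#include "base/check.h"

struct Provider {
  void DoWork() {}
};

Provider* GetProvider();  // hypothetical; assumed non-null by contract

void HandlePolicyChange() {
  Provider* provider = GetProvider();
  // Old style: if (!provider) { RecordError(); return; }
  // New style: a null pointer is a programming error, so crash loudly instead
  // of silently skipping the policy update.
  CHECK(provider);
  provider->DoWork();
}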
diff --git a/chrome/browser/ui/browser_command_controller.cc b/chrome/browser/ui/browser_command_controller.cc index fae2a8d..71ddbf5a 100644 --- a/chrome/browser/ui/browser_command_controller.cc +++ b/chrome/browser/ui/browser_command_controller.cc
@@ -26,21 +26,25 @@ #include "chrome/browser/lifetime/application_lifetime.h" #include "chrome/browser/prefs/incognito_mode_prefs.h" #include "chrome/browser/profiles/profile.h" +#include "chrome/browser/profiles/profile_window.h" #include "chrome/browser/sessions/tab_restore_service_factory.h" #include "chrome/browser/sharing_hub/sharing_hub_features.h" #include "chrome/browser/shell_integration.h" +#include "chrome/browser/signin/identity_manager_factory.h" #include "chrome/browser/signin/signin_promo.h" #include "chrome/browser/ui/apps/app_info_dialog.h" #include "chrome/browser/ui/bookmarks/bookmark_tab_helper.h" #include "chrome/browser/ui/browser.h" #include "chrome/browser/ui/browser_commands.h" #include "chrome/browser/ui/browser_finder.h" +#include "chrome/browser/ui/browser_list.h" #include "chrome/browser/ui/browser_window.h" #include "chrome/browser/ui/chrome_pages.h" #include "chrome/browser/ui/commander/commander.h" #include "chrome/browser/ui/managed_ui.h" #include "chrome/browser/ui/page_info/page_info_dialog.h" #include "chrome/browser/ui/passwords/ui_utils.h" +#include "chrome/browser/ui/profile_view_utils.h" #include "chrome/browser/ui/side_panel/side_panel_entry_id.h" #include "chrome/browser/ui/side_panel/side_panel_enums.h" #include "chrome/browser/ui/side_panel/side_panel_ui.h" @@ -72,6 +76,7 @@ #include "components/sessions/core/tab_restore_service.h" #include "components/signin/public/base/signin_buildflags.h" #include "components/signin/public/base/signin_pref_names.h" +#include "components/signin/public/identity_manager/identity_manager.h" #include "content/public/browser/native_web_keyboard_event.h" #include "content/public/browser/navigation_controller.h" #include "content/public/browser/navigation_entry.h" @@ -1021,7 +1026,31 @@ SidePanelUI::GetSidePanelUIForBrowser(browser_)->Show( SidePanelEntryId::kReadingList, SidePanelOpenTrigger::kAppMenu); break; - + // Profile submenu commands. + case IDC_CUSTOMIZE_CHROME: + chrome::ShowSettingsSubPage(browser_, chrome::kManageProfileSubPage); + break; + case IDC_CLOSE_PROFILE: { + if (browser_->profile()->IsIncognitoProfile()) { + BrowserList::CloseAllBrowsersWithIncognitoProfile( + browser_->profile(), base::DoNothing(), base::DoNothing(), true); + } else { + profiles::CloseProfileWindows(browser_->profile()); + } + break; + } + case IDC_MANAGE_GOOGLE_ACCOUNT: { + Profile* profile = browser_->profile(); + signin::IdentityManager* identity_manager = + IdentityManagerFactory::GetForProfile(profile); + DCHECK( + identity_manager->HasPrimaryAccount(signin::ConsentLevel::kSignin)); + NavigateToGoogleAccountPage( + profile, + identity_manager->GetPrimaryAccountInfo(signin::ConsentLevel::kSignin) + .email); + break; + } default: LOG(WARNING) << "Received Unimplemented Command: " << id; break; @@ -1218,6 +1247,11 @@ IDC_RECENT_TABS_LOGIN_FOR_DEVICE_TABS, (!guest_session && !profile()->IsSystemProfile() && !profile()->IsIncognitoProfile())); + command_updater_.UpdateCommandEnabled(IDC_CUSTOMIZE_CHROME, true); + command_updater_.UpdateCommandEnabled(IDC_CLOSE_PROFILE, true); + command_updater_.UpdateCommandEnabled( + IDC_MANAGE_GOOGLE_ACCOUNT, + HasUnconstentedProfile(profile()) && !IsSyncPaused(profile())); if (profile()->IsIncognitoProfile()) { command_updater_.UpdateCommandEnabled(IDC_CLEAR_BROWSING_DATA, true);
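Editor's note: the new profile-submenu commands above need two pieces of wiring: an enable bit registered with the command updater, and a case in the execute-command switch. The toy model below is a sketch of that pattern, not the Chromium classes; the command ID and handler body are placeholders.

#include <map>

// Toy model of the enable-bit + dispatch pattern used in
// BrowserCommandController.
class ToyCommandController {
 public:
  static constexpr int kExampleCommand = 1;  // placeholder command id

  void UpdateCommandEnabled(int id, bool enabled) { enabled_[id] = enabled; }

  bool ExecuteCommand(int id) {
    if (!enabled_[id])
      return false;  // disabled commands are never dispatched
    switch (id) {
      case kExampleCommand:
        // ... perform the work the command stands for ...
        return true;
      default:
        return false;
    }
  }

 private:
  std::map<int, bool> enabled_;
};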
diff --git a/chrome/browser/ui/browser_command_controller_browsertest.cc b/chrome/browser/ui/browser_command_controller_browsertest.cc index 6a64608..e740afc 100644 --- a/chrome/browser/ui/browser_command_controller_browsertest.cc +++ b/chrome/browser/ui/browser_command_controller_browsertest.cc
@@ -15,6 +15,7 @@ #include "chrome/browser/search_engines/template_url_service_factory.h" #include "chrome/browser/sessions/tab_restore_service_factory.h" #include "chrome/browser/sessions/tab_restore_service_load_waiter.h" +#include "chrome/browser/signin/identity_manager_factory.h" #include "chrome/browser/ui/browser.h" #include "chrome/browser/ui/browser_commands.h" #include "chrome/browser/ui/browser_finder.h" @@ -28,8 +29,11 @@ #include "components/search_engines/template_url_service.h" #include "components/sessions/core/tab_restore_service.h" #include "components/sessions/core/tab_restore_service_observer.h" +#include "components/signin/public/identity_manager/identity_manager.h" +#include "components/signin/public/identity_manager/identity_test_utils.h" #include "content/public/test/browser_test.h" #include "content/public/test/test_utils.h" +#include "ui/base/ui_base_features.h" #if BUILDFLAG(IS_CHROMEOS_ASH) #include "ash/constants/ash_switches.h" @@ -58,6 +62,18 @@ } }; +// Test case for menus that only appear after Chrome Refresh. +class BrowserCommandControllerBrowserTestRefreshOnly + : public BrowserCommandControllerBrowserTest { + public: + BrowserCommandControllerBrowserTestRefreshOnly() { + scoped_feature_list_.InitWithFeatures({features::kChromeRefresh2023}, {}); + } + + private: + base::test::ScopedFeatureList scoped_feature_list_; +}; + // Verify that showing a constrained window disables find. IN_PROC_BROWSER_TEST_F(BrowserCommandControllerBrowserTest, DisableFind) { EXPECT_TRUE(chrome::IsCommandEnabled(browser(), IDC_FIND)); @@ -265,4 +281,30 @@ ASSERT_EQ(false, commandController->IsCommandEnabled(IDC_OPEN_FILE)); } +#if !BUILDFLAG(IS_CHROMEOS_ASH) +IN_PROC_BROWSER_TEST_F(BrowserCommandControllerBrowserTestRefreshOnly, + ExecuteProfileMenuCustomizeChrome) { + EXPECT_TRUE(chrome::ExecuteCommand(browser(), IDC_CUSTOMIZE_CHROME)); + content::WebContents* web_contents = + browser()->tab_strip_model()->GetActiveWebContents(); + content::WaitForLoadStop(web_contents); + EXPECT_EQ(web_contents->GetURL().possibly_invalid_spec(), + "chrome://settings/manageProfile"); +} + +IN_PROC_BROWSER_TEST_F(BrowserCommandControllerBrowserTestRefreshOnly, + ExecuteProfileMenuManageGoogleAccount) { + signin::IdentityManager* identity_manager = + IdentityManagerFactory::GetForProfile(browser()->profile()); + CoreAccountInfo account_info = signin::SetPrimaryAccount( + identity_manager, "user@example.com", signin::ConsentLevel::kSignin); + chrome::UpdateCommandEnabled(browser(), IDC_MANAGE_GOOGLE_ACCOUNT, true); + EXPECT_TRUE(chrome::ExecuteCommand(browser(), IDC_MANAGE_GOOGLE_ACCOUNT)); +} + +IN_PROC_BROWSER_TEST_F(BrowserCommandControllerBrowserTestRefreshOnly, + ExecuteProfileMenuCloseProfile) { + EXPECT_TRUE(chrome::ExecuteCommand(browser(), IDC_CLOSE_PROFILE)); +} +#endif } // namespace chrome
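Editor's note: the new BrowserCommandControllerBrowserTestRefreshOnly fixture gates its tests on kChromeRefresh2023 by enabling the feature in the fixture constructor, before the browser process starts. A minimal sketch of the same pattern for any feature-gated browser test; features::kMyFeature is a placeholder, not a real flag:

#include "base/test/scoped_feature_list.h"
#include "chrome/test/base/in_process_browser_test.h"

class MyFeatureGatedBrowserTest : public InProcessBrowserTest {
 public:
  MyFeatureGatedBrowserTest() {
    // Enable the flag for the whole fixture; doing this in the constructor
    // ensures the state is set before browser threads spin up.
    scoped_feature_list_.InitWithFeatures(
        /*enabled_features=*/{features::kMyFeature},
        /*disabled_features=*/{});
  }

 private:
  base::test::ScopedFeatureList scoped_feature_list_;
};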
diff --git a/chrome/browser/ui/color/omnibox_color_mixer.cc b/chrome/browser/ui/color/omnibox_color_mixer.cc index 090de53..f4fe3498 100644 --- a/chrome/browser/ui/color/omnibox_color_mixer.cc +++ b/chrome/browser/ui/color/omnibox_color_mixer.cc
@@ -170,6 +170,9 @@ SkColorSetRGB(153, 153, 153)); mixer[kColorOmniboxResultsButtonInkDropSelected] = { kColorOmniboxResultsButtonInkDrop}; + + // Update starter pack icon color. + mixer[kColorOmniboxResultsStarterPackIcon] = {ui::kColorSysPrimary}; } // Apply updates to the Omnibox color tokens per CR2023 guidelines.
diff --git a/chrome/browser/ui/profile_view_utils.cc b/chrome/browser/ui/profile_view_utils.cc new file mode 100644 index 0000000..594e3850 --- /dev/null +++ b/chrome/browser/ui/profile_view_utils.cc
@@ -0,0 +1,56 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/browser/ui/profile_view_utils.h" + +#include "chrome/browser/profiles/profile.h" +#include "chrome/browser/signin/identity_manager_factory.h" +#include "chrome/browser/sync/sync_ui_util.h" +#include "chrome/browser/ui/browser_finder.h" +#include "chrome/browser/ui/browser_navigator.h" +#include "chrome/browser/ui/browser_navigator_params.h" +#include "chrome/common/url_constants.h" +#include "components/signin/public/base/consent_level.h" +#include "components/signin/public/identity_manager/identity_manager.h" +#include "net/base/url_util.h" +#include "ui/base/window_open_disposition.h" +#include "url/gurl.h" + +void NavigateToGoogleAccountPage(Profile* profile, const std::string& email) { + // Create a URL so that the account chooser is shown if the account with + // |email| is not signed into the web. Include a UTM parameter to signal the + // source of the navigation. + GURL google_account = net::AppendQueryParameter( + GURL(chrome::kGoogleAccountURL), "utm_source", "chrome-profile-chooser"); + + GURL url(chrome::kGoogleAccountChooserURL); + url = net::AppendQueryParameter(url, "Email", email); + url = net::AppendQueryParameter(url, "continue", google_account.spec()); + + NavigateParams params(profile, url, ui::PAGE_TRANSITION_LINK); + params.disposition = WindowOpenDisposition::NEW_FOREGROUND_TAB; + Navigate(¶ms); +} + +bool IsSyncPaused(Profile* profile) { + return GetAvatarSyncErrorType(profile) == AvatarSyncErrorType::kSyncPaused; +} + +bool HasUnconstentedProfile(Profile* profile) { + signin::IdentityManager* identity_manager = + IdentityManagerFactory::GetForProfile(profile); + return identity_manager ? !profile->IsGuestSession() && + identity_manager->HasPrimaryAccount( + signin::ConsentLevel::kSignin) + : false; +} + +int CountBrowsersFor(Profile* profile) { + int browser_count = chrome::GetBrowserCount(profile); + if (!profile->IsOffTheRecord() && profile->HasPrimaryOTRProfile()) { + browser_count += chrome::GetBrowserCount( + profile->GetPrimaryOTRProfile(/*create_if_needed=*/true)); + } + return browser_count; +}
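Editor's note: NavigateToGoogleAccountPage above builds its target URL by layering query parameters with net::AppendQueryParameter, which percent-encodes each value and returns a new GURL. A standalone sketch of that helper with illustrative example.com URLs (the real constants live in chrome/common/url_constants.h):

#include <string>

#include "net/base/url_util.h"
#include "url/gurl.h"

GURL BuildChooserUrl(const std::string& email) {
  // Continue URL with a source tag, e.g.
  // https://example.com/account?utm_source=demo
  GURL continue_url = net::AppendQueryParameter(
      GURL("https://example.com/account"), "utm_source", "demo");

  // Chooser URL with the email and the encoded continue URL, e.g.
  // https://example.com/chooser?Email=user%40example.com&continue=<percent-encoded account URL>
  GURL url("https://example.com/chooser");
  url = net::AppendQueryParameter(url, "Email", email);
  url = net::AppendQueryParameter(url, "continue", continue_url.spec());
  return url;
}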
diff --git a/chrome/browser/ui/profile_view_utils.h b/chrome/browser/ui/profile_view_utils.h new file mode 100644 index 0000000..38f75b7b --- /dev/null +++ b/chrome/browser/ui/profile_view_utils.h
@@ -0,0 +1,26 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_BROWSER_UI_PROFILE_VIEW_UTILS_H_ +#define CHROME_BROWSER_UI_PROFILE_VIEW_UTILS_H_ + +#include <string> +#include <utility> + +class Profile; + +// Navigates to the Google Account page. +void NavigateToGoogleAccountPage(Profile* profile, const std::string& email); + +// Returns true if account sync is paused. +bool IsSyncPaused(Profile* profile); + +// Returns true if the profile has an unconsented (signin-level) primary account. +bool HasUnconstentedProfile(Profile* profile); + +// Returns the number of browsers associated with |profile|. +// Note: For regular profiles this includes incognito sessions. +int CountBrowsersFor(Profile* profile); + +#endif  // CHROME_BROWSER_UI_PROFILE_VIEW_UTILS_H_
diff --git a/chrome/browser/ui/toolbar/app_menu_model.cc b/chrome/browser/ui/toolbar/app_menu_model.cc index 1d5228e..07ed0a28 100644 --- a/chrome/browser/ui/toolbar/app_menu_model.cc +++ b/chrome/browser/ui/toolbar/app_menu_model.cc
@@ -39,6 +39,7 @@ #include "chrome/browser/ui/browser.h" #include "chrome/browser/ui/browser_commands.h" #include "chrome/browser/ui/browser_finder.h" +#include "chrome/browser/ui/browser_list.h" #include "chrome/browser/ui/browser_window.h" #include "chrome/browser/ui/commander/commander.h" #include "chrome/browser/ui/global_error/global_error.h" @@ -46,6 +47,7 @@ #include "chrome/browser/ui/global_error/global_error_service_factory.h" #include "chrome/browser/ui/layout_constants.h" #include "chrome/browser/ui/managed_ui.h" +#include "chrome/browser/ui/profile_view_utils.h" #include "chrome/browser/ui/side_panel/companion/companion_utils.h" #include "chrome/browser/ui/tabs/tab_strip_model.h" #include "chrome/browser/ui/toolbar/app_menu_icon_controller.h" @@ -236,7 +238,7 @@ class ProfileSubMenuModel : public ui::SimpleMenuModel { public: ProfileSubMenuModel(ui::SimpleMenuModel::Delegate* delegate, - const Profile* profile); + Profile* profile); ProfileSubMenuModel(const ProfileSubMenuModel&) = delete; ProfileSubMenuModel& operator=(const ProfileSubMenuModel&) = delete; ~ProfileSubMenuModel() override = default; @@ -254,7 +256,7 @@ ProfileSubMenuModel::ProfileSubMenuModel( ui::SimpleMenuModel::Delegate* delegate, - const Profile* profile) + Profile* profile) : SimpleMenuModel(delegate) { const int avatar_icon_size = GetLayoutConstant(APP_MENU_PROFILE_ROW_AVATAR_ICON_SIZE); @@ -295,6 +297,35 @@ gfx::CHARACTER_BREAK)); } } + + if (!profile->IsIncognitoProfile() && !profile->IsGuestSession()) { + AddItemWithStringIdAndIcon( + IDC_CUSTOMIZE_CHROME, IDS_CUSTOMIZE_CHROME, + ui::ImageModel::FromVectorIcon(vector_icons::kEditChromeRefreshIcon, + ui::kColorMenuIcon, kDefaultIconSize)); + } + + AddItemWithIcon( + IDC_CLOSE_PROFILE, + l10n_util::GetPluralStringFUTF16(IDS_CLOSE_PROFILE, + CountBrowsersFor(profile)), + ui::ImageModel::FromVectorIcon(vector_icons::kCloseChromeRefreshIcon, + ui::kColorMenuIcon, kDefaultIconSize)); + + if (HasUnconstentedProfile(profile) && !IsSyncPaused(profile) && + !profile->IsIncognitoProfile()) { +#if BUILDFLAG(GOOGLE_CHROME_BRANDING) + const gfx::VectorIcon& manage_account_icon = + vector_icons::kGoogleGLogoMonochromeIcon; +#else + const gfx::VectorIcon& manage_account_icon = + kAccountManageChromeRefreshIcon; +#endif + AddItemWithStringIdAndIcon( + IDC_MANAGE_GOOGLE_ACCOUNT, IDS_MANAGE_GOOGLE_ACCOUNT, + ui::ImageModel::FromVectorIcon(manage_account_icon, ui::kColorMenuIcon, + kDefaultIconSize)); + } } class PasswordsAndAutofillSubMenuModel : public ui::SimpleMenuModel { @@ -1048,6 +1079,29 @@ } LogMenuAction(MENU_ACTION_PASSWORD_MANAGER); break; + + // Profile submenu. + case IDC_CUSTOMIZE_CHROME: + if (!uma_action_recorded_) { + UMA_HISTOGRAM_MEDIUM_TIMES("WrenchMenu.TimeToAction.CustomizeChrome", + delta); + } + LogMenuAction(MENU_ACTION_CUSTOMIZE_CHROME); + break; + case IDC_CLOSE_PROFILE: + if (!uma_action_recorded_) { + UMA_HISTOGRAM_MEDIUM_TIMES("WrenchMenu.TimeToAction.CloseProfile", + delta); + } + LogMenuAction(MENU_ACTION_CLOSE_PROFILE); + break; + case IDC_MANAGE_GOOGLE_ACCOUNT: + if (!uma_action_recorded_) { + UMA_HISTOGRAM_MEDIUM_TIMES( + "WrenchMenu.TimeToAction.ManageGoogleAccount", delta); + } + LogMenuAction(MENU_ACTION_MANAGE_GOOGLE_ACCOUNT); + break; } if (!uma_action_recorded_) {
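Editor's note: each new profile-submenu command above follows the same metrics pattern: record a one-shot WrenchMenu.TimeToAction.* timing on first use, then bump the menu-action enum via LogMenuAction(). A stripped-down sketch of the timing half; the histogram name here is illustrative, not an existing metric:

#include "base/metrics/histogram_macros.h"
#include "base/time/time.h"

void RecordTimeToAction(base::TimeDelta delta, bool already_recorded) {
  if (already_recorded)
    return;
  // Medium-range timing histogram (roughly 10 ms to 3 minutes); only the
  // first action after the menu opens is recorded.
  UMA_HISTOGRAM_MEDIUM_TIMES("WrenchMenu.TimeToAction.Example", delta);
}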
diff --git a/chrome/browser/ui/toolbar/app_menu_model.h b/chrome/browser/ui/toolbar/app_menu_model.h index c0e1bf5..c898bc46 100644 --- a/chrome/browser/ui/toolbar/app_menu_model.h +++ b/chrome/browser/ui/toolbar/app_menu_model.h
@@ -84,6 +84,9 @@ MENU_ACTION_OPEN_IN_PWA_WINDOW = 62, MENU_ACTION_SEND_TO_DEVICES = 63, MENU_ACTION_CREATE_QR_CODE = 64, + MENU_ACTION_CUSTOMIZE_CHROME = 65, + MENU_ACTION_CLOSE_PROFILE = 66, + MENU_ACTION_MANAGE_GOOGLE_ACCOUNT = 67, LIMIT_MENU_ACTION };
diff --git a/chrome/browser/ui/toolbar/app_menu_model_unittest.cc b/chrome/browser/ui/toolbar/app_menu_model_unittest.cc index 888cce693..5637fd4 100644 --- a/chrome/browser/ui/toolbar/app_menu_model_unittest.cc +++ b/chrome/browser/ui/toolbar/app_menu_model_unittest.cc
@@ -198,6 +198,17 @@ mutable int enable_count_; }; +class TestLogMetricsAppMenuModel : public AppMenuModel { + public: + TestLogMetricsAppMenuModel(ui::AcceleratorProvider* provider, + Browser* browser) + : AppMenuModel(provider, browser), log_metrics_count_(0) {} + + void LogMenuAction(AppMenuAction action_id) override { log_metrics_count_++; } + + int log_metrics_count_; +}; + TEST_F(AppMenuModelTest, Basics) { // Simulate that an update is available to ensure that the menu includes the // upgrade item for platforms that support it. @@ -387,6 +398,15 @@ check_for_icons(u"<Root Menu>", &model); } +TEST_F(TestAppMenuModelCR2023, LogProfileMenuMetrics) { + TestLogMetricsAppMenuModel model(this, browser()); + model.Init(); + model.ExecuteCommand(IDC_MANAGE_GOOGLE_ACCOUNT, 0); + model.ExecuteCommand(IDC_CLOSE_PROFILE, 0); + model.ExecuteCommand(IDC_CUSTOMIZE_CHROME, 0); + EXPECT_EQ(3, model.log_metrics_count_); +} + #if BUILDFLAG(IS_CHROMEOS) // Tests settings menu items is disabled in the app menu when // kSystemFeaturesDisableList is set.
diff --git a/chrome/browser/ui/views/chrome_layout_provider.cc b/chrome/browser/ui/views/chrome_layout_provider.cc index 5292100..de3429e8 100644 --- a/chrome/browser/ui/views/chrome_layout_provider.cc +++ b/chrome/browser/ui/views/chrome_layout_provider.cc
@@ -123,7 +123,7 @@ case DISTANCE_DROPDOWN_BUTTON_RIGHT_MARGIN: return 12; case DISTANCE_EXTENSIONS_MENU_BUTTON_ICON_SIZE: - return 16; + return features::IsChromeRefresh2023() ? 20 : 16; case DISTANCE_EXTENSIONS_MENU_EXTENSION_ICON_SIZE: return 28; case DISTANCE_EXTENSIONS_MENU_ICON_SPACING:
diff --git a/chrome/browser/ui/views/extensions/extensions_menu_site_permissions_page_view.cc b/chrome/browser/ui/views/extensions/extensions_menu_site_permissions_page_view.cc index b2d9a396..b7cf53c 100644 --- a/chrome/browser/ui/views/extensions/extensions_menu_site_permissions_page_view.cc +++ b/chrome/browser/ui/views/extensions/extensions_menu_site_permissions_page_view.cc
@@ -55,6 +55,9 @@ constexpr size_t kOnSiteButtonIndex = 1; constexpr size_t kOnAllSitesButtonIndex = 2; +// Same value as checkbox size in checkbox.cc. +constexpr float kCheckboxIconDipSize = 16; + // Returns the site access button in a site permissions `page`. std::vector<views::RadioButton*> GetSiteAccessButtons(views::View* page) { std::vector<views::View*> buttons; @@ -128,6 +131,15 @@ } } +// Returns the icon for the setting button. +std::unique_ptr<views::ImageView> GetSettingsButtonIcon(int icon_size) { + auto settings_launch_icon = + std::make_unique<views::ImageView>(ui::ImageModel::FromVectorIcon( + vector_icons::kLaunchIcon, ui::kColorIconSecondary)); + settings_launch_icon->SetImageSize(gfx::Size(icon_size, icon_size)); + return settings_launch_icon; +} + } // namespace ExtensionsMenuSitePermissionsPageView::ExtensionsMenuSitePermissionsPageView( @@ -146,13 +158,17 @@ views::LayoutProvider* layout_provider = views::LayoutProvider::Get(); const gfx::Insets dialog_insets = layout_provider->GetInsetsMetric(views::InsetsMetric::INSETS_DIALOG); + // Back button should use a vector image button, and this value should be the + // same as the border added by the image button factory. + const gfx::Insets back_button_border = layout_provider->GetInsetsMetric( + views::InsetsMetric::INSETS_VECTOR_IMAGE_BUTTON); ChromeLayoutProvider* const chrome_layout_provider = ChromeLayoutProvider::Get(); const int icon_size = chrome_layout_provider->GetDistanceMetric( DISTANCE_EXTENSIONS_MENU_BUTTON_ICON_SIZE); - const int icon_label_spacing = chrome_layout_provider->GetDistanceMetric( - views::DISTANCE_RELATED_LABEL_HORIZONTAL); + const int horizontal_spacing = chrome_layout_provider->GetDistanceMetric( + DISTANCE_RELATED_LABEL_HORIZONTAL_LIST); const int vertical_spacing = chrome_layout_provider->GetDistanceMetric( DISTANCE_UNRELATED_CONTROL_VERTICAL_LARGE); // This value must be the same as the `HoverButton` vertical margin. @@ -161,6 +177,10 @@ DISTANCE_CONTROL_LIST_VERTICAL) / 2; + // Views that need configuration after construction (e.g. access size after a + // separate view is constructed). + views::Label* toggle_label; + const auto create_separator_builder = [dialog_insets, vertical_spacing, hover_button_vertical_spacing]( bool full_width, bool is_bottom_hover_button = false) { @@ -173,7 +193,7 @@ return views::Builder<views::Separator>().SetProperty( views::kMarginsKey, gfx::Insets::TLBR(vertical_spacing, horizontal_margin, - bottom_margin, dialog_insets.right())); + bottom_margin, horizontal_margin)); }; const auto create_radio_button_builder = @@ -190,7 +210,23 @@ views::Builder<views::RadioButton>() .SetText(GetSiteAccessRadioButtonText(site_access)) .SetGroup(kSiteAccessButtonsId) - .SetImageLabelSpacing(icon_label_spacing) + // To align the radio button icon under the header back + // button we need to add: + // - left back button border + icon size difference to the + // left of the icon + // - icon size difference + right back button border to the + // right of the icon, plus the horizontal spacing to align + // the label.
+ .SetProperty( + views::kMarginsKey, + gfx::Insets::TLBR( + 0, + back_button_border.left() + + ((icon_size - kCheckboxIconDipSize) / 2), + 0, 0)) + .SetImageLabelSpacing( + ((icon_size - kCheckboxIconDipSize) / 2) + + back_button_border.right() + horizontal_spacing) .SetCallback(base::BindRepeating( &ExtensionsMenuHandler::OnSiteAccessSelected, base::Unretained(menu_handler), extension_id, @@ -200,8 +236,10 @@ .SetTextStyle(views::style::STYLE_SECONDARY) .SetHorizontalAlignment(gfx::ALIGN_LEFT) .SetMultiLine(true) - .SetBorder(views::CreateEmptyBorder( - gfx::Insets::VH(0, icon_label_spacing + icon_size)))); + .SetBorder(views::CreateEmptyBorder(gfx::Insets::VH( + 0, icon_size + back_button_border.left() + + back_button_border.right() + + horizontal_spacing)))); }; views::Builder<ExtensionsMenuSitePermissionsPageView>(this) @@ -225,14 +263,13 @@ base::BindRepeating( &ExtensionsMenuHandler::OpenMainPage, base::Unretained(menu_handler)), - vector_icons::kArrowBackIcon)) + vector_icons::kArrowBackIcon, icon_size)) .SetTooltipText( l10n_util::GetStringUTF16(IDS_ACCNAME_BACK)) .SetAccessibleName( l10n_util::GetStringUTF16(IDS_ACCNAME_BACK)) .CustomConfigure( base::BindOnce([](views::ImageButton* view) { - view->SizeToPreferredSize(); InstallCircleHighlightPathGenerator(view); })), // Extension name. @@ -242,10 +279,16 @@ .SetProperty(views::kFlexBehaviorKey, stretch_specification) .AddChildren( - views::Builder<views::ImageView>().CopyAddressTo( - &extension_icon_), - views::Builder<views::Label>().CopyAddressTo( - &extension_name_))), + views::Builder<views::ImageView>() + .CopyAddressTo(&extension_icon_) + .SetProperty(views::kMarginsKey, + gfx::Insets::TLBR( + 0, horizontal_spacing, 0, 0)), + views::Builder<views::Label>() + .CopyAddressTo(&extension_name_) + .SetProperty(views::kMarginsKey, + gfx::Insets::TLBR( + 0, horizontal_spacing, 0, 0)))), create_separator_builder(/*full_width=*/true, /*is_bottom_hover_button=*/false), // Content. @@ -269,20 +312,20 @@ PermissionsManager::UserSiteAccess::kOnAllSites), // Requests in toolbar toggle. create_separator_builder(/*full_width=*/false), - // TODO(crbug.com/1390952): Format this view. Toggle button - // should be on the right and centered. views::Builder<views::FlexLayoutView>() .SetCrossAxisAlignment(views::LayoutAlignment::kStart) // Add dialog horizontal margins. Vertical margins are // handled by separators. .SetProperty(views::kMarginsKey, gfx::Insets::VH(0, dialog_insets.left())) - .SetProperty(views::kFlexBehaviorKey, - stretch_specification) .AddChildren( - views::Builder<views::Label>().SetText( - l10n_util::GetStringUTF16( - IDS_EXTENSIONS_MENU_SITE_PERMISSIONS_PAGE_SHOW_REQUESTS_LABEL)), + views::Builder<views::Label>() + .CopyAddressTo(&toggle_label) + .SetText(l10n_util::GetStringUTF16( + IDS_EXTENSIONS_MENU_SITE_PERMISSIONS_PAGE_SHOW_REQUESTS_LABEL)) + .SetProperty(views::kFlexBehaviorKey, + stretch_specification) + .SetHorizontalAlignment(gfx::ALIGN_LEFT), views::Builder<views::ToggleButton>() .CopyAddressTo(&show_requests_toggle_) .SetCallback(base::BindRepeating( @@ -304,10 +347,7 @@ l10n_util::GetStringUTF16( IDS_EXTENSIONS_MENU_SITE_PERMISSIONS_PAGE_SETTINGS_BUTTON), /*subtitle=*/std::u16string(), - std::make_unique<views::ImageView>( - ui::ImageModel::FromVectorIcon( - vector_icons::kLaunchIcon, - ui::kColorIconSecondary)))) + GetSettingsButtonIcon(icon_size))) // Align the hover button text by adding the dialog // horizontal margins for the horizontal borders. 
.SetBorder(views::CreateEmptyBorder( @@ -324,6 +364,12 @@ IDS_EXTENSIONS_MENU_SITE_PERMISSIONS_PAGE_SETTINGS_BUTTON_TOOLTIP)))) .BuildChildren(); + + // Align the show requests toggle vertically with the label by getting the + // label height after construction. + show_requests_toggle_->SetPreferredSize( + gfx::Size(show_requests_toggle_->GetPreferredSize().width(), + toggle_label->GetLineHeight())); } void ExtensionsMenuSitePermissionsPageView::Update(
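The alignment arithmetic in the radio-button builder above is easier to follow with concrete numbers. A minimal sketch, assuming illustrative metrics (the 20, 4 and 8 DIP values are placeholders, not the real layout-provider metrics):

// Assumed, illustrative values only.
constexpr int kIconSize = 20;      // DISTANCE_EXTENSIONS_MENU_BUTTON_ICON_SIZE (assumed)
constexpr int kBorder = 4;         // INSETS_VECTOR_IMAGE_BUTTON left/right inset (assumed)
constexpr int kCheckboxIcon = 16;  // kCheckboxIconDipSize
constexpr int kSpacing = 8;        // DISTANCE_RELATED_LABEL_HORIZONTAL_LIST (assumed)

// Left margin centers the 16 DIP radio icon under the 20 DIP back-button icon.
constexpr int kLeftMargin = kBorder + (kIconSize - kCheckboxIcon) / 2;  // 4 + 2 = 6

// Image/label spacing adds the remaining half-difference, the right border and
// the usual label spacing, so the label column lines up with the header text.
constexpr int kImageLabelSpacing =
    (kIconSize - kCheckboxIcon) / 2 + kBorder + kSpacing;  // 2 + 4 + 8 = 14

// The multi-line description below each radio button is indented by the full
// icon footprint so it starts under the label.
constexpr int kDescriptionIndent =
    kIconSize + kBorder + kBorder + kSpacing;  // 20 + 4 + 4 + 8 = 36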
diff --git a/chrome/browser/ui/views/profiles/profile_menu_view.cc b/chrome/browser/ui/views/profiles/profile_menu_view.cc index da22deb8..71bffa7 100644 --- a/chrome/browser/ui/views/profiles/profile_menu_view.cc +++ b/chrome/browser/ui/views/profiles/profile_menu_view.cc
@@ -43,6 +43,7 @@ #include "chrome/browser/ui/color/chrome_color_id.h" #include "chrome/browser/ui/passwords/ui_utils.h" #include "chrome/browser/ui/profile_picker.h" +#include "chrome/browser/ui/profile_view_utils.h" #include "chrome/browser/ui/signin/profile_colors_util.h" #include "chrome/browser/ui/sync/sync_promo_ui.h" #include "chrome/browser/ui/ui_features.h" @@ -112,37 +113,6 @@ IDS_SYNC_ERROR_USER_MENU_CONFIRM_SYNC_SETTINGS_BUTTON); } } - -void NavigateToGoogleAccountPage(Profile* profile, const std::string& email) { - // Create a URL so that the account chooser is shown if the account with - // |email| is not signed into the web. Include a UTM parameter to signal the - // source of the navigation. - GURL google_account = net::AppendQueryParameter( - GURL(chrome::kGoogleAccountURL), "utm_source", "chrome-profile-chooser"); - - GURL url(chrome::kGoogleAccountChooserURL); - url = net::AppendQueryParameter(url, "Email", email); - url = net::AppendQueryParameter(url, "continue", google_account.spec()); - - NavigateParams params(profile, url, ui::PAGE_TRANSITION_LINK); - params.disposition = WindowOpenDisposition::NEW_FOREGROUND_TAB; - Navigate(&params); -} - -// Returns the number of browsers associated with |profile|. -// Note: For regular profiles this includes incognito sessions. -int CountBrowsersFor(Profile* profile) { - int browser_count = chrome::GetBrowserCount(profile); - if (!profile->IsOffTheRecord() && profile->HasPrimaryOTRProfile()) - browser_count += chrome::GetBrowserCount( - profile->GetPrimaryOTRProfile(/*create_if_needed=*/true)); - return browser_count; -} - -bool IsSyncPaused(Profile* profile) { - return GetAvatarSyncErrorType(profile) == AvatarSyncErrorType::kSyncPaused; -} - } // namespace // ProfileMenuView --------------------------------------------------------- @@ -659,12 +629,7 @@ void ProfileMenuView::BuildFeatureButtons() { Profile* profile = browser()->profile(); - signin::IdentityManager* identity_manager = - IdentityManagerFactory::GetForProfile(profile); - const bool has_unconsented_account = - !profile->IsGuestSession() && - identity_manager->HasPrimaryAccount(signin::ConsentLevel::kSignin); - + bool has_unconsented_account = HasUnconstentedProfile(profile); if (has_unconsented_account && !IsSyncPaused(profile)) { #if BUILDFLAG(GOOGLE_CHROME_BRANDING) // The Google G icon needs to be shrunk, so it won't look too big compared @@ -705,6 +670,8 @@ } #if BUILDFLAG(ENABLE_DICE_SUPPORT) || BUILDFLAG(IS_CHROMEOS_LACROS) + signin::IdentityManager* identity_manager = + IdentityManagerFactory::GetForProfile(profile); const bool has_primary_account = !profile->IsGuestSession() && identity_manager->HasPrimaryAccount(signin::ConsentLevel::kSync);
diff --git a/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.cc b/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.cc index 45daa64..9404909 100644 --- a/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.cc +++ b/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.cc
@@ -7,7 +7,9 @@ #include <memory> #include <string> #include <utility> +#include <vector> +#include "base/metrics/field_trial_params.h" #include "chrome/app/vector_icons/vector_icons.h" #include "chrome/browser/language/language_model_manager_factory.h" #include "chrome/browser/profiles/profile.h" @@ -22,20 +24,37 @@ #include "chrome/browser/ui/webui/side_panel/read_anything/read_anything_ui.h" #include "chrome/common/webui_url_constants.h" #include "chrome/grit/generated_resources.h" +#include "components/feature_engagement/public/feature_constants.h" #include "components/language/core/browser/language_model.h" #include "components/language/core/browser/language_model_manager.h" #include "components/language/core/common/locale_util.h" +#include "ui/accessibility/accessibility_features.h" #include "ui/base/l10n/l10n_util.h" #include "ui/base/models/combobox_model.h" +namespace { + +// Get the list of distillable URLs defined by the Finch experiment parameter. +std::vector<std::string> GetDistillableURLs() { + return base::SplitString(base::GetFieldTrialParamValueByFeature( + features::kReadAnything, "distillable_urls"), + ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); +} + +} // namespace + ReadAnythingCoordinator::ReadAnythingCoordinator(Browser* browser) - : BrowserUserData<ReadAnythingCoordinator>(*browser) { + : BrowserUserData<ReadAnythingCoordinator>(*browser), + distillable_urls_(GetDistillableURLs()) { // Create the model and initialize it with user prefs (if present). model_ = std::make_unique<ReadAnythingModel>(); InitModelWithUserPrefs(); // Create the controller. controller_ = std::make_unique<ReadAnythingController>(model_.get(), browser); + + browser->tab_strip_model()->AddObserver(this); + Observe(GetActiveWebContents()); } void ReadAnythingCoordinator::InitModelWithUserPrefs() { @@ -98,6 +117,9 @@ SidePanelCoordinator::GetGlobalSidePanelRegistry(browser); global_registry->Deregister( SidePanelEntry::Key(SidePanelEntry::Id::kReadAnything)); + + browser->tab_strip_model()->RemoveObserver(this); + Observe(nullptr); } void ReadAnythingCoordinator::CreateAndRegisterEntry( @@ -183,4 +205,40 @@ return std::move(container_view); } +void ReadAnythingCoordinator::OnTabStripModelChanged( + TabStripModel* tab_strip_model, + const TabStripModelChange& change, + const TabStripSelectionChange& selection) { + if (!selection.active_tab_changed()) { + return; + } + Observe(GetActiveWebContents()); + MaybeShowReadingModeSidePanelIPH(); +} + +void ReadAnythingCoordinator::DidStopLoading() { + MaybeShowReadingModeSidePanelIPH(); +} + +content::WebContents* ReadAnythingCoordinator::GetActiveWebContents() const { + return GetBrowser().tab_strip_model()->GetActiveWebContents(); +} + +void ReadAnythingCoordinator::MaybeShowReadingModeSidePanelIPH() { + auto* web_contents = GetActiveWebContents(); + if (!web_contents) { + return; + } + auto url = web_contents->GetLastCommittedURL(); + for (auto distillable : distillable_urls_) { + // If the url's domain is found in distillable urls AND the url has a + // filename (i.e. it is not a home page or sub-home page), show the promo. + if (url.DomainIs(distillable) && !url.ExtractFileName().empty()) { + GetBrowser().window()->MaybeShowFeaturePromo( + feature_engagement::kIPHReadingModeSidePanelFeature); + return; + } + } +} + WEB_CONTENTS_USER_DATA_KEY_IMPL(ReadAnythingCoordinator);
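For reference, the promo check added above keys off two GURL properties: the host must match an entry in the comma-separated "distillable_urls" Finch parameter, and the path must end in a filename. A minimal sketch of that check for a single hypothetical entry, "example.com" (not a value from this change):

#include "url/gurl.h"

// Hypothetical helper mirroring MaybeShowReadingModeSidePanelIPH() for an
// assumed distillable_urls entry of "example.com".
bool WouldShowReadingModeIPH(const GURL& url) {
  // "https://example.com/" has an empty filename and is skipped, while
  // "https://example.com/story.html" matches the domain and has a filename.
  return url.DomainIs("example.com") && !url.ExtractFileName().empty();
}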
diff --git a/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.h b/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.h index 197e191c9..36e7e603 100644 --- a/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.h +++ b/chrome/browser/ui/views/side_panel/read_anything/read_anything_coordinator.h
@@ -6,12 +6,15 @@ #define CHROME_BROWSER_UI_VIEWS_SIDE_PANEL_READ_ANYTHING_READ_ANYTHING_COORDINATOR_H_ #include <memory> +#include <string> #include "base/observer_list.h" #include "base/observer_list_types.h" #include "chrome/browser/ui/browser_user_data.h" +#include "chrome/browser/ui/tabs/tab_strip_model_observer.h" #include "chrome/browser/ui/views/side_panel/read_anything/read_anything_model.h" #include "chrome/browser/ui/views/side_panel/side_panel_entry_observer.h" +#include "content/public/browser/web_contents_observer.h" class Browser; class ReadAnythingController; @@ -33,7 +36,9 @@ // This class has the same lifetime as the browser. // class ReadAnythingCoordinator : public BrowserUserData<ReadAnythingCoordinator>, - public SidePanelEntryObserver { + public SidePanelEntryObserver, + public TabStripModelObserver, + public content::WebContentsObserver { public: class Observer : public base::CheckedObserver { public: @@ -70,9 +75,25 @@ // container view and all its child views and returns it. std::unique_ptr<views::View> CreateContainerView(); + // TabStripModelObserver: + void OnTabStripModelChanged( + TabStripModel* tab_strip_model, + const TabStripModelChange& change, + const TabStripSelectionChange& selection) override; + + // content::WebContentsObserver: + void DidStopLoading() override; + + content::WebContents* GetActiveWebContents() const; + + // Attempts to show in product help for reading mode. + void MaybeShowReadingModeSidePanelIPH(); + std::unique_ptr<ReadAnythingModel> model_; std::unique_ptr<ReadAnythingController> controller_; + const base::flat_set<std::string> distillable_urls_; + base::ObserverList<Observer> observers_; BROWSER_USER_DATA_KEY_DECL(); };
diff --git a/chrome/browser/ui/views/side_panel/search_companion/companion_page_browsertest.cc b/chrome/browser/ui/views/side_panel/search_companion/companion_page_browsertest.cc index 9324b33..afd4f07 100644 --- a/chrome/browser/ui/views/side_panel/search_companion/companion_page_browsertest.cc +++ b/chrome/browser/ui/views/side_panel/search_companion/companion_page_browsertest.cc
@@ -86,11 +86,11 @@ // to the postmessage. absl::optional<PromoType> promo_type; absl::optional<PromoAction> promo_action; - absl::optional<std::string> exps_promo_url; absl::optional<PhFeedback> ph_feedback; - absl::optional<std::string> reporting_url; absl::optional<bool> is_exps_opted_in; absl::optional<std::string> url_for_open_in_new_tab; + absl::optional<std::string> url_to_open; + absl::optional<bool> use_new_tab; absl::optional<UiSurface> ui_surface; absl::optional<int> ui_surface_position; absl::optional<int> child_element_available_count; @@ -126,20 +126,12 @@ << ";"; } - if (exps_promo_url.has_value()) { - ss << "message['expsPromoUrl'] = '" << exps_promo_url.value() << "';"; - } - if (ph_feedback.has_value()) { ss << "message['phFeedback'] = " << base::NumberToString(static_cast<size_t>(ph_feedback.value())) << ";"; } - if (reporting_url.has_value()) { - ss << "message['reportingUrl'] = '" << reporting_url.value() << "';"; - } - if (is_exps_opted_in.has_value()) { ss << "message['isExpsOptedIn'] = " << base::NumberToString(is_exps_opted_in.value()) << ";"; @@ -150,6 +142,15 @@ << url_for_open_in_new_tab.value() << "';"; } + if (url_to_open.has_value()) { + ss << "message['urlToOpen'] = '" << url_to_open.value() << "';"; + } + + if (use_new_tab.has_value()) { + ss << "message['useNewTab'] = '" + << base::NumberToString(use_new_tab.value()) << "';"; + } + if (ui_surface.has_value()) { ss << "message['uiSurface'] = " << base::NumberToString(static_cast<size_t>(ui_surface.value())) @@ -817,7 +818,7 @@ static_cast<int>(companion::PromoEvent::kMsbbRejected)); } -IN_PROC_BROWSER_TEST_F(CompanionPageBrowserTest, ExpsPromoURLLoadsInNewTab) { +IN_PROC_BROWSER_TEST_F(CompanionPageBrowserTest, OpenUrlInBrowser) { ukm::TestAutoSetUkmRecorder ukm_recorder; // Load a page on the active tab. ASSERT_TRUE( @@ -835,10 +836,9 @@ SidePanelEntry::Id::kSearchCompanion); // Show exps promo, user accepts it. - CompanionScriptBuilder builder(MethodType::kOnPromoAction); - builder.promo_type = PromoType::kExps; - builder.promo_action = PromoAction::kAccepted; - builder.exps_promo_url = kExpectedExpsPromoUrl; + CompanionScriptBuilder builder(MethodType::kOpenUrlInBrowser); + builder.url_to_open = kExpectedExpsPromoUrl; + builder.use_new_tab = true; EXPECT_TRUE(ExecJs(builder.Build())); // Verify that a new tab opens up to load the exps URL. @@ -1005,9 +1005,13 @@ // Show exps promo, user accepts it. CompanionScriptBuilder builder(MethodType::kOnPhFeedback); builder.ph_feedback = PhFeedback::kReportContent; - builder.reporting_url = kPhReportingUrl; EXPECT_TRUE(ExecJs(builder.Build())); + CompanionScriptBuilder builder2(MethodType::kOpenUrlInBrowser); + builder2.url_to_open = kPhReportingUrl; + builder2.use_new_tab = true; + EXPECT_TRUE(ExecJs(builder2.Build())); + // Verify that a new tab opens up to load the exps URL. WaitForTabCount(2); EXPECT_EQ(1, browser()->tab_strip_model()->active_index());
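A rough sketch of how the new builder fields are exercised, with a placeholder URL; only the two message fields shown follow from the stream inserts above, the rest of the payload is unchanged:

CompanionScriptBuilder builder(MethodType::kOpenUrlInBrowser);
builder.url_to_open = "https://foo.example/promo";  // Placeholder URL.
builder.use_new_tab = true;
// Build() then emits, among other lines:
//   message['urlToOpen'] = 'https://foo.example/promo';
//   message['useNewTab'] = '1';  // NumberToString() renders the bool as 1/0.
EXPECT_TRUE(ExecJs(builder.Build()));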
diff --git a/chrome/browser/ui/views/side_panel/side_panel_coordinator.cc b/chrome/browser/ui/views/side_panel/side_panel_coordinator.cc index 96fdd20..f2ab17b 100644 --- a/chrome/browser/ui/views/side_panel/side_panel_coordinator.cc +++ b/chrome/browser/ui/views/side_panel/side_panel_coordinator.cc
@@ -5,7 +5,6 @@ #include "chrome/browser/ui/views/side_panel/side_panel_coordinator.h" #include <memory> #include <utility> -#include <vector> #include "base/functional/bind.h" #include "base/functional/callback.h" @@ -35,7 +34,6 @@ #include "chrome/grit/generated_resources.h" #include "components/feature_engagement/public/feature_constants.h" #include "components/strings/grit/components_strings.h" -#include "ui/accessibility/accessibility_features.h" #include "ui/base/interaction/element_tracker.h" #include "ui/base/l10n/l10n_util.h" #include "ui/base/metadata/metadata_header_macros.h" @@ -202,17 +200,10 @@ PopulateSidePanelCallback loaded_callback_; }; -// Get the list of distillable URLs defined by the Finch experiment parameter. -std::vector<std::string> GetDistillableURLs() { - return base::SplitString(base::GetFieldTrialParamValueByFeature( - features::kReadAnything, "distillable_urls"), - ",", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); -} - } // namespace SidePanelCoordinator::SidePanelCoordinator(BrowserView* browser_view) - : browser_view_(browser_view), distillable_urls_(GetDistillableURLs()) { + : browser_view_(browser_view) { combobox_model_ = std::make_unique<SidePanelComboboxModel>(browser_view_); auto global_registry = std::make_unique<SidePanelRegistry>(); @@ -222,7 +213,6 @@ std::move(global_registry)); browser_view_->browser()->tab_strip_model()->AddObserver(this); - Observe(GetActiveWebContents()); SidePanelUtil::PopulateGlobalEntries(browser_view->browser(), global_registry_); @@ -231,7 +221,6 @@ SidePanelCoordinator::~SidePanelCoordinator() { browser_view_->browser()->tab_strip_model()->RemoveObserver(this); view_state_observers_.Clear(); - Observe(nullptr); } // static @@ -627,7 +616,8 @@ } SidePanelRegistry* SidePanelCoordinator::GetActiveContextualRegistry() const { - if (auto* web_contents = GetActiveWebContents()) { + if (auto* web_contents = + browser_view_->browser()->tab_strip_model()->GetActiveWebContents()) { return SidePanelRegistry::Get(web_contents); } return nullptr; @@ -954,9 +944,6 @@ Show(new_contextual_registry->active_entry().value(), SidePanelUtil::SidePanelOpenTrigger::kTabChanged); } - - Observe(GetActiveWebContents()); - MaybeShowReadingModeSidePanelIPH(); } void SidePanelCoordinator::UpdateNewTabButtonState() { @@ -996,31 +983,3 @@ views::View* starting_from) { UpdateToolbarButtonHighlight(observed_view->GetVisible()); } - -void SidePanelCoordinator::DidStopLoading() { - MaybeShowReadingModeSidePanelIPH(); -} - -content::WebContents* SidePanelCoordinator::GetActiveWebContents() const { - return browser_view_->browser()->tab_strip_model()->GetActiveWebContents(); -} - -void SidePanelCoordinator::MaybeShowReadingModeSidePanelIPH() { - if (!features::IsReadAnythingEnabled()) { - return; - } - auto* web_contents = GetActiveWebContents(); - if (!web_contents) { - return; - } - auto url = web_contents->GetLastCommittedURL(); - for (auto distillable : distillable_urls_) { - // If the url's domain is found in distillable urls AND the url has a - // filename (i.e. it is not a home page or sub-home page), show the promo. - if (url.DomainIs(distillable) && !url.ExtractFileName().empty()) { - browser_view_->browser()->window()->MaybeShowFeaturePromo( - feature_engagement::kIPHReadingModeSidePanelFeature); - return; - } - } -}
diff --git a/chrome/browser/ui/views/side_panel/side_panel_coordinator.h b/chrome/browser/ui/views/side_panel/side_panel_coordinator.h index 20309d5..e9bc526 100644 --- a/chrome/browser/ui/views/side_panel/side_panel_coordinator.h +++ b/chrome/browser/ui/views/side_panel/side_panel_coordinator.h
@@ -17,7 +17,6 @@ #include "chrome/browser/ui/views/side_panel/side_panel_registry_observer.h" #include "chrome/browser/ui/views/side_panel/side_panel_util.h" #include "chrome/browser/ui/views/side_panel/side_panel_view_state_observer.h" -#include "content/public/browser/web_contents_observer.h" #include "extensions/common/extension_id.h" #include "third_party/abseil-cpp/absl/types/optional.h" #include "ui/views/view_observer.h" @@ -47,8 +46,7 @@ class SidePanelCoordinator final : public SidePanelRegistryObserver, public TabStripModelObserver, public views::ViewObserver, - public SidePanelUI, - public content::WebContentsObserver { + public SidePanelUI { public: explicit SidePanelCoordinator(BrowserView* browser_view); SidePanelCoordinator(const SidePanelCoordinator&) = delete; @@ -224,14 +222,6 @@ const TabStripModelChange& change, const TabStripSelectionChange& selection) override; - // content::WebContentsObserver: - void DidStopLoading() override; - - content::WebContents* GetActiveWebContents() const; - - // Attempts to show in product help for reading mode. - void MaybeShowReadingModeSidePanelIPH(); - // When true, prevent loading delays when switching between side panel // entries. bool no_delays_for_testing_ = false; @@ -270,8 +260,6 @@ base::ObserverList<SidePanelViewStateObserver> view_state_observers_; - const base::flat_set<std::string> distillable_urls_; - base::ScopedMultiSourceObservation<SidePanelRegistry, SidePanelRegistryObserver> registry_observations_{this};
diff --git a/chrome/browser/ui/views/toolbar/app_menu_browsertest.cc b/chrome/browser/ui/views/toolbar/app_menu_browsertest.cc index 962c567..a2ef79c 100644 --- a/chrome/browser/ui/views/toolbar/app_menu_browsertest.cc +++ b/chrome/browser/ui/views/toolbar/app_menu_browsertest.cc
@@ -20,6 +20,7 @@ #include "build/chromeos_buildflags.h" #include "chrome/browser/sessions/tab_restore_service_factory.h" #include "chrome/browser/sessions/tab_restore_service_load_waiter.h" +#include "chrome/browser/signin/identity_manager_factory.h" #include "chrome/browser/ui/browser.h" #include "chrome/browser/ui/browser_commands.h" #include "chrome/browser/ui/browser_tabstrip.h" @@ -32,6 +33,8 @@ #include "chrome/browser/ui/views/toolbar/toolbar_view.h" #include "chrome/test/base/ui_test_utils.h" #include "components/password_manager/core/common/password_manager_features.h" +#include "components/signin/public/identity_manager/identity_manager.h" +#include "components/signin/public/identity_manager/identity_test_utils.h" #include "content/public/test/browser_test.h" #include "third_party/abseil-cpp/absl/types/optional.h" #include "ui/views/controls/menu/menu_item_view.h" @@ -92,6 +95,7 @@ {"extensions", IDC_EXTENSIONS_SUBMENU}, {"find_and_edit", IDC_FIND_AND_EDIT_MENU}, {"save_and_share", IDC_SAVE_AND_SHARE_MENU}, + {"profile_menu_in_app", IDC_PROFILE_MENU_IN_APP_MENU}, }); const auto* const id_entry = kSubmenus.find(name); if (id_entry == kSubmenus.end()) { @@ -257,4 +261,14 @@ ShowAndVerifyUi(); } +#if !BUILDFLAG(IS_CHROMEOS_ASH) +IN_PROC_BROWSER_TEST_F(AppMenuBrowserTestRefreshOnly, + InvokeUi_profile_menu_in_app) { + signin::IdentityManager* identity_manager = + IdentityManagerFactory::GetForProfile(browser()->profile()); + signin::SetPrimaryAccount(identity_manager, "user@example.com", + signin::ConsentLevel::kSignin); + ShowAndVerifyUi(); +} +#endif } // namespace
diff --git a/chrome/browser/ui/views/user_education/help_bubble_factory_webui_interactive_uitest.cc b/chrome/browser/ui/views/user_education/help_bubble_factory_webui_interactive_uitest.cc index ab4c016a..2a7767d 100644 --- a/chrome/browser/ui/views/user_education/help_bubble_factory_webui_interactive_uitest.cc +++ b/chrome/browser/ui/views/user_education/help_bubble_factory_webui_interactive_uitest.cc
@@ -41,6 +41,7 @@ constexpr char16_t kBubbleBodyText[] = u"Bubble body text."; constexpr char16_t kBubbleButtonText[] = u"Button"; constexpr char16_t kCloseButtonAltText[] = u"Close"; +DEFINE_LOCAL_ELEMENT_IDENTIFIER_VALUE(kReadLaterWebContentsElementId); } // namespace class HelpBubbleFactoryWebUIInteractiveUiTest : public InteractiveBrowserTest { @@ -48,8 +49,25 @@ HelpBubbleFactoryWebUIInteractiveUiTest() = default; ~HelpBubbleFactoryWebUIInteractiveUiTest() override = default; + // Opens the side panel and instruments the Read Later WebContents as + // kReadLaterWebContentsElementId. + auto OpenReadingListSidePanel() { + return Steps( + // Click the Side Panel button and wait for the side panel to appear. + PressButton(kSidePanelButtonElementId), + WaitForShow(kSidePanelElementId), FlushEvents(), + // Select the Reading List side panel and wait for the WebView to + // appear. + SelectDropdownItem(kSidePanelComboboxElementId, + static_cast<int>(SidePanelEntry::Id::kReadingList)), + WaitForShow(kReadLaterSidePanelWebViewElementId), + // Ensure that the Reading List side panel loads properly. + InstrumentNonTabWebView(kReadLaterWebContentsElementId, + kReadLaterSidePanelWebViewElementId)); + } + auto ShowHelpBubble(ElementSpecifier element) { - return InAnyContext( + StepBuilder step = std::move( AfterShow( element, base::BindLambdaForTesting( @@ -62,6 +80,12 @@ } })) .SetDescription("ShowHelpBubble")); + + // A WebUI anchor will not be in the same context as the browser. However, + // InAnyContext is not compatible with named elements. + return absl::holds_alternative<ui::ElementIdentifier>(element) + ? InAnyContext(step) + : std::move(step); } auto CloseHelpBubble() { @@ -93,23 +117,16 @@ IN_PROC_BROWSER_TEST_F(HelpBubbleFactoryWebUIInteractiveUiTest, ShowFloatingHelpBubble) { - DEFINE_LOCAL_ELEMENT_IDENTIFIER_VALUE(kReadLaterElementId); const DeepQuery kPathToAddCurrentTabElement{"reading-list-app", "#currentPageActionButton"}; RunTestSequence( - // Click on the toolbar button to show the side panel. - PressButton(kSidePanelButtonElementId), WaitForShow(kSidePanelElementId), - FlushEvents(), - SelectDropdownItem(kSidePanelComboboxElementId, - static_cast<int>(SidePanelEntry::Id::kReadingList)), + OpenReadingListSidePanel(), ShowHelpBubble(kAddCurrentTabToReadingListElementId), // Verify that the anchor element is marked. - InstrumentNonTabWebView(kReadLaterElementId, - kReadLaterSidePanelWebViewElementId), - CheckJsResultAt(kReadLaterElementId, kPathToAddCurrentTabElement, - "el => el.classList.contains('help-anchor-highlight')", - true), + CheckJsResultAt( + kReadLaterWebContentsElementId, kPathToAddCurrentTabElement, + "el => el.classList.contains('help-anchor-highlight')", true), // Expect the help bubble to display with the correct parameters. CheckViewProperty( @@ -134,9 +151,9 @@ // Verify that the anchor element is no longer marked. 
CloseHelpBubble(), - CheckJsResultAt(kReadLaterElementId, kPathToAddCurrentTabElement, - "el => el.classList.contains('help-anchor-highlight')", - false)); + CheckJsResultAt( + kReadLaterWebContentsElementId, kPathToAddCurrentTabElement, + "el => el.classList.contains('help-anchor-highlight')", false)); } IN_PROC_BROWSER_TEST_F(HelpBubbleFactoryWebUIInteractiveUiTest, @@ -158,10 +175,7 @@ IN_PROC_BROWSER_TEST_F(HelpBubbleFactoryWebUIInteractiveUiTest, FloatingHelpBubbleHiddenOnWebUiHidden) { RunTestSequence( - PressButton(kSidePanelButtonElementId), WaitForShow(kSidePanelElementId), - FlushEvents(), - SelectDropdownItem(kSidePanelComboboxElementId, - static_cast<int>(SidePanelEntry::Id::kReadingList)), + OpenReadingListSidePanel(), ShowHelpBubble(kAddCurrentTabToReadingListElementId), WaitForShow( user_education::HelpBubbleView::kHelpBubbleElementIdForTesting), @@ -195,17 +209,13 @@ IN_PROC_BROWSER_TEST_F(HelpBubbleFactoryRtlWebUIInteractiveUiTest, ResizeSidePanelSendsUpdate) { RunTestSequence( - PressButton(kSidePanelButtonElementId), WaitForShow(kSidePanelElementId), - FlushEvents(), - SelectDropdownItem(kSidePanelComboboxElementId, - static_cast<int>(SidePanelEntry::Id::kReadingList)), - FlushEvents(), + OpenReadingListSidePanel(), InAnyContext( AfterShow(kAddCurrentTabToReadingListElementId, [](ui::InteractionSequence* seq, ui::TrackedElement* el) { seq->NameElement(el, kSidePanelElementName); })), - ShowHelpBubble(kAddCurrentTabToReadingListElementId), FlushEvents(), + ShowHelpBubble(kSidePanelElementName), FlushEvents(), WithView(kSidePanelElementId, [](SidePanel* side_panel) { side_panel->OnResize(-50, true);
diff --git a/chrome/browser/ui/views/webid/account_selection_bubble_view.cc b/chrome/browser/ui/views/webid/account_selection_bubble_view.cc index d17339b..2e26805 100644 --- a/chrome/browser/ui/views/webid/account_selection_bubble_view.cc +++ b/chrome/browser/ui/views/webid/account_selection_bubble_view.cc
@@ -572,8 +572,6 @@ const absl::optional<std::u16string>& iframe_for_display, const std::u16string& idp_for_display, const content::IdentityProviderMetadata& idp_metadata) { - constexpr int kLineHeight = 5; - std::u16string title = GetTitle(top_frame_for_display, iframe_for_display, idp_for_display, rp_context_); UpdateHeader(idp_metadata, title, subtitle_, @@ -584,14 +582,13 @@ auto row = std::make_unique<views::View>(); row->SetLayoutManager(std::make_unique<views::BoxLayout>( views::BoxLayout::Orientation::kVertical, - gfx::Insets::VH(kTopBottomPadding, kLeftRightPadding))); + gfx::Insets::VH(0, kLeftRightPadding))); // Add column for text. views::View* const text_column = row->AddChildView(std::make_unique<views::View>()); text_column->SetLayoutManager(std::make_unique<views::BoxLayout>( - views::BoxLayout::Orientation::kVertical, - gfx::Insets::VH(kTopBottomPadding, 0))); + views::BoxLayout::Orientation::kVertical)); // Add body for mismatch dialog. views::Label* const body = @@ -600,10 +597,11 @@ idp_for_display, top_frame_for_display), views::style::CONTEXT_DIALOG_TITLE, views::style::STYLE_PRIMARY)); body->SetMultiLine(true); - body->SetFontList( - gfx::FontList().Derive(5, gfx::Font::NORMAL, gfx::Font::Weight::MEDIUM)); + body->SetBorder( + views::CreateEmptyBorder(gfx::Insets::VH(kVerticalSpacing, 0))); body->SetHorizontalAlignment(gfx::HorizontalAlignment::ALIGN_LEFT); - body->SetLineHeight(kLineHeight); + constexpr int kBodyLineHeight = 22; + body->SetLineHeight(kBodyLineHeight); // Add description for signing in. views::Label* const description = @@ -614,8 +612,13 @@ views::style::STYLE_SECONDARY)); description->SetMultiLine(true); description->SetHorizontalAlignment(gfx::HorizontalAlignment::ALIGN_LEFT); - description->SetBorder(views::CreateEmptyBorder(gfx::Insets::VH(10, 0))); - description->SetLineHeight(kLineHeight); + constexpr int kDescriptionLineHeight = 20; + description->SetLineHeight(kDescriptionLineHeight); + + // The space between the description and the continue button. + constexpr int kBottomSpacing = 16; + description->SetBorder( + views::CreateEmptyBorder(gfx::Insets::TLBR(0, 0, kBottomSpacing, 0))); // Add continue button. auto button = std::make_unique<ContinueButton>(
diff --git a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing.h b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing.h deleted file mode 100644 index 6f699a7..0000000 --- a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing.h +++ /dev/null
@@ -1,19 +0,0 @@ -// Copyright 2019 The Chromium Authors -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CHROME_BROWSER_UI_WEBUI_ASH_ARC_GRAPHICS_TRACING_ARC_GRAPHICS_TRACING_H_ -#define CHROME_BROWSER_UI_WEBUI_ASH_ARC_GRAPHICS_TRACING_ARC_GRAPHICS_TRACING_H_ - -namespace ash { - -enum class ArcGraphicsTracingMode { - // Full tracing mode. - kFull, - // Overview tracing mode with ability to compare different runs. - kOverview, -}; - -} // namespace ash - -#endif // CHROME_BROWSER_UI_WEBUI_ASH_ARC_GRAPHICS_TRACING_ARC_GRAPHICS_TRACING_H_
diff --git a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.cc b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.cc index fbbfca2..614257d 100644 --- a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.cc +++ b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.cc
@@ -52,35 +52,6 @@ namespace { -constexpr char kLastTracingModelName[] = "last_tracing_model.json"; - -// Maximum interval to display in full mode. -constexpr base::TimeDelta kMaxIntervalToDisplayInFullMode = base::Seconds(5.0); - -base::FilePath GetLastTracingModelPath(Profile* profile) { - DCHECK(profile); - return file_manager::util::GetDownloadsFolderForProfile(profile).AppendASCII( - kLastTracingModelName); -} - -std::pair<base::Value, std::string> MaybeLoadLastGraphicsModel( - const base::FilePath& last_model_path) { - std::string json_content; - if (!base::ReadFileToString(last_model_path, &json_content)) - return std::make_pair(base::Value(), std::string()); - - absl::optional<base::Value> model = base::JSONReader::Read(json_content); - if (!model || !model->is_dict()) - return std::make_pair(base::Value(), "Failed to read last tracing model"); - - arc::ArcTracingGraphicsModel graphics_model; - if (!graphics_model.LoadFromValue(model->GetDict())) { - return std::make_pair(base::Value(), "Failed to load last tracing model"); - } - - return std::make_pair(std::move(*model), "Loaded last tracing model"); -} - class ProcessFilterPassAll : public base::ProcessFilter { public: ProcessFilterPassAll() = default; @@ -152,7 +123,6 @@ std::pair<base::Value, std::string> BuildGraphicsModel( const std::string& data, - ArcGraphicsTracingMode mode, const std::string& title, const std::vector<unsigned char>& icon_png, base::Time timestamp, @@ -191,8 +161,7 @@ &common_model.system_model()); arc::ArcTracingGraphicsModel graphics_model; - if (mode != ArcGraphicsTracingMode::kFull) - graphics_model.set_skip_structure_validation(); + graphics_model.set_skip_structure_validation(); if (!graphics_model.Build(common_model)) { return std::make_pair(base::Value(), "Failed to build tracing model"); } @@ -218,11 +187,9 @@ } std::pair<base::Value, std::string> LoadGraphicsModel( - ArcGraphicsTracingMode mode, const std::string& json_text) { arc::ArcTracingGraphicsModel graphics_model; - if (mode != ArcGraphicsTracingMode::kFull) - graphics_model.set_skip_structure_validation(); + graphics_model.set_skip_structure_validation(); if (!graphics_model.LoadFromJson(json_text)) { return std::make_pair(base::Value(), "Failed to load tracing model"); } @@ -232,42 +199,15 @@ "Tracing model is loaded"); } -std::string GetJavascriptDomain(ArcGraphicsTracingMode mode) { - switch (mode) { - case ArcGraphicsTracingMode::kFull: - return "cr.ArcGraphicsTracing."; - case ArcGraphicsTracingMode::kOverview: - return "cr.ArcOverviewTracing."; - } -} +constexpr char kJavascriptDomain[] = "cr.ArcOverviewTracing."; -base::trace_event::TraceConfig GetTracingConfig(ArcGraphicsTracingMode mode) { - switch (mode) { - case ArcGraphicsTracingMode::kFull: { - base::trace_event::TraceConfig config( - "-*,exo,viz,toplevel,gpu,cc,blink,disabled-by-default-android gfx," - "disabled-by-default-android view", - base::trace_event::RECORD_CONTINUOUSLY); - config.EnableSystrace(); - // By default, systracing starts pre-defined set of categories with - // predefined set of events in each category. Limit events to what we - // actually analyze in ArcTracingModel. 
- config.EnableSystraceEvent("i915:intel_gpu_freq_change"); - config.EnableSystraceEvent("drm_msm_gpu:msm_gpu_freq_change"); - config.EnableSystraceEvent("power:cpu_idle"); - config.EnableSystraceEvent("sched:sched_wakeup"); - config.EnableSystraceEvent("sched:sched_switch"); - return config; - } - case ArcGraphicsTracingMode::kOverview: { - base::trace_event::TraceConfig config( - "-*,exo,viz,toplevel,gpu", base::trace_event::RECORD_CONTINUOUSLY); - config.EnableSystrace(); - config.EnableSystraceEvent("i915:intel_gpu_freq_change"); - config.EnableSystraceEvent("drm_msm_gpu:msm_gpu_freq_change"); - return config; - } - } +base::trace_event::TraceConfig GetTracingConfig() { + base::trace_event::TraceConfig config("-*,exo,viz,toplevel,gpu", + base::trace_event::RECORD_CONTINUOUSLY); + config.EnableSystrace(); + config.EnableSystraceEvent("i915:intel_gpu_freq_change"); + config.EnableSystraceEvent("drm_msm_gpu:msm_gpu_freq_change"); + return config; } } // namespace @@ -297,11 +237,9 @@ (base::Time::Now() - base::Time()).InSeconds())); } -ArcGraphicsTracingHandler::ArcGraphicsTracingHandler( - ArcGraphicsTracingMode mode) +ArcGraphicsTracingHandler::ArcGraphicsTracingHandler() : wm_helper_(exo::WMHelper::HasInstance() ? exo::WMHelper::GetInstance() - : nullptr), - mode_(mode) { + : nullptr) { DCHECK(wm_helper_); aura::Window* const current_active = wm_helper_->GetActiveWindow(); @@ -322,26 +260,13 @@ void ArcGraphicsTracingHandler::RegisterMessages() { web_ui()->RegisterMessageCallback( - "ready", base::BindRepeating(&ArcGraphicsTracingHandler::HandleReady, - base::Unretained(this))); - web_ui()->RegisterMessageCallback( "loadFromText", base::BindRepeating(&ArcGraphicsTracingHandler::HandleLoadFromText, base::Unretained(this))); - switch (mode_) { - case ArcGraphicsTracingMode::kFull: - web_ui()->RegisterMessageCallback( - "setStopOnJank", - base::BindRepeating(&ArcGraphicsTracingHandler::HandleSetStopOnJank, - base::Unretained(this))); - break; - case ArcGraphicsTracingMode::kOverview: - web_ui()->RegisterMessageCallback( - "setMaxTime", - base::BindRepeating(&ArcGraphicsTracingHandler::HandleSetMaxTime, - base::Unretained(this))); - break; - } + web_ui()->RegisterMessageCallback( + "setMaxTime", + base::BindRepeating(&ArcGraphicsTracingHandler::HandleSetMaxTime, + base::Unretained(this))); } void ArcGraphicsTracingHandler::OnWindowActivated(ActivationReason reason, @@ -364,30 +289,10 @@ // Limit tracing by newly activated window. 
tracing_time_min_ = TRACE_TIME_TICKS_NOW(); - if (mode_ != ArcGraphicsTracingMode::kFull) - return; - - jank_detector_ = - std::make_unique<arc::ArcGraphicsJankDetector>(base::BindRepeating( - &ArcGraphicsTracingHandler::OnJankDetected, base::Unretained(this))); - exo::Surface* const surface = exo::GetShellRootSurface(arc_active_window_); - DCHECK(surface); - surface->AddSurfaceObserver(this); -} - -void ArcGraphicsTracingHandler::OnJankDetected(const base::Time& timestamp) { - VLOG(1) << "Jank detected " << timestamp; - if (tracing_active_ && stop_on_jank_) - StopTracingAndActivate(); } base::TimeDelta ArcGraphicsTracingHandler::GetMaxInterval() const { - switch (mode_) { - case ArcGraphicsTracingMode::kFull: - return kMaxIntervalToDisplayInFullMode; - case ArcGraphicsTracingMode::kOverview: - return max_tracing_time_; - } + return max_tracing_time_; } void ArcGraphicsTracingHandler::OnWindowPropertyChanged(aura::Window* window, @@ -484,7 +389,7 @@ UpdateActiveArcWindowInfo(); content::TracingController::GetInstance()->StartTracing( - GetTracingConfig(mode_), + GetTracingConfig(), base::BindOnce(&ArcGraphicsTracingHandler::OnTracingStarted, weak_ptr_factory_.GetWeakPtr())); } @@ -518,7 +423,7 @@ void ArcGraphicsTracingHandler::SetStatus(const std::string& status) { AllowJavascript(); - CallJavascriptFunction(GetJavascriptDomain(mode_) + "setStatus", + CallJavascriptFunction(kJavascriptDomain + std::string("setStatus"), base::Value(status.empty() ? "Idle" : status)); } @@ -532,12 +437,10 @@ UpdateActiveArcWindowInfo(); tracing_time_min_ = TRACE_TIME_TICKS_NOW(); - if (mode_ == ArcGraphicsTracingMode::kOverview) { - stop_tracing_timer_.Start( - FROM_HERE, system_stat_collector_->max_interval(), - base::BindOnce(&ArcGraphicsTracingHandler::StopTracingAndActivate, - base::Unretained(this))); - } + stop_tracing_timer_.Start( + FROM_HERE, system_stat_collector_->max_interval(), + base::BindOnce(&ArcGraphicsTracingHandler::StopTracingAndActivate, + base::Unretained(this))); } void ArcGraphicsTracingHandler::OnTracingStopped( @@ -547,13 +450,11 @@ Profile* const profile = Profile::FromWebUI(web_ui()); const base::FilePath model_path = - mode_ == ArcGraphicsTracingMode::kFull - ? 
GetLastTracingModelPath(profile) - : GetModelPathFromTitle(profile, active_task_title_); + GetModelPathFromTitle(profile, active_task_title_); base::ThreadPool::PostTaskAndReplyWithResult( FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, - base::BindOnce(&BuildGraphicsModel, std::move(string_data), mode_, + base::BindOnce(&BuildGraphicsModel, std::move(string_data), active_task_title_, active_task_icon_png_, timestamp_, std::move(system_stat_collector_), tracing_time_min_, tracing_time_max_, model_path), @@ -568,37 +469,13 @@ if (!result.first.is_dict()) return; - CallJavascriptFunction(GetJavascriptDomain(mode_) + "setModel", + CallJavascriptFunction(kJavascriptDomain + std::string("setModel"), std::move(result.first)); } -void ArcGraphicsTracingHandler::HandleReady(const base::Value::List& args) { - if (mode_ != ArcGraphicsTracingMode::kFull) - return; - - base::ThreadPool::PostTaskAndReplyWithResult( - FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, - base::BindOnce(&MaybeLoadLastGraphicsModel, - GetLastTracingModelPath(Profile::FromWebUI(web_ui()))), - base::BindOnce(&ArcGraphicsTracingHandler::OnGraphicsModelReady, - weak_ptr_factory_.GetWeakPtr())); -} - -void ArcGraphicsTracingHandler::HandleSetStopOnJank( - const base::Value::List& args) { - DCHECK_EQ(1U, args.size()); - DCHECK_EQ(ArcGraphicsTracingMode::kFull, mode_); - if (!args[0].is_bool()) { - LOG(ERROR) << "Invalid input"; - return; - } - stop_on_jank_ = args[0].GetBool(); -} - void ArcGraphicsTracingHandler::HandleSetMaxTime( const base::Value::List& args) { DCHECK_EQ(1U, args.size()); - DCHECK_EQ(ArcGraphicsTracingMode::kOverview, mode_); if (!args[0].is_int()) { LOG(ERROR) << "Invalid input"; @@ -618,7 +495,7 @@ base::ThreadPool::PostTaskAndReplyWithResult( FROM_HERE, {base::MayBlock(), base::TaskPriority::BEST_EFFORT}, - base::BindOnce(&LoadGraphicsModel, mode_, args[0].GetString()), + base::BindOnce(&LoadGraphicsModel, args[0].GetString()), base::BindOnce(&ArcGraphicsTracingHandler::OnGraphicsModelReady, weak_ptr_factory_.GetWeakPtr())); }
diff --git a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.h b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.h index 53d843d7..0f0b46e 100644 --- a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.h +++ b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.h
@@ -15,7 +15,6 @@ #include "base/time/time.h" #include "base/timer/timer.h" #include "base/values.h" -#include "chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing.h" #include "components/exo/surface_observer.h" #include "content/public/browser/web_ui_message_handler.h" #include "ui/aura/window_observer.h" @@ -49,7 +48,7 @@ static base::FilePath GetModelPathFromTitle(Profile* profile, const std::string& title); - explicit ArcGraphicsTracingHandler(ArcGraphicsTracingMode mode); + ArcGraphicsTracingHandler(); ArcGraphicsTracingHandler(const ArcGraphicsTracingHandler&) = delete; ArcGraphicsTracingHandler& operator=(const ArcGraphicsTracingHandler&) = @@ -94,8 +93,6 @@ void OnGraphicsModelReady(std::pair<base::Value, std::string> result); // Handlers for calls from JS. - void HandleReady(const base::Value::List& args); - void HandleSetStopOnJank(const base::Value::List& args); void HandleSetMaxTime(const base::Value::List& args); void HandleLoadFromText(const base::Value::List& args); @@ -105,29 +102,19 @@ // Stops tracking ARC window for janks. void DiscardActiveArcWindow(); - // Called in case jank is detected in active ARC window. - void OnJankDetected(const base::Time& timestamp); - // Returns max sampling interval to display. base::TimeDelta GetMaxInterval() const; // Indicates that tracing was initiated by this handler. bool tracing_active_ = false; - // Determines if tracing should stop in case jank is detected runtime. - // Works only in |ArcGraphicsTracingMode::kFull| mode. - bool stop_on_jank_ = true; - // Determines the maximum tracing time. - // Works only in |ArcGraphicsTracingMode::kOverview| mode. base::TimeDelta max_tracing_time_ = base::Seconds(5); base::OneShotTimer stop_tracing_timer_; const raw_ptr<exo::WMHelper, ExperimentalAsh> wm_helper_; - const ArcGraphicsTracingMode mode_; - raw_ptr<aura::Window, ExperimentalAsh> arc_active_window_ = nullptr; // Time filter for tracing, since ARC++ window was activated last until
diff --git a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.cc b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.cc index 0a4b49e..6ca28e3 100644 --- a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.cc +++ b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.cc
@@ -11,7 +11,6 @@ #include "chrome/browser/ash/arc/arc_util.h" #include "chrome/browser/browser_process.h" #include "chrome/browser/profiles/profile.h" -#include "chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing.h" #include "chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_handler.h" #include "chrome/common/webui_url_constants.h" #include "chrome/grit/browser_resources.h" @@ -22,35 +21,11 @@ namespace { -constexpr char kArcGraphicsTracingJsPath[] = "arc_graphics_tracing.js"; -constexpr char kArcGraphicsTracingUiJsPath[] = "arc_graphics_tracing_ui.js"; constexpr char kArcOverviewTracingJsPath[] = "arc_overview_tracing.js"; constexpr char kArcOverviewTracingUiJsPath[] = "arc_overview_tracing_ui.js"; constexpr char kArcTracingUiJsPath[] = "arc_tracing_ui.js"; constexpr char kArcTracingCssPath[] = "arc_tracing.css"; -void CreateAndAddGraphicsDataSource(Profile* profile) { - content::WebUIDataSource* const source = - content::WebUIDataSource::CreateAndAdd( - profile, chrome::kChromeUIArcGraphicsTracingHost); - source->UseStringsJs(); - source->SetDefaultResource(IDR_ARC_GRAPHICS_TRACING_HTML); - source->AddResourcePath(kArcGraphicsTracingJsPath, - IDR_ARC_GRAPHICS_TRACING_JS); - source->AddResourcePath(kArcGraphicsTracingUiJsPath, - IDR_ARC_GRAPHICS_TRACING_UI_JS); - source->AddResourcePath(kArcTracingCssPath, IDR_ARC_TRACING_CSS); - source->AddResourcePath(kArcTracingUiJsPath, IDR_ARC_TRACING_UI_JS); - source->OverrideContentSecurityPolicy( - network::mojom::CSPDirectiveName::ScriptSrc, - "script-src chrome://resources 'self';"); - - base::Value::Dict localized_strings; - const std::string& app_locale = g_browser_process->GetApplicationLocale(); - webui::SetLoadTimeDataDefaults(app_locale, &localized_strings); - source->AddLocalizedStrings(localized_strings); -} - void CreateAndAddOverviewDataSource(Profile* profile) { content::WebUIDataSource* const source = content::WebUIDataSource::CreateAndAdd( @@ -77,40 +52,19 @@ namespace ash { -template <> -ArcGraphicsTracingUIConfig< - ArcGraphicsTracingMode::kFull>::ArcGraphicsTracingUIConfig() - : DefaultWebUIConfig(content::kChromeUIScheme, - chrome::kChromeUIArcGraphicsTracingHost) {} - -template <> -ArcGraphicsTracingUIConfig< - ArcGraphicsTracingMode::kOverview>::ArcGraphicsTracingUIConfig() +ArcGraphicsTracingUIConfig::ArcGraphicsTracingUIConfig() : DefaultWebUIConfig(content::kChromeUIScheme, chrome::kChromeUIArcOverviewTracingHost) {} -template <ArcGraphicsTracingMode mode> -bool ArcGraphicsTracingUIConfig<mode>::IsWebUIEnabled( +bool ArcGraphicsTracingUIConfig::IsWebUIEnabled( content::BrowserContext* browser_context) { return arc::IsArcAllowedForProfile( Profile::FromBrowserContext(browser_context)); } -template <> -ArcGraphicsTracingUI<ArcGraphicsTracingMode::kFull>::ArcGraphicsTracingUI( - content::WebUI* web_ui) +ArcGraphicsTracingUI::ArcGraphicsTracingUI(content::WebUI* web_ui) : WebUIController(web_ui) { - web_ui->AddMessageHandler(std::make_unique<ArcGraphicsTracingHandler>( - ArcGraphicsTracingMode::kFull)); - CreateAndAddGraphicsDataSource(Profile::FromWebUI(web_ui)); -} - -template <> -ArcGraphicsTracingUI<ArcGraphicsTracingMode::kOverview>::ArcGraphicsTracingUI( - content::WebUI* web_ui) - : WebUIController(web_ui) { - web_ui->AddMessageHandler(std::make_unique<ArcGraphicsTracingHandler>( - ArcGraphicsTracingMode::kOverview)); + web_ui->AddMessageHandler(std::make_unique<ArcGraphicsTracingHandler>()); CreateAndAddOverviewDataSource(Profile::FromWebUI(web_ui)); }
diff --git a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.h b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.h index 8dd2037..770fcd9 100644 --- a/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.h +++ b/chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.h
@@ -5,7 +5,6 @@ #ifndef CHROME_BROWSER_UI_WEBUI_ASH_ARC_GRAPHICS_TRACING_ARC_GRAPHICS_TRACING_UI_H_ #define CHROME_BROWSER_UI_WEBUI_ASH_ARC_GRAPHICS_TRACING_ARC_GRAPHICS_TRACING_UI_H_ -#include "chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing.h" #include "content/public/browser/web_ui_controller.h" #include "content/public/browser/webui_config.h" @@ -15,14 +14,11 @@ namespace ash { -template <ArcGraphicsTracingMode mode> class ArcGraphicsTracingUI; -// WebUIConfig for chrome://arc-graphics-tracing and -// chrome://arc-overview-tracing -template <ArcGraphicsTracingMode mode> +// WebUIConfig for chrome://arc-overview-tracing class ArcGraphicsTracingUIConfig - : public content::DefaultWebUIConfig<ArcGraphicsTracingUI<mode>> { + : public content::DefaultWebUIConfig<ArcGraphicsTracingUI> { public: ArcGraphicsTracingUIConfig(); @@ -30,7 +26,6 @@ }; // WebUI controller for arc graphics/overview tracing. -template <ArcGraphicsTracingMode mode> class ArcGraphicsTracingUI : public content::WebUIController { public: explicit ArcGraphicsTracingUI(content::WebUI* web_ui);
diff --git a/chrome/browser/ui/webui/ash/chrome_web_ui_configs_chromeos.cc b/chrome/browser/ui/webui/ash/chrome_web_ui_configs_chromeos.cc index 907444d..79b6247 100644 --- a/chrome/browser/ui/webui/ash/chrome_web_ui_configs_chromeos.cc +++ b/chrome/browser/ui/webui/ash/chrome_web_ui_configs_chromeos.cc
@@ -59,7 +59,6 @@ #include "chrome/browser/ui/webui/ash/account_manager/account_manager_error_ui.h" #include "chrome/browser/ui/webui/ash/account_manager/account_migration_welcome_ui.h" #include "chrome/browser/ui/webui/ash/add_supervision/add_supervision_ui.h" -#include "chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing.h" #include "chrome/browser/ui/webui/ash/arc_graphics_tracing/arc_graphics_tracing_ui.h" #include "chrome/browser/ui/webui/ash/arc_power_control/arc_power_control_ui.h" #include "chrome/browser/ui/webui/ash/assistant_optin/assistant_optin_ui.h" @@ -222,12 +221,7 @@ map.AddWebUIConfig(std::make_unique<AccountManagerErrorUIConfig>()); map.AddWebUIConfig(std::make_unique<AccountMigrationWelcomeUIConfig>()); map.AddWebUIConfig(std::make_unique<AddSupervisionUIConfig>()); - map.AddWebUIConfig( - std::make_unique< - ArcGraphicsTracingUIConfig<ArcGraphicsTracingMode::kFull>>()); - map.AddWebUIConfig( - std::make_unique< - ArcGraphicsTracingUIConfig<ArcGraphicsTracingMode::kOverview>>()); + map.AddWebUIConfig(std::make_unique<ArcGraphicsTracingUIConfig>()); map.AddWebUIConfig(std::make_unique<ArcPowerControlUIConfig>()); map.AddWebUIConfig(std::make_unique<AssistantOptInUIConfig>()); map.AddWebUIConfig(std::make_unique<AudioUIConfig>());
diff --git a/chrome/browser/ui/webui/chrome_url_data_manager_browsertest.cc b/chrome/browser/ui/webui/chrome_url_data_manager_browsertest.cc index 440202d..a2deadd 100644 --- a/chrome/browser/ui/webui/chrome_url_data_manager_browsertest.cc +++ b/chrome/browser/ui/webui/chrome_url_data_manager_browsertest.cc
@@ -395,9 +395,6 @@ "chrome://account-manager-error", "chrome://account-migration-welcome", - // TODO(crbug.com/1102129): DCHECK failure in - // ArcGraphicsTracingHandler::ArcGraphicsTracingHandler. - // "chrome://arc-graphics-tracing", "chrome://add-supervision/", "chrome://app-disabled", "chrome://certificate-manager/",
diff --git a/chrome/browser/ui/webui/chrome_web_ui_controller_factory.cc b/chrome/browser/ui/webui/chrome_web_ui_controller_factory.cc index 6255b2b..2f9178d 100644 --- a/chrome/browser/ui/webui/chrome_web_ui_controller_factory.cc +++ b/chrome/browser/ui/webui/chrome_web_ui_controller_factory.cc
@@ -1134,7 +1134,6 @@ GURL(chrome::kChromeUIAccountMigrationWelcomeURL), GURL(chrome::kChromeUIAddSupervisionURL), GURL(chrome::kChromeUIAppDisabledURL), - GURL(chrome::kChromeUIArcGraphicsTracingURL), GURL(chrome::kChromeUIArcOverviewTracingURL), GURL(chrome::kChromeUIArcPowerControlURL), GURL(chrome::kChromeUIAssistantOptInURL),
diff --git a/chrome/browser/ui/webui/flags/flags_ui.cc b/chrome/browser/ui/webui/flags/flags_ui.cc index f3cf3653..a9c2518 100644 --- a/chrome/browser/ui/webui/flags/flags_ui.cc +++ b/chrome/browser/ui/webui/flags/flags_ui.cc
@@ -65,12 +65,13 @@ content::WebUIDataSource* CreateAndAddFlagsUIHTMLSource(Profile* profile) { content::WebUIDataSource* source = content::WebUIDataSource::CreateAndAdd( profile, chrome::kChromeUIFlagsHost); + source->EnableReplaceI18nInJS(); source->OverrideContentSecurityPolicy( network::mojom::CSPDirectiveName::ScriptSrc, "script-src chrome://resources 'self' 'unsafe-eval';"); source->OverrideContentSecurityPolicy( network::mojom::CSPDirectiveName::TrustedTypes, - "trusted-types jstemplate;"); + "trusted-types jstemplate static-types;"); source->AddString(flags_ui::kVersion, std::string(version_info::GetVersionNumber()));
diff --git a/chrome/browser/ui/webui/settings/ash/accessibility_section.cc b/chrome/browser/ui/webui/settings/ash/accessibility_section.cc index e48d1da..71402de 100644 --- a/chrome/browser/ui/webui/settings/ash/accessibility_section.cc +++ b/chrome/browser/ui/webui/settings/ash/accessibility_section.cc
@@ -738,7 +738,6 @@ {"greyscaleLabel", IDS_SETTINGS_GREYSCALE_LABEL}, {"highContrastDescription", IDS_SETTINGS_HIGH_CONTRAST_DESCRIPTION}, {"highContrastLabel", IDS_SETTINGS_HIGH_CONTRAST_LABEL}, - {"hueRotationLabel", IDS_SETTINGS_HUE_ROTATION_LABEL}, {"protanomalyFilter", IDS_SETTINGS_PROTANOMALY_FILTER}, {"tritanomalyFilter", IDS_SETTINGS_TRITANOMALY_FILTER}, {"deuteranomalyFilter", IDS_SETTINGS_DEUTERANOMALY_FILTER}, @@ -791,7 +790,6 @@ {"pdfOcrSubtitle", IDS_SETTINGS_PDF_OCR_SUBTITLE}, {"pdfOcrTitle", IDS_SETTINGS_PDF_OCR_TITLE}, {"percentage", IDS_SETTINGS_PERCENTAGE}, - {"saturationLabel", IDS_SETTINGS_SATURATION_LABEL}, {"screenMagnifierDescriptionOff", IDS_SETTINGS_SCREEN_MAGNIFIER_DESCRIPTION_OFF}, {"screenMagnifierDescriptionOn", @@ -885,7 +883,6 @@ {"selectToSpeakOptionsLabel", IDS_SETTINGS_ACCESSIBILITY_SELECT_TO_SPEAK_OPTIONS_LABEL}, {"selectToSpeakTitle", IDS_SETTINGS_ACCESSIBILITY_SELECT_TO_SPEAK_TITLE}, - {"sepiaLabel", IDS_SETTINGS_SEPIA_LABEL}, {"settingsSliderRoleDescription", IDS_SETTINGS_SLIDER_MIN_MAX_ARIA_ROLE_DESCRIPTION}, {"startupSoundLabel", IDS_SETTINGS_STARTUP_SOUND_LABEL},
diff --git a/chrome/browser/ui/webui/settings/ash/search/per_session_settings_user_action_tracker.cc b/chrome/browser/ui/webui/settings/ash/search/per_session_settings_user_action_tracker.cc index a1ce9f0..efbb6e3 100644 --- a/chrome/browser/ui/webui/settings/ash/search/per_session_settings_user_action_tracker.cc +++ b/chrome/browser/ui/webui/settings/ash/search/per_session_settings_user_action_tracker.cc
@@ -8,6 +8,7 @@ #include "base/strings/string_number_conversions.h" #include "chrome/browser/ash/login/login_pref_names.h" #include "chrome/common/pref_names.h" +#include "components/prefs/scoped_user_pref_update.h" namespace ash::settings { @@ -215,28 +216,29 @@ absl::optional<int> PerSessionSettingsUserActionTracker::UpdateSettingsPrefTotalUniqueChanged() { // Fetch the dictionary from the pref. - base::Value::Dict writeable_dict = - pref_service_->GetDict(::prefs::kTotalUniqueOsSettingsChanged).Clone(); - int current_count = writeable_dict.size(); + ScopedDictPrefUpdate total_unique_settings_changed_( + pref_service_, ::prefs::kTotalUniqueOsSettingsChanged); + base::Value::Dict& pref_data = total_unique_settings_changed_.Get(); + int current_count = pref_data.size(); // Set the dictionary. // Value is a constant 1 since we only want to know which Setting has been // used, not how many times it has been used. constexpr int value = 1; for (const std::string& setting_string : changed_settings_) { - if (!writeable_dict.contains(setting_string)) { - writeable_dict.Set(setting_string, value); + if (!pref_data.contains(setting_string)) { + pref_data.Set(setting_string, value); } } - // Save to pref. - int new_count = writeable_dict.size(); - pref_service_->SetDict(::prefs::kTotalUniqueOsSettingsChanged, - std::move(writeable_dict)); + int new_count = pref_data.size(); // If the new size of the pref dictionary is the same as before, we do not // want to record that in UMA so we will return a nullopt to flag not to add // to histogram bucket. + // + // The value of pref_data will automatically get stored to pref_service_ upon + // destruction. return current_count == new_count ? absl::nullopt : absl::optional<int>{new_count}; }
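The switch to ScopedDictPrefUpdate above relies on the updater committing the dictionary back to the PrefService when it goes out of scope. A minimal standalone sketch of that pattern (the pref path and key are hypothetical, not the ones used by the tracker):

#include <string>

#include "base/values.h"
#include "components/prefs/pref_service.h"
#include "components/prefs/scoped_user_pref_update.h"

// Hypothetical pref path and key, for illustration only.
void MarkSettingChanged(PrefService* prefs, const std::string& setting_id) {
  ScopedDictPrefUpdate update(prefs, "example.changed_settings");
  base::Value::Dict& dict = update.Get();
  if (!dict.contains(setting_id)) {
    dict.Set(setting_id, 1);
  }
  // No explicit SetDict() call is needed: the mutated dictionary is written
  // back to `prefs` when `update` is destroyed at the end of this scope.
}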
diff --git a/chrome/browser/ui/webui/settings/safety_hub_handler.cc b/chrome/browser/ui/webui/settings/safety_hub_handler.cc index 7bf3487..52a940c2 100644 --- a/chrome/browser/ui/webui/settings/safety_hub_handler.cc +++ b/chrome/browser/ui/webui/settings/safety_hub_handler.cc
@@ -68,8 +68,10 @@ CHECK(js_expiration); auto expiration = base::ValueToTime(js_expiration); - const content_settings::ContentSettingConstraints constraints{ - .expiration = *expiration}; + content_settings::ContentSettingConstraints constraints; + // TODO(https://crbug.com/1450356): we should store the lifetime of the + // permission, rather than just its expiration. + constraints.set_lifetime(constraints.DeltaFromCreationTime(*expiration)); return std::make_tuple(origin, permission_types, constraints); }
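The TODO above points at the underlying distinction: the WebUI only round-trips an absolute expiration time, so the handler reconstructs a duration from it. A small sketch of what that conversion amounts to, with an illustrative 30-day window:

// Illustrative only. If the constraint is created now and the stored
// expiration is 30 days out, the derived lifetime is roughly 30 days:
// DeltaFromCreationTime(expiration) == expiration - creation time.
content_settings::ContentSettingConstraints constraints;
const base::Time expiration = base::Time::Now() + base::Days(30);
constraints.set_lifetime(constraints.DeltaFromCreationTime(expiration));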
diff --git a/chrome/browser/ui/webui/settings/safety_hub_handler_unittest.cc b/chrome/browser/ui/webui/settings/safety_hub_handler_unittest.cc index 70cdcd4..8684b25e 100644 --- a/chrome/browser/ui/webui/settings/safety_hub_handler_unittest.cc +++ b/chrome/browser/ui/webui/settings/safety_hub_handler_unittest.cc
@@ -141,8 +141,8 @@ TEST_F(SafetyHubHandlerTest, PopulateUnusedSitePermissionsData) { // Add GEOLOCATION setting for url but do not add to revoked list. - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); hcsm()->SetContentSettingDefaultScope( GURL(kUsedTestSite), GURL(kUsedTestSite), ContentSettingsType::GEOLOCATION, ContentSetting::CONTENT_SETTING_ALLOW,
diff --git a/chrome/browser/ui/webui/side_panel/bookmarks/bookmarks_side_panel_ui.cc b/chrome/browser/ui/webui/side_panel/bookmarks/bookmarks_side_panel_ui.cc index f06fc1a..77899f0d 100644 --- a/chrome/browser/ui/webui/side_panel/bookmarks/bookmarks_side_panel_ui.cc +++ b/chrome/browser/ui/webui/side_panel/bookmarks/bookmarks_side_panel_ui.cc
@@ -155,6 +155,7 @@ IDS_BOOKMARK_ACCESSIBLE_DESCRIPTION_PRICE_CHANGE}, {"checkboxA11yLabel", IDS_BOOKMARKS_CHECKBOX_LABEL}, {"editInvalidUrl", IDS_BOOKMARK_MANAGER_INVALID_URL}, + {"bookmarkFolderChildCount", IDS_BOOKMARK_FOLDER_CHILD_COUNT}, }; for (const auto& str : kLocalizedStrings) webui::AddLocalizedString(source, str.name, str.id); @@ -222,8 +223,6 @@ // Add a handler to provide pluralized strings. auto plural_string_handler = std::make_unique<PluralStringHandler>(); - plural_string_handler->AddLocalizedString("bookmarkFolderChildCount", - IDS_BOOKMARK_FOLDER_CHILD_COUNT); plural_string_handler->AddLocalizedString("bookmarkDeletionCount", IDS_BOOKMARK_DELETION_COUNT); web_ui->AddMessageHandler(std::move(plural_string_handler));
diff --git a/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.cc b/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.cc index 593696eb..d6d1e567 100644 --- a/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.cc +++ b/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.cc
@@ -200,8 +200,7 @@ void CompanionPageHandler::OnPromoAction( side_panel::mojom::PromoType promo_type, - side_panel::mojom::PromoAction promo_action, - const absl::optional<GURL>& exps_promo_url) { + side_panel::mojom::PromoAction promo_action) { if (promo_type == side_panel::mojom::PromoType::kRegionSearchIPH) { if (promo_action == side_panel::mojom::PromoAction::kRejected) { auto* tracker = feature_engagement::TrackerFactory::GetForBrowserContext( @@ -212,7 +211,7 @@ return; } - promo_handler_->OnPromoAction(promo_type, promo_action, exps_promo_url); + promo_handler_->OnPromoAction(promo_type, promo_action); metrics_logger_->OnPromoAction(promo_type, promo_action); } @@ -271,11 +270,7 @@ } void CompanionPageHandler::OnPhFeedback( - side_panel::mojom::PhFeedback ph_feedback, - const absl::optional<GURL>& reporting_url) { - if (reporting_url.has_value()) { - signin_delegate_->LoadUrlInNewTab(reporting_url.value()); - } + side_panel::mojom::PhFeedback ph_feedback) { metrics_logger_->OnPhFeedback(ph_feedback); } @@ -287,6 +282,16 @@ text_directive); } +void CompanionPageHandler::OpenUrlInBrowser( + const absl::optional<GURL>& url_to_open, + bool use_new_tab) { + if (!url_to_open.has_value() || !url_to_open.value().is_valid()) { + return; + } + + signin_delegate_->OpenUrlInBrowser(url_to_open.value(), use_new_tab); +} + Browser* CompanionPageHandler::GetBrowser() { auto* webui_contents = companion_untrusted_ui_->web_ui()->GetWebContents(); auto* browser = companion::GetBrowserForWebContents(webui_contents);
diff --git a/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.h b/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.h index 33d9d4e..5409321 100644 --- a/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.h +++ b/chrome/browser/ui/webui/side_panel/companion/companion_page_handler.h
@@ -48,8 +48,7 @@ // side_panel::mojom::CompanionPageHandler: void ShowUI() override; void OnPromoAction(side_panel::mojom::PromoType promo_type, - side_panel::mojom::PromoAction promo_action, - const absl::optional<GURL>& exps_promo_url) override; + side_panel::mojom::PromoAction promo_action) override; void OnRegionSearchClicked() override; void OnExpsOptInStatusAvailable(bool is_exps_opted_in) override; void OnOpenInNewTabButtonURLChanged(const GURL& url_to_open) override; @@ -61,9 +60,10 @@ int32_t click_position) override; void OnCqCandidatesAvailable( const std::vector<std::string>& text_directives) override; - void OnPhFeedback(side_panel::mojom::PhFeedback ph_feedback, - const absl::optional<GURL>& reporting_url) override; + void OnPhFeedback(side_panel::mojom::PhFeedback ph_feedback) override; void OnCqJumptagClicked(const std::string& text_directive) override; + void OpenUrlInBrowser(const absl::optional<GURL>& url_to_open, + bool use_new_tab) override; // content::WebContentsObserver overrides. void DidFinishNavigation(
diff --git a/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.cc b/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.cc index 4f2ca751..40b493c 100644 --- a/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.cc +++ b/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.cc
@@ -76,10 +76,11 @@ consent_service->SetUrlKeyedAnonymizedDataCollectionEnabled(enable_msbb); } -void SigninDelegateImpl::LoadUrlInNewTab(const GURL& url) { - CHECK(url.is_valid()); +void SigninDelegateImpl::OpenUrlInBrowser(const GURL& url, bool use_new_tab) { content::OpenURLParams params(url, content::Referrer(), - WindowOpenDisposition::NEW_FOREGROUND_TAB, + use_new_tab + ? WindowOpenDisposition::NEW_FOREGROUND_TAB + : WindowOpenDisposition::CURRENT_TAB, ui::PAGE_TRANSITION_AUTO_TOPLEVEL, /*is_renderer_initiated*/ false); auto* browser = companion::GetBrowserForWebContents(webui_contents_);
diff --git a/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.h b/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.h index 082643a..af1d62ed 100644 --- a/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.h +++ b/chrome/browser/ui/webui/side_panel/companion/signin_delegate_impl.h
@@ -30,7 +30,7 @@ bool IsSignedIn() override; void StartSigninFlow() override; void EnableMsbb(bool enable_msbb) override; - void LoadUrlInNewTab(const GURL& url) override; + void OpenUrlInBrowser(const GURL& url, bool use_new_tab) override; bool ShouldShowRegionSearchIPH() override; private:
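Editor's note: the companion change above replaces `LoadUrlInNewTab()` with `OpenUrlInBrowser(url, use_new_tab)`. The page handler first drops missing or invalid URLs, then the delegate picks the window disposition from the boolean. A minimal sketch of that disposition selection, assuming surrounding browser/delegate wiring and using only names visible in the hunks:

```cpp
// Sketch only: shows how the new use_new_tab flag maps onto OpenURLParams,
// as in SigninDelegateImpl::OpenUrlInBrowser above.
#include "content/public/browser/page_navigator.h"
#include "ui/base/page_transition_types.h"
#include "ui/base/window_open_disposition.h"
#include "url/gurl.h"

content::OpenURLParams MakeCompanionOpenParams(const GURL& url,
                                               bool use_new_tab) {
  return content::OpenURLParams(
      url, content::Referrer(),
      use_new_tab ? WindowOpenDisposition::NEW_FOREGROUND_TAB
                  : WindowOpenDisposition::CURRENT_TAB,
      ui::PAGE_TRANSITION_AUTO_TOPLEVEL,
      /*is_renderer_initiated=*/false);
}
```

The resulting params are then passed to the browser found for the companion WebUI contents, exactly as before; only the disposition is now caller-controlled.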
diff --git a/chrome/browser/usb/android/java/src/org/chromium/chrome/browser/usb/UsbNotificationManager.java b/chrome/browser/usb/android/java/src/org/chromium/chrome/browser/usb/UsbNotificationManager.java index e3b0ee2..9c3996c9 100644 --- a/chrome/browser/usb/android/java/src/org/chromium/chrome/browser/usb/UsbNotificationManager.java +++ b/chrome/browser/usb/android/java/src/org/chromium/chrome/browser/usb/UsbNotificationManager.java
@@ -37,8 +37,6 @@ * to a USB device. */ public class UsbNotificationManager { - private static final String TAG = "UsbNotificationManager"; - private static final String NOTIFICATION_NAMESPACE = "UsbNotificationManager"; public static final String ACTION_USB_UPDATE = "org.chromium.chrome.browser.app.usb.USB_UPDATE";
diff --git a/chrome/browser/web_applications/isolated_web_apps/isolated_web_app_browsing_data_browsertest.cc b/chrome/browser/web_applications/isolated_web_apps/isolated_web_app_browsing_data_browsertest.cc index bb21c41..db242ca 100644 --- a/chrome/browser/web_applications/isolated_web_apps/isolated_web_app_browsing_data_browsertest.cc +++ b/chrome/browser/web_applications/isolated_web_apps/isolated_web_app_browsing_data_browsertest.cc
@@ -3,10 +3,13 @@ // found in the LICENSE file. #include <memory> +#include <string> #include "base/check_deref.h" #include "base/test/scoped_feature_list.h" #include "base/test/test_future.h" +#include "base/time/time.h" +#include "chrome/browser/profiles/profile.h" #include "chrome/browser/ui/browser.h" #include "chrome/browser/ui/web_applications/test/isolated_web_app_test_utils.h" #include "chrome/browser/web_applications/isolated_web_apps/isolated_web_app_url_info.h" @@ -17,8 +20,15 @@ #include "content/public/common/content_features.h" #include "content/public/test/browser_test.h" #include "content/public/test/browser_test_utils.h" +#include "mojo/public/cpp/bindings/callback_helpers.h" +#include "mojo/public/cpp/bindings/remote.h" +#include "net/cookies/canonical_cookie.h" #include "net/test/embedded_test_server/embedded_test_server.h" +#include "services/network/public/mojom/cookie_manager.mojom.h" +#include "services/network/test/test_network_context.h" #include "testing/gtest/include/gtest/gtest.h" +#include "third_party/abseil-cpp/absl/types/optional.h" +#include "url/gurl.h" #include "url/origin.h" using ::testing::Eq; @@ -135,4 +145,93 @@ EXPECT_THAT(GetIwaUsage(url_info), IsApproximately(3000)); } +class IsolatedWebAppBrowsingDataClearingTest + : public IsolatedWebAppBrowsingDataTest { + protected: + int64_t GetCacheSize(const IsolatedWebAppUrlInfo& url_info) { + base::test::TestFuture<bool, int64_t> future; + + content::StoragePartition* storage_partition = + profile()->GetStoragePartition( + url_info.storage_partition_config(profile())); + + storage_partition->GetNetworkContext()->ComputeHttpCacheSize( + base::Time::Min(), base::Time::Max(), + mojo::WrapCallbackWithDefaultInvokeIfNotRun( + future.GetCallback(), + /* is_upper_limit = */ false, + /* result_or_error = */ -1)); + + std::tuple<bool, int64_t> result = future.Get(); + + int64_t cache_size_or_error = std::get<1>(result); + CHECK(cache_size_or_error >= 0); + return cache_size_or_error; + } + + bool SetCookie( + const IsolatedWebAppUrlInfo& url_info, + const GURL& url, + const std::string& cookie_line, + const absl::optional<net::CookiePartitionKey>& cookie_partition_key) { + content::StoragePartition* storage_partition = + profile()->GetStoragePartition( + url_info.storage_partition_config(profile())); + + mojo::Remote<network::mojom::CookieManager> cookie_manager; + storage_partition->GetNetworkContext()->GetCookieManager( + cookie_manager.BindNewPipeAndPassReceiver()); + + auto cookie_obj = net::CanonicalCookie::Create( + url, cookie_line, base::Time::Now(), /*server_time=*/absl::nullopt, + cookie_partition_key); + + base::test::TestFuture<net::CookieAccessResult> future; + cookie_manager->SetCanonicalCookie(*cookie_obj, url, + net::CookieOptions::MakeAllInclusive(), + future.GetCallback()); + return future.Take().status.IsInclude(); + } + + net::CookieList GetAllCookies(const IsolatedWebAppUrlInfo& url_info) { + content::StoragePartition* storage_partition = + profile()->GetStoragePartition( + url_info.storage_partition_config(profile())); + + mojo::Remote<network::mojom::CookieManager> cookie_manager; + storage_partition->GetNetworkContext()->GetCookieManager( + cookie_manager.BindNewPipeAndPassReceiver()); + base::test::TestFuture<const net::CookieList&> future; + cookie_manager->GetAllCookies(future.GetCallback()); + return future.Take(); + } +}; + +IN_PROC_BROWSER_TEST_F(IsolatedWebAppBrowsingDataClearingTest, CacheCleared) { + IsolatedWebAppUrlInfo url_info = InstallIsolatedWebApp(); + + // IWA installation 
creates cache data. + EXPECT_GT(GetCacheSize(url_info), 0); + + // TODO(crbug.com/1453520): Clear cache data. + // EXPECT_EQ(GetCacheSize(url_info), 0); +} + +IN_PROC_BROWSER_TEST_F(IsolatedWebAppBrowsingDataClearingTest, CookieCleared) { + IsolatedWebAppUrlInfo url_info = InstallIsolatedWebApp(); + + // Unpartitioned Cookie + ASSERT_TRUE(SetCookie(url_info, GURL("http://a.com"), "A=0", absl::nullopt)); + + // Partitioned Cookie + ASSERT_TRUE(SetCookie( + url_info, GURL("https://c.com"), "A=0; secure; partitioned", + net::CookiePartitionKey::FromURLForTesting(GURL("https://d.com")))); + + EXPECT_EQ(GetAllCookies(url_info).size(), 2UL); + + // TODO(crbug.com/1453520): Clear cookies. + // EXPECT_GT(GetAllCookies(url_info).size(), 0UL); +} + } // namespace web_app
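Editor's note: the new `IsolatedWebAppBrowsingDataClearingTest` helpers above all follow the same pattern of driving an asynchronous mojo call from a synchronous browser test via `base::test::TestFuture`. An isolated illustration of that pattern, assuming a bound `CookieManager` remote and a `CanonicalCookie` already exist; the signature mirrors the helper in the hunk:

```cpp
// Sketch only: block a browser test on an async CookieManager call by handing
// it future.GetCallback(), then read the result synchronously.
#include "base/test/test_future.h"
#include "net/cookies/canonical_cookie.h"
#include "services/network/public/mojom/cookie_manager.mojom.h"

bool SetCookieAndWait(network::mojom::CookieManager* cookie_manager,
                      const net::CanonicalCookie& cookie,
                      const GURL& url) {
  base::test::TestFuture<net::CookieAccessResult> future;
  cookie_manager->SetCanonicalCookie(cookie, url,
                                     net::CookieOptions::MakeAllInclusive(),
                                     future.GetCallback());
  return future.Take().status.IsInclude();
}
```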
diff --git a/chrome/browser/web_applications/ml_promotion_browsertest.cc b/chrome/browser/web_applications/ml_promotion_browsertest.cc index 65de4f8..a11f870 100644 --- a/chrome/browser/web_applications/ml_promotion_browsertest.cc +++ b/chrome/browser/web_applications/ml_promotion_browsertest.cc
@@ -7,7 +7,6 @@ #include "base/memory/scoped_refptr.h" #include "base/run_loop.h" #include "base/test/bind.h" -#include "base/test/scoped_feature_list.h" #include "base/test/test_future.h" #include "base/test/test_simple_task_runner.h" #include "chrome/browser/ui/browser.h" @@ -124,8 +123,6 @@ public: MLPromotionBrowsertest() { task_runner_ = base::MakeRefCounted<base::TestSimpleTaskRunner>(); - scoped_feature_list_.InitAndEnableFeature( - webapps::features::kWebAppsMlUkmCollection); } ~MLPromotionBrowsertest() override = default; @@ -232,7 +229,6 @@ private: std::unique_ptr<ukm::TestAutoSetUkmRecorder> test_ukm_recorder_; - base::test::ScopedFeatureList scoped_feature_list_; }; // Manifest Data Fetching tests. @@ -419,17 +415,8 @@ entry, InstallUkmEntry::kIsPartiallyInstalledName, true); } -// SiteQualityMetrics tests. -#if BUILDFLAG(IS_MAC) -// TODO(crbug.com/1450786): Fix the flakiness of the test. -#define MAYBE_SiteQualityMetrics_ServiceWorker_FetchHandler \ - DISABLED_SiteQualityMetrics_ServiceWorker_FetchHandler -#else -#define MAYBE_SiteQualityMetrics_ServiceWorker_FetchHandler \ - SiteQualityMetrics_ServiceWorker_FetchHandler -#endif IN_PROC_BROWSER_TEST_F(MLPromotionBrowsertest, - MAYBE_SiteQualityMetrics_ServiceWorker_FetchHandler) { + SiteQualityMetrics_ServiceWorker_FetchHandler) { NavigateAndAwaitMetricsCollectionPending(GetInstallableAppURL()); AwaitServiceWorkerRegistrationAndPendingDelayedTask(GetInstallableAppURL()); task_runner_->RunPendingTasks(); @@ -467,17 +454,8 @@ EXPECT_EQ(entry.metrics[QualityUkmEntry::kServiceWorkerScriptSizeName], 0); } -#if BUILDFLAG(IS_MAC) -// TODO(crbug.com/1450786): Fix the flakiness of the test. -#define MAYBE_SiteQualityMetrics_ServiceWorker_EmptyFetchHandler \ - DISABLED_SiteQualityMetrics_ServiceWorker_EmptyFetchHandler -#else -#define MAYBE_SiteQualityMetrics_ServiceWorker_EmptyFetchHandler \ - SiteQualityMetrics_ServiceWorker_EmptyFetchHandler -#endif -IN_PROC_BROWSER_TEST_F( - MLPromotionBrowsertest, - MAYBE_SiteQualityMetrics_ServiceWorker_EmptyFetchHandler) { +IN_PROC_BROWSER_TEST_F(MLPromotionBrowsertest, + SiteQualityMetrics_ServiceWorker_EmptyFetchHandler) { NavigateAndAwaitMetricsCollectionPending(GetUrlWithSWEmptyFetchHandler()); AwaitServiceWorkerRegistrationAndPendingDelayedTask( GetUrlWithSWEmptyFetchHandler()); @@ -497,16 +475,8 @@ EXPECT_GT(entry.metrics[QualityUkmEntry::kServiceWorkerScriptSizeName], 0); } -#if BUILDFLAG(IS_MAC) -// TODO(crbug.com/1450786): Fix the flakiness of the test. -#define MAYBE_SiteQualityMetrics_ServiceWorker_NoFetchHandler \ - DISABLED_SiteQualityMetrics_ServiceWorker_NoFetchHandler -#else -#define MAYBE_SiteQualityMetrics_ServiceWorker_NoFetchHandler \ - SiteQualityMetrics_ServiceWorker_NoFetchHandler -#endif IN_PROC_BROWSER_TEST_F(MLPromotionBrowsertest, - MAYBE_SiteQualityMetrics_ServiceWorker_NoFetchHandler) { + SiteQualityMetrics_ServiceWorker_NoFetchHandler) { NavigateAndAwaitMetricsCollectionPending(GetUrlWithSwNoFetchHandler()); AwaitServiceWorkerRegistrationAndPendingDelayedTask( GetUrlWithSwNoFetchHandler());
diff --git a/chrome/browser/webauthn/android/java/src/org/chromium/chrome/browser/webauthn/CableAuthenticatorModuleProvider.java b/chrome/browser/webauthn/android/java/src/org/chromium/chrome/browser/webauthn/CableAuthenticatorModuleProvider.java index 34d33d3..1cba73bb 100644 --- a/chrome/browser/webauthn/android/java/src/org/chromium/chrome/browser/webauthn/CableAuthenticatorModuleProvider.java +++ b/chrome/browser/webauthn/android/java/src/org/chromium/chrome/browser/webauthn/CableAuthenticatorModuleProvider.java
@@ -33,6 +33,7 @@ import org.chromium.base.ContextUtils; import org.chromium.base.Log; +import org.chromium.base.PackageUtils; import org.chromium.base.annotations.CalledByNative; import org.chromium.base.annotations.NativeMethods; import org.chromium.base.task.PostTask; @@ -254,9 +255,16 @@ @CalledByNative public static void getLinkingInformation() { - ExternalAuthUtils externalAuthUtils = ExternalAuthUtils.getInstance(); - if (!externalAuthUtils.canUseFirstPartyGooglePlayServices()) { + boolean ok = true; + if (!ExternalAuthUtils.getInstance().canUseFirstPartyGooglePlayServices()) { Log.i(TAG, "Cannot get linking information from Play Services without 1p access."); + ok = false; + } else if (PackageUtils.getPackageVersion("com.google.android.gms") < 232400000) { + Log.i(TAG, "GMS Core version is too old to get linking information."); + ok = false; + } + + if (!ok) { CableAuthenticatorModuleProviderJni.get().onHaveLinkingInformation(null); return; }
diff --git a/chrome/build/linux.pgo.txt b/chrome/build/linux.pgo.txt index 349ec06b..d20cd10 100644 --- a/chrome/build/linux.pgo.txt +++ b/chrome/build/linux.pgo.txt
@@ -1 +1 @@ -chrome-linux-main-1686569992-9bf01d9c86c4777decea09c70359b630ce869912.profdata +chrome-linux-main-1686592783-12f8170d67690f1acad6573f0402eca64644f8f6.profdata
diff --git a/chrome/build/mac-arm.pgo.txt b/chrome/build/mac-arm.pgo.txt index 58ad12b..5e6def7 100644 --- a/chrome/build/mac-arm.pgo.txt +++ b/chrome/build/mac-arm.pgo.txt
@@ -1 +1 @@ -chrome-mac-arm-main-1686585264-535e60bb187d1b074222174fd82de76dececcc07.profdata +chrome-mac-arm-main-1686599995-e3d8e18843f06739968dcc4188906ec037ba9463.profdata
diff --git a/chrome/build/mac.pgo.txt b/chrome/build/mac.pgo.txt index c0ea5ee9..3ad3e79 100644 --- a/chrome/build/mac.pgo.txt +++ b/chrome/build/mac.pgo.txt
@@ -1 +1 @@ -chrome-mac-main-1686569992-b4fe55a9556e878b10149bd2fc16235139dedd3d.profdata +chrome-mac-main-1686592783-1607a28f8d7d28e7ba5762ed24afa975b3d88500.profdata
diff --git a/chrome/build/win32.pgo.txt b/chrome/build/win32.pgo.txt index 3f125dc..8d6f90f 100644 --- a/chrome/build/win32.pgo.txt +++ b/chrome/build/win32.pgo.txt
@@ -1 +1 @@ -chrome-win32-main-1686581874-03a60b64830ba860a0a9fb840ce1fb0bbd6b43e0.profdata +chrome-win32-main-1686592783-85754930a4af3575c44761bc7533721569e268c6.profdata
diff --git a/chrome/build/win64.pgo.txt b/chrome/build/win64.pgo.txt index 7c6f5e97..5f24fed 100644 --- a/chrome/build/win64.pgo.txt +++ b/chrome/build/win64.pgo.txt
@@ -1 +1 @@ -chrome-win64-main-1686581874-c98e51981c7c560967a9fba6e047d14b5c0acbb1.profdata +chrome-win64-main-1686592783-5f6d11feea6209dfcc47d68767c23cc7366d959f.profdata
diff --git a/chrome/common/webui_url_constants.cc b/chrome/common/webui_url_constants.cc index 41b1dd0..3f7e9587 100644 --- a/chrome/common/webui_url_constants.cc +++ b/chrome/common/webui_url_constants.cc
@@ -306,8 +306,6 @@ const char kChromeUIActivationMessageHost[] = "activationmessage"; const char kChromeUIAddSupervisionHost[] = "add-supervision"; const char kChromeUIAddSupervisionURL[] = "chrome://add-supervision/"; -const char kChromeUIArcGraphicsTracingHost[] = "arc-graphics-tracing"; -const char kChromeUIArcGraphicsTracingURL[] = "chrome://arc-graphics-tracing/"; const char kChromeUIArcOverviewTracingHost[] = "arc-overview-tracing"; const char kChromeUIArcOverviewTracingURL[] = "chrome://arc-overview-tracing/"; const char kChromeUIArcPowerControlHost[] = "arc-power-control";
diff --git a/chrome/common/webui_url_constants.h b/chrome/common/webui_url_constants.h index c6c88aa..ee1db7c 100644 --- a/chrome/common/webui_url_constants.h +++ b/chrome/common/webui_url_constants.h
@@ -281,8 +281,6 @@ extern const char kChromeUIActivationMessageHost[]; extern const char kChromeUIAddSupervisionHost[]; extern const char kChromeUIAddSupervisionURL[]; -extern const char kChromeUIArcGraphicsTracingHost[]; -extern const char kChromeUIArcGraphicsTracingURL[]; extern const char kChromeUIArcOverviewTracingHost[]; extern const char kChromeUIArcOverviewTracingURL[]; extern const char kChromeUIArcPowerControlHost[];
diff --git a/chrome/installer/PRESUBMIT.py b/chrome/installer/PRESUBMIT.py deleted file mode 100644 index 9c43fef0..0000000 --- a/chrome/installer/PRESUBMIT.py +++ /dev/null
@@ -1,31 +0,0 @@ -# Copyright 2021 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -PRESUBMIT_VERSION = '2.0.0' - -def CheckBreakingInstallerVersionBumpNeeded(input_api, output_api): - files = [] - breaking_version_installer_updated = False - - for f in input_api.AffectedSourceFiles(input_api.FilterSourceFile): - breaking_version_installer_updated |= (f.LocalPath() == - 'chrome/installer/setup/last_breaking_installer_version.cc') - if (f.LocalPath() == 'chrome/installer/mini_installer/chrome.release' or - f.LocalPath().startswith('chrome/test/mini_installer')): - files.append(f.LocalPath()) - - if files and not breaking_version_installer_updated: - return [output_api.PresubmitPromptWarning(''' -Update chrome/installer/setup/last_breaking_installer_version.cc if the changes -found in the following files might break make downgrades not possible beyond -this browser's version.''', items=files)] - - if not files and breaking_version_installer_updated: - return [output_api.PresubmitPromptWarning(''' -No installer breaking changes detected but -chrome/installer/setup/last_breaking_installer_version.cc was updated. Please -update chrome/installer/PRESUBMIT.py if more files need to be watched for -breaking installer changes.''')] - - return []
diff --git a/chrome/test/android/javatests/src/org/chromium/chrome/test/ChromeActivityTestRule.java b/chrome/test/android/javatests/src/org/chromium/chrome/test/ChromeActivityTestRule.java index bef36af..6661da1 100644 --- a/chrome/test/android/javatests/src/org/chromium/chrome/test/ChromeActivityTestRule.java +++ b/chrome/test/android/javatests/src/org/chromium/chrome/test/ChromeActivityTestRule.java
@@ -70,13 +70,9 @@ * @param <T> The {@link Activity} class under test. */ public class ChromeActivityTestRule<T extends ChromeActivity> extends BaseActivityTestRule<T> { - private static final String TAG = "ChromeATR"; - // The number of ms to wait for the rendering activity to be started. private static final int ACTIVITY_START_TIMEOUT_MS = 1000; - private static final long OMNIBOX_FIND_SUGGESTION_TIMEOUT_MS = 10 * 1000; - private Thread.UncaughtExceptionHandler mDefaultUncaughtExceptionHandler; private String mCurrentTestName;
diff --git a/chrome/test/android/javatests/src/org/chromium/chrome/test/MultiActivityTestRule.java b/chrome/test/android/javatests/src/org/chromium/chrome/test/MultiActivityTestRule.java index 0b121f11..38826af 100644 --- a/chrome/test/android/javatests/src/org/chromium/chrome/test/MultiActivityTestRule.java +++ b/chrome/test/android/javatests/src/org/chromium/chrome/test/MultiActivityTestRule.java
@@ -27,8 +27,6 @@ /** Custom TestRule for MultiActivity Tests. */ public class MultiActivityTestRule implements TestRule { - private static final String TAG = "MultiActivityTest"; - Context mContext; public Context getContext() {
diff --git a/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/controllers/urlpage/UrlPage.java b/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/controllers/urlpage/UrlPage.java index b8ce620..62e9fc8c 100644 --- a/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/controllers/urlpage/UrlPage.java +++ b/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/controllers/urlpage/UrlPage.java
@@ -23,7 +23,6 @@ private static final IUi2Locator LOCATOR_URL_BAR = Ui2Locators.withAnyResEntry(R.id.url_bar); private static final IUi2Locator LOCATOR_TAB_SWITCHER = Ui2Locators.withAnyResEntry(R.id.tab_switcher_button); - private static final IUi2Locator LOCATOR_MENU = Ui2Locators.withAnyResEntry(R.id.menu_button); private static final UrlPage sInstance = new UrlPage(); private UrlPage() {}
diff --git a/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/utils/UiLocatorHelper.java b/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/utils/UiLocatorHelper.java index 33a91f5..ffbde95 100644 --- a/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/utils/UiLocatorHelper.java +++ b/chrome/test/android/javatests/src/org/chromium/chrome/test/pagecontroller/utils/UiLocatorHelper.java
@@ -25,12 +25,10 @@ * This helper class provides these capabilities. */ public class UiLocatorHelper { - private static final String TAG = "UiLocatorHelper"; private static final long DEFAULT_TIMEOUT_MS = 3000L; // UI_CHECK_INTERVAL_MS is intentionally not modifiable so that longer timeouts // don't lead to slowness due to the checking interval being too coarse. static final long UI_CHECK_INTERVAL_MS = DEFAULT_TIMEOUT_MS / 4L; - private static final long DEFAULT_MAX_UI_SETTLE_TIME_MS = 200L; private static final ElementConverter<String> CONVERTER_TEXT = object2 -> { return object2.getText();
diff --git a/chrome/test/android/javatests/src/org/chromium/chrome/test/util/browser/FieldTrials.java b/chrome/test/android/javatests/src/org/chromium/chrome/test/util/browser/FieldTrials.java index c9844b4c..6a4c8d9 100644 --- a/chrome/test/android/javatests/src/org/chromium/chrome/test/util/browser/FieldTrials.java +++ b/chrome/test/android/javatests/src/org/chromium/chrome/test/util/browser/FieldTrials.java
@@ -24,7 +24,6 @@ private static FieldTrials sInstance; private final Map<String, Map<String, String>> mTrialToParamValueMap = new HashMap<>(); private final Map<String, Set<String>> mTrialToFeatureNameMap = new HashMap<>(); - private static final String TAG = "FieldTrials"; private FieldTrials() {}
diff --git a/chrome/test/android/test_support/src/org/chromium/chrome/test_support/PaymentRequestTestBridge.java b/chrome/test/android/test_support/src/org/chromium/chrome/test_support/PaymentRequestTestBridge.java index 55983824..6cf7266 100644 --- a/chrome/test/android/test_support/src/org/chromium/chrome/test_support/PaymentRequestTestBridge.java +++ b/chrome/test/android/test_support/src/org/chromium/chrome/test_support/PaymentRequestTestBridge.java
@@ -243,8 +243,6 @@ } } - private static final String TAG = "PaymentRequestTestBridge"; - @CalledByNative private static void setUseDelegateForTest(boolean isOffTheRecord, boolean isValidSsl, boolean prefsCanMakePayment, String twaPackageName) {
diff --git a/chrome/test/android/test_trusted_web_activity/src/org/chromium/chrome/browser/browserservices/TestTrustedWebActivityService.java b/chrome/test/android/test_trusted_web_activity/src/org/chromium/chrome/browser/browserservices/TestTrustedWebActivityService.java index 53790b8..f520ce8 100644 --- a/chrome/test/android/test_trusted_web_activity/src/org/chromium/chrome/browser/browserservices/TestTrustedWebActivityService.java +++ b/chrome/test/android/test_trusted_web_activity/src/org/chromium/chrome/browser/browserservices/TestTrustedWebActivityService.java
@@ -19,8 +19,6 @@ * A TrustedWebActivityService to be used in TrustedWebActivityClientTest. */ public class TestTrustedWebActivityService extends TrustedWebActivityService { - private static final String TAG = "TestTWAService"; - public static final String COMMAND_SET_RESPONSE = "setResponse"; public static final String SET_RESPONSE_NAME = "setResponse.name"; public static final String SET_RESPONSE_BUNDLE = "setResponse.bundle"; @@ -35,9 +33,7 @@ private static final String LOCATION_PERMISSION_RESULT = "locationPermissionResult"; private static final String START_LOCATION_COMMAND_NAME = "startLocation"; private static final String STOP_LOCATION_COMMAND_NAME = "stopLocation"; - private static final String LOCATION_ARG_ENABLE_HIGH_ACCURACY = "enableHighAccuracy"; private static final String EXTRA_NEW_LOCATION_AVAILABLE_CALLBACK = "onNewLocationAvailable"; - private static final String EXTRA_NEW_LOCATION_ERROR_CALLBACK = "onNewLocationError"; private static final String EXTRA_COMMAND_SUCCESS = "success"; private final TokenStore mTokenStore = new InMemoryStore();
diff --git a/chrome/test/data/webui/chromeos/internet_detail_dialog_test.js b/chrome/test/data/webui/chromeos/internet_detail_dialog_test.js index a3b1085..b8539ba 100644 --- a/chrome/test/data/webui/chromeos/internet_detail_dialog_test.js +++ b/chrome/test/data/webui/chromeos/internet_detail_dialog_test.js
@@ -337,6 +337,18 @@ assertEquals(accessPointName, getApnSectionSublabel()); assertFalse(isApnListShowing()); + // Update the APN's name property. + const name = 'name'; + await setupCellularNetwork( + /* isPrimary= */ true, /* isInhibited= */ false, + {accessPointName: accessPointName, name: name}); + + // Force a refresh. + internetDetailDialog.onDeviceStateListChanged(); + await flushAsync(); + assertEquals(name, getApnSectionSublabel()); + assertFalse(isApnListShowing()); + // Expand the section, the sublabel should no longer show. apnSection.click(); await flushAsync(); @@ -346,7 +358,7 @@ // Collapse the section, the sublabel should show. apnSection.click(); await flushAsync(); - assertEquals(accessPointName, getApnSectionSublabel()); + assertEquals(name, getApnSectionSublabel()); assertFalse(isApnListShowing()); } else { assertTrue(!!legacyApnElement);
diff --git a/chrome/test/data/webui/chromeos/personalization_app/ambient_observer_test.ts b/chrome/test/data/webui/chromeos/personalization_app/ambient_observer_test.ts index 5ac5516b..3485e75 100644 --- a/chrome/test/data/webui/chromeos/personalization_app/ambient_observer_test.ts +++ b/chrome/test/data/webui/chromeos/personalization_app/ambient_observer_test.ts
@@ -127,14 +127,17 @@ assertFalse(AmbientObserver.shouldLogPreviewsLoadPerformance); }); - test('sets to false if topic source is not kGooglePhotos', async () => { - ambientProvider.ambientObserverRemote!.onTopicSourceChanged( - TopicSource.kArtGallery); - personalizationStore.expectAction(AmbientActionName.SET_TOPIC_SOURCE); - await personalizationStore.waitForAction( - AmbientActionName.SET_TOPIC_SOURCE); - assertFalse(AmbientObserver.shouldLogPreviewsLoadPerformance); - }); + test( + 'sets to true if topic source is not kGooglePhotos but jelly is enabled', + async () => { + loadTimeData.overrideValues({isPersonalizationJellyEnabled: true}); + ambientProvider.ambientObserverRemote!.onTopicSourceChanged( + TopicSource.kArtGallery); + personalizationStore.expectAction(AmbientActionName.SET_TOPIC_SOURCE); + await personalizationStore.waitForAction( + AmbientActionName.SET_TOPIC_SOURCE); + assertTrue(AmbientObserver.shouldLogPreviewsLoadPerformance); + }); test('sets to false if already received preview images', async () => { personalizationStore.data.ambient.previews = [];
diff --git a/chrome/test/data/webui/chromeos/personalization_app/wallpaper_collections_element_test.ts b/chrome/test/data/webui/chromeos/personalization_app/wallpaper_collections_element_test.ts index 983b195..806e0b1 100644 --- a/chrome/test/data/webui/chromeos/personalization_app/wallpaper_collections_element_test.ts +++ b/chrome/test/data/webui/chromeos/personalization_app/wallpaper_collections_element_test.ts
@@ -209,7 +209,7 @@ }); test('customizes text for managed google photos', async () => { - const managedIconSelector = `iron-icon[icon='personalization:managed']`; + const managedIconSelector = `iron-icon[icon^='personalization:managed']`; personalizationStore.data.wallpaper.googlePhotos.enabled = GooglePhotosEnablementState.kEnabled;
diff --git a/chrome/test/data/webui/chromeos/personalization_app/wallpaper_fullscreen_element_test.ts b/chrome/test/data/webui/chromeos/personalization_app/wallpaper_fullscreen_element_test.ts index b7af3c24..69afbf3 100644 --- a/chrome/test/data/webui/chromeos/personalization_app/wallpaper_fullscreen_element_test.ts +++ b/chrome/test/data/webui/chromeos/personalization_app/wallpaper_fullscreen_element_test.ts
@@ -132,12 +132,13 @@ }); test('sets fullscreen class on body when entering fullscreen', async () => { + const fullscreenClassName = 'fullscreen-preview'; wallpaperFullscreenElement = initElement(WallpaperFullscreen); const {requestFullscreenPromise, exitFullscreenPromise} = mockFullscreenApis(); await waitAfterNextRender(wallpaperFullscreenElement); - assertEquals('', document.body.className); + assertFalse(document.body.classList.contains(fullscreenClassName)); personalizationStore.data.wallpaper.fullscreen = true; personalizationStore.data.wallpaper.currentSelected = @@ -146,13 +147,13 @@ await requestFullscreenPromise; - assertEquals('fullscreen-preview', document.body.className); + assertTrue(document.body.classList.contains(fullscreenClassName)); wallpaperFullscreenElement.exitFullscreen(); await exitFullscreenPromise; - assertEquals('', document.body.className); + assertFalse(document.body.classList.contains(fullscreenClassName)); }); test('exits full screen on exit button click', async () => {
diff --git a/chrome/test/data/webui/chromeos/personalization_app/wallpaper_preview_element_test.ts b/chrome/test/data/webui/chromeos/personalization_app/wallpaper_preview_element_test.ts index dacb7c9d..d1ae979 100644 --- a/chrome/test/data/webui/chromeos/personalization_app/wallpaper_preview_element_test.ts +++ b/chrome/test/data/webui/chromeos/personalization_app/wallpaper_preview_element_test.ts
@@ -128,7 +128,7 @@ function getManagedIcon(): HTMLElement|null { return wallpaperPreviewElement!.shadowRoot!.querySelector( - `iron-icon[icon='personalization:managed']`); + `iron-icon[icon^='personalization:managed']`); } assertEquals(null, getManagedIcon(), 'no managed icon visible');
diff --git a/chrome/test/data/webui/chromeos/shortcut_customization/search_box_test.ts b/chrome/test/data/webui/chromeos/shortcut_customization/search_box_test.ts index 70a30ce..97b99c9e 100644 --- a/chrome/test/data/webui/chromeos/shortcut_customization/search_box_test.ts +++ b/chrome/test/data/webui/chromeos/shortcut_customization/search_box_test.ts
@@ -450,6 +450,46 @@ searchBoxElement.searchResults[0]?.acceleratorLayoutInfo.description); }); + test('Filter disabled + ensure extra results are present', async () => { + [searchBoxElement, searchFieldElement, dropdownElement, + resultsListElement] = initSearchBoxElement(); + // Create a SearchResult that doesn't have any enabled AcceleratorInfos. + const disabledFirstAcceleratorInfo = + TakeScreenshotSearchResult.acceleratorInfos[0]!; + disabledFirstAcceleratorInfo.state = + AcceleratorState.kDisabledByUnavailableKeys; + const disabledSecondAcceleratorInfo = + TakeScreenshotSearchResult.acceleratorInfos[1]!; + disabledSecondAcceleratorInfo.state = + AcceleratorState.kDisabledByUnavailableKeys; + const fullyDisabledSearchResult: MojoSearchResult = { + ...TakeScreenshotSearchResult, + acceleratorInfos: + [disabledFirstAcceleratorInfo, disabledSecondAcceleratorInfo], + }; + + handler.setFakeSearchResult([ + fullyDisabledSearchResult, + CycleTabsTextSearchResult, + CycleTabsTextSearchResult, + CycleTabsTextSearchResult, + CycleTabsTextSearchResult, + CycleTabsTextSearchResult, + CycleTabsTextSearchResult, + ]); + + searchBoxElement.onSearchResultsAvailabilityChanged(); + await simulateSearch('query'); + + assertTrue(dropdownElement.opened); + // After filtering, at most 5 of the non-disabled elements should be + // shown. + assertEquals(5, searchBoxElement.searchResults.length); + assertEquals( + CycleTabsTextSearchResult.acceleratorLayoutInfo.description, + searchBoxElement.searchResults[0]?.acceleratorLayoutInfo.description); + }); + test('Max query length has been set', async () => { [searchBoxElement, searchFieldElement, dropdownElement, resultsListElement] = initSearchBoxElement();
diff --git a/chrome/test/data/webui/cr_components/chromeos/network/apn_list_item_test.js b/chrome/test/data/webui/cr_components/chromeos/network/apn_list_item_test.js index a81be66e..e49b5c63 100644 --- a/chrome/test/data/webui/cr_components/chromeos/network/apn_list_item_test.js +++ b/chrome/test/data/webui/cr_components/chromeos/network/apn_list_item_test.js
@@ -62,6 +62,13 @@ await flushTasks(); assertEquals( apnListItem.$.apnName.innerText, apnListItem.apn.accessPointName); + + apnListItem.apn = { + accessPointName: apnListItem.apn.accessPointName, + name: 'name', + }; + await flushTasks(); + assertEquals(apnListItem.$.apnName.innerText, apnListItem.apn.name); }); test('Check if connected sublabel is shown', async function() {
diff --git a/chrome/test/data/webui/cr_components/color_change_listener_test.ts b/chrome/test/data/webui/cr_components/color_change_listener_test.ts index a6fbe9b4..94ebddd 100644 --- a/chrome/test/data/webui/cr_components/color_change_listener_test.ts +++ b/chrome/test/data/webui/cr_components/color_change_listener_test.ts
@@ -2,14 +2,23 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -import {addColorChangeListener, colorProviderChangeHandler, COLORS_CSS_SELECTOR, refreshColorCss, removeColorChangeListener} from 'chrome://resources/cr_components/color_change_listener/colors_css_updater.js'; +// clang-format off +import {COLORS_CSS_SELECTOR, ColorChangeUpdater} from 'chrome://resources/cr_components/color_change_listener/colors_css_updater.js'; +// <if expr="chromeos_ash"> +import {addColorChangeListener, removeColorChangeListener} from 'chrome://resources/cr_components/color_change_listener/colors_css_updater.js'; +// </if> + import {getTrustedHTML} from 'chrome://resources/js/static_types.js'; import {assertEquals, assertFalse, assertNotEquals, assertTrue} from 'chrome://webui-test/chai_assert.js'; +// clang-format on suite('ColorChangeListenerTest', () => { + let updater: ColorChangeUpdater; + setup(() => { document.body.innerHTML = getTrustedHTML` <link rel="stylesheet" href="chrome://theme/colors.css?sets=ui"/>`; + updater = ColorChangeUpdater.forDocument(); }); /** @@ -19,7 +28,7 @@ function getSearchParam(matcher: string, param: string) { const nodes = document.querySelectorAll<HTMLLinkElement>(`link[href*='${matcher}']`); - // Since refreshColorCSS() won't remove the old link until the new link has + // Since refreshColorsCss() won't remove the old link until the new link has // finished loading we may have multiple matches. Pick the last one to // ensure were getting the most recently added element. const node = nodes[nodes.length - 1]; @@ -33,8 +42,9 @@ test('CorrectlyUpdatesColorsStylesheetURL', async () => { assertEquals(getSearchParam('chrome://theme/colors.css', 'version'), null); - // refreshColorCss() should append search params to the chrome://theme href. - assertTrue(await refreshColorCss()); + // refreshColorsCss() should append search params to the chrome://theme + // href. + assertTrue(await updater.refreshColorsCss()); let version = getSearchParam('chrome://theme/colors.css', 'version'); assertNotEquals(version, null); @@ -44,9 +54,9 @@ // Wait 1 millisecond before refresh. Otherwise the timestamp-based // version might not yet be updated. await new Promise(resolve => setTimeout(resolve, 1)); - assertTrue(await refreshColorCss()); - // refreshColorCss() should append search params to the colors CSS href. - assertTrue(await refreshColorCss()); + assertTrue(await updater.refreshColorsCss()); + // refreshColorsCss() should append search params to the colors CSS href. + assertTrue(await updater.refreshColorsCss()); version = getSearchParam('chrome://theme/colors.css', 'version'); assertTrue(!!version); @@ -57,11 +67,10 @@ test('IgnoresNonTargetStylesheetURLs', async () => { document.body.innerHTML = getTrustedHTML` <link rel="stylesheet" href="chrome://resources/colors.css"/>`; - assertEquals( getSearchParam('chrome://resources/colors.css', 'version'), null); - assertFalse(await refreshColorCss()); + assertFalse(await updater.refreshColorsCss()); assertEquals( getSearchParam('chrome://resources/colors.css', 'version'), null); @@ -75,7 +84,7 @@ `; assertEquals(getSearchParam('//theme/colors.css', 'version'), null); - assertTrue(await refreshColorCss()); + assertTrue(await updater.refreshColorsCss()); assertTrue(!!getSearchParam('//theme/colors.css', 'version')); }); @@ -85,23 +94,24 @@ // malformed. 
document.body.innerHTML = getTrustedHTML`<link rel="stylesheet" bad_href="chrome://theme/colors.css?sets=ui"/>`; - assertFalse(await refreshColorCss()); + assertFalse(await updater.refreshColorsCss()); // Handles the case where the link element does not exist. document.body.innerHTML = window.trustedTypes!.emptyHTML; - assertFalse(await refreshColorCss()); + assertFalse(await updater.refreshColorsCss()); }); test('HandlesCasesWhereColorCssIsRefreshedMultipleTimes', async () => { // Emulate multiple color change events from the mojo pipe. Do not await // the first call so that multiple events are in flight at the same time. await Promise.all( - [colorProviderChangeHandler(), colorProviderChangeHandler()]); + [updater.onColorProviderChanged(), updater.onColorProviderChanged()]); // Verify only one colors.css exists. assertEquals(1, document.querySelectorAll(COLORS_CSS_SELECTOR).length); }); + // <if expr="chromeos_ash"> test('RegistersColorChangeListener', async () => { let listenerCalledTimes = 0; addColorChangeListener(() => { @@ -109,7 +119,7 @@ }); // Emulate a color change event from the mojo pipe. - await colorProviderChangeHandler(); + await updater.onColorProviderChanged(); assertEquals(listenerCalledTimes, 1); }); @@ -122,13 +132,14 @@ addColorChangeListener(listener); // Emulate a color change event from the mojo pipe. - await colorProviderChangeHandler(); + await updater.onColorProviderChanged(); removeColorChangeListener(listener); // Emulate a color change event from the mojo pipe. - await colorProviderChangeHandler(); + await updater.onColorProviderChanged(); assertEquals(listenerCalledTimes, 1); }); + // </if> });
diff --git a/chrome/test/data/webui/new_tab_page/app_test.ts b/chrome/test/data/webui/new_tab_page/app_test.ts index 76b0363..7e4bce3 100644 --- a/chrome/test/data/webui/new_tab_page/app_test.ts +++ b/chrome/test/data/webui/new_tab_page/app_test.ts
@@ -653,6 +653,7 @@ suiteSetup(() => { loadTimeData.overrideValues({ modulesEnabled: true, + modulesRedesignedEnabled: false, wideModulesEnabled: false, }); }); @@ -725,6 +726,30 @@ }); }); + suite('v2 modules', () => { + suiteSetup(() => { + loadTimeData.overrideValues({ + modulesEnabled: true, + modulesRedesignedEnabled: true, + }); + }); + + test('container is hidden', async () => { + const modules = $$(app, 'ntp-modules-v2')!; + assertTrue(!!modules); + assertStyle(modules, 'display', 'none'); + }); + + test(`clicking records click`, () => { + // Act. + $$<HTMLElement>(app, 'ntp-modules-v2')!.click(); + + // Assert. + assertEquals(1, metrics.count('NewTabPage.Click')); + assertEquals(1, metrics.count('NewTabPage.Click', NtpElement.MODULE)); + }); + }); + suite('counterfactual modules', () => { suiteSetup(() => { loadTimeData.overrideValues({
diff --git a/chrome/test/data/webui/new_tab_page/modules/drive_v2/drive_v2.gni b/chrome/test/data/webui/new_tab_page/modules/drive_v2/drive_v2.gni deleted file mode 100644 index 4ca99a8..0000000 --- a/chrome/test/data/webui/new_tab_page/modules/drive_v2/drive_v2.gni +++ /dev/null
@@ -1,5 +0,0 @@ -# Copyright 2022 The Chromium Authors -# Use of this source code is governed by a BSD-style license that can be -# found in the LICENSE file. - -drive_v2_test_files = [ "modules/drive_v2/module_test.ts" ]
diff --git a/chrome/test/data/webui/new_tab_page/modules/modules.gni b/chrome/test/data/webui/new_tab_page/modules/modules.gni index 7befc346..8248b25 100644 --- a/chrome/test/data/webui/new_tab_page/modules/modules.gni +++ b/chrome/test/data/webui/new_tab_page/modules/modules.gni
@@ -4,15 +4,15 @@ import("./cart/cart.gni") import("./drive/drive.gni") -import("./drive_v2/drive_v2.gni") import("./feed/feed.gni") import("./history_clusters/history_clusters.gni") -import("./history_clusters_v2/history_clusters_v2.gni") import("./photos/photos.gni") import("./recipes/recipes.gni") +import("./v2/drive/drive.gni") +import("./v2/history_clusters/history_clusters.gni") if (!is_official_build) { - import("./dummy_v2/dummy_v2.gni") + import("./v2/dummy/dummy.gni") } modules_test_files =
diff --git a/chrome/test/data/webui/new_tab_page/modules/dummy_v2/dummy_v2.gni b/chrome/test/data/webui/new_tab_page/modules/v2/drive/drive.gni similarity index 69% copy from chrome/test/data/webui/new_tab_page/modules/dummy_v2/dummy_v2.gni copy to chrome/test/data/webui/new_tab_page/modules/v2/drive/drive.gni index aa6503aa..fa09605d 100644 --- a/chrome/test/data/webui/new_tab_page/modules/dummy_v2/dummy_v2.gni +++ b/chrome/test/data/webui/new_tab_page/modules/v2/drive/drive.gni
@@ -2,4 +2,4 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -dummy_v2_test_files = [ "modules/dummy_v2/module_test.ts" ] +drive_v2_test_files = [ "modules/v2/drive/module_test.ts" ]
diff --git a/chrome/test/data/webui/new_tab_page/modules/drive_v2/module_test.ts b/chrome/test/data/webui/new_tab_page/modules/v2/drive/module_test.ts similarity index 98% rename from chrome/test/data/webui/new_tab_page/modules/drive_v2/module_test.ts rename to chrome/test/data/webui/new_tab_page/modules/v2/drive/module_test.ts index 811d7cd..9a12041 100644 --- a/chrome/test/data/webui/new_tab_page/modules/drive_v2/module_test.ts +++ b/chrome/test/data/webui/new_tab_page/modules/v2/drive/module_test.ts
@@ -11,7 +11,7 @@ import {TestMock} from 'chrome://webui-test/test_mock.js'; import {eventToPromise, isVisible} from 'chrome://webui-test/test_util.js'; -import {installMock} from '../../test_support.js'; +import {installMock} from '../../../test_support.js'; suite('NewTabPageModulesDriveModuleTest', () => { let handler: TestMock<DriveHandlerRemote>;
diff --git a/chrome/test/data/webui/new_tab_page/modules/dummy_v2/dummy_v2.gni b/chrome/test/data/webui/new_tab_page/modules/v2/dummy/dummy.gni similarity index 69% rename from chrome/test/data/webui/new_tab_page/modules/dummy_v2/dummy_v2.gni rename to chrome/test/data/webui/new_tab_page/modules/v2/dummy/dummy.gni index aa6503aa..0798068a 100644 --- a/chrome/test/data/webui/new_tab_page/modules/dummy_v2/dummy_v2.gni +++ b/chrome/test/data/webui/new_tab_page/modules/v2/dummy/dummy.gni
@@ -2,4 +2,4 @@ # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. -dummy_v2_test_files = [ "modules/dummy_v2/module_test.ts" ] +dummy_v2_test_files = [ "modules/v2/dummy/module_test.ts" ]
diff --git a/chrome/test/data/webui/new_tab_page/modules/dummy_v2/module_test.ts b/chrome/test/data/webui/new_tab_page/modules/v2/dummy/module_test.ts similarity index 97% rename from chrome/test/data/webui/new_tab_page/modules/dummy_v2/module_test.ts rename to chrome/test/data/webui/new_tab_page/modules/v2/dummy/module_test.ts index ab6fb235..4d57001 100644 --- a/chrome/test/data/webui/new_tab_page/modules/dummy_v2/module_test.ts +++ b/chrome/test/data/webui/new_tab_page/modules/v2/dummy/module_test.ts
@@ -11,7 +11,7 @@ import {TestMock} from 'chrome://webui-test/test_mock.js'; import {isVisible} from 'chrome://webui-test/test_util.js'; -import {installMock} from '../../test_support.js'; +import {installMock} from '../../../test_support.js'; suite('NewTabPageModulesDummyModuleTest', () => { let handler: TestMock<FooHandlerRemote>;
diff --git a/chrome/test/data/webui/new_tab_page/modules/history_clusters_v2/history_clusters_v2.gni b/chrome/test/data/webui/new_tab_page/modules/v2/history_clusters/history_clusters.gni similarity index 76% rename from chrome/test/data/webui/new_tab_page/modules/history_clusters_v2/history_clusters_v2.gni rename to chrome/test/data/webui/new_tab_page/modules/v2/history_clusters/history_clusters.gni index 07d1dd1..af6365b 100644 --- a/chrome/test/data/webui/new_tab_page/modules/history_clusters_v2/history_clusters_v2.gni +++ b/chrome/test/data/webui/new_tab_page/modules/v2/history_clusters/history_clusters.gni
@@ -3,4 +3,4 @@ # found in the LICENSE file. history_clusters_v2_test_files = - [ "modules/history_clusters_v2/module_test.ts" ] + [ "modules/v2/history_clusters/module_test.ts" ]
diff --git a/chrome/test/data/webui/new_tab_page/modules/history_clusters_v2/module_test.ts b/chrome/test/data/webui/new_tab_page/modules/v2/history_clusters/module_test.ts similarity index 97% rename from chrome/test/data/webui/new_tab_page/modules/history_clusters_v2/module_test.ts rename to chrome/test/data/webui/new_tab_page/modules/v2/history_clusters/module_test.ts index c9aba4c5..8927d5f 100644 --- a/chrome/test/data/webui/new_tab_page/modules/history_clusters_v2/module_test.ts +++ b/chrome/test/data/webui/new_tab_page/modules/v2/history_clusters/module_test.ts
@@ -13,8 +13,8 @@ import {waitAfterNextRender} from 'chrome://webui-test/polymer_test_util.js'; import {TestMock} from 'chrome://webui-test/test_mock.js'; -import {installMock} from '../../test_support.js'; -import {assertModuleHeaderTitle, createRelatedSearches, createSampleVisits} from '../history_clusters/test_support.js'; +import {installMock} from '../../../test_support.js'; +import {assertModuleHeaderTitle, createRelatedSearches, createSampleVisits} from '../../history_clusters/test_support.js'; function createSampleClusters(count: number): Cluster[] { return new Array(count).fill(0).map(
diff --git a/chrome/test/data/webui/new_tab_page/new_tab_page_browsertest.js b/chrome/test/data/webui/new_tab_page/new_tab_page_browsertest.js index e5490fbd..dea9efe 100644 --- a/chrome/test/data/webui/new_tab_page/new_tab_page_browsertest.js +++ b/chrome/test/data/webui/new_tab_page/new_tab_page_browsertest.js
@@ -57,6 +57,10 @@ runMochaSuite('NewTabPageAppTest modules'); }); +TEST_F('NewTabPageAppTest', 'V2Modules', function() { + runMochaSuite('NewTabPageAppTest v2 modules'); +}); + TEST_F('NewTabPageAppTest', 'CounterfactualModules', function() { runMochaSuite('NewTabPageAppTest counterfactual modules'); }); @@ -300,7 +304,7 @@ var NewTabPageModulesDummyModuleTest = class extends NewTabPageBrowserTest { /** @override */ get browsePreload() { - return 'chrome://new-tab-page/test_loader.html?module=new_tab_page/modules/dummy_v2/module_test.js'; + return 'chrome://new-tab-page/test_loader.html?module=new_tab_page/modules/v2/dummy/module_test.js'; } }; @@ -335,7 +339,7 @@ var NewTabPageModulesDriveV2ModuleTest = class extends NewTabPageBrowserTest { /** @override */ get browsePreload() { - return 'chrome://new-tab-page/test_loader.html?module=new_tab_page/modules/drive_v2/module_test.js'; + return 'chrome://new-tab-page/test_loader.html?module=new_tab_page/modules/v2/drive/module_test.js'; } }; @@ -482,7 +486,7 @@ class extends NewTabPageBrowserTest { /** @override */ get browsePreload() { - return 'chrome://new-tab-page/test_loader.html?module=new_tab_page/modules/history_clusters_v2/module_test.js'; + return 'chrome://new-tab-page/test_loader.html?module=new_tab_page/modules/v2/history_clusters/module_test.js'; } /** @override */
diff --git a/chrome/test/data/webui/settings/chromeos/internet_page/internet_detail_subpage_tests.js b/chrome/test/data/webui/settings/chromeos/internet_page/internet_detail_subpage_tests.js index a91d16e3..6dc4529 100644 --- a/chrome/test/data/webui/settings/chromeos/internet_page/internet_detail_subpage_tests.js +++ b/chrome/test/data/webui/settings/chromeos/internet_page/internet_detail_subpage_tests.js
@@ -1593,15 +1593,24 @@ }], }); await flushAsync(); - const crLink = + const getCrLink = () => internetDetailPage.shadowRoot.querySelector('#apnSubpageButton'); - const apn = - crLink ? crLink.shadowRoot.querySelector('#subLabel') : null; + const getApn = () => getCrLink() ? + getCrLink().shadowRoot.querySelector('#subLabel') : + null; if (isApnRevampEnabled) { - assertTrue(!!apn); - assertEquals(apn.textContent.trim(), apnName); + assertTrue(!!getApn()); + assertEquals(apnName, getApn().textContent.trim()); + + const name = 'name'; + cellularNetwork.typeProperties.cellular.connectedApn.name = name; + mojoApi_.setManagedPropertiesForTest(cellularNetwork); + internetDetailPage.init('cellular_guid', 'Cellular', 'cellular'); + await flushAsync(); + assertTrue(!!getApn()); + assertEquals(name, getApn().textContent.trim()); } else { - assertFalse(!!apn); + assertFalse(!!getApn()); } }); });
diff --git a/chrome/test/data/webui/side_panel/bookmarks/power_bookmarks_list_test.ts b/chrome/test/data/webui/side_panel/bookmarks/power_bookmarks_list_test.ts index 9d4ba64..a0122a8 100644 --- a/chrome/test/data/webui/side_panel/bookmarks/power_bookmarks_list_test.ts +++ b/chrome/test/data/webui/side_panel/bookmarks/power_bookmarks_list_test.ts
@@ -14,12 +14,10 @@ import {PageImageServiceBrowserProxy} from 'chrome://resources/cr_components/page_image_service/browser_proxy.js'; import {PageImageServiceHandlerRemote} from 'chrome://resources/cr_components/page_image_service/page_image_service.mojom-webui.js'; import {loadTimeData} from 'chrome://resources/js/load_time_data.js'; -import {PluralStringProxyImpl} from 'chrome://resources/js/plural_string_proxy.js'; import {flush} from 'chrome://resources/polymer/v3_0/polymer/polymer_bundled.min.js'; import {assertEquals, assertFalse, assertNotEquals, assertTrue} from 'chrome://webui-test/chai_assert.js'; import {flushTasks} from 'chrome://webui-test/polymer_test_util.js'; import {TestMock} from 'chrome://webui-test/test_mock.js'; -import {TestPluralStringProxy} from 'chrome://webui-test/test_plural_string_proxy.js'; import {TestShoppingListApiProxy} from './commerce/test_shopping_list_api_proxy.js'; import {TestBookmarksApiProxy} from './test_bookmarks_api_proxy.js'; @@ -90,9 +88,6 @@ shoppingListApi = new TestShoppingListApiProxy(); ShoppingListApiProxyImpl.setInstance(shoppingListApi); - const pluralString = new TestPluralStringProxy(); - PluralStringProxyImpl.setInstance(pluralString); - imageServiceHandler = TestMock.fromClass(PageImageServiceHandlerRemote); PageImageServiceBrowserProxy.setInstance( new PageImageServiceBrowserProxy(imageServiceHandler)); @@ -254,9 +249,8 @@ const urlListItemElement = folderElement.shadowRoot!.querySelector('cr-url-list-item'); - const pluralString = - await PluralStringProxyImpl.getInstance().getPluralString('foo', 1); - assertTrue(urlListItemElement!.description!.includes(pluralString)); + const compactDescription = '(1)'; + assertTrue(urlListItemElement!.description!.includes(compactDescription)); }); test('SetsExpandedDescription', () => {
diff --git a/chrome/test/data/webui/side_panel/bookmarks/test_power_bookmarks_delegate.ts b/chrome/test/data/webui/side_panel/bookmarks/test_power_bookmarks_delegate.ts index e191d18..3cab414 100644 --- a/chrome/test/data/webui/side_panel/bookmarks/test_power_bookmarks_delegate.ts +++ b/chrome/test/data/webui/side_panel/bookmarks/test_power_bookmarks_delegate.ts
@@ -8,8 +8,6 @@ constructor() { super([ 'setCurrentUrl', - 'setCompactDescription', - 'setExpandedDescription', 'setImageUrl', 'onBookmarksLoaded', 'onBookmarkChanged', @@ -25,16 +23,6 @@ this.methodCalled('setCurrentUrl', url); } - setCompactDescription( - bookmark: chrome.bookmarks.BookmarkTreeNode, description: string) { - this.methodCalled('setCompactDescription', bookmark, description); - } - - setExpandedDescription( - bookmark: chrome.bookmarks.BookmarkTreeNode, description: string) { - this.methodCalled('setExpandedDescription', bookmark, description); - } - setImageUrl(bookmark: chrome.bookmarks.BookmarkTreeNode, url: string) { this.methodCalled('setImageUrl', bookmark, url); }
diff --git a/chrome/test/enterprise/e2e/policy/open_page.py b/chrome/test/enterprise/e2e/policy/open_page.py index 5387f32..3823a16 100644 --- a/chrome/test/enterprise/e2e/policy/open_page.py +++ b/chrome/test/enterprise/e2e/policy/open_page.py
@@ -33,9 +33,9 @@ time.sleep(FLAGS.wait) if FLAGS.text_only: - print(driver.find_element_by_css_selector('html').text.encode('utf-8')) + print(driver.find_element_by_css_selector('html').text) else: - print(driver.page_source.encode('utf-8')) + print(driver.page_source) driver.quit()
diff --git a/chrome/test/enterprise/e2e/policy/url_allowlist/url_allowlist.py b/chrome/test/enterprise/e2e/policy/url_allowlist/url_allowlist.py index 8207805..58ac0cc6 100644 --- a/chrome/test/enterprise/e2e/policy/url_allowlist/url_allowlist.py +++ b/chrome/test/enterprise/e2e/policy/url_allowlist/url_allowlist.py
@@ -9,6 +9,8 @@ from chrome_ent_test.infra.core import test from infra import ChromeEnterpriseTestCase +_ERR_BLOCKED_BY_ADMINISTRATOR = 'is blocked' + @environment(file="../policy_test.asset.textpb") class UrlAllowlistTest(ChromeEnterpriseTestCase): @@ -45,19 +47,19 @@ @test def test_AllowedUrlCanVisit(self): output = self.openPage('https://youtube.com') - self.assertNotIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertNotIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output) @test def test_NotAllowedUrlCantVisit(self): output = self.openPage('https://google.com') - self.assertIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output) @test def test_AllowedUrlCanVisitIncognito(self): output = self.openPage('https://youtube.com', incognito=True) - self.assertNotIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertNotIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output) @test def test_NotAllowedUrlCantVisitIncognito(self): output = self.openPage('https://google.com', incognito=True) - self.assertIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output)
diff --git a/chrome/test/enterprise/e2e/policy/url_blocklist/url_blocklist.py b/chrome/test/enterprise/e2e/policy/url_blocklist/url_blocklist.py index 7a558761..fae1f3d1 100644 --- a/chrome/test/enterprise/e2e/policy/url_blocklist/url_blocklist.py +++ b/chrome/test/enterprise/e2e/policy/url_blocklist/url_blocklist.py
@@ -9,6 +9,8 @@ from chrome_ent_test.infra.core import test from infra import ChromeEnterpriseTestCase +_ERR_BLOCKED_BY_ADMINISTRATOR = 'is blocked' + @environment(file="../policy_test.asset.textpb") class UrlBlocklistTest(ChromeEnterpriseTestCase): @@ -39,10 +41,10 @@ # Verify that we can't visit any site. output = self.openPage('https://youtube.com/yt/about/', incognito=incognito) - self.assertIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output) output = self.openPage('https://google.com', incognito=incognito) - self.assertIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output) @test def test_BlocklistYouTubeCantVisit(self, incognito=False): @@ -52,10 +54,10 @@ # Verify that we can't visit YouTube, but can still visit other sites. output = self.openPage('https://youtube.com/yt/about/', incognito=incognito) - self.assertIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output) output = self.openPage('https://google.com', incognito=incognito) - self.assertNotIn("ERR_BLOCKED_BY_ADMINISTRATOR", output) + self.assertNotIn(_ERR_BLOCKED_BY_ADMINISTRATOR, output) @test def test_BlocklistAllCantVisitIncognito(self):
diff --git a/chrome/test/fuzzing/kombucha_in_process_fuzzer.cc b/chrome/test/fuzzing/kombucha_in_process_fuzzer.cc index 595cf58..9bc50f4 100644 --- a/chrome/test/fuzzing/kombucha_in_process_fuzzer.cc +++ b/chrome/test/fuzzing/kombucha_in_process_fuzzer.cc
@@ -2,21 +2,29 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "chrome/test/fuzzing/kombucha_in_process_fuzzer.pb.h" - -#include "base/functional/bind.h" -#include "base/memory/weak_ptr.h" +#include <fuzzer/FuzzedDataProvider.h> +#include <stddef.h> +#include <stdint.h> +#include <cstdint> +#include <vector> +#include "base/test/scoped_feature_list.h" +#include "chrome/browser/ui/accelerator_utils.h" #include "chrome/browser/ui/browser_element_identifiers.h" +#include "chrome/browser/ui/toolbar/app_menu_model.h" +#include "chrome/browser/ui/toolbar/bookmark_sub_menu_model.h" +#include "chrome/browser/ui/ui_features.h" +#include "chrome/browser/ui/views/tabs/tab.h" +#include "chrome/browser/ui/views/tabs/tab_group_header.h" +#include "chrome/browser/ui/views/tabs/tab_strip.h" #include "chrome/test/base/ui_test_utils.h" #include "chrome/test/fuzzing/in_process_fuzzer.h" +#include "chrome/test/interaction/interaction_test_util_browser.h" #include "chrome/test/interaction/interactive_browser_test.h" #include "content/public/browser/browser_thread.h" #include "net/dns/mock_host_resolver.h" #include "net/test/embedded_test_server/embedded_test_server.h" - -// At the moment, this is an example use of the InProcessFuzzer framework -// that uses Kombucha + protos. It's not yet intended to be an effective fuzzer, -// but just to be the skeleton of how this framework can be used. +#include "ui/base/interaction/element_identifier.h" +#include "ui/base/interaction/interactive_test.h" #define DEFINE_BINARY_PROTO_IN_PROCESS_FUZZER(arg) \ DEFINE_PROTO_FUZZER_IN_PROCESS_IMPL(true, arg) @@ -33,15 +41,56 @@ class KombuchaInProcessFuzzer : virtual public InteractiveBrowserTestT<InProcessFuzzer> { public: - using KombuchaTestCase = chrome::test::fuzzing::kombucha_in_process_fuzzer:: - proto::KombuchaTestcase; + void SetUp() override { + scoped_feature_list_.InitWithFeatures( + {features::kTabGroupsSave, features::kExtensionsMenuInAppMenu}, {}); + InteractiveBrowserTestT::SetUp(); + } + void SetUpOnMainThread() override; int Fuzz(const uint8_t* data, size_t size) override; static std::unique_ptr<net::test_server::HttpResponse> HandleHTTPRequest( base::WeakPtr<KombuchaInProcessFuzzer> fuzzer_weak, const net::test_server::HttpRequest& request); - KombuchaTestCase current_fuzz_case_; + std::string current_fuzz_case_; + + // ElementIdentifiers that can be targeted by PressButton + std::vector<ui::ElementIdentifier> button_elements = { + kBackButtonElementId, kDownloadToolbarButtonElementId, + kForwardButtonElementId, kMediaButtonElementId, + kNewTabButtonElementId, kSidePanelButtonElementId, + kSidePanelCloseButtonElementId, kSidePanelOpenInNewTabButtonElementId, + kSidePanelPinButtonElementId, kTabAlertIndicatorButtonElementId, + kTabCounterButtonElementId, kTabSearchButtonElementId}; + + // Element Identifiers that can be targeted by SelectMenuItem + std::vector<ui::ElementIdentifier> menu_elements = { + AppMenuModel::kBookmarksMenuItem, AppMenuModel::kDownloadsMenuItem, + AppMenuModel::kHistoryMenuItem, AppMenuModel::kExtensionsMenuItem, + AppMenuModel::kMoreToolsMenuItem, AppMenuModel::kPasswordManagerMenuItem}; + + auto CheckAndSelectMenuItem(ui::ElementIdentifier item) { + return Steps(EnsurePresent(item), SelectMenuItem(item)); + } + auto CheckAndPressButton(ui::ElementIdentifier item) { + return Steps(EnsurePresent(item), PressButton(item)); + } + + auto ShowBookmarksBar() { + return Steps(PressButton(kAppMenuButtonElementId), + 
SelectMenuItem(AppMenuModel::kBookmarksMenuItem), + SelectMenuItem(BookmarkSubMenuModel::kShowBookmarkBarMenuItem), + WaitForShow(kBookmarkBarElementId)); + } + + ui::Accelerator fullscreen_accelerator_; + ui::Accelerator close_tab_accelerator_; + ui::Accelerator group_target_tab_accelerator_; + + private: + base::test::ScopedFeatureList scoped_feature_list_; + base::WeakPtrFactory<KombuchaInProcessFuzzer> weak_ptr_factory_{this}; }; @@ -53,6 +102,13 @@ base::BindRepeating(&KombuchaInProcessFuzzer::HandleHTTPRequest, weak_ptr_factory_.GetWeakPtr())); ASSERT_TRUE(embedded_test_server()->Start()); + // Accelerators for using in fuzzing + chrome::AcceleratorProviderForBrowser(browser())->GetAcceleratorForCommandId( + IDC_FULLSCREEN, &fullscreen_accelerator_); + chrome::AcceleratorProviderForBrowser(browser())->GetAcceleratorForCommandId( + IDC_CLOSE_TAB, &close_tab_accelerator_); + chrome::AcceleratorProviderForBrowser(browser())->GetAcceleratorForCommandId( + IDC_GROUP_TARGET_TAB, &group_target_tab_accelerator_); } std::unique_ptr<net::test_server::HttpResponse> @@ -61,8 +117,8 @@ const net::test_server::HttpRequest& request) { std::unique_ptr<net::test_server::BasicHttpResponse> response; response = std::make_unique<net::test_server::BasicHttpResponse>(); - response->set_content_type("application/x-protobuf"); - KombuchaTestCase testcase; + response->set_content_type("text/html"); + std::string response_body = ""; // We are running on the embedded test server's thread. // We want to ask the fuzzer thread for the latest payload, // but there's a risk of UaF if it's being destroyed. @@ -73,33 +129,102 @@ base::BindLambdaForTesting([&]() { KombuchaInProcessFuzzer* fuzzer = fuzzer_weak.get(); if (fuzzer) { - testcase = fuzzer->current_fuzz_case_; + response_body = fuzzer->current_fuzz_case_; } run_loop.Quit(); }); content::GetUIThreadTaskRunner()->PostTask(FROM_HERE, get_payload_lambda); run_loop.Run(); - response->set_content(testcase.SerializeAsString()); + response->set_content(response_body); response->set_code(net::HTTP_OK); return response; } int KombuchaInProcessFuzzer::Fuzz(const uint8_t* data, size_t size) { - KombuchaTestCase proto_testcase; - proto_testcase.ParseFromArray(data, size); - current_fuzz_case_ = proto_testcase; - - // The following does not make use of data and size in any way. - // This state is temporary; Fuzz should be updated to use the provided data. + std::string html_string(reinterpret_cast<const char*>(data), size); + current_fuzz_case_ = html_string; DEFINE_LOCAL_ELEMENT_IDENTIFIER_VALUE(kPrimaryTabElementId); DEFINE_LOCAL_ELEMENT_IDENTIFIER_VALUE(kSecondaryTabElementId); GURL test_url = embedded_test_server()->GetURL("/test.html"); - RunTestSequence( - InstrumentTab(kPrimaryTabElementId, 0), - PressButton(kNewTabButtonElementId), - AddInstrumentedTab(kSecondaryTabElementId, GURL("about:blank")), - // Only the following step requires the webserver. 
- NavigateWebContents(kSecondaryTabElementId, test_url)); + FuzzedDataProvider data_provider = FuzzedDataProvider(data, size); + + // Base input always used in fuzzer + // Start with three tabs + auto ui_input = + Steps(PressButton(kNewTabButtonElementId), + InstrumentTab(kPrimaryTabElementId, 0), + AddInstrumentedTab(kSecondaryTabElementId, GURL("about:blank")), + Log("Passed initial setup steps")); + // Always consume 3 bytes as operations + while (data_provider.remaining_bytes() >= 3) { + std::vector<uint8_t> ops = data_provider.ConsumeBytes<uint8_t>(3); + + // Byte to determine which item to target + uint8_t action = ops.at(1); + ui::ElementIdentifier item; + + // TODO(xrosado): Use first byte as eventual decider between normal run or + // parallel + switch (action % 8) { + case 1: + item = button_elements[ops.at(2) % (button_elements.size())]; + AddStep(ui_input, + Steps(CheckAndPressButton(item), Log("Hit pressbutton"))); + break; + case 2: + AddStep( + ui_input, + Steps(SelectTab(kTabStripElementId, + ops.at(2) % browser()->tab_strip_model()->count()), + Log("Hit select tab"))); + break; + case 3: + AddStep(ui_input, Steps(ShowBookmarksBar(), Log("Hit bookmarks bar"))); + break; + case 4: + item = menu_elements[ops.at(2) % (menu_elements.size())]; + AddStep(ui_input, Steps(CheckAndPressButton(kAppMenuButtonElementId), + CheckAndSelectMenuItem(item), + Log("Hit select menu item"))); + break; + case 5: + AddStep(ui_input, Steps(SendAccelerator(kBrowserViewElementId, + fullscreen_accelerator_), + Log("Hit FullScreen accelerator"))); + break; + case 6: + AddStep(ui_input, Steps(SendAccelerator(kBrowserViewElementId, + close_tab_accelerator_), + Log("Hit Close Tab accelerator"))); + break; + case 7: + AddStep(ui_input, Steps(SendAccelerator(kBrowserViewElementId, + group_target_tab_accelerator_), + Log("Hit Group Tab accelerator"))); + break; + default: + break; + } + } + // POC for JS execution + // TODO(xrosado) Generalize to more inputs and convert to a case + std::string key_event_js = + "el => el.dispatchEvent(new KeyboardEvent('keydown', {'key':'ArrowDown', " + "'code':'ArrowDown'}))"; + + AddStep(ui_input, Log("Executed all procedurally generated UI inputs")); + + // Set of inputs always placed at the end + // Mainly used for debugging and sanity checks + AddStep(ui_input, + Steps(NavigateWebContents(kSecondaryTabElementId, test_url), + Log("Passed navigation step"), SelectTab(kTabStripElementId, 2), + Log("About to execute js"), + ExecuteJs(kSecondaryTabElementId, key_event_js), + Log("Executed js event"))); + + RunTestSequence(std::move(ui_input)); + return 0; }
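The rewritten Fuzz() above drives Kombucha UI steps by consuming three bytes per operation: the middle byte picks the action and the last byte picks the target, with the first byte reserved for a future run-mode switch. A minimal standalone sketch of that dispatch pattern as a plain libFuzzer target (the two UI helpers are hypothetical stand-ins, not Kombucha APIs):

#include <fuzzer/FuzzedDataProvider.h>

#include <cstddef>
#include <cstdint>
#include <vector>

namespace {
// Hypothetical stand-ins for the interactive UI verbs used in the CL.
void PressSomeButton(uint8_t which) {}
void SelectSomeTab(uint8_t which) {}
}  // namespace

extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
  FuzzedDataProvider provider(data, size);
  // Each operation is encoded in three bytes, as in the fuzzer above:
  // byte 0 is reserved, byte 1 chooses the action, byte 2 chooses the target.
  while (provider.remaining_bytes() >= 3) {
    std::vector<uint8_t> ops = provider.ConsumeBytes<uint8_t>(3);
    switch (ops[1] % 2) {
      case 0:
        PressSomeButton(ops[2]);
        break;
      case 1:
        SelectSomeTab(ops[2]);
        break;
    }
  }
  return 0;
}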
diff --git a/chrome/updater/test/integration_tests_linux.cc b/chrome/updater/test/integration_tests_linux.cc index 80289e7..ef545662 100644 --- a/chrome/updater/test/integration_tests_linux.cc +++ b/chrome/updater/test/integration_tests_linux.cc
@@ -150,7 +150,7 @@ .SetServerKeepAliveTime(base::Seconds(1)) .SetCrxVerifierFormat(crx_file::VerifierFormat::CRX3) .SetOverinstallTimeout(TestTimeouts::action_timeout()) - .SetIdleCheckPeriod(base::Seconds(4)) + .SetIdleCheckPeriod(base::Seconds(10)) .Modify()); }
diff --git a/chrome/updater/test/integration_tests_mac.mm b/chrome/updater/test/integration_tests_mac.mm index 2a0153d1..e4b21d2 100644 --- a/chrome/updater/test/integration_tests_mac.mm +++ b/chrome/updater/test/integration_tests_mac.mm
@@ -88,7 +88,7 @@ .SetServerKeepAliveTime(base::Seconds(1)) .SetCrxVerifierFormat(crx_file::VerifierFormat::CRX3) .SetOverinstallTimeout(base::Seconds(5)) - .SetIdleCheckPeriod(base::Seconds(4)) + .SetIdleCheckPeriod(base::Seconds(10)) .Modify()); }
diff --git a/chrome/updater/test/integration_tests_win.cc b/chrome/updater/test/integration_tests_win.cc index 598d45a..31d2bf3 100644 --- a/chrome/updater/test/integration_tests_win.cc +++ b/chrome/updater/test/integration_tests_win.cc
@@ -628,7 +628,7 @@ .SetServerKeepAliveTime(base::Seconds(1)) .SetCrxVerifierFormat(crx_file::VerifierFormat::CRX3) .SetOverinstallTimeout(base::Seconds(11)) - .SetIdleCheckPeriod(base::Seconds(4)) + .SetIdleCheckPeriod(base::Seconds(10)) .Modify()); }
diff --git a/chromeos/ash/components/quick_start/BUILD.gn b/chromeos/ash/components/quick_start/BUILD.gn index 4caef8f..568626b6 100644 --- a/chromeos/ash/components/quick_start/BUILD.gn +++ b/chromeos/ash/components/quick_start/BUILD.gn
@@ -8,6 +8,8 @@ static_library("quick_start") { sources = [ + "logging.cc", + "logging.h", "quick_start_message.cc", "quick_start_message.h", "quick_start_message_type.h", @@ -49,6 +51,7 @@ testonly = true sources = [ + "logging_unittest.cc", "quick_start_message_unittest.cc", "quick_start_requests_unittest.cc", ]
diff --git a/chrome/browser/ash/login/oobe_quick_start/logging/logging.cc b/chromeos/ash/components/quick_start/logging.cc similarity index 95% rename from chrome/browser/ash/login/oobe_quick_start/logging/logging.cc rename to chromeos/ash/components/quick_start/logging.cc index 7a31e079..3815680d 100644 --- a/chrome/browser/ash/login/oobe_quick_start/logging/logging.cc +++ b/chromeos/ash/components/quick_start/logging.cc
@@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" +#include "chromeos/ash/components/quick_start/logging.h" #include "base/command_line.h"
diff --git a/chrome/browser/ash/login/oobe_quick_start/logging/logging.h b/chromeos/ash/components/quick_start/logging.h similarity index 88% rename from chrome/browser/ash/login/oobe_quick_start/logging/logging.h rename to chromeos/ash/components/quick_start/logging.h index d75cf60..d170589e 100644 --- a/chrome/browser/ash/login/oobe_quick_start/logging/logging.h +++ b/chromeos/ash/components/quick_start/logging.h
@@ -2,8 +2,8 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -#ifndef CHROME_BROWSER_ASH_LOGIN_OOBE_QUICK_START_LOGGING_LOGGING_H_ -#define CHROME_BROWSER_ASH_LOGIN_OOBE_QUICK_START_LOGGING_LOGGING_H_ +#ifndef CHROMEOS_ASH_COMPONENTS_QUICK_START_LOGGING_H_ +#define CHROMEOS_ASH_COMPONENTS_QUICK_START_LOGGING_H_ #include <sstream>
diff --git a/chrome/browser/ash/login/oobe_quick_start/logging/logging_unittest.cc b/chromeos/ash/components/quick_start/logging_unittest.cc similarity index 96% rename from chrome/browser/ash/login/oobe_quick_start/logging/logging_unittest.cc rename to chromeos/ash/components/quick_start/logging_unittest.cc index 71ee5db..fadacf3 100644 --- a/chrome/browser/ash/login/oobe_quick_start/logging/logging_unittest.cc +++ b/chromeos/ash/components/quick_start/logging_unittest.cc
@@ -8,7 +8,7 @@ #include "base/no_destructor.h" #include "testing/gtest/include/gtest/gtest.h" -#include "chrome/browser/ash/login/oobe_quick_start/logging/logging.h" +#include "chromeos/ash/components/quick_start/logging.h" namespace ash::quick_start {
diff --git a/chromeos/tast_control.gni b/chromeos/tast_control.gni index 9fc573d..9af1b374 100644 --- a/chromeos/tast_control.gni +++ b/chromeos/tast_control.gni
@@ -331,6 +331,9 @@ # b/281983222 "inputs.InputMethodShelfInputs", + # b/286590113 + "health.ProbeBlockDevices", + # READ COMMENT AT TOP BEFORE ADDING NEW TESTS HERE. ]
diff --git a/components/autofill/core/browser/form_parsing/credit_card_field_unittest.cc b/components/autofill/core/browser/form_parsing/credit_card_field_unittest.cc index 5938f0b0..31f3e07 100644 --- a/components/autofill/core/browser/form_parsing/credit_card_field_unittest.cc +++ b/components/autofill/core/browser/form_parsing/credit_card_field_unittest.cc
@@ -585,10 +585,10 @@ } struct DetermineExpirationDateFormatTestCase { - const std::string label; - const int max_length; const std::string expected_separator; const uint8_t expected_year_length; + const std::string label; + const int max_length; }; class DetermineExpirationDateFormat @@ -606,12 +606,76 @@ base::test::ScopedFeatureList scoped_features_; }; +INSTANTIATE_TEST_SUITE_P( + CreditCardFieldTest, + DetermineExpirationDateFormat, + testing::Values( + // The order of parameters is: + // label, max length, expected separator, expected digits in year: + // + // No label, no maxlength. -> "MM/YYYY" + DetermineExpirationDateFormatTestCase{"/", 4, "", 0}, + // No label, maxlength 4. -> "MMYY" + DetermineExpirationDateFormatTestCase{"", 2, "", 4}, + // No label, maxlength 5. -> "MM/YY" + DetermineExpirationDateFormatTestCase{"/", 2, "", 5}, + // No label, maxlength 6. -> "MMYYYY" + DetermineExpirationDateFormatTestCase{"", 4, "", 6}, + // No label, maxlength 7. -> "MM/YYYY" + DetermineExpirationDateFormatTestCase{"/", 4, "", 7}, + // No label, large maxlength. -> "MM/YYYY" + DetermineExpirationDateFormatTestCase{"/", 4, "", 12}, + + // Unsupported maxlength, general label. + DetermineExpirationDateFormatTestCase{"", 2, "", 3}, + // Unsupported maxlength, two digit year label. + DetermineExpirationDateFormatTestCase{"", 2, "MM/YY", 3}, + // Unsupported maxlength, four digit year label. + DetermineExpirationDateFormatTestCase{"", 2, "MM/YYYY", 3}, + + // Two digit year, simple label. + DetermineExpirationDateFormatTestCase{" / ", 2, "MM / YY", 0}, + // Two digit year, with slash (MM/YY). + DetermineExpirationDateFormatTestCase{"/", 2, "(MM/YY)", 0}, + // Two digit year, no slash (MMYY). + DetermineExpirationDateFormatTestCase{"", 2, "(MMYY)", 4}, + // Two digit year, with slash and maxlength (MM/YY). + DetermineExpirationDateFormatTestCase{"/", 2, "(MM/YY)", 5}, + // Two digit year, with slash and large maxlength (MM/YY). + DetermineExpirationDateFormatTestCase{"/", 2, "(MM/YY)", 12}, + + // Four digit year, simple label. + DetermineExpirationDateFormatTestCase{" / ", 4, "MM / YYYY", 0}, + // Four digit year, with slash (MM/YYYY). + DetermineExpirationDateFormatTestCase{"/", 4, "(MM/YYYY)", 0}, + // Four digit year, no slash (MMYYYY). + DetermineExpirationDateFormatTestCase{"", 4, "(MMYYYY)", 6}, + // Four digit year, with slash and maxlength (MM/YYYY). + DetermineExpirationDateFormatTestCase{"/", 4, "(MM/YYYY)", 7}, + // Four digit year, with slash and large maxlength (MM/YYYY). + DetermineExpirationDateFormatTestCase{"/", 4, "(MM/YYYY)", 12}, + + // Four digit year label with restrictive maxlength (4). + DetermineExpirationDateFormatTestCase{"", 2, "(MM/YYYY)", 4}, + // Four digit year label with restrictive maxlength (5). + DetermineExpirationDateFormatTestCase{"/", 2, "(MM/YYYY)", 5}, + + // Spanish format. + DetermineExpirationDateFormatTestCase{" / ", 2, "MM / AA", 0}, + DetermineExpirationDateFormatTestCase{" / ", 4, "MM / AAAA", 0}, + + // Different separator. + DetermineExpirationDateFormatTestCase{" - ", 2, "MM - YY", 0}, + + // Date fits after stripping whitespaces from separator. + DetermineExpirationDateFormatTestCase{"-", 2, "MM - YY", 5})); + TEST_P(DetermineExpirationDateFormat, TestDetermineFormat) { // Assists in identifying which case has failed. 
- SCOPED_TRACE(test_case().label); - SCOPED_TRACE(test_case().max_length); SCOPED_TRACE(test_case().expected_separator); SCOPED_TRACE(test_case().expected_year_length); + SCOPED_TRACE(test_case().label); + SCOPED_TRACE(test_case().max_length); AutofillField field; field.max_length = test_case().max_length; @@ -626,69 +690,5 @@ EXPECT_EQ(test_case().expected_year_length, result.digits_in_expiration_year); } -INSTANTIATE_TEST_SUITE_P( - , - DetermineExpirationDateFormat, - testing::Values( - // The order of parameters is: - // label, max length, expected separator, expected digits in year: - // - // No label, no maxlength. -> "MM/YYYY" - DetermineExpirationDateFormatTestCase{"", 0, "/", 4}, - // No label, maxlength 4. -> "MMYY" - DetermineExpirationDateFormatTestCase{"", 4, "", 2}, - // No label, maxlength 5. -> "MM/YY" - DetermineExpirationDateFormatTestCase{"", 5, "/", 2}, - // No label, maxlength 6. -> "MMYYYY" - DetermineExpirationDateFormatTestCase{"", 6, "", 4}, - // No label, maxlength 7. -> "MM/YYYY" - DetermineExpirationDateFormatTestCase{"", 7, "/", 4}, - // No label, large maxlength. -> "MM/YYYY" - DetermineExpirationDateFormatTestCase{"", 12, "/", 4}, - - // Unsupported maxlength, general label. - DetermineExpirationDateFormatTestCase{"", 3, "", 2}, - // Unsupported maxlength, two digit year label. - DetermineExpirationDateFormatTestCase{"MM/YY", 3, "", 2}, - // Unsupported maxlength, four digit year label. - DetermineExpirationDateFormatTestCase{"MM/YYYY", 3, "", 2}, - - // Two digit year, simple label. - DetermineExpirationDateFormatTestCase{"MM / YY", 0, " / ", 2}, - // Two digit year, with slash (MM/YY). - DetermineExpirationDateFormatTestCase{"(MM/YY)", 0, "/", 2}, - // Two digit year, no slash (MMYY). - DetermineExpirationDateFormatTestCase{"(MMYY)", 4, "", 2}, - // Two digit year, with slash and maxlength (MM/YY). - DetermineExpirationDateFormatTestCase{"(MM/YY)", 5, "/", 2}, - // Two digit year, with slash and large maxlength (MM/YY). - DetermineExpirationDateFormatTestCase{"(MM/YY)", 12, "/", 2}, - - // Four digit year, simple label. - DetermineExpirationDateFormatTestCase{"MM / YYYY", 0, " / ", 4}, - // Four digit year, with slash (MM/YYYY). - DetermineExpirationDateFormatTestCase{"(MM/YYYY)", 0, "/", 4}, - // Four digit year, no slash (MMYYYY). - DetermineExpirationDateFormatTestCase{"(MMYYYY)", 6, "", 4}, - // Four digit year, with slash and maxlength (MM/YYYY). - DetermineExpirationDateFormatTestCase{"(MM/YYYY)", 7, "/", 4}, - // Four digit year, with slash and large maxlength (MM/YYYY). - DetermineExpirationDateFormatTestCase{"(MM/YYYY)", 12, "/", 4}, - - // Four digit year label with restrictive maxlength (4). - DetermineExpirationDateFormatTestCase{"(MM/YYYY)", 4, "", 2}, - // Four digit year label with restrictive maxlength (5). - DetermineExpirationDateFormatTestCase{"(MM/YYYY)", 5, "/", 2}, - - // Spanish format. - DetermineExpirationDateFormatTestCase{"MM / AA", 0, " / ", 2}, - DetermineExpirationDateFormatTestCase{"MM / AAAA", 0, " / ", 4}, - - // Different separator. - DetermineExpirationDateFormatTestCase{"MM - YY", 0, " - ", 2}, - - // Date fits after stripping whitespaces from separator. - DetermineExpirationDateFormatTestCase{"MM - YY", 5, "-", 2})); - } // namespace } // namespace autofill
diff --git a/components/autofill/core/browser/metrics/autofill_metrics.cc b/components/autofill/core/browser/metrics/autofill_metrics.cc index 783302ae..5918819 100644 --- a/components/autofill/core/browser/metrics/autofill_metrics.cc +++ b/components/autofill/core/browser/metrics/autofill_metrics.cc
@@ -2614,6 +2614,9 @@ OptionalBooleanToBool(user_typed_into_field)); SetStatusVector(AutofillStatus::kWasFocused, OptionalBooleanToBool(was_focused)); + SetStatusVector(AutofillStatus::kIsInSubFrame, + form.ToFormData().host_frame != field.host_frame); + if (was_focused == OptionalBoolean::kTrue) { SetStatusVector(AutofillStatus::kSuggestionWasAvailable, OptionalBooleanToBool(suggestion_was_available));
diff --git a/components/autofill/core/browser/metrics/autofill_metrics.h b/components/autofill/core/browser/metrics/autofill_metrics.h index 61b2a98..9ba1377 100644 --- a/components/autofill/core/browser/metrics/autofill_metrics.h +++ b/components/autofill/core/browser/metrics/autofill_metrics.h
@@ -649,7 +649,8 @@ kFilledValueWasModified = 9, kHadValueBeforeFilling = 10, kHadTypedOrFilledValueAtSubmission = 11, - kMaxValue = kHadTypedOrFilledValueAtSubmission + kIsInSubFrame = 12, + kMaxValue = kIsInSubFrame }; using FormEventSet =
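kIsInSubFrame is appended as value 12 of AutofillStatus, and the per-field UKM metric stores the whole status set as one packed integer (the test below reads it via autofill_status_vector.data()[0]). A rough illustration of that packing using std::bitset rather than the real DenseSet type; the kIsFocusable position is assumed for the example:

#include <bitset>
#include <cstdint>
#include <iostream>

// Bit positions mirror AutofillStatus; kIsFocusable's value is assumed here.
constexpr int kIsFocusable = 2;
constexpr int kIsInSubFrame = 12;  // new enumerator added above

int main() {
  std::bitset<64> status;
  status.set(kIsFocusable);
  status.set(kIsInSubFrame);
  // The recorded metric is the packed integer, here 4 + 4096 = 4100.
  std::cout << status.to_ullong() << "\n";
  return 0;
}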
diff --git a/components/autofill/core/browser/metrics/autofill_metrics_unittest.cc b/components/autofill/core/browser/metrics/autofill_metrics_unittest.cc index cab8b31..37de334 100644 --- a/components/autofill/core/browser/metrics/autofill_metrics_unittest.cc +++ b/components/autofill/core/browser/metrics/autofill_metrics_unittest.cc
@@ -8431,9 +8431,27 @@ // Create a profile. RecreateProfile(/*is_server=*/false); - FormData form = CreateForm({CreateField("State", "state", "", "text"), - CreateField("Street", "", "", "text"), - CreateField("Number", "", "", "text")}); + FormData form; + form.url = GURL("http://www.foo.com/"); + + FormFieldData field; + field.label = u"State"; + field.name = u"state"; + field.form_control_type = "text"; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + field.label = u"Street"; + field.name = u""; + field.form_control_type = "text"; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + field.label = u"Number"; + field.name = u""; + field.form_control_type = "text"; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); std::vector<ServerFieldType> field_types = {ADDRESS_HOME_STATE, NO_SERVER_DATA, NO_SERVER_DATA}; @@ -8502,9 +8520,27 @@ // Create a profile. RecreateProfile(/*is_server=*/false); - FormData form = CreateForm({CreateField("State", "state", "", "text"), - CreateField("Street", "street", "", "text"), - CreateField("Number", "", "", "text")}); + FormData form; + form.url = GURL("http://www.foo.com/"); + + FormFieldData field; + field.label = u"State"; + field.name = u"state"; + field.form_control_type = "text"; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + field.label = u"Street"; + field.name = u""; + field.form_control_type = "text"; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + field.label = u"Number"; + field.name = u""; + field.form_control_type = "text"; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); std::vector<ServerFieldType> field_types = { ADDRESS_HOME_STATE, ADDRESS_HOME_STREET_ADDRESS, NO_SERVER_DATA}; @@ -8643,16 +8679,53 @@ TestAutofillTickClock test_clock; test_clock.SetNowTicks(now); - FormData form = CreateForm( - {// Heuristic value will match with Autocomplete attribute. - CreateField("Last Name", "lastname", "", "text", "family-name"), - // Heuristic value will NOT match with Autocomplete attribute. - CreateField("First Name", "firstname", "", "text", "additional-name"), - // No autocomplete attribute. - CreateField("Address", "address", "", "text", "off"), - // Heuristic value will be unknown. - CreateField("Garbage label", "garbage", "", "text", "postal-code"), - CreateField("Email", "email", "", "text", "garbage")}); + FormData form; + form.url = GURL("http://www.foo.com/"); + + // Heuristic value will match with Autocomplete attribute. + FormFieldData field; + field.label = u"Last Name"; + field.name = u"lastname"; + field.form_control_type = "text"; + field.autocomplete_attribute = "family-name"; + field.parsed_autocomplete = ParseAutocompleteAttribute("family-name"); + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + // Heuristic value will NOT match with Autocomplete attribute. + field.label = u"First Name"; + field.name = u"firstname"; + field.form_control_type = "text"; + field.autocomplete_attribute = "additional-name"; + field.parsed_autocomplete = ParseAutocompleteAttribute("additional-name"); + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + // No autocomplete attribute. 
+ field.label = u"Address"; + field.name = u"address"; + field.form_control_type = "text"; + field.autocomplete_attribute = "off"; + field.parsed_autocomplete = ParseAutocompleteAttribute("off"); + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + // Heuristic value will be unknown. + field.label = u"Garbage label"; + field.name = u"garbage"; + field.form_control_type = "text"; + field.autocomplete_attribute = "postal-code"; + field.parsed_autocomplete = ParseAutocompleteAttribute("postal-code"); + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + field.label = u"Email"; + field.name = u"email"; + field.form_control_type = "text"; + field.autocomplete_attribute = "garbage"; + field.parsed_autocomplete = ParseAutocompleteAttribute("garbage"); + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); auto form_structure = std::make_unique<FormStructure>(form); FormStructure* form_structure_ptr = form_structure.get(); @@ -8984,7 +9057,7 @@ } // Tests that the forms with only <input type="checkbox"> fields are not -// recorded in UkmFieldInfo metrics. We do this to reduce bandwidth. +// recorded in FieldInfo metrics. We do this to reduce bandwidth. TEST_F(AutofillMetricsFromLogEventsTest, AutofillFieldInfoMetricsNotRecordOnAllCheckBox) { FormData form; @@ -9074,7 +9147,7 @@ } // Tests that the forms with <input type="checkbox"> fields and two text field -// which have predicted types are recorded in UkmFieldInfo metrics. +// which have predicted types are recorded in FieldInfo metrics. TEST_F(AutofillMetricsFromLogEventsTest, AutofillFieldInfoMetricsRecordOnCheckBoxWithTextField) { base::TimeTicks now = AutofillTickClock::NowTicks(); @@ -9263,6 +9336,154 @@ base::to_underlying(FormControlType::kSelectmenu)); } +// Tests that the field which is in a different frame than its form is recorded +// as AutofillStatus::kIsInSubFrame. +TEST_F(AutofillMetricsFromLogEventsTest, + AutofillFieldInfoMetricsRecordOnDifferentFrames) { + base::TimeTicks now = AutofillTickClock::NowTicks(); + TestAutofillTickClock test_clock; + test_clock.SetNowTicks(now); + + FormData form; + form.host_frame = test::MakeLocalFrameToken(test::RandomizeFrame(true)); + form.url = GURL("http://www.foo.com/"); + + // The form has three input text fields, the second field is in a sub frame. 
+ FormFieldData field; + field.label = u"First Name"; + field.name = u"firstname"; + field.form_control_type = "text"; + field.host_frame = form.host_frame; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + field.label = u"Last Name"; + field.name = u"lastname"; + field.form_control_type = "text"; + field.host_frame = test::MakeLocalFrameToken(test::RandomizeFrame(true)); + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + field.label = u"Email"; + field.name = u"email"; + field.form_control_type = "text"; + field.host_frame = form.host_frame; + field.unique_renderer_id = test::MakeFieldRendererId(); + form.fields.push_back(field); + + std::vector<ServerFieldType> field_types = {NAME_FIRST, NAME_LAST, + EMAIL_ADDRESS}; + autofill_manager().AddSeenForm(form, field_types); + SeeForm(form); + base::TimeTicks parse_time = autofill_manager() + .form_structures() + .begin() + ->second->form_parsed_timestamp(); + test_clock.SetNowTicks(parse_time + base::Milliseconds(9)); + base::HistogramTester histogram_tester; + SubmitForm(form); + autofill_manager().Reset(); + + // Verify FieldInfo UKM event for each field. + auto entries = + test_ukm_recorder_->GetEntriesByName(UkmFieldInfoType::kEntryName); + ASSERT_EQ(3u, entries.size()); + std::vector<FormControlType> form_control_types = { + FormControlType::kText, FormControlType::kText, FormControlType::kText}; + for (size_t i = 0; i < entries.size(); ++i) { + SCOPED_TRACE(testing::Message() << i); + + DenseSet<AutofillStatus> autofill_status_vector; + if (i == 1) { + autofill_status_vector = {AutofillStatus::kIsFocusable, + AutofillStatus::kIsInSubFrame}; + } else { + autofill_status_vector = {AutofillStatus::kIsFocusable}; + } + using UFIT = UkmFieldInfoType; + const auto* const entry = entries[i]; + std::map<std::string, int64_t> expected = { + {UFIT::kFormSessionIdentifierName, + AutofillMetrics::FormGlobalIdToHash64Bit(form.global_id())}, + {UFIT::kFieldSessionIdentifierName, + AutofillMetrics::FieldGlobalIdToHash64Bit(form.fields[i].global_id())}, + {UFIT::kFieldSignatureName, + Collapse(CalculateFieldSignatureForField(form.fields[i])).value()}, + {UFIT::kOverallTypeName, field_types[i]}, + {UFIT::kSectionIdName, 1}, + {UFIT::kTypeChangedByRationalizationName, false}, + {UFIT::kFormControlTypeName, base::to_underlying(form_control_types[i])}, + {UFIT::kAutocompleteStateName, + base::to_underlying(AutofillMetrics::AutocompleteState::kNone)}, + {UFIT::kAutofillStatusVectorName, autofill_status_vector.data()[0]}, + {UFIT::kHeuristicTypeName, field_types[i]}, + {UFIT::kHeuristicTypeLegacyName, field_types[i]}, +#if BUILDFLAG(USE_INTERNAL_AUTOFILL_PATTERNS) + {UFIT::kHeuristicTypeDefaultName, field_types[i]}, + {UFIT::kHeuristicTypeExperimentalName, field_types[i]}, + {UFIT::kHeuristicTypeNextGenName, field_types[i]}, +#else + {UFIT::kHeuristicTypeDefaultName, UNKNOWN_TYPE}, + {UFIT::kHeuristicTypeExperimentalName, UNKNOWN_TYPE}, + {UFIT::kHeuristicTypeNextGenName, UNKNOWN_TYPE}, +#endif + {UFIT::kRankInFieldSignatureGroupName, 1}, + }; + + EXPECT_EQ(expected.size(), entry->metrics.size()); + for (const auto& [metric, value] : expected) { + test_ukm_recorder_->ExpectEntryMetric(entry, metric, value); + } + } + + // Verify FormSummary UKM event for the form. 
+ auto form_entries = + test_ukm_recorder_->GetEntriesByName(UkmFormSummaryType::kEntryName); + ASSERT_EQ(1u, form_entries.size()); + using UFST = UkmFormSummaryType; + const auto* const form_entry = form_entries[0]; + AutofillMetrics::FormEventSet form_events = {FORM_EVENT_DID_PARSE_FORM}; + std::map<std::string, int64_t> expected = { + {UFST::kFormSessionIdentifierName, + AutofillMetrics::FormGlobalIdToHash64Bit(form.global_id())}, + {UFST::kFormSignatureName, + Collapse(CalculateFormSignature(form)).value()}, + {UFST::kAutofillFormEventsName, form_events.data()[0]}, + {UFST::kAutofillFormEvents2Name, form_events.data()[1]}, + {UFST::kIsInMainframeName, true}, + {UFST::kSampleRateName, 1}, + {UFST::kWasSubmittedName, true}, + {UFST::kMillisecondsFromFormParsedUntilSubmissionName, 9}, + }; + EXPECT_EQ(expected.size(), form_entry->metrics.size()); + for (const auto& [metric, value] : expected) { + test_ukm_recorder_->ExpectEntryMetric(form_entry, metric, value); + } + + // Verify LogEvent count UMA events of each type. + histogram_tester.ExpectBucketCount( + "Autofill.LogEvent.AskForValuesToFillEvent", 0, 1); + histogram_tester.ExpectBucketCount("Autofill.LogEvent.TriggerFillEvent", 0, + 1); + histogram_tester.ExpectBucketCount("Autofill.LogEvent.FillEvent", 0, 1); + histogram_tester.ExpectBucketCount("Autofill.LogEvent.TypingEvent", 0, 1); + histogram_tester.ExpectBucketCount( + "Autofill.LogEvent.AutocompleteAttributeEvent", 0, 1); + histogram_tester.ExpectBucketCount("Autofill.LogEvent.ServerPredictionEvent", + 0, 1); + histogram_tester.ExpectBucketCount("Autofill.LogEvent.RationalizationEvent", + 3, 1); +#if BUILDFLAG(USE_INTERNAL_AUTOFILL_PATTERNS) + histogram_tester.ExpectBucketCount( + "Autofill.LogEvent.HeuristicPredictionEvent", 12, 1); + histogram_tester.ExpectBucketCount("Autofill.LogEvent.All", 15, 1); +#else + histogram_tester.ExpectBucketCount( + "Autofill.LogEvent.HeuristicPredictionEvent", 3, 1); + histogram_tester.ExpectBucketCount("Autofill.LogEvent.All", 6, 1); +#endif +} + // TODO(crbug.com/1352826) Delete this after collecting the metrics. struct LaxLocalHeuristicsTestCase { test::FormDescription form;
diff --git a/components/browser_ui/accessibility/android/java/src/org/chromium/components/browser_ui/accessibility/PageZoomUtils.java b/components/browser_ui/accessibility/android/java/src/org/chromium/components/browser_ui/accessibility/PageZoomUtils.java index c07f02a..7ec4f0d0 100644 --- a/components/browser_ui/accessibility/android/java/src/org/chromium/components/browser_ui/accessibility/PageZoomUtils.java +++ b/components/browser_ui/accessibility/android/java/src/org/chromium/components/browser_ui/accessibility/PageZoomUtils.java
@@ -12,6 +12,7 @@ import org.chromium.base.MathUtils; import org.chromium.content_public.browser.BrowserContextHandle; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.HostZoomMap; import java.util.Arrays; @@ -65,7 +66,7 @@ * @return boolean */ public static boolean shouldShowSettingsUI() { - return ContentFeatureList.isEnabled(ContentFeatureList.ACCESSIBILITY_PAGE_ZOOM); + return ContentFeatureMap.isEnabled(ContentFeatureList.ACCESSIBILITY_PAGE_ZOOM); } /**
diff --git a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/AddExceptionPreference.java b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/AddExceptionPreference.java index c38cb184..c219f42 100644 --- a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/AddExceptionPreference.java +++ b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/AddExceptionPreference.java
@@ -31,6 +31,7 @@ import org.chromium.components.browser_ui.styles.SemanticColorUtils; import org.chromium.components.browser_ui.widget.CheckBoxWithDescription; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.ui.KeyboardVisibilityDelegate; import org.chromium.ui.text.EmptyTextWatcher; @@ -133,7 +134,7 @@ } else if (mCategory.getType() == SiteSettingsCategory.Type.REQUEST_DESKTOP_SITE) { // Default to domain level setting for Request Desktop Site. checkBox.setChecked(true); - if (ContentFeatureList.getFieldTrialParamByFeatureAsBoolean( + if (ContentFeatureMap.getInstance().getFieldTrialParamByFeatureAsBoolean( ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS, PARAM_SUBDOMAIN_SETTINGS, true)) { checkBox.setVisibility(View.VISIBLE);
diff --git a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleCategorySettings.java b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleCategorySettings.java index c30564bd..55326ff 100644 --- a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleCategorySettings.java +++ b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleCategorySettings.java
@@ -74,6 +74,7 @@ import org.chromium.components.user_prefs.UserPrefs; import org.chromium.content_public.browser.BrowserContextHandle; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.ui.modaldialog.DialogDismissalCause; import org.chromium.ui.modaldialog.ModalDialogManager; import org.chromium.ui.modaldialog.ModalDialogManager.ModalDialogType; @@ -899,7 +900,7 @@ !WebsitePreferenceBridge.isCategoryEnabled(browserContextHandle, type); break; case SiteSettingsCategory.Type.REQUEST_DESKTOP_SITE: - allowSpecifyingExceptions = ContentFeatureList.isEnabled( + allowSpecifyingExceptions = ContentFeatureMap.isEnabled( ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS); break; case SiteSettingsCategory.Type.THIRD_PARTY_COOKIES: @@ -1221,8 +1222,7 @@ // Configure/hide the desktop site secondary controls, as needed. if (mCategory.getType() == SiteSettingsCategory.Type.REQUEST_DESKTOP_SITE - && ContentFeatureList.isEnabled( - ContentFeatureList.REQUEST_DESKTOP_SITE_ADDITIONS)) { + && ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_ADDITIONS)) { mDesktopSitePeripheralPref.setOnPreferenceChangeListener(this); mDesktopSiteDisplayPref.setOnPreferenceChangeListener(this); updateDesktopSiteSecondaryControls(); @@ -1390,7 +1390,7 @@ // TODO(crbug.com/1343640): Looking at a different class setup for SingleCategorySettings that // allows category specific logic to live in separate files. private void updateDesktopSiteSecondaryControls() { - if (!ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_ADDITIONS)) { + if (!ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_ADDITIONS)) { return; } @@ -1515,7 +1515,7 @@ return false; } if (mCategory.getType() == SiteSettingsCategory.Type.REQUEST_DESKTOP_SITE - && !ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) + && !ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) && SiteSettingsFeatureList.isEnabled( SiteSettingsFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS_DOWNGRADE)) { return false;
diff --git a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleWebsiteSettings.java b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleWebsiteSettings.java index 04c0b56c..f9364005 100644 --- a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleWebsiteSettings.java +++ b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SingleWebsiteSettings.java
@@ -41,6 +41,7 @@ import org.chromium.components.embedder_support.util.Origin; import org.chromium.content_public.browser.BrowserContextHandle; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import java.util.Collection; import java.util.HashMap; @@ -1104,7 +1105,7 @@ private void setUpDesktopSitePreference(Preference preference) { // Skip adding the desktop site preference if RDS exceptions support is removed. - if (!ContentFeatureList.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) + if (!ContentFeatureMap.isEnabled(ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS) && SiteSettingsFeatureList.isEnabled( SiteSettingsFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS_DOWNGRADE)) { return;
diff --git a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SiteSettingsUtil.java b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SiteSettingsUtil.java index 3090b6a..cc7b474 100644 --- a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SiteSettingsUtil.java +++ b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/SiteSettingsUtil.java
@@ -12,6 +12,7 @@ import org.chromium.components.content_settings.ContentSettingsType; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; /** * Util class for site settings UI. @@ -68,7 +69,7 @@ for (@ContentSettingsType int setting : CHOOSER_PERMISSIONS) { for (@ContentSettingsType int type : types) { if (type == ContentSettingsType.BLUETOOTH_CHOOSER_DATA - && !ContentFeatureList.isEnabled( + && !ContentFeatureMap.isEnabled( ContentFeatureList.WEB_BLUETOOTH_NEW_PERMISSIONS_BACKEND)) { continue; }
diff --git a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePermissionsFetcher.java b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePermissionsFetcher.java index df9c411..30dba49 100644 --- a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePermissionsFetcher.java +++ b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePermissionsFetcher.java
@@ -17,6 +17,7 @@ import org.chromium.components.content_settings.ContentSettingsType; import org.chromium.content_public.browser.BrowserContextHandle; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.common.ContentSwitches; import java.util.ArrayList; @@ -291,7 +292,7 @@ // Remove this check after the flag is removed. if (contentSettingsType == ContentSettingsType.NFC - && !ContentFeatureList.isEnabled(ContentFeatureList.WEB_NFC)) { + && !ContentFeatureMap.isEnabled(ContentFeatureList.WEB_NFC)) { return; } @@ -303,7 +304,7 @@ // list of permitted Bluetooth devices that each site can connect to. // Remove this check after the flag is removed. if (contentSettingsType == ContentSettingsType.BLUETOOTH_GUARD - && !ContentFeatureList.isEnabled( + && !ContentFeatureMap.isEnabled( ContentFeatureList.WEB_BLUETOOTH_NEW_PERMISSIONS_BACKEND)) { return; }
diff --git a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePreference.java b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePreference.java index a74f20b..9ed179fd 100644 --- a/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePreference.java +++ b/components/browser_ui/site_settings/android/java/src/org/chromium/components/browser_ui/site_settings/WebsitePreference.java
@@ -20,6 +20,7 @@ import org.chromium.components.browser_ui.settings.FaviconViewUtils; import org.chromium.components.embedder_support.util.UrlUtilities; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.url.GURL; /** @@ -102,7 +103,7 @@ if (mSite.isEmbargoed(mCategory.getContentSettingsType())) { setSummary(getContext().getString(R.string.automatically_blocked)); } else if (mCategory.getType() == SiteSettingsCategory.Type.REQUEST_DESKTOP_SITE - && ContentFeatureList.getFieldTrialParamByFeatureAsBoolean( + && ContentFeatureMap.getInstance().getFieldTrialParamByFeatureAsBoolean( ContentFeatureList.REQUEST_DESKTOP_SITE_EXCEPTIONS, PARAM_SUBDOMAIN_SETTINGS, true) && mSite.getAddress().getIsAnySubdomainPattern()) {
diff --git a/components/cast_streaming/browser/cast_streaming_session.cc b/components/cast_streaming/browser/cast_streaming_session.cc index e2d21d44e..e84628d5 100644 --- a/components/cast_streaming/browser/cast_streaming_session.cc +++ b/components/cast_streaming/browser/cast_streaming_session.cc
@@ -76,7 +76,7 @@ ReceiverSession::MessagePortProvider message_port_provider, scoped_refptr<base::SequencedTaskRunner> task_runner) : task_runner_(task_runner), - environment_(&openscreen::Clock::now, &task_runner_), + environment_(&openscreen::Clock::now, task_runner_), cast_message_port_converter_(CastMessagePortConverter::Create( std::move(message_port_provider), base::BindOnce(
diff --git a/components/cast_streaming/test/cast_streaming_test_sender.cc b/components/cast_streaming/test/cast_streaming_test_sender.cc index 0e34c21..72ff11f2 100644 --- a/components/cast_streaming/test/cast_streaming_test_sender.cc +++ b/components/cast_streaming/test/cast_streaming_test_sender.cc
@@ -103,7 +103,7 @@ CastStreamingTestSender::CastStreamingTestSender() : task_runner_(base::SequencedTaskRunner::GetCurrentDefault()), environment_(&openscreen::Clock::now, - &task_runner_, + task_runner_, openscreen::IPEndpoint::kAnyV4()) {} CastStreamingTestSender::~CastStreamingTestSender() = default;
diff --git a/components/client_hints/browser/client_hints.cc b/components/client_hints/browser/client_hints.cc index e351573..ae23f55 100644 --- a/components/client_hints/browser/client_hints.cc +++ b/components/client_hints/browser/client_hints.cc
@@ -198,10 +198,11 @@ // TODO(tbansal): crbug.com/735518. Disable updates to client hints settings // when cookies are disabled for |primary_origin|. + content_settings::ContentSettingConstraints constraints; + constraints.set_session_model(content_settings::SessionModel::Durable); settings_map_->SetWebsiteSettingDefaultScope( primary_url, GURL(), ContentSettingsType::CLIENT_HINTS, - base::Value(std::move(client_hints_dictionary)), - {base::Time(), content_settings::SessionModel::Durable}); + base::Value(std::move(client_hints_dictionary)), constraints); network::LogClientHintsPersistenceMetrics(persistence_started, client_hints.size()); }
diff --git a/components/content_settings/core/browser/content_settings_pref_provider.cc b/components/content_settings/core/browser/content_settings_pref_provider.cc index 22fb336..4b6085802 100644 --- a/components/content_settings/core/browser/content_settings_pref_provider.cc +++ b/components/content_settings/core/browser/content_settings_pref_provider.cc
@@ -208,20 +208,20 @@ // Last visit timestamps should only be tracked for ContentSettings that are // "ASK" by default. - DCHECK(!constraints.track_last_visit_for_autoexpiration || + DCHECK(!constraints.track_last_visit_for_autoexpiration() || content_settings::CanTrackLastVisit(content_type)); // Last visit timestamps can only be tracked for host-specific pattern. - DCHECK(!constraints.track_last_visit_for_autoexpiration || + DCHECK(!constraints.track_last_visit_for_autoexpiration() || !primary_pattern.GetHost().empty()); - base::Time last_visited = constraints.track_last_visit_for_autoexpiration + base::Time last_visited = constraints.track_last_visit_for_autoexpiration() ? GetCoarseVisitedTime(clock_->Now()) : base::Time(); // If SessionModel is OneTime, we know for sure that a one time permission // has been set by the One Time Provider, therefore we reset a potentially // existing Allow Always setting. - if (constraints.session_model == SessionModel::OneTime) { + if (constraints.session_model() == SessionModel::OneTime) { DCHECK(content_type == ContentSettingsType::GEOLOCATION || content_type == ContentSettingsType::MEDIASTREAM_MIC || content_type == ContentSettingsType::MEDIASTREAM_CAMERA); @@ -233,8 +233,8 @@ std::move(in_value), {.last_modified = modified_time, .last_visited = last_visited, - .expiration = constraints.expiration, - .session_model = constraints.session_model}); + .expiration = constraints.expiration(), + .session_model = constraints.session_model()}); return true; }
diff --git a/components/content_settings/core/browser/content_settings_utils.cc b/components/content_settings/core/browser/content_settings_utils.cc index 2c82cfb7..3dc1608 100644 --- a/components/content_settings/core/browser/content_settings_utils.cc +++ b/components/content_settings/core/browser/content_settings_utils.cc
@@ -172,14 +172,7 @@ // as they are only bounded by time and can persist through multiple browser // sessions. bool IsConstraintPersistent(const ContentSettingConstraints& constraints) { - return constraints.session_model == SessionModel::Durable; -} - -// Convenience helper to calculate the expiration time of a constraint given a -// desired |duration| -base::Time GetConstraintExpiration(const base::TimeDelta duration) { - DCHECK(!duration.is_zero()); - return base::Time::Now() + duration; + return constraints.session_model() == SessionModel::Durable; } bool CanTrackLastVisit(ContentSettingsType type) {
diff --git a/components/content_settings/core/browser/content_settings_utils.h b/components/content_settings/core/browser/content_settings_utils.h index 4aef99d..978fcb1b 100644 --- a/components/content_settings/core/browser/content_settings_utils.h +++ b/components/content_settings/core/browser/content_settings_utils.h
@@ -70,9 +70,6 @@ // Returns whether or not the supplied constraint should be persistently stored. bool IsConstraintPersistent(const ContentSettingConstraints& constraints); -// Returns the expiration time for a supplied |duration|. -base::Time GetConstraintExpiration(const base::TimeDelta duration); - // Returns whether the given type supports tracking last_visit timestamps. bool CanTrackLastVisit(ContentSettingsType type);
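With GetConstraintExpiration() removed, callers state a lifetime and let the constraint derive the absolute expiry, as the cookie-settings changes below do. A condensed sketch of the new call-site pattern (the wrapper function name is illustrative; the 100-second/UserSession values are taken from the updated unit test):

#include "base/time/time.h"
#include "components/content_settings/core/common/content_settings_constraints.h"

namespace content_settings {

// Previously a call site built roughly
//   {GetConstraintExpiration(base::Seconds(100)), SessionModel::UserSession}.
// Under the new API the same intent is expressed as a lifetime:
ContentSettingConstraints MakeUserSessionConstraints() {
  ContentSettingConstraints constraints;
  constraints.set_lifetime(base::Seconds(100));
  constraints.set_session_model(SessionModel::UserSession);
  return constraints;
}

}  // namespace content_settings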
diff --git a/components/content_settings/core/browser/cookie_settings.cc b/components/content_settings/core/browser/cookie_settings.cc index 0dc5f12..383fdd3 100644 --- a/components/content_settings/core/browser/cookie_settings.cc +++ b/components/content_settings/core/browser/cookie_settings.cc
@@ -92,11 +92,10 @@ void CookieSettings::SetCookieSettingForUserBypass( const GURL& first_party_url) { - base::TimeDelta expiration = - content_settings::features::kUserBypassUIExceptionExpiration.Get(); - base::Time expiry_time = - expiration.is_zero() ? base::Time() : GetConstraintExpiration(expiration); - ContentSettingConstraints constraints = {expiry_time, SessionModel::Durable}; + ContentSettingConstraints constraints; + constraints.set_lifetime( + content_settings::features::kUserBypassUIExceptionExpiration.Get()); + constraints.set_session_model(SessionModel::Durable); host_content_settings_map_->SetContentSettingCustomScope( ContentSettingsPattern::Wildcard(),
diff --git a/components/content_settings/core/browser/cookie_settings_unittest.cc b/components/content_settings/core/browser/cookie_settings_unittest.cc index e9a774e..9442ffd9 100644 --- a/components/content_settings/core/browser/cookie_settings_unittest.cc +++ b/components/content_settings/core/browser/cookie_settings_unittest.cc
@@ -1018,12 +1018,14 @@ prefs_.SetInteger(prefs::kCookieControlsMode, static_cast<int>(CookieControlsMode::kBlockThirdParty)); + ContentSettingConstraints constraints; + constraints.set_lifetime(base::Seconds(100)); + constraints.set_session_model(SessionModel::UserSession); + settings_map_->SetContentSettingCustomScope( ContentSettingsPattern::FromURLNoWildcard(url), ContentSettingsPattern::FromURLNoWildcard(top_level_url), - ContentSettingsType::STORAGE_ACCESS, CONTENT_SETTING_ALLOW, - {content_settings::GetConstraintExpiration(base::Seconds(100)), - SessionModel::UserSession}); + ContentSettingsType::STORAGE_ACCESS, CONTENT_SETTING_ALLOW, constraints); // When requesting our setting for the url/top-level combination our grant is // for access should be allowed iff SAA is enabled. For any other domain pairs
diff --git a/components/content_settings/core/common/BUILD.gn b/components/content_settings/core/common/BUILD.gn index da0bcd6..64694c8b 100644 --- a/components/content_settings/core/common/BUILD.gn +++ b/components/content_settings/core/common/BUILD.gn
@@ -10,6 +10,7 @@ sources = [ "content_settings.cc", "content_settings.h", + "content_settings_constraints.cc", "content_settings_constraints.h", "content_settings_metadata.h", "content_settings_param_traits.cc", @@ -53,6 +54,7 @@ source_set("unit_tests") { testonly = true sources = [ + "content_settings_constraints_unittest.cc", "content_settings_mojom_traits_unittest.cc", "content_settings_pattern_parser_unittest.cc", "content_settings_pattern_unittest.cc",
diff --git a/components/content_settings/core/common/content_settings_constraints.cc b/components/content_settings/core/common/content_settings_constraints.cc new file mode 100644 index 0000000..a5468986 --- /dev/null +++ b/components/content_settings/core/common/content_settings_constraints.cc
@@ -0,0 +1,41 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/content_settings/core/common/content_settings_constraints.h" + +#include <tuple> + +namespace content_settings { + +ContentSettingConstraints::ContentSettingConstraints() + : ContentSettingConstraints(base::Time::Now()) {} + +ContentSettingConstraints::ContentSettingConstraints(base::Time now) + : created_at_(now) {} + +ContentSettingConstraints::ContentSettingConstraints( + ContentSettingConstraints&& other) = default; +ContentSettingConstraints::ContentSettingConstraints( + const ContentSettingConstraints& other) = default; +ContentSettingConstraints& ContentSettingConstraints::operator=( + ContentSettingConstraints&& other) = default; +ContentSettingConstraints& ContentSettingConstraints::operator=( + const ContentSettingConstraints& other) = default; + +ContentSettingConstraints::~ContentSettingConstraints() = default; + +bool ContentSettingConstraints::operator==( + const ContentSettingConstraints& other) const { + return std::tuple(expiration(), session_model_, + track_last_visit_for_autoexpiration_) == + std::tuple(other.expiration(), other.session_model_, + other.track_last_visit_for_autoexpiration_); +} + +bool ContentSettingConstraints::operator!=( + const ContentSettingConstraints& other) const { + return !(*this == other); +} + +} // namespace content_settings
diff --git a/components/content_settings/core/common/content_settings_constraints.h b/components/content_settings/core/common/content_settings_constraints.h index be6f8f7..66fc379b 100644 --- a/components/content_settings/core/common/content_settings_constraints.h +++ b/components/content_settings/core/common/content_settings_constraints.h
@@ -32,18 +32,75 @@ }; // Constraints to be applied when setting a content setting. -struct ContentSettingConstraints { - // Specification of an |expiration| provides an upper bound on the time a - // setting will remain valid. If 0 is specified for |expiration| no time limit - // will apply. - base::Time expiration; +class ContentSettingConstraints { + public: + // Creates a default set of constraints. The constraints do not expire, use + // the Durable session model, and do not track the last visit for + // autoexpiration. + ContentSettingConstraints(); + + // Creates a default set of constraints, using `now` as the "created_at" time. + explicit ContentSettingConstraints(base::Time now); + + ContentSettingConstraints(ContentSettingConstraints&& other); + ContentSettingConstraints(const ContentSettingConstraints& other); + ContentSettingConstraints& operator=(ContentSettingConstraints&& other); + ContentSettingConstraints& operator=(const ContentSettingConstraints& other); + + ~ContentSettingConstraints(); + + bool operator==(const ContentSettingConstraints& other) const; + bool operator!=(const ContentSettingConstraints& other) const; + + base::Time expiration() const { + if (lifetime_.is_zero()) { + return base::Time(); + } + return created_at_ + lifetime_; + } + + void set_lifetime(base::TimeDelta lifetime) { lifetime_ = lifetime; } + + SessionModel session_model() const { return session_model_; } + void set_session_model(SessionModel model) { session_model_ = model; } + + bool track_last_visit_for_autoexpiration() const { + return track_last_visit_for_autoexpiration_; + } + void set_track_last_visit_for_autoexpiration(bool track) { + track_last_visit_for_autoexpiration_ = track; + } + + // Helper for callers that only know their intended expiration, rather than + // the intended lifetime. Setting the lifetime directly (without using this + // helper) should be preferred instead. + base::TimeDelta DeltaFromCreationTime(base::Time exp) const { + return exp - created_at_; + } + + private: + // Tracks the base::Time that this instance was constructed. Copies and moves + // reuse this time. + base::Time created_at_; + + // Specification of the lifetime of the setting created with these + // constraints. This controls when the setting expires. + // + // If the lifetime is zero, then the setting does not expire. + // + // TODO(https://crbug.com/1450356): created_at_ and lifetime_ need to be + // persisted (likely in/by content_settings::RuleMetaData) and recreated in + // order be useful. Otherwise, everything still operates in terms of + // expirations. + base::TimeDelta lifetime_ = base::TimeDelta(); + // Used to specify the lifetime model that should be used. - SessionModel session_model = SessionModel::Durable; + SessionModel session_model_ = SessionModel::Durable; // Set to true to keep track of the last visit to the origin of this // permission. // This is used for the Safety check permission module and unrelated to the - // "expiration" keyword above. - bool track_last_visit_for_autoexpiration = false; + // "lifetime" keyword above. + bool track_last_visit_for_autoexpiration_ = false; }; } // namespace content_settings
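The reworked class stores its creation time plus a lifetime and computes expiration() on demand, with a zero lifetime meaning the setting never expires. A small usage sketch against the accessors defined above (DescribeExpiry is a hypothetical helper, not part of the CL):

#include <iostream>

#include "base/time/time.h"
#include "components/content_settings/core/common/content_settings_constraints.h"

// Hypothetical helper: reports whether a constraint ever expires.
void DescribeExpiry(const content_settings::ContentSettingConstraints& c) {
  const base::Time expiry = c.expiration();
  if (expiry.is_null()) {
    std::cout << "never expires (lifetime left at zero)\n";
  } else {
    std::cout << "expires at " << expiry << "\n";  // created_at_ + lifetime_
  }
}

int main() {
  content_settings::ContentSettingConstraints durable;
  DescribeExpiry(durable);  // default: Durable session model, no expiry

  content_settings::ContentSettingConstraints one_day;
  one_day.set_lifetime(base::Days(1));
  DescribeExpiry(one_day);  // expires one day after construction
  return 0;
}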
diff --git a/components/content_settings/core/common/content_settings_constraints_unittest.cc b/components/content_settings/core/common/content_settings_constraints_unittest.cc new file mode 100644 index 0000000..1af1a704 --- /dev/null +++ b/components/content_settings/core/common/content_settings_constraints_unittest.cc
@@ -0,0 +1,64 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "components/content_settings/core/common/content_settings_constraints.h" + +#include "base/test/task_environment.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace content_settings { +namespace {} // namespace + +class ContentSettingConstraintsTest : public testing::Test { + public: + base::test::TaskEnvironment& env() { return env_; } + + private: + base::test::TaskEnvironment env_{ + base::test::TaskEnvironment::TimeSource::MOCK_TIME}; +}; + +TEST_F(ContentSettingConstraintsTest, CopyCtor) { + ContentSettingConstraints constraints; + constraints.set_lifetime(base::Seconds(1234)); + constraints.set_session_model(SessionModel::UserSession); + constraints.set_track_last_visit_for_autoexpiration(true); + + ContentSettingConstraints copy = constraints; + EXPECT_EQ(constraints, copy); + + ContentSettingConstraints different = constraints; + different.set_lifetime(base::Days(1)); + EXPECT_NE(constraints, different); + + ContentSettingConstraints old_constraints; + env().FastForwardBy(base::Seconds(1)); + ContentSettingConstraints new_constraints; + // The creation time differs, but there's no lifetime, so these are + // equivalent. + EXPECT_EQ(old_constraints, new_constraints); + + old_constraints.set_lifetime(base::Days(1)); + new_constraints.set_lifetime(base::Days(1)); + // Now there is a lifetime associated with the constraint, so the different + // creation time makes a difference. + EXPECT_NE(old_constraints, new_constraints); +} + +TEST_F(ContentSettingConstraintsTest, MoveCtor) { + ContentSettingConstraints constraints; + constraints.set_lifetime(base::Seconds(1234)); + constraints.set_session_model(SessionModel::UserSession); + constraints.set_track_last_visit_for_autoexpiration(true); + + ContentSettingConstraints copy = constraints; + ContentSettingConstraints moved = std::move(constraints); + EXPECT_EQ(copy, moved); + + moved.set_lifetime(base::Days(1)); + EXPECT_NE(copy, moved); +} + +} // namespace content_settings
diff --git a/components/content_settings/core/test/content_settings_mock_provider.cc b/components/content_settings/core/test/content_settings_mock_provider.cc index de184c4..49c94e4 100644 --- a/components/content_settings/core/test/content_settings_mock_provider.cc +++ b/components/content_settings/core/test/content_settings_mock_provider.cc
@@ -32,8 +32,8 @@ if (!in_value.is_none()) { value_map_.SetValue(requesting_url_pattern, embedding_url_pattern, content_type, std::move(in_value), - {.expiration = constraints.expiration, - .session_model = constraints.session_model}); + {.expiration = constraints.expiration(), + .session_model = constraints.session_model()}); } else { base::Value value(std::move(in_value)); value_map_.DeleteValue(requesting_url_pattern, embedding_url_pattern,
diff --git a/components/cronet/tools/cr_cronet.py b/components/cronet/tools/cr_cronet.py index 07fc0add..ff809d3 100755 --- a/components/cronet/tools/cr_cronet.py +++ b/components/cronet/tools/cr_cronet.py
@@ -84,9 +84,9 @@ def stack(out_dir): - return run_shell( - 'adb logcat -d | CHROMIUM_OUTPUT_DIR=' + shlex.quote(out_dir) + - ' third_party/android_platform/development/scripts/stack') + return run_shell('adb logcat -d | CHROMIUM_OUTPUT_DIR=' + + shlex.quote(out_dir) + + ' third_party/android_platform/development/scripts/stack') def use_goma():
diff --git a/components/flags_ui/resources/BUILD.gn b/components/flags_ui/resources/BUILD.gn index ae62d1c..4a74599 100644 --- a/components/flags_ui/resources/BUILD.gn +++ b/components/flags_ui/resources/BUILD.gn
@@ -12,6 +12,9 @@ "flags.html", ] + web_component_files = [ "experiment.ts" ] + html_to_wrapper_template = "native" + non_web_component_files = [ "flags.ts" ] ts_definitions = [ "//tools/typescript/definitions/chrome_send.d.ts" ]
diff --git a/components/flags_ui/resources/experiment.html b/components/flags_ui/resources/experiment.html new file mode 100644 index 0000000..2b12f15 --- /dev/null +++ b/components/flags_ui/resources/experiment.html
@@ -0,0 +1,194 @@ +<style> + .experiment { + color: var(--secondary-color); + line-height: 1.45; + width: 100%; + } + + .experiment .flex-container { + display: flex; + gap: 8px; + padding: 0.8em 4px 16px 0; + } + + .experiment .flex-container .flex:first-child { + max-width: 540px; + } + + .experiment p { + margin: .2em 0; + } + + .experiment-name { + color: var(--primary-color); + display: inline-block; + font-size: .8125rem; + font-weight: 500; + line-height: 1.5; + margin: 0; + padding: 0; + } + + .experiment-switched .experiment-name::before { + --end-margin: 4px; + --width: 12px; + color: var(--interactive-color); + content: '•'; + display: inline-block; + font-size: 40px; + line-height: 0; + margin-inline-end: var(--end-margin); + margin-inline-start: calc(-1 * var(--end-margin) - var(--width)); + vertical-align: middle; + width: var(--width); + } + + + .match, + .referenced h2 { + /* This UI is intentionally the same in light and dark mode. */ + background: yellow; + color: var(--google-grey-900); + } + + .flex { + align-self: center; + flex: 1 1 auto; + } + + .experiment-actions { + align-self: center; + flex: 0 0 auto; + padding-inline-start: 5px; + text-align: right; /* csschecker-disable-line left-right */ + width: 150px; + } + + .experiment-origin-list-value { + resize: none; + } + + select { + background: white; + border: 1px solid var(--link-color); + color: var(--link-color); + font-size: .8125rem; + height: 1.625rem; + letter-spacing: .01em; + max-width: 150px; + text-align-last: center; + width: 100%; + } + + @media (prefers-color-scheme: dark) { + select { + background: var(--input-background); + border: none; + color: var(--primary-color); + } + + option { + background: var(--toolbar-color); + } + } + + textarea { + background: var(--input-background); + border-radius: 3px; + box-sizing: border-box; + color: inherit; + font-size: .8125rem; + margin: 0; + min-height: 3em; + padding: 8px; + width: 100%; + } + + @media (prefers-color-scheme: dark) { + textarea { + border: 1px solid var(--secondary-color); + } + } + + .experiment-switched select { + background: var(--link-color); + color: white; + } + + @media (prefers-color-scheme: dark) { + .experiment-switched select { + color: var(--google-grey-900); + } + + .experiment-switched option { + background: inherit; + color: inherit; + } + } + + .experiment-no-match { + display: none; + position: absolute; + } + + .permalink { + color: var(--secondary-color); + } + + .hidden { + display: none; + } + + @media (max-width: 480px) { + .experiment { + border-bottom: 1px solid var(--separator-color); + } + + .experiment-name { + cursor: pointer; + } + + .experiment .flex-container { + flex-flow: column; + } + + .experiment .flex { + width: 100%; + } + + .experiment .experiment-actions { + max-width: 100%; + padding-top: 12px; + text-align: left; /* csschecker-disable-line left-right */ + width: 100%; + } + + /* Hide the overflow description text */ + .experiment p { + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + width: 100%; + } + } + + @media (max-width: 732px) { + .experiment-switched .experiment-name::before { + margin-inline-start: 0; + } + } +</style> + +<div class="experiment"> + <div class="experiment-default flex-container"> + <div class="flex"> + <h2 class="experiment-name"></h2> + <p part="description"> + <span class="description"></span> – <span class="platforms"></span> + </p> + <div class="textarea-container"></div> + <a class="permalink" tabindex="7"></a> + </div> + <div class="flex 
experiment-actions"></div> + </div> +</div>
diff --git a/components/flags_ui/resources/experiment.ts b/components/flags_ui/resources/experiment.ts new file mode 100644 index 0000000..b85f6ff5 --- /dev/null +++ b/components/flags_ui/resources/experiment.ts
@@ -0,0 +1,117 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +import {assert} from 'chrome://resources/js/assert_ts.js'; +import {CustomElement} from 'chrome://resources/js/custom_element.js'; +import {loadTimeData} from 'chrome://resources/js/load_time_data.js'; + +import {getTemplate} from './experiment.html.js'; +import {Feature} from './flags.js'; + +export class FlagsExperimentElement extends CustomElement { + static override get template() { + return getTemplate(); + } + + set data(feature: Feature) { + const container = this.getRequiredElement('.experiment'); + container.id = feature.internal_name; + + const experimentDefault = this.getRequiredElement('.experiment-default'); + experimentDefault.classList.toggle( + 'experiment-default', feature.is_default); + experimentDefault.classList.toggle( + 'experiment-switched', !feature.is_default); + + const experimentName = this.getRequiredElement('.experiment-name'); + experimentName.id = `${feature.internal_name}_name`; + experimentName.title = + feature.is_default ? '' : loadTimeData.getString('experiment-enabled'); + experimentName.textContent = feature.name; + + const description = this.getRequiredElement('.description'); + description.textContent = feature.description; + const platforms = this.getRequiredElement('.platforms'); + platforms.textContent = feature.supported_platforms.join(', '); + + if (feature.origin_list_value !== undefined) { + const textarea = document.createElement('textarea'); + textarea.dataset['internalName'] = feature.internal_name; + textarea.classList.add('experiment-origin-list-value'); + textarea.value = feature.origin_list_value; + textarea.setAttribute('aria-labelledby', `${feature.internal_name}_name`); + this.getRequiredElement('.textarea-container').appendChild(textarea); + } + + const permalink = this.getRequiredElement<HTMLAnchorElement>('.permalink'); + permalink.href = `#${feature.internal_name}`; + permalink.textContent = `#${feature.internal_name}`; + + if (this.hasAttribute('unsupported')) { + this.getRequiredElement('.experiment-actions').textContent = + loadTimeData.getString('not-available-platform'); + return; + } + + if (feature.options && feature.options.length > 0) { + const experimentSelect = document.createElement('select'); + experimentSelect.dataset['internalName'] = feature.internal_name; + experimentSelect.classList.add('experiment-select'); + experimentSelect.disabled = feature.enabled === false; + experimentSelect.setAttribute( + 'aria-labelledby', `${feature.internal_name}_name`); + + experimentSelect.innerHTML = window.trustedTypes!.emptyHTML; + for (let i = 0; i < feature.options.length; i++) { + const option = feature.options[i]!; + const optionEl = document.createElement('option'); + optionEl.selected = option.selected; + optionEl.textContent = option.description; + experimentSelect.appendChild(optionEl); + } + + this.getRequiredElement('.experiment-actions') + .appendChild(experimentSelect); + return; + } + + assert(feature.options === undefined || feature.options.length === 0); + const experimentEnableDisable = document.createElement('select'); + experimentEnableDisable.dataset['internalName'] = feature.internal_name; + experimentEnableDisable.classList.add('experiment-enable-disable'); + experimentEnableDisable.setAttribute( + 'aria-labelledby', `${feature.internal_name}_name`); + + const disabledOptionEl = document.createElement('option'); + disabledOptionEl.value = 
'disabled'; + disabledOptionEl.selected = !feature.enabled; + disabledOptionEl.textContent = loadTimeData.getString('disabled'); + disabledOptionEl.dataset['default'] = feature.is_default ? + (!feature.enabled ? '1' : '0') : + !feature.enabled ? '0' : + '1'; + experimentEnableDisable.appendChild(disabledOptionEl); + + const enabledOptionEl = document.createElement('option'); + enabledOptionEl.value = 'enabled'; + enabledOptionEl.selected = feature.enabled; + enabledOptionEl.textContent = loadTimeData.getString('enabled'); + enabledOptionEl.dataset['default'] = feature.is_default ? + (feature.enabled ? '1' : '0') : + feature.enabled ? '0' : + '1'; + experimentEnableDisable.appendChild(enabledOptionEl); + + this.getRequiredElement('.experiment-actions') + .appendChild(experimentEnableDisable); + } +} + +declare global { + interface HTMLElementTagNameMap { + 'flags-experiment': FlagsExperimentElement; + } +} + +customElements.define('flags-experiment', FlagsExperimentElement);
diff --git a/components/flags_ui/resources/flags.css b/components/flags_ui/resources/flags.css index 0010ecba..e2879f3 100644 --- a/components/flags_ui/resources/flags.css +++ b/components/flags_ui/resources/flags.css
@@ -283,135 +283,6 @@ } } -.experiment { - color: var(--secondary-color); - line-height: 1.45; - width: 100%; -} - -.experiment .flex-container { - padding: 0.8em 4px 16px 0; -} - -.experiment .flex-container .flex:first-child { - box-sizing: border-box; - max-width: 540px; - padding-inline-end: 8px; -} - -.experiment p { - margin: .2em 0; -} - -.experiment-name { - color: var(--primary-color); - display: inline-block; - font-size: .8125rem; - font-weight: 500; - line-height: 1.5; - margin: 0; - padding: 0; -} - -.experiment-switched .experiment-name::before { - --end-margin: 4px; - --diameter: 10px; - background-color: var(--interactive-color); - border-radius: 50%; - content: ''; - display: inline-block; - height: var(--diameter); - margin-inline-end: var(--end-margin); - margin-inline-start: calc(-1 * var(--end-margin) - var(--diameter)); - width: var(--diameter); -} - - -.match, -.referenced h2 { - /* This UI is intentionally the same in light and dark mode. */ - background: yellow; - color: var(--google-grey-900); -} - -.experiment-actions { - flex: 0 0 auto; - padding-inline-start: 5px; - text-align: right; /* csschecker-disable-line left-right */ - width: 150px; -} - -.experiment-origin-list-value { - resize: none; -} - -select { - background: white; - border: 1px solid var(--link-color); - color: var(--link-color); - font-size: .8125rem; - height: 1.625rem; - letter-spacing: .01em; - max-width: 150px; - text-align-last: center; - width: 100%; -} - -@media (prefers-color-scheme: dark) { - select { - background: var(--input-background); - border: none; - color: var(--primary-color); - } - - option { - background: var(--toolbar-color); - } -} - -textarea { - background: var(--input-background); - border-radius: 3px; - box-sizing: border-box; - color: inherit; - font-size: .8125rem; - margin: 0; - min-height: 3em; - padding: 8px; - width: 100%; -} - -@media (prefers-color-scheme: dark) { - textarea { - border: 1px solid var(--secondary-color); - } -} - -.experiment-switched select { - background: var(--link-color); - color: white; -} - -@media (prefers-color-scheme: dark) { - .experiment-switched select { - color: var(--google-grey-900); - } - - .experiment-switched option { - background: inherit; - color: inherit; - } -} - -.experiment-no-match { - display: none; - position: absolute; -} - -.permalink { - color: var(--secondary-color); -} - .tabs { display: flex; width: 100%; @@ -559,29 +430,6 @@ } @media (max-width: 480px) { - .experiment { - border-bottom: 1px solid var(--separator-color); - } - - .experiment-name { - cursor: pointer; - } - - .experiment .flex-container { - flex-flow: column; - } - - .experiment .flex { - width: 100%; - } - - .experiment .experiment-actions { - max-width: 100%; - padding-top: 12px; - text-align: left; /* csschecker-disable-line left-right */ - width: 100%; - } - #flagsTemplate > .flex-container:first-child:not('.version') { flex-direction: column; text-align: left; /* csschecker-disable-line left-right */ @@ -604,26 +452,13 @@ padding: 4px; } - /* Hide the overflow description text */ - .experiment p { - overflow: hidden; - text-overflow: ellipsis; - white-space: nowrap; - width: 100%; - } - - .searching .experiment p, - .experiment .expand p { + .searching flags-experiment::part(description) { overflow: visible; white-space: normal; } } @media (max-width: 732px) { - .experiment-switched .experiment-name::before { - margin-inline-start: 0; - } - #version, .blurb-warning { display: block;
diff --git a/components/flags_ui/resources/flags.html b/components/flags_ui/resources/flags.html index cdaa7c20..926d10e 100644 --- a/components/flags_ui/resources/flags.html +++ b/components/flags_ui/resources/flags.html
@@ -102,96 +102,13 @@ <div jsselect="supportedFeatures" jsvalues="id:internal_name; class: is_default ? 'hidden' : 'experiment'" jsdisplay="!is_default"> - <div class="experiment-default" - jsvalues="class: is_default ? 'experiment-default flex-container' - : 'experiment-switched flex-container'"> - <div class="flex"> - <h2 class="experiment-name" jscontent="name" - jsvalues="title: is_default ? '' : '$i18n{experiment-enabled}'; - id:internal_name + '_name'"></h2> - <p> - <span jsvalues=".textContent:description"></span> – - <span class="platforms" jscontent="supported_platforms.join(', ')"></span> - </p> - <div jsdisplay="origin_list_value!==null"> - <textarea class="experiment-origin-list-value" - jsvalues=".internal_name:internal_name; .value:origin_list_value; - aria-labelledby:internal_name + '_name'" - tabindex="7"></textarea> - </div> - <a class="permalink" jsvalues="href: '#' + internal_name" - jscontent="'#' + internal_name" tabindex="7"></a> - </div> - <div class="flex experiment-actions"> - <div jsdisplay="options && options.length > 0"> - <select class="experiment-select" tabindex="7" - jsvalues=".internal_name:internal_name;.disabled:!enabled; - aria-labelledby:internal_name + '_name'"> - <option jsvalues=".selected:selected;" - jsselect="options" - jscontent="description"> - </option> - </select> - </div> - <select class="experiment-enable-disable" tabindex="7" - jsdisplay="enabled !== undefined" - jsvalues=".internal_name:internal_name; - aria-labelledby:internal_name + '_name'"> - <option jsvalues=".selected:!enabled; data-default: enabled ? 1 : 0" - value="disabled">$i18n{disabled}</option> - <option jsvalues=".selected:enabled; data-default: !enabled ? 1 : 0" - value="enabled">$i18n{enabled}</option> - </select> - </div> - </div> + <flags-experiment jsvalues=".data:$this"></flags-experiment> </div> <!-- Experiments with default settings. --> <div class="experiment" jsselect="supportedFeatures" jsvalues="id:internal_name; class: is_default ? 'experiment' : 'hidden'" jsdisplay="is_default"> - <div class="experiment-default" - jsvalues="class: is_default ? 'experiment-default flex-container' - : 'experiment-switched flex-container'"> - <div class="flex"> - <h2 class="experiment-name" jscontent="name" - jsvalues="title: is_default ? '' : '$i18n{experiment-enabled}'; - id:internal_name + '_name'"></h2> - <p> - <span jsvalues=".textContent:description"></span> – - <span class="platforms" jscontent="supported_platforms.join(', ')"></span> - </p> - <div jsdisplay="origin_list_value!==null"> - <textarea class="experiment-origin-list-value" - jsvalues=".internal_name:internal_name; .value:origin_list_value; - aria-labelledby:internal_name + '_name'" - tabindex="7"></textarea> - </div> - <a class="permalink" jsvalues="href: '#' + internal_name" - jscontent="'#' + internal_name" tabindex="7"></a> - </div> - <div class="flex experiment-actions"> - <div jsdisplay="options && options.length > 0"> - <select class="experiment-select" tabindex="7" - jsvalues=".internal_name:internal_name;.disabled:!enabled; - aria-labelledby:internal_name + '_name'"> - <option jsvalues=".selected:selected;" - jsselect="options" - jscontent="description"> - </option> - </select> - </div> - <!-- Represent enabled / disabled options in a drop down --> - <select class="experiment-enable-disable" tabindex="7" - jsdisplay="enabled !== undefined" - jsvalues=".internal_name:internal_name; - aria-labelledby:internal_name + '_name'"> - <option jsvalues=".selected:!enabled; data-default:!enabled ? 
1 : 0" - value="disabled">$i18n{disabled}</option> - <option jsvalues=".selected:enabled; data-default: enabled ? 1 : 0" - value="enabled">$i18n{enabled}</option> - </select> - </div> - </div> + <flags-experiment jsvalues=".data:$this"></flags-experiment> </div> <div class="no-match hidden" role="alert">$i18n{no-results}</div> </div> @@ -201,22 +118,7 @@ <div class="experiment" jsselect="unsupportedFeatures" jsvalues="id:internal_name"> - <div class="experiment-default flex-container" - jsvalues="class: is_default ? 'experiment-default flex-container' - : 'experiment-switched flex-container'"> - <div class="flex"> - <h2 class="experiment-name" - jscontent="name"></h2> - <p> - <span jsvalues=".textContent:description"></span> - <span class="platforms" jscontent="supported_platforms.join(', ')"></span> - </p> - <a class="permalink" - jsvalues="href: '#' + internal_name" - jscontent="'#' + internal_name" tabindex="9"></a> - </div> - <div class="flex experiment-actions">$i18n{not-available-platform}</div> - </div> + <flags-experiment jsvalues=".data:$this" unsupported></flags-experiment> </div> <div class="no-match hidden" role="alert"> $i18n{no-results}
diff --git a/components/flags_ui/resources/flags.ts b/components/flags_ui/resources/flags.ts index 5a02ecf4..eec1437 100644 --- a/components/flags_ui/resources/flags.ts +++ b/components/flags_ui/resources/flags.ts
@@ -8,6 +8,7 @@ import 'chrome://resources/js/jstemplate_compiled.js'; import './strings.m.js'; +import './experiment.js'; import {assert} from 'chrome://resources/js/assert_ts.js'; import {sendWithPromise} from 'chrome://resources/js/cr.js'; @@ -15,7 +16,9 @@ import {loadTimeData} from 'chrome://resources/js/load_time_data.js'; import {isIOS} from 'chrome://resources/js/platform.js'; import {PromiseResolver} from 'chrome://resources/js/promise_resolver.js'; -import {$, getRequiredElement} from 'chrome://resources/js/util_ts.js'; +import {$, getDeepActiveElement, getRequiredElement} from 'chrome://resources/js/util_ts.js'; + +import {FlagsExperimentElement} from './experiment.js'; let lastChanged: HTMLElement|null = null; let lastFocused: HTMLElement|null = null; @@ -30,12 +33,6 @@ window, {experimentalFeaturesReadyForTest: experimentalFeaturesResolver.promise}); -// Declare properties that are augmented on some HTMLElement instances by -// jstemplate. -interface WithExtras { - internal_name: string; -} - interface Tab { tabEl: HTMLElement; panelEl: HTMLElement; @@ -109,38 +106,31 @@ showRestartToast(experimentalFeaturesData.needsRestart); // Add handlers to dynamically created HTML elements. - let selectElements = - document.body.querySelectorAll<HTMLSelectElement&WithExtras>( - '.experiment-select'); - for (const element of selectElements) { - element.onchange = function() { - handleSelectExperimentalFeatureChoice(element, element.selectedIndex); - lastChanged = element; - return false; - }; - registerFocusEvents(element); - } - - selectElements = document.body.querySelectorAll<HTMLSelectElement&WithExtras>( - '.experiment-enable-disable'); - for (const element of selectElements) { - element.onchange = function() { - handleEnableExperimentalFeature( - element, element.options[element.selectedIndex]!.value == 'enabled'); - lastChanged = element; - return false; - }; - registerFocusEvents(element); - } - - const textAreaElements = - document.body.querySelectorAll<HTMLTextAreaElement&WithExtras>( - '.experiment-origin-list-value'); - for (const element of textAreaElements) { - element.onchange = function() { - handleSetOriginListFlag(element, element.value); - return false; - }; + const experiments = document.body.querySelectorAll('flags-experiment'); + for (const experiment of experiments) { + const select = + experiment.shadowRoot!.querySelector<HTMLSelectElement>('select'); + if (select) { + select.onchange = function() { + if (select.classList.contains('experiment-select')) { + handleSelectExperimentalFeatureChoice(select, select.selectedIndex); + } else { + handleEnableExperimentalFeature( + select, select.options[select.selectedIndex]!.value == 'enabled'); + } + lastChanged = select; + return false; + }; + registerFocusEvents(select); + } + const textarea = + experiment.shadowRoot!.querySelector<HTMLTextAreaElement>('textarea'); + if (textarea) { + textarea.onchange = function() { + handleSetOriginListFlag(textarea, textarea.value); + return false; + }; + } } assert(restartButton || isIOS); @@ -290,16 +280,17 @@ * `enabled` and `is_default` are only set if the feature is single valued. * `enabled` is true if the feature is currently enabled. * `is_default` is true if the feature is in its default state. - * `choices` is only set if the entry has multiple values. + * `options` is only set if the entry has multiple values. 
*/ -interface Feature { +export interface Feature { internal_name: string; name: string; description: string; enabled: boolean; is_default: boolean; supported_platforms: string[]; - choices?: Array<{ + origin_list_value?: string; + options?: Array<{ internal_name: string, description: string, selected: boolean, @@ -356,11 +347,16 @@ * @param node The select node for the experiment being changed. * @param index The selected option index. */ -function experimentChangesUiUpdates( - node: HTMLSelectElement&WithExtras, index: number) { +function experimentChangesUiUpdates(node: HTMLSelectElement, index: number) { const selected = node.options[index]!; + const internalName = node.dataset['internalName']; + if (!internalName) { + return; + } const experimentContainerEl = - getRequiredElement(node.internal_name).firstElementChild!; + getRequiredElement(internalName) + .firstElementChild!.shadowRoot!.querySelector( + '.experiment-default, .experiment-switched')!; const isDefault = ('default' in selected.dataset && selected.dataset['default'] === '1') || (!('default' in selected.dataset) && index === 0); @@ -376,25 +372,26 @@ * @param enable Whether to enable or disable the experiment. */ function handleEnableExperimentalFeature( - node: HTMLSelectElement&WithExtras, enable: boolean) { + node: HTMLSelectElement, enable: boolean) { /* This function is an onchange handler, which can be invoked during page * restore - see https://crbug.com/1038638. */ - if (!node.internal_name) { + const internalName = node.dataset['internalName']; + if (!internalName) { return; } chrome.send( - 'enableExperimentalFeature', - [String(node.internal_name), String(enable)]); + 'enableExperimentalFeature', [String(internalName), String(enable)]); experimentChangesUiUpdates(node, enable ? 1 : 0); } -function handleSetOriginListFlag(node: HTMLElement&WithExtras, value: string) { +function handleSetOriginListFlag(node: HTMLElement, value: string) { /* This function is an onchange handler, which can be invoked during page * restore - see https://crbug.com/1038638. */ - if (!node.internal_name) { + const internalName = node.dataset['internalName']; + if (!internalName) { return; } - chrome.send('setOriginListFlag', [String(node.internal_name), value]); + chrome.send('setOriginListFlag', [String(internalName), value]); showRestartToast(true); } @@ -405,29 +402,30 @@ * @param index The index of the option that was selected. */ function handleSelectExperimentalFeatureChoice( - node: HTMLSelectElement&WithExtras, index: number) { + node: HTMLSelectElement, index: number) { /* This function is an onchange handler, which can be invoked during page * restore - see https://crbug.com/1038638. */ - if (!node.internal_name) { + const internalName = node.dataset['internalName']; + if (!internalName) { return; } chrome.send( 'enableExperimentalFeature', - [String(node.internal_name) + '@' + index, 'true']); + [String(internalName) + '@' + index, 'true']); experimentChangesUiUpdates(node, index); } /** Type for storing the elements which are searched on. */ interface SearchContent { - link: NodeListOf<HTMLElement>|null; - title: NodeListOf<HTMLElement>|null; - description: NodeListOf<HTMLElement>|null; + link: HTMLElement[]; + title: HTMLElement[]; + description: HTMLElement[]; } const emptySearchContent: SearchContent = Object.freeze({ - link: null, - title: null, - description: null, + link: [], + title: [], + description: [], }); // Delay in ms following a keypress, before a search is made. 
@@ -438,8 +436,10 @@ */ class FlagSearch { private experiments_: SearchContent = Object.assign({}, emptySearchContent); + // <if expr="not is_ios"> private unavailableExperiments_: SearchContent = Object.assign({}, emptySearchContent); + // </if> private searchIntervalId_: number|null = null; private searchBox_: HTMLInputElement; @@ -457,19 +457,11 @@ * collates the text elements used for string matching. */ init() { - this.experiments_.link = - document.body.querySelectorAll('#tab-content-available .permalink'); - this.experiments_.title = document.body.querySelectorAll( - '#tab-content-available .experiment-name'); - this.experiments_.description = - document.body.querySelectorAll('#tab-content-available p'); - - this.unavailableExperiments_.link = - document.body.querySelectorAll('#tab-content-unavailable .permalink'); - this.unavailableExperiments_.title = document.body.querySelectorAll( - '#tab-content-unavailable .experiment-name'); - this.unavailableExperiments_.description = - document.body.querySelectorAll('#tab-content-unavailable p'); + this.experiments_ = this.getSearchableElements('tab-content-available'); + // <if expr="not is_ios"> + this.unavailableExperiments_ = + this.getSearchableElements('tab-content-unavailable'); + // </if> if (!this.initialized) { this.searchBox_.addEventListener('input', this.debounceSearch.bind(this)); @@ -478,7 +470,8 @@ 'click', this.clearSearch.bind(this)); window.addEventListener('keyup', e => { - if (document.activeElement!.nodeName === 'TEXTAREA') { + // Check for an active textarea inside a <flags-experiment>. + if (getDeepActiveElement()!.nodeName === 'TEXTAREA') { return; } switch (e.key) { @@ -496,6 +489,23 @@ } } + getSearchableElements(tabId: string): SearchContent { + const content = Object.assign({}, emptySearchContent); + const experiments = document.body.querySelectorAll<FlagsExperimentElement>( + `#${tabId} flags-experiment`); + for (const experiment of experiments) { + const link = experiment.getRequiredElement('.permalink'); + const title = experiment.getRequiredElement('.experiment-name'); + const description = experiment.getRequiredElement('p'); + + content.link.push(link); + content.title.push(title); + content.description.push(description); + } + + return content; + } + /** * Clears a search showing all experiments. */ @@ -572,9 +582,6 @@ highlightAllMatches(searchContent: SearchContent, searchTerm: string): number { let matches = 0; - assert(searchContent.description); - assert(searchContent.link); - assert(searchContent.title); for (let i = 0, j = searchContent.link.length; i < j; i++) { if (this.highlightMatchInElement(searchTerm, searchContent.title[i]!)) { this.resetHighlights(
diff --git a/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.html b/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.html index 7c32e70..c0af9e96 100644 --- a/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.html +++ b/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.html
@@ -18,7 +18,6 @@ <body> <h1>History Clusters Internals - Debug Logs</h1> <button id="log-messages-dump">Dump Page</button> - <button id="visits-dump">Dump Visits JSON</button> <button id="context-clusters-dump">Dump Most Recent Context Clusters JSON</button> <button id="print-keyword-bag-state">Print Keyword Bag State</button> <table id="log-message-container">
diff --git a/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.ts b/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.ts index ca6ae56..baaf9b1 100644 --- a/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.ts +++ b/components/history_clusters/history_clusters_internals/resources/history_clusters_internals.ts
@@ -30,23 +30,6 @@ } /** - * The callback to button#visits-dump to save the visits to a file. - */ -function onVisitsDumpRequested() { - getProxy().getHandler().getVisitsJson().then(onVisitsJsonReady); -} - -/** - * The callback when the visits JSON string has been prepared. - */ -function onVisitsJsonReady(resp: {visitsJson: string}) { - const data = resp.visitsJson; - const filename = 'history_clusters_visits_dump.json'; - - dumpFileWithJsonContents(data, filename); -} - -/** * The callback to button#context-clusters-dump to save the most recent context * clusters to a file. */ @@ -81,8 +64,6 @@ getRequiredElement('log-messages-dump') .addEventListener('click', onLogMessagesDump); - getRequiredElement('visits-dump') - .addEventListener('click', onVisitsDumpRequested); getRequiredElement('context-clusters-dump') .addEventListener('click', onContextClustersDumpRequest); getRequiredElement('print-keyword-bag-state')
diff --git a/components/history_clusters/history_clusters_internals/webui/history_clusters_internals.mojom b/components/history_clusters/history_clusters_internals/webui/history_clusters_internals.mojom index 608f3cb8..7fe3c537 100644 --- a/components/history_clusters/history_clusters_internals/webui/history_clusters_internals.mojom +++ b/components/history_clusters/history_clusters_internals/webui/history_clusters_internals.mojom
@@ -14,10 +14,6 @@ // Browser-side handler for requests from WebUI page. interface PageHandler { - // Requests for the visits to be serialized to JSON. Will invoke - // callback when JSON string is ready. - GetVisitsJson() => (string visits_json); - // Requests for the context clusters to be serialized to JSON. // Will invoke callback when JSON string is ready. GetContextClustersJson() => (string context_clusters_json);
diff --git a/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.cc b/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.cc index 8440064..752ee9b 100644 --- a/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.cc +++ b/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.cc
@@ -43,56 +43,6 @@ history_clusters_service_->RemoveObserver(this); } -void HistoryClustersInternalsPageHandlerImpl::GetVisitsJson( - GetVisitsJsonCallback callback) { - if (!history_service_) { - std::move(callback).Run(""); - return; - } - GetAnnotatedVisits(history_clusters::QueryClustersContinuationParams(), - /*previously_retrieved_visits=*/{}, std::move(callback)); -} - -void HistoryClustersInternalsPageHandlerImpl::GetAnnotatedVisits( - history_clusters::QueryClustersContinuationParams continuation_params, - std::vector<history::AnnotatedVisit> previously_retrieved_visits, - GetVisitsJsonCallback callback) { - // There are two forms of cancellation here because `ScheduleDBTask` does - // not take in a callback. - history_service_->ScheduleDBTask( - FROM_HERE, - std::make_unique<history_clusters::GetAnnotatedVisitsToCluster>( - history_clusters::IncompleteVisitMap(), /*begin_time=*/base::Time(), - continuation_params, - /*recent_first=*/true, - /*days_of_clustered_visits=*/0, /*recluster=*/true, - base::BindOnce( - &HistoryClustersInternalsPageHandlerImpl::OnGotAnnotatedVisits, - weak_ptr_factory_.GetWeakPtr(), - std::move(previously_retrieved_visits), std::move(callback))), - &task_tracker_); -} - -void HistoryClustersInternalsPageHandlerImpl::OnGotAnnotatedVisits( - std::vector<history::AnnotatedVisit> previously_retrieved_visits, - GetVisitsJsonCallback callback, - std::vector<int64_t> old_clusters, - std::vector<history::AnnotatedVisit> annotated_visits, - history_clusters::QueryClustersContinuationParams continuation_params) { - previously_retrieved_visits.insert(previously_retrieved_visits.end(), - annotated_visits.begin(), - annotated_visits.end()); - if (continuation_params.exhausted_all_visits) { - std::move(callback).Run( - history_clusters::GetDebugJSONForVisits(previously_retrieved_visits)); - return; - } - - GetAnnotatedVisits(continuation_params, - std::move(previously_retrieved_visits), - std::move(callback)); -} - void HistoryClustersInternalsPageHandlerImpl::GetContextClustersJson( GetContextClustersJsonCallback callback) { if (history_clusters_service_ &&
diff --git a/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.h b/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.h index 68e5fba9..e09125d 100644 --- a/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.h +++ b/components/history_clusters/history_clusters_internals/webui/history_clusters_internals_page_handler_impl.h
@@ -35,24 +35,9 @@ private: // history_clusters::mojom::PageHandler: - void GetVisitsJson(GetVisitsJsonCallback callback) override; void GetContextClustersJson(GetContextClustersJsonCallback callback) override; void PrintKeywordBagStateToLogMessages() override; - // Gets annotated visits from HistoryService. - void GetAnnotatedVisits( - history_clusters::QueryClustersContinuationParams continuation_params, - std::vector<history::AnnotatedVisit> previously_retrieved_visits, - GetVisitsJsonCallback callback); - - // Callback invoked when HistoryService returns annotated visits. - void OnGotAnnotatedVisits( - std::vector<history::AnnotatedVisit> previously_retrieved_visits, - GetVisitsJsonCallback callback, - std::vector<int64_t> old_clusters, - std::vector<history::AnnotatedVisit> annotated_visits, - history_clusters::QueryClustersContinuationParams continuation_params); - // Gets most recent context clusters from HistoryService. void GetContextClusters( history_clusters::QueryClustersContinuationParams continuation_params,
diff --git a/components/mirroring/service/media_remoter_unittest.cc b/components/mirroring/service/media_remoter_unittest.cc index eeeaa08c..9019825 100644 --- a/components/mirroring/service/media_remoter_unittest.cc +++ b/components/mirroring/service/media_remoter_unittest.cc
@@ -60,7 +60,7 @@ OpenscreenTestSenders() : task_runner(base::SequencedTaskRunner::GetCurrentDefault()), environment(openscreen::Clock::now, - &task_runner, + task_runner, openscreen::IPEndpoint::kAnyV4()), sender_packet_router(&environment, 20, std::chrono::milliseconds(10)), audio_sender(std::make_unique<openscreen::cast::Sender>(
diff --git a/components/mirroring/service/openscreen_rpc_dispatcher_unittest.cc b/components/mirroring/service/openscreen_rpc_dispatcher_unittest.cc index a972636a..24fb5e1 100644 --- a/components/mirroring/service/openscreen_rpc_dispatcher_unittest.cc +++ b/components/mirroring/service/openscreen_rpc_dispatcher_unittest.cc
@@ -67,7 +67,7 @@ kSourceId, kReceiverId, [this](openscreen::Error error) { OnMessengerError(error); }, - &task_environment_runner_), + task_environment_runner_), dispatcher_(messenger_) {} ~OpenscreenRpcDispatcherTest() override { task_environment_.RunUntilIdle(); }
diff --git a/components/mirroring/service/openscreen_session_host.cc b/components/mirroring/service/openscreen_session_host.cc index c503385..cff7282 100644 --- a/components/mirroring/service/openscreen_session_host.cc +++ b/components/mirroring/service/openscreen_session_host.cc
@@ -313,7 +313,7 @@ // The Open Screen environment should not be set up until after the network // context is set up. openscreen_environment_ = std::make_unique<openscreen::cast::Environment>( - openscreen::Clock::now, openscreen_task_runner_.get(), + openscreen::Clock::now, *openscreen_task_runner_, openscreen::IPEndpoint::kAnyV4()); if (session_params->type != mojom::SessionType::AUDIO_ONLY &&
diff --git a/components/omnibox/browser/autocomplete_controller.cc b/components/omnibox/browser/autocomplete_controller.cc index 2fbb607..1aaf207 100644 --- a/components/omnibox/browser/autocomplete_controller.cc +++ b/components/omnibox/browser/autocomplete_controller.cc
@@ -1047,18 +1047,10 @@ default_match_to_preserve = *result_.default_match(); } } else { - // Ensure `stripped_destination_url` is computed for the eligible matches. - // If that is already the case, `ComputeStrippedDestinationURL()` does - // nothing. This step is not needed if `SortAndCull()` is called before - // the model is executed as it ensures `stripped_destination_url` is - // computed before deduping. - // TODO(crbug.com/1446725): Instead deduplicate the matches before running - // the model - for (auto& match : result_) { - if (match.scoring_signals.has_value()) { - match.ComputeStrippedDestinationURL(input_, template_url_service_); - } - } + // Deduplicate matches according to `stripped_destination_url` prior to + // running ML scoring. This step is not needed if `SortAndCull()` is + // called before the model is executed. + result_.DeduplicateMatches(input_, template_url_service_); } #if BUILDFLAG(BUILD_WITH_TFLITE_LIB)
diff --git a/components/omnibox/browser/autocomplete_match.cc b/components/omnibox/browser/autocomplete_match.cc index e52bf3d3..a2896b6 100644 --- a/components/omnibox/browser/autocomplete_match.cc +++ b/components/omnibox/browser/autocomplete_match.cc
@@ -30,6 +30,7 @@ #include "components/omnibox/browser/omnibox_field_trial.h" #include "components/omnibox/common/omnibox_features.h" #include "components/search_engines/search_engine_utils.h" +#include "components/search_engines/template_url.h" #include "components/search_engines/template_url_service.h" #include "inline_autocompletion_util.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" @@ -481,7 +482,8 @@ } const gfx::VectorIcon& AutocompleteMatch::GetVectorIcon( - bool is_bookmark) const { + bool is_bookmark, + const TemplateURL* turl) const { // TODO(https://crbug.com/1024114): Remove crash logging once fixed. SCOPED_CRASH_KEY_NUMBER("AutocompleteMatch", "type", type); SCOPED_CRASH_KEY_NUMBER("AutocompleteMatch", "provider_type", @@ -580,6 +582,23 @@ : omnibox::kJourneysIcon; case Type::STARTER_PACK: + if (turl) { + switch (turl->GetBuiltinEngineType()) { + case KEYWORD_MODE_STARTER_PACK_BOOKMARKS: + return use_chrome_refresh_icons + ? omnibox::kStarActiveChromeRefreshIcon + : omnibox::kStarActiveIcon; + case KEYWORD_MODE_STARTER_PACK_HISTORY: + return use_chrome_refresh_icons + ? vector_icons::kHistoryChromeRefreshIcon + : vector_icons::kHistoryIcon; + case KEYWORD_MODE_STARTER_PACK_TABS: + return use_chrome_refresh_icons ? omnibox::kProductChromeRefreshIcon + : omnibox::kProductIcon; + default: + break; + } + } return use_chrome_refresh_icons ? omnibox::kProductChromeRefreshIcon : omnibox::kProductIcon;
diff --git a/components/omnibox/browser/autocomplete_match.h b/components/omnibox/browser/autocomplete_match.h index d06b6c1f..fd84f35 100644 --- a/components/omnibox/browser/autocomplete_match.h +++ b/components/omnibox/browser/autocomplete_match.h
@@ -274,7 +274,10 @@ // AutocompleteMatch is likely that 1) this info is not used elsewhere in the // Autocomplete machinery except before displaying the match and 2) obtaining // this info is trivially done by calling BookmarkModel::IsBookmarked(). - const gfx::VectorIcon& GetVectorIcon(bool is_bookmark) const; + // `turl` is used to identify the proper vector icon associated with a given + // starter pack suggestion (e.g. @tabs, @history, @bookmarks, etc.). + const gfx::VectorIcon& GetVectorIcon(bool is_bookmark, + const TemplateURL* turl = nullptr) const; #endif // Comparison function for determining whether the first match is better than
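A short sketch of the new `turl` parameter in action, mirroring how the unit test below builds a starter pack TemplateURL; the free function StarterPackIconSketch() is hypothetical.

#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/autocomplete_match_type.h"
#include "components/search_engines/template_url.h"
#include "components/search_engines/template_url_data.h"
#include "components/search_engines/template_url_starter_pack_data.h"
#include "ui/gfx/vector_icon_types.h"

// Hypothetical snippet: resolve the icon for an @bookmarks starter pack match.
const gfx::VectorIcon& StarterPackIconSketch() {
  AutocompleteMatch match;
  match.type = AutocompleteMatchType::STARTER_PACK;

  TemplateURLData turl_data;
  turl_data.starter_pack_id = TemplateURLStarterPackData::kBookmarks;
  TemplateURL turl(turl_data);

  // Without the TemplateURL the generic product icon is returned; with it,
  // the bookmarks-specific icon is chosen.
  return match.GetVectorIcon(/*is_bookmark=*/false, &turl);
}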
diff --git a/components/omnibox/browser/autocomplete_match_unittest.cc b/components/omnibox/browser/autocomplete_match_unittest.cc index c5bc08e..18ade44 100644 --- a/components/omnibox/browser/autocomplete_match_unittest.cc +++ b/components/omnibox/browser/autocomplete_match_unittest.cc
@@ -14,15 +14,20 @@ #include "components/omnibox/browser/actions/omnibox_action_in_suggest.h" #include "components/omnibox/browser/actions/omnibox_pedal.h" #include "components/omnibox/browser/actions/omnibox_pedal_concepts.h" +#include "components/omnibox/browser/autocomplete_match_type.h" #include "components/omnibox/browser/autocomplete_provider.h" #include "components/omnibox/browser/fake_autocomplete_provider.h" #include "components/omnibox/browser/omnibox_field_trial.h" #include "components/omnibox/browser/test_scheme_classifier.h" #include "components/omnibox/common/omnibox_features.h" +#include "components/search_engines/template_url.h" +#include "components/search_engines/template_url_data.h" +#include "components/search_engines/template_url_starter_pack_data.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/metrics_proto/omnibox_event.pb.h" #include "third_party/omnibox_proto/entity_info.pb.h" +#include "ui/gfx/vector_icon_types.h" #include "url/gurl.h" using ScoringSignals = ::metrics::OmniboxEventProto::Suggestion::ScoringSignals; @@ -1244,3 +1249,39 @@ } } } + +#if (!BUILDFLAG(IS_ANDROID) || BUILDFLAG(ENABLE_VR)) && !BUILDFLAG(IS_IOS) +TEST_F(AutocompleteMatchTest, ValidateGetVectorIcons) { + AutocompleteMatch match; + + // Irrespective of match type, bookmark suggestions should have a non-empty + // icon. + EXPECT_FALSE(match.GetVectorIcon(/*is_bookmark=*/true).is_empty()); + + for (int type = AutocompleteMatchType::URL_WHAT_YOU_TYPED; + type != AutocompleteMatchType::NUM_TYPES; type++) { + match.type = static_cast<AutocompleteMatchType::Type>(type); + + if (match.type == AutocompleteMatchType::STARTER_PACK) { + // All STARTER_PACK suggestions should have non-empty vector icons. + for (int starter_pack_id = TemplateURLStarterPackData::kBookmarks; + starter_pack_id != TemplateURLStarterPackData::kMaxStarterPackID; + starter_pack_id++) { + TemplateURLData turl_data; + turl_data.starter_pack_id = starter_pack_id; + TemplateURL turl(turl_data); + EXPECT_FALSE( + match.GetVectorIcon(/*is_bookmark=*/false, &turl).is_empty()); + } + } else if (match.type == AutocompleteMatchType::SEARCH_SUGGEST_TAIL || + match.type == AutocompleteMatchType::NULL_RESULT_MESSAGE) { + // SEARCH_SUGGEST_TAIL and NULL_RESULT_MESSAGE suggestions use an empty + // vector icon. + EXPECT_TRUE(match.GetVectorIcon(/*is_bookmark=*/false).is_empty()); + } else { + // All other suggestion types should result in non-empty vector icons. + EXPECT_FALSE(match.GetVectorIcon(/*is_bookmark=*/false).is_empty()); + } + } +} +#endif
diff --git a/components/omnibox/browser/autocomplete_result.cc b/components/omnibox/browser/autocomplete_result.cc index d6653f1..6a623ef 100644 --- a/components/omnibox/browser/autocomplete_result.cc +++ b/components/omnibox/browser/autocomplete_result.cc
@@ -263,6 +263,15 @@ } } +void AutocompleteResult::DeduplicateMatches( + const AutocompleteInput& input, + TemplateURLService* template_url_service) { + SCOPED_UMA_HISTOGRAM_TIMER_MICROS( + "Omnibox.AutocompletionTime.UpdateResult.DeduplicateMatches"); + + DeduplicateMatches(&matches_, input, template_url_service); +} + void AutocompleteResult::SortAndCull( const AutocompleteInput& input, TemplateURLService* template_url_service, @@ -270,10 +279,6 @@ absl::optional<AutocompleteMatch> default_match_to_preserve) { SCOPED_UMA_HISTOGRAM_TIMER_MICROS( "Omnibox.AutocompletionTime.UpdateResult.SortAndCull"); - - for (auto& match : matches_) - match.ComputeStrippedDestinationURL(input, template_url_service); - if (!is_ios) DemoteOnDeviceSearchSuggestions(); @@ -286,7 +291,7 @@ if (!is_android && !is_ios) MaybeCullTailSuggestions(&matches_, comparing_object); - DeduplicateMatches(&matches_); + DeduplicateMatches(input, template_url_service); // Sort the matches by relevance and demotions. std::sort(matches_.begin(), matches_.end(), comparing_object); @@ -1055,7 +1060,14 @@ } // static -void AutocompleteResult::DeduplicateMatches(ACMatches* matches) { +void AutocompleteResult::DeduplicateMatches( + ACMatches* matches, + const AutocompleteInput& input, + TemplateURLService* template_url_service) { + for (auto& match : *matches) { + match.ComputeStrippedDestinationURL(input, template_url_service); + } + // Group matches by stripped URL and whether it's a calculator suggestion. std::unordered_map<AutocompleteResult::MatchDedupComparator, std::vector<ACMatches::iterator>,
diff --git a/components/omnibox/browser/autocomplete_result.h b/components/omnibox/browser/autocomplete_result.h index d0aa6976c..3a2d658 100644 --- a/components/omnibox/browser/autocomplete_result.h +++ b/components/omnibox/browser/autocomplete_result.h
@@ -99,6 +99,12 @@ // Adds a new set of matches to the result set. Does not re-sort. void AppendMatches(const ACMatches& matches); + // Modifies this result's matches such that any duplicate matches are + // coalesced into representative "best" matches. The erased matches are moved + // into the |duplicate_matches| members of their representative matches. + void DeduplicateMatches(const AutocompleteInput& input, + TemplateURLService* template_url_service); + // Removes duplicates, puts the list in sorted order and culls to leave only // the best GetMaxMatches() matches. Sets the default match to the best match // and updates the alternate nav URL. @@ -313,7 +319,9 @@ // Modifies |matches| such that any duplicate matches are coalesced into // representative "best" matches. The erased matches are moved into the // |duplicate_matches| members of their representative matches. - static void DeduplicateMatches(ACMatches* matches); + static void DeduplicateMatches(ACMatches* matches, + const AutocompleteInput& input, + TemplateURLService* template_url_service);
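For context, a brief sketch of calling the widened static overload from outside AutocompleteResult, mirroring the updated history_url_provider_unittest.cc below; DedupeAndSortMatches() is a hypothetical helper name.

#include <algorithm>

#include "components/omnibox/browser/autocomplete_input.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/autocomplete_result.h"
#include "components/search_engines/template_url_service.h"

// Hypothetical helper: the static overload now computes each match's
// stripped_destination_url itself before coalescing duplicates, so callers no
// longer need to run that loop themselves.
void DedupeAndSortMatches(ACMatches* matches,
                          const AutocompleteInput& input,
                          TemplateURLService* template_url_service) {
  AutocompleteResult::DeduplicateMatches(matches, input, template_url_service);
  std::sort(matches->begin(), matches->end(), &AutocompleteMatch::MoreRelevant);
}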
diff --git a/components/omnibox/browser/history_url_provider_unittest.cc b/components/omnibox/browser/history_url_provider_unittest.cc index e085bf4..de87c01 100644 --- a/components/omnibox/browser/history_url_provider_unittest.cc +++ b/components/omnibox/browser/history_url_provider_unittest.cc
@@ -344,10 +344,7 @@ matches_ = autocomplete_->matches(); if (sort_matches_) { TemplateURLService* service = client_->GetTemplateURLService(); - for (auto i = matches_.begin(); i != matches_.end(); ++i) { - i->ComputeStrippedDestinationURL(input, service); - } - AutocompleteResult::DeduplicateMatches(&matches_); + AutocompleteResult::DeduplicateMatches(&matches_, input, service); std::sort(matches_.begin(), matches_.end(), &AutocompleteMatch::MoreRelevant); }
diff --git a/components/omnibox/browser/omnibox_edit_model.cc b/components/omnibox/browser/omnibox_edit_model.cc index b26708b..f7cd760 100644 --- a/components/omnibox/browser/omnibox_edit_model.cc +++ b/components/omnibox/browser/omnibox_edit_model.cc
@@ -1773,24 +1773,17 @@ return client()->GetSizedIcon(extension_icon); } - // The @tabs starter pack suggestion is a unique case. It uses a help center - // article URL as a placeholder and shouldn't display the favicon from the - // help center. Ignore this favicon even if it's available. - bool is_starter_pack_tabs_suggestion = false; - if (AutocompleteMatch::IsStarterPackType(match.type) && - match.associated_keyword) { - TemplateURL* turl = - client()->GetTemplateURLService()->GetTemplateURLForKeyword( - match.associated_keyword->keyword); - is_starter_pack_tabs_suggestion = - turl && turl->GetBuiltinEngineType() == KEYWORD_MODE_STARTER_PACK_TABS; - } - // Get the favicon for navigational suggestions. + // + // The starter pack suggestions are a unique case. These suggestions + // normally use a favicon image that cannot be styled further by client + // code. In order to apply custom styling to the icon (e.g. colors), we ignore + // this favicon in favor of using a vector icon which has better styling + // support. if (!AutocompleteMatch::IsSearchType(match.type) && match.type != AutocompleteMatchType::DOCUMENT_SUGGESTION && match.type != AutocompleteMatchType::HISTORY_CLUSTER && - !is_starter_pack_tabs_suggestion) { + !AutocompleteMatch::IsStarterPackType(match.type)) { // Because the Views UI code calls GetMatchIcon in both the layout and // painting code, we may generate multiple `OnFaviconFetched` callbacks, // all run one after another. This seems to be harmless as the callback @@ -1809,7 +1802,12 @@ } bool is_starred_match = IsStarredMatch(match); - const auto& vector_icon_type = match.GetVectorIcon(is_starred_match); + const TemplateURL* turl = + match.associated_keyword + ? client()->GetTemplateURLService()->GetTemplateURLForKeyword( + match.associated_keyword->keyword) + : nullptr; + const auto& vector_icon_type = match.GetVectorIcon(is_starred_match, turl); return client()->GetSizedIcon(vector_icon_type, vector_icon_color); }
diff --git a/components/omnibox/browser/omnibox_view.cc b/components/omnibox/browser/omnibox_view.cc index 29820e6..9ba746b 100644 --- a/components/omnibox/browser/omnibox_view.cc +++ b/components/omnibox/browser/omnibox_view.cc
@@ -218,9 +218,16 @@ std::move(on_icon_fetched)); } else if (match.type != AutocompleteMatchType::HISTORY_CLUSTER) { - // For site suggestions, display site's favicon. - favicon = model()->client()->GetFaviconForPageUrl( - match.destination_url, std::move(on_icon_fetched)); + // The starter pack suggestions are a unique case. These suggestions + // normally use a favicon image that cannot be styled further by client + // code. In order to apply custom styling to the icon (e.g. colors), we + // ignore this favicon in favor of using a vector icon which has better + // styling support. + if (!AutocompleteMatch::IsStarterPackType(match.type)) { + // For site suggestions, display site's favicon. + favicon = model()->client()->GetFaviconForPageUrl( + match.destination_url, std::move(on_icon_fetched)); + } } if (!favicon.IsEmpty()) @@ -235,8 +242,18 @@ const bool is_bookmarked = bookmark_model && bookmark_model->IsBookmarked(match.destination_url); - const gfx::VectorIcon& vector_icon = match.GetVectorIcon(is_bookmarked); - const auto& color = match.type == AutocompleteMatchType::HISTORY_CLUSTER + // For starter pack suggestions, use template url to generate proper vector + // icon. + const TemplateURL* turl = + match.associated_keyword + ? model() + ->client() + ->GetTemplateURLService() + ->GetTemplateURLForKeyword(match.associated_keyword->keyword) + : nullptr; + const gfx::VectorIcon& vector_icon = match.GetVectorIcon(is_bookmarked, turl); + const auto& color = (match.type == AutocompleteMatchType::HISTORY_CLUSTER || + match.type == AutocompleteMatchType::STARTER_PACK) ? color_bright_vectors : color_vectors; return ui::ImageModel::FromVectorIcon(vector_icon, color, dip_size);
diff --git a/components/omnibox/browser/omnibox_view_unittest.cc b/components/omnibox/browser/omnibox_view_unittest.cc index d75cc7b..069f05a 100644 --- a/components/omnibox/browser/omnibox_view_unittest.cc +++ b/components/omnibox/browser/omnibox_view_unittest.cc
@@ -226,7 +226,7 @@ EXPECT_EQ(page_url, kUrl); } -#endif // !BUILDFLAG(IS_IOS) +#endif // !BUILDFLAG(IS_ANDROID) && !BUILDFLAG(IS_IOS) // Tests GetStateChanges correctly determines if text was deleted. TEST_F(OmniboxViewTest, GetStateChanges_DeletedText) {
diff --git a/components/page_info/page_info.cc b/components/page_info/page_info.cc index 86cc492b..3e1fb2fd 100644 --- a/components/page_info/page_info.cc +++ b/components/page_info/page_info.cc
@@ -630,11 +630,12 @@ site_url_, type); } using Constraints = content_settings::ContentSettingConstraints; - map->SetNarrowestContentSetting( - primary_url, site_url_, type, setting, - is_one_time - ? Constraints{base::Time(), content_settings::SessionModel::OneTime} - : Constraints{}); + Constraints constraints; + if (is_one_time) { + constraints.set_session_model(content_settings::SessionModel::OneTime); + } + map->SetNarrowestContentSetting(primary_url, site_url_, type, setting, + constraints); bool is_subscribed_to_permission_change_event = false;
diff --git a/components/permissions/permission_context_base.cc b/components/permissions/permission_context_base.cc index a4cc5f2..9c678be 100644 --- a/components/permissions/permission_context_base.cc +++ b/components/permissions/permission_context_base.cc
@@ -539,9 +539,10 @@ DCHECK(content_setting == CONTENT_SETTING_ALLOW || content_setting == CONTENT_SETTING_BLOCK); - content_settings::ContentSettingConstraints constraints = { - base::Time(), is_one_time ? content_settings::SessionModel::OneTime - : content_settings::SessionModel::Durable}; + content_settings::ContentSettingConstraints constraints; + constraints.set_session_model(is_one_time + ? content_settings::SessionModel::OneTime + : content_settings::SessionModel::Durable); #if !BUILDFLAG(IS_ANDROID) if (base::FeatureList::IsEnabled( @@ -554,7 +555,7 @@ // the future, consider whether revocation for such permission makes // sense, and/or change this to an early return so that we don't // unnecessarily record timestamps where we don't need them. - constraints.track_last_visit_for_autoexpiration = true; + constraints.set_track_last_visit_for_autoexpiration(true); } } #endif // !BUILDFLAG(IS_ANDROID)
diff --git a/components/permissions/permission_uma_util_unittest.cc b/components/permissions/permission_uma_util_unittest.cc index df9deea7..21cdf05 100644 --- a/components/permissions/permission_uma_util_unittest.cc +++ b/components/permissions/permission_uma_util_unittest.cc
@@ -461,11 +461,10 @@ // Set expiration to five days before the clean-up threshold to mimic that the // permission was revoked five days ago. base::Time past(now - base::Days(5)); - const content_settings::ContentSettingConstraints constraint{ - .expiration = - past + content_settings::features:: - kSafetyCheckUnusedSitePermissionsRevocationCleanUpThreshold - .Get()}; + content_settings::ContentSettingConstraints constraint(past); + constraint.set_lifetime( + content_settings::features:: + kSafetyCheckUnusedSitePermissionsRevocationCleanUpThreshold.Get()); hcsm->SetWebsiteSettingDefaultScope( origin, origin, ContentSettingsType::REVOKED_UNUSED_SITE_PERMISSIONS, base::Value(dict.Clone()), constraint);
diff --git a/components/permissions/unused_site_permissions_service.cc b/components/permissions/unused_site_permissions_service.cc index 5ee0bac2..1794eec5 100644 --- a/components/permissions/unused_site_permissions_service.cc +++ b/components/permissions/unused_site_permissions_service.cc
@@ -455,8 +455,8 @@ dict.Set(kRevokedKey, base::Value::List(std::move(permission_type_list))); - const content_settings::ContentSettingConstraints default_constraint = { - .expiration = clock_->Now() + GetCleanUpThreshold()}; + content_settings::ContentSettingConstraints default_constraint(clock_->Now()); + default_constraint.set_lifetime(GetCleanUpThreshold()); // Set website setting for the list of recently revoked permissions and // previously revoked permissions, if exists.
diff --git a/components/permissions/unused_site_permissions_service_unittest.cc b/components/permissions/unused_site_permissions_service_unittest.cc index a63ac56..a57e782c 100644 --- a/components/permissions/unused_site_permissions_service_unittest.cc +++ b/components/permissions/unused_site_permissions_service_unittest.cc
@@ -104,8 +104,8 @@ const GURL url2("https://example2.com"); const ContentSettingsType type1 = ContentSettingsType::GEOLOCATION; const ContentSettingsType type2 = ContentSettingsType::MEDIASTREAM_CAMERA; - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); const base::Time now = clock()->Now(); const base::TimeDelta precision = @@ -173,8 +173,8 @@ const GURL url2("https://[*.]example2.com"); const GURL url3("file:///foo/bar.txt"); const ContentSettingsType type = ContentSettingsType::GEOLOCATION; - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); // Add one setting for all urls. hcsm()->SetContentSettingDefaultScope( @@ -203,8 +203,8 @@ content_settings::features::kSafetyCheckUnusedSitePermissions); const GURL url("https://example1.com"); - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); // Grant GEOLOCATION permission for the url. hcsm()->SetContentSettingDefaultScope( @@ -233,8 +233,8 @@ const ContentSettingsType types[] = { ContentSettingsType::GEOLOCATION, ContentSettingsType::AUTOMATIC_DOWNLOADS}; - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); // Test combinations of a single origin |primary_pattern| and different // |secondary_pattern|s: equal to primary pattern, different single origin @@ -274,8 +274,8 @@ content_settings::features::kSafetyCheckUnusedSitePermissions); const GURL url("https://example1.com"); - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); // Grant GEOLOCATION permission for the url. 
hcsm()->SetContentSettingDefaultScope( @@ -321,8 +321,8 @@ content_settings::features::kSafetyCheckUnusedSitePermissions); const GURL url("https://example1.com"); - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); hcsm()->SetContentSettingDefaultScope( url, url, ContentSettingsType::MEDIASTREAM_CAMERA, @@ -411,8 +411,8 @@ content_settings::features::kSafetyCheckUnusedSitePermissions); const GURL url1 = GURL("https://example1.com:443"); - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); hcsm()->SetContentSettingDefaultScope( url1, url1, ContentSettingsType::GEOLOCATION, @@ -441,8 +441,8 @@ const GURL url1 = GURL("https://example1.com:443"); const ContentSettingsType type = ContentSettingsType::GEOLOCATION; - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); hcsm()->SetContentSettingDefaultScope( url1, url1, type, ContentSetting::CONTENT_SETTING_ALLOW, constraint); @@ -456,8 +456,10 @@ GetRevokedUnusedPermissions(hcsm())[0]; // Permission remains revoked after regrant and undo. - content_settings::ContentSettingConstraints expiration_constraint = { - .expiration = revoked_permission.metadata.expiration}; + content_settings::ContentSettingConstraints expiration_constraint; + expiration_constraint.set_lifetime( + expiration_constraint.DeltaFromCreationTime( + revoked_permission.metadata.expiration)); service()->RegrantPermissionsForOrigin(url::Origin::Create(url1)); service()->UndoRegrantPermissionsForOrigin({type}, expiration_constraint, url::Origin::Create(url1)); @@ -482,8 +484,8 @@ content_settings::features::kSafetyCheckUnusedSitePermissions); const GURL url("https://example1.com"); - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); // Grant GEOLOCATION and NOTIFICATION permission for the url. hcsm()->SetContentSettingDefaultScope( @@ -561,8 +563,8 @@ const GURL url = GURL("https://example1.com:443"); const ContentSettingsType type = ContentSettingsType::GEOLOCATION; - const content_settings::ContentSettingConstraints constraint{ - .track_last_visit_for_autoexpiration = true}; + content_settings::ContentSettingConstraints constraint; + constraint.set_track_last_visit_for_autoexpiration(true); absl::optional<uint32_t> days_since_revocation; @@ -613,8 +615,8 @@ auto cleanUpThreshold = content_settings::features:: kSafetyCheckUnusedSitePermissionsRevocationCleanUpThreshold.Get(); - const content_settings::ContentSettingConstraints constraint{ - .expiration = clock()->Now() + cleanUpThreshold}; + content_settings::ContentSettingConstraints constraint(clock()->Now()); + constraint.set_lifetime(cleanUpThreshold); // Add `url` to revoked permissions list. hcsm()->SetWebsiteSettingDefaultScope(
diff --git a/components/policy/PRESUBMIT.py b/components/policy/PRESUBMIT.py index 552f435f..1806406 100644 --- a/components/policy/PRESUBMIT.py +++ b/components/policy/PRESUBMIT.py
@@ -585,6 +585,10 @@ if not new_policy_platforms[platform]['to']: continue + # These warnings fire inappropriately in presubmit --all/--files runs, so + # disable them in these cases to reduce the noise. + if input_api.no_diffs: + continue # Support for policies can only be removed for past version until we have # a better reminder process to cleanup the code related to deprecated # policies.
diff --git a/components/policy/resources/templates/policy_definitions/DefaultSearchProvider/DefaultSearchProviderEnabled.yaml b/components/policy/resources/templates/policy_definitions/DefaultSearchProvider/DefaultSearchProviderEnabled.yaml index 6221e89..0fd2fff 100644 --- a/components/policy/resources/templates/policy_definitions/DefaultSearchProvider/DefaultSearchProviderEnabled.yaml +++ b/components/policy/resources/templates/policy_definitions/DefaultSearchProvider/DefaultSearchProviderEnabled.yaml
@@ -5,9 +5,9 @@ If you set the policy, users can't change it in <ph name="PRODUCT_NAME">$1<ex>Google Chrome</ex></ph>. If not set, the default search provider is on, and users can set the search provider list. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: true features: can_be_recommended: true
diff --git a/components/policy/resources/templates/policy_definitions/Extensions/ExtensionInstallForcelist.yaml b/components/policy/resources/templates/policy_definitions/Extensions/ExtensionInstallForcelist.yaml index 19f7ab1..aa28d044 100644 --- a/components/policy/resources/templates/policy_definitions/Extensions/ExtensionInstallForcelist.yaml +++ b/components/policy/resources/templates/policy_definitions/Extensions/ExtensionInstallForcelist.yaml
@@ -12,9 +12,9 @@ Each list item of the policy is a string that contains an extension ID and, optionally, an update URL separated by a semicolon (;). The extension ID is the 32-letter string found, for example, on chrome://extensions when in Developer mode. If specified, the update URL should point to an Update Manifest XML document ( https://developer.chrome.com/extensions/autoupdate ). The update URL should use one of the following schemes: <ph name="HTTP_SCHEME">http</ph>, <ph name="HTTPS_SCHEME">https</ph> or <ph name="FILE_SCHEME">file</ph>. By default, the Chrome Web Store's update URL is used. The update URL set in this policy is only used for the initial installation; subsequent updates of the extension use the update URL in the extension's manifest. The update url for subsequent updates can be overridden using the <ph name="EXTENSION_SETTINGS_POLICY_NAME">ExtensionSettings</ph> policy, see http://support.google.com/chrome/a?p=Configure_ExtensionSettings_policy. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph> instances, apps and extensions from outside the Chrome Web Store can only be forced installed if the instance is joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph> instances, apps and extensions from outside the Chrome Web Store can only be forced installed if the instance is joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph> instances, apps and extensions from outside the Chrome Web Store can only be force installed if the instance is managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph> instances, apps and extensions from outside the Chrome Web Store can only be force installed if the instance is managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. Note: This policy doesn't apply to Incognito mode. Read about hosting extensions ( https://developer.chrome.com/extensions/hosting ). example_value:
diff --git a/components/policy/resources/templates/policy_definitions/Extensions/ExtensionSettings.yaml b/components/policy/resources/templates/policy_definitions/Extensions/ExtensionSettings.yaml index 06d9a131..27564f0 100644 --- a/components/policy/resources/templates/policy_definitions/Extensions/ExtensionSettings.yaml +++ b/components/policy/resources/templates/policy_definitions/Extensions/ExtensionSettings.yaml
@@ -4,9 +4,9 @@ This policy maps an extension ID or an update URL to its specific setting only. A default configuration can be set for the special ID <ph name="DEFAULT_SCOPE">"*"</ph>, which applies to all extensions without a custom configuration in this policy. With an update URL, configuration applies to extensions with the exact update URL stated in the extension manifest ( http://support.google.com/chrome/a?p=Configure_ExtensionSettings_policy ). If the 'override_update_url' flag is set to true, the extension is installed and updated using the "update" URL specified in the <ph name="EXTENSION_INSTALL_FORCELIST_POLICY_NAME">ExtensionInstallForcelist</ph> policy or in 'update_url' field in this policy. The flag 'override_update_url' is ignored if the 'update_url' is a Chrome Web Store url. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph> instances, apps and extensions from outside the Chrome Web Store can only be forced installed if the instance is joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph> instances, apps and extensions from outside the Chrome Web Store can only be forced installed if the instance is joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph> instances, apps and extensions from outside the Chrome Web Store can only be force installed if the instance is managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph> instances, apps and extensions from outside the Chrome Web Store can only be force installed if the instance is managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: '*': allowed_types:
diff --git a/components/policy/resources/templates/policy_definitions/FirstPartySets/FirstPartySetsOverrides.yaml b/components/policy/resources/templates/policy_definitions/FirstPartySets/FirstPartySetsOverrides.yaml index bf56d55f..18454e7 100644 --- a/components/policy/resources/templates/policy_definitions/FirstPartySets/FirstPartySetsOverrides.yaml +++ b/components/policy/resources/templates/policy_definitions/FirstPartySets/FirstPartySetsOverrides.yaml
@@ -32,9 +32,9 @@ All sets provided by to policy must be valid First-Party Sets, if they aren't then an appropriate error will be outputted. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: additions: - associatedSites:
diff --git a/components/policy/resources/templates/policy_definitions/Miscellaneous/AutoOpenFileTypes.yaml b/components/policy/resources/templates/policy_definitions/Miscellaneous/AutoOpenFileTypes.yaml index 00a4503..26db68a 100644 --- a/components/policy/resources/templates/policy_definitions/Miscellaneous/AutoOpenFileTypes.yaml +++ b/components/policy/resources/templates/policy_definitions/Miscellaneous/AutoOpenFileTypes.yaml
@@ -6,7 +6,7 @@ If this policy isn't set, only file types that a user has already specified to automatically be opened will do so when downloaded. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: - exe - txt
diff --git a/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupEnabled.yaml b/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupEnabled.yaml index 583d6570..b24995b 100644 --- a/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupEnabled.yaml +++ b/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupEnabled.yaml
@@ -5,7 +5,7 @@ Setting the policy to Disabled means Chrome Cleanup won't periodically scan and manual triggering is disabled. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: true features: dynamic_refresh: false
diff --git a/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupReportingEnabled.yaml b/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupReportingEnabled.yaml index 532af66f..15d79f26 100644 --- a/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupReportingEnabled.yaml +++ b/components/policy/resources/templates/policy_definitions/Miscellaneous/ChromeCleanupReportingEnabled.yaml
@@ -7,7 +7,7 @@ Leaving the policy unset means <ph name="CHROME_CLEANUP_NAME">Chrome Cleanup</ph> may, in line with policy set by <ph name="SAFE_BROWSING_EXTENDED_REPORTING_ENABLED_POLICY_NAME">SafeBrowsingExtendedReportingEnabled</ph>, report about scans for detecting unwanted software to Google. <ph name="CHROME_CLEANUP_NAME">Chrome Cleanup</ph> asks users if they want the cleanup and to share the results with Google to help with future unwanted software detection. These results have file metadata, automatically installed extensions, and registry keys, as described by the Chrome Privacy Whitepaper. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: true features: dynamic_refresh: true
diff --git a/components/policy/resources/templates/policy_definitions/Miscellaneous/CommandLineFlagSecurityWarningsEnabled.yaml b/components/policy/resources/templates/policy_definitions/Miscellaneous/CommandLineFlagSecurityWarningsEnabled.yaml index 8e1e4e36..4bd2e6b7 100644 --- a/components/policy/resources/templates/policy_definitions/Miscellaneous/CommandLineFlagSecurityWarningsEnabled.yaml +++ b/components/policy/resources/templates/policy_definitions/Miscellaneous/CommandLineFlagSecurityWarningsEnabled.yaml
@@ -5,9 +5,9 @@ Setting the policy to Disabled prevents security warnings from appearing when Chrome is launched with potentially dangerous command-line flags. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: true features: dynamic_refresh: false
diff --git a/components/policy/resources/templates/policy_definitions/Miscellaneous/MetricsReportingEnabled.yaml b/components/policy/resources/templates/policy_definitions/Miscellaneous/MetricsReportingEnabled.yaml index 9a40ae84..572a24cd 100644 --- a/components/policy/resources/templates/policy_definitions/Miscellaneous/MetricsReportingEnabled.yaml +++ b/components/policy/resources/templates/policy_definitions/Miscellaneous/MetricsReportingEnabled.yaml
@@ -7,9 +7,9 @@ When this policy is not set, users can choose the anonymous reporting behavior at installation or first run, and can later change the setting in the Chrome settings. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. (For <ph name="PRODUCT_OS_NAME">$2<ex>Google ChromeOS</ex></ph>, see DeviceMetricsReportingEnabled.) example_value: true
diff --git a/components/policy/resources/templates/policy_definitions/Miscellaneous/SafeBrowsingForTrustedSourcesEnabled.yaml b/components/policy/resources/templates/policy_definitions/Miscellaneous/SafeBrowsingForTrustedSourcesEnabled.yaml index b9dc04af..4cbd9b7f 100644 --- a/components/policy/resources/templates/policy_definitions/Miscellaneous/SafeBrowsingForTrustedSourcesEnabled.yaml +++ b/components/policy/resources/templates/policy_definitions/Miscellaneous/SafeBrowsingForTrustedSourcesEnabled.yaml
@@ -7,7 +7,7 @@ These restrictions apply to downloads triggered from webpage content, as well as the Download link menu option. These restrictions don't apply to the save or download of the currently displayed page or to saving as PDF from the printing options. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: false features: can_be_recommended: true
diff --git a/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionChangePasswordURL.yaml b/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionChangePasswordURL.yaml index b2bd9517..615d737 100644 --- a/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionChangePasswordURL.yaml +++ b/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionChangePasswordURL.yaml
@@ -4,9 +4,9 @@ Turning the policy off or leaving it unset means the service sends users to https://myaccount.google.com to change their password. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: https://mydomain.com/change_password.html features: dynamic_refresh: true
diff --git a/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionLoginURLs.yaml b/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionLoginURLs.yaml index cec3855e..fb806f39 100644 --- a/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionLoginURLs.yaml +++ b/components/policy/resources/templates/policy_definitions/SafeBrowsing/PasswordProtectionLoginURLs.yaml
@@ -5,9 +5,9 @@ Turning this setting off or leaving it unset means the password protection service only captures the password salted hashes on https://accounts.google.com. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: - https://mydomain.com/login.html - https://login.mydomain.com
diff --git a/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingAllowlistDomains.yaml b/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingAllowlistDomains.yaml index 1179f98..86d25d2 100644 --- a/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingAllowlistDomains.yaml +++ b/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingAllowlistDomains.yaml
@@ -6,9 +6,9 @@ This policy must be set as a list of fully qualified domain names. It does not support regular expressions, and will not allowlist subdomains of domains listed in the policy. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: - mydomain.com - myuniversity.edu
diff --git a/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingEnabled.yaml b/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingEnabled.yaml index f3095dc..c21bca3 100644 --- a/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingEnabled.yaml +++ b/components/policy/resources/templates/policy_definitions/SafeBrowsing/SafeBrowsingEnabled.yaml
@@ -11,9 +11,9 @@ If the policy <ph name="SAFE_BROWSING_PROTECTION_LEVEL_POLICY_NAME">SafeBrowsingProtectionLevel</ph> is set, the value of the policy <ph name="SAFE_BROWSING_ENABLED_POLICY_NAME">SafeBrowsingEnabled</ph> is ignored. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: true features: can_be_recommended: true
diff --git a/components/policy/resources/templates/policy_definitions/Startup/HomepageIsNewTabPage.yaml b/components/policy/resources/templates/policy_definitions/Startup/HomepageIsNewTabPage.yaml index f18adac..5303342a 100644 --- a/components/policy/resources/templates/policy_definitions/Startup/HomepageIsNewTabPage.yaml +++ b/components/policy/resources/templates/policy_definitions/Startup/HomepageIsNewTabPage.yaml
@@ -5,9 +5,9 @@ If you set the policy, users can't change their homepage type in <ph name="PRODUCT_NAME">$1<ex>Google Chrome</ex></ph>. If not set, the user decides whether or not the New Tab page is their homepage. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: true features: can_be_recommended: true
diff --git a/components/policy/resources/templates/policy_definitions/Startup/HomepageLocation.yaml b/components/policy/resources/templates/policy_definitions/Startup/HomepageLocation.yaml index 5ac75fe..2ae4e38 100644 --- a/components/policy/resources/templates/policy_definitions/Startup/HomepageLocation.yaml +++ b/components/policy/resources/templates/policy_definitions/Startup/HomepageLocation.yaml
@@ -8,9 +8,9 @@ Leaving both <ph name="HOMEPAGE_LOCATION_POLICY_NAME">HomepageLocation</ph> and <ph name="HOMEPAGE_IS_NEW_TAB_PAGE_POLICY_NAME">HomepageIsNewTabPage</ph> unset lets users choose their homepage. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: https://www.chromium.org features: can_be_recommended: true
diff --git a/components/policy/resources/templates/policy_definitions/Startup/NewTabPageLocation.yaml b/components/policy/resources/templates/policy_definitions/Startup/NewTabPageLocation.yaml index 12f531e3..fabd6f07 100644 --- a/components/policy/resources/templates/policy_definitions/Startup/NewTabPageLocation.yaml +++ b/components/policy/resources/templates/policy_definitions/Startup/NewTabPageLocation.yaml
@@ -10,9 +10,9 @@ Leaving the policy unset or empty puts the default New Tab page in use. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: https://www.chromium.org features: dynamic_refresh: true
diff --git a/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartup.yaml b/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartup.yaml index 8a31c37..6f806bba 100644 --- a/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartup.yaml +++ b/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartup.yaml
@@ -8,9 +8,9 @@ If this policy is set to <ph name="POLICY_ENUM_RESTOREONSTARTUP_RESTOREONSTARTUPISLASTSESSIONANDURLS">RestoreOnStartupIsLastSessionAndURLs</ph>, browser will restore previous session and open a separate window to show URLs that are set from <ph name="RESTORE_ON_STARTUP_URLS_POLICY_NAME">RestoreOnStartupURLs</ph>. Note that users can choose to keep those URLs open and they will also be restored in the future session. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. - On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MAC_OS_NAME">macOS</ph>, this policy is only available on instances that are managed via MDM, joined to a domain via MCX or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: 4 features: can_be_recommended: true
diff --git a/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartupURLs.yaml b/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartupURLs.yaml index fb6b025..d490421 100644 --- a/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartupURLs.yaml +++ b/components/policy/resources/templates/policy_definitions/Startup/RestoreOnStartupURLs.yaml
@@ -4,7 +4,7 @@ If not set, the New Tab page opens on start up. - On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph>` or enrolled in `<ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>`. + On <ph name="MS_WIN_NAME">Microsoft® Windows®</ph>, this policy is only available on instances that are joined to a <ph name="MS_AD_NAME">Microsoft® Active Directory®</ph> domain, joined to <ph name="MS_AAD_NAME">Microsoft® Azure® Active Directory®</ph> or enrolled in <ph name="CHROME_BROWSER_CLOUD_MANAGEMENT_NAME">Chrome Browser Cloud Management</ph>. example_value: - https://example.com - https://www.chromium.org
diff --git a/components/reduce_accept_language/browser/reduce_accept_language_service.cc b/components/reduce_accept_language/browser/reduce_accept_language_service.cc index c8c08f16d..3601a58 100644 --- a/components/reduce_accept_language/browser/reduce_accept_language_service.cc +++ b/components/reduce_accept_language/browser/reduce_accept_language_service.cc
@@ -11,6 +11,7 @@ #include "base/metrics/histogram_macros.h" #include "base/strings/string_split.h" #include "base/time/time.h" +#include "components/content_settings/core/browser/content_settings_utils.h" #include "components/content_settings/core/browser/host_content_settings_map.h" #include "components/content_settings/core/common/content_settings_constraints.h" #include "components/content_settings/core/common/content_settings_types.h" @@ -110,13 +111,12 @@ network::features::kReduceAcceptLanguageCacheDuration.Get(); accept_language_dictionary.Set(kReduceAcceptLanguageSettingKey, language); + content_settings::ContentSettingConstraints constraints; + constraints.set_lifetime(cache_duration); + constraints.set_session_model(content_settings::SessionModel::Durable); settings_map_->SetWebsiteSettingDefaultScope( url, GURL(), ContentSettingsType::REDUCED_ACCEPT_LANGUAGE, - base::Value(std::move(accept_language_dictionary)), - {cache_duration.is_zero() - ? base::Time() - : content_settings::GetConstraintExpiration(cache_duration), - content_settings::SessionModel::Durable}); + base::Value(std::move(accept_language_dictionary)), constraints); // Record the time spent getting the reduce accept language. base::TimeDelta duration = base::TimeTicks::Now() - start_time;
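Note that the removed branch special-cased a zero cache duration by passing base::Time() (no expiration), while the new code hands the duration to set_lifetime() unconditionally. A minimal sketch of the replacement, under the assumption that a zero lifetime is interpreted as "never expires" (which is what the removed branch suggests); the helper name is illustrative:

#include "base/time/time.h"
#include "components/content_settings/core/common/content_settings_constraints.h"

// Illustrative: constraints for caching the reduced Accept-Language value.
content_settings::ContentSettingConstraints MakeCacheConstraints(
    base::TimeDelta cache_duration) {
  content_settings::ContentSettingConstraints constraints;
  // Assumption: a zero lifetime means the setting never expires, matching the
  // old base::Time() expiration used when |cache_duration| was zero.
  constraints.set_lifetime(cache_duration);
  constraints.set_session_model(content_settings::SessionModel::Durable);
  return constraints;
}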
diff --git a/components/safe_browsing/core/browser/db/v4_local_database_manager.cc b/components/safe_browsing/core/browser/db/v4_local_database_manager.cc index a41b01c..79a8d54 100644 --- a/components/safe_browsing/core/browser/db/v4_local_database_manager.cc +++ b/components/safe_browsing/core/browser/db/v4_local_database_manager.cc
@@ -265,11 +265,20 @@ // simple structure to delete them all. base::FileEnumerator enumerator( path.DirName(), false, base::FileEnumerator::FILES, - path.BaseName().value() + FILE_PATH_LITERAL("*")); + path.BaseName().value() + FILE_PATH_LITERAL("*"), + // Since the search is non-recursive and only on files, the folder search + // policy doesn't matter. We set it to the default value here. + base::FileEnumerator::FolderSearchPolicy::MATCH_ONLY, + base::FileEnumerator::ErrorPolicy::STOP_ENUMERATION); for (base::FilePath store_path = enumerator.Next(); !store_path.empty(); store_path = enumerator.Next()) { base::DeleteFile(store_path); } + + if (enumerator.GetError() != base::File::FILE_OK) { + LOG(ERROR) << "Removing store at " << path << " failed with error " + << base::File::ErrorToString(enumerator.GetError()); + } } bool GetPrefixMatchesIsAsync() {
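The hunk above opts base::FileEnumerator into ErrorPolicy::STOP_ENUMERATION so that a filesystem error ends the walk and becomes observable through GetError(). A minimal sketch of that idiom in isolation, compiled against //base inside the Chromium tree (the function name and bool return are illustrative):

#include "base/files/file.h"
#include "base/files/file_enumerator.h"
#include "base/files/file_path.h"
#include "base/files/file_util.h"
#include "base/logging.h"

// Illustrative: delete every sibling file sharing |path|'s base-name prefix,
// reporting whether enumeration completed without a filesystem error.
bool DeleteStoreFiles(const base::FilePath& path) {
  base::FileEnumerator enumerator(
      path.DirName(), /*recursive=*/false, base::FileEnumerator::FILES,
      path.BaseName().value() + FILE_PATH_LITERAL("*"),
      base::FileEnumerator::FolderSearchPolicy::MATCH_ONLY,
      base::FileEnumerator::ErrorPolicy::STOP_ENUMERATION);
  for (base::FilePath file = enumerator.Next(); !file.empty();
       file = enumerator.Next()) {
    base::DeleteFile(file);
  }
  // With STOP_ENUMERATION, the first error ends the loop and is reported here.
  if (enumerator.GetError() != base::File::FILE_OK) {
    LOG(ERROR) << "Enumerating " << path.DirName() << " failed: "
               << base::File::ErrorToString(enumerator.GetError());
    return false;
  }
  return true;
}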
diff --git a/components/segmentation_platform/embedder/input_delegate/tab_rank_dispatcher.cc b/components/segmentation_platform/embedder/input_delegate/tab_rank_dispatcher.cc index e8debf43..242b26e 100644 --- a/components/segmentation_platform/embedder/input_delegate/tab_rank_dispatcher.cc +++ b/components/segmentation_platform/embedder/input_delegate/tab_rank_dispatcher.cc
@@ -45,7 +45,7 @@ void RecordTabCountFromStartupToFirstSyncUpdate(long cross_device_tab_count) { base::UmaHistogramCounts1000( - "SegmentationPlatform.SyncSessions.TotalTabsCountAtFirstSyncUpdate", + "SegmentationPlatform.SyncSessions.TotalTabCountAtFirstSyncUpdate", cross_device_tab_count); }
diff --git a/components/services/screen_ai/BUILD.gn b/components/services/screen_ai/BUILD.gn index 296dac04..31d4562 100644 --- a/components/services/screen_ai/BUILD.gn +++ b/components/services/screen_ai/BUILD.gn
@@ -49,7 +49,7 @@ } group("test_support_data") { - visibility = [ ":test_support" ] + visibility = [ ":unit_tests" ] testonly = true data = [ "$root_gen_dir/components/services/screen_ai/proto/view_hierarchy.descriptor" ] @@ -59,22 +59,6 @@ ] } -source_set("test_support") { - testonly = true - - sources = [ - "proto/test_proto_loader.cc", - "proto/test_proto_loader.h", - ] - - data_deps = [ ":test_support_data" ] - - deps = [ - "//base", - "//third_party/protobuf:protobuf_full", - ] -} - source_set("unit_tests") { testonly = true @@ -102,6 +86,7 @@ ] if (!is_win) { - deps += [ ":test_support" ] + deps += [ "//content/test:proto_test_support" ] + data_deps = [ ":test_support_data" ] } }
diff --git a/components/services/screen_ai/DEPS b/components/services/screen_ai/DEPS index 663ab30..372645d 100644 --- a/components/services/screen_ai/DEPS +++ b/components/services/screen_ai/DEPS
@@ -1,6 +1,7 @@ include_rules = [ "+google/protobuf", "+components/ukm", + "+content/public/test", "+content/public/browser", "+sandbox/policy", "+services/metrics/public/cpp",
diff --git a/components/services/screen_ai/proto/main_content_extractor_proto_convertor_unittest.cc b/components/services/screen_ai/proto/main_content_extractor_proto_convertor_unittest.cc index a7cd39c62..31c93d5 100644 --- a/components/services/screen_ai/proto/main_content_extractor_proto_convertor_unittest.cc +++ b/components/services/screen_ai/proto/main_content_extractor_proto_convertor_unittest.cc
@@ -10,8 +10,8 @@ #include "base/path_service.h" #include "base/strings/string_piece.h" #include "base/strings/stringprintf.h" -#include "components/services/screen_ai/proto/test_proto_loader.h" #include "components/services/screen_ai/proto/view_hierarchy.pb.h" +#include "content/public/test/test_proto_loader.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/abseil-cpp/absl/types/optional.h" #include "ui/accessibility/ax_node_data.h" @@ -225,6 +225,31 @@ } } +bool LoadTextProto(const base::FilePath& proto_file_path, + const char* proto_descriptor_relative_file_path, + google::protobuf::MessageLite& proto) { + std::string file_content; + if (!base::ReadFileToString(proto_file_path, &file_content)) { + LOG(ERROR) << "Failed to read expected proto from: " << proto_file_path; + return false; + } + + base::FilePath descriptor_full_path; + if (!base::PathService::Get(base::DIR_GEN_TEST_DATA_ROOT, + &descriptor_full_path)) { + LOG(ERROR) << "Generated test data root not found!"; + return false; + } + descriptor_full_path = + descriptor_full_path.AppendASCII(proto_descriptor_relative_file_path); + + content::TestProtoLoader loader; + std::string serialized_message; + loader.ParseFromText(descriptor_full_path, proto.GetTypeName(), file_content, + serialized_message); + return proto.ParseFromString(serialized_message); +} + } // namespace namespace screen_ai { @@ -326,7 +351,7 @@ // Load expected Proto. screenai::ViewHierarchy expected_view_hierarchy; - ASSERT_TRUE(test_proto_loader::TestProtoLoader::LoadTextProto( + ASSERT_TRUE(LoadTextProto( kExpectedProtoPath, "gen/components/services/screen_ai/proto/view_hierarchy.descriptor", expected_view_hierarchy));
diff --git a/components/services/screen_ai/proto/test_proto_loader.cc b/components/services/screen_ai/proto/test_proto_loader.cc deleted file mode 100644 index cea4104..0000000 --- a/components/services/screen_ai/proto/test_proto_loader.cc +++ /dev/null
@@ -1,103 +0,0 @@ -// Copyright 2022 The Chromium Authors -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "components/services/screen_ai/proto/test_proto_loader.h" - -#include "base/files/file_util.h" -#include "base/logging.h" -#include "base/path_service.h" -#include "base/strings/string_split.h" -#include "google/protobuf/text_format.h" - -namespace test_proto_loader { - -const google::protobuf::Message* TestProtoLoader::GetPrototype( - base::FilePath descriptor_path, - std::string package, - std::string name) { - std::string file_contents; - - if (!base::ReadFileToString(descriptor_path, &file_contents)) { - LOG(ERROR) << "Couldn't load contents of " << descriptor_path; - return nullptr; - } - - if (!descriptor_set_.ParseFromString(file_contents)) { - LOG(ERROR) << "Couldn't parse descriptor from " << descriptor_path; - return nullptr; - } - - for (int file_i = 0; file_i < descriptor_set_.file_size(); ++file_i) { - const google::protobuf::FileDescriptorProto& file = - descriptor_set_.file(file_i); - if (file.package() != package) - continue; - const google::protobuf::FileDescriptor* descriptor = - descriptor_pool_.BuildFile(file); - for (int message_type_i = 0; - message_type_i < descriptor->message_type_count(); ++message_type_i) { - const google::protobuf::Descriptor* message_type = - descriptor->message_type(message_type_i); - if (message_type->name() != name) - continue; - return dynamic_message_factory_.GetPrototype(message_type); - } - } - LOG(ERROR) << "Couldn't find " << package << "." << name << "in " - << descriptor_path; - return nullptr; -} - -bool TestProtoLoader::ParseFromText( - const base::FilePath& descriptor_path, - const std::string& proto_text, - google::protobuf::MessageLite& destination) { - // Load the descriptors and find the one for |destination|. - std::string package, name; - std::vector<std::string> type_name_parts = - base::SplitString(destination.GetTypeName(), ".", base::KEEP_WHITESPACE, - base::SPLIT_WANT_ALL); - if (type_name_parts.size() != 2) - return false; - - const google::protobuf::Message* prototype = - GetPrototype(descriptor_path, /*package =*/type_name_parts[0], - /* name = */ type_name_parts[1]); - if (!prototype) - return false; - - // Parse the text using the descriptor-generated message and send it to - // |destination|. - std::unique_ptr<google::protobuf::Message> message(prototype->New()); - google::protobuf::TextFormat::ParseFromString(proto_text, message.get()); - destination.ParseFromString(message->SerializeAsString()); - - return true; -} - -// static -bool TestProtoLoader::LoadTextProto( - const base::FilePath& proto_file_path, - const char* proto_descriptor_relative_file_path, - google::protobuf::MessageLite& proto) { - std::string file_content; - if (!base::ReadFileToString(proto_file_path, &file_content)) { - LOG(ERROR) << "Failed to read expected proto from: " << proto_file_path; - return false; - } - - base::FilePath descriptor_full_path; - if (!base::PathService::Get(base::DIR_GEN_TEST_DATA_ROOT, - &descriptor_full_path)) { - LOG(ERROR) << "Generated test data root not found!"; - return false; - } - descriptor_full_path = - descriptor_full_path.AppendASCII(proto_descriptor_relative_file_path); - - test_proto_loader::TestProtoLoader loader; - return loader.ParseFromText(descriptor_full_path, file_content, proto); -} - -} // namespace test_proto_loader
diff --git a/components/services/screen_ai/proto/test_proto_loader.h b/components/services/screen_ai/proto/test_proto_loader.h deleted file mode 100644 index 6260d1b..0000000 --- a/components/services/screen_ai/proto/test_proto_loader.h +++ /dev/null
@@ -1,58 +0,0 @@ -// Copyright 2022 The Chromium Authors -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef COMPONENTS_SERVICES_SCREEN_AI_PROTO_TEST_PROTO_LOADER_H_ -#define COMPONENTS_SERVICES_SCREEN_AI_PROTO_TEST_PROTO_LOADER_H_ - -#include "base/files/file_path.h" -#include "google/protobuf/descriptor.h" -#include "google/protobuf/descriptor.pb.h" -#include "google/protobuf/dynamic_message.h" -#include "google/protobuf/message.h" - -namespace test_proto_loader { - -// This class works around the fact that chrome only includes the lite runtime -// of protobufs. Lite protobufs inherit from |MessageLite| and cannot be used to -// parse from text format. Parsing from text -// format is useful in tests. We cannot include the full version of a protobuf -// in test code because it would clash with the lite version. -// -// This class uses the protobuf descriptors (generated at compile time) to -// to generate a |Message| that can be used to parse from text. This message -// can then serialize to binary which can be parsed by the |MessageLite|. -// -// If needed, we can move this class to a folder that would be available to -// other tests. -class TestProtoLoader { - public: - TestProtoLoader() = default; - ~TestProtoLoader() = default; - TestProtoLoader(const TestProtoLoader&) = delete; - TestProtoLoader& operator=(const TestProtoLoader&) = delete; - - bool ParseFromText(const base::FilePath& descriptor_path, - const std::string& proto_text, - google::protobuf::MessageLite& destination); - - // Loads a text proto file from |proto_file_path| into |proto|, where the - // descriptor of the proto exists in |proto_descriptor_relative_file_path|, - // relative to DIR_GEN_TEST_DATA_ROOT. - static bool LoadTextProto(const base::FilePath& proto_file_path, - const char* proto_descriptor_relative_file_path, - google::protobuf::MessageLite& proto); - - private: - const google::protobuf::Message* GetPrototype(base::FilePath descriptor_path, - std::string package, - std::string name); - - google::protobuf::DescriptorPool descriptor_pool_; - google::protobuf::FileDescriptorSet descriptor_set_; - google::protobuf::DynamicMessageFactory dynamic_message_factory_; -}; - -} // namespace test_proto_loader - -#endif // COMPONENTS_SERVICES_SCREEN_AI_PROTO_TEST_PROTO_LOADER_H_
diff --git a/components/stylus_handwriting/android/java/src/org/chromium/components/stylus_handwriting/AndroidStylusWritingHandler.java b/components/stylus_handwriting/android/java/src/org/chromium/components/stylus_handwriting/AndroidStylusWritingHandler.java index 499f724b5..889d52d 100644 --- a/components/stylus_handwriting/android/java/src/org/chromium/components/stylus_handwriting/AndroidStylusWritingHandler.java +++ b/components/stylus_handwriting/android/java/src/org/chromium/components/stylus_handwriting/AndroidStylusWritingHandler.java
@@ -20,7 +20,7 @@ import org.chromium.base.BuildInfo; import org.chromium.base.Log; import org.chromium.blink_public.common.BlinkFeatures; -import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.StylusWritingHandler; import org.chromium.content_public.browser.StylusWritingImeCallback; import org.chromium.content_public.browser.WebContents; @@ -140,7 +140,7 @@ public boolean didHandleCursorUpdate(View currentView) { if (sHandwritingPointerType == null) return false; // Enable this icon behind feature flag that shows hover Icon in expanded area of target. - if (!ContentFeatureList.isEnabled(BlinkFeatures.STYLUS_POINTER_ADJUSTMENT)) return false; + if (!ContentFeatureMap.isEnabled(BlinkFeatures.STYLUS_POINTER_ADJUSTMENT)) return false; currentView.setPointerIcon( PointerIcon.getSystemIcon(currentView.getContext(), sHandwritingPointerType));
diff --git a/components/subresource_filter/content/browser/subresource_filter_content_settings_manager.cc b/components/subresource_filter/content/browser/subresource_filter_content_settings_manager.cc index 842d140f..e7cdea6 100644 --- a/components/subresource_filter/content/browser/subresource_filter_content_settings_manager.cc +++ b/components/subresource_filter/content/browser/subresource_filter_content_settings_manager.cc
@@ -163,19 +163,23 @@ if (url.is_empty()) return; + content_settings::ContentSettingConstraints constraints; // Metadata expires after kMaxPersistMetadataDuration by default. If // kNonRenewingExpiryTime was previously set, then we are storing ads // intervention metadata and should not override the expiry time that // was previously set. - base::Time expiry_time = base::Time::Now() + kMaxPersistMetadataDuration; + base::TimeDelta setting_lifetime = kMaxPersistMetadataDuration; if (dict && dict->Find(kNonRenewingExpiryTime)) { + // TODO(https://crbug.com/1450356): we should store the lifetime of the + // permission, rather than just its expiration. absl::optional<double> metadata_expiry_time = dict->FindDouble(kNonRenewingExpiryTime); DCHECK(metadata_expiry_time); - expiry_time = base::Time::FromDoubleT(*metadata_expiry_time); + base::Time expiry_time = base::Time::FromDoubleT(*metadata_expiry_time); + setting_lifetime = constraints.DeltaFromCreationTime(expiry_time); } + constraints.set_lifetime(setting_lifetime); - content_settings::ContentSettingConstraints constraints = {expiry_time}; settings_map_->SetWebsiteSettingDefaultScope( url, GURL(), ContentSettingsType::ADS_DATA, dict ? base::Value(std::move(*dict)) : base::Value(), constraints);
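The hunk above replaces an absolute expiry time on ContentSettingConstraints with a lifetime derived via DeltaFromCreationTime(). A simplified standalone model of that conversion, with std::chrono standing in for base::Time/base::TimeDelta; all names are illustrative and the clamp to zero is an assumption of this sketch:

```cpp
#include <algorithm>
#include <chrono>

using Clock = std::chrono::system_clock;

// Default persistence window for the metadata (value is a placeholder).
constexpr std::chrono::hours kMaxPersistMetadataDuration{24 * 7};

// Roughly what constraints.DeltaFromCreationTime(expiry_time) supplies: the
// remaining window between creation ("now") and the stored absolute expiry.
Clock::duration LifetimeFromExpiry(Clock::time_point creation_time,
                                   Clock::time_point expiry_time) {
  return std::max(expiry_time - creation_time, Clock::duration::zero());
}

Clock::duration ChooseSettingLifetime(bool has_stored_expiry,
                                      Clock::time_point stored_expiry) {
  // Default: metadata lives for the maximum persistence window.
  if (!has_stored_expiry) {
    return kMaxPersistMetadataDuration;
  }
  // Ads-intervention metadata keeps its previously stored, non-renewing
  // expiry rather than extending it.
  return LifetimeFromExpiry(Clock::now(), stored_expiry);
}
```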
diff --git a/components/test/data/viz/unit_tests_bundle_data.filelist b/components/test/data/viz/unit_tests_bundle_data.filelist index 30f7440..bda505f 100644 --- a/components/test/data/viz/unit_tests_bundle_data.filelist +++ b/components/test/data/viz/unit_tests_bundle_data.filelist
@@ -253,4 +253,3 @@ //components/test/data/viz/yuv_stripes_offset.png //components/test/data/viz/zoom_filter_skia_gl.png //components/test/data/viz/zoom_filter_skia_vk.png -//components/test/data/viz/zoom_filter_sw.png
diff --git a/components/test/data/viz/zoom_filter_skia_gl.png b/components/test/data/viz/zoom_filter_skia_gl.png index b3bd257..3703c25 100644 --- a/components/test/data/viz/zoom_filter_skia_gl.png +++ b/components/test/data/viz/zoom_filter_skia_gl.png Binary files differ
diff --git a/components/test/data/viz/zoom_filter_skia_vk.png b/components/test/data/viz/zoom_filter_skia_vk.png index e028589..b216796 100644 --- a/components/test/data/viz/zoom_filter_skia_vk.png +++ b/components/test/data/viz/zoom_filter_skia_vk.png Binary files differ
diff --git a/components/test/data/viz/zoom_filter_sw.png b/components/test/data/viz/zoom_filter_sw.png deleted file mode 100644 index 19a1e55..0000000 --- a/components/test/data/viz/zoom_filter_sw.png +++ /dev/null Binary files differ
diff --git a/components/vector_icons/BUILD.gn b/components/vector_icons/BUILD.gn index 10b446a..02d2799 100644 --- a/components/vector_icons/BUILD.gn +++ b/components/vector_icons/BUILD.gn
@@ -68,6 +68,7 @@ "document_scanner.icon", "dogfood.icon", "edit.icon", + "edit_chrome_refresh.icon", "email.icon", "email_outline.icon", "error.icon", @@ -101,6 +102,7 @@ "help.icon", "help_outline.icon", "history.icon", + "history_chrome_refresh.icon", "https_valid.icon", "https_valid_arrow.icon", "https_valid_chrome_refresh.icon",
diff --git a/components/vector_icons/edit_chrome_refresh.icon b/components/vector_icons/edit_chrome_refresh.icon new file mode 100644 index 0000000..33e99d6 --- /dev/null +++ b/components/vector_icons/edit_chrome_refresh.icon
@@ -0,0 +1,80 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +CANVAS_DIMENSIONS, 24, +MOVE_TO, 5.05f, 19.01f, +R_H_LINE_TO, 1.31f, +R_LINE_TO, 9.9f, -9.88f, +R_LINE_TO, -0.65f, -0.67f, +R_LINE_TO, -0.67f, -0.65f, +R_LINE_TO, -9.88f, 9.9f, +CLOSE, +R_MOVE_TO, -2.09f, 2.09f, +R_V_LINE_TO, -4.28f, +R_LINE_TO, 13.45f, -13.43f, +R_CUBIC_TO, 0.4f, -0.4f, 0.89f, -0.6f, 1.47f, -0.6f, +R_ARC_TO, 2.02f, 2.02f, 0, 0, 1, 1.49f, 0.61f, +R_LINE_TO, 1.32f, 1.33f, +R_CUBIC_TO, 0.42f, 0.41f, 0.62f, 0.89f, 0.61f, 1.47f, +R_ARC_TO, 2.02f, 2.02f, 0, 0, 1, -0.61f, 1.46f, +LINE_TO, 7.24f, 21.09f, +CLOSE, +MOVE_TO, 19.18f, 6.18f, +LINE_TO, 17.88f, 4.87f, +CLOSE, +R_MOVE_TO, -2.93f, 2.95f, +R_LINE_TO, -0.65f, -0.68f, +R_LINE_TO, -0.67f, -0.65f, +CLOSE + +CANVAS_DIMENSIONS, 20, +MOVE_TO, 4.54f, 15.5f, +R_H_LINE_TO, 0.8f, +R_LINE_TO, 7.75f, -7.75f, +R_LINE_TO, -0.4f, -0.4f, +R_LINE_TO, -0.39f, -0.39f, +R_LINE_TO, -7.76f, 7.75f, +CLOSE, +R_MOVE_TO, -1.57f, 1.57f, +R_V_LINE_TO, -3.02f, +LINE_TO, 13.67f, 3.34f, +R_ARC_TO, 1.08f, 1.08f, 0, 0, 1, 1.53f, 0, +R_LINE_TO, 1.52f, 1.5f, +R_CUBIC_TO, 0.22f, 0.22f, 0.33f, 0.47f, 0.32f, 0.77f, +R_CUBIC_TO, -0.01f, 0.3f, -0.11f, 0.55f, -0.32f, 0.76f, +LINE_TO, 5.99f, 17.08f, +CLOSE, +R_MOVE_TO, 12.48f, -11.7f, +R_LINE_TO, -0.8f, -0.78f, +CLOSE, +R_MOVE_TO, -2.36f, 2.38f, +R_LINE_TO, -0.4f, -0.4f, +R_LINE_TO, -0.39f, -0.39f, +CLOSE + +CANVAS_DIMENSIONS, 16, +MOVE_TO, 3.68f, 12.41f, +R_H_LINE_TO, 0.55f, +R_LINE_TO, 6.06f, -6.07f, +R_LINE_TO, -0.27f, -0.27f, +R_LINE_TO, -0.27f, -0.27f, +R_LINE_TO, -6.07f, 6.06f, +CLOSE, +R_MOVE_TO, -1.34f, 1.34f, +R_V_LINE_TO, -2.44f, +R_LINE_TO, 8.57f, -8.58f, +R_ARC_TO, 0.93f, 0.93f, 0, 0, 1, 0.65f, -0.28f, +R_ARC_TO, 0.97f, 0.97f, 0, 0, 1, 0.69f, 0.28f, +R_LINE_TO, 1.13f, 1.12f, +R_CUBIC_TO, 0.21f, 0.19f, 0.31f, 0.42f, 0.29f, 0.68f, +R_ARC_TO, 0.96f, 0.96f, 0, 0, 1, -0.29f, 0.66f, +LINE_TO, 4.78f, 13.75f, +CLOSE, +R_MOVE_TO, 9.96f, -9.42f, +R_LINE_TO, -0.55f, -0.54f, +CLOSE, +MOVE_TO, 10.29f, 6.34f, +R_LINE_TO, -0.27f, -0.27f, +R_LINE_TO, -0.27f, -0.27f, +CLOSE \ No newline at end of file
diff --git a/components/vector_icons/history_chrome_refresh.icon b/components/vector_icons/history_chrome_refresh.icon new file mode 100644 index 0000000..a8c3f090 --- /dev/null +++ b/components/vector_icons/history_chrome_refresh.icon
@@ -0,0 +1,43 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +CANVAS_DIMENSIONS, 16, +MOVE_TO, 7.99f, 13.73f, +R_CUBIC_TO, -1.59f, 0, -2.94f, -0.55f, -4.06f, -1.67f, +CUBIC_TO, 2.81f, 10.95f, 2.26f, 9.59f, 2.26f, 8, +H_LINE_TO, 3.64f, +R_CUBIC_TO, 0, 1.2f, 0.43f, 2.22f, 1.28f, 3.07f, +R_CUBIC_TO, 0.85f, 0.85f, 1.88f, 1.28f, 3.07f, 1.28f, +R_CUBIC_TO, 1.2f, 0, 2.22f, -0.43f, 3.07f, -1.28f, +R_CUBIC_TO, 0.85f, -0.85f, 1.28f, -1.87f, 1.28f, -3.07f, +R_CUBIC_TO, 0, -1.2f, -0.43f, -2.22f, -1.28f, -3.07f, +R_CUBIC_TO, -0.85f, -0.85f, -1.87f, -1.28f, -3.07f, -1.28f, +R_CUBIC_TO, -0.66f, 0, -1.26f, 0.13f, -1.83f, 0.39f, +ARC_TO, 4.16f, 4.16f, 0, 0, 0, 4.73f, 5.13f, +R_H_LINE_TO, 1.67f, +R_V_LINE_TO, 1.26f, +H_LINE_TO, 2.3f, +V_LINE_TO, 2.3f, +R_H_LINE_TO, 1.26f, +R_V_LINE_TO, 2.11f, +ARC_TO, 5.75f, 5.75f, 0, 0, 1, 5.49f, 2.84f, +R_ARC_TO, 5.51f, 5.51f, 0, 0, 1, 2.5f, -0.57f, +R_CUBIC_TO, 0.79f, 0, 1.54f, 0.15f, 2.24f, 0.45f, +R_CUBIC_TO, 0.7f, 0.3f, 1.3f, 0.71f, 1.82f, 1.23f, +R_ARC_TO, 5.77f, 5.77f, 0, 0, 1, 1.23f, 1.82f, +R_CUBIC_TO, 0.31f, 0.7f, 0.45f, 1.44f, 0.45f, 2.24f, +R_CUBIC_TO, 0, 0.79f, -0.15f, 1.54f, -0.45f, 2.23f, +R_CUBIC_TO, -0.3f, 0.7f, -0.71f, 1.31f, -1.23f, 1.82f, +R_ARC_TO, 5.77f, 5.77f, 0, 0, 1, -1.82f, 1.23f, +R_ARC_TO, 5.51f, 5.51f, 0, 0, 1, -2.23f, 0.45f, +CLOSE, +R_MOVE_TO, 1.65f, -3.44f, +LINE_TO, 7.37f, 8.03f, +R_V_LINE_TO, -3.23f, +R_H_LINE_TO, 1.26f, +V_LINE_TO, 7.5f, +R_LINE_TO, 1.9f, 1.9f, +CLOSE, +R_MOVE_TO, 0, 0, +CLOSE
diff --git a/components/viz/service/display/resolved_frame_data.cc b/components/viz/service/display/resolved_frame_data.cc index ae9097bf..5df64487 100644 --- a/components/viz/service/display/resolved_frame_data.cc +++ b/components/viz/service/display/resolved_frame_data.cc
@@ -46,13 +46,15 @@ *this = AggregationPassData(); } -ParentClipData::ParentClipData() = default; -ParentClipData::ParentClipData(ParentClipData&& other) = default; -ParentClipData& ParentClipData::operator=(ParentClipData& other) = default; -ParentClipData& ParentClipData::operator=(const ParentClipData& other) = +PersistentPassData::PersistentPassData() = default; +PersistentPassData::PersistentPassData(PersistentPassData&& other) = default; +PersistentPassData& PersistentPassData::operator=(PersistentPassData& other) = default; -ParentClipData& ParentClipData::operator=(ParentClipData&& other) = default; -ParentClipData::~ParentClipData() = default; +PersistentPassData& PersistentPassData::operator=( + const PersistentPassData& other) = default; +PersistentPassData& PersistentPassData::operator=(PersistentPassData&& other) = + default; +PersistentPassData::~PersistentPassData() = default; ResolvedPassData::ResolvedPassData(FixedPassData fixed_data) : fixed_(std::move(fixed_data)) {} @@ -61,9 +63,9 @@ ResolvedPassData& ResolvedPassData::operator=(ResolvedPassData&& other) = default; -void ResolvedPassData::CopyAndResetParentClipData() { - previous_parent_clip_data_ = current_parent_clip_data_; - current_parent_clip_data_ = ParentClipData(); +void ResolvedPassData::CopyAndResetPersistentPassData() { + previous_persistent_data_ = current_persistent_data_; + current_persistent_data_ = PersistentPassData(); } ResolvedFrameData::ResolvedFrameData(DisplayResourceProvider* resource_provider, @@ -212,8 +214,8 @@ frame_index_ = surface_->GetActiveFrameIndex(); DCHECK_NE(frame_index_, 0u); - // Get parent_clip_data from the previous frame to the current frame. - MoveParentClipDataFromPreviousFrame(previous_resolved_passes); + // Get persistent_data from the previous frame to the current frame. + MovePersistentPassDataFromPreviousFrame(previous_resolved_passes); previous_resolved_passes.clear(); // Clear id mappings that weren't used in this frame. @@ -249,7 +251,7 @@ // Reset aggregation scoped data. for (auto& resolved_pass : resolved_passes_) { resolved_pass.aggregation().Reset(); - resolved_pass.CopyAndResetParentClipData(); + resolved_pass.CopyAndResetPersistentPassData(); } previous_frame_index_ = frame_index_; @@ -324,16 +326,16 @@ surface_id_); } -void ResolvedFrameData::MoveParentClipDataFromPreviousFrame( +void ResolvedFrameData::MovePersistentPassDataFromPreviousFrame( const std::vector<ResolvedPassData>& previous_resolved_passes) { for (const auto& previous_resolved_pass : previous_resolved_passes) { auto render_pass_id = previous_resolved_pass.render_pass_id(); - // iter to |current_parent_clip_data_| + // iter to |current_persistent_data_| auto iter = render_pass_id_map_.find(render_pass_id); if (iter != render_pass_id_map_.end()) { - iter->second->previous_parent_clip_data() = - previous_resolved_pass.previous_parent_clip_data(); + iter->second->previous_persistent_data() = + previous_resolved_pass.previous_persistent_data(); } } }
diff --git a/components/viz/service/display/resolved_frame_data.h b/components/viz/service/display/resolved_frame_data.h index ca457476..874ee89 100644 --- a/components/viz/service/display/resolved_frame_data.h +++ b/components/viz/service/display/resolved_frame_data.h
@@ -110,13 +110,15 @@ bool will_draw = false; }; -struct ParentClipData { - ParentClipData(); - ParentClipData(ParentClipData&& other); - ParentClipData& operator=(ParentClipData& other); - ParentClipData& operator=(const ParentClipData& other); - ParentClipData& operator=(ParentClipData&& other); - ~ParentClipData(); +// Render pass data that must be recomputed each aggregation and needs to be +// persisted to next aggregation. +struct PersistentPassData { + PersistentPassData(); + PersistentPassData(PersistentPassData&& other); + PersistentPassData& operator=(PersistentPassData& other); + PersistentPassData& operator=(const PersistentPassData& other); + PersistentPassData& operator=(PersistentPassData&& other); + ~PersistentPassData(); enum MergeState { kInitState, kNotMerged, kAlwaysMerged, kSomeTimesMerged }; @@ -164,19 +166,19 @@ AggregationPassData& aggregation() { return aggregation_; } const AggregationPassData& aggregation() const { return aggregation_; } - ParentClipData& current_parent_clip_data() { - return current_parent_clip_data_; + PersistentPassData& current_persistent_data() { + return current_persistent_data_; } - ParentClipData& previous_parent_clip_data() { - return previous_parent_clip_data_; + PersistentPassData& previous_persistent_data() { + return previous_persistent_data_; } - const ParentClipData& previous_parent_clip_data() const { - return previous_parent_clip_data_; + const PersistentPassData& previous_persistent_data() const { + return previous_persistent_data_; } - void CopyAndResetParentClipData(); + void CopyAndResetPersistentPassData(); private: friend class ResolvedFrameData; @@ -187,8 +189,8 @@ // Data that will change each aggregation. AggregationPassData aggregation_; - ParentClipData current_parent_clip_data_; - ParentClipData previous_parent_clip_data_; + PersistentPassData current_persistent_data_; + PersistentPassData previous_persistent_data_; }; enum FrameDamageType { @@ -294,7 +296,7 @@ private: void RegisterWithResourceProvider(); - void MoveParentClipDataFromPreviousFrame( + void MovePersistentPassDataFromPreviousFrame( const std::vector<ResolvedPassData>& previoius_resolved_passes); const raw_ptr<DisplayResourceProvider> resource_provider_;
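The rename above generalizes the per-render-pass bookkeeping: PersistentPassData is recomputed every aggregation but must survive into the next one, which CopyAndResetPersistentPassData() handles by snapshotting the current data into the previous slot. A stripped-down standalone illustration of that pattern (types simplified, not the viz code):

```cpp
struct PersistentPassData {
  enum MergeState { kInitState, kNotMerged, kAlwaysMerged, kSomeTimesMerged };
  MergeState merge_state = kInitState;
};

class ResolvedPassData {
 public:
  PersistentPassData& current_persistent_data() { return current_; }
  const PersistentPassData& previous_persistent_data() const {
    return previous_;
  }

  // At the end of an aggregation, what was just computed becomes the
  // "previous" snapshot and the "current" slot is reset for the next frame.
  void CopyAndResetPersistentPassData() {
    previous_ = current_;
    current_ = PersistentPassData();
  }

 private:
  PersistentPassData current_;
  PersistentPassData previous_;
};
```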
diff --git a/components/viz/service/display/skia_renderer.cc b/components/viz/service/display/skia_renderer.cc index 0ffd8f2..f5de5807 100644 --- a/components/viz/service/display/skia_renderer.cc +++ b/components/viz/service/display/skia_renderer.cc
@@ -2929,13 +2929,10 @@ local_matrix.setTranslate(quad->filters_origin.x(), quad->filters_origin.y()); local_matrix.postScale(quad->filters_scale.x(), quad->filters_scale.y()); - gfx::SizeF filter_size(quad->rect.width(), quad->rect.height()); - // Convert CC image filters into a SkImageFilter root node if (filters) { DCHECK(!filters->IsEmpty()); - auto paint_filter = - cc::RenderSurfaceFilters::BuildImageFilter(*filters, filter_size); + auto paint_filter = cc::RenderSurfaceFilters::BuildImageFilter(*filters); auto sk_filter = paint_filter ? paint_filter->cached_sk_filter_ : nullptr; if (sk_filter) { @@ -2991,25 +2988,26 @@ if (backdrop_filters) { DCHECK(!backdrop_filters->IsEmpty()); - // Must account for clipping that occurs for backdrop filters, since their - // input content has already been clipped to the output rect. - gfx::Rect device_rect = gfx::ToEnclosingRect(cc::MathUtil::MapClippedRect( - params->content_device_transform, gfx::RectF(quad->rect))); - gfx::Rect out_rect = MoveFromDrawToWindowSpace( - current_frame()->current_render_pass->output_rect); - out_rect.Intersect(device_rect); - gfx::Vector2dF offset = - (device_rect.top_right() - out_rect.top_right()) + - (device_rect.bottom_left() - out_rect.bottom_left()); + // quad->rect represents the layer's bounds *after* any display scale has + // been applied to it. The ZOOM FilterOperation uses the layer's bounds as + // its "lens" bounds. All image filters operate with a local matrix to + // match the display scale. We must undo the local matrix's effect on + // quad->rect to get the input bounds for ZOOM. Otherwise its lens would be + // doubly-scaled while none of the other filter operations would align. + SkMatrix inv_local_matrix; + if (local_matrix.invert(&inv_local_matrix)) { + SkIRect filter_rect = + inv_local_matrix.mapRect(gfx::RectToSkRect(quad->rect)).roundOut(); + auto bg_paint_filter = cc::RenderSurfaceFilters::BuildImageFilter( + *backdrop_filters, gfx::SkIRectToRect(filter_rect)); - auto bg_paint_filter = cc::RenderSurfaceFilters::BuildImageFilter( - *backdrop_filters, gfx::SizeF(out_rect.size()), offset); - auto sk_bg_filter = - bg_paint_filter ? bg_paint_filter->cached_sk_filter_ : nullptr; + auto sk_bg_filter = + bg_paint_filter ? bg_paint_filter->cached_sk_filter_ : nullptr; - if (sk_bg_filter) { - rpdq_params.backdrop_filter = - sk_bg_filter->makeWithLocalMatrix(local_matrix); + if (sk_bg_filter) { + rpdq_params.backdrop_filter = + sk_bg_filter->makeWithLocalMatrix(local_matrix); + } } }
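The SkiaRenderer change above maps quad->rect through the inverse of the filter local matrix so the ZOOM backdrop filter's lens bounds are expressed in pre-scale layer space and are not scaled twice. A toy standalone sketch of that round trip, with plain structs standing in for SkMatrix/SkRect and the matrix restricted to scale plus translate:

```cpp
#include <cassert>

struct ScaleTranslate {
  float sx = 1, sy = 1, tx = 0, ty = 0;
};
struct RectF {
  float x = 0, y = 0, w = 0, h = 0;
};

RectF MapRect(const ScaleTranslate& m, const RectF& r) {
  return {m.sx * r.x + m.tx, m.sy * r.y + m.ty, m.sx * r.w, m.sy * r.h};
}

ScaleTranslate Invert(const ScaleTranslate& m) {
  return {1 / m.sx, 1 / m.sy, -m.tx / m.sx, -m.ty / m.sy};
}

int main() {
  ScaleTranslate local_matrix{2, 2, 0, 0};  // e.g. a 2x display scale
  RectF quad_rect{0, 0, 100, 50};           // already expressed in scaled space

  // Lens bounds handed to the filter builder: mapped back into layer space...
  RectF filter_rect = MapRect(Invert(local_matrix), quad_rect);

  // ...so that re-applying the local matrix lands on the original rect
  // instead of a doubly-scaled one.
  RectF round_trip = MapRect(local_matrix, filter_rect);
  assert(round_trip.w == quad_rect.w && round_trip.h == quad_rect.h);
  return 0;
}
```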
diff --git a/components/viz/service/display/software_renderer.cc b/components/viz/service/display/software_renderer.cc index 67a5fb24..bf2f4220 100644 --- a/components/viz/service/display/software_renderer.cc +++ b/components/viz/service/display/software_renderer.cc
@@ -543,8 +543,7 @@ const cc::FilterOperations* filters = FiltersForPass(quad->render_pass_id); if (filters) { DCHECK(!filters->IsEmpty()); - auto paint_filter = cc::RenderSurfaceFilters::BuildImageFilter( - *filters, gfx::SizeF(source_bitmap.width(), source_bitmap.height())); + auto paint_filter = cc::RenderSurfaceFilters::BuildImageFilter(*filters); auto image_filter = paint_filter ? paint_filter->cached_sk_filter_ : nullptr; if (image_filter) { @@ -861,15 +860,13 @@ image_offset = filter_clip.origin(); } - gfx::Vector2dF clipping_offset = - (unclipped_rect.top_right() - backdrop_rect.top_right()) + - (unclipped_rect.bottom_left() - backdrop_rect.bottom_left()); - + // TODO (crbug.com/1451898): software_renderer doesn't apply backdrop filters + // correctly in the context of the ZOOM_FILTER operation (the lens bounds are + // not applied correctly). The ZOOM_FILTER is never used on platforms that + // use software_renderer, so skip calculating the filter bounds to pass + // to BuildImageFilter(). sk_sp<cc::PaintFilter> paint_filter = - cc::RenderSurfaceFilters::BuildImageFilter( - *backdrop_filters, - gfx::SizeF(backdrop_bitmap.width(), backdrop_bitmap.height()), - clipping_offset); + cc::RenderSurfaceFilters::BuildImageFilter(*backdrop_filters); if (!paint_filter) return nullptr; sk_sp<SkImageFilter> filter = paint_filter->cached_sk_filter_;
diff --git a/components/viz/service/display/surface_aggregator.cc b/components/viz/service/display/surface_aggregator.cc index c4a01de..ed9c32a4 100644 --- a/components/viz/service/display/surface_aggregator.cc +++ b/components/viz/service/display/surface_aggregator.cc
@@ -211,46 +211,46 @@ dest_render_pass); } -void UpdateParentClipDataMergeState(ResolvedPassData& resolved_pass, - AggregatedRenderPass* dest_pass, - bool is_merged_pass) { - auto& parent_clip_data = resolved_pass.current_parent_clip_data(); +void UpdatePersistentPassDataMergeState(ResolvedPassData& resolved_pass, + AggregatedRenderPass* dest_pass, + bool is_merged_pass) { + auto& persistent_data = resolved_pass.current_persistent_data(); - ParentClipData::MergeState merge_state = is_merged_pass - ? ParentClipData::kAlwaysMerged - : ParentClipData::kNotMerged; + PersistentPassData::MergeState merge_state = + is_merged_pass ? PersistentPassData::kAlwaysMerged + : PersistentPassData::kNotMerged; - if (parent_clip_data.merge_state == ParentClipData::kInitState) { + if (persistent_data.merge_state == PersistentPassData::kInitState) { // This is the first time it's embedded. - parent_clip_data.merge_state = merge_state; - } else if (parent_clip_data.merge_state != merge_state) { - parent_clip_data.merge_state = ParentClipData::kSomeTimesMerged; + persistent_data.merge_state = merge_state; + } else if (persistent_data.merge_state != merge_state) { + persistent_data.merge_state = PersistentPassData::kSomeTimesMerged; } } bool ChangeInMergeState(ResolvedPassData& resolved_pass) { - DCHECK(resolved_pass.current_parent_clip_data().merge_state != - ParentClipData::kInitState); + DCHECK(resolved_pass.current_persistent_data().merge_state != + PersistentPassData::kInitState); // If this is the first frame and previous_merge_state is empty, // this function will returns false. auto current_merge_state = - resolved_pass.current_parent_clip_data().merge_state; + resolved_pass.current_persistent_data().merge_state; auto previous_merge_state = - resolved_pass.previous_parent_clip_data().merge_state; + resolved_pass.previous_persistent_data().merge_state; // Check if this render pass is merged to its parent render pass in the // previous frame but is not in the current frame. bool change_in_merged_pass = - previous_merge_state == ParentClipData::kAlwaysMerged && - current_merge_state == ParentClipData::kNotMerged; + previous_merge_state == PersistentPassData::kAlwaysMerged && + current_merge_state == PersistentPassData::kNotMerged; // If it's embedded multiple times and some are merged while some are not, // just redraw the render pass. It's complicated to track individual change. change_in_merged_pass |= - resolved_pass.current_parent_clip_data().merge_state == - ParentClipData::kSomeTimesMerged || - resolved_pass.previous_parent_clip_data().merge_state == - ParentClipData::kSomeTimesMerged; + resolved_pass.current_persistent_data().merge_state == + PersistentPassData::kSomeTimesMerged || + resolved_pass.previous_persistent_data().merge_state == + PersistentPassData::kSomeTimesMerged; return change_in_merged_pass; } @@ -267,12 +267,12 @@ // Save the parent_clip_rect from the current frame. auto& current_parent_clip_rect = - resolved_pass.current_parent_clip_data().parent_clip_rect; + resolved_pass.current_persistent_data().parent_clip_rect; current_parent_clip_rect.Union(dest_root_target_clip_rect.value()); // Get the parent_clip_rect from the preious frame; auto& previous_parent_clip_rect = - resolved_pass.previous_parent_clip_data().parent_clip_rect; + resolved_pass.previous_persistent_data().parent_clip_rect; // If the parent clip rect expands, the new area of the render pass output // buffer has never been updated. Redraw is needed. 
@@ -884,12 +884,12 @@ added_clip_rect, ComputeDrawableRectForQuad(surface_quad), target_transform); } - // Update ParentClipData.merge_status of the root render pass of the current - // frame before making a call to AddSurfaceDamageToDamageList() where + // Update PersistentPassData.merge_status of the root render pass of the + // current frame before making a call to AddSurfaceDamageToDamageList() where // RenderPassNeedsFullDamage() is called and needs root pass |merge_state| // info. - UpdateParentClipDataMergeState(resolved_frame.GetRootRenderPassData(), - dest_pass, merge_pass); + UpdatePersistentPassDataMergeState(resolved_frame.GetRootRenderPassData(), + dest_pass, merge_pass); if (needs_surface_damage_rect_list_ && resolved_frame.WillDraw()) { AddSurfaceDamageToDamageList( @@ -935,8 +935,8 @@ source.cache_render_pass, resolved_pass.aggregation().has_damage, source.generate_mipmap); - UpdateParentClipDataMergeState(resolved_pass, copy_pass.get(), - /*is_merged_pass=*/false); + UpdatePersistentPassDataMergeState(resolved_pass, copy_pass.get(), + /*is_merged_pass=*/false); MoveMatchingRequests(source.id, ©_requests, ©_pass->copy_requests); @@ -960,7 +960,7 @@ auto& resolved_root_pass = resolved_frame.GetRootRenderPassData(); if (merge_pass) { - // UpdateParentClipDataMergeState() has been called earlier. + // UpdatePersistentPassDataMergeState() has been called earlier. CopyQuadsToPass(resolved_frame, resolved_root_pass, dest_pass, frame.device_scale_factor(), combined_transform, surface_quad_clip, dest_root_target_clip_rect, surface, @@ -1556,8 +1556,8 @@ source.has_transparent_background, source.cache_render_pass, resolved_pass.aggregation().has_damage, source.generate_mipmap); - UpdateParentClipDataMergeState(resolved_pass, copy_pass.get(), - /*is_merged_pass=*/false); + UpdatePersistentPassDataMergeState(resolved_pass, copy_pass.get(), + /*is_merged_pass=*/false); if (needs_surface_damage_rect_list_ && resolved_pass.is_root()) { AddSurfaceDamageToDamageList(
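Both surface_aggregator.cc hunks rename the merge-state bookkeeping that decides when a render pass needs full damage because its merged-into-parent status changed between frames or was inconsistent within one frame. A standalone restatement of that logic (enum values copied from PersistentPassData, everything else simplified):

```cpp
enum class MergeState { kInitState, kNotMerged, kAlwaysMerged, kSomeTimesMerged };

// Called once per embedding of a render pass during aggregation.
void UpdateMergeState(MergeState& state, bool is_merged_pass) {
  const MergeState this_embedding =
      is_merged_pass ? MergeState::kAlwaysMerged : MergeState::kNotMerged;
  if (state == MergeState::kInitState) {
    state = this_embedding;  // first embedding seen this frame
  } else if (state != this_embedding) {
    state = MergeState::kSomeTimesMerged;  // embedded both merged and not
  }
}

// A pass needs a full redraw if it stopped being merged into its parent, or
// if either frame embedded it inconsistently.
bool ChangeInMergeState(MergeState previous, MergeState current) {
  bool changed = previous == MergeState::kAlwaysMerged &&
                 current == MergeState::kNotMerged;
  changed |= previous == MergeState::kSomeTimesMerged ||
             current == MergeState::kSomeTimesMerged;
  return changed;
}
```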
diff --git a/components/webapps/browser/features.cc b/components/webapps/browser/features.cc index 9f3bfa9..27a2124c 100644 --- a/components/webapps/browser/features.cc +++ b/components/webapps/browser/features.cc
@@ -97,8 +97,8 @@ extern const base::FeatureParam<int> kBannerParamsDaysAfterBannerIgnoredKey{ &kAppBannerTriggering, "days_after_ignore", kMinimumDaysBetweenBannerShows}; -BASE_FEATURE(kWebAppsMlUkmCollection, - "WebAppsMlUkmCollection", +BASE_FEATURE(kWebAppsEnableMLModelForPromotion, + "kWebAppsEnableMLModelForPromotion", base::FEATURE_DISABLED_BY_DEFAULT); } // namespace features
diff --git a/components/webapps/browser/features.h b/components/webapps/browser/features.h index 743171a..2819ebb 100644 --- a/components/webapps/browser/features.h +++ b/components/webapps/browser/features.h
@@ -51,7 +51,7 @@ extern const base::FeatureParam<int> kBannerParamsDaysAfterBannerDismissedKey; extern const base::FeatureParam<int> kBannerParamsDaysAfterBannerIgnoredKey; -BASE_DECLARE_FEATURE(kWebAppsMlUkmCollection); +BASE_DECLARE_FEATURE(kWebAppsEnableMLModelForPromotion); } // namespace features } // namespace webapps
diff --git a/components/webapps/browser/installable/installable_manager.cc b/components/webapps/browser/installable/installable_manager.cc index 9d2d120..9fd9505 100644 --- a/components/webapps/browser/installable/installable_manager.cc +++ b/components/webapps/browser/installable/installable_manager.cc
@@ -233,7 +233,8 @@ manifest_(std::make_unique<ManifestProperty>()), valid_manifest_(std::make_unique<ValidManifestProperty>()), worker_(std::make_unique<ServiceWorkerProperty>()), - service_worker_context_(nullptr) { + service_worker_context_(nullptr), + sequenced_task_runner_(base::SequencedTaskRunner::GetCurrentDefault()) { // This is null in unit tests. if (web_contents) { content::StoragePartition* storage_partition = @@ -331,6 +332,11 @@ base::BindOnce(OnDidCompleteGetPrimaryIcon, std::move(callback))); } +void InstallableManager::SetSequencedTaskRunnerForTesting( + scoped_refptr<base::SequencedTaskRunner> task_runner) { + sequenced_task_runner_ = task_runner; +} + InstallableManager::ManifestProperty::ManifestProperty() = default; InstallableManager::ManifestProperty::~ManifestProperty() = default; @@ -570,7 +576,7 @@ if ((!check_passed && !params.is_debug_mode) || IsComplete(params)) { // Yield the UI thread before processing the next task. If this object is // deleted in the meantime, the next task naturally won't run. - base::SequencedTaskRunner::GetCurrentDefault()->PostTask( + sequenced_task_runner_->PostTask( FROM_HERE, base::BindOnce(&InstallableManager::CleanupAndStartNextTask, weak_factory_.GetWeakPtr()));
diff --git a/components/webapps/browser/installable/installable_manager.h b/components/webapps/browser/installable/installable_manager.h index 2eacff5..acc1825e 100644 --- a/components/webapps/browser/installable/installable_manager.h +++ b/components/webapps/browser/installable/installable_manager.h
@@ -13,7 +13,9 @@ #include "base/functional/callback_forward.h" #include "base/gtest_prod_util.h" #include "base/memory/raw_ptr.h" +#include "base/memory/scoped_refptr.h" #include "base/memory/weak_ptr.h" +#include "base/task/sequenced_task_runner.h" #include "base/time/time.h" #include "build/build_config.h" #include "components/webapps/browser/installable/installable_data.h" @@ -87,6 +89,9 @@ void GetPrimaryIcon( base::OnceCallback<void(const SkBitmap* primaryIcon)> callback); + void SetSequencedTaskRunnerForTesting( + scoped_refptr<base::SequencedTaskRunner> task_runner); + protected: // For mocking in tests. virtual void OnWaitingForServiceWorker() {} @@ -294,6 +299,7 @@ // Owned by the storage partition attached to the content::WebContents which // this object is scoped to. raw_ptr<content::ServiceWorkerContext> service_worker_context_; + scoped_refptr<base::SequencedTaskRunner> sequenced_task_runner_; base::WeakPtrFactory<InstallableManager> weak_factory_{this};
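The InstallableManager changes cache the ambient sequenced task runner at construction and expose SetSequencedTaskRunnerForTesting() so tests can substitute one they control. A minimal standalone sketch of this injection seam, with std::function-based stand-ins for base::SequencedTaskRunner; the class and method names are illustrative:

```cpp
#include <functional>
#include <memory>
#include <queue>
#include <utility>

class TaskRunner {
 public:
  virtual ~TaskRunner() = default;
  virtual void PostTask(std::function<void()> task) = 0;
};

// A test runner that queues tasks so the test decides when they run.
class FakeTaskRunner : public TaskRunner {
 public:
  void PostTask(std::function<void()> task) override {
    queue_.push(std::move(task));
  }
  void RunAll() {
    while (!queue_.empty()) {
      auto task = std::move(queue_.front());
      queue_.pop();
      task();
    }
  }

 private:
  std::queue<std::function<void()>> queue_;
};

class Manager {
 public:
  explicit Manager(std::shared_ptr<TaskRunner> default_runner)
      : task_runner_(std::move(default_runner)) {}

  // Mirrors SetSequencedTaskRunnerForTesting(): production keeps the default
  // runner; tests inject one they can pump deterministically.
  void SetTaskRunnerForTesting(std::shared_ptr<TaskRunner> runner) {
    task_runner_ = std::move(runner);
  }

  void ScheduleNextTask() {
    task_runner_->PostTask([] { /* CleanupAndStartNextTask() equivalent */ });
  }

 private:
  std::shared_ptr<TaskRunner> task_runner_;
};
```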
diff --git a/components/webapps/browser/installable/installable_manager_unittest.cc b/components/webapps/browser/installable/installable_manager_unittest.cc index 756e446..f40ae53 100644 --- a/components/webapps/browser/installable/installable_manager_unittest.cc +++ b/components/webapps/browser/installable/installable_manager_unittest.cc
@@ -7,6 +7,7 @@ #include "base/feature_list.h" #include "base/strings/utf_string_conversions.h" #include "base/test/scoped_feature_list.h" +#include "base/test/task_environment.h" #include "build/build_config.h" #include "content/public/common/content_features.h" #include "testing/gtest/include/gtest/gtest.h" @@ -56,6 +57,7 @@ } private: + base::test::SingleThreadTaskEnvironment task_environment; std::unique_ptr<InstallableManager> manager_; };
diff --git a/components/webapps/browser/installable/metrics/site_quality_metrics_task.cc b/components/webapps/browser/installable/metrics/site_quality_metrics_task.cc index 98f072f7..a6de479 100644 --- a/components/webapps/browser/installable/metrics/site_quality_metrics_task.cc +++ b/components/webapps/browser/installable/metrics/site_quality_metrics_task.cc
@@ -18,6 +18,7 @@ #include "content/public/browser/web_contents.h" #include "content/public/browser/web_contents_observer.h" #include "storage/browser/quota/quota_manager.h" +#include "storage/browser/quota/quota_manager_impl.h" #include "storage/browser/quota/quota_manager_proxy.h" #include "third_party/blink/public/mojom/quota/quota_types.mojom.h" #include "url/origin.h" @@ -68,12 +69,15 @@ // Quota. CHECK(storage_partition_->GetQuotaManager()); - storage_partition_->GetQuotaManager()->proxy()->GetUsageAndQuotaWithBreakdown( - storage_key, blink::mojom::StorageType::kTemporary, - base::SequencedTaskRunner::GetCurrentDefault(), - base::BindOnce(&SiteQualityMetricsTask::OnQuotaRetrieved, - weak_factory_.GetWeakPtr()) - .Then(barrier)); + + storage_partition_->GetQuotaManager() + ->proxy() + ->GetStorageKeyUsageWithBreakdown( + storage_key, blink::mojom::StorageType::kTemporary, + base::SequencedTaskRunner::GetCurrentDefault(), + base::BindOnce(&SiteQualityMetricsTask::OnQuotaUsageRetrieved, + weak_factory_.GetWeakPtr()) + .Then(barrier)); // Service worker. service_worker_context_->CheckHasServiceWorker( @@ -83,13 +87,10 @@ .Then(barrier)); } -void SiteQualityMetricsTask::OnQuotaRetrieved( - blink::mojom::QuotaStatusCode code, +void SiteQualityMetricsTask::OnQuotaUsageRetrieved( int64_t usage, - int64_t quota, blink::mojom::UsageBreakdownPtr usage_breakdown) { - if (code != blink::mojom::QuotaStatusCode::kOk) { - // Sizes are left as 0 if there is an error returning quota stats. + if (!usage_breakdown) { return; }
diff --git a/components/webapps/browser/installable/metrics/site_quality_metrics_task.h b/components/webapps/browser/installable/metrics/site_quality_metrics_task.h index c660d77..53a41a5 100644 --- a/components/webapps/browser/installable/metrics/site_quality_metrics_task.h +++ b/components/webapps/browser/installable/metrics/site_quality_metrics_task.h
@@ -85,11 +85,8 @@ ResultCallback on_complete); void Start(); - - void OnQuotaRetrieved(blink::mojom::QuotaStatusCode, - int64_t usage, - int64_t quota, - blink::mojom::UsageBreakdownPtr usage_breakdown); + void OnQuotaUsageRetrieved(int64_t usage, + blink::mojom::UsageBreakdownPtr usage_breakdown); void OnDidCheckHasServiceWorker(content::ServiceWorkerCapability capability);
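The two hunks above narrow the quota callback: GetStorageKeyUsageWithBreakdown() reports only usage plus a breakdown, so failure is signalled by a null breakdown rather than a QuotaStatusCode. A sketch of the resulting handler shape; the breakdown struct below is a stand-in, not blink::mojom::UsageBreakdown:

```cpp
#include <cstdint>
#include <memory>

struct UsageBreakdown {
  int64_t service_worker_cache = 0;
  int64_t indexed_database = 0;
};

void OnQuotaUsageRetrieved(int64_t usage,
                           std::unique_ptr<UsageBreakdown> breakdown) {
  if (!breakdown) {
    // The recorded sizes keep their 0 defaults when the lookup fails; there
    // is no separate status code on this path.
    return;
  }
  // Record |usage| and the per-backend sizes from |breakdown| here.
}
```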
diff --git a/components/webapps/browser/installable/ml_installability_promoter.cc b/components/webapps/browser/installable/ml_installability_promoter.cc index fe398a3..f370811b 100644 --- a/components/webapps/browser/installable/ml_installability_promoter.cc +++ b/components/webapps/browser/installable/ml_installability_promoter.cc
@@ -63,10 +63,6 @@ } void MLInstallabilityPromoter::StartGatheringMetricsForSiteUrl() { - if (!base::FeatureList::IsEnabled(features::kWebAppsMlUkmCollection)) { - return; - } - CHECK(web_contents()); const GURL& site_url = web_contents()->GetLastCommittedURL(); @@ -241,8 +237,18 @@ manifest_builder.Record(ukm_recorder->Get()); state_ = MLPipelineState::kUKMCollectionComplete; + TriggerMLModel(); +} + +void MLInstallabilityPromoter::TriggerMLModel() { // TODO(b/283998203): Trigger the ML Model to start generating // insights based on the UKMs. + CHECK_EQ(state_, MLPipelineState::kUKMCollectionComplete); + + if (!base::FeatureList::IsEnabled( + features::kWebAppsEnableMLModelForPromotion)) { + return; + } } void MLInstallabilityPromoter::DidFinishNavigation(
diff --git a/components/webapps/browser/installable/ml_installability_promoter.h b/components/webapps/browser/installable/ml_installability_promoter.h index 66a054b..5eb43f3 100644 --- a/components/webapps/browser/installable/ml_installability_promoter.h +++ b/components/webapps/browser/installable/ml_installability_promoter.h
@@ -87,6 +87,7 @@ // contents changes to be properly measured. void MaybeCompleteMetricsCollection(); void EmitUKMs(); + void TriggerMLModel(); // contents::WebContentsObserver overrides void DidFinishNavigation(content::NavigationHandle* handle) override;
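The MLInstallabilityPromoter hunks move the feature check: UKM metrics are now gathered unconditionally and only the new TriggerMLModel() step is gated on kWebAppsEnableMLModelForPromotion. A small sketch of that ordering, with a stub standing in for base::FeatureList::IsEnabled:

```cpp
// Stand-in for base::FeatureList::IsEnabled(
//     features::kWebAppsEnableMLModelForPromotion).
bool IsMlModelFeatureEnabled() {
  return false;  // the real feature is disabled by default
}

void EmitUkmsThenMaybeTriggerModel() {
  // ... record the site-quality / manifest UKM metrics unconditionally ...

  // Only the model-promotion step stays behind the flag.
  if (!IsMlModelFeatureEnabled()) {
    return;
  }
  // ... trigger the ML model on the collected metrics ...
}
```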
diff --git a/components/webauthn/android/java/src/org/chromium/components/webauthn/Fido2CredentialRequest.java b/components/webauthn/android/java/src/org/chromium/components/webauthn/Fido2CredentialRequest.java index 6a986e19..e4a7962 100644 --- a/components/webauthn/android/java/src/org/chromium/components/webauthn/Fido2CredentialRequest.java +++ b/components/webauthn/android/java/src/org/chromium/components/webauthn/Fido2CredentialRequest.java
@@ -46,7 +46,7 @@ import org.chromium.components.version_info.VersionInfo; import org.chromium.content_public.browser.ClientDataJson; import org.chromium.content_public.browser.ClientDataRequestType; -import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.RenderFrameHost; import org.chromium.content_public.browser.RenderFrameHost.WebAuthSecurityChecksResults; import org.chromium.content_public.browser.WebAuthenticationDelegate; @@ -287,7 +287,7 @@ } if (options.isConditional - || (ContentFeatureList.isEnabled( + || (ContentFeatureMap.isEnabled( ContentFeatures.WEB_AUTHN_TOUCH_TO_FILL_CREDENTIAL_SELECTION) && !hasAllowCredentials)) { // For use in the lambda expression.
diff --git a/content/browser/tracing/background_tracing_config_unittest.cc b/content/browser/tracing/background_tracing_config_unittest.cc index 40cbf4e..efdafa1 100644 --- a/content/browser/tracing/background_tracing_config_unittest.cc +++ b/content/browser/tracing/background_tracing_config_unittest.cc
@@ -4,17 +4,24 @@ #include <memory> +#include "base/base_paths.h" #include "base/json/json_reader.h" #include "base/json/json_writer.h" +#include "base/path_service.h" +#include "base/run_loop.h" #include "base/system/sys_info.h" +#include "base/test/bind.h" +#include "base/time/time.h" #include "base/values.h" #include "build/build_config.h" #include "content/browser/tracing/background_tracing_config_impl.h" #include "content/browser/tracing/background_tracing_rule.h" #include "content/public/test/browser_task_environment.h" +#include "content/public/test/test_proto_loader.h" #include "net/base/network_change_notifier.h" #include "testing/gtest/include/gtest/gtest.h" #include "third_party/abseil-cpp/absl/types/optional.h" +#include "third_party/perfetto/protos/perfetto/config/chrome/scenario_config.gen.h" namespace content { @@ -29,6 +36,27 @@ ConnectionType type_; }; +base::FilePath GetTestDataRoot() { + base::FilePath test_data_root = + base::PathService::CheckedGet(base::DIR_GEN_TEST_DATA_ROOT); +#if !BUILDFLAG(IS_FUCHSIA) + test_data_root = test_data_root.Append(FILE_PATH_LITERAL("gen")); +#endif // !BUILDFLAG(IS_FUCHSIA) + return test_data_root; +} + +void CreateRuleConfig(const std::string& proto_text, + perfetto::protos::gen::TriggerRule& destination) { + content::TestProtoLoader loader; + std::string serialized_message; + loader.ParseFromText(GetTestDataRoot().Append(FILE_PATH_LITERAL( + "third_party/perfetto/protos/perfetto/" + "config/chrome/scenario_config.descriptor")), + "perfetto.protos.TriggerRule", proto_text, + serialized_message); + ASSERT_TRUE(destination.ParseFromString(serialized_message)); +} + } // namespace class BackgroundTracingConfigTest : public testing::Test { @@ -36,7 +64,8 @@ BackgroundTracingConfigTest() = default; protected: - BrowserTaskEnvironment task_environment_; + BrowserTaskEnvironment task_environment_{ + base::test::TaskEnvironment::TimeSource::MOCK_TIME}; }; std::unique_ptr<BackgroundTracingConfigImpl> ReadFromJSONString( @@ -600,4 +629,74 @@ EXPECT_EQ(500u, config->GetTraceUploadLimitKb()); } +TEST_F(BackgroundTracingConfigTest, HistogramRuleFromValidProto) { + perfetto::protos::gen::TriggerRule config; + CreateRuleConfig( + R"pb( + name: "test_rule" + trigger_chance: 0.5 + delay_ms: 500 + histogram: { histogram_name: "foo" min_value: 1 max_value: 2 } + )pb", + config); + auto rule = BackgroundTracingRule::Create(config); + auto result = rule->ToProtoForTesting(); + EXPECT_EQ("test_rule", result.name()); + EXPECT_EQ(0.5, result.trigger_chance()); + EXPECT_EQ(500U, result.delay_ms()); + EXPECT_TRUE(result.has_histogram()); + EXPECT_EQ("foo", result.histogram().histogram_name()); + EXPECT_EQ(1, result.histogram().min_value()); + EXPECT_EQ(2, result.histogram().max_value()); +} + +TEST_F(BackgroundTracingConfigTest, NamedRuleFromValidProto) { + perfetto::protos::gen::TriggerRule config; + CreateRuleConfig(R"pb( + name: "test_rule" + trigger_chance: 0.5 + delay_ms: 500 + manual_trigger_name: "test_trigger" + )pb", + config); + auto rule = BackgroundTracingRule::Create(config); + auto result = rule->ToProtoForTesting(); + EXPECT_EQ("test_rule", result.name()); + EXPECT_EQ(0.5, result.trigger_chance()); + EXPECT_EQ(500U, result.delay_ms()); + EXPECT_EQ("test_trigger", result.manual_trigger_name()); +} + +TEST_F(BackgroundTracingConfigTest, TimerRuleFromValidProto) { + perfetto::protos::gen::TriggerRule config; + CreateRuleConfig(R"pb( + name: "test_rule" trigger_chance: 0.5 delay_ms: 500 + )pb", + config); + auto rule = 
BackgroundTracingRule::Create(config); + auto result = rule->ToProtoForTesting(); + EXPECT_EQ("test_rule", result.name()); + EXPECT_EQ(0.5, result.trigger_chance()); + EXPECT_EQ(500U, result.delay_ms()); +} + +TEST_F(BackgroundTracingConfigTest, TimerRuleTriggersAfterDelay) { + perfetto::protos::gen::TriggerRule config; + CreateRuleConfig(R"pb( + name: "test_rule" delay_ms: 10000 + )pb", + config); + + base::TimeTicks start = base::TimeTicks::Now(); + auto rule = BackgroundTracingRule::Create(config); + base::RunLoop run_loop; + rule->Install(base::BindLambdaForTesting([&](const BackgroundTracingRule*) { + run_loop.Quit(); + return true; + })); + run_loop.Run(); + DCHECK_GE(base::TimeTicks::Now(), start + base::Milliseconds(10000)); + rule->Uninstall(); +} + } // namespace content
diff --git a/content/browser/tracing/background_tracing_rule.cc b/content/browser/tracing/background_tracing_rule.cc index e310063..12362719 100644 --- a/content/browser/tracing/background_tracing_rule.cc +++ b/content/browser/tracing/background_tracing_rule.cc
@@ -55,8 +55,6 @@ namespace content { BackgroundTracingRule::BackgroundTracingRule() = default; -BackgroundTracingRule::BackgroundTracingRule(base::TimeDelta trigger_delay) - : trigger_delay_(trigger_delay) {} BackgroundTracingRule::~BackgroundTracingRule() { DCHECK(!installed()); @@ -74,11 +72,12 @@ return; } installed_ = false; + timer_.Stop(); trigger_callback_.Reset(); DoUninstall(); } -bool BackgroundTracingRule::OnRuleTriggered() const { +bool BackgroundTracingRule::OnRuleTriggered() { if (!installed()) { return false; } @@ -86,7 +85,14 @@ if (trigger_chance_ < 1.0 && base::RandDouble() > trigger_chance_) { return false; } - return trigger_callback_.Run(this); + if (!delay_.is_zero()) { + timer_.Start(FROM_HERE, delay_, + base::BindOnce(base::IgnoreResult(trigger_callback_), + base::Unretained(this))); + return true; + } else { + return trigger_callback_.Run(this); + } } base::TimeDelta BackgroundTracingRule::GetTraceDelay() const { @@ -119,6 +125,22 @@ return dict; } +perfetto::protos::gen::TriggerRule BackgroundTracingRule::ToProtoForTesting() + const { + perfetto::protos::gen::TriggerRule config; + if (trigger_chance_ < 1.0) { + config.set_trigger_chance(trigger_chance_); + } + + if (!delay_.is_zero()) { + config.set_delay_ms(delay_.InMilliseconds()); + } + + config.set_name(rule_id_); + + return config; +} + void BackgroundTracingRule::GenerateMetadataProto( BackgroundTracingRule::MetadataProto* out) const { uint32_t name_hash = variations::HashName(rule_id()); @@ -142,6 +164,21 @@ } } +void BackgroundTracingRule::Setup( + const perfetto::protos::gen::TriggerRule& config) { + if (config.has_trigger_chance()) { + trigger_chance_ = config.trigger_chance(); + } + if (config.has_delay_ms()) { + delay_ = base::Milliseconds(config.delay_ms()); + } + if (config.has_name()) { + rule_id_ = config.name(); + } else { + rule_id_ = GetDefaultRuleId(); + } +} + namespace { class NamedTriggerRule : public BackgroundTracingRule { @@ -160,6 +197,15 @@ return nullptr; } + static std::unique_ptr<BackgroundTracingRule> Create( + const perfetto::protos::gen::TriggerRule& config) { + if (config.has_manual_trigger_name()) { + return base::WrapUnique<BackgroundTracingRule>( + new NamedTriggerRule(config.manual_trigger_name())); + } + return nullptr; + } + void DoInstall() override { BackgroundTracingManagerImpl::GetInstance().SetNamedTriggerCallback( named_event_, base::BindRepeating(&NamedTriggerRule::OnRuleTriggered, @@ -178,6 +224,13 @@ return dict; } + perfetto::protos::gen::TriggerRule ToProtoForTesting() const override { + perfetto::protos::gen::TriggerRule config = + BackgroundTracingRule::ToProtoForTesting(); + config.set_manual_trigger_name(named_event_); + return config; + } + void GenerateMetadataProto( BackgroundTracingRule::MetadataProto* out) const override { DCHECK(out); @@ -248,6 +301,27 @@ return rule; } + static std::unique_ptr<BackgroundTracingRule> Create( + const perfetto::protos::gen::TriggerRule& config) { + DCHECK(config.has_histogram()); + + if (!config.histogram().has_histogram_name() || + !config.histogram().has_min_value()) { + return nullptr; + } + int histogram_lower_value = config.histogram().min_value(); + int histogram_upper_value = std::numeric_limits<int>::max(); + if (config.histogram().has_max_value()) { + histogram_upper_value = config.histogram().max_value(); + } + if (histogram_lower_value > histogram_upper_value) { + return nullptr; + } + + return base::WrapUnique( + new HistogramRule(config.histogram().histogram_name(), + histogram_lower_value, 
histogram_upper_value)); + } ~HistogramRule() override = default; @@ -280,6 +354,16 @@ return dict; } + perfetto::protos::gen::TriggerRule ToProtoForTesting() const override { + perfetto::protos::gen::TriggerRule config = + BackgroundTracingRule::ToProtoForTesting(); + auto* histogram = config.mutable_histogram(); + histogram->set_histogram_name(histogram_name_); + histogram->set_min_value(histogram_lower_value_); + histogram->set_max_value(histogram_upper_value_); + return config; + } + void GenerateMetadataProto( BackgroundTracingRule::MetadataProto* out) const override { DCHECK(out); @@ -344,6 +428,32 @@ histogram_sample_callback_; }; +class TimerRule : public BackgroundTracingRule { + private: + explicit TimerRule() = default; + + public: + static std::unique_ptr<BackgroundTracingRule> Create( + const perfetto::protos::gen::TriggerRule& config) { + return base::WrapUnique<TimerRule>(new TimerRule()); + } + + void DoInstall() override { OnRuleTriggered(); } + void DoUninstall() override {} + + void GenerateMetadataProto( + BackgroundTracingRule::MetadataProto* out) const override { + DCHECK(out); + BackgroundTracingRule::GenerateMetadataProto(out); + out->set_trigger_type(MetadataProto::TRIGGER_UNSPECIFIED); + } + + protected: + std::string GetDefaultRuleId() const override { + return "org.chromium.background_tracing.timer"; + } +}; + } // namespace std::unique_ptr<BackgroundTracingRule> @@ -364,4 +474,20 @@ return tracing_rule; } +std::unique_ptr<BackgroundTracingRule> BackgroundTracingRule::Create( + const perfetto::protos::gen::TriggerRule& config) { + std::unique_ptr<BackgroundTracingRule> tracing_rule; + if (config.has_manual_trigger_name()) { + tracing_rule = NamedTriggerRule::Create(config); + } else if (config.has_histogram()) { + tracing_rule = HistogramRule::Create(config); + } else { + tracing_rule = TimerRule::Create(config); + } + if (tracing_rule) { + tracing_rule->Setup(config); + } + return tracing_rule; +} + } // namespace content
diff --git a/content/browser/tracing/background_tracing_rule.h b/content/browser/tracing/background_tracing_rule.h index 5637b7f..1484109 100644 --- a/content/browser/tracing/background_tracing_rule.h +++ b/content/browser/tracing/background_tracing_rule.h
@@ -7,13 +7,16 @@ #include <memory> +#include "base/timer/timer.h" #include "base/values.h" #include "content/browser/tracing/background_tracing_config_impl.h" +#include "content/common/content_export.h" +#include "third_party/perfetto/protos/perfetto/config/chrome/scenario_config.gen.h" #include "third_party/perfetto/protos/perfetto/trace/chrome/chrome_metadata.pbzero.h" namespace content { -class BackgroundTracingRule { +class CONTENT_EXPORT BackgroundTracingRule { public: using MetadataProto = perfetto::protos::pbzero::BackgroundTracingMetadata::TriggerRule; @@ -23,7 +26,6 @@ base::RepeatingCallback<bool(const BackgroundTracingRule*)>; BackgroundTracingRule(); - explicit BackgroundTracingRule(base::TimeDelta trigger_delay); BackgroundTracingRule(const BackgroundTracingRule&) = delete; BackgroundTracingRule& operator=(const BackgroundTracingRule&) = delete; @@ -33,6 +35,7 @@ virtual void Install(RuleTriggeredCallback); virtual void Uninstall(); virtual base::Value::Dict ToDict() const; + virtual perfetto::protos::gen::TriggerRule ToProtoForTesting() const; virtual void GenerateMetadataProto(MetadataProto* out) const; // Seconds from the rule is triggered to finalization should start. @@ -40,10 +43,14 @@ // Probability that we should allow a tigger to happen. double trigger_chance() const { return trigger_chance_; } + base::TimeDelta delay() const { return delay_; } static std::unique_ptr<BackgroundTracingRule> CreateRuleFromDict( const base::Value::Dict& dict); + static std::unique_ptr<BackgroundTracingRule> Create( + const perfetto::protos::gen::TriggerRule& config); + const std::string& rule_id() const { return rule_id_; } bool is_crash() const { return is_crash_; } @@ -53,17 +60,20 @@ virtual void DoInstall() = 0; virtual void DoUninstall() = 0; - bool OnRuleTriggered() const; + bool OnRuleTriggered(); bool installed() const { return installed_; } private: void Setup(const base::Value::Dict& dict); + void Setup(const perfetto::protos::gen::TriggerRule& config); RuleTriggeredCallback trigger_callback_; bool installed_ = false; double trigger_chance_ = 1.0; base::TimeDelta trigger_delay_; + base::TimeDelta delay_; + base::OneShotTimer timer_; std::string rule_id_; bool is_crash_ = false; };
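The BackgroundTracingRule changes above let a rule defer its trigger: when the proto config sets delay_ms, OnRuleTriggered() arms a one-shot timer and reports success immediately instead of invoking the callback synchronously. A standalone sketch of that control flow; a replaceable std::function stands in for base::OneShotTimer and the defaults are illustrative:

```cpp
#include <chrono>
#include <functional>
#include <random>

class Rule {
 public:
  using TriggerCallback = std::function<bool(const Rule*)>;

  bool OnRuleTriggered() {
    if (!installed_) return false;
    // Optionally drop the trigger when trigger_chance < 1.0.
    if (trigger_chance_ < 1.0 && RandDouble() > trigger_chance_) return false;
    if (delay_.count() > 0) {
      // The real rule starts a base::OneShotTimer and reports success right
      // away; the callback then fires after |delay_| on the same sequence.
      start_timer_(delay_, [this] { trigger_callback_(this); });
      return true;
    }
    return trigger_callback_(this);
  }

 private:
  static double RandDouble() {
    static std::mt19937 rng{std::random_device{}()};
    return std::uniform_real_distribution<double>(0.0, 1.0)(rng);
  }

  bool installed_ = true;
  double trigger_chance_ = 1.0;
  std::chrono::milliseconds delay_{0};
  TriggerCallback trigger_callback_ = [](const Rule*) { return true; };
  // Stand-in for base::OneShotTimer; the default runs the task immediately.
  std::function<void(std::chrono::milliseconds, std::function<void()>)>
      start_timer_ = [](std::chrono::milliseconds, std::function<void()> cb) {
        cb();
      };
};
```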
diff --git a/content/browser/webid/federated_auth_request_impl.cc b/content/browser/webid/federated_auth_request_impl.cc index 8fe050d..ebd8f19 100644 --- a/content/browser/webid/federated_auth_request_impl.cc +++ b/content/browser/webid/federated_auth_request_impl.cc
@@ -677,10 +677,12 @@ // IDP use case. bool has_failing_idp_signin_status = webid::ShouldFailAccountsEndpointRequestBecauseNotSignedInWithIdp( - idp_ptr->get_federated()->config_url, permission_delegate_); + render_frame_host(), idp_ptr->get_federated()->config_url, + permission_delegate_); if (has_failing_idp_signin_status && - GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::ENABLED) { + webid::GetIdpSigninStatusMode(render_frame_host()) == + FedCmIdpSigninStatusMode::ENABLED) { CompleteRequestWithError(FederatedAuthRequestResult::kError, TokenStatus::kNotSignedInWithIdp, /*should_delay_callback=*/true); @@ -969,9 +971,11 @@ // false during the API call. e.g. by the login/logout HEADER. idp_info->has_failing_idp_signin_status = webid::ShouldFailAccountsEndpointRequestBecauseNotSignedInWithIdp( - identity_provider_config_url, permission_delegate_); + render_frame_host(), identity_provider_config_url, + permission_delegate_); if (idp_info->has_failing_idp_signin_status && - GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::ENABLED) { + webid::GetIdpSigninStatusMode(render_frame_host()) == + FedCmIdpSigninStatusMode::ENABLED) { // Do not send metrics for IDP where the user is not signed-in in order // to prevent IDP from using the user IP to make a probabilistic model // of which websites a user visits. @@ -1259,7 +1263,9 @@ absl::optional<bool> old_idp_signin_status, blink::mojom::FederatedAuthRequestResult result, absl::optional<TokenStatus> token_status) { - if (GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::DISABLED) { + FedCmIdpSigninStatusMode signin_status_mode = + webid::GetIdpSigninStatusMode(render_frame_host()); + if (signin_status_mode == FedCmIdpSigninStatusMode::DISABLED) { OnFetchDataForIdpFailed(std::move(idp_info), result, token_status, /*should_delay_callback=*/true); return; @@ -1268,7 +1274,7 @@ url::Origin idp_origin = url::Origin::Create(idp_info->provider->config_url); if (!old_idp_signin_status.has_value() || - GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::METRICS_ONLY) { + signin_status_mode == FedCmIdpSigninStatusMode::METRICS_ONLY) { OnFetchDataForIdpFailed(std::move(idp_info), result, token_status, /*should_delay_callback=*/true); return; @@ -1348,8 +1354,9 @@ permission_delegate_->GetIdpSigninStatus( url::Origin::Create(idp_config_url)); webid::UpdateIdpSigninStatusForAccountsEndpointResponse( - idp_config_url, status, idp_info->has_failing_idp_signin_status, - permission_delegate_, fedcm_metrics_.get()); + render_frame_host(), idp_config_url, status, + idp_info->has_failing_idp_signin_status, permission_delegate_, + fedcm_metrics_.get()); constexpr char kAccountsUrl[] = "accounts endpoint"; switch (status.parse_status) {
diff --git a/content/browser/webid/federated_auth_user_info_request.cc b/content/browser/webid/federated_auth_user_info_request.cc index 12226945..d87dfa7b 100644 --- a/content/browser/webid/federated_auth_user_info_request.cc +++ b/content/browser/webid/federated_auth_user_info_request.cc
@@ -145,8 +145,9 @@ } if (webid::ShouldFailAccountsEndpointRequestBecauseNotSignedInWithIdp( - idp_config_url_, permission_delegate_) && - GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::ENABLED) { + *render_frame_host_, idp_config_url_, permission_delegate_) && + webid::GetIdpSigninStatusMode(*render_frame_host_) == + FedCmIdpSigninStatusMode::ENABLED) { CompleteWithError(FederatedAuthUserInfoRequestResult::kNotSignedInWithIdp); return; } @@ -195,9 +196,10 @@ // false during the API call. e.g. by the login/logout HEADER. does_idp_have_failing_signin_status_ = webid::ShouldFailAccountsEndpointRequestBecauseNotSignedInWithIdp( - idp_config_url_, permission_delegate_); + *render_frame_host_, idp_config_url_, permission_delegate_); if (does_idp_have_failing_signin_status_ && - GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::ENABLED) { + webid::GetIdpSigninStatusMode(*render_frame_host_) == + FedCmIdpSigninStatusMode::ENABLED) { CompleteWithError(FederatedAuthUserInfoRequestResult::kNotSignedInWithIdp); return; } @@ -212,8 +214,8 @@ IdpNetworkRequestManager::FetchStatus fetch_status, IdpNetworkRequestManager::AccountList accounts) { webid::UpdateIdpSigninStatusForAccountsEndpointResponse( - idp_config_url_, fetch_status, does_idp_have_failing_signin_status_, - permission_delegate_, metrics_); + *render_frame_host_, idp_config_url_, fetch_status, + does_idp_have_failing_signin_status_, permission_delegate_, metrics_); if (fetch_status.parse_status != IdpNetworkRequestManager::ParseStatus::kSuccess) {
diff --git a/content/browser/webid/flags.cc b/content/browser/webid/flags.cc index c7d1101e..7bb12d1 100644 --- a/content/browser/webid/flags.cc +++ b/content/browser/webid/flags.cc
@@ -29,7 +29,7 @@ features::kFedCmMultipleIdentityProviders); } -FedCmIdpSigninStatusMode GetFedCmIdpSigninStatusMode() { +FedCmIdpSigninStatusMode GetFedCmIdpSigninStatusFlag() { if (GetFieldTrialParamByFeatureAsBool( features::kFedCm, features::kFedCmIdpSigninStatusFieldTrialParamName, false)) {
diff --git a/content/browser/webid/flags.h b/content/browser/webid/flags.h index ee87494..ac11c21 100644 --- a/content/browser/webid/flags.h +++ b/content/browser/webid/flags.h
@@ -25,7 +25,9 @@ bool IsFedCmMultipleIdentityProvidersEnabled(); // Returns the IdpSigninStatus API mode. -FedCmIdpSigninStatusMode GetFedCmIdpSigninStatusMode(); +// Most callers should use webid::GetIdpSigninStatusMode() in webid_utils.h +// instead, as that version takes origin trial status into account. +FedCmIdpSigninStatusMode GetFedCmIdpSigninStatusFlag(); // Whether metrics endpoint is enabled. bool IsFedCmMetricsEndpointEnabled();
diff --git a/content/browser/webid/webid_utils.cc b/content/browser/webid/webid_utils.cc index 9252c18..68aed75 100644 --- a/content/browser/webid/webid_utils.cc +++ b/content/browser/webid/webid_utils.cc
@@ -5,6 +5,7 @@ #include "content/browser/webid/webid_utils.h" #include "base/strings/stringprintf.h" +#include "content/browser/runtime_feature_state/runtime_feature_state_document_data.h" #include "content/browser/webid/fedcm_metrics.h" #include "content/browser/webid/flags.h" #include "content/public/browser/browser_context.h" @@ -59,9 +60,11 @@ } bool ShouldFailAccountsEndpointRequestBecauseNotSignedInWithIdp( + RenderFrameHost& host, const GURL& identity_provider_config_url, FederatedIdentityPermissionContextDelegate* permission_delegate) { - if (GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::DISABLED) { + if (webid::GetIdpSigninStatusMode(host) == + FedCmIdpSigninStatusMode::DISABLED) { return false; } @@ -73,12 +76,14 @@ } void UpdateIdpSigninStatusForAccountsEndpointResponse( + RenderFrameHost& host, const GURL& identity_provider_config_url, IdpNetworkRequestManager::FetchStatus fetch_status, bool does_idp_have_failing_signin_status, FederatedIdentityPermissionContextDelegate* permission_delegate, FedCmMetrics* metrics) { - if (GetFedCmIdpSigninStatusMode() == FedCmIdpSigninStatusMode::DISABLED) { + if (webid::GetIdpSigninStatusMode(host) == + FedCmIdpSigninStatusMode::DISABLED) { return; } @@ -238,4 +243,20 @@ } } +FedCmIdpSigninStatusMode GetIdpSigninStatusMode(RenderFrameHost& host) { + RuntimeFeatureStateDocumentData* rfs_document_data = + RuntimeFeatureStateDocumentData::GetForCurrentDocument(&host); + // Should not be null as this gets initialized when the host gets created. + DCHECK(rfs_document_data); + // This includes origin trials. + bool runtime_enabled = rfs_document_data->runtime_feature_state_read_context() + .IsFedCmIdpSigninStatusEnabled(); + + FedCmIdpSigninStatusMode flag_mode = GetFedCmIdpSigninStatusFlag(); + if (flag_mode == FedCmIdpSigninStatusMode::METRICS_ONLY && runtime_enabled) { + return FedCmIdpSigninStatusMode::ENABLED; + } + return flag_mode; +} + } // namespace content::webid
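The new webid::GetIdpSigninStatusMode() combines the field-trial flag with the per-document runtime feature state, so an origin trial can upgrade METRICS_ONLY to ENABLED but never overrides an explicit DISABLED or ENABLED flag value. A standalone restatement of that decision (enum and parameters simplified, not the Chromium types):

```cpp
enum class FedCmIdpSigninStatusMode { DISABLED, METRICS_ONLY, ENABLED };

FedCmIdpSigninStatusMode GetIdpSigninStatusMode(
    FedCmIdpSigninStatusMode flag_mode,  // from GetFedCmIdpSigninStatusFlag()
    bool origin_trial_enabled) {         // from the document's feature state
  // The origin trial only upgrades the metrics-only default; any other flag
  // value wins unchanged.
  if (flag_mode == FedCmIdpSigninStatusMode::METRICS_ONLY &&
      origin_trial_enabled) {
    return FedCmIdpSigninStatusMode::ENABLED;
  }
  return flag_mode;
}
```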
diff --git a/content/browser/webid/webid_utils.h b/content/browser/webid/webid_utils.h index c78c79e..23dffdf 100644 --- a/content/browser/webid/webid_utils.h +++ b/content/browser/webid/webid_utils.h
@@ -17,6 +17,7 @@ namespace content { class BrowserContext; +enum class FedCmIdpSigninStatusMode; class FedCmMetrics; class FederatedIdentityPermissionContextDelegate; enum class IdpSigninStatus; @@ -43,6 +44,7 @@ // Returns whether FedCM should fail/skip the accounts endpoint request because // the user is not signed-in to the IdP. bool ShouldFailAccountsEndpointRequestBecauseNotSignedInWithIdp( + RenderFrameHost& host, const GURL& identity_provider_config_url, FederatedIdentityPermissionContextDelegate* permission_delegate); @@ -53,6 +55,7 @@ // endpoint request would have been failed/skipped had the IdP signin-status // been FedCmIdpSigninStatusMode::ENABLED. void UpdateIdpSigninStatusForAccountsEndpointResponse( + RenderFrameHost& host, const GURL& identity_provider_config_url, IdpNetworkRequestManager::FetchStatus account_endpoint_fetch_status, bool does_idp_have_failing_idp_signin_status, @@ -64,6 +67,8 @@ CONTENT_EXPORT std::string GetConsoleErrorMessageFromResult( blink::mojom::FederatedAuthRequestResult result); +FedCmIdpSigninStatusMode GetIdpSigninStatusMode(RenderFrameHost& host); + } // namespace webid } // namespace content
diff --git a/content/public/android/BUILD.gn b/content/public/android/BUILD.gn index e2c0d91f..600d7ae 100644 --- a/content/public/android/BUILD.gn +++ b/content/public/android/BUILD.gn
@@ -212,7 +212,6 @@ "java/src/org/chromium/content/browser/ClientDataJsonImpl.java", "java/src/org/chromium/content/browser/ContactsDialogHost.java", "java/src/org/chromium/content/browser/ContentClassFactory.java", - "java/src/org/chromium/content/browser/ContentFeatureMap.java", "java/src/org/chromium/content/browser/ContentNfcDelegate.java", "java/src/org/chromium/content/browser/ContentUiEventHandler.java", "java/src/org/chromium/content/browser/ContentViewStaticsImpl.java", @@ -338,6 +337,7 @@ "java/src/org/chromium/content_public/browser/ContactsPickerDelegate.java", "java/src/org/chromium/content_public/browser/ContactsPickerListener.java", "java/src/org/chromium/content_public/browser/ContentFeatureList.java", + "java/src/org/chromium/content_public/browser/ContentFeatureMap.java", "java/src/org/chromium/content_public/browser/ContentViewStatics.java", "java/src/org/chromium/content_public/browser/GestureListenerManager.java", "java/src/org/chromium/content_public/browser/GestureStateListener.java", @@ -443,7 +443,6 @@ "java/src/org/chromium/content/browser/ChildProcessLauncherHelperImpl.java", "java/src/org/chromium/content/browser/ClientDataJsonImpl.java", "java/src/org/chromium/content/browser/ContactsDialogHost.java", - "java/src/org/chromium/content/browser/ContentFeatureMap.java", "java/src/org/chromium/content/browser/ContentNfcDelegate.java", "java/src/org/chromium/content/browser/ContentUiEventHandler.java", "java/src/org/chromium/content/browser/ContentViewStaticsImpl.java", @@ -484,6 +483,7 @@ "java/src/org/chromium/content/browser/webcontents/WebContentsObserverProxy.java", "java/src/org/chromium/content/browser/webid/MDocProviderAndroid.java", "java/src/org/chromium/content/common/SurfaceWrapper.java", + "java/src/org/chromium/content_public/browser/ContentFeatureMap.java", "java/src/org/chromium/content_public/browser/LoadCommittedDetails.java", "java/src/org/chromium/content_public/browser/LoadUrlParams.java", "java/src/org/chromium/content_public/browser/NavigationHandle.java",
diff --git a/content/public/android/java/src/org/chromium/content/browser/ChildProcessLauncherHelperImpl.java b/content/public/android/java/src/org/chromium/content/browser/ChildProcessLauncherHelperImpl.java index 70eed70..65105de0 100644 --- a/content/public/android/java/src/org/chromium/content/browser/ChildProcessLauncherHelperImpl.java +++ b/content/public/android/java/src/org/chromium/content/browser/ChildProcessLauncherHelperImpl.java
@@ -43,6 +43,7 @@ import org.chromium.content.common.ContentSwitchUtils; import org.chromium.content_public.browser.ChildProcessImportance; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.common.ContentFeatures; import org.chromium.content_public.common.ContentSwitches; @@ -396,7 +397,7 @@ if (!ContentSwitches.SWITCH_RENDERER_PROCESS.equals(processType)) { if (ContentSwitches.SWITCH_GPU_PROCESS.equals(processType)) { sandboxed = false; - reducePriorityOnBackground = ContentFeatureList.isEnabled( + reducePriorityOnBackground = ContentFeatureMap.isEnabled( ContentFeatures.REDUCE_GPU_PRIORITY_ON_BACKGROUND); } else { // We only support sandboxed utility processes now. @@ -719,7 +720,7 @@ boostForPendingViews = false; } - boolean mediaRendererHasModerate = ContentFeatureList.isEnabled( + boolean mediaRendererHasModerate = ContentFeatureMap.isEnabled( ContentFeatureList.BACKGROUND_MEDIA_RENDERER_HAS_MODERATE_BINDING); @ChildProcessImportance
diff --git a/content/public/android/java/src/org/chromium/content/browser/HostZoomMapImpl.java b/content/public/android/java/src/org/chromium/content/browser/HostZoomMapImpl.java index a13ac99..07a1022 100644 --- a/content/public/android/java/src/org/chromium/content/browser/HostZoomMapImpl.java +++ b/content/public/android/java/src/org/chromium/content/browser/HostZoomMapImpl.java
@@ -12,6 +12,7 @@ import org.chromium.base.annotations.NativeMethods; import org.chromium.content_public.browser.BrowserContextHandle; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.HostZoomMap; import org.chromium.content_public.browser.WebContents; @@ -75,7 +76,7 @@ float systemFontScale = SYSTEM_FONT_SCALE; // The OS |fontScale| will not be factored in zoom estimation if Page Zoom is disabled; a // systemFontScale = 1 will be used in this case. - if (!ContentFeatureList.isEnabled(ContentFeatureList.ACCESSIBILITY_PAGE_ZOOM)) { + if (!ContentFeatureMap.isEnabled(ContentFeatureList.ACCESSIBILITY_PAGE_ZOOM)) { systemFontScale = 1; } return HostZoomMap.adjustZoomLevel(
diff --git a/content/public/android/java/src/org/chromium/content/browser/accessibility/AccessibilityHistogramRecorder.java b/content/public/android/java/src/org/chromium/content/browser/accessibility/AccessibilityHistogramRecorder.java index 26b83061..0cee1811 100644 --- a/content/public/android/java/src/org/chromium/content/browser/accessibility/AccessibilityHistogramRecorder.java +++ b/content/public/android/java/src/org/chromium/content/browser/accessibility/AccessibilityHistogramRecorder.java
@@ -8,6 +8,7 @@ import org.chromium.base.metrics.RecordHistogram; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.ui.accessibility.AccessibilityState; /** @@ -113,7 +114,7 @@ */ public void recordHistograms() { // If the OnDemand feature is enabled, log UMA metrics and reset counters. - if (ContentFeatureList.isEnabled(ContentFeatureList.ON_DEMAND_ACCESSIBILITY_EVENTS)) { + if (ContentFeatureMap.isEnabled(ContentFeatureList.ON_DEMAND_ACCESSIBILITY_EVENTS)) { recordEventsHistograms(); } @@ -127,8 +128,8 @@ public void recordEventsHistograms() { // To investigate whether adding more AXModes could be beneficial, track separate // stats when both the AccessibilityPerformanceFiltering and OnDemand features are enabled. - boolean isAccessibilityPerformanceFilteringEnabled = ContentFeatureList.isEnabled( - ContentFeatureList.ACCESSIBILITY_PERFORMANCE_FILTERING); + boolean isAccessibilityPerformanceFilteringEnabled = + ContentFeatureMap.isEnabled(ContentFeatureList.ACCESSIBILITY_PERFORMANCE_FILTERING); // There are only 2 AXModes, kAXModeComplete is used when a screenreader is active. boolean isAXModeComplete = AccessibilityState.isScreenReaderEnabled();
diff --git a/content/public/android/java/src/org/chromium/content/browser/accessibility/WebContentsAccessibilityImpl.java b/content/public/android/java/src/org/chromium/content/browser/accessibility/WebContentsAccessibilityImpl.java index e4419b8c..742386a 100644 --- a/content/public/android/java/src/org/chromium/content/browser/accessibility/WebContentsAccessibilityImpl.java +++ b/content/public/android/java/src/org/chromium/content/browser/accessibility/WebContentsAccessibilityImpl.java
@@ -98,6 +98,7 @@ import org.chromium.content.browser.webcontents.WebContentsImpl; import org.chromium.content.browser.webcontents.WebContentsImpl.UserDataFactory; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.WebContents; import org.chromium.content_public.browser.WebContentsAccessibility; import org.chromium.ui.accessibility.AccessibilityState; @@ -315,8 +316,7 @@ @Override public void onDisabled() { assert mNativeObj != 0 : "Native code is not initialized, but disable was called."; - assert ContentFeatureList.isEnabled( - ContentFeatureList.AUTO_DISABLE_ACCESSIBILITY_V2) + assert ContentFeatureMap.isEnabled(ContentFeatureList.AUTO_DISABLE_ACCESSIBILITY_V2) : "Disable was called, but Auto-disable accessibility is not enabled."; TraceEvent.begin( "WebContentsAccessibilityImpl.AutoDisableAccessibilityHandler.onDisabled"); @@ -443,7 +443,7 @@ if (mView.isAttachedToWindow()) registerLocaleChangeReceiver(); // Define a set of relevant AccessibilityEvents if the OnDemand feature is enabled. - if (ContentFeatureList.isEnabled(ContentFeatureList.ON_DEMAND_ACCESSIBILITY_EVENTS)) { + if (ContentFeatureMap.isEnabled(ContentFeatureList.ON_DEMAND_ACCESSIBILITY_EVENTS)) { Runnable serviceMaskRunnable = () -> { int serviceEventMask = AccessibilityState.getAccessibilityServiceEventTypeMask(); mEventDispatcher.updateRelevantEventTypes( @@ -633,7 +633,7 @@ mIsImageDescriptionsCandidate && AccessibilityState.isScreenReaderEnabled()); // Update the list of events we dispatch to enabled services. - if (ContentFeatureList.isEnabled(ContentFeatureList.ON_DEMAND_ACCESSIBILITY_EVENTS)) { + if (ContentFeatureMap.isEnabled(ContentFeatureList.ON_DEMAND_ACCESSIBILITY_EVENTS)) { int serviceEventMask = AccessibilityState.getAccessibilityServiceEventTypeMask(); mEventDispatcher.updateRelevantEventTypes( convertMaskToEventTypes(serviceEventMask)); @@ -643,7 +643,7 @@ // and tear down objects when no accessibility services are running. If we have // disabled then re-enabled the renderer multiple times for this instance, then we // will return early and keep accessibility enabled to prevent further churn. - if (ContentFeatureList.isEnabled(ContentFeatureList.AUTO_DISABLE_ACCESSIBILITY_V2)) { + if (ContentFeatureMap.isEnabled(ContentFeatureList.AUTO_DISABLE_ACCESSIBILITY_V2)) { if (mAutoDisableUsageCounter >= AUTO_DISABLE_SINGLE_INSTANCE_TOGGLE_LIMIT) { mAutoDisableAccessibilityHandler.cancelDisableTimer(); return;
diff --git a/content/public/android/java/src/org/chromium/content/browser/input/InputMethodManagerWrapperImpl.java b/content/public/android/java/src/org/chromium/content/browser/input/InputMethodManagerWrapperImpl.java index 9ee46883..8122edc4 100644 --- a/content/public/android/java/src/org/chromium/content/browser/input/InputMethodManagerWrapperImpl.java +++ b/content/public/android/java/src/org/chromium/content/browser/input/InputMethodManagerWrapperImpl.java
@@ -20,6 +20,7 @@ import org.chromium.base.task.PostTask; import org.chromium.base.task.TaskTraits; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.InputMethodManagerWrapper; import org.chromium.ui.base.WindowAndroid; import org.chromium.ui.display.DisplayAndroid; @@ -50,7 +51,7 @@ mWindowAndroid = windowAndroid; mDelegate = delegate; mOptimizeImmHideCalls = - ContentFeatureList.isEnabled(ContentFeatureList.OPTIMIZE_IMM_HIDE_CALLS); + ContentFeatureMap.isEnabled(ContentFeatureList.OPTIMIZE_IMM_HIDE_CALLS); } @Override
diff --git a/content/public/android/java/src/org/chromium/content/browser/input/StylusGestureHandler.java b/content/public/android/java/src/org/chromium/content/browser/input/StylusGestureHandler.java index 715f49e..ec76af7 100644 --- a/content/public/android/java/src/org/chromium/content/browser/input/StylusGestureHandler.java +++ b/content/public/android/java/src/org/chromium/content/browser/input/StylusGestureHandler.java
@@ -21,7 +21,7 @@ import org.chromium.blink.mojom.StylusWritingGestureAction; import org.chromium.blink.mojom.StylusWritingGestureData; import org.chromium.blink.mojom.StylusWritingGestureGranularity; -import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.gfx.mojom.Rect; import org.chromium.mojo_base.mojom.String16; @@ -91,7 +91,7 @@ public static @Nullable InputConnection maybeProxyInputConnection( @Nullable InputConnection inputConnection, Callback<OngoingGesture> onGestureCallback) { if (inputConnection == null || !BuildCompat.isAtLeastU() - || !ContentFeatureList.isEnabled( + || !ContentFeatureMap.isEnabled( org.chromium.blink_public.common.BlinkFeatures.STYLUS_RICH_GESTURES)) { return inputConnection; }
diff --git a/content/public/android/java/src/org/chromium/content_public/browser/ContentFeatureList.java b/content/public/android/java/src/org/chromium/content_public/browser/ContentFeatureList.java index 8a00421..df712acd 100644 --- a/content/public/android/java/src/org/chromium/content_public/browser/ContentFeatureList.java +++ b/content/public/android/java/src/org/chromium/content_public/browser/ContentFeatureList.java
@@ -4,54 +4,12 @@ package org.chromium.content_public.browser; -import org.chromium.content.browser.ContentFeatureMap; - /** * Convenience static methods to access {@link ContentFeatureMap}. */ public class ContentFeatureList { private ContentFeatureList() {} - /** - * Returns whether the specified feature is enabled or not. - * - * @param featureName The name of the feature to query. - * @return Whether the feature is enabled or not. - */ - public static boolean isEnabled(String featureName) { - return ContentFeatureMap.isEnabled(featureName); - } - - /** - * Returns a field trial param as an int for the specified feature. - * - * @param featureName The name of the feature to retrieve a param for. - * @param paramName The name of the param for which to get as an integer. - * @param defaultValue The integer value to use if the param is not available. - * @return The parameter value as an int. Default value if the feature does not exist or the - * specified parameter does not exist or its string value does not represent an int. - */ - public static int getFieldTrialParamByFeatureAsInt( - String featureName, String paramName, int defaultValue) { - return ContentFeatureMap.getInstance().getFieldTrialParamByFeatureAsInt( - featureName, paramName, defaultValue); - } - - /** - * Returns a field trial param as a boolean for the specified feature. - * - * @param featureName The name of the feature to retrieve a param for. - * @param paramName The name of the param for which to get as a boolean. - * @param defaultValue The boolean value to use if the param is not available. - * @return The parameter value as a boolean. Default value if the feature does not exist or the - * specified parameter does not exist or its string value is neither "true" nor "false". - */ - public static boolean getFieldTrialParamByFeatureAsBoolean( - String featureName, String paramName, boolean defaultValue) { - return ContentFeatureMap.getInstance().getFieldTrialParamByFeatureAsBoolean( - featureName, paramName, defaultValue); - } - // TODO(crbug.com/1447098): Use generated constants in ContentFeatures and other generated // Features files, then remove the constants below.
diff --git a/content/public/android/java/src/org/chromium/content/browser/ContentFeatureMap.java b/content/public/android/java/src/org/chromium/content_public/browser/ContentFeatureMap.java similarity index 96% rename from content/public/android/java/src/org/chromium/content/browser/ContentFeatureMap.java rename to content/public/android/java/src/org/chromium/content_public/browser/ContentFeatureMap.java index 0da04b3..f98e2d1 100644 --- a/content/public/android/java/src/org/chromium/content/browser/ContentFeatureMap.java +++ b/content/public/android/java/src/org/chromium/content_public/browser/ContentFeatureMap.java
@@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. -package org.chromium.content.browser; +package org.chromium.content_public.browser; import org.chromium.base.FeatureMap; import org.chromium.base.annotations.JNINamespace;
diff --git a/content/public/android/javatests/src/org/chromium/content/browser/ChildProcessLauncherIntegrationTest.java b/content/public/android/javatests/src/org/chromium/content/browser/ChildProcessLauncherIntegrationTest.java index 9dbbccc2..a82c440 100644 --- a/content/public/android/javatests/src/org/chromium/content/browser/ChildProcessLauncherIntegrationTest.java +++ b/content/public/android/javatests/src/org/chromium/content/browser/ChildProcessLauncherIntegrationTest.java
@@ -23,6 +23,7 @@ import org.chromium.base.test.util.CriteriaHelper; import org.chromium.base.test.util.UrlUtils; import org.chromium.content_public.browser.ContentFeatureList; +import org.chromium.content_public.browser.ContentFeatureMap; import org.chromium.content_public.browser.LoadUrlParams; import org.chromium.content_public.browser.NavigationController; import org.chromium.content_public.browser.test.util.TestCallbackHelperContainer; @@ -141,7 +142,7 @@ ChildProcessLauncherTestUtils.runOnLauncherThreadBlocking(new Runnable() { @Override public void run() { - if (ContentFeatureList.isEnabled( + if (ContentFeatureMap.isEnabled( ContentFeatureList.PROCESS_SHARING_WITH_STRICT_SITE_INSTANCES)) { // If this feature is turned on all the URLs will use the same process. // Verify that the process has not lost its importance now that the @@ -186,7 +187,7 @@ ChildProcessLauncherTestUtils.runOnLauncherThreadBlocking(new Runnable() { @Override public void run() { - if (ContentFeatureList.isEnabled( + if (ContentFeatureMap.isEnabled( ContentFeatureList.PROCESS_SHARING_WITH_STRICT_SITE_INSTANCES)) { // If this feature is turned on all the URLs will use the same process // and this test will not observe any kills.
diff --git a/content/public/test/DEPS b/content/public/test/DEPS index 052dfd1..10b5fce 100644 --- a/content/public/test/DEPS +++ b/content/public/test/DEPS
@@ -38,13 +38,14 @@ "+services/service_manager", "+services/tracing/public/cpp", "+testing/android/native_test/native_browser_test_support.h", + "+third_party/protobuf/src/google/protobuf", "+tools/v8_context_snapshot/buildflags.h", "+ui/base/resource/resource_bundle.h", "+ui/base/resource/resource_bundle_android.h", "+ui/ozone/public", + "+ui/platform_window/common/platform_window_defaults.h", "+ui/views", "+v8/include/v8.h", - "+ui/platform_window/common/platform_window_defaults.h", ] specific_include_rules = {
diff --git a/content/public/test/test_proto_loader.cc b/content/public/test/test_proto_loader.cc new file mode 100644 index 0000000..38c3ed5 --- /dev/null +++ b/content/public/test/test_proto_loader.cc
@@ -0,0 +1,85 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "content/public/test/test_proto_loader.h" + +#include "base/files/file_util.h" +#include "base/notreached.h" +#include "base/path_service.h" +#include "base/strings/string_split.h" +#include "base/strings/string_util.h" +#include "third_party/protobuf/src/google/protobuf/message.h" +#include "third_party/protobuf/src/google/protobuf/text_format.h" + +namespace content { + +TestProtoLoader::TestProtoLoader() = default; + +TestProtoLoader::~TestProtoLoader() = default; + +const google::protobuf::Message* TestProtoLoader::GetPrototype( + base::FilePath descriptor_path, + base::StringPiece package, + base::StringPiece name) { + std::string file_contents; + + if (!base::ReadFileToString(descriptor_path, &file_contents)) { + NOTREACHED() << "Couldn't load contents of " << descriptor_path; + return nullptr; + } + + if (!descriptor_set_.ParseFromString(file_contents)) { + NOTREACHED() << "Couldn't parse descriptor from " << descriptor_path; + return nullptr; + } + + for (int file_i = 0; file_i < descriptor_set_.file_size(); ++file_i) { + const google::protobuf::FileDescriptorProto& file = + descriptor_set_.file(file_i); + if (file.package() != package) { + continue; + } + const google::protobuf::FileDescriptor* descriptor = + descriptor_pool_.BuildFile(file); + for (int message_type_i = 0; + message_type_i < descriptor->message_type_count(); ++message_type_i) { + const google::protobuf::Descriptor* message_type = + descriptor->message_type(message_type_i); + if (message_type->name() != name) { + continue; + } + return dynamic_message_factory_.GetPrototype(message_type); + } + } + NOTREACHED() << "Couldn't find " << package << "." << name << " in " + << descriptor_path; + return nullptr; +} + +void TestProtoLoader::ParseFromText(const base::FilePath& descriptor_path, + base::StringPiece type_name, + const std::string& proto_text, + std::string& serialized_message) { + // Load the descriptors and find the one for |type_name|. + std::string package, name; + std::vector<std::string> type_name_parts = base::SplitString( + type_name, ".", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL); + DCHECK_GE(type_name_parts.size(), 2U) << "|type_name| should include package"; + + const google::protobuf::Message* prototype = GetPrototype( + descriptor_path, /*package=*/ + base::JoinString( + base::make_span(type_name_parts.begin(), type_name_parts.size() - 1), + "."), + /*name=*/type_name_parts.back()); + DCHECK_NE(nullptr, prototype); + + // Parse the text using the descriptor-generated message and write the + // serialized bytes to |serialized_message|. + std::unique_ptr<google::protobuf::Message> message(prototype->New()); + google::protobuf::TextFormat::ParseFromString(proto_text, message.get()); + serialized_message = message->SerializeAsString(); +} + +} // namespace content
diff --git a/content/public/test/test_proto_loader.h b/content/public/test/test_proto_loader.h new file mode 100644 index 0000000..9c4e0e5d --- /dev/null +++ b/content/public/test/test_proto_loader.h
@@ -0,0 +1,68 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CONTENT_PUBLIC_TEST_TEST_PROTO_LOADER_H_ +#define CONTENT_PUBLIC_TEST_TEST_PROTO_LOADER_H_ + +#include "base/files/file_path.h" +#include "third_party/protobuf/src/google/protobuf/descriptor.h" +#include "third_party/protobuf/src/google/protobuf/descriptor.pb.h" +#include "third_party/protobuf/src/google/protobuf/dynamic_message.h" + +namespace content { + +#if defined(COMPONENT_BUILD) +#if defined(WIN32) + +#if defined(PROTO_TEST_IMPLEMENTATION) +#define PROTO_TEST_EXPORT __declspec(dllexport) +#else +#define PROTO_TEST_EXPORT __declspec(dllimport) +#endif // defined(PROTO_TEST_IMPLEMENTATION) + +#else // defined(WIN32) +#if defined(PROTO_TEST_IMPLEMENTATION) +#define PROTO_TEST_EXPORT __attribute__((visibility("default"))) +#else +#define PROTO_TEST_EXPORT +#endif +#endif +#else // defined(COMPONENT_BUILD) +#define PROTO_TEST_EXPORT +#endif + +// This class works around the fact that chrome only includes the lite runtime +// of protobufs. Lite protobufs inherit from |MessageLite| and cannot be used to +// parse from text format. Parsing from text +// format is useful in tests. We cannot include the full version of a protobuf +// in test code because it would clash with the lite version. +// +// This class uses the protobuf descriptors (generated at compile time) to +// generate a |Message| that can be used to parse from text. This message can +// then be serialized to binary which can be parsed by the |MessageLite|. +class PROTO_TEST_EXPORT TestProtoLoader { + public: + TestProtoLoader(); + ~TestProtoLoader(); + TestProtoLoader(const TestProtoLoader&) = delete; + TestProtoLoader& operator=(const TestProtoLoader&) = delete; + + void ParseFromText(const base::FilePath& descriptor_path, + base::StringPiece type_name, + const std::string& proto_text, + std::string& message); + + private: + const google::protobuf::Message* GetPrototype(base::FilePath descriptor_path, + base::StringPiece package, + base::StringPiece name); + + google::protobuf::DescriptorPool descriptor_pool_; + google::protobuf::FileDescriptorSet descriptor_set_; + google::protobuf::DynamicMessageFactory dynamic_message_factory_; +}; + +} // namespace content + +#endif // CONTENT_PUBLIC_TEST_TEST_PROTO_LOADER_H_
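To make the intended flow concrete, here is a minimal usage sketch (not part of the patch): it assumes the `content.test.TestMessage` type and the `test_proto.descriptor` file produced by the BUILD.gn targets added below, and the new `test_proto_loader_unittest.cc` later in this change exercises the same flow against the real generated-output location.

```cpp
// Minimal sketch (assumed names/paths): a test that links only the lite
// protobuf runtime uses TestProtoLoader to turn a text proto into wire
// format that the lite-generated message can parse.
#include <string>

#include "base/check.h"
#include "base/files/file_path.h"
#include "content/public/test/test_proto_loader.h"
#include "content/test/test.pb.h"  // Lite-generated content.test.TestMessage.

void FillTestMessageFromText(content::test::TestMessage& lite_message) {
  content::TestProtoLoader loader;
  std::string serialized;
  // The full runtime, driven by the compiled descriptor file, parses the
  // human-readable text form and serializes it to binary wire format. Real
  // tests resolve the descriptor path under the generated-output directory.
  loader.ParseFromText(
      base::FilePath(FILE_PATH_LITERAL("content/test/test_proto.descriptor")),
      "content.test.TestMessage", R"pb(test: "hello")pb", serialized);
  // The lite message cannot parse text format, but it can parse the binary
  // form produced above.
  CHECK(lite_message.ParseFromString(serialized));
}
```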
diff --git a/content/test/BUILD.gn b/content/test/BUILD.gn index 81ad6635..6709d1b 100644 --- a/content/test/BUILD.gn +++ b/content/test/BUILD.gn
@@ -21,6 +21,7 @@ import("//third_party/blink/public/public_features.gni") import("//third_party/closure_compiler/closure_args.gni") import("//third_party/closure_compiler/compile_js.gni") +import("//third_party/protobuf/proto_library.gni") import("//tools/grit/grit_rule.gni") import("//tools/grit/preprocess_if_expr.gni") import("//tools/typescript/ts_library.gni") @@ -803,6 +804,34 @@ } } +component("proto_test_support") { + testonly = true + + sources = [ + "../public/test/test_proto_loader.cc", + "../public/test/test_proto_loader.h", + ] + + defines = [ "PROTO_TEST_IMPLEMENTATION" ] + + deps = [ + "//base", + "//third_party/protobuf:protobuf_full", + ] +} + +proto_library("test_proto") { + sources = [ "test.proto" ] + cc_generator_options = "lite" +} + +proto_library("test_proto_descriptor") { + sources = [ "test.proto" ] + generate_cc = false + generate_python = false + generate_descriptor = "test_proto.descriptor" +} + # Fuchsia performance tests on smart displays use web_engine and a browser # like shell instead of an actual browser, so Fuchsia needs a separate target. if (is_fuchsia) { @@ -2723,6 +2752,7 @@ "navigation_simulator_unittest.cc", "test_aggregation_service_impl_unittest.cc", "test_page_unittest.cc", + "test_proto_loader_unittest.cc", "test_render_frame_host_unittest.cc", ] @@ -2820,8 +2850,10 @@ deps = [ ":content_test_mojo_bindings", + ":proto_test_support", ":run_all_unittests", ":test_interfaces", + ":test_proto", ":test_support", "web_ui:test_webui_js_bridge2_webui_js_bridge_impl", "web_ui:test_webui_js_bridge_webui_js_bridge_impl", @@ -2972,8 +3004,15 @@ ] data_deps = [ + ":test_proto_descriptor", "//testing/buildbot/filters:content_unittests_filters", "//third_party/mesa_headers", + "//third_party/perfetto/protos/perfetto/config/chrome:scenario_descriptor", + ] + + data += [ + "$root_gen_dir/third_party/perfetto/protos/perfetto/config/chrome/scenario_config.descriptor", + "$root_gen_dir/content/test/test_proto.descriptor", ] # Platforms where sqlite_dev_shell is defined.
diff --git a/content/test/gpu/gpu_tests/pixel_test_pages.py b/content/test/gpu/gpu_tests/pixel_test_pages.py index 525c5d9..dadafc7 100644 --- a/content/test/gpu/gpu_tests/pixel_test_pages.py +++ b/content/test/gpu/gpu_tests/pixel_test_pages.py
@@ -598,6 +598,11 @@ cba.DISABLE_SOFTWARE_COMPOSITING_FALLBACK, ] + # The sRGB tests have been observed to create a large number + # (~15,000) of pixels with difference ~3. + srgb_fuzzy_algo = algo.FuzzyMatchingAlgorithm(max_different_pixels=20000, + pixel_delta_threshold=3) + return [ PixelTestPage('pixel_offscreenCanvas_transfer_after_style_resize.html', base_name + '_OffscreenCanvasTransferAfterStyleResize', @@ -682,16 +687,19 @@ PixelTestPage('pixel_canvas_display_srgb.html', base_name + '_CanvasDisplaySRGBAccelerated2D', test_rect=[0, 0, 140, 140], - browser_args=browser_args + accelerated_args), + browser_args=browser_args + accelerated_args, + matching_algorithm=srgb_fuzzy_algo), PixelTestPage('pixel_canvas_display_srgb.html', base_name + '_CanvasDisplaySRGBUnaccelerated2D', test_rect=[0, 0, 140, 140], - browser_args=browser_args + unaccelerated_args), + browser_args=browser_args + unaccelerated_args, + matching_algorithm=srgb_fuzzy_algo), PixelTestPage( 'pixel_canvas_display_srgb.html', base_name + '_CanvasDisplaySRGBUnaccelerated2DGPUCompositing', test_rect=[0, 0, 140, 140], - browser_args=browser_args + [cba.DISABLE_ACCELERATED_2D_CANVAS]), + browser_args=browser_args + [cba.DISABLE_ACCELERATED_2D_CANVAS], + matching_algorithm=srgb_fuzzy_algo), PixelTestPage('pixel_webgl_webcodecs_breakoutbox_displays_frame.html', base_name + '_WebGLWebCodecsBreakoutBoxDisplaysFrame', test_rect=[0, 0, 300, 300],
diff --git a/content/test/test.proto b/content/test/test.proto new file mode 100644 index 0000000..9384e9d --- /dev/null +++ b/content/test/test.proto
@@ -0,0 +1,12 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +syntax = "proto2"; + +package content.test; + +// A simple test proto with a string in it. +message TestMessage { + optional string test = 1; +}
diff --git a/content/test/test_proto_loader_unittest.cc b/content/test/test_proto_loader_unittest.cc new file mode 100644 index 0000000..4dfe690 --- /dev/null +++ b/content/test/test_proto_loader_unittest.cc
@@ -0,0 +1,47 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "content/public/test/test_proto_loader.h" + +#include "base/files/file_path.h" +#include "base/notreached.h" +#include "base/path_service.h" +#include "content/test/test.pb.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace content { +namespace { + +base::FilePath GetTestDataRoot() { + base::FilePath test_data_root = + base::PathService::CheckedGet(base::DIR_GEN_TEST_DATA_ROOT); +#if !BUILDFLAG(IS_FUCHSIA) + test_data_root = test_data_root.Append(FILE_PATH_LITERAL("gen")); +#endif // !BUILDFLAG(IS_FUCHSIA) + return test_data_root; +} + +void LoadTestProto(const std::string& proto_text, + google::protobuf::MessageLite& proto) { + content::TestProtoLoader loader; + std::string serialized_message; + loader.ParseFromText( + GetTestDataRoot().Append( + FILE_PATH_LITERAL("content/test/test_proto.descriptor")), + "content.test.TestMessage", proto_text, serialized_message); + ASSERT_TRUE(proto.ParseFromString(serialized_message)); +} + +TEST(TestProtoLoaderTest, LoadProto) { + test::TestMessage proto; + LoadTestProto( + R"pb( + test: "Message" + )pb", + proto); + EXPECT_EQ("Message", proto.test()); +} + +} // namespace +} // namespace content
diff --git a/device/fido/features.cc b/device/fido/features.cc index 0440c6e..af5669d2 100644 --- a/device/fido/features.cc +++ b/device/fido/features.cc
@@ -93,9 +93,9 @@ "WebAuthenticationNewHybridUI", base::FEATURE_DISABLED_BY_DEFAULT); -// Not yet enabled by default. +// Enabled in M116. Remove in or after M119. BASE_FEATURE(kWebAuthnPrelinkPlayServices, "WebAuthenticationPrelinkPlayServices", - base::FEATURE_DISABLED_BY_DEFAULT); + base::FEATURE_ENABLED_BY_DEFAULT); } // namespace device
diff --git a/device/vr/BUILD.gn b/device/vr/BUILD.gn index e698f6fd..4fca20e 100644 --- a/device/vr/BUILD.gn +++ b/device/vr/BUILD.gn
@@ -187,7 +187,6 @@ "openxr/openxr_extension_handle.h", "openxr/openxr_extension_helper.cc", "openxr/openxr_extension_helper.h", - "openxr/openxr_graphics_binding.cc", "openxr/openxr_graphics_binding.h", "openxr/openxr_input_helper.cc", "openxr/openxr_input_helper.h",
diff --git a/device/vr/openxr/android/openxr_graphics_binding_open_gles.cc b/device/vr/openxr/android/openxr_graphics_binding_open_gles.cc index ea532a3..08f614d 100644 --- a/device/vr/openxr/android/openxr_graphics_binding_open_gles.cc +++ b/device/vr/openxr/android/openxr_graphics_binding_open_gles.cc
@@ -8,6 +8,7 @@ #include "device/vr/openxr/openxr_api_wrapper.h" #include "device/vr/openxr/openxr_platform.h" #include "device/vr/openxr/openxr_util.h" +#include "gpu/GLES2/gl2extchromium.h" #include "third_party/openxr/src/include/openxr/openxr.h" #include "ui/gfx/buffer_types.h" #include "ui/gl/gl_context.h" @@ -134,9 +135,10 @@ } XrResult OpenXrGraphicsBindingOpenGLES::EnumerateSwapchainImages( - const XrSwapchain& color_swapchain) { + const XrSwapchain& color_swapchain, + std::vector<SwapChainInfo>& color_swapchain_images) const { CHECK(color_swapchain != XR_NULL_HANDLE); - CHECK(color_swapchain_images_.empty()); + CHECK(color_swapchain_images.empty()); uint32_t chain_length; RETURN_IF_XR_FAILED( @@ -149,20 +151,12 @@ reinterpret_cast<XrSwapchainImageBaseHeader*>( xr_color_swapchain_images.data()))); - color_swapchain_images_.reserve(xr_color_swapchain_images.size()); - for (const auto& swapchain_image : xr_color_swapchain_images) { - color_swapchain_images_.emplace_back(swapchain_image.image); + color_swapchain_images.reserve(xr_color_swapchain_images.size()); + for (size_t i = 0; i < xr_color_swapchain_images.size(); i++) { + color_swapchain_images.emplace_back(); } return XR_SUCCESS; } -void OpenXrGraphicsBindingOpenGLES::ClearSwapChainInfo() { - color_swapchain_images_.clear(); -} - -base::span<SwapChainInfo> OpenXrGraphicsBindingOpenGLES::GetSwapChainInfo() { - return color_swapchain_images_; -} - } // namespace device
diff --git a/device/vr/openxr/android/openxr_graphics_binding_open_gles.h b/device/vr/openxr/android/openxr_graphics_binding_open_gles.h index 6f1fd03..b089749 100644 --- a/device/vr/openxr/android/openxr_graphics_binding_open_gles.h +++ b/device/vr/openxr/android/openxr_graphics_binding_open_gles.h
@@ -12,7 +12,6 @@ #include "device/vr/openxr/openxr_platform.h" #include "device/vr/vr_export.h" #include "third_party/openxr/src/include/openxr/openxr.h" -#include "ui/gl/gl_bindings.h" namespace gl { class GLContext; @@ -35,15 +34,13 @@ const void* GetSessionCreateInfo() const override; int64_t GetSwapchainFormat(XrSession session) const override; XrResult EnumerateSwapchainImages( - const XrSwapchain& color_swapchain) override; - void ClearSwapChainInfo() override; - base::span<SwapChainInfo> GetSwapChainInfo() override; + const XrSwapchain& color_swapchain, + std::vector<SwapChainInfo>& color_swapchain_images) const override; private: bool initialized_ = false; XrGraphicsBindingOpenGLESAndroidKHR binding_{ XR_TYPE_GRAPHICS_BINDING_OPENGL_ES_ANDROID_KHR, nullptr}; - std::vector<SwapChainInfo> color_swapchain_images_; scoped_refptr<gl::GLSurface> surface_; scoped_refptr<gl::GLContext> context_;
diff --git a/device/vr/openxr/openxr_api_wrapper.cc b/device/vr/openxr/openxr_api_wrapper.cc index 0f620eba..9ac68f00f 100644 --- a/device/vr/openxr/openxr_api_wrapper.cc +++ b/device/vr/openxr/openxr_api_wrapper.cc
@@ -161,6 +161,29 @@ return environment_blend_modes; } +#if BUILDFLAG(IS_WIN) +SwapChainInfo::SwapChainInfo(ID3D11Texture2D* d3d11_texture) + : d3d11_texture(d3d11_texture) {} +#else +SwapChainInfo::SwapChainInfo() = default; +#endif + +SwapChainInfo::~SwapChainInfo() { + // If shared images are being used, the mailbox holder should have been + // cleared before destruction, either due to the context provider being lost + // or from normal session ending. If shared images are not being used, these + // should not have been initialized in the first place. + DCHECK(mailbox_holder.mailbox.IsZero()); + DCHECK(!mailbox_holder.sync_token.HasData()); +} +SwapChainInfo::SwapChainInfo(SwapChainInfo&&) = default; +SwapChainInfo& SwapChainInfo::operator=(SwapChainInfo&&) = default; + +void SwapChainInfo::Clear() { + mailbox_holder.mailbox.SetZero(); + mailbox_holder.sync_token.Clear(); +} + OpenXrApiWrapper::OpenXrApiWrapper() = default; OpenXrApiWrapper::~OpenXrApiWrapper() { @@ -278,7 +301,7 @@ bool OpenXrApiWrapper::HasColorSwapChain() const { return color_swapchain_ != XR_NULL_HANDLE && - graphics_binding_->GetSwapChainInfo().size() > 0; + color_swapchain_images_.size() > 0; } bool OpenXrApiWrapper::HasSpace(XrReferenceSpaceType type) const { @@ -528,7 +551,7 @@ DCHECK(IsInitialized()); DCHECK(HasSession()); DCHECK(!HasColorSwapChain()); - DCHECK(graphics_binding_->GetSwapChainInfo().empty()); + DCHECK(color_swapchain_images_.empty()); XrSwapchainCreateInfo swapchain_create_info = {XR_TYPE_SWAPCHAIN_CREATE_INFO}; swapchain_create_info.arraySize = 1; @@ -548,8 +571,8 @@ color_swapchain_ = color_swapchain; - RETURN_IF_XR_FAILED( - graphics_binding_->EnumerateSwapchainImages(color_swapchain_)); + RETURN_IF_XR_FAILED(graphics_binding_->EnumerateSwapchainImages( + color_swapchain_, color_swapchain_images_)); CreateSharedMailboxes(); @@ -667,7 +690,7 @@ if (context_provider_) { // Mark the shared mailboxes as invalid since the underlying GPU process // associated with them has gone down. 
- for (SwapChainInfo& info : graphics_binding_->GetSwapChainInfo()) { + for (SwapChainInfo& info : color_swapchain_images_) { info.Clear(); } context_provider_ = nullptr; @@ -678,7 +701,7 @@ if (context_provider_) { gpu::SharedImageInterface* shared_image_interface = context_provider_->SharedImageInterface(); - for (SwapChainInfo& info : graphics_binding_->GetSwapChainInfo()) { + for (SwapChainInfo& info : color_swapchain_images_) { if (shared_image_interface && !info.mailbox_holder.mailbox.IsZero() && info.mailbox_holder.sync_token.HasData()) { shared_image_interface->DestroySharedImage( @@ -688,9 +711,7 @@ } } - if (graphics_binding_) { - graphics_binding_->ClearSwapChainInfo(); - } + color_swapchain_images_.clear(); } void OpenXrApiWrapper::CreateSharedMailboxes() { @@ -704,7 +725,7 @@ context_provider_->SharedImageInterface(); // Create the MailboxHolders for each texture in the swap chain - for (SwapChainInfo& swap_chain_info : graphics_binding_->GetSwapChainInfo()) { + for (SwapChainInfo& swap_chain_info : color_swapchain_images_) { Microsoft::WRL::ComPtr<IDXGIResource1> dxgi_resource; HRESULT hr = swap_chain_info.d3d11_texture->QueryInterface( IID_PPV_ARGS(&dxgi_resource)); @@ -769,9 +790,8 @@ } bool OpenXrApiWrapper::IsUsingSharedImages() const { - const auto swapchain_info = graphics_binding_->GetSwapChainInfo(); - return ((swapchain_info.size() > 1) && - !swapchain_info[0].mailbox_holder.mailbox.IsZero()); + return ((color_swapchain_images_.size() > 1) && + !color_swapchain_images_[0].mailbox_holder.mailbox.IsZero()); } // TODO(https://crbug.com/1441073): Refactor OpenXR Rendering. @@ -779,11 +799,10 @@ void OpenXrApiWrapper::StoreFence( Microsoft::WRL::ComPtr<ID3D11Fence> d3d11_fence, int16_t frame_index) { - const size_t swapchain_images_size = - graphics_binding_->GetSwapChainInfo().size(); + const size_t swapchain_images_size = color_swapchain_images_.size(); if (swapchain_images_size > 0) { - graphics_binding_->GetSwapChainInfo()[frame_index % swapchain_images_size] - .d3d11_fence = std::move(d3d11_fence); + color_swapchain_images_[frame_index % swapchain_images_size].d3d11_fence = + std::move(d3d11_fence); } } #endif @@ -885,8 +904,7 @@ RETURN_IF_XR_FAILED(UpdateViewConfigurations()); - *swap_chain_info = - &graphics_binding_->GetSwapChainInfo()[color_swapchain_image_index]; + *swap_chain_info = &color_swapchain_images_[color_swapchain_image_index]; return XR_SUCCESS; }
diff --git a/device/vr/openxr/openxr_api_wrapper.h b/device/vr/openxr/openxr_api_wrapper.h index 18c909a..d92635d94 100644 --- a/device/vr/openxr/openxr_api_wrapper.h +++ b/device/vr/openxr/openxr_api_wrapper.h
@@ -49,6 +49,28 @@ using VisibilityChangedCallback = base::RepeatingCallback<void(mojom::XRVisibilityState)>; +// TODO(https://crbug.com/1441072): Refactor this class. +struct SwapChainInfo { +#if BUILDFLAG(IS_WIN) + explicit SwapChainInfo(ID3D11Texture2D*); +#else + SwapChainInfo(); +#endif + ~SwapChainInfo(); + SwapChainInfo(SwapChainInfo&&); + SwapChainInfo& operator=(SwapChainInfo&&); + + void Clear(); + +#if BUILDFLAG(IS_WIN) + // When shared images are being used, there is a corresponding MailboxHolder + // and D3D11Fence for each D3D11 texture in the vector. + raw_ptr<ID3D11Texture2D> d3d11_texture = nullptr; + Microsoft::WRL::ComPtr<ID3D11Fence> d3d11_fence; +#endif + gpu::MailboxHolder mailbox_holder; +}; + class OpenXrApiWrapper { public: OpenXrApiWrapper(); @@ -209,7 +231,7 @@ XrSpace unbounded_space_; bool stage_parameters_enabled_; std::unordered_set<mojom::XRSessionFeature> enabled_features_; - raw_ptr<OpenXrGraphicsBinding> graphics_binding_ = nullptr; + raw_ptr<OpenXrGraphicsBinding> graphics_binding_; // The swapchain is initializd when a session begins and is re-created when // the state of a secondary view configuration changes.
diff --git a/device/vr/openxr/openxr_graphics_binding.cc b/device/vr/openxr/openxr_graphics_binding.cc deleted file mode 100644 index 23b1f15..0000000 --- a/device/vr/openxr/openxr_graphics_binding.cc +++ /dev/null
@@ -1,32 +0,0 @@ -// Copyright 2023 The Chromium Authors -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "device/vr/openxr/openxr_graphics_binding.h" - -namespace device { - -#if BUILDFLAG(IS_WIN) -SwapChainInfo::SwapChainInfo(ID3D11Texture2D* d3d11_texture) - : d3d11_texture(d3d11_texture) {} -#elif BUILDFLAG(IS_ANDROID) -SwapChainInfo::SwapChainInfo(uint32_t texture) : texture(texture) {} -#endif - -SwapChainInfo::~SwapChainInfo() { - // If shared images are being used, the mailbox holder should have been - // cleared before destruction, either due to the context provider being lost - // or from normal session ending. If shared images are not being used, these - // should not have been initialized in the first place. - DCHECK(mailbox_holder.mailbox.IsZero()); - DCHECK(!mailbox_holder.sync_token.HasData()); -} -SwapChainInfo::SwapChainInfo(SwapChainInfo&&) = default; -SwapChainInfo& SwapChainInfo::operator=(SwapChainInfo&&) = default; - -void SwapChainInfo::Clear() { - mailbox_holder.mailbox.SetZero(); - mailbox_holder.sync_token.Clear(); -} - -} // namespace device
diff --git a/device/vr/openxr/openxr_graphics_binding.h b/device/vr/openxr/openxr_graphics_binding.h index 33cb3ce..6f267242 100644 --- a/device/vr/openxr/openxr_graphics_binding.h +++ b/device/vr/openxr/openxr_graphics_binding.h
@@ -8,46 +8,11 @@ #include <cstdint> #include <vector> -#include "base/containers/span.h" -#include "base/memory/raw_ptr.h" -#include "gpu/command_buffer/common/mailbox_holder.h" #include "third_party/openxr/src/include/openxr/openxr.h" -#if BUILDFLAG(IS_WIN) -#include <d3d11_4.h> -#include <wrl.h> -#endif - namespace device { -// TODO(https://crbug.com/1441072): Refactor this class. -struct SwapChainInfo { - public: -#if BUILDFLAG(IS_WIN) - explicit SwapChainInfo(ID3D11Texture2D*); -#elif BUILDFLAG(IS_ANDROID) - explicit SwapChainInfo(uint32_t texture); -#endif - SwapChainInfo(); - virtual ~SwapChainInfo(); - SwapChainInfo(SwapChainInfo&&); - SwapChainInfo& operator=(SwapChainInfo&&); - - void Clear(); - - gpu::MailboxHolder mailbox_holder; - -#if BUILDFLAG(IS_WIN) - // When shared images are being used, there is a corresponding MailboxHolder - // and D3D11Fence for each D3D11 texture in the vector. - raw_ptr<ID3D11Texture2D> d3d11_texture = nullptr; - Microsoft::WRL::ComPtr<ID3D11Fence> d3d11_fence; -#elif BUILDFLAG(IS_ANDROID) - // Ideally this would be a gluint, but there are conflicting headers for GL - // depending on *how* you want to use it; so we can't use it at the moment. - uint32_t texture; -#endif -}; +struct SwapChainInfo; // This class exists to provide an abstraction for the different rendering // paths that can be taken by OpenXR (e.g. DirectX vs. GLES). Any OpenXr methods @@ -70,13 +35,11 @@ // Gets the format that we expect from the platform swapchain. virtual int64_t GetSwapchainFormat(XrSession session) const = 0; - // Calls xrEnumerateSwapChain and updates the stored SwapChainInfo available - // via `GetSwapChainInfo`. + // Calls xrEnumerateSwapChain and stores all relevant data in the passed in + // array of `SwapChainInfo`s. virtual XrResult EnumerateSwapchainImages( - const XrSwapchain& color_swapchain) = 0; - virtual void ClearSwapChainInfo() = 0; - - virtual base::span<SwapChainInfo> GetSwapChainInfo() = 0; + const XrSwapchain& color_swapchain, + std::vector<SwapChainInfo>& color_swapchain_images) const = 0; }; } // namespace device
diff --git a/device/vr/openxr/windows/openxr_graphics_binding_d3d11.cc b/device/vr/openxr/windows/openxr_graphics_binding_d3d11.cc index fb8744a2..4945d56 100644 --- a/device/vr/openxr/windows/openxr_graphics_binding_d3d11.cc +++ b/device/vr/openxr/windows/openxr_graphics_binding_d3d11.cc
@@ -85,9 +85,10 @@ } XrResult OpenXrGraphicsBindingD3D11::EnumerateSwapchainImages( - const XrSwapchain& color_swapchain) { + const XrSwapchain& color_swapchain, + std::vector<SwapChainInfo>& color_swapchain_images) const { CHECK(color_swapchain != XR_NULL_HANDLE); - CHECK(color_swapchain_images_.empty()); + CHECK(color_swapchain_images.empty()); uint32_t chain_length; RETURN_IF_XR_FAILED( @@ -100,20 +101,12 @@ reinterpret_cast<XrSwapchainImageBaseHeader*>( xr_color_swapchain_images.data()))); - color_swapchain_images_.reserve(xr_color_swapchain_images.size()); + color_swapchain_images.reserve(xr_color_swapchain_images.size()); for (const auto& swapchain_image : xr_color_swapchain_images) { - color_swapchain_images_.emplace_back(swapchain_image.texture); + color_swapchain_images.emplace_back(swapchain_image.texture); } return XR_SUCCESS; } -void OpenXrGraphicsBindingD3D11::ClearSwapChainInfo() { - color_swapchain_images_.clear(); -} - -base::span<SwapChainInfo> OpenXrGraphicsBindingD3D11::GetSwapChainInfo() { - return color_swapchain_images_; -} - } // namespace device
diff --git a/device/vr/openxr/windows/openxr_graphics_binding_d3d11.h b/device/vr/openxr/windows/openxr_graphics_binding_d3d11.h index a03b768..091a5dc0 100644 --- a/device/vr/openxr/windows/openxr_graphics_binding_d3d11.h +++ b/device/vr/openxr/windows/openxr_graphics_binding_d3d11.h
@@ -32,15 +32,13 @@ const void* GetSessionCreateInfo() const override; int64_t GetSwapchainFormat(XrSession session) const override; XrResult EnumerateSwapchainImages( - const XrSwapchain& color_swapchain) override; - void ClearSwapChainInfo() override; - base::span<SwapChainInfo> GetSwapChainInfo() override; + const XrSwapchain& color_swapchain, + std::vector<SwapChainInfo>& color_swapchain_images) const override; private: bool initialized_ = false; raw_ptr<D3D11TextureHelper> texture_helper_; base::WeakPtr<OpenXrPlatformHelperWindows> weak_platform_helper_; - std::vector<SwapChainInfo> color_swapchain_images_; XrGraphicsBindingD3D11KHR binding_{XR_TYPE_GRAPHICS_BINDING_D3D11_KHR, nullptr, nullptr};
diff --git a/docs/mac/arc.md b/docs/mac/arc.md index ac436c98..f2f7dc9 100644 --- a/docs/mac/arc.md +++ b/docs/mac/arc.md
@@ -123,6 +123,17 @@ `#import` or an `@` keyword) unguarded by `__OBJC__`, it would not compile in C++ and therefore is not included by C++ code. +## Warning! Dangers! {#dangers} + +There are some bits of AppKit that are incompatible with ARC. Apple has not +updated the documentation to call this out, so a heads-up: + +When creating an `NSWindow`, you _must_ set the `.releasedWhenClosed` property +to `NO`. It's recommended that you do so immediately after creating it with +`alloc`/`init`. If you fail to do so, closing the window will cause it to +release itself, and when the owning pointer later releases it, that second +release is a double-release. + ## Examples of conversion from non-ARC to ARC {#examples} ### Objective-C Classes {#examples-objc-classes}
diff --git a/infra/config/generated/luci/cr-buildbucket.cfg b/infra/config/generated/luci/cr-buildbucket.cfg index 35a83a96..f8b99a79 100644 --- a/infra/config/generated/luci/cr-buildbucket.cfg +++ b/infra/config/generated/luci/cr-buildbucket.cfg
@@ -2329,7 +2329,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:high" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:1" exe { @@ -2758,7 +2758,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:1" exe { @@ -3443,7 +3443,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -8411,7 +8411,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:high" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:1" exe { @@ -10878,7 +10878,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -11746,7 +11746,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:1" exe { @@ -12525,7 +12525,7 @@ dimensions: "cores:16" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:1" exe { @@ -21476,7 +21476,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -25213,7 +25213,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -26867,7 +26867,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -28406,7 +28406,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -31477,7 +31477,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -32079,7 +32079,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -37923,7 +37923,7 @@ dimensions: "cores:16" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:1" exe { @@ -39959,7 +39959,8 @@ dimensions: "cores:32" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" + dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -40206,7 
+40207,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -40847,7 +40848,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -44730,7 +44731,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -45488,7 +45489,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -46308,7 +46309,7 @@ swarming_host: "chromium-swarm.appspot.com" dimensions: "builder:rts-model-packager" dimensions: "cpu:x86-64" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -47678,6 +47679,7 @@ dimensions: "cpu:x86-64" dimensions: "free_space:standard" dimensions: "os:Windows-10" + dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { cipd_package: "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build" @@ -48777,7 +48779,7 @@ dimensions: "cores:8" dimensions: "cpu:x86-64" dimensions: "free_space:standard" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -55220,7 +55222,7 @@ ' "recipe": "chromium_polymorphic/launcher",' ' "runner_builder": {' ' "bucket": "reviver",' - ' "builder": "runner",' + ' "builder": "coverage-runner",' ' "project": "chromium"' ' },' ' "target_builders": [' @@ -55400,11 +55402,65 @@ } } builders { + name: "coverage-runner" + swarming_host: "chromium-swarm.appspot.com" + dimensions: "builderless:1" + dimensions: "cores:8" + dimensions: "cpu:x86-64" + dimensions: "free_space:high" + dimensions: "os:Ubuntu-18.04" + dimensions: "pool:luci.chromium.ci" + dimensions: "ssd:0" + exe { + cipd_package: "infra/chromium/bootstrapper/${platform}" + cipd_version: "latest" + cmd: "bootstrapper" + cmd: "-polymorphic" + cmd: "-properties-optional" + } + properties: + '{' + ' "$bootstrap/exe": {' + ' "exe": {' + ' "cipd_package": "infra/recipe_bundles/chromium.googlesource.com/chromium/tools/build",' + ' "cipd_version": "refs/heads/main",' + ' "cmd": [' + ' "luciexe"' + ' ]' + ' }' + ' },' + ' "$recipe_engine/resultdb/test_presentation": {' + ' "column_keys": [],' + ' "grouping_keys": [' + ' "status",' + ' "v.test_suite"' + ' ]' + ' },' + ' "led_builder_is_bootstrapped": true,' + ' "recipe": "reviver/chromium/runner"' + '}' + execution_timeout_secs: 43200 + service_account: "chromium-ci-builder@chops-service-accounts.iam.gserviceaccount.com" + experiments { + key: "luci.recipes.use_python3" + value: 100 + } + resultdb { + enable: true + bq_exports { + project: "chrome-luci-data" + dataset: "chromium" + table: "reviver_test_results" + test_results {} + } + } + } + builders { name: "fuchsia-coordinator" swarming_host: "chromium-swarm.appspot.com" dimensions: "builderless:1" dimensions: "cores:8" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe { @@ -55462,7 +55518,7 
@@ swarming_host: "chromium-swarm.appspot.com" dimensions: "builderless:1" dimensions: "cores:8" - dimensions: "os:Ubuntu-18.04" + dimensions: "os:Ubuntu-22.04" dimensions: "pool:luci.chromium.ci" dimensions: "ssd:0" exe {
diff --git a/infra/config/generated/luci/luci-milo.cfg b/infra/config/generated/luci/luci-milo.cfg index b1b67f56..0c09d57 100644 --- a/infra/config/generated/luci/luci-milo.cfg +++ b/infra/config/generated/luci/luci-milo.cfg
@@ -18303,6 +18303,9 @@ name: "buildbucket/luci.chromium.reviver/android-x64-launcher" } builders { + name: "buildbucket/luci.chromium.reviver/coverage-runner" + } + builders { name: "buildbucket/luci.chromium.reviver/fuchsia-coordinator" } builders {
diff --git a/infra/config/generated/luci/luci-scheduler.cfg b/infra/config/generated/luci/luci-scheduler.cfg index c333fc8f..341f3f8 100644 --- a/infra/config/generated/luci/luci-scheduler.cfg +++ b/infra/config/generated/luci/luci-scheduler.cfg
@@ -3445,7 +3445,7 @@ job { id: "android-coverage-launcher" realm: "reviver" - schedule: "0 10 * * *" + schedule: "0 6 * * *" buildbucket { server: "cr-buildbucket.appspot.com" bucket: "reviver"
diff --git a/infra/config/lib/linux-default.json b/infra/config/lib/linux-default.json index 0e1907f..502da31 100644 --- a/infra/config/lib/linux-default.json +++ b/infra/config/lib/linux-default.json
@@ -17,15 +17,18 @@ "Android arm64 Builder (dbg)": "Ubuntu-22.04", "Android arm64 Builder All Targets (dbg)": "Ubuntu-22.04", "Android x64 Builder (dbg)": "Ubuntu-22.04", + "Android x86 Builder (dbg)": "Ubuntu-22.04", "CFI Linux CF": "Ubuntu-22.04", "CFI Linux ToT": "Ubuntu-22.04", "Cast Android (dbg)": "Ubuntu-22.04", + "Cast Audio Linux": "Ubuntu-22.04", "Cast Linux": "Ubuntu-22.04", "Cast Linux ARM64": "Ubuntu-22.04", "Cast Linux Debug": "Ubuntu-22.04", "ChromeOS FYI Release (amd64-generic)": "Ubuntu-22.04", "ChromeOS FYI Release (kevin)": "Ubuntu-22.04", "ChromeOS FYI Release Skylab (kevin)": "Ubuntu-22.04", + "ChromiumOS ASAN Release": "Ubuntu-22.04", "Comparison Linux (reclient)": "Ubuntu-22.04", "Comparison Simple Chrome (reclient)": "Ubuntu-22.04", "Dawn Android arm DEPS Release (Pixel 4)": "Ubuntu-22.04", @@ -56,6 +59,7 @@ "Dawn Win10 x86 Release (Intel)": "Ubuntu-22.04", "Dawn Win10 x86 Release (NVIDIA)": "Ubuntu-22.04", "Deterministic Android": "Ubuntu-22.04", + "Deterministic Linux": "Ubuntu-22.04", "Deterministic Linux (dbg)": "Ubuntu-22.04", "GPU FYI Android arm Builder": "Ubuntu-22.04", "GPU FYI Android arm64 Builder": "Ubuntu-22.04", @@ -68,12 +72,15 @@ "Lacros FYI x64 Release (AMD)": "Ubuntu-22.04", "Lacros FYI x64 Release (Intel)": "Ubuntu-22.04", "Leak Detection Linux": "Ubuntu-22.04", + "Libfuzzer Upload Linux ASan Debug": "Ubuntu-22.04", + "Linux ASan LSan Builder": "Ubuntu-22.04", "Linux Builder": "Ubuntu-22.04", "Linux Builder (Wayland)": "Ubuntu-22.04", "Linux Builder (dbg)": "Ubuntu-22.04", "Linux Builder (j-500) (reclient)": "Ubuntu-22.04", "Linux Builder (reclient compare)": "Ubuntu-22.04", "Linux CFI": "Ubuntu-22.04", + "Linux Chromium OS ASan LSan Builder": "Ubuntu-22.04", "Linux ChromiumOS Full": "Ubuntu-22.04", "Linux Debug (NVIDIA)": "Ubuntu-22.04", "Linux FYI Debug (NVIDIA)": "Ubuntu-22.04", @@ -129,6 +136,7 @@ "ToTLinuxTSan": "Ubuntu-22.04", "ToTLinuxUBSanVptr": "Ubuntu-22.04", "VR Linux": "Ubuntu-22.04", + "WebKit Linux Leak": "Ubuntu-22.04", "Win10 FYI x64 DX12 Vulkan Debug (NVIDIA)": "Ubuntu-22.04", "Win10 FYI x64 DX12 Vulkan Release (NVIDIA)": "Ubuntu-22.04", "Win10 FYI x64 Debug (NVIDIA)": "Ubuntu-22.04", @@ -143,12 +151,15 @@ "Win10 x64 Release (NVIDIA)": "Ubuntu-22.04", "android-angle-chromium-arm64-builder": "Ubuntu-22.04", "android-angle-chromium-arm64-nexus5x": "Ubuntu-22.04", + "android-archive-dbg": "Ubuntu-22.04", "android-archive-rel": "Ubuntu-22.04", "android-arm64-archive-rel": "Ubuntu-22.04", "android-binary-size-generator": "Ubuntu-22.04", "android-build-perf-developer": "Ubuntu-22.04", + "android-cronet-x86-dbg": "Ubuntu-22.04", "android-official": "Ubuntu-22.04", "android-perfetto-rel": "Ubuntu-22.04", + "android-rust-arm32-rel": "Ubuntu-22.04", "build-perf-android": "Ubuntu-22.04", "build-perf-android-siso": "Ubuntu-22.04", "build-perf-linux": "Ubuntu-22.04", @@ -166,7 +177,9 @@ "chromeos-jacuzzi-rel": "Ubuntu-22.04", "chromeos-octopus-rel": "Ubuntu-22.04", "fuchsia-angle-builder": "Ubuntu-22.04", + "fuchsia-arm64-rel": "Ubuntu-22.04", "fuchsia-official": "Ubuntu-22.04", + "fuchsia-x64-accessibility-rel": "Ubuntu-22.04", "ios-angle-intel": "Ubuntu-22.04", "lacros-amd64-generic-binary-size-rel": "Ubuntu-22.04", "lacros-amd64-generic-rel": "Ubuntu-22.04", @@ -199,6 +212,7 @@ "linux-chromeos-dbg": "Ubuntu-22.04", "linux-chromeos-rel": "Ubuntu-22.04", "linux-cr23-rel": "Ubuntu-22.04", + "linux-exp-asan-lsan-fyi-rel": "Ubuntu-22.04", "linux-extended-tracing-rel": "Ubuntu-22.04", "linux-fieldtrial-rel": "Ubuntu-22.04", 
"linux-headless-shell-rel": "Ubuntu-22.04", @@ -215,13 +229,16 @@ "linux-network-sandbox-rel": "Ubuntu-22.04", "linux-official": "Ubuntu-22.04", "linux-perfetto-rel": "Ubuntu-22.04", + "linux-presubmit": "Ubuntu-22.04", "linux-rel-jammy-dev": "Ubuntu-22.04", "linux-rel-no-external-ip": "Ubuntu-22.04", "linux-remote-ssd-rel-dev": "Ubuntu-22.04", + "linux-rust-x64-dbg": "Ubuntu-22.04", "linux-swangle-chromium-x64": "Ubuntu-22.04", "linux-swangle-tot-swiftshader-x64": "Ubuntu-22.04", "linux-swangle-x64": "Ubuntu-22.04", "linux-ubsan-vptr": "Ubuntu-22.04", + "linux-updater-builder-dbg": "Ubuntu-22.04", "linux-upload-perfetto": "Ubuntu-22.04", "linux-win_cross-rel": "Ubuntu-22.04", "linux-wpt-content-shell-asan-fyi-rel": "Ubuntu-22.04", @@ -232,11 +249,15 @@ "linux-wpt-input-fyi-rel": "Ubuntu-22.04", "mac-angle-chromium-amd": "Ubuntu-22.04", "mac-angle-chromium-intel": "Ubuntu-22.04", + "mac10.15-updater-tester-rel": "Ubuntu-22.04", + "mac12-arm64-updater-tester-rel": "Ubuntu-22.04", + "rts-model-packager": "Ubuntu-22.04", "win-annotator-rel": "Ubuntu-22.04", "win-cr23-rel": "Ubuntu-22.04", "win-network-sandbox-tester": "Ubuntu-22.04", "win-perfetto-rel": "Ubuntu-22.04", "win-upload-perfetto": "Ubuntu-22.04", + "win10-32-on-64-updater-tester-dbg": "Ubuntu-22.04", "win10-angle-chromium-x64-intel": "Ubuntu-22.04", "win10-angle-chromium-x64-nvidia": "Ubuntu-22.04", "win10-wpt-content-shell-fyi-rel": "Ubuntu-22.04", @@ -245,6 +266,10 @@ "reclient": { "Linux Builder (canonical wd) (reclient compare)": "Ubuntu-22.04" }, + "reviver": { + "fuchsia-coordinator": "Ubuntu-22.04", + "lacros-coordinator": "Ubuntu-22.04" + }, "try": { "android-11-x86-rel": "Ubuntu-22.04", "android-12-x64-dbg": "Ubuntu-22.04",
diff --git a/infra/config/subprojects/chromium/ci/checks.star b/infra/config/subprojects/chromium/ci/checks.star index 1bb7fbb9..2cea48b 100644 --- a/infra/config/subprojects/chromium/ci/checks.star +++ b/infra/config/subprojects/chromium/ci/checks.star
@@ -8,6 +8,7 @@ load("//lib/consoles.star", "consoles") ci.defaults.set( + pool = ci.DEFAULT_POOL, console_view = "checks", service_account = ci.DEFAULT_SERVICE_ACCOUNT, )
diff --git a/infra/config/subprojects/reviver/reviver.star b/infra/config/subprojects/reviver/reviver.star index 03ff661..05932d4 100644 --- a/infra/config/subprojects/reviver/reviver.star +++ b/infra/config/subprojects/reviver/reviver.star
@@ -62,10 +62,10 @@ polymorphic.launcher( name = "android-coverage-launcher", - # To avoid peak hours, we run it at 10AM UTC. - schedule = "0 10 * * *", + # To avoid peak hours, we run it at 6AM UTC. + schedule = "0 6 * * *", pool = ci.DEFAULT_POOL, os = os.LINUX_DEFAULT, - runner = "reviver/runner", + runner = "reviver/coverage-runner", target_builders = [ "ci/android-code-coverage-native", ], @@ -220,3 +220,24 @@ # necessary permissions service_account = ci.DEFAULT_SERVICE_ACCOUNT, ) + +builder( + name = "coverage-runner", + executable = "recipe:reviver/chromium/runner", + pool = ci.DEFAULT_POOL, + builderless = 1, + os = os.LINUX_DEFAULT, + cpu = cpu.X86_64, + ssd = False, + free_space = free_space.high, + auto_builder_dimension = False, + execution_timeout = 12 * time.hour, + resultdb_bigquery_exports = [ + resultdb.export_test_results( + bq_table = "chrome-luci-data.chromium.reviver_test_results", + ), + ], + # TODO(crbug/1346396) Remove this once the reviver service account has + # necessary permissions + service_account = ci.DEFAULT_SERVICE_ACCOUNT, +)
diff --git a/ios/chrome/app/strings/ios_strings.grd b/ios/chrome/app/strings/ios_strings.grd index c621315..0ae0cfeb 100644 --- a/ios/chrome/app/strings/ios_strings.grd +++ b/ios/chrome/app/strings/ios_strings.grd
@@ -1966,6 +1966,9 @@ <message name="IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_THANKS" desc="The action button to dismiss the password bottom sheet and revert to using the keyboard to fill in username and password information on the webpage."> No Thanks </message> + <message name="IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_USERNAME" desc="Text shown for an option in a list of the user's saved passwords, when the user did not save a username along with the password."> + No Username •••••••• + </message> <message name="IDS_IOS_PASSWORD_BOTTOM_SHEET_PASSWORD_MANAGER" desc="The password bottom sheet's long press menu item to open the password manager."> Password Manager </message>
diff --git a/ios/chrome/app/strings/ios_strings_grd/IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_USERNAME.png.sha1 b/ios/chrome/app/strings/ios_strings_grd/IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_USERNAME.png.sha1 new file mode 100644 index 0000000..05e5d8d --- /dev/null +++ b/ios/chrome/app/strings/ios_strings_grd/IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_USERNAME.png.sha1
@@ -0,0 +1 @@ +fc382618729c72b5c41eda7d92863687594ee2dd \ No newline at end of file
diff --git a/ios/chrome/browser/flags/BUILD.gn b/ios/chrome/browser/flags/BUILD.gn index 59316f4..a84d244a 100644 --- a/ios/chrome/browser/flags/BUILD.gn +++ b/ios/chrome/browser/flags/BUILD.gn
@@ -77,6 +77,7 @@ "//ios/chrome/browser/ui/app_store_rating:features", "//ios/chrome/browser/ui/autofill:features", "//ios/chrome/browser/ui/content_suggestions:feature_flags", + "//ios/chrome/browser/ui/default_promo/post_restore:features", "//ios/chrome/browser/ui/download:features", "//ios/chrome/browser/ui/first_run:field_trial", "//ios/chrome/browser/ui/ntp:feature_flags",
diff --git a/ios/chrome/browser/flags/about_flags.mm b/ios/chrome/browser/flags/about_flags.mm index 6f7bff63..6bca8a3c 100644 --- a/ios/chrome/browser/flags/about_flags.mm +++ b/ios/chrome/browser/flags/about_flags.mm
@@ -91,6 +91,7 @@ #import "ios/chrome/browser/ui/app_store_rating/features.h" #import "ios/chrome/browser/ui/autofill/features.h" #import "ios/chrome/browser/ui/content_suggestions/content_suggestions_feature.h" +#import "ios/chrome/browser/ui/default_promo/post_restore/features.h" #import "ios/chrome/browser/ui/download/features.h" #import "ios/chrome/browser/ui/ntp/new_tab_page_feature.h" #import "ios/chrome/browser/ui/ntp/new_tab_page_field_trial_constants.h" @@ -670,6 +671,18 @@ std::size(kIOSEditMenuSearchWithTitleWebSearch), nullptr}, }; +const FeatureEntry::FeatureParam kPostRestoreDefaultBrowserPromoHalfscreen[] = { + {kPostRestoreDefaultBrowserPromoHalfscreenParam, "true"}}; +const FeatureEntry::FeatureParam kPostRestoreDefaultBrowserPromoFullscreen[] = { + {kPostRestoreDefaultBrowserPromoFullscreenParam, "true"}}; +const FeatureEntry::FeatureVariation + kPostRestoreDefaultBrowserPromoVariations[] = { + {"with half screen ui", kPostRestoreDefaultBrowserPromoHalfscreen, + std::size(kPostRestoreDefaultBrowserPromoHalfscreen), nullptr}, + {"with full screen ui", kPostRestoreDefaultBrowserPromoFullscreen, + std::size(kPostRestoreDefaultBrowserPromoFullscreen), nullptr}, +}; + // To add a new entry, add to the end of kFeatureEntries. There are four // distinct types of entries: // . ENABLE_DISABLE_VALUE: entry is either enabled, disabled, or uses the @@ -1564,6 +1577,14 @@ flag_descriptions::kAutofillEnableSupportForLandmarkDescription, flags_ui::kOsIos, FEATURE_VALUE_TYPE(autofill::features::kAutofillEnableSupportForLandmark)}, + {"post-restore-default-browser-promo", + flag_descriptions::kPostRestoreDefaultBrowserPromoName, + flag_descriptions::kPostRestoreDefaultBrowserPromoDescription, + flags_ui::kOsIos, + FEATURE_WITH_PARAMS_VALUE_TYPE( + kPostRestoreDefaultBrowserPromo, + kPostRestoreDefaultBrowserPromoVariations, + "PostRestoreDefaultBrowserPromoVariations")}, {"spotlight-open-tabs-source", flag_descriptions::kSpotlightOpenTabsSourceName, flag_descriptions::kSpotlightOpenTabsSourceDescription, flags_ui::kOsIos,
diff --git a/ios/chrome/browser/flags/ios_chrome_flag_descriptions.cc b/ios/chrome/browser/flags/ios_chrome_flag_descriptions.cc index 6390baa6..7127587 100644 --- a/ios/chrome/browser/flags/ios_chrome_flag_descriptions.cc +++ b/ios/chrome/browser/flags/ios_chrome_flag_descriptions.cc
@@ -883,6 +883,13 @@ "Enable the new chrome://policy/logs page containing logs for debugging " "policy related issues on IOS."; +const char kPostRestoreDefaultBrowserPromoName[] = + "Post Restore Default Browser Promo"; +const char kPostRestoreDefaultBrowserPromoDescription[] = + "When enabled, the user will be presented with a promo showing how to " + "set Chrome as the default browser after losing their default browser " + "status due to an iOS restore."; + const char kPromosManagerUsesFETName[] = "Promos Manager using FET"; const char kPromosManagerUsesFETDescription[] = "Migrates the Promos Manager to use the Feature Engagement Tracker as its "
diff --git a/ios/chrome/browser/flags/ios_chrome_flag_descriptions.h b/ios/chrome/browser/flags/ios_chrome_flag_descriptions.h index 32db238..b44014a4 100644 --- a/ios/chrome/browser/flags/ios_chrome_flag_descriptions.h +++ b/ios/chrome/browser/flags/ios_chrome_flag_descriptions.h
@@ -776,6 +776,11 @@ extern const char kPolicyLogsPageIOSName[]; extern const char kPolicyLogsPageIOSDescription[]; +// Title and description for the flag to enable the post restore default browser +// promo. +extern const char kPostRestoreDefaultBrowserPromoName[]; +extern const char kPostRestoreDefaultBrowserPromoDescription[]; + // Title and description for the flag to have the Promos Manager use the FET as // its impression tracking system. extern const char kPromosManagerUsesFETName[];
diff --git a/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator.mm b/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator.mm index 4339b40..91dec49 100644 --- a/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator.mm +++ b/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator.mm
@@ -6,6 +6,7 @@ #import <UIKit/UIKit.h> +#import "base/metrics/user_metrics.h" #import "base/notreached.h" #import "base/strings/sys_string_conversions.h" #import "components/signin/public/identity_manager/identity_manager.h" @@ -26,6 +27,13 @@ #error "This file requires ARC support." #endif +using base::RecordAction; +using base::UserMetricsAction; + +@interface TwoScreensSigninCoordinator () < + UIAdaptivePresentationControllerDelegate> +@end + @implementation TwoScreensSigninCoordinator { // The accessPoint and promoAction used for signin merics. signin_metrics::AccessPoint _accessPoint; @@ -67,6 +75,7 @@ [[UINavigationController alloc] initWithNavigationBarClass:nil toolbarClass:nil]; _navigationController.modalPresentationStyle = UIModalPresentationFormSheet; + _navigationController.presentationController.delegate = self; [self presentScreen:[_screenProvider nextScreenType]]; @@ -165,6 +174,7 @@ [_childCoordinator stop]; _childCoordinator = nil; } + _navigationController.presentationController.delegate = nil; _navigationController = nil; _screenProvider = nil; SigninCompletionInfo* completionInfo = @@ -242,6 +252,16 @@ }]; } +#pragma mark - UIAdaptivePresentationControllerDelegate + +- (void)presentationControllerDidDismiss: + (UIPresentationController*)presentationController { + RecordAction(UserMetricsAction("Signin_TwoScreens_SwipeDismiss")); + [self interruptWithAction: + SigninCoordinatorInterruptActionDismissWithoutAnimation + completion:nil]; +} + #pragma mark - NSObject - (NSString*)description {
diff --git a/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator_unittest.mm b/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator_unittest.mm index 58743b5..cfb988e 100644 --- a/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator_unittest.mm +++ b/ios/chrome/browser/ui/authentication/signin/two_screens_signin/two_screens_signin_coordinator_unittest.mm
@@ -9,6 +9,7 @@ #import "base/ios/block_types.h" #import "base/mac/foundation_util.h" #import "base/test/ios/wait_util.h" +#import "base/test/metrics/user_action_tester.h" #import "ios/chrome/browser/shared/model/application_context/application_context.h" #import "ios/chrome/browser/shared/model/browser/test/test_browser.h" #import "ios/chrome/browser/shared/model/browser_state/test_chrome_browser_state.h" @@ -104,6 +105,7 @@ std::unique_ptr<Browser> browser_; std::unique_ptr<TestChromeBrowserState> browser_state_; TwoScreensSigninCoordinator* coordinator_; + base::UserActionTester user_actions_; UIWindow* window_; }; @@ -207,3 +209,39 @@ SigninCompletionActionNone); [coordinator_ stop]; } + +// Tests that the user can swipe to dismiss and that a user action is recorded. +TEST_F(TwoScreensSigninCoordinatorTest, SwipeToDismiss) { + __block SigninCoordinatorResult signin_result; + __block SigninCompletionInfo* signin_completion_info; + __block BOOL completion_block_done = NO; + coordinator_.signinCompletion = + ^(SigninCoordinatorResult signinResult, + SigninCompletionInfo* signinCompletionInfo) { + signin_result = signinResult; + signin_completion_info = signinCompletionInfo; + completion_block_done = YES; + }; + + [coordinator_ start]; + + // Simulate a swipe-to-dismiss. + EXPECT_EQ(0, user_actions_.GetActionCount("Signin_TwoScreens_SwipeDismiss")); + UIPresentationController* presentationController = + PresentedViewController().presentationController; + [presentationController.delegate + presentationControllerDidDismiss:presentationController]; + + auto completion_condition = ^{ + return completion_block_done; + }; + base::test::ios::WaitUntilCondition(completion_condition, true, + base::Seconds(1)); + EXPECT_EQ(signin_result, SigninCoordinatorResultInterrupted); + EXPECT_EQ(signin_completion_info.identity, nil); + EXPECT_EQ(signin_completion_info.signinCompletionAction, + SigninCompletionActionNone); + EXPECT_EQ(1, user_actions_.GetActionCount("Signin_TwoScreens_SwipeDismiss")); + + [coordinator_ stop]; +}
diff --git a/ios/chrome/browser/ui/authentication/tangible_sync/tangible_sync_coordinator.mm b/ios/chrome/browser/ui/authentication/tangible_sync/tangible_sync_coordinator.mm index 71c8745..d0957602 100644 --- a/ios/chrome/browser/ui/authentication/tangible_sync/tangible_sync_coordinator.mm +++ b/ios/chrome/browser/ui/authentication/tangible_sync/tangible_sync_coordinator.mm
@@ -81,7 +81,6 @@ [super start]; _viewController = [[TangibleSyncViewController alloc] init]; _viewController.delegate = self; - _viewController.modalInPresentation = YES; ChromeBrowserState* browserState = self.browser->GetBrowserState(); AuthenticationService* authenticationService = AuthenticationServiceFactory::GetForBrowserState(browserState); @@ -104,6 +103,7 @@ _mediator.consumer = _viewController; _mediator.delegate = self; if (_firstRun) { + _viewController.modalInPresentation = YES; base::UmaHistogramEnumeration("FirstRun.Stage", first_run::kTangibleSyncScreenStart); }
diff --git a/ios/chrome/browser/ui/content_suggestions/content_suggestions_egtest.mm b/ios/chrome/browser/ui/content_suggestions/content_suggestions_egtest.mm index a2efb36..c9445bd 100644 --- a/ios/chrome/browser/ui/content_suggestions/content_suggestions_egtest.mm +++ b/ios/chrome/browser/ui/content_suggestions/content_suggestions_egtest.mm
@@ -449,6 +449,44 @@ @"SetUpList item SignIn not completed."); } +// Tests that the signin and sync screens can be dismissed by a swipe. +- (void)testSetUpListSigninSwipeToDismiss { + [self prepareToTestSetUpList]; + [SigninEarlGrey addFakeIdentity:[FakeSystemIdentity fakeIdentity1]]; + + // Tap the signin item. + TapView(set_up_list::kSignInItemID); + // Verify the signin screen appears. + id<GREYMatcher> signinView = grey_accessibilityID( + first_run::kFirstRunSignInScreenAccessibilityIdentifier); + [[EarlGrey selectElementWithMatcher:signinView] + assertWithMatcher:grey_notNil()]; + // Swipe to dismiss the signin screen. + [[EarlGrey selectElementWithMatcher:signinView] + performAction:grey_swipeFastInDirection(kGREYDirectionDown)]; + // Verify that the signin screen is gone. + [[EarlGrey selectElementWithMatcher:signinView] assertWithMatcher:grey_nil()]; + + [self prepareToTestSetUpList]; + // Tap the signin item. + TapView(set_up_list::kSignInItemID); + // Verify the signin screen appears. + [[EarlGrey selectElementWithMatcher:signinView] + assertWithMatcher:grey_notNil()]; + // Tap "Continue as ...". + TapPromoStylePrimaryActionButton(); + // Verify the tangible sync screen appears. + id<GREYMatcher> syncView = + grey_accessibilityID(kTangibleSyncViewAccessibilityIdentifier); + [[EarlGrey selectElementWithMatcher:syncView] + assertWithMatcher:grey_notNil()]; + // Swipe to dismiss the sync screen. + [[EarlGrey selectElementWithMatcher:syncView] + performAction:grey_swipeFastInDirection(kGREYDirectionDown)]; + // Verify that the sync screen is gone. + [[EarlGrey selectElementWithMatcher:syncView] assertWithMatcher:grey_nil()]; +} + #pragma mark - Test utils // Sets up the test case to test SetUpList.
diff --git a/ios/chrome/browser/ui/default_promo/post_restore/BUILD.gn b/ios/chrome/browser/ui/default_promo/post_restore/BUILD.gn new file mode 100644 index 0000000..c605d32 --- /dev/null +++ b/ios/chrome/browser/ui/default_promo/post_restore/BUILD.gn
@@ -0,0 +1,12 @@ +# Copyright 2023 The Chromium Authors +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +source_set("features") { + configs += [ "//build/config/compiler:enable_arc" ] + sources = [ + "features.h", + "features.mm", + ] + deps = [ "//base" ] +}
diff --git a/ios/chrome/browser/ui/default_promo/post_restore/features.h b/ios/chrome/browser/ui/default_promo/post_restore/features.h new file mode 100644 index 0000000..a472b8f --- /dev/null +++ b/ios/chrome/browser/ui/default_promo/post_restore/features.h
@@ -0,0 +1,34 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef IOS_CHROME_BROWSER_UI_DEFAULT_PROMO_POST_RESTORE_FEATURES_H_ +#define IOS_CHROME_BROWSER_UI_DEFAULT_PROMO_POST_RESTORE_FEATURES_H_ + +#import "base/feature_list.h" + +// Feature flag to enable the post restore default browser promo. +BASE_DECLARE_FEATURE(kPostRestoreDefaultBrowserPromo); + +// Enum for "Post Restore Default Browser" experiment groups. +enum class PostRestoreDefaultBrowserPromoType { + // "Post Restore Default Browser" enabled with an alert style promo. + kAlert = 0, + // "Post Restore Default Browser" enabled with a half sheet promo. + kHalfscreen = 1, + // "Post Restore Default Browser" enabled with a full screen promo. + kFullscreen = 2, + // "Post Restore Default Browser" not enabled. + kDisabled, +}; + +// Feature param for the halfscreen promo. +extern const char kPostRestoreDefaultBrowserPromoHalfscreenParam[]; +// Feature param for the fullscreen promo. +extern const char kPostRestoreDefaultBrowserPromoFullscreenParam[]; + +// Returns the current PostRestoreDefaultBrowserPromoType according to the +// feature flag and experiment "PostRestoreDefaultBrowserPromoIOS". +PostRestoreDefaultBrowserPromoType GetPostRestoreDefaultBrowserPromoType(); + +#endif // IOS_CHROME_BROWSER_UI_DEFAULT_PROMO_POST_RESTORE_FEATURES_H_
diff --git a/ios/chrome/browser/ui/default_promo/post_restore/features.mm b/ios/chrome/browser/ui/default_promo/post_restore/features.mm new file mode 100644 index 0000000..8d142a0 --- /dev/null +++ b/ios/chrome/browser/ui/default_promo/post_restore/features.mm
@@ -0,0 +1,37 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#import "ios/chrome/browser/ui/default_promo/post_restore/features.h" + +#import "base/metrics/field_trial_params.h" + +#if !defined(__has_feature) || !__has_feature(objc_arc) +#error "This file requires ARC support." +#endif + +BASE_FEATURE(kPostRestoreDefaultBrowserPromo, + "PostRestoreDefaultBrowserPromo", + base::FEATURE_DISABLED_BY_DEFAULT); + +const char kPostRestoreDefaultBrowserPromoHalfscreenParam[] = + "post-restore-default-browser-promo-halfscreen"; +const char kPostRestoreDefaultBrowserPromoFullscreenParam[] = + "post-restore-default-browser-promo-fullscreen"; + +PostRestoreDefaultBrowserPromoType GetPostRestoreDefaultBrowserPromoType() { + if (!base::FeatureList::IsEnabled(kPostRestoreDefaultBrowserPromo)) { + return PostRestoreDefaultBrowserPromoType::kDisabled; + } + if (base::GetFieldTrialParamByFeatureAsBool( + kPostRestoreDefaultBrowserPromo, + kPostRestoreDefaultBrowserPromoHalfscreenParam, false)) { + return PostRestoreDefaultBrowserPromoType::kHalfscreen; + } + if (base::GetFieldTrialParamByFeatureAsBool( + kPostRestoreDefaultBrowserPromo, + kPostRestoreDefaultBrowserPromoFullscreenParam, false)) { + return PostRestoreDefaultBrowserPromoType::kFullscreen; + } + return PostRestoreDefaultBrowserPromoType::kAlert; +}
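For orientation, the feature wiring above resolves to a single promo type at runtime: the chrome://flags variations added in about_flags.mm set one of the two boolean params, and GetPostRestoreDefaultBrowserPromoType() maps them to kHalfscreen, kFullscreen, kAlert (the default arm), or kDisabled when the flag is off. A minimal sketch of a caller follows; the Show*Promo helpers are hypothetical placeholders and are not part of this change.

// Sketch only; ShowAlertPromo/ShowHalfSheetPromo/ShowFullScreenPromo are
// illustrative placeholders, not CL code.
void MaybeShowPostRestoreDefaultBrowserPromo() {
  switch (GetPostRestoreDefaultBrowserPromoType()) {
    case PostRestoreDefaultBrowserPromoType::kDisabled:
      return;  // Flag disabled: never show the promo.
    case PostRestoreDefaultBrowserPromoType::kAlert:
      ShowAlertPromo();  // Default arm when no variation param is set.
      break;
    case PostRestoreDefaultBrowserPromoType::kHalfscreen:
      ShowHalfSheetPromo();
      break;
    case PostRestoreDefaultBrowserPromoType::kFullscreen:
      ShowFullScreenPromo();
      break;
  }
}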
diff --git a/ios/chrome/browser/ui/first_run/signin/signin_screen_coordinator.mm b/ios/chrome/browser/ui/first_run/signin/signin_screen_coordinator.mm index 41a84d3c..57bddb09 100644 --- a/ios/chrome/browser/ui/first_run/signin/signin_screen_coordinator.mm +++ b/ios/chrome/browser/ui/first_run/signin/signin_screen_coordinator.mm
@@ -100,7 +100,6 @@ self.viewController = [[SigninScreenViewController alloc] init]; self.viewController.TOSHandler = TOSHandler; self.viewController.delegate = self; - self.viewController.modalInPresentation = YES; ChromeBrowserState* browserState = self.browser->GetBrowserState(); self.authenticationService = @@ -124,6 +123,9 @@ accessPoint:_accessPoint promoAction:_promoAction]; self.mediator.consumer = self.viewController; + if (self.mediator.firstRun) { + self.viewController.modalInPresentation = YES; + } BOOL animated = self.baseNavigationController.topViewController != nil; [self.baseNavigationController setViewControllers:@[ self.viewController ] animated:animated];
diff --git a/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.h b/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.h index 7f055a2..1ef917c 100644 --- a/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.h +++ b/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.h
@@ -42,6 +42,8 @@ @property(nonatomic, assign) BOOL UMALinkWasTapped; // Whether an account has been added. Must be set externally. @property(nonatomic, assign) BOOL addedAccount; +// YES if this is part of a first run signin. +@property(nonatomic, readonly) BOOL firstRun; // The designated initializer. // `accountManagerService` account manager service.
diff --git a/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.mm b/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.mm index 41b5f7e..5d35302 100644 --- a/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.mm +++ b/ios/chrome/browser/ui/first_run/signin/signin_screen_mediator.mm
@@ -30,7 +30,6 @@ @interface SigninScreenMediator () { std::unique_ptr<ChromeAccountManagerServiceObserverBridge> _accountManagerServiceObserver; - BOOL _firstRun; } // Account manager service to retrieve Chrome identities. @@ -88,7 +87,7 @@ _hadIdentitiesAtStartup = self.accountManagerService->HasIdentities(); _firstRun = accessPoint == signin_metrics::AccessPoint::ACCESS_POINT_START_PAGE; - if (_firstRun) { + if (self.firstRun) { _logger = [[FirstRunSigninLogger alloc] initWithAccessPoint:accessPoint promoAction:promoAction @@ -198,7 +197,7 @@ if (self.UMALinkWasTapped) { base::RecordAction(base::UserMetricsAction("MobileFreUMALinkTapped")); } - if (_firstRun) { + if (self.firstRun) { first_run::FirstRunStage firstRunStage = signIn ? first_run::kWelcomeAndSigninScreenCompletionWithSignIn : first_run::kWelcomeAndSigninScreenCompletionWithoutSignIn; @@ -246,7 +245,7 @@ break; } self.consumer.isManaged = IsApplicationManagedByPlatform(); - if (!_firstRun) { + if (!self.firstRun) { self.consumer.screenIntent = SigninScreenConsumerScreenIntentSigninOnly; } else { BOOL metricReportingDisabled =
diff --git a/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_egtest.mm b/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_egtest.mm index 603bd0db..8f14b13 100644 --- a/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_egtest.mm +++ b/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_egtest.mm
@@ -551,4 +551,31 @@ WaitForKeyboardToAppear(); } +- (void)testOpenPasswordBottomSheetNoUsername { + [PasswordSuggestionBottomSheetAppInterface setUpMockReauthenticationModule]; + [PasswordSuggestionBottomSheetAppInterface + mockReauthenticationModuleExpectedResult:ReauthenticationResult:: + kSuccess]; + [PasswordManagerAppInterface + storeCredentialWithUsername:@"" + password:@"password" + URL:net::NSURLWithGURL(self.testServer->GetURL( + "/simple_login_form.html"))]; + [SigninEarlGreyUI signinWithFakeIdentity:[FakeSystemIdentity fakeIdentity1] + enableSync:NO]; + [self loadLoginPage]; + + [[EarlGrey selectElementWithMatcher:chrome_test_util::WebViewMatcher()] + performAction:chrome_test_util::TapWebElementWithId(kFormPassword)]; + + [ChromeEarlGrey waitForUIElementToAppearWithMatcher: + grey_accessibilityID(l10n_util::GetNSString( + IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_USERNAME))]; + + [[EarlGrey + selectElementWithMatcher:grey_accessibilityID(l10n_util::GetNSString( + IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_USERNAME))] + assertWithMatcher:grey_sufficientlyVisible()]; +} + @end
diff --git a/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_view_controller.mm b/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_view_controller.mm index 99c1ccb4..30ade58 100644 --- a/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_view_controller.mm +++ b/ios/chrome/browser/ui/passwords/bottom_sheet/password_suggestion_bottom_sheet_view_controller.mm
@@ -397,7 +397,7 @@ withString:@""]; } if (!username || [username length] == 0) { - return kPasswordFormSuggestionSuffix; + return l10n_util::GetNSString(IDS_IOS_PASSWORD_BOTTOM_SHEET_NO_USERNAME); } return username; }
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.h b/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.h index b829b25..90cdb1b 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.h +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.h
@@ -13,8 +13,6 @@ class PrefService; -using DestinationRanking = std::vector<overflow_menu::Destination>; - // Tracks destination usage from the new overflow menu and implements a // frecency-based sorting algorithm (i.e. an algorithm that uses the data // frequency and data recency when determining sort order) to order destinations @@ -73,8 +71,8 @@ // current ranking and a list of all available destinations. - (DestinationRanking) sortedDestinationsFromCurrentRanking:(DestinationRanking)currentRanking - carouselDestinations:(NSArray<OverflowMenuDestination*>*) - carouselDestinations; + availableDestinations: + (DestinationRanking)availableDestinations; // Stops the Destination Usage History. - (void)stop;
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.mm b/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.mm index 6fdc8b7..1c65894f 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.mm +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.mm
@@ -186,22 +186,9 @@ - (DestinationRanking) sortedDestinationsFromCurrentRanking:(DestinationRanking)currentRanking - carouselDestinations:(NSArray<OverflowMenuDestination*>*) - carouselDestinations { - [self seedUsageHistoryForNewDestinations:carouselDestinations]; - - // Exit early if there's no `currentRanking`, which only happens if the device - // hasn't used Smart Sorting before. - if (currentRanking.empty()) { - // Given there's no existing `currentRanking`, the current carousel sort - // order will be used as the default ranking. - for (OverflowMenuDestination* destination in carouselDestinations) { - currentRanking.push_back( - static_cast<overflow_menu::Destination>(destination.destination)); - } - - return currentRanking; - } + availableDestinations: + (DestinationRanking)availableDestinations { + [self seedUsageHistoryForNewDestinations:availableDestinations]; DestinationRanking sortedRanking = [self calculateNewRankingFromCurrentRanking:currentRanking]; @@ -295,16 +282,12 @@ // clicks. This method skips seeding history for any `destinations` that // already exist in `_usageHistory`. - (void)seedUsageHistoryForNewDestinations: - (NSArray<OverflowMenuDestination*>*)destinations { + (DestinationRanking)availableDestinations { DCHECK_GT(kDampening, 1.0); DCHECK_GT(kInitialUsageThreshold, 1); - std::set<overflow_menu::Destination> newDestinations; - - for (OverflowMenuDestination* destination in destinations) { - newDestinations.insert( - static_cast<overflow_menu::Destination>(destination.destination)); - } + std::set<overflow_menu::Destination> newDestinations( + availableDestinations.begin(), availableDestinations.end()); std::set<overflow_menu::Destination> existingDestinations;
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history_unittest.mm b/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history_unittest.mm index 306fc5a..d2ebbaef 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history_unittest.mm +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history_unittest.mm
@@ -55,7 +55,7 @@ // Initializes `destination_usage_history_` with empty pref data and returns // the initial ranking. DestinationRanking InitializeDestinationUsageHistory( - NSArray<OverflowMenuDestination*>* default_destinations) { + DestinationRanking default_destinations) { CreatePrefs(); destination_usage_history_ = @@ -68,7 +68,7 @@ DestinationRanking initial_ranking = [destination_usage_history_ sortedDestinationsFromCurrentRanking:{} - carouselDestinations:default_destinations]; + availableDestinations:default_destinations]; return initial_ranking; } @@ -78,7 +78,7 @@ DestinationRanking InitializeDestinationUsageHistoryWithData( DestinationRanking& ranking, base::Value::Dict& history, - NSArray<OverflowMenuDestination*>* default_destinations) { + DestinationRanking default_destinations) { base::Value::List previous_ranking; for (overflow_menu::Destination destination : ranking) { @@ -98,7 +98,7 @@ DestinationRanking initial_ranking = [destination_usage_history_ sortedDestinationsFromCurrentRanking:ranking - carouselDestinations:default_destinations]; + availableDestinations:default_destinations]; return initial_ranking; } @@ -145,55 +145,17 @@ return DottedPath(base::NumberToString(day), destination); } - OverflowMenuDestination* CreateOverflowMenuDestination( - overflow_menu::Destination destination) { - OverflowMenuDestination* result = - [[OverflowMenuDestination alloc] initWithName:@"Foobar" - symbolName:kSettingsSymbol - systemSymbol:YES - monochromeSymbol:NO - accessibilityIdentifier:@"Foobar" - enterpriseDisabled:NO - displayNewLabelIcon:NO - handler:^{ - // Do nothing - }]; - - result.destination = static_cast<NSInteger>(destination); - - return result; - } - - NSArray<OverflowMenuDestination*>* SampleDestinations() { - OverflowMenuDestination* bookmarksDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Bookmarks); - OverflowMenuDestination* historyDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::History); - OverflowMenuDestination* readingListDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::ReadingList); - OverflowMenuDestination* passwordsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Passwords); - OverflowMenuDestination* downloadsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Downloads); - OverflowMenuDestination* recentTabsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::RecentTabs); - OverflowMenuDestination* siteInfoDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::SiteInfo); - OverflowMenuDestination* settingsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Settings); - - NSArray<OverflowMenuDestination*>* destinations = @[ - bookmarksDestination, - historyDestination, - readingListDestination, - passwordsDestination, - downloadsDestination, - recentTabsDestination, - siteInfoDestination, - settingsDestination, - ]; - - return destinations; + DestinationRanking SampleDestinations() { + return { + overflow_menu::Destination::Bookmarks, + overflow_menu::Destination::History, + overflow_menu::Destination::ReadingList, + overflow_menu::Destination::Passwords, + overflow_menu::Destination::Downloads, + overflow_menu::Destination::RecentTabs, + overflow_menu::Destination::SiteInfo, + overflow_menu::Destination::Settings, + }; } std::unique_ptr<TestingPrefServiceSimple> prefs_; @@ -275,17 +237,16 @@ // number of clicks. 
TEST_F(DestinationUsageHistoryTest, InjectsDefaultClickCountForAllDestinations) { - NSArray<OverflowMenuDestination*>* sample_destinations = SampleDestinations(); + DestinationRanking sample_destinations = SampleDestinations(); InitializeDestinationUsageHistory(sample_destinations); ScopedDictPrefUpdate update(prefs_.get(), prefs::kOverflowMenuDestinationUsageHistory); - for (OverflowMenuDestination* destination in sample_destinations) { - const std::string dotted_path = DottedPath( - TodaysDay().InDays(), - static_cast<overflow_menu::Destination>(destination.destination)); + for (overflow_menu::Destination destination : sample_destinations) { + const std::string dotted_path = + DottedPath(TodaysDay().InDays(), destination); absl::optional<int> expected = update->FindIntByDottedPath(dotted_path); @@ -344,9 +305,9 @@ } TEST_F(DestinationUsageHistoryTest, DoesNotSwapTwoShownDestinations) { - NSArray<OverflowMenuDestination*>* sample_destinations = SampleDestinations(); + DestinationRanking sample_destinations = SampleDestinations(); - std::vector<overflow_menu::Destination> ranking = { + DestinationRanking ranking = { overflow_menu::Destination::Bookmarks, overflow_menu::Destination::History, overflow_menu::Destination::ReadingList, @@ -376,15 +337,15 @@ DestinationRanking sorted_ranking = [destination_usage_history_ sortedDestinationsFromCurrentRanking:initial_ranking - carouselDestinations:sample_destinations]; + availableDestinations:sample_destinations]; EXPECT_EQ(initial_ranking, sorted_ranking); } TEST_F(DestinationUsageHistoryTest, DoesNotSwapTwoUnshownDestinations) { - NSArray<OverflowMenuDestination*>* sample_destinations = SampleDestinations(); + DestinationRanking sample_destinations = SampleDestinations(); - std::vector<overflow_menu::Destination> ranking = { + DestinationRanking ranking = { overflow_menu::Destination::Bookmarks, overflow_menu::Destination::History, overflow_menu::Destination::ReadingList, @@ -442,15 +403,15 @@ DestinationRanking sorted_ranking = [destination_usage_history_ sortedDestinationsFromCurrentRanking:initial_ranking - carouselDestinations:sample_destinations]; + availableDestinations:sample_destinations]; EXPECT_EQ(initial_ranking, sorted_ranking); } TEST_F(DestinationUsageHistoryTest, DeletesExpiredUsageData) { - NSArray<OverflowMenuDestination*>* sample_destinations = SampleDestinations(); + DestinationRanking sample_destinations = SampleDestinations(); - std::vector<overflow_menu::Destination> ranking = { + DestinationRanking ranking = { overflow_menu::Destination::Bookmarks, overflow_menu::Destination::History, overflow_menu::Destination::ReadingList, @@ -486,7 +447,7 @@ [destination_usage_history_ sortedDestinationsFromCurrentRanking:ranking - carouselDestinations:sample_destinations]; + availableDestinations:sample_destinations]; ScopedDictPrefUpdate update(prefs_.get(), prefs::kOverflowMenuDestinationUsageHistory);
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_constants.h b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_constants.h index d862d54..3699deb8 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_constants.h +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_constants.h
@@ -35,4 +35,6 @@ void RecordUmaActionForDestination(Destination destination); } // namespace overflow_menu +using DestinationRanking = std::vector<overflow_menu::Destination>; + #endif // IOS_CHROME_BROWSER_UI_POPUP_MENU_OVERFLOW_MENU_OVERFLOW_MENU_CONSTANTS_H_
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_destination.swift b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_destination.swift index 68260f3..f07e04dd 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_destination.swift +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_destination.swift
@@ -8,6 +8,7 @@ @objcMembers public class OverflowMenuDestination: OverflowMenuItem { @objc public enum BadgeType: Int { + case none // Whether the destination should show an error badge. case error // Whether the destination should show a promo badge. @@ -15,7 +16,6 @@ // Whether the destination should show a "New" badge // indicating a new destination. case new - case none } public var badge: BadgeType = .none
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_mediator.mm b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_mediator.mm index 7168f6c..e300e35a 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_mediator.mm +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_mediator.mm
@@ -156,6 +156,7 @@ CRWWebStateObserver, FollowMenuUpdater, IOSLanguageDetectionTabHelperObserving, + OverflowMenuDestinationProvider, OverlayPresenterObserving, PrefObserverDelegate, WebStateListObserving> { @@ -314,6 +315,7 @@ if (!self.menuOrderer) { self.menuOrderer = [[OverflowMenuOrderer alloc] initWithIsIncognito:self.isIncognito]; + self.menuOrderer.destinationProvider = self; } [self updateModel]; } @@ -824,46 +826,26 @@ } } -// Adds SpotlightDebugger to the OverflowMenuDestination to be displayed in the -// destinations carousel. -- (NSArray<OverflowMenuDestination*>*)insertSpotlightDebuggerToDestinations: - (NSArray<OverflowMenuDestination*>*)destinations { - DCHECK(IsSpotlightDebuggingEnabled()); +- (DestinationRanking)baseDestinations { + std::vector<overflow_menu::Destination> destinations = { + overflow_menu::Destination::Bookmarks, + overflow_menu::Destination::History, + overflow_menu::Destination::ReadingList, + overflow_menu::Destination::Passwords, + overflow_menu::Destination::Downloads, + overflow_menu::Destination::RecentTabs, + overflow_menu::Destination::SiteInfo, + overflow_menu::Destination::Settings, + }; - NSMutableArray<OverflowMenuDestination*>* newDestinations = - [[NSMutableArray alloc] init]; - - // Place the debugger at the top of the overflow menu carousel. - [newDestinations addObject:self.spotlightDebuggerDestination]; - [newDestinations addObjectsFromArray:destinations]; - - return newDestinations; -} - -// Creates an NSArray containing the destinations contained in the overflow menu -// carousel. -- (NSArray<OverflowMenuDestination*>*)baseDestinations { - NSMutableArray* baseDestinations = [[NSMutableArray alloc] initWithArray:@[ - self.bookmarksDestination, - self.historyDestination, - self.readingListDestination, - self.passwordsDestination, - self.downloadsDestination, - self.recentTabsDestination, - self.siteInfoDestination, - self.settingsDestination, - ]]; - - if (self.webState && - IsPriceTrackingEnabled(ChromeBrowserState::FromBrowserState( - self.webState->GetBrowserState())) && + if (IsPriceNotificationsEnabled() && IsSmartSortingPriceTrackingDestinationEnabled()) { - [baseDestinations addObject:self.priceNotificationsDestination]; + destinations.push_back(overflow_menu::Destination::PriceNotifications); } - [baseDestinations addObject:self.whatsNewDestination]; + destinations.push_back(overflow_menu::Destination::WhatsNew); - return baseDestinations; + return destinations; } // Returns YES if the Overflow Menu should indicate an identity error. @@ -882,48 +864,7 @@ return; } - if ([self shouldIndicateIdentityError]) { - self.settingsDestination.badge = BadgeTypeError; - } else { - [self maybeHighlightSettingsWithPromoBadge]; - } - - if (!WasWhatsNewUsed()) { - // Highlight What's New with a badge if it was never used before. - self.whatsNewDestination.badge = BadgeTypeNew; - } - - // Set badges if necessary. 
- if (self.engagementTracker && - self.engagementTracker->ShouldTriggerHelpUI( - feature_engagement::kIPHBadgedReadingListFeature)) { - self.readingListDestination.badge = BadgeTypePromo; - } - - NSArray<OverflowMenuDestination*>* baseDestinations = [self baseDestinations]; - - baseDestinations = [self.menuOrderer - sortedDestinationsFromCarouselDestinations:baseDestinations]; - - if (IsSpotlightDebuggingEnabled()) { - baseDestinations = - [self insertSpotlightDebuggerToDestinations:baseDestinations]; - } - - self.overflowMenuModel.destinations = [baseDestinations - filteredArrayUsingPredicate:[NSPredicate predicateWithBlock:^BOOL( - id object, - NSDictionary* bindings) { - if (object == self.siteInfoDestination) { - return [self currentWebPageSupportsSiteInfo]; - } - // All other destinations are displayed in regular mode. - if (!self.isIncognito) { - return true; - } - return object != self.historyDestination && - object != self.recentTabsDestination; - }]]; + self.overflowMenuModel.destinations = [self.menuOrderer sortedDestinations]; NSMutableArray<OverflowMenuAction*>* appActions = [[NSMutableArray alloc] init]; @@ -1366,6 +1307,57 @@ self.webContentAreaShowingOverlay = NO; } +#pragma mark - OverflowMenuDestinationProvider + +- (OverflowMenuDestination*)destinationForDestinationType: + (overflow_menu::Destination)destinationType { + switch (destinationType) { + case overflow_menu::Destination::Bookmarks: + return self.bookmarksDestination; + case overflow_menu::Destination::History: + return (self.isIncognito) ? nil : self.historyDestination; + case overflow_menu::Destination::ReadingList: + // Set badges if necessary. + if (self.engagementTracker && + self.engagementTracker->ShouldTriggerHelpUI( + feature_engagement::kIPHBadgedReadingListFeature)) { + self.readingListDestination.badge = BadgeTypePromo; + } + return self.readingListDestination; + case overflow_menu::Destination::Passwords: + return self.passwordsDestination; + case overflow_menu::Destination::Downloads: + return self.downloadsDestination; + case overflow_menu::Destination::RecentTabs: + return self.isIncognito ? nil : self.recentTabsDestination; + case overflow_menu::Destination::SiteInfo: + return ([self currentWebPageSupportsSiteInfo]) ? self.siteInfoDestination + : nil; + case overflow_menu::Destination::Settings: + if ([self shouldIndicateIdentityError]) { + self.settingsDestination.badge = BadgeTypeError; + } else { + [self maybeHighlightSettingsWithPromoBadge]; + } + return self.settingsDestination; + case overflow_menu::Destination::WhatsNew: + if (!WasWhatsNewUsed()) { + // Highlight What's New with a badge if it was never used before. + self.whatsNewDestination.badge = BadgeTypeNew; + } + return self.whatsNewDestination; + case overflow_menu::Destination::SpotlightDebugger: + return self.spotlightDebuggerDestination; + case overflow_menu::Destination::PriceNotifications: + BOOL priceNotificationsActive = + self.webState && + IsPriceTrackingEnabled(ChromeBrowserState::FromBrowserState( + self.webState->GetBrowserState())); + return (priceNotificationsActive) ? self.priceNotificationsDestination + : nil; + } +} + #pragma mark - Action handlers // Dismisses the menu and reloads the current page.
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.h b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.h index 3a04e4c..8bfcb7b 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.h +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.h
@@ -7,12 +7,26 @@ #import <UIKit/UIKit.h> +#import "ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_constants.h" + namespace overflow_menu { enum class Destination; } @class OverflowMenuDestination; class PrefService; +@protocol OverflowMenuDestinationProvider <NSObject> + +- (DestinationRanking)baseDestinations; + +// Returns the correct `OverflowMenuDestination` for the corresponding +// `overflow_menu::Destination` on the current page. Returns nil if the current +// page does not support the given `destinationType`. +- (OverflowMenuDestination*)destinationForDestinationType: + (overflow_menu::Destination)destinationType; + +@end + // Controls the order of all the items in the overflow menu. @interface OverflowMenuOrderer : NSObject @@ -27,6 +41,9 @@ // new overflow menu (i.e. the number of "above-the-fold" destinations). @property(nonatomic, assign) int visibleDestinationsCount; +@property(nonatomic, weak) id<OverflowMenuDestinationProvider> + destinationProvider; + // Release any C++ objects that can't be reference counted. - (void)disconnect; @@ -34,9 +51,7 @@ - (void)recordClickForDestination:(overflow_menu::Destination)destination; // Returns a new, sorted list of destinations given the initial list. -- (NSArray<OverflowMenuDestination*>*) - sortedDestinationsFromCarouselDestinations: - (NSArray<OverflowMenuDestination*>*)carouselDestinations; +- (NSArray<OverflowMenuDestination*>*)sortedDestinations; @end
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.mm b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.mm index 8c6d1fb..27f2266 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.mm +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer.mm
@@ -6,10 +6,14 @@ #import "components/prefs/pref_service.h" #import "components/prefs/scoped_user_pref_update.h" +#import "ios/chrome/browser/commerce/push_notification/push_notification_feature.h" +#import "ios/chrome/browser/flags/system_flags.h" #import "ios/chrome/browser/shared/model/prefs/pref_names.h" #import "ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/constants.h" #import "ios/chrome/browser/ui/popup_menu/overflow_menu/destination_usage_history/destination_usage_history.h" +#import "ios/chrome/browser/ui/popup_menu/overflow_menu/feature_flags.h" #import "ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_swift.h" +#import "ios/chrome/browser/ui/whats_new/whats_new_util.h" #if !defined(__has_feature) || !__has_feature(objc_arc) #error "This file requires ARC support." @@ -127,39 +131,46 @@ [self.destinationUsageHistory recordClickForDestination:destination]; } -- (NSArray<OverflowMenuDestination*>*) - sortedDestinationsFromCarouselDestinations: - (NSArray<OverflowMenuDestination*>*)carouselDestinations { +- (NSArray<OverflowMenuDestination*>*)sortedDestinations { + DestinationRanking availableDestinations = + [self.destinationProvider baseDestinations]; // If there's no `_ranking`, which only happens if the device // hasn't used Smart Sorting before, use the default carousel order as the // initial ranking. if (_ranking.empty()) { - for (OverflowMenuDestination* destination in carouselDestinations) { - _ranking.push_back( - static_cast<overflow_menu::Destination>(destination.destination)); - } + _ranking = availableDestinations; } - DestinationLookup destinationLookup = - [self destinationLookupMapFromDestinations:carouselDestinations]; - if (self.destinationUsageHistory) { _ranking = [self.destinationUsageHistory sortedDestinationsFromCurrentRanking:_ranking - carouselDestinations:carouselDestinations]; + availableDestinations:availableDestinations]; [self flushToPrefs]; } - [self applyBadgeOrderingToRankingWithCarouselDestinations:carouselDestinations - destinationLookup:destinationLookup]; + [self applyBadgeOrderingToRankingWithAvailableDestinations: + availableDestinations]; - // Convert back to Objective-C array for returning. + // Convert back to Objective-C array for returning. This step also filters out + // any destinations that are not supported on the current page. NSMutableArray<OverflowMenuDestination*>* sortedDestinations = [[NSMutableArray alloc] init]; + + // Manually inject spotlight destination if it's supported. 
+ if (experimental_flags::IsSpotlightDebuggingEnabled()) { + if (OverflowMenuDestination* spotlightDestination = + [self.destinationProvider + destinationForDestinationType:overflow_menu::Destination:: + SpotlightDebugger]) { + [sortedDestinations addObject:spotlightDestination]; + } + } for (overflow_menu::Destination destination : _ranking) { - if (destinationLookup.contains(destination)) { - [sortedDestinations addObject:destinationLookup[destination]]; + if (OverflowMenuDestination* overflowMenuDestination = + [self.destinationProvider + destinationForDestinationType:destination]) { + [sortedDestinations addObject:overflowMenuDestination]; } } @@ -244,23 +255,14 @@ // Modifies `_ranking` to re-order it based on the current badge status of the // various destinations -- (void)applyBadgeOrderingToRankingWithCarouselDestinations: - (NSArray<OverflowMenuDestination*>*)carouselDestinations - destinationLookup: - (DestinationLookup&) - destinationLookup { +- (void)applyBadgeOrderingToRankingWithAvailableDestinations: + (DestinationRanking)availableDestinations { // Detect new destinations added to the carousel by feature teams. New // destinations (`newDestinations`) are those now found in the carousel - // (`currentDestinations`), but not found in the ranking + // (`availableDestinations`), but not found in the ranking // (`existingDestinations`). - std::set<overflow_menu::Destination> currentDestinations; - - for (OverflowMenuDestination* carouselDestination in carouselDestinations) { - overflow_menu::Destination destination = - static_cast<overflow_menu::Destination>( - carouselDestination.destination); - currentDestinations.insert(destination); - } + std::set<overflow_menu::Destination> currentDestinations( + availableDestinations.begin(), availableDestinations.end()); std::set<overflow_menu::Destination> existingDestinations(_ranking.begin(), _ranking.end()); @@ -291,10 +293,12 @@ // position of kNewDestinationsInsertionIndex or worst. for (overflow_menu::Destination rankedDestination : _ranking) { if (remainingDestinations.contains(rankedDestination) && - destinationLookup.contains(rankedDestination) && !_untappedDestinations.contains(rankedDestination)) { + OverflowMenuDestination* overflowMenuDestination = + [self.destinationProvider + destinationForDestinationType:rankedDestination]; const bool dontSort = - destinationLookup[rankedDestination].badge == BadgeTypeNone || + overflowMenuDestination.badge == BadgeTypeNone || sortedDestinations.size() < kNewDestinationsInsertionIndex; if (dontSort) { @@ -315,10 +319,14 @@ if (!_untappedDestinations.empty()) { for (overflow_menu::Destination untappedDestination : _untappedDestinations) { - if (remainingDestinations.contains(untappedDestination) && - destinationLookup.contains(untappedDestination) && - destinationLookup[untappedDestination].badge == BadgeTypeNone) { - destinationLookup[untappedDestination].badge = BadgeTypeNew; + if (remainingDestinations.contains(untappedDestination)) { + OverflowMenuDestination* overflowMenuDestination = + [self.destinationProvider + destinationForDestinationType:untappedDestination]; + if (overflowMenuDestination.badge != BadgeTypeNone) { + continue; + } + overflowMenuDestination.badge = BadgeTypeNew; InsertDestination(untappedDestination, remainingDestinations, sortedDestinations); @@ -336,9 +344,12 @@ // Insert the destinations with a badge that is not for an error at // kNewDestinationsInsertionIndex before the untapped destinations. 
for (overflow_menu::Destination destination : allDestinations) { - if (remainingDestinations.contains(destination) && - destinationLookup.contains(destination) && - destinationLookup[destination].badge != BadgeTypeError) { + if (remainingDestinations.contains(destination)) { + OverflowMenuDestination* overflowMenuDestination = + [self.destinationProvider destinationForDestinationType:destination]; + if (overflowMenuDestination.badge == BadgeTypeError) { + continue; + } InsertDestination(destination, remainingDestinations, sortedDestinations); } } @@ -347,7 +358,7 @@ // other types of badges. for (overflow_menu::Destination destination : allDestinations) { if (remainingDestinations.contains(destination) && - destinationLookup.contains(destination)) { + [self.destinationProvider destinationForDestinationType:destination]) { InsertDestination(destination, remainingDestinations, sortedDestinations); } }
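Taken together, the orderer no longer receives a prebuilt carousel array: it pulls destination types from its OverflowMenuDestinationProvider and resolves each ranked type back to a concrete OverflowMenuDestination, dropping any type for which the provider returns nil (e.g. History and Recent Tabs in incognito, or Site Info on unsupported pages). A minimal usage sketch, assuming `prefs` (a PrefService*) and `provider` (an id<OverflowMenuDestinationProvider>) already exist; it mirrors the fake provider used in the unit tests below.

// Sketch only: the wiring mirrors OverflowMenuMediator and the unit tests.
OverflowMenuOrderer* orderer =
    [[OverflowMenuOrderer alloc] initWithIsIncognito:NO];
orderer.localStatePrefs = prefs;         // persists the ranking across runs
orderer.destinationProvider = provider;  // supplies base + concrete items
// -sortedDestinations asks the provider for -baseDestinations, sorts them via
// the destination usage history, applies the badge ordering, then maps each
// type through -destinationForDestinationType:, skipping nil results.
NSArray<OverflowMenuDestination*>* destinations = [orderer sortedDestinations];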
diff --git a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer_unittest.mm b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer_unittest.mm index 2021fdbf..5c3d48b 100644 --- a/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer_unittest.mm +++ b/ios/chrome/browser/ui/popup_menu/overflow_menu/overflow_menu_orderer_unittest.mm
@@ -36,13 +36,70 @@ (base::Time::Now() - base::Time::UnixEpoch()).InDaysFloored()); } +OverflowMenuDestination* CreateOverflowMenuDestination( + overflow_menu::Destination destination) { + OverflowMenuDestination* result = + [[OverflowMenuDestination alloc] initWithName:@"Foobar" + symbolName:kSettingsSymbol + systemSymbol:YES + monochromeSymbol:NO + accessibilityIdentifier:@"Foobar" + enterpriseDisabled:NO + displayNewLabelIcon:NO + handler:^{ + // Do nothing + }]; + + result.destination = static_cast<NSInteger>(destination); + + return result; +} + } // namespace +// Fake provider for test purposes. +@interface FakeOverflowMenuDestinationProvider + : NSObject <OverflowMenuDestinationProvider> + +@property(nonatomic, assign) DestinationRanking baseDestinations; + +// By default, the provider will create a standard `OverflowMenuDestination` +// and return that in `-destinationForDestinationType:`. This will override +// that to return a custom destination. +- (void)storeCustomDestination:(OverflowMenuDestination*)destination + forDestinationType:(overflow_menu::Destination)destinationType; + +@end + +@implementation FakeOverflowMenuDestinationProvider { + std::map<overflow_menu::Destination, OverflowMenuDestination*> + _destinationMap; +} + +- (void)storeCustomDestination:(OverflowMenuDestination*)destination + forDestinationType:(overflow_menu::Destination)destinationType { + _destinationMap[destinationType] = destination; +} + +- (OverflowMenuDestination*)destinationForDestinationType: + (overflow_menu::Destination)destinationType { + if (_destinationMap.contains(destinationType)) { + return _destinationMap[destinationType]; + } + return CreateOverflowMenuDestination(destinationType); +} + +@end + class OverflowMenuOrdererTest : public PlatformTest { public: OverflowMenuOrdererTest() {} protected: + void SetUp() override { + destination_provider_ = [[FakeOverflowMenuDestinationProvider alloc] init]; + } + void TearDown() override { [overflow_menu_orderer_ disconnect]; @@ -56,15 +113,15 @@ [[OverflowMenuOrderer alloc] initWithIsIncognito:isIncognito]; overflow_menu_orderer_.localStatePrefs = prefs_.get(); - overflow_menu_orderer_.visibleDestinationsCount = kVisibleDestinationsCount; + overflow_menu_orderer_.destinationProvider = destination_provider_; } - void InitializeOverflowMenuOrdererWithRanking( - BOOL isIncognito, - NSArray<OverflowMenuDestination*>* ranking) { + void InitializeOverflowMenuOrdererWithRanking(BOOL isIncognito, + DestinationRanking ranking) { InitializeOverflowMenuOrderer(isIncognito); - [overflow_menu_orderer_ sortedDestinationsFromCarouselDestinations:ranking]; + destination_provider_.baseDestinations = ranking; + [overflow_menu_orderer_ sortedDestinations]; } // Create pref registry for tests. 
@@ -77,72 +134,35 @@ prefs_->registry()->RegisterListPref(prefs::kOverflowMenuDestinationsOrder); } - OverflowMenuDestination* CreateOverflowMenuDestination( - overflow_menu::Destination destination) { - OverflowMenuDestination* result = - [[OverflowMenuDestination alloc] initWithName:@"Foobar" - symbolName:kSettingsSymbol - systemSymbol:YES - monochromeSymbol:NO - accessibilityIdentifier:@"Foobar" - enterpriseDisabled:NO - displayNewLabelIcon:NO - handler:^{ - // Do nothing - }]; - - result.destination = static_cast<NSInteger>(destination); - - return result; - } - - NSArray<OverflowMenuDestination*>* SampleDestinations() { - OverflowMenuDestination* bookmarksDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Bookmarks); - OverflowMenuDestination* historyDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::History); - OverflowMenuDestination* readingListDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::ReadingList); - OverflowMenuDestination* passwordsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Passwords); - OverflowMenuDestination* downloadsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Downloads); - OverflowMenuDestination* recentTabsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::RecentTabs); - OverflowMenuDestination* siteInfoDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::SiteInfo); - OverflowMenuDestination* settingsDestination = - CreateOverflowMenuDestination(overflow_menu::Destination::Settings); - - NSArray<OverflowMenuDestination*>* destinations = @[ - bookmarksDestination, - historyDestination, - readingListDestination, - passwordsDestination, - downloadsDestination, - recentTabsDestination, - siteInfoDestination, - settingsDestination, - ]; - - return destinations; + DestinationRanking SampleDestinations() { + return { + overflow_menu::Destination::Bookmarks, + overflow_menu::Destination::History, + overflow_menu::Destination::ReadingList, + overflow_menu::Destination::Passwords, + overflow_menu::Destination::Downloads, + overflow_menu::Destination::RecentTabs, + overflow_menu::Destination::SiteInfo, + overflow_menu::Destination::Settings, + }; } std::unique_ptr<TestingPrefServiceSimple> prefs_; OverflowMenuOrderer* overflow_menu_orderer_; + FakeOverflowMenuDestinationProvider* destination_provider_; }; // Tests that the ranking pref gets populated after sorting once. 
TEST_F(OverflowMenuOrdererTest, StoresInitialRanking) { InitializeOverflowMenuOrderer(NO); - NSArray<OverflowMenuDestination*>* sample_destinations = SampleDestinations(); - [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:sample_destinations]; + DestinationRanking sample_destinations = SampleDestinations(); + destination_provider_.baseDestinations = sample_destinations; + [overflow_menu_orderer_ sortedDestinations]; const base::Value::List& stored_ranking = prefs_->GetList(prefs::kOverflowMenuDestinationsOrder); - EXPECT_EQ(stored_ranking.size(), sample_destinations.count); + EXPECT_EQ(stored_ranking.size(), sample_destinations.size()); } // Tests that the old pref format (kOverflowMenuDestinationUsageHistory as a @@ -185,16 +205,12 @@ } TEST_F(OverflowMenuOrdererTest, InsertsNewDestinationInMiddleOfRanking) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - all_destinations[6], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + all_destinations[6], + }; // Creates `OverflowMenuOrderer` with initial ranking // `current_destinations`. @@ -204,34 +220,34 @@ // `all_destinations[7]`, which should eventually be inserted starting at // position 4 in the carousel (this is the expected behavior defined by // product). - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - all_destinations[6], - // New destination - all_destinations[7], - ]; + DestinationRanking updated_destinations = { + all_destinations[0], + all_destinations[1], + all_destinations[2], + all_destinations[3], + all_destinations[4], + all_destinations[5], + all_destinations[6], + // New destination + all_destinations[7], + }; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + destination_provider_.baseDestinations = updated_destinations; - ASSERT_NSEQ(sorted_ranking[3], all_destinations[7]); + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; + + ASSERT_EQ( + static_cast<overflow_menu::Destination>(sorted_ranking[3].destination), + all_destinations[7]); } TEST_F(OverflowMenuOrdererTest, InsertsNewDestinationsInMiddleOfRanking) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + }; // Initializes `OverflowMenuOrderer` with initial ranking // `current_destinations`. 
@@ -240,200 +256,230 @@ // Same as `current_destinations`, but has new elements (`all_destinations[6]` // and `all_destinations[7]`) inserted starting at position 4 in the carousel // (this is the expected behavior defined by product). - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - // New destinations - all_destinations[6], - all_destinations[7], - ]; + DestinationRanking updated_destinations = { + all_destinations[0], + all_destinations[1], + all_destinations[2], + all_destinations[3], + all_destinations[4], + all_destinations[5], + // New destinations + all_destinations[6], + all_destinations[7], + }; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + destination_provider_.baseDestinations = updated_destinations; - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex], + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; + + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex].destination), all_destinations[7]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 1], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 1].destination), all_destinations[6]); } TEST_F(OverflowMenuOrdererTest, InsertsAndRemovesNewDestinationsInRanking) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + }; // Initializes `OverflowMenuOrderer` with initial ranking // `current_destinations`. 
InitializeOverflowMenuOrdererWithRanking(NO, current_destinations); - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - // NOTE: all_destinations[0] was removed - // NOTE: all_destinations[1] was removed - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - // New destinations - all_destinations[6], - all_destinations[7], - ]; + DestinationRanking updated_destinations = { + // NOTE: all_destinations[0] was removed + // NOTE: all_destinations[1] was removed + all_destinations[2], + all_destinations[3], + all_destinations[4], + all_destinations[5], + // New destinations + all_destinations[6], + all_destinations[7], + }; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + destination_provider_.baseDestinations = updated_destinations; - ASSERT_EQ(sorted_ranking[0], all_destinations[2]); + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex], + ASSERT_EQ( + static_cast<overflow_menu::Destination>(sorted_ranking[0].destination), + all_destinations[2]); + + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex].destination), all_destinations[7]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 1], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 1].destination), all_destinations[6]); } // Tests that the destinations that have a badge are moved in the middle of the // ranking to get the user's attention; before the untapped destinations. TEST_F(OverflowMenuOrdererTest, MoveBadgedDestinationsInRanking) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + }; // Initializes `OverflowMenuOrderer` with initial ranking // `current_destinations`. 
InitializeOverflowMenuOrdererWithRanking(NO, current_destinations); - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - // New destinations - all_destinations[6], - ]; + DestinationRanking updated_destinations = { + all_destinations[0], + all_destinations[1], + all_destinations[2], + all_destinations[3], + all_destinations[4], + all_destinations[5], + // New destinations + all_destinations[6], + }; - all_destinations[4].badge = BadgeTypeError; + OverflowMenuDestination* destination = + CreateOverflowMenuDestination(all_destinations[4]); + destination.badge = BadgeTypeError; + [destination_provider_ storeCustomDestination:destination + forDestinationType:all_destinations[4]]; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + destination_provider_.baseDestinations = updated_destinations; - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex], + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; + + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex].destination), all_destinations[4]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 1], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 1].destination), all_destinations[6]); } // Tests that the destinations that have an error badge have priority over the // other badges when they are moved. TEST_F(OverflowMenuOrdererTest, PriorityToErrorBadgeOverOtherBadges) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + }; - all_destinations[5].badge = BadgeTypeError; - all_destinations[3].badge = BadgeTypePromo; + OverflowMenuDestination* destination5 = + CreateOverflowMenuDestination(all_destinations[5]); + destination5.badge = BadgeTypeError; + [destination_provider_ storeCustomDestination:destination5 + forDestinationType:all_destinations[5]]; + + OverflowMenuDestination* destination3 = + CreateOverflowMenuDestination(all_destinations[3]); + destination3.badge = BadgeTypePromo; + [destination_provider_ storeCustomDestination:destination3 + forDestinationType:all_destinations[3]]; + + destination_provider_.baseDestinations = current_destinations; // Initializes `OverflowMenuOrderer`. InitializeOverflowMenuOrderer(NO); // Set the initial ranking to `current_destinations`. 
- NSArray<OverflowMenuDestination*>* initial_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:current_destinations]; + NSArray<OverflowMenuDestination*>* initial_ranking = + [overflow_menu_orderer_ sortedDestinations]; - ASSERT_EQ(initial_ranking[kNewDestinationsInsertionIndex], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + initial_ranking[kNewDestinationsInsertionIndex].destination), all_destinations[5]); - ASSERT_EQ(initial_ranking[kNewDestinationsInsertionIndex + 1], - all_destinations[3]); + ASSERT_EQ( + static_cast<overflow_menu::Destination>( + initial_ranking[kNewDestinationsInsertionIndex + 1].destination), + all_destinations[3]); } // Tests that the destinations that have a badge but are in a better position // than kNewDestinationsInsertionIndex won't be moved hence not demoted. TEST_F(OverflowMenuOrdererTest, DontMoveBadgedDestinationWithGoodRanking) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + }; - all_destinations[0].badge = BadgeTypePromo; + OverflowMenuDestination* destination = + CreateOverflowMenuDestination(all_destinations[0]); + destination.badge = BadgeTypePromo; + [destination_provider_ storeCustomDestination:destination + forDestinationType:all_destinations[0]]; + + destination_provider_.baseDestinations = current_destinations; // Initializes `OverflowMenuOrderer`. InitializeOverflowMenuOrderer(NO); // Set the initial ranking to `current_destinations`. - NSArray<OverflowMenuDestination*>* initial_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:current_destinations]; + NSArray<OverflowMenuDestination*>* initial_ranking = + [overflow_menu_orderer_ sortedDestinations]; // Verify that the destination with a badge and with a better ranking than // kNewDestinationsInsertionIndex wasn't moved. - ASSERT_EQ(initial_ranking[0], all_destinations[0]); + ASSERT_EQ( + static_cast<overflow_menu::Destination>(initial_ranking[0].destination), + all_destinations[0]); } // Tests that if a destination is both new and has a badge, it will be inserted // before the other destinations that are only new without a badge assigned. TEST_F(OverflowMenuOrdererTest, PriorityToBadgeOverNewDestinationStatus) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], + }; // Initializes `OverflowMenuOrderer` with initial ranking // `current_destinations`. 
InitializeOverflowMenuOrdererWithRanking(NO, current_destinations); - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - // New destinations - all_destinations[5], - all_destinations[6], - all_destinations[7], - ]; + DestinationRanking updated_destinations = { + all_destinations[0], + all_destinations[1], + all_destinations[2], + all_destinations[3], + all_destinations[4], + // New destinations + all_destinations[5], + all_destinations[6], + all_destinations[7], + }; - all_destinations[6].badge = BadgeTypeNew; + OverflowMenuDestination* destination = + CreateOverflowMenuDestination(all_destinations[6]); + destination.badge = BadgeTypeNew; + [destination_provider_ storeCustomDestination:destination + forDestinationType:all_destinations[6]]; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + destination_provider_.baseDestinations = updated_destinations; - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex], + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; + + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex].destination), all_destinations[6]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 1], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 1].destination), all_destinations[7]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 2], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 2].destination), all_destinations[5]); } @@ -441,64 +487,77 @@ // before the other destinations wtih a badge of the same priority that are not // new. TEST_F(OverflowMenuOrdererTest, PriorityToNewDestinationWithBadge) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + }; // Initializes `OverflowMenuOrderer` with initial ranking // `current_destinations`. 
InitializeOverflowMenuOrdererWithRanking(NO, current_destinations); - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - // New destinations - all_destinations[6], - all_destinations[7], - ]; + DestinationRanking updated_destinations = { + all_destinations[0], + all_destinations[1], + all_destinations[2], + all_destinations[3], + all_destinations[4], + all_destinations[5], + // New destinations + all_destinations[6], + all_destinations[7], + }; - all_destinations[4].badge = BadgeTypeError; - all_destinations[5].badge = BadgeTypePromo; - all_destinations[7].badge = BadgeTypeError; + OverflowMenuDestination* destination4 = + CreateOverflowMenuDestination(all_destinations[4]); + destination4.badge = BadgeTypeError; + [destination_provider_ storeCustomDestination:destination4 + forDestinationType:all_destinations[4]]; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + OverflowMenuDestination* destination5 = + CreateOverflowMenuDestination(all_destinations[5]); + destination5.badge = BadgeTypePromo; + [destination_provider_ storeCustomDestination:destination5 + forDestinationType:all_destinations[5]]; - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex], + OverflowMenuDestination* destination7 = + CreateOverflowMenuDestination(all_destinations[7]); + destination7.badge = BadgeTypeError; + [destination_provider_ storeCustomDestination:destination7 + forDestinationType:all_destinations[7]]; + + destination_provider_.baseDestinations = updated_destinations; + + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; + + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex].destination), all_destinations[7]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 1], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 1].destination), all_destinations[4]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 2], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 2].destination), all_destinations[5]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 3], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 3].destination), all_destinations[6]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 4], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 4].destination), all_destinations[3]); } // Tests that the destinations are still promoted when there is no usage // history ranking. 
TEST_F(OverflowMenuOrdererTest, TestNewDestinationsWhenNoHistoryUsageRanking) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - all_destinations[6], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + all_destinations[6], + }; // Creates `OverflowMenuOrderer` with initial ranking // `current_destinations`. @@ -508,57 +567,65 @@ // `all_destinations[7]`, which should eventually be inserted starting at // position 4 in the carousel (this is the expected behavior defined by // product). - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - all_destinations[6], - // New destination - all_destinations[7], - ]; + DestinationRanking updated_destinations = { + all_destinations[0], + all_destinations[1], + all_destinations[2], + all_destinations[3], + all_destinations[4], + all_destinations[5], + all_destinations[6], + // New destination + all_destinations[7], + }; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + destination_provider_.baseDestinations = updated_destinations; - ASSERT_NSEQ(sorted_ranking[3], all_destinations[7]); + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; + + ASSERT_EQ( + static_cast<overflow_menu::Destination>(sorted_ranking[3].destination), + all_destinations[7]); } TEST_F(OverflowMenuOrdererTest, MovesBadgedDestinationsWithNoUsageHistory) { - NSArray<OverflowMenuDestination*>* all_destinations = SampleDestinations(); - NSArray<OverflowMenuDestination*>* current_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - ]; + DestinationRanking all_destinations = SampleDestinations(); + DestinationRanking current_destinations = { + all_destinations[0], all_destinations[1], all_destinations[2], + all_destinations[3], all_destinations[4], all_destinations[5], + }; // Initializes `OverflowMenuOrderer` with initial ranking // `current_destinations`. 
InitializeOverflowMenuOrdererWithRanking(NO, current_destinations); - NSArray<OverflowMenuDestination*>* updated_destinations = @[ - all_destinations[0], - all_destinations[1], - all_destinations[2], - all_destinations[3], - all_destinations[4], - all_destinations[5], - // New destinations - all_destinations[6], - ]; + DestinationRanking updated_destinations = { + all_destinations[0], + all_destinations[1], + all_destinations[2], + all_destinations[3], + all_destinations[4], + all_destinations[5], + // New destinations + all_destinations[6], + }; - all_destinations[4].badge = BadgeTypeError; + OverflowMenuDestination* destination = + CreateOverflowMenuDestination(all_destinations[4]); + destination.badge = BadgeTypeError; + [destination_provider_ storeCustomDestination:destination + forDestinationType:all_destinations[4]]; - NSArray<OverflowMenuDestination*>* sorted_ranking = [overflow_menu_orderer_ - sortedDestinationsFromCarouselDestinations:updated_destinations]; + destination_provider_.baseDestinations = updated_destinations; - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex], + NSArray<OverflowMenuDestination*>* sorted_ranking = + [overflow_menu_orderer_ sortedDestinations]; + + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex].destination), all_destinations[4]); - ASSERT_EQ(sorted_ranking[kNewDestinationsInsertionIndex + 1], + ASSERT_EQ(static_cast<overflow_menu::Destination>( + sorted_ranking[kNewDestinationsInsertionIndex + 1].destination), all_destinations[6]); }
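The refactor above replaces prebuilt arrays of OverflowMenuDestination objects with a DestinationRanking of destination enums plus a FakeOverflowMenuDestinationProvider: the fake returns a test-registered override when one was stored, and otherwise builds a default destination on demand. A minimal C++ sketch of that lookup-or-default provider pattern (Destination, MenuDestination and the method names below are hypothetical stand-ins, not the Objective-C++ test code):

    #include <map>
    #include <utility>

    // Hypothetical stand-ins for overflow_menu::Destination and
    // OverflowMenuDestination.
    enum class Destination { kBookmarks, kHistory, kSettings };
    struct MenuDestination {
      Destination type;
      int badge = 0;
    };

    // Lookup-or-default provider: tests pre-store a customized destination
    // (e.g. one carrying a badge); every other lookup falls back to a
    // freshly created default.
    class FakeDestinationProvider {
     public:
      void StoreCustom(Destination type, MenuDestination destination) {
        overrides_[type] = std::move(destination);
      }

      MenuDestination DestinationFor(Destination type) const {
        auto it = overrides_.find(type);
        if (it != overrides_.end())
          return it->second;
        return MenuDestination{type};  // Default-built destination.
      }

     private:
      std::map<Destination, MenuDestination> overrides_;
    };

With this shape, a test that needs a badged entry stores one override before asking the orderer for the sorted list, which is how the badge-related tests above are rewritten.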
diff --git a/ios/chrome/browser/ui/reading_list/reading_list_coordinator.mm b/ios/chrome/browser/ui/reading_list/reading_list_coordinator.mm index 9536953..13a45a6 100644 --- a/ios/chrome/browser/ui/reading_list/reading_list_coordinator.mm +++ b/ios/chrome/browser/ui/reading_list/reading_list_coordinator.mm
@@ -256,6 +256,10 @@ self.started = NO; } +- (void)dealloc { + DCHECK(!self.mediator); +} + #pragma mark - ReadingListListViewControllerAudience - (void)readingListHasItems:(BOOL)hasItems {
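The new -dealloc override documents an invariant: -stop must have torn down and released the mediator before the coordinator is deallocated, so a coordinator leaked with a live mediator is caught in debug builds. A rough C++ analogue of the same check, with hypothetical Coordinator/Mediator types (not Chromium code):

    #include <memory>

    #include "base/check.h"

    class Mediator {};

    class Coordinator {
     public:
      ~Coordinator() {
        // Stop() must have released the mediator before destruction.
        DCHECK(!mediator_);
      }

      void Stop() { mediator_.reset(); }

     private:
      std::unique_ptr<Mediator> mediator_ = std::make_unique<Mediator>();
    };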
diff --git a/ios/chrome/browser/ui/tab_switcher/tab_grid/tab_grid_coordinator.mm b/ios/chrome/browser/ui/tab_switcher/tab_grid/tab_grid_coordinator.mm index 076faa8..5e79e55 100644 --- a/ios/chrome/browser/ui/tab_switcher/tab_grid/tab_grid_coordinator.mm +++ b/ios/chrome/browser/ui/tab_switcher/tab_grid/tab_grid_coordinator.mm
@@ -1033,6 +1033,9 @@ [self.historyCoordinator stop]; self.historyCoordinator = nil; + + [_bookmarksCoordinator shutdown]; + _bookmarksCoordinator = nil; } #pragma mark - TabPresentationDelegate
diff --git a/ios/google_internal/frameworks/ChromeInternal.framework.dSYM.ios.zip.sha1 b/ios/google_internal/frameworks/ChromeInternal.framework.dSYM.ios.zip.sha1 index b40f9050..94b375b3 100644 --- a/ios/google_internal/frameworks/ChromeInternal.framework.dSYM.ios.zip.sha1 +++ b/ios/google_internal/frameworks/ChromeInternal.framework.dSYM.ios.zip.sha1
@@ -1 +1 @@ -0247904214914ced1ce5dbfd3842db063ac33be0 \ No newline at end of file +73923d5b84a13e968b6ade6c9ed6c9fdf797e3f0 \ No newline at end of file
diff --git a/ios/google_internal/frameworks/ChromeSSOInternal.framework.dSYM.ios.zip.sha1 b/ios/google_internal/frameworks/ChromeSSOInternal.framework.dSYM.ios.zip.sha1 index 24b18349..46cbbf3 100644 --- a/ios/google_internal/frameworks/ChromeSSOInternal.framework.dSYM.ios.zip.sha1 +++ b/ios/google_internal/frameworks/ChromeSSOInternal.framework.dSYM.ios.zip.sha1
@@ -1 +1 @@ -8ae52f785d3e066f50cea7cd9baffcdd7790f086 \ No newline at end of file +7faa08ca66730d92203afcca6054f413c12e702d \ No newline at end of file
diff --git a/ios/google_internal/frameworks/chrome_internal_dynamic_framework.ios.zip.sha1 b/ios/google_internal/frameworks/chrome_internal_dynamic_framework.ios.zip.sha1 index 3e62cc6..787a2a1 100644 --- a/ios/google_internal/frameworks/chrome_internal_dynamic_framework.ios.zip.sha1 +++ b/ios/google_internal/frameworks/chrome_internal_dynamic_framework.ios.zip.sha1
@@ -1 +1 @@ -7f795ba75b422b40380cc4bfeee5108299e14cf9 \ No newline at end of file +4a0bb62f5261263ce5e66c7b60b9793ffd08f5a3 \ No newline at end of file
diff --git a/ios/google_internal/frameworks/chrome_internal_dynamic_framework.iossimulator.zip.sha1 b/ios/google_internal/frameworks/chrome_internal_dynamic_framework.iossimulator.zip.sha1 index df2b57b9..f39c789 100644 --- a/ios/google_internal/frameworks/chrome_internal_dynamic_framework.iossimulator.zip.sha1 +++ b/ios/google_internal/frameworks/chrome_internal_dynamic_framework.iossimulator.zip.sha1
@@ -1 +1 @@ -35271582570023475f552c5f703effa0d2bfa5ee \ No newline at end of file +03466c84504eccb0bb206930cb82a7b08fc0216a \ No newline at end of file
diff --git a/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.ios.zip.sha1 b/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.ios.zip.sha1 index 80467cdb..50efb87 100644 --- a/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.ios.zip.sha1 +++ b/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.ios.zip.sha1
@@ -1 +1 @@ -07ba1029740bf214d020388755dd122716e1de21 \ No newline at end of file +f8e5f1423ad43c85b3aef6c91db69a232e02573e \ No newline at end of file
diff --git a/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.iossimulator.zip.sha1 b/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.iossimulator.zip.sha1 index 7a64844..b171683e 100644 --- a/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.iossimulator.zip.sha1 +++ b/ios/google_internal/frameworks/chrome_sso_internal_dynamic_framework.iossimulator.zip.sha1
@@ -1 +1 @@ -53a2d6697702f4725959a3005f62296c57198160 \ No newline at end of file +2389e204f10df680affd5bf7ce07b74fb3cddcac \ No newline at end of file
diff --git a/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.ios.zip.sha1 b/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.ios.zip.sha1 index 569ec5a..7e46501 100644 --- a/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.ios.zip.sha1 +++ b/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.ios.zip.sha1
@@ -1 +1 @@ -8f3ac78a36328e72fb935d35de07df15e605fd51 \ No newline at end of file +acf3532718ab894f2800233da3c1cdfd6aa6dfb4 \ No newline at end of file
diff --git a/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.iossimulator.zip.sha1 b/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.iossimulator.zip.sha1 index f7e9a0f7..ccae72b5 100644 --- a/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.iossimulator.zip.sha1 +++ b/ios/google_internal/frameworks/chrome_test_internal_dynamic_framework.iossimulator.zip.sha1
@@ -1 +1 @@ -eb0f19c9ac762b8cfd27322669c3cbcdd6dac888 \ No newline at end of file +de3438c3415d671d88dee4a06cadc277780b2ad9 \ No newline at end of file
diff --git a/ios/google_internal/frameworks/remoting_internal_dynamic_framework.ios.zip.sha1 b/ios/google_internal/frameworks/remoting_internal_dynamic_framework.ios.zip.sha1 index 671fd89e..1faf4038 100644 --- a/ios/google_internal/frameworks/remoting_internal_dynamic_framework.ios.zip.sha1 +++ b/ios/google_internal/frameworks/remoting_internal_dynamic_framework.ios.zip.sha1
@@ -1 +1 @@ -c273690e508ca7e5d73c9f8263bc90d369e35758 \ No newline at end of file +017743e976a2fd2c527b91a79bfd8cb54012990f \ No newline at end of file
diff --git a/ios/google_internal/frameworks/remoting_internal_dynamic_framework.iossimulator.zip.sha1 b/ios/google_internal/frameworks/remoting_internal_dynamic_framework.iossimulator.zip.sha1 index 51ff2d78..e239c85b 100644 --- a/ios/google_internal/frameworks/remoting_internal_dynamic_framework.iossimulator.zip.sha1 +++ b/ios/google_internal/frameworks/remoting_internal_dynamic_framework.iossimulator.zip.sha1
@@ -1 +1 @@ -73b14b16b8afb3cf3a76ab05ef2f54570f3b22f5 \ No newline at end of file +50d757824a37fb5cbf188c7aa1010a7f5da2651d \ No newline at end of file
diff --git a/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.ios.zip.sha1 b/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.ios.zip.sha1 index b31f3dd6..1420a69 100644 --- a/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.ios.zip.sha1 +++ b/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.ios.zip.sha1
@@ -1 +1 @@ -bf1d745367487d4315429c28d20c145eeaa67531 \ No newline at end of file +67da2651d1135db472eb4595984de2a1bebb2e6e \ No newline at end of file
diff --git a/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.iossimulator.zip.sha1 b/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.iossimulator.zip.sha1 index cdb9867..4ec09a202 100644 --- a/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.iossimulator.zip.sha1 +++ b/ios/google_internal/frameworks/web_view_shell_internal_dynamic_framework.iossimulator.zip.sha1
@@ -1 +1 @@ -4c80b16afb960e9e4c4c0d48ad98a14512859b31 \ No newline at end of file +0cc5f2ef9c7e63b5e1050b7b2039c1e2fd74ba5d \ No newline at end of file
diff --git a/media/cast/sender/openscreen_frame_sender_unittest.cc b/media/cast/sender/openscreen_frame_sender_unittest.cc index a759b65..bf121c4 100644 --- a/media/cast/sender/openscreen_frame_sender_unittest.cc +++ b/media/cast/sender/openscreen_frame_sender_unittest.cc
@@ -91,7 +91,7 @@ task_runner_)), openscreen_task_runner_(task_runner_), openscreen_environment_(openscreen::Clock::now, - &openscreen_task_runner_, + openscreen_task_runner_, openscreen::IPEndpoint::kAnyV4()), openscreen_packet_router_(&openscreen_environment_, 20,
diff --git a/media/gpu/h264_decoder.cc b/media/gpu/h264_decoder.cc index 8ad67e3..f7bc857a 100644 --- a/media/gpu/h264_decoder.cc +++ b/media/gpu/h264_decoder.cc
@@ -1224,6 +1224,9 @@ // then trigger color space change. if (new_color_space.IsSpecified() && new_color_space != picture_color_space_) { + if (!Flush()) { + return false; + } DVLOG(1) << "New color space: " << new_color_space.ToString(); picture_color_space_ = new_color_space; *color_space_changed = true;
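The added Flush() call drains the decoded picture buffer before the decoder records the new color space, so pictures already decoded under the old color space are output first, and a failed flush is now propagated as a decode error instead of being ignored. A self-contained sketch of the detect/flush/apply pattern (ColorSpace, Decoder and Flush below are simplified stand-ins, not the real H264Decoder members):

    // Minimal sketch of "flush before applying a new configuration".
    struct ColorSpace {
      bool specified = false;
      int id = 0;
      bool IsSpecified() const { return specified; }
      bool operator==(const ColorSpace& other) const {
        return specified == other.specified && id == other.id;
      }
      bool operator!=(const ColorSpace& other) const { return !(*this == other); }
    };

    class Decoder {
     public:
      bool MaybeApplyColorSpace(const ColorSpace& new_color_space,
                                bool* color_space_changed) {
        if (!new_color_space.IsSpecified() ||
            new_color_space == current_color_space_) {
          return true;  // Nothing to change.
        }
        // Output everything decoded under the old color space first.
        if (!Flush())
          return false;
        current_color_space_ = new_color_space;
        *color_space_changed = true;
        return true;
      }

     private:
      bool Flush() { return true; }  // Placeholder for draining the DPB.
      ColorSpace current_color_space_;
    };

The identical guard added to h265_decoder.cc below follows the same reasoning.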
diff --git a/media/gpu/h264_dpb.h b/media/gpu/h264_dpb.h index 601bc604..9172d5f 100644 --- a/media/gpu/h264_dpb.h +++ b/media/gpu/h264_dpb.h
@@ -103,7 +103,7 @@ // DPB - Decoded Picture Buffer. // Stores decoded pictures that will be used for future display // and/or reference. -class H264DPB { +class MEDIA_GPU_EXPORT H264DPB { public: H264DPB();
diff --git a/media/gpu/h265_decoder.cc b/media/gpu/h265_decoder.cc index 3757056e..1618c21 100644 --- a/media/gpu/h265_decoder.cc +++ b/media/gpu/h265_decoder.cc
@@ -538,6 +538,9 @@ if (new_color_space.IsSpecified() && new_color_space != picture_color_space_) { + if (!Flush()) { + return false; + } DVLOG(1) << "Picture color space: " << new_color_space.ToString(); picture_color_space_ = new_color_space; *color_space_changed = true;
diff --git a/media/gpu/mac/BUILD.gn b/media/gpu/mac/BUILD.gn index 339ff72d..59bb33c 100644 --- a/media/gpu/mac/BUILD.gn +++ b/media/gpu/mac/BUILD.gn
@@ -79,6 +79,7 @@ sources = [ "video_toolbox_decompression_interface_unittest.cc", + "video_toolbox_h264_accelerator_unittest.cc", "vp9_super_frame_bitstream_filter_unittest.cc", "vt_config_util_unittest.cc", ]
diff --git a/media/gpu/mac/video_toolbox_h264_accelerator.h b/media/gpu/mac/video_toolbox_h264_accelerator.h index 8ac9820..fff3387 100644 --- a/media/gpu/mac/video_toolbox_h264_accelerator.h +++ b/media/gpu/mac/video_toolbox_h264_accelerator.h
@@ -16,10 +16,12 @@ #include "base/mac/scoped_cftyperef.h" #include "base/sequence_checker.h" #include "media/gpu/h264_decoder.h" +#include "media/gpu/media_gpu_export.h" namespace media { -class VideoToolboxH264Accelerator : public H264Decoder::H264Accelerator { +class MEDIA_GPU_EXPORT VideoToolboxH264Accelerator + : public H264Decoder::H264Accelerator { public: using DecodeCB = base::RepeatingCallback<void(base::ScopedCFTypeRef<CMSampleBufferRef>,
diff --git a/media/gpu/mac/video_toolbox_h264_accelerator_unittest.cc b/media/gpu/mac/video_toolbox_h264_accelerator_unittest.cc new file mode 100644 index 0000000..6f9151a6 --- /dev/null +++ b/media/gpu/mac/video_toolbox_h264_accelerator_unittest.cc
@@ -0,0 +1,225 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include <memory> + +#include "base/containers/span.h" +#include "media/gpu/codec_picture.h" +#include "media/gpu/mac/video_toolbox_h264_accelerator.h" +#include "media/video/h264_parser.h" +#include "testing/gmock/include/gmock/gmock.h" +#include "testing/gtest/include/gtest/gtest.h" + +namespace media { + +using testing::_; +using testing::ElementsAre; +using testing::Eq; +using testing::Not; +using testing::SaveArg; + +namespace { + +// Configuration from buck180p30.mp4 +constexpr uint8_t kSPS0[] = {0x67, 0x64, 0x00, 0x28, 0xac, 0xd1, 0x00, + 0x78, 0x02, 0x27, 0xe5, 0xc0, 0x44, 0x00, + 0x00, 0x03, 0x00, 0x04, 0x00, 0x00, 0x03, + 0x00, 0xf0, 0x3c, 0x60, 0xc4, 0x48}; +constexpr uint8_t kPPS0[] = {0x68, 0xeb, 0xef, 0x2c}; + +// Configuration from bbb-320x240-2video-2audio.mp4 +constexpr uint8_t kSPS1[] = {0x67, 0x64, 0x00, 0x0d, 0xac, 0xd9, 0x41, + 0x41, 0xfb, 0x0e, 0x10, 0x00, 0x00, 0x03, + 0x00, 0x10, 0x00, 0x00, 0x03, 0x03, 0xc0, + 0xf1, 0x42, 0x99, 0x60}; +constexpr uint8_t kPPS1[] = {0x68, 0xeb, 0xe0, 0xa4, 0xb2, 0x2c}; + +constexpr uint8_t kSliceData[] = {0x02}; + +} // namespace + +class VideoToolboxH264AcceleratorTest : public testing::Test { + public: + VideoToolboxH264AcceleratorTest() = default; + ~VideoToolboxH264AcceleratorTest() override = default; + + protected: + MOCK_METHOD2(OnDecode, + void(base::ScopedCFTypeRef<CMSampleBufferRef>, + scoped_refptr<CodecPicture>)); + MOCK_METHOD1(OnOutput, void(scoped_refptr<CodecPicture>)); + + std::unique_ptr<VideoToolboxH264Accelerator> accelerator_{ + std::make_unique<VideoToolboxH264Accelerator>( + base::BindRepeating(&VideoToolboxH264AcceleratorTest::OnDecode, + base::Unretained(this)), + base::BindRepeating(&VideoToolboxH264AcceleratorTest::OnOutput, + base::Unretained(this)))}; +}; + +TEST_F(VideoToolboxH264AcceleratorTest, Construct) {} + +TEST_F(VideoToolboxH264AcceleratorTest, DecodeOne) { + scoped_refptr<H264Picture> pic = accelerator_->CreateH264Picture(); + H264SPS sps; + H264PPS pps; + H264DPB dpb; + H264Picture::Vector ref_pic_list; + H264SliceHeader slice_hdr; + std::vector<SubsampleEntry> subsamples; + + // Decode frame. + accelerator_->ProcessSPS(&sps, base::make_span(kSPS0)); + accelerator_->ProcessPPS(&pps, base::make_span(kPPS0)); + accelerator_->SubmitFrameMetadata(&sps, &pps, dpb, ref_pic_list, ref_pic_list, + ref_pic_list, pic); + accelerator_->SubmitSlice(&pps, &slice_hdr, ref_pic_list, ref_pic_list, pic, + kSliceData, sizeof(kSliceData), subsamples); + + // Save the resulting sample. + base::ScopedCFTypeRef<CMSampleBufferRef> sample; + EXPECT_CALL(*this, OnDecode(_, _)).WillOnce(SaveArg<0>(&sample)); + accelerator_->SubmitDecode(pic); + + // Verify sample. + CMBlockBufferRef buf = CMSampleBufferGetDataBuffer(sample); + std::vector<uint8_t> data(CMBlockBufferGetDataLength(buf)); + CMBlockBufferCopyDataBytes(buf, 0, CMBlockBufferGetDataLength(buf), + data.data()); + EXPECT_THAT(data, ElementsAre(0x00, 0x00, 0x00, 0x01, // length + 0x02 // kSliceData + )); + + // Check that OutputPicture() works. 
+ EXPECT_CALL(*this, OnOutput(_)); + accelerator_->OutputPicture(pic); +} + +TEST_F(VideoToolboxH264AcceleratorTest, DecodeTwo) { + scoped_refptr<H264Picture> pic0 = accelerator_->CreateH264Picture(); + scoped_refptr<H264Picture> pic1 = accelerator_->CreateH264Picture(); + H264SPS sps; + H264PPS pps; + H264DPB dpb; + H264Picture::Vector ref_pic_list; + H264SliceHeader slice_hdr; + std::vector<SubsampleEntry> subsamples; + + // First frame. + accelerator_->ProcessSPS(&sps, base::make_span(kSPS0)); + accelerator_->ProcessPPS(&pps, base::make_span(kPPS0)); + accelerator_->SubmitFrameMetadata(&sps, &pps, dpb, ref_pic_list, ref_pic_list, + ref_pic_list, pic0); + accelerator_->SubmitSlice(&pps, &slice_hdr, ref_pic_list, ref_pic_list, pic0, + kSliceData, sizeof(kSliceData), subsamples); + + // Save the resulting sample. + base::ScopedCFTypeRef<CMSampleBufferRef> sample0; + EXPECT_CALL(*this, OnDecode(_, _)).WillOnce(SaveArg<0>(&sample0)); + accelerator_->SubmitDecode(pic0); + + // Second frame. + accelerator_->ProcessSPS(&sps, base::make_span(kSPS0)); + accelerator_->ProcessPPS(&pps, base::make_span(kPPS0)); + accelerator_->SubmitFrameMetadata(&sps, &pps, dpb, ref_pic_list, ref_pic_list, + ref_pic_list, pic1); + accelerator_->SubmitSlice(&pps, &slice_hdr, ref_pic_list, ref_pic_list, pic1, + kSliceData, sizeof(kSliceData), subsamples); + + // Save the resulting sample. + base::ScopedCFTypeRef<CMSampleBufferRef> sample1; + EXPECT_CALL(*this, OnDecode(_, _)).WillOnce(SaveArg<0>(&sample1)); + accelerator_->SubmitDecode(pic1); + + // The two samples should have the same configuration. + EXPECT_EQ(CMSampleBufferGetFormatDescription(sample0), + CMSampleBufferGetFormatDescription(sample1)); +} + +TEST_F(VideoToolboxH264AcceleratorTest, DecodeTwo_Reset) { + scoped_refptr<H264Picture> pic0 = accelerator_->CreateH264Picture(); + scoped_refptr<H264Picture> pic1 = accelerator_->CreateH264Picture(); + H264SPS sps; + H264PPS pps; + H264DPB dpb; + H264Picture::Vector ref_pic_list; + H264SliceHeader slice_hdr; + std::vector<SubsampleEntry> subsamples; + + // First frame. + accelerator_->ProcessSPS(&sps, base::make_span(kSPS0)); + accelerator_->ProcessPPS(&pps, base::make_span(kPPS0)); + accelerator_->SubmitFrameMetadata(&sps, &pps, dpb, ref_pic_list, ref_pic_list, + ref_pic_list, pic0); + accelerator_->SubmitSlice(&pps, &slice_hdr, ref_pic_list, ref_pic_list, pic0, + kSliceData, sizeof(kSliceData), subsamples); + + // Save the resulting sample. + base::ScopedCFTypeRef<CMSampleBufferRef> sample0; + EXPECT_CALL(*this, OnDecode(_, _)).WillOnce(SaveArg<0>(&sample0)); + accelerator_->SubmitDecode(pic0); + + // Reset. + accelerator_->Reset(); + + // Second frame. + accelerator_->ProcessSPS(&sps, base::make_span(kSPS0)); + accelerator_->ProcessPPS(&pps, base::make_span(kPPS0)); + accelerator_->SubmitFrameMetadata(&sps, &pps, dpb, ref_pic_list, ref_pic_list, + ref_pic_list, pic1); + accelerator_->SubmitSlice(&pps, &slice_hdr, ref_pic_list, ref_pic_list, pic1, + kSliceData, sizeof(kSliceData), subsamples); + + // Save the resulting sample. + base::ScopedCFTypeRef<CMSampleBufferRef> sample1; + EXPECT_CALL(*this, OnDecode(_, _)).WillOnce(SaveArg<0>(&sample1)); + accelerator_->SubmitDecode(pic1); + + // The two samples should have different configurations. 
+ EXPECT_NE(CMSampleBufferGetFormatDescription(sample0), + CMSampleBufferGetFormatDescription(sample1)); +} + +TEST_F(VideoToolboxH264AcceleratorTest, DecodeTwo_ConfigChange) { + scoped_refptr<H264Picture> pic0 = accelerator_->CreateH264Picture(); + scoped_refptr<H264Picture> pic1 = accelerator_->CreateH264Picture(); + H264SPS sps; + H264PPS pps; + H264DPB dpb; + H264Picture::Vector ref_pic_list; + H264SliceHeader slice_hdr; + std::vector<SubsampleEntry> subsamples; + + // First frame. + accelerator_->ProcessSPS(&sps, base::make_span(kSPS0)); + accelerator_->ProcessPPS(&pps, base::make_span(kPPS0)); + accelerator_->SubmitFrameMetadata(&sps, &pps, dpb, ref_pic_list, ref_pic_list, + ref_pic_list, pic0); + accelerator_->SubmitSlice(&pps, &slice_hdr, ref_pic_list, ref_pic_list, pic0, + kSliceData, sizeof(kSliceData), subsamples); + + // Save the resulting sample. + base::ScopedCFTypeRef<CMSampleBufferRef> sample0; + EXPECT_CALL(*this, OnDecode(_, _)).WillOnce(SaveArg<0>(&sample0)); + accelerator_->SubmitDecode(pic0); + + // Second frame. + accelerator_->ProcessSPS(&sps, base::make_span(kSPS1)); + accelerator_->ProcessPPS(&pps, base::make_span(kPPS1)); + accelerator_->SubmitFrameMetadata(&sps, &pps, dpb, ref_pic_list, ref_pic_list, + ref_pic_list, pic1); + accelerator_->SubmitSlice(&pps, &slice_hdr, ref_pic_list, ref_pic_list, pic1, + kSliceData, sizeof(kSliceData), subsamples); + + // Save the resulting sample. + base::ScopedCFTypeRef<CMSampleBufferRef> sample1; + EXPECT_CALL(*this, OnDecode(_, _)).WillOnce(SaveArg<0>(&sample1)); + accelerator_->SubmitDecode(pic1); + + // The two samples should have different configurations. + EXPECT_NE(CMSampleBufferGetFormatDescription(sample0), + CMSampleBufferGetFormatDescription(sample1)); +} + +} // namespace media
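The tests above rely on gmock's SaveArg action to capture the sample buffer passed to the decode callback so it can be inspected after SubmitDecode() returns. A small self-contained example of that capture pattern (generic MockSink/OnFrame names, unrelated to the VideoToolbox code):

    #include "testing/gmock/include/gmock/gmock.h"
    #include "testing/gtest/include/gtest/gtest.h"

    using ::testing::_;
    using ::testing::SaveArg;

    class MockSink {
     public:
      MOCK_METHOD(void, OnFrame, (int frame_id), ());
    };

    TEST(SaveArgExample, CapturesArgumentForLaterChecks) {
      MockSink sink;
      int captured = 0;
      // SaveArg<0> copies the first call argument into `captured`.
      EXPECT_CALL(sink, OnFrame(_)).WillOnce(SaveArg<0>(&captured));

      sink.OnFrame(42);

      EXPECT_EQ(captured, 42);
    }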
diff --git a/net/cert/pki/parsed_certificate_unittest.cc b/net/cert/pki/parsed_certificate_unittest.cc index 8c5503e..170007b 100644 --- a/net/cert/pki/parsed_certificate_unittest.cc +++ b/net/cert/pki/parsed_certificate_unittest.cc
@@ -52,8 +52,9 @@ VerifyCertErrors(expected_errors, errors, test_file_path); // Every parse failure being tested should emit error information. - if (!cert) + if (!cert) { EXPECT_FALSE(errors.ToDebugString().empty()); + } return cert; }
diff --git a/net/cert/pki/simple_path_builder_delegate_unittest.cc b/net/cert/pki/simple_path_builder_delegate_unittest.cc index a9dea50f..d2d4d9a4 100644 --- a/net/cert/pki/simple_path_builder_delegate_unittest.cc +++ b/net/cert/pki/simple_path_builder_delegate_unittest.cc
@@ -60,7 +60,7 @@ ::testing::ValuesIn(kSuccess1024Filenames)); TEST_P(SimplePathBuilderDelegate1024SuccessTest, IsAcceptableSignatureAndKey) { - SignatureAlgorithm signature_algorithm; + SignatureAlgorithm signature_algorithm{}; bssl::UniquePtr<EVP_PKEY> public_key; ASSERT_NO_FATAL_FAILURE( ReadTestCase(GetParam(), &signature_algorithm, &public_key)); @@ -87,7 +87,7 @@ ::testing::ValuesIn(kFail2048Filenames)); TEST_P(SimplePathBuilderDelegate2048FailTest, RsaKeySmallerThan2048) { - SignatureAlgorithm signature_algorithm; + SignatureAlgorithm signature_algorithm{}; bssl::UniquePtr<EVP_PKEY> public_key; ASSERT_NO_FATAL_FAILURE( ReadTestCase(GetParam(), &signature_algorithm, &public_key));
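The added `{}` turns a default-initialized local (whose value is indeterminate until ReadTestCase writes it) into a value-initialized one, presumably to satisfy compilers or sanitizers that flag a potential read of an uninitialized SignatureAlgorithm if the read fails. A tiny illustration of the difference (the enumerators are made up):

    enum class SignatureAlgorithm { kRsaPkcs1Sha256, kEcdsaSha256 };

    void Example() {
      SignatureAlgorithm a;    // Indeterminate value until assigned.
      SignatureAlgorithm b{};  // Value-initialized to the zero enumerator.
      (void)a;
      (void)b;
    }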
diff --git a/net/cert/pki/test_helpers.cc b/net/cert/pki/test_helpers.cc index b72a8cd..ac18beb9b 100644 --- a/net/cert/pki/test_helpers.cc +++ b/net/cert/pki/test_helpers.cc
@@ -67,23 +67,23 @@ return out; } -std::string_view StripString(std::string_view str) { +std::string StripString(std::string_view str) { size_t start = str.find_first_not_of(' '); if (start == str.npos) { - return std::string_view(); + return std::string(); } str = str.substr(start); size_t end = str.find_last_not_of(' '); if (end != str.npos) { ++end; } - return str.substr(0, end); + return std::string(str.substr(0, end)); } -std::vector<std::string_view> SplitString(std::string_view str) { +std::vector<std::string> SplitString(std::string_view str) { std::vector<std::string_view> split = string_util::SplitString(str, ','); - std::vector<std::string_view> out; + std::vector<std::string> out; for (const auto& s : split) { out.push_back(StripString(s)); } @@ -354,7 +354,7 @@ } } else if (GetValue("expected_user_constrained_policy_set: ", line_piece, &value, &has_user_constrained_policy_set)) { - std::vector<std::string_view> split_value(SplitString(value)); + std::vector<std::string> split_value(SplitString(value)); test->expected_user_constrained_policy_set = std::set<std::string>(split_value.begin(), split_value.end()); } else if (net::string_util::StartsWith(line_piece, "#")) {
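Returning std::string instead of std::string_view makes StripString() and SplitString() hand back values that own their storage; string_view results are only safe while the viewed buffer stays alive, an easy lifetime bug when callers pass temporaries or keep the result around. The diff does not state the exact motivation, but the hazard it removes looks like this (illustrative helpers, not the net:: code):

    #include <string>
    #include <string_view>

    // Risky: the returned view aliases whatever buffer `s` viewed.
    std::string_view FirstTokenView(std::string_view s) {
      return s.substr(0, s.find(','));
    }

    // Safe: the result owns its characters.
    std::string FirstTokenCopy(std::string_view s) {
      return std::string(s.substr(0, s.find(',')));
    }

    void Example() {
      // The temporary std::string dies at the end of this statement, so
      // `dangling` refers to freed storage; any later use is undefined.
      std::string_view dangling = FirstTokenView(std::string("a,b,c"));
      (void)dangling;

      std::string ok = FirstTokenCopy(std::string("a,b,c"));  // Fine.
      (void)ok;
    }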
diff --git a/net/cert_net/README b/net/cert_net/README deleted file mode 100644 index 67fe3ba..0000000 --- a/net/cert_net/README +++ /dev/null
@@ -1,6 +0,0 @@ -cert_net/ contains certificate functionality that depends on network loading (OCSP, CRL, AIA fetching). - -Conceptually certificates (net/cert/) is a separable concept from net/ and may -end up becoming its own build target. This file organization encourages not -adding dependencies in cert/ for doing network loading. Instead that code -should be placed here.
diff --git a/net/cert_net/README.md b/net/cert_net/README.md new file mode 100644 index 0000000..b5239ce --- /dev/null +++ b/net/cert_net/README.md
@@ -0,0 +1,11 @@ +`cert_net/` contains certificate functionality that depends on network loading +(OCSP, CRL, AIA fetching). + +The implementation in this directory is built on `URLRequest`. See also +`services/cert_verifier/cert_net_url_loader/` for an implementation built on +`URLLoader`. + +Conceptually certificates (`net/cert/`) is a separable concept from `net/` and +may end up becoming its own build target. This file organization encourages not +adding dependencies in `cert/` for doing network loading. Instead that code +should be placed here.
diff --git a/net/der/parse_values_unittest.cc b/net/der/parse_values_unittest.cc index 0b6d364..c1121e0 100644 --- a/net/der/parse_values_unittest.cc +++ b/net/der/parse_values_unittest.cc
@@ -220,8 +220,9 @@ uint64_t result; EXPECT_EQ(test_case.should_pass, ParseUint64(Input(test_case.input, test_case.length), &result)); - if (test_case.should_pass) + if (test_case.should_pass) { EXPECT_EQ(test_case.expected_value, result); + } } } @@ -256,8 +257,9 @@ uint8_t result; EXPECT_EQ(test_case.should_pass, ParseUint8(Input(test_case.input, test_case.length), &result)); - if (test_case.should_pass) + if (test_case.should_pass) { EXPECT_EQ(test_case.expected_value, result); + } } } @@ -302,8 +304,9 @@ EXPECT_EQ( test_case.should_pass, IsValidInteger(Input(test_case.input, test_case.length), &negative)); - if (test_case.should_pass) + if (test_case.should_pass) { EXPECT_EQ(test_case.negative, negative); + } } }
diff --git a/pdf/pdfium/pdfium_engine.cc b/pdf/pdfium/pdfium_engine.cc index 1096038..d88a544 100644 --- a/pdf/pdfium/pdfium_engine.cc +++ b/pdf/pdfium/pdfium_engine.cc
@@ -19,7 +19,6 @@ #include "base/check_op.h" #include "base/containers/contains.h" #include "base/containers/flat_map.h" -#include "base/debug/alias.h" #include "base/feature_list.h" #include "base/functional/bind.h" #include "base/location.h" @@ -2067,12 +2066,6 @@ gfx::Rect bounding_rect; gfx::Rect visible_rect = GetVisibleRect(); - // TODO(crbug.com/1108574): Remove after fixing the issue. - size_t find_results_size = find_results_.size(); - base::debug::Alias(&find_results_size); - size_t current_find_index_value = current_find_index_.value(); - base::debug::Alias(&current_find_index_value); - // Use zoom of 1.0 since `visible_rect` is without zoom. const std::vector<gfx::Rect>& rects = find_results_[current_find_index_.value()].GetScreenRects(
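The deleted lines were temporary crash-diagnosis instrumentation for crbug.com/1108574: base::debug::Alias() pins a local so its value survives optimization and shows up in crash minidumps. For reference, the pattern being removed looks roughly like this (HandleFind and its parameters are hypothetical; base::debug::Alias(const void*) is the real helper):

    #include "base/debug/alias.h"

    void HandleFind(size_t current_index, size_t result_count) {
      // Copy values into locals and alias them so they remain visible in a
      // crash dump if the code below crashes.
      size_t aliased_index = current_index;
      base::debug::Alias(&aliased_index);
      size_t aliased_count = result_count;
      base::debug::Alias(&aliased_count);
      // ... code under investigation ...
    }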
diff --git a/ppapi/cpp/file_ref.h b/ppapi/cpp/file_ref.h index 56ed3e3..8f9263f 100644 --- a/ppapi/cpp/file_ref.h +++ b/ppapi/cpp/file_ref.h
@@ -5,6 +5,8 @@ #ifndef PPAPI_CPP_FILE_REF_H_ #define PPAPI_CPP_FILE_REF_H_ +#include <vector> + #include "ppapi/c/pp_file_info.h" #include "ppapi/c/pp_stdint.h" #include "ppapi/c/ppb_file_ref.h"
diff --git a/ppapi/cpp/input_event.h b/ppapi/cpp/input_event.h index b084d4f..bc843251 100644 --- a/ppapi/cpp/input_event.h +++ b/ppapi/cpp/input_event.h
@@ -7,7 +7,7 @@ #include <stdint.h> -#include <string> +#include <utility> #include <vector> #include "ppapi/c/ppb_input_event.h"
diff --git a/ppapi/cpp/private/var_private.h b/ppapi/cpp/private/var_private.h index 6f31246..96e929e9 100644 --- a/ppapi/cpp/private/var_private.h +++ b/ppapi/cpp/private/var_private.h
@@ -7,6 +7,9 @@ #include <stdint.h> +#include <string> +#include <vector> + #include "ppapi/cpp/var.h" namespace pp {
diff --git a/ppapi/cpp/var.h b/ppapi/cpp/var.h index 9e38379..a894df2 100644 --- a/ppapi/cpp/var.h +++ b/ppapi/cpp/var.h
@@ -8,7 +8,6 @@ #include <stdint.h> #include <string> -#include <vector> #include "ppapi/c/pp_var.h" #include "ppapi/cpp/pass_ref.h"
diff --git a/ppapi/proxy/dispatcher.h b/ppapi/proxy/dispatcher.h index f2b29b1..36f6b40 100644 --- a/ppapi/proxy/dispatcher.h +++ b/ppapi/proxy/dispatcher.h
@@ -5,9 +5,7 @@ #ifndef PPAPI_PROXY_DISPATCHER_H_ #define PPAPI_PROXY_DISPATCHER_H_ -#include <set> -#include <string> -#include <vector> +#include <memory> #include "base/compiler_specific.h" #include "base/memory/scoped_refptr.h"
diff --git a/ppapi/proxy/file_io_resource.h b/ppapi/proxy/file_io_resource.h index 34df3000..d723a2f 100644 --- a/ppapi/proxy/file_io_resource.h +++ b/ppapi/proxy/file_io_resource.h
@@ -8,7 +8,6 @@ #include <stdint.h> #include <memory> -#include <string> #include "base/files/file.h" #include "base/memory/ref_counted.h"
diff --git a/ppapi/proxy/file_ref_resource.h b/ppapi/proxy/file_ref_resource.h index 78535d0..414cf28 100644 --- a/ppapi/proxy/file_ref_resource.h +++ b/ppapi/proxy/file_ref_resource.h
@@ -7,7 +7,7 @@ #include <stdint.h> -#include <string> +#include <vector> #include "ppapi/c/pp_instance.h" #include "ppapi/c/pp_resource.h"
diff --git a/ppapi/proxy/plugin_var_tracker.h b/ppapi/proxy/plugin_var_tracker.h index 1783c3e..a187a07e 100644 --- a/ppapi/proxy/plugin_var_tracker.h +++ b/ppapi/proxy/plugin_var_tracker.h
@@ -6,7 +6,6 @@ #define PPAPI_PROXY_PLUGIN_VAR_TRACKER_H_ #include <map> -#include <string> #include "base/compiler_specific.h" #include "base/memory/scoped_refptr.h"
diff --git a/ppapi/proxy/ppb_graphics_3d_proxy.h b/ppapi/proxy/ppb_graphics_3d_proxy.h index 560b1cd..69ba5f1 100644 --- a/ppapi/proxy/ppb_graphics_3d_proxy.h +++ b/ppapi/proxy/ppb_graphics_3d_proxy.h
@@ -7,8 +7,6 @@ #include <stdint.h> -#include <vector> - #include "gpu/command_buffer/common/command_buffer.h" #include "gpu/command_buffer/common/command_buffer_id.h" #include "ppapi/c/pp_graphics_3d.h"
diff --git a/ppapi/proxy/ppb_instance_proxy.h b/ppapi/proxy/ppb_instance_proxy.h index 3f5b57d5..8f856641f 100644 --- a/ppapi/proxy/ppb_instance_proxy.h +++ b/ppapi/proxy/ppb_instance_proxy.h
@@ -8,7 +8,6 @@ #include <stdint.h> #include <string> -#include <vector> #include "build/build_config.h" #include "ppapi/c/pp_instance.h"
diff --git a/ppapi/proxy/ppp_printing_proxy.h b/ppapi/proxy/ppp_printing_proxy.h index fdf097c..3bb26a5 100644 --- a/ppapi/proxy/ppp_printing_proxy.h +++ b/ppapi/proxy/ppp_printing_proxy.h
@@ -7,7 +7,6 @@ #include <stdint.h> -#include <string> #include <vector> #include "ppapi/c/dev/ppp_printing_dev.h"
diff --git a/ppapi/proxy/serialized_handle.h b/ppapi/proxy/serialized_handle.h index 1c1029c..c2f3825 100644 --- a/ppapi/proxy/serialized_handle.h +++ b/ppapi/proxy/serialized_handle.h
@@ -7,8 +7,7 @@ #include <stdint.h> -#include <string> -#include <vector> +#include <utility> #include "base/atomicops.h" #include "base/check_op.h"
diff --git a/ppapi/shared_impl/ppb_video_decoder_shared.h b/ppapi/shared_impl/ppb_video_decoder_shared.h index e26ef2c..49c1d64 100644 --- a/ppapi/shared_impl/ppb_video_decoder_shared.h +++ b/ppapi/shared_impl/ppb_video_decoder_shared.h
@@ -8,7 +8,6 @@ #include <stdint.h> #include <map> -#include <vector> #include "base/compiler_specific.h" #include "ppapi/c/dev/ppb_video_decoder_dev.h"
diff --git a/ppapi/shared_impl/tracked_callback.h b/ppapi/shared_impl/tracked_callback.h index e612cd0..e2ffe4e 100644 --- a/ppapi/shared_impl/tracked_callback.h +++ b/ppapi/shared_impl/tracked_callback.h
@@ -7,9 +7,7 @@ #include <stdint.h> -#include <map> #include <memory> -#include <set> #include "base/functional/callback.h" #include "base/memory/ref_counted.h"
diff --git a/ppapi/tests/test_crypto.h b/ppapi/tests/test_crypto.h index 0d2f87b0..e68b409 100644 --- a/ppapi/tests/test_crypto.h +++ b/ppapi/tests/test_crypto.h
@@ -6,7 +6,6 @@ #define PPAPI_TESTS_TEST_CRYPTO_H_ #include <string> -#include <vector> #include "ppapi/c/dev/ppb_crypto_dev.h" #include "ppapi/tests/test_case.h"
diff --git a/ppapi/tests/test_cursor_control.h b/ppapi/tests/test_cursor_control.h index e0f1566..daba457 100644 --- a/ppapi/tests/test_cursor_control.h +++ b/ppapi/tests/test_cursor_control.h
@@ -6,7 +6,6 @@ #define PPAPI_TESTS_TEST_CURSOR_CONTROL_H_ #include <string> -#include <vector> #include "ppapi/c/dev/ppb_cursor_control_dev.h" #include "ppapi/tests/test_case.h"
diff --git a/ppapi/tests/test_input_event.h b/ppapi/tests/test_input_event.h index 977a2e2a..32992399 100644 --- a/ppapi/tests/test_input_event.h +++ b/ppapi/tests/test_input_event.h
@@ -8,7 +8,6 @@ #include <stdint.h> #include <string> -#include <vector> #include "ppapi/c/ppb_input_event.h" #include "ppapi/c/private/ppb_testing_private.h"
diff --git a/ppapi/tests/test_message_handler.h b/ppapi/tests/test_message_handler.h index d0d2e19..dabea70 100644 --- a/ppapi/tests/test_message_handler.h +++ b/ppapi/tests/test_message_handler.h
@@ -6,7 +6,6 @@ #define PPAPI_TESTS_TEST_MESSAGE_HANDLER_H_ #include <string> -#include <vector> #include "ppapi/c/ppb_messaging.h" #include "ppapi/tests/test_case.h"
diff --git a/ppapi/thunk/ppb_video_capture_api.h b/ppapi/thunk/ppb_video_capture_api.h index 7af5710..fbaf4983 100644 --- a/ppapi/thunk/ppb_video_capture_api.h +++ b/ppapi/thunk/ppb_video_capture_api.h
@@ -8,7 +8,6 @@ #include <stdint.h> #include <string> -#include <vector> #include "base/memory/scoped_refptr.h" #include "ppapi/c/dev/ppb_video_capture_dev.h"
diff --git a/remoting/host/file_transfer/file_chooser_chromeos.cc b/remoting/host/file_transfer/file_chooser_chromeos.cc index c8b7681..66ffad9 100644 --- a/remoting/host/file_transfer/file_chooser_chromeos.cc +++ b/remoting/host/file_transfer/file_chooser_chromeos.cc
@@ -108,7 +108,6 @@ std::move(callback_).Run(result); } -// TODO(b/284754221): Replace title to select file dialog. void FileChooserChromeOs::Core::Show() { select_file_dialog_->SelectFile( ui::SelectFileDialog::SELECT_OPEN_FILE,
diff --git a/remoting/host/it2me/it2me_host.cc b/remoting/host/it2me/it2me_host.cc index e6589c9..633da0dd 100644 --- a/remoting/host/it2me/it2me_host.cc +++ b/remoting/host/it2me/it2me_host.cc
@@ -267,6 +267,9 @@ chrome_os_enterprise_params_->terminate_upon_input); options.set_enable_curtaining( chrome_os_enterprise_params_->curtain_local_user_session); + // TODO(b/286835721): Inform in UI when file transfer is disabled by policy. + options.set_enable_file_transfer( + chrome_os_enterprise_params_->allow_file_transfer); } #endif
diff --git a/remoting/host/it2me/it2me_host_unittest.cc b/remoting/host/it2me/it2me_host_unittest.cc index 3c52d6fa..1f1c7d2 100644 --- a/remoting/host/it2me/it2me_host_unittest.cc +++ b/remoting/host/it2me/it2me_host_unittest.cc
@@ -877,6 +877,17 @@ EXPECT_FALSE(GetHost()->desktop_environment_options().enable_curtaining()); } +TEST_F(It2MeHostTest, ConnectRespectsAllowFileTransferParameter) { + enterprise_params_ = {.allow_file_transfer = true}; + StartHost(); + EXPECT_TRUE(GetHost()->desktop_environment_options().enable_file_transfer()); +} + +TEST_F(It2MeHostTest, EnableFileTransferDefaultsToFalse) { + StartHost(); + EXPECT_FALSE(GetHost()->desktop_environment_options().enable_file_transfer()); +} + TEST_F(It2MeHostTest, EnterpriseSessionsSucceedWhenRemoteSupportConnectionsPolicyDisabled) { SetPolicies({{policy::key::kRemoteAccessHostAllowRemoteSupportConnections,
diff --git a/remoting/host/it2me_desktop_environment.cc b/remoting/host/it2me_desktop_environment.cc index 6393b76..8914ae1 100644 --- a/remoting/host/it2me_desktop_environment.cc +++ b/remoting/host/it2me_desktop_environment.cc
@@ -12,11 +12,13 @@ #include "base/memory/weak_ptr.h" #include "base/task/single_thread_task_runner.h" #include "build/build_config.h" +#include "remoting/host/basic_desktop_environment.h" #include "remoting/host/client_session_control.h" #include "remoting/host/host_window.h" #include "remoting/host/host_window_proxy.h" #include "remoting/host/input_monitor/local_input_monitor.h" #include "remoting/host/session_terminator.h" +#include "remoting/protocol/capability_names.h" #include "remoting/protocol/errors.h" #if BUILDFLAG(IS_POSIX) @@ -121,6 +123,16 @@ #endif // BUILDFLAG(IS_CHROMEOS) } +std::string It2MeDesktopEnvironment::GetCapabilities() const { + std::string capabilities = BasicDesktopEnvironment::GetCapabilities(); + if (desktop_environment_options().enable_file_transfer()) { + capabilities += " "; + capabilities += protocol::kFileTransferCapability; + } + + return capabilities; +} + void It2MeDesktopEnvironment::InitializeCurtainModeIfNoUserLoggedIn( base::WeakPtr<ClientSessionControl> client_session_control, bool is_user_logged_in) {
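Note on the change above: GetCapabilities() returns a single space-separated string of capability tokens, and the file-transfer token from remoting/protocol/capability_names.h is appended only when enable_file_transfer() is set on the options. As a minimal, self-contained sketch of how such a token list can be consumed (a hypothetical helper, not the remoting implementation):

#include <sstream>
#include <string>

// Hypothetical helper: returns true if the space-separated capability string
// |capabilities| contains |capability| as an exact token (a substring of a
// longer token does not count).
bool HasCapabilityToken(const std::string& capabilities,
                        const std::string& capability) {
  std::istringstream stream(capabilities);
  std::string token;
  while (stream >> token) {
    if (token == capability)
      return true;
  }
  return false;
}

// Example: HasCapabilityToken(desktop_environment->GetCapabilities(),
//                             remoting::protocol::kFileTransferCapability)
// is expected to be true only when the file-transfer option was enabled.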
diff --git a/remoting/host/it2me_desktop_environment.h b/remoting/host/it2me_desktop_environment.h index e8391e2..886cea5 100644 --- a/remoting/host/it2me_desktop_environment.h +++ b/remoting/host/it2me_desktop_environment.h
@@ -33,6 +33,9 @@ void InitializeCurtainMode( base::WeakPtr<ClientSessionControl> client_session_control); + // BasicDesktopEnvironment implementation: + std::string GetCapabilities() const override; + bool is_curtained() const { return curtain_mode_ != nullptr; } protected:
diff --git a/remoting/host/it2me_desktop_environment_unittest.cc b/remoting/host/it2me_desktop_environment_unittest.cc index f2fd9d64..9ea2a3e 100644 --- a/remoting/host/it2me_desktop_environment_unittest.cc +++ b/remoting/host/it2me_desktop_environment_unittest.cc
@@ -17,6 +17,7 @@ #include "remoting/host/client_session_control.h" #include "remoting/host/client_session_events.h" #include "remoting/proto/control.pb.h" +#include "remoting/protocol/capability_names.h" #include "remoting/protocol/errors.h" #include "testing/gmock/include/gmock/gmock.h" #include "testing/gtest/include/gtest/gtest.h" @@ -376,6 +377,33 @@ EXPECT_THAT(ash_proxy().request_sign_out_count(), Eq(0)); } + +TEST_F(It2MeDesktopEnvironmentTest, + ShouldHaveFileTransferCapabilitiesWhenEnabled) { + base::test::ScopedFeatureList feature_list; + feature_list.InitAndEnableFeature(features::kEnableCrdFileTransferForKiosk); + DesktopEnvironmentOptions options(default_options()); + std::string expected_capabilities(" "); + expected_capabilities += protocol::kFileTransferCapability; + + options.set_enable_file_transfer(true); + auto desktop_environment = Create(options); + + EXPECT_THAT(desktop_environment->GetCapabilities(), + testing::HasSubstr(expected_capabilities)); +} + +TEST_F(It2MeDesktopEnvironmentTest, + ShouldNotHaveFileTransferCapabilitiesWhenDisabled) { + base::test::ScopedFeatureList feature_list; + feature_list.InitAndEnableFeature(features::kEnableCrdFileTransferForKiosk); + DesktopEnvironmentOptions options(default_options()); + + options.set_enable_file_transfer(false); + auto desktop_environment = Create(options); + + EXPECT_THAT(desktop_environment->GetCapabilities(), testing::HasSubstr("")); +} #endif // BUILDFLAG(IS_CHROMEOS) } // namespace
diff --git a/services/cert_verifier/cert_verifier_creation.cc b/services/cert_verifier/cert_verifier_creation.cc index 558814f..f5e96ff 100644 --- a/services/cert_verifier/cert_verifier_creation.cc +++ b/services/cert_verifier/cert_verifier_creation.cc
@@ -11,7 +11,6 @@ #include "net/cert/cert_verify_proc.h" #include "net/cert/crl_set.h" #include "net/cert/multi_threaded_cert_verifier.h" -#include "net/cert_net/cert_net_fetcher_url_request.h" #include "net/net_buildflags.h" #if BUILDFLAG(IS_FUCHSIA) || BUILDFLAG(IS_LINUX) || BUILDFLAG(IS_CHROMEOS)
diff --git a/storage/browser/quota/quota_manager_proxy.cc b/storage/browser/quota/quota_manager_proxy.cc index b13b11bb..cd6eea4 100644 --- a/storage/browser/quota/quota_manager_proxy.cc +++ b/storage/browser/quota/quota_manager_proxy.cc
@@ -530,37 +530,6 @@ quota_manager_impl_->GetUsageAndQuota(storage_key, type, std::move(respond)); } -void QuotaManagerProxy::GetUsageAndQuotaWithBreakdown( - const StorageKey& storage_key, - blink::mojom::StorageType type, - scoped_refptr<base::SequencedTaskRunner> callback_task_runner, - UsageAndQuotaWithBreakdownCallback callback) { - DCHECK(callback_task_runner); - DCHECK(callback); - - if (!quota_manager_impl_task_runner_->RunsTasksInCurrentSequence()) { - quota_manager_impl_task_runner_->PostTask( - FROM_HERE, - base::BindOnce(&QuotaManagerProxy::GetUsageAndQuotaWithBreakdown, this, - storage_key, type, std::move(callback_task_runner), - std::move(callback))); - return; - } - - DCHECK_CALLED_ON_VALID_SEQUENCE(quota_manager_impl_sequence_checker_); - - auto respond = - base::BindPostTask(std::move(callback_task_runner), std::move(callback)); - if (!quota_manager_impl_) { - std::move(respond).Run(blink::mojom::QuotaStatusCode::kErrorAbort, 0, 0, - nullptr); - return; - } - - quota_manager_impl_->GetUsageAndQuotaWithBreakdown(storage_key, type, - std::move(respond)); -} - void QuotaManagerProxy::GetBucketUsageAndQuota( BucketId bucket, scoped_refptr<base::SequencedTaskRunner> callback_task_runner, @@ -643,6 +612,36 @@ std::move(respond).Run(is_storage_unlimited); } +void QuotaManagerProxy::GetStorageKeyUsageWithBreakdown( + const blink::StorageKey& storage_key, + blink::mojom::StorageType type, + scoped_refptr<base::SequencedTaskRunner> callback_task_runner, + UsageWithBreakdownCallback callback) { + CHECK(callback_task_runner); + CHECK(callback); + + if (!quota_manager_impl_task_runner_->RunsTasksInCurrentSequence()) { + quota_manager_impl_task_runner_->PostTask( + FROM_HERE, + base::BindOnce(&QuotaManagerProxy::GetStorageKeyUsageWithBreakdown, + this, storage_key, type, std::move(callback_task_runner), + std::move(callback))); + return; + } + + DCHECK_CALLED_ON_VALID_SEQUENCE(quota_manager_impl_sequence_checker_); + + auto respond = + base::BindPostTask(std::move(callback_task_runner), std::move(callback)); + if (!quota_manager_impl_) { + std::move(respond).Run(0, nullptr); + return; + } + + quota_manager_impl_->GetStorageKeyUsageWithBreakdown(storage_key, type, + std::move(respond)); +} + std::unique_ptr<QuotaOverrideHandle> QuotaManagerProxy::GetQuotaOverrideHandle() { return std::make_unique<QuotaOverrideHandle>(this);
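Note on the change above: the new GetStorageKeyUsageWithBreakdown() keeps the proxy's usual sequence-hopping contract: when called off the QuotaManagerImpl sequence it re-posts itself there, and the reply is bounced back to the caller-supplied task runner via base::BindPostTask. A hedged usage sketch follows; the callback signature (int64_t usage, blink::mojom::UsageBreakdownPtr breakdown) is inferred from the abort path above, which replies with (0, nullptr), and the proxy and storage key are assumed to be provided by the caller.

#include "base/functional/bind.h"
#include "base/task/sequenced_task_runner.h"
#include "storage/browser/quota/quota_manager_proxy.h"
#include "third_party/blink/public/common/storage_key/storage_key.h"

// Sketch only: asks the proxy for per-storage-key usage and receives the
// reply on the current (calling) sequence.
void QueryUsageBreakdown(storage::QuotaManagerProxy& proxy,
                         const blink::StorageKey& storage_key) {
  proxy.GetStorageKeyUsageWithBreakdown(
      storage_key, blink::mojom::StorageType::kTemporary,
      base::SequencedTaskRunner::GetCurrentDefault(),
      base::BindOnce(
          [](int64_t usage, blink::mojom::UsageBreakdownPtr breakdown) {
            // Runs on the caller's sequence because the proxy wraps the
            // callback with base::BindPostTask before invoking it.
          }));
}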
diff --git a/storage/browser/quota/quota_manager_proxy.h b/storage/browser/quota/quota_manager_proxy.h index fbb0eba..f8bfa7f 100644 --- a/storage/browser/quota/quota_manager_proxy.h +++ b/storage/browser/quota/quota_manager_proxy.h
@@ -212,12 +212,6 @@ scoped_refptr<base::SequencedTaskRunner> callback_task_runner, UsageAndQuotaCallback callback); - void GetUsageAndQuotaWithBreakdown( - const blink::StorageKey& storage_key, - blink::mojom::StorageType type, - scoped_refptr<base::SequencedTaskRunner> callback_task_runner, - UsageAndQuotaWithBreakdownCallback callback); - void GetBucketUsageAndQuota( BucketId bucket, scoped_refptr<base::SequencedTaskRunner> callback_task_runner, @@ -238,6 +232,12 @@ scoped_refptr<base::SequencedTaskRunner> callback_task_runner, base::OnceCallback<void(bool)> callback); + void GetStorageKeyUsageWithBreakdown( + const blink::StorageKey& storage_key, + blink::mojom::StorageType type, + scoped_refptr<base::SequencedTaskRunner> callback_task_runner, + UsageWithBreakdownCallback callback); + // DevTools Quota Override methods: std::unique_ptr<QuotaOverrideHandle> GetQuotaOverrideHandle(); // Called by QuotaOverrideHandle upon construction to asynchronously
diff --git a/testing/buildbot/chromium.chromiumos.json b/testing/buildbot/chromium.chromiumos.json index 84db902..7235a06 100644 --- a/testing/buildbot/chromium.chromiumos.json +++ b/testing/buildbot/chromium.chromiumos.json
@@ -5669,9 +5669,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.interactive_ui_tests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -5682,8 +5682,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -5834,9 +5834,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -5847,8 +5847,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -5981,9 +5981,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -5994,8 +5994,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [
diff --git a/testing/buildbot/chromium.coverage.json b/testing/buildbot/chromium.coverage.json index cd9ce2e..5d2b261 100644 --- a/testing/buildbot/chromium.coverage.json +++ b/testing/buildbot/chromium.coverage.json
@@ -25493,9 +25493,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.interactive_ui_tests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -25506,8 +25506,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -25658,9 +25658,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -25671,8 +25671,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -25805,9 +25805,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -25818,8 +25818,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [
diff --git a/testing/buildbot/chromium.fyi.json b/testing/buildbot/chromium.fyi.json index e5f877f..24db1695 100644 --- a/testing/buildbot/chromium.fyi.json +++ b/testing/buildbot/chromium.fyi.json
@@ -37368,9 +37368,6 @@ "linux-cr23-rel": { "gtest_tests": [ { - "args": [ - "--enable-features=ChromeRefresh2023" - ], "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -37383,18 +37380,61 @@ } ], "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", - "shards": 10 + "shards": 20 }, "test": "browser_tests", "test_id_prefix": "ninja://chrome/test:browser_tests/" }, { "args": [ - "--enable-features=ChromeRefresh2023" + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter;../../testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter" ], "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, + "name": "cr23_browser_tests", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Ubuntu-22.04" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 2 + }, + "test": "browser_tests", + "test_id_prefix": "ninja://chrome/test:browser_tests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_browser_tests_full", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Ubuntu-22.04" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 20 + }, + "test": "browser_tests", + "test_id_prefix": "ninja://chrome/test:browser_tests/" + }, + { + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, "swarming": { "can_use_on_swarming_builders": true, "dimension_sets": [ @@ -37411,11 +37451,75 @@ }, { "args": [ - "--enable-features=ChromeRefresh2023" + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter;../../testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter" ], "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, + "name": "cr23_interactive_ui_tests", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Ubuntu-22.04" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com" + }, + "test": "interactive_ui_tests", + "test_id_prefix": "ninja://chrome/test:interactive_ui_tests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_interactive_ui_tests_full", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Ubuntu-22.04" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 10 + }, + "test": "interactive_ui_tests", + "test_id_prefix": "ninja://chrome/test:interactive_ui_tests/" + }, + { + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Ubuntu-22.04" + } + ], + "service_account": 
"chromium-tester@chops-service-accounts.iam.gserviceaccount.com" + }, + "test": "views_unittests", + "test_id_prefix": "ninja://ui/views:views_unittests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.linux.cr23_views_unittests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_views_unittests", "swarming": { "can_use_on_swarming_builders": true, "dimension_sets": [ @@ -38322,9 +38426,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.interactive_ui_tests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -38334,8 +38438,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -38487,9 +38591,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -38499,8 +38603,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -38634,9 +38738,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -38646,8 +38750,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -40111,9 +40215,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.interactive_ui_tests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", 
"merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -40123,8 +40227,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -40276,9 +40380,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -40288,8 +40392,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -40423,9 +40527,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -40435,8 +40539,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -41171,9 +41275,9 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.interactive_ui_tests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome" + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -41183,8 +41287,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -44042,9 +44146,7 @@ "mac-cr23-rel": { "gtest_tests": [ { - "args": [ - "--enable-features=ChromeRefresh2023" - ], + "ci_only": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -44064,8 +44166,53 @@ }, { "args": [ - "--enable-features=ChromeRefresh2023" + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter;../../testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter" ], + "ci_only": true, + "merge": { + "script": 
"//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_browser_tests", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Mac-12" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 2 + }, + "test": "browser_tests", + "test_id_prefix": "ninja://chrome/test:browser_tests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter" + ], + "ci_only": true, + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_browser_tests_full", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Mac-12" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 20 + }, + "test": "browser_tests", + "test_id_prefix": "ninja://chrome/test:browser_tests/" + }, + { "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -44085,11 +44232,75 @@ }, { "args": [ - "--enable-features=ChromeRefresh2023" + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter;../../testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter" ], "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, + "name": "cr23_interactive_ui_tests", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Mac-12" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com" + }, + "test": "interactive_ui_tests", + "test_id_prefix": "ninja://chrome/test:interactive_ui_tests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_interactive_ui_tests_full", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Mac-12" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 10 + }, + "test": "interactive_ui_tests", + "test_id_prefix": "ninja://chrome/test:interactive_ui_tests/" + }, + { + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Mac-12" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com" + }, + "test": "views_unittests", + "test_id_prefix": "ninja://ui/views:views_unittests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.mac.cr23_views_unittests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_views_unittests", "swarming": { "can_use_on_swarming_builders": true, "dimension_sets": [ @@ -46361,9 +46572,6 @@ "win-cr23-rel": { "gtest_tests": [ { - "args": [ - "--enable-features=ChromeRefresh2023" - ], "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, @@ -46376,18 +46584,61 @@ } ], "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", - "shards": 10 + "shards": 20 }, 
"test": "browser_tests", "test_id_prefix": "ninja://chrome/test:browser_tests/" }, { "args": [ - "--enable-features=ChromeRefresh2023" + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter;../../testing/buildbot/filters/cr23.win.cr23_browser_tests.filter" ], "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, + "name": "cr23_browser_tests", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Windows-10" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 2 + }, + "test": "browser_tests", + "test_id_prefix": "ninja://chrome/test:browser_tests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.win.cr23_browser_tests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_browser_tests_full", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Windows-10" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com", + "shards": 20 + }, + "test": "browser_tests", + "test_id_prefix": "ninja://chrome/test:browser_tests/" + }, + { + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, "swarming": { "can_use_on_swarming_builders": true, "dimension_sets": [ @@ -46404,11 +46655,74 @@ }, { "args": [ - "--enable-features=ChromeRefresh2023" + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter;../../testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter" ], "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" }, + "name": "cr23_interactive_ui_tests", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Windows-10" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com" + }, + "test": "interactive_ui_tests", + "test_id_prefix": "ninja://chrome/test:interactive_ui_tests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": "cr23_interactive_ui_tests_full", + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Windows-10" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com" + }, + "test": "interactive_ui_tests", + "test_id_prefix": "ninja://chrome/test:interactive_ui_tests/" + }, + { + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "swarming": { + "can_use_on_swarming_builders": true, + "dimension_sets": [ + { + "cpu": "x86-64", + "os": "Windows-10" + } + ], + "service_account": "chromium-tester@chops-service-accounts.iam.gserviceaccount.com" + }, + "test": "views_unittests", + "test_id_prefix": "ninja://ui/views:views_unittests/" + }, + { + "args": [ + "--enable-features=ChromeRefresh2023", + "--test-launcher-filter-file=../../testing/buildbot/filters/cr23.win.cr23_views_unittests.filter" + ], + "merge": { + "script": "//testing/merge_scripts/standard_gtest_merge.py" + }, + "name": 
"cr23_views_unittests", "swarming": { "can_use_on_swarming_builders": true, "dimension_sets": [
diff --git a/testing/buildbot/chromium.memory.json b/testing/buildbot/chromium.memory.json index 43962bc..6a51f841 100644 --- a/testing/buildbot/chromium.memory.json +++ b/testing/buildbot/chromium.memory.json
@@ -18080,12 +18080,12 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.interactive_ui_tests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome", + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome", "--test-launcher-print-test-stdio=always", "--combine-ash-logs-on-bots", "--asan-symbolize-output" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -18096,8 +18096,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -18265,12 +18265,12 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome", + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome", "--test-launcher-print-test-stdio=always", "--combine-ash-logs-on-bots", "--asan-symbolize-output" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -18281,8 +18281,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [ @@ -18427,12 +18427,12 @@ { "args": [ "--test-launcher-filter-file=../../testing/buildbot/filters/linux-lacros.lacros_chrome_browsertests.skew.filter", - "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome", + "--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome", "--test-launcher-print-test-stdio=always", "--combine-ash-logs-on-bots", "--asan-symbolize-output" ], - "description": "Run with ash-chrome version 116.0.5827.0", + "description": "Run with ash-chrome version 116.0.5828.0", "isolate_profile_data": true, "merge": { "script": "//testing/merge_scripts/standard_gtest_merge.py" @@ -18443,8 +18443,8 @@ "cipd_packages": [ { "cipd_package": "chromium/testing/linux-ash-chromium/x86_64/ash.zip", - "location": "lacros_version_skew_tests_v116.0.5827.0", - "revision": "version:116.0.5827.0" + "location": "lacros_version_skew_tests_v116.0.5828.0", + "revision": "version:116.0.5828.0" } ], "dimension_sets": [
diff --git a/testing/buildbot/filters/BUILD.gn b/testing/buildbot/filters/BUILD.gn index 9aac7746..809e217 100644 --- a/testing/buildbot/filters/BUILD.gn +++ b/testing/buildbot/filters/BUILD.gn
@@ -57,6 +57,10 @@ "//testing/buildbot/filters/chromeos.msan.browser_tests.oobe_negative.filter", "//testing/buildbot/filters/chromeos.msan.browser_tests.oobe_positive.filter", "//testing/buildbot/filters/code_coverage.browser_tests.filter", + "//testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter", + "//testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter", + "//testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter", + "//testing/buildbot/filters/cr23.win.cr23_browser_tests.filter", "//testing/buildbot/filters/fuchsia.browser_tests.filter", "//testing/buildbot/filters/linux-chromeos.browser_tests.pixel_tests.filter", "//testing/buildbot/filters/linux-chromeos.browser_tests.require_lacros.filter", @@ -219,6 +223,9 @@ testonly = true data = [ + "//testing/buildbot/filters/cr23.linux.cr23_views_unittests.filter", + "//testing/buildbot/filters/cr23.mac.cr23_views_unittests.filter", + "//testing/buildbot/filters/cr23.win.cr23_views_unittests.filter", "//testing/buildbot/filters/fuchsia.debug.views_unittests.filter", "//testing/buildbot/filters/fuchsia.views_unittests.filter", ] @@ -294,6 +301,10 @@ testonly = true data = [ + "//testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter", + "//testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter", + "//testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter", + "//testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter", "//testing/buildbot/filters/linux-lacros.interactive_ui_tests.filter", "//testing/buildbot/filters/linux-lacros.interactive_ui_tests.skew.filter",
diff --git a/testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter b/testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter new file mode 100644 index 0000000..813fcea0 --- /dev/null +++ b/testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter
@@ -0,0 +1,10 @@ +# cr23_browser_tests that are expected to fail on linux bots: +-All/PromotionalTabsEnabledPolicyWhatsNewInvalidTest.RunTest/0 +-All/PromotionalTabsEnabledPolicyWhatsNewInvalidTest.RunTest/2 +-BrowserNonClientFrameViewBrowserTest.IncognitoIsCorrectColor +-LocationIconViewBrowserTest.InkDropMode +-ManagedUiTest/ManagedUiTest.GetManagedUiIconEnterprise/0 +-ManagedUiTest/ManagedUiTest.GetManagedUiIconEnterprise/1 +-OmniboxPopupViewViewsTest.ThemeIntegration +-PermissionRequestChipGestureSensitiveBrowserTest.ChipFinalizedWhenInteractingWithOmnibox +-WebAppBrowserTest.InstallToShelfContainsAppName
diff --git a/testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter b/testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter new file mode 100644 index 0000000..c98ea13 --- /dev/null +++ b/testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter
@@ -0,0 +1,18 @@ +# cr23_interactive_ui_tests that are expected to fail on linux bots: +-FocusRingBrowserTest.Anchor +-FocusRingBrowserTest.Button +-FocusRingBrowserTest.Checkbox +-FocusRingBrowserTest.DarkModeButton +-FocusRingBrowserTest.Radio +-LocalCardMigrationBrowserTest.AcceptingDialogAddsLocalCardMigrationStrikes +-LocalCardMigrationBrowserTest.CardIdentifierString +-LocalCardMigrationBrowserTest.ClickingCancelClosesDialog +-LocalCardMigrationBrowserTest.ClickingContinueOpensDialog +-LocalCardMigrationBrowserTest.ClickingSaveClosesDialog +-LocalCardMigrationBrowserTest.ClosedReason_BubbleAccepted +-LocalCardMigrationBrowserTest.DeleteSuccessfullyMigratedCardsFromLocal +-LocalCardMigrationBrowserTest.DialogContainsAllValidMigratableCard +-LocalCardMigrationBrowserTest.RejectingDialogAddsLocalCardMigrationStrikes +-LocalCardMigrationBrowserUiTest.InvokeUi_default +-TabDragging/DetachToBrowserTabDragControllerTest.DragMultipleTabsRightIntoGroup/0 +-TabDragging/DetachToBrowserTabDragControllerTest.DragMultipleTabsRightIntoGroup/1
diff --git a/testing/buildbot/filters/cr23.linux.cr23_views_unittests.filter b/testing/buildbot/filters/cr23.linux.cr23_views_unittests.filter new file mode 100644 index 0000000..2eb2e1c --- /dev/null +++ b/testing/buildbot/filters/cr23.linux.cr23_views_unittests.filter
@@ -0,0 +1 @@ +# cr23_views_unittests that are expected to fail on linux bots:
diff --git a/testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter b/testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter new file mode 100644 index 0000000..49b8435 --- /dev/null +++ b/testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter
@@ -0,0 +1,21 @@ +# cr23_browser_tests that are expected to fail on mac bots: +-All/PromotionalTabsEnabledPolicyWhatsNewInvalidTest.RunTest/0 +-All/PromotionalTabsEnabledPolicyWhatsNewInvalidTest.RunTest/2 +-BrowserNonClientFrameViewBrowserTest.IncognitoIsCorrectColor +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_AllInfoTypes +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_DetailedPermission +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_External +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_ExternalWithPermission +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_ManyPermissions +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_MultilinePermission +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_ReEnable +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_Simple +-ExtensionInstallDialogViewInteractiveBrowserTest.InvokeUi_WithWithholdingOption +-LoadImageBrowserTest.LoadImage +-LoadImageBrowserTest.LoadImageWithMap +-LocationIconViewBrowserTest.InkDropMode +-ManagedUiTest/ManagedUiTest.GetManagedUiIconEnterprise/0 +-ManagedUiTest/ManagedUiTest.GetManagedUiIconEnterprise/1 +-PermissionRequestChipGestureSensitiveBrowserTest.ChipFinalizedWhenInteractingWithOmnibox +-PrivacySandboxDialogSmallWindowTest.All +-WebAppBrowserTest.InstallToShelfContainsAppName
diff --git a/testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter b/testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter new file mode 100644 index 0000000..ad265c5 --- /dev/null +++ b/testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter
@@ -0,0 +1,21 @@ +# cr23_interactive_ui_tests that are expected to fail on mac bots: +-FocusRingBrowserTest.Anchor +-FocusRingBrowserTest.Button +-FocusRingBrowserTest.Checkbox +-FocusRingBrowserTest.DarkModeButton +-FocusRingBrowserTest.Radio +-LocalCardMigrationBrowserTest.AcceptingDialogAddsLocalCardMigrationStrikes +-LocalCardMigrationBrowserTest.CardIdentifierString +-LocalCardMigrationBrowserTest.ClickingCancelClosesDialog +-LocalCardMigrationBrowserTest.ClickingContinueOpensDialog +-LocalCardMigrationBrowserTest.ClickingSaveClosesDialog +-LocalCardMigrationBrowserTest.ClosedReason_BubbleAccepted +-LocalCardMigrationBrowserTest.DeleteSuccessfullyMigratedCardsFromLocal +-LocalCardMigrationBrowserTest.DialogContainsAllValidMigratableCard +-LocalCardMigrationBrowserTest.RejectingDialogAddsLocalCardMigrationStrikes +-LocalCardMigrationBrowserUiTest.InvokeUi_default +-TabDragging/DetachToBrowserTabDragControllerTest.DragMultipleTabsRightIntoGroup/0 +-TabDragging/DetachToBrowserTabDragControllerTest.DragMultipleTabsRightIntoGroup/1 + +# This may be a flaky failure unrelated to the cr23 code paths. +-SitePerProcessInteractivePDFTest.ContextMenuPositionForEmbeddedPDFInCrossOriginFrame
diff --git a/testing/buildbot/filters/cr23.mac.cr23_views_unittests.filter b/testing/buildbot/filters/cr23.mac.cr23_views_unittests.filter new file mode 100644 index 0000000..8b3d0a6 --- /dev/null +++ b/testing/buildbot/filters/cr23.mac.cr23_views_unittests.filter
@@ -0,0 +1,2 @@ +# cr23_views_unittests that are expected to fail on mac bots: +-MdTextButtonTest.BackgroundColorChangesWithWidgetActivation
diff --git a/testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter b/testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter new file mode 100644 index 0000000..0b30e6e5 --- /dev/null +++ b/testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter
@@ -0,0 +1,12 @@ +# browser_tests that are run with the ChromeRefresh2023 feature flag enabled +# as the cr23_browser_tests test suite: +All/PromotionalTabsEnabledPolicyWhatsNewInvalidTest.* +BrowserNonClientFrameViewBrowserTest.* +ExtensionInstallDialogViewInteractiveBrowserTest.* +LoadImageBrowserTest.* +LocationIconViewBrowserTest.* +ManagedUiTest/ManagedUiTest.* +OutOfMemoryReporterPrerenderBrowserTest.* +PermissionRequestChipGestureSensitiveBrowserTest.* +PrivacySandboxDialogSmallWindowTest.* +WebAppBrowserTest.*
diff --git a/testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter b/testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter new file mode 100644 index 0000000..227178b0 --- /dev/null +++ b/testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter
@@ -0,0 +1,14 @@ +# interactive_ui_tests that are run on the bots with the ChromeRefresh2023 +# feature flag enabled as the cr23_interactive_ui_tests test suite: +AppMenuModelInteractiveTest.* +ExtensionsMenuModelInteractiveTest.* +ExtensionsMenuModelPresenceTest.* +FindInPageTest.* +FocusRingBrowserTest.* +LocalCardMigrationBrowserTest.* +PasswordManagerMenuItemInteractiveTest.* +TabDragging/DetachToBrowserTabDragControllerTest.* +TestMenuControllerUITest.* + +# This may be a flaky failure unrelated to the cr23 code paths. +SitePerProcessInteractivePDFTest.ContextMenuPositionForEmbeddedPDFInCrossOriginFrame
diff --git a/testing/buildbot/filters/cr23.win.cr23_browser_tests.filter b/testing/buildbot/filters/cr23.win.cr23_browser_tests.filter new file mode 100644 index 0000000..f97c61a --- /dev/null +++ b/testing/buildbot/filters/cr23.win.cr23_browser_tests.filter
@@ -0,0 +1,10 @@ +# cr23_browser_tests that are expected to fail on win bots: +-All/PromotionalTabsEnabledPolicyWhatsNewInvalidTest.RunTest/0 +-All/PromotionalTabsEnabledPolicyWhatsNewInvalidTest.RunTest/2 +-BrowserNonClientFrameViewBrowserTest.IncognitoIsCorrectColor +-LocationIconViewBrowserTest.InkDropMode +-ManagedUiTest/ManagedUiTest.GetManagedUiIconEnterprise/0 +-ManagedUiTest/ManagedUiTest.GetManagedUiIconEnterprise/1 +-OutOfMemoryReporterPrerenderBrowserTest.NotReportedOnPrerenderPage +-PermissionRequestChipGestureSensitiveBrowserTest.ChipFinalizedWhenInteractingWithOmnibox +-WebAppBrowserTest.InstallToShelfContainsAppName
diff --git a/testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter b/testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter new file mode 100644 index 0000000..d13244a --- /dev/null +++ b/testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter
@@ -0,0 +1,12 @@ +# cr23_interactive_ui_tests that are expected to fail on win bots: +-LocalCardMigrationBrowserTest.AcceptingDialogAddsLocalCardMigrationStrikes +-LocalCardMigrationBrowserTest.CardIdentifierString +-LocalCardMigrationBrowserTest.ClickingCancelClosesDialog +-LocalCardMigrationBrowserTest.ClickingContinueOpensDialog +-LocalCardMigrationBrowserTest.ClickingSaveClosesDialog +-LocalCardMigrationBrowserTest.ClosedReason_BubbleAccepted +-LocalCardMigrationBrowserTest.DeleteSuccessfullyMigratedCardsFromLocal +-LocalCardMigrationBrowserTest.DialogContainsAllValidMigratableCard +-LocalCardMigrationBrowserTest.RejectingDialogAddsLocalCardMigrationStrikes +-LocalCardMigrationBrowserUiTest.InvokeUi_default +-MenuControllerUITest.TestMouseOverShownMenu
diff --git a/testing/buildbot/filters/cr23.win.cr23_views_unittests.filter b/testing/buildbot/filters/cr23.win.cr23_views_unittests.filter new file mode 100644 index 0000000..f8cec17 --- /dev/null +++ b/testing/buildbot/filters/cr23.win.cr23_views_unittests.filter
@@ -0,0 +1,11 @@ +# cr23_views_unittests that are expected to fail on win bots: +-MenuControllerTest.AccessibilityDisabledItemsIndices +-MenuControllerTest.ContextMenuInitializesAuraWindowWhenShown +-MenuControllerTest.RepostEventToEmptyMenuItem +-MenuControllerTest.RootAndChildMenusInitializeAuraWindowWhenShown +-MenuControllerTest.SetSelectionIndices_Buttons +-MenuControllerTest.SetSelectionIndices_Buttons_SkipHiddenAndDisabled +-MenuControllerTest.SetSelectionIndices_ChildrenChanged +-MenuControllerTest.SetSelectionIndices_MenuItemsOnly +-MenuControllerTest.SetSelectionIndices_MenuItemsOnly_SkipHiddenAndDisabled +-MenuControllerTest.SetSelectionIndices_NestedButtons
diff --git a/testing/buildbot/filters/ios.content_browsertests.filter b/testing/buildbot/filters/ios.content_browsertests.filter index 3337232..1246926 100644 --- a/testing/buildbot/filters/ios.content_browsertests.filter +++ b/testing/buildbot/filters/ios.content_browsertests.filter
@@ -240,4 +240,6 @@ # These tests are flaky. -All/SitePerProcessBrowserTest.ChildFrameCrashMetrics_NeverShown/0 -All/SitePerProcessBrowserTest.ChildFrameCrashMetrics_NeverShown/1 +-All/SitePerProcessBrowserTest.DetachedIframeUnloadHandlerABCB/0 +-All/SitePerProcessBrowserTest.DetachedIframeUnloadHandlerABCB/1 -BackForwardCacheBrowserTest.DoesNotCacheIfSpeechRecognitionIsStarted
diff --git a/testing/buildbot/test_suites.pyl b/testing/buildbot/test_suites.pyl index 7d337e4..acc331d 100644 --- a/testing/buildbot/test_suites.pyl +++ b/testing/buildbot/test_suites.pyl
@@ -1746,28 +1746,223 @@ }, }, - 'cr23_gtests': { + # The `cr23` variants of tests run the normal test suites with + `--enable-features=ChromeRefresh2023`. We do not have the capacity + to run all of browser_tests and interactive_ui_tests in the CQ, + so these suites define two variants of the two test suites. + One set, in the `cr23_*tests` suites, consists of small subsets of the whole + test suites that we hope will get us enough coverage of the feature + to catch issues pre-submit. The others, in the `cr23_*tests_full` form, + run the whole test suites, but they only run them on the `*cr23*` optional + try bots and fyi bots. When tests start to fail in the _full variety + (and aren't also failing in the non-cr23 variety), we can add them + to the subsets. + 'cr23_linux_gtests': { 'browser_tests': { + 'swarming': { + 'shards': 20, + }, + }, + 'cr23_browser_tests': { 'args': [ '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter;' + '../../testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter' + ), + ], + 'swarming': { + 'shards': 2, + }, + 'test': 'browser_tests', + }, + 'cr23_browser_tests_full': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.linux.cr23_browser_tests.filter' + ), + ], + 'swarming': { + 'shards': 20, + }, + 'test': 'browser_tests', + }, + 'cr23_interactive_ui_tests': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter;' + '../../testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter' + ), + ], + 'test': 'interactive_ui_tests', + }, + 'cr23_interactive_ui_tests_full': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.linux.cr23_interactive_ui_tests.filter' + ), ], 'swarming': { 'shards': 10, }, + 'test': 'interactive_ui_tests', + }, + 'cr23_views_unittests': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.linux.cr23_views_unittests.filter' + ), + ], + 'test': 'views_unittests', }, 'interactive_ui_tests': { - 'args': [ - '--enable-features=ChromeRefresh2023', - ], 'swarming': { 'shards': 10, }, }, - 'views_unittests': { + 'views_unittests': {}, + }, + 'cr23_mac_gtests': { + 'browser_tests': { + 'swarming': { + 'shards': 20, + }, + 'ci_only': True, + }, + 'cr23_browser_tests': { 'args': [ '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter;' + '../../testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter' + ), ], + 'swarming': { + 'shards': 2, + }, + 'test': 'browser_tests', + 'ci_only': True, }, + 'cr23_browser_tests_full': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.mac.cr23_browser_tests.filter' + ), + ], + 'swarming': { + 'shards': 20, + }, + 'test': 'browser_tests', + 'ci_only': True, + }, + 'cr23_interactive_ui_tests': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter;' + '../../testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter' + ), + ], + 'test': 'interactive_ui_tests', + }, +
'cr23_interactive_ui_tests_full': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.mac.cr23_interactive_ui_tests.filter' + ), + ], + 'swarming': { + 'shards': 10, + }, + 'test': 'interactive_ui_tests', + }, + 'cr23_views_unittests': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.mac.cr23_views_unittests.filter' + ), + ], + 'test': 'views_unittests', + }, + 'interactive_ui_tests': { + 'swarming': { + 'shards': 10, + }, + }, + 'views_unittests': {}, + }, + 'cr23_win_gtests': { + 'browser_tests': { + 'swarming': { + 'shards': 20, + }, + }, + 'cr23_browser_tests': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.tests.cr23_browser_tests.filter;' + '../../testing/buildbot/filters/cr23.win.cr23_browser_tests.filter' + ), + ], + 'swarming': { + 'shards': 2, + }, + 'test': 'browser_tests', + }, + 'cr23_browser_tests_full': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.win.cr23_browser_tests.filter' + ), + ], + 'swarming': { + 'shards': 20, + }, + 'test': 'browser_tests', + }, + 'cr23_interactive_ui_tests': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.tests.cr23_interactive_ui_tests.filter;' + '../../testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter' + ), + ], + 'test': 'interactive_ui_tests', + }, + 'cr23_interactive_ui_tests_full': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.win.cr23_interactive_ui_tests.filter' + ), + ], + 'test': 'interactive_ui_tests', + }, + 'cr23_views_unittests': { + 'args': [ + '--enable-features=ChromeRefresh2023', + ('--test-launcher-filter-file=' + '../../testing/buildbot/filters/cr23.win.cr23_views_unittests.filter' + ), + ], + 'test': 'views_unittests', + }, + 'interactive_ui_tests': { + 'swarming': { + 'shards': 10, + }, + }, + 'views_unittests': {}, }, 'cronet_gtests': {
diff --git a/testing/buildbot/variants.pyl b/testing/buildbot/variants.pyl index 79d5dda..76432ce 100644 --- a/testing/buildbot/variants.pyl +++ b/testing/buildbot/variants.pyl
@@ -22,16 +22,16 @@ }, 'LACROS_VERSION_SKEW_CANARY': { 'args': [ - '--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5827.0/test_ash_chrome', + '--ash-chrome-path-override=../../lacros_version_skew_tests_v116.0.5828.0/test_ash_chrome', ], - 'description': 'Run with ash-chrome version 116.0.5827.0', + 'description': 'Run with ash-chrome version 116.0.5828.0', 'identifier': 'Lacros version skew testing ash canary', 'swarming': { 'cipd_packages': [ { 'cipd_package': 'chromium/testing/linux-ash-chromium/x86_64/ash.zip', - 'location': 'lacros_version_skew_tests_v116.0.5827.0', - 'revision': 'version:116.0.5827.0', + 'location': 'lacros_version_skew_tests_v116.0.5828.0', + 'revision': 'version:116.0.5828.0', }, ], },
diff --git a/testing/buildbot/waterfalls.pyl b/testing/buildbot/waterfalls.pyl index eee8ea4..133686c 100644 --- a/testing/buildbot/waterfalls.pyl +++ b/testing/buildbot/waterfalls.pyl
@@ -3558,7 +3558,7 @@ 'x86-64', ], 'test_suites': { - 'gtest_tests': 'cr23_gtests' + 'gtest_tests': 'cr23_linux_gtests' }, 'os_type': 'linux', }, @@ -3726,7 +3726,7 @@ }, 'mac-cr23-rel': { 'test_suites': { - 'gtest_tests': 'cr23_gtests', + 'gtest_tests': 'cr23_mac_gtests', }, 'mixins': [ 'mac_x64', @@ -3841,7 +3841,7 @@ }, 'win-cr23-rel': { 'test_suites': { - 'gtest_tests': 'cr23_gtests', + 'gtest_tests': 'cr23_win_gtests', }, 'mixins': [ 'win10-any',
diff --git a/third_party/blink/public/mojom/use_counter/metrics/web_feature.mojom b/third_party/blink/public/mojom/use_counter/metrics/web_feature.mojom index d0900e3..1a34812 100644 --- a/third_party/blink/public/mojom/use_counter/metrics/web_feature.mojom +++ b/third_party/blink/public/mojom/use_counter/metrics/web_feature.mojom
@@ -3920,6 +3920,8 @@ kFedCmRpContext = 4580, kEventTimingArtificialPointerupOrClick = 4581, kAbortSignalAny = 4582, + kFedCmIdpSigninStatusApi = 4583, + kFedCmIdpSigninStatusJsApi = 4584, // Add new features immediately above this line. Don't change assigned // numbers of any item, and don't reuse removed slots.
diff --git a/third_party/blink/renderer/bindings/core/v8/dictionary_helper_for_core.cc b/third_party/blink/renderer/bindings/core/v8/dictionary_helper_for_core.cc index 54ccc49..d4535fd 100644 --- a/third_party/blink/renderer/bindings/core/v8/dictionary_helper_for_core.cc +++ b/third_party/blink/renderer/bindings/core/v8/dictionary_helper_for_core.cc
@@ -182,20 +182,6 @@ template <> bool DictionaryHelper::Get(const Dictionary& dictionary, const StringView& key, - Member<DOMWindow>& value) { - v8::Local<v8::Value> v8_value; - if (!dictionary.Get(key, v8_value)) - return false; - - // We need to handle a DOMWindow specially, because a DOMWindow wrapper - // exists on a prototype chain of v8Value. - value = ToDOMWindow(dictionary.GetIsolate(), v8_value); - return true; -} - -template <> -bool DictionaryHelper::Get(const Dictionary& dictionary, - const StringView& key, Member<TrackBase>& value) { v8::Local<v8::Value> v8_value; if (!dictionary.Get(key, v8_value))
diff --git a/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.cc b/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.cc index 24d8cfec..002ad0a 100644 --- a/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.cc +++ b/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.cc
@@ -630,15 +630,20 @@ return String::Adopt(result); } -DOMWindow* ToDOMWindow(v8::Isolate* isolate, v8::Local<v8::Value> value) { - return V8Window::ToWrappable(isolate, value); -} - LocalDOMWindow* ToLocalDOMWindow(v8::Local<v8::Context> context) { if (context.IsEmpty()) return nullptr; + v8::Local<v8::Object> global = context->Global(); + + // There are several global objects that are not ScriptWrappable, and + // therefore are definitely not a LocalDOMWindow (GC context, DevTools' + // context (debug context), and maybe more). These types do not have any + // internal fields, and will therefore crash if passed to ToScriptWrappable(). + if (global->InternalFieldCount() == 0) { + return nullptr; + } return To<LocalDOMWindow>( - ToDOMWindow(context->GetIsolate(), context->Global())); + ToScriptWrappable(global)->ToMostDerived<DOMWindow>()); } LocalDOMWindow* EnteredDOMWindow(v8::Isolate* isolate) { @@ -673,10 +678,10 @@ CHECK(!global_proxy.IsEmpty()); CHECK(global_proxy->IsObject()); - // There are several contexts other than Window, WorkerGlobalScope or - // WorkletGlobalScope but entering into ToExecutionContext, namely GC context, - // DevTools' context (debug context), and maybe more. They all don't have - // any internal field. + // There are several global objects that are not ScriptWrappable, and + // therefore are definitely not an ExecutionContext (GC context, DevTools' + // context (debug context), and maybe more). These types do not have any + // internal fields, and will therefore crash if passed to ToScriptWrappable(). if (global_proxy->InternalFieldCount() == 0) return nullptr;
diff --git a/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.h b/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.h index 1df2bb5a..a16b1586 100644 --- a/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.h +++ b/third_party/blink/renderer/bindings/core/v8/v8_binding_for_core.h
@@ -62,7 +62,6 @@ // new utility function, consider adding it to V8Binding.h instead unless it has // dependencies to core/. -class DOMWindow; class ExceptionState; class ExecutionContext; class Frame; @@ -431,7 +430,6 @@ CORE_EXPORT v8::Isolate* ToIsolate(const LocalFrame*); -CORE_EXPORT DOMWindow* ToDOMWindow(v8::Isolate*, v8::Local<v8::Value>); CORE_EXPORT LocalDOMWindow* ToLocalDOMWindow(v8::Local<v8::Context>); CORE_EXPORT LocalDOMWindow* EnteredDOMWindow(v8::Isolate*); LocalDOMWindow* IncumbentDOMWindow(v8::Isolate*);
diff --git a/third_party/blink/renderer/core/inspector/inspector_dom_debugger_agent.cc b/third_party/blink/renderer/core/inspector/inspector_dom_debugger_agent.cc index 35ffd018..f42b980 100644 --- a/third_party/blink/renderer/core/inspector/inspector_dom_debugger_agent.cc +++ b/third_party/blink/renderer/core/inspector/inspector_dom_debugger_agent.cc
@@ -190,12 +190,7 @@ return; } - EventTarget* target = V8EventTarget::ToWrappable(isolate, value); - // We need to handle LocalDOMWindow specially, because LocalDOMWindow wrapper - // exists on prototype chain. - if (!target) - target = ToDOMWindow(isolate, value); - if (target) { + if (EventTarget* target = V8EventTarget::ToWrappable(isolate, value)) { CollectEventListeners(isolate, target, value, nullptr, false, event_information); }
diff --git a/third_party/blink/renderer/core/inspector/thread_debugger_common_impl.cc b/third_party/blink/renderer/core/inspector/thread_debugger_common_impl.cc index 100c7baf..addc162e 100644 --- a/third_party/blink/renderer/core/inspector/thread_debugger_common_impl.cc +++ b/third_party/blink/renderer/core/inspector/thread_debugger_common_impl.cc
@@ -678,11 +678,7 @@ const v8::FunctionCallbackInfo<v8::Value>& info) { if (info.Length() < 1) return nullptr; - if (EventTarget* target = - V8EventTarget::ToWrappable(info.GetIsolate(), info[0])) { - return target; - } - return ToDOMWindow(info.GetIsolate(), info[0]); + return V8EventTarget::ToWrappable(info.GetIsolate(), info[0]); } void ThreadDebuggerCommonImpl::SetMonitorEventsCallback(
diff --git a/third_party/blink/renderer/core/layout/ng/ng_fragmentation_utils.cc b/third_party/blink/renderer/core/layout/ng/ng_fragmentation_utils.cc index a56ceb9..144e66db 100644 --- a/third_party/blink/renderer/core/layout/ng/ng_fragmentation_utils.cc +++ b/third_party/blink/renderer/core/layout/ng/ng_fragmentation_utils.cc
@@ -113,12 +113,29 @@ const NGBoxFragmentBuilder& builder) { if (child.IsInline()) return EBreakBetween::kAuto; + + // Since it's not an inline node, if we have a fragment at all, it has to be a + // box fragment. + const NGPhysicalBoxFragment* box_fragment = nullptr; + if (layout_result.Status() == NGLayoutResult::kSuccess) { + box_fragment = &To<NGPhysicalBoxFragment>(layout_result.PhysicalFragment()); + if (!box_fragment->IsFirstForNode()) { + // If the node is resumed after a break, we are not *before* it anymore, + // so ignore values. We normally don't even consider breaking before a + // resumed node, since there normally is no container separation. The + // normal place to resume is at the very start of the fragmentainer - + // cannot break there! However, there are cases where a node is resumed + // at a location past the start of the fragmentainer, e.g. when printing + // monolithic overflowing content. + return EBreakBetween::kAuto; + } + } + EBreakBetween break_before = JoinFragmentainerBreakValues( child.Style().BreakBefore(), layout_result.InitialBreakBefore()); break_before = builder.JoinedBreakBetweenValue(break_before); const NGConstraintSpace& space = builder.ConstraintSpace(); - if (space.IsPaginated() && - layout_result.Status() == NGLayoutResult::kSuccess && + if (space.IsPaginated() && box_fragment && !IsForcedBreakValue(builder.ConstraintSpace(), break_before)) { AtomicString current_name = builder.PageName(); if (current_name == g_null_atom) { @@ -126,9 +143,7 @@ } // If the page name propagated from the child differs from what we already // have, we need to break before the child. - const auto& fragment = - To<NGPhysicalBoxFragment>(layout_result.PhysicalFragment()); - if (fragment.PageName() != current_name) { + if (box_fragment->PageName() != current_name) { return EBreakBetween::kPage; } }
diff --git a/third_party/blink/renderer/modules/credentialmanagement/credentials_container.cc b/third_party/blink/renderer/modules/credentialmanagement/credentials_container.cc index fe8780b..e3f3bf0b 100644 --- a/third_party/blink/renderer/modules/credentialmanagement/credentials_container.cc +++ b/third_party/blink/renderer/modules/credentialmanagement/credentials_container.cc
@@ -1369,6 +1369,12 @@ UseCounter::Count(resolver->GetExecutionContext(), WebFeature::kFedCmIframe); } + // Track when websites use FedCM with the IDP sign-in status opt-in + if (RuntimeEnabledFeatures::FedCmIdpSigninStatusEnabled( + resolver->GetExecutionContext())) { + UseCounter::Count(resolver->GetExecutionContext(), + WebFeature::kFedCmIdpSigninStatusApi); + } int provider_index = 0; Vector<mojom::blink::IdentityProviderPtr> identity_provider_ptrs; for (const auto& provider : options->identity()->providers()) {
diff --git a/third_party/blink/renderer/modules/credentialmanagement/identity_provider.idl b/third_party/blink/renderer/modules/credentialmanagement/identity_provider.idl index 9f0cf9f..992a4de 100644 --- a/third_party/blink/renderer/modules/credentialmanagement/identity_provider.idl +++ b/third_party/blink/renderer/modules/credentialmanagement/identity_provider.idl
@@ -19,9 +19,9 @@ [RuntimeEnabled=FedCmUserInfo, CallWith=ScriptState, RaisesException, MeasureAs=FedCmUserInfo] static Promise<sequence<IdentityUserInfo>> getUserInfo(IdentityProviderConfig config); - [RuntimeEnabled=FedCmIdpSigninStatus, CallWith=ScriptState] + [RuntimeEnabled=FedCmIdpSigninStatus, CallWith=ScriptState, MeasureAs=FedCmIdpSigninStatusJsApi] static void login(); - [RuntimeEnabled=FedCmIdpSigninStatus, CallWith=ScriptState] + [RuntimeEnabled=FedCmIdpSigninStatus, CallWith=ScriptState, MeasureAs=FedCmIdpSigninStatusJsApi] static void logout(); [RuntimeEnabled=FedCmIdpSigninStatus, CallWith=ScriptState] static void close();
diff --git a/third_party/blink/renderer/modules/indexeddb/indexed_db_blink_mojom_traits.cc b/third_party/blink/renderer/modules/indexeddb/indexed_db_blink_mojom_traits.cc index 4b13248..3c9ae32 100644 --- a/third_party/blink/renderer/modules/indexeddb/indexed_db_blink_mojom_traits.cc +++ b/third_party/blink/renderer/modules/indexeddb/indexed_db_blink_mojom_traits.cc
@@ -214,9 +214,10 @@ std::unique_ptr<blink::IDBValue>>:: Read(blink::mojom::IDBValueDataView data, std::unique_ptr<blink::IDBValue>* out) { - Vector<uint8_t> value_bits; - if (!data.ReadBits(&value_bits)) + Vector<char> value_bits; + if (!data.ReadBits(reinterpret_cast<Vector<uint8_t>*>(&value_bits))) { return false; + } if (value_bits.empty()) { *out = std::make_unique<blink::IDBValue>(scoped_refptr<SharedBuffer>(), @@ -224,8 +225,8 @@ return true; } - scoped_refptr<SharedBuffer> value_buffer = SharedBuffer::Create( - reinterpret_cast<const char*>(value_bits.data()), value_bits.size()); + scoped_refptr<SharedBuffer> value_buffer = + SharedBuffer::AdoptVector(value_bits); Vector<blink::mojom::blink::IDBExternalObjectPtr> external_objects; if (!data.ReadExternalObjects(&external_objects))
diff --git a/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer.cc b/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer.cc index 318a110..ca48b6f 100644 --- a/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer.cc +++ b/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer.cc
@@ -603,12 +603,11 @@ } } else { // Handle filter effect. - // The size parameter is only used to computed the origin of zoom - // operation, which we never generate. - gfx::SizeF empty; + // The `layer_bounds` parameter is only used to compute the ZOOM lens + // bounds, which we never generate. cc::PaintFlags filter_flags; filter_flags.setImageFilter(cc::RenderSurfaceFilters::BuildImageFilter( - effect.Filter().AsCcFilterOperations(), empty)); + effect.Filter().AsCcFilterOperations())); save_layer_id = push<cc::SaveLayerOp>(filter_flags); } result_.EndPaintOfPairedBegin();
diff --git a/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer_test.cc b/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer_test.cc index 7e3f000a..665a0d7 100644 --- a/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer_test.cc +++ b/third_party/blink/renderer/platform/graphics/compositing/paint_chunks_to_cc_layer_test.cc
@@ -182,7 +182,7 @@ cc::PaintFlags expected_flags; expected_flags.setImageFilter(cc::RenderSurfaceFilters::BuildImageFilter( - filter.AsCcFilterOperations(), gfx::SizeF())); + filter.AsCcFilterOperations())); EXPECT_THAT( output, ElementsAre( @@ -335,7 +335,7 @@ cc::PaintFlags expected_flags; expected_flags.setImageFilter(cc::RenderSurfaceFilters::BuildImageFilter( - filter.AsCcFilterOperations(), gfx::SizeF())); + filter.AsCcFilterOperations())); EXPECT_THAT( output, ElementsAre( @@ -1056,7 +1056,7 @@ cc::PaintFlags expected_flags; expected_flags.setImageFilter(cc::RenderSurfaceFilters::BuildImageFilter( - filter.AsCcFilterOperations(), gfx::SizeF())); + filter.AsCcFilterOperations())); EXPECT_THAT(output, ElementsAre(PaintOpEq<cc::SaveLayerOp>( SkRect::MakeXYWH(0, 0, 0, 0), expected_flags), // <e1> @@ -1094,7 +1094,7 @@ cc::PaintFlags expected_flags; expected_flags.setImageFilter(cc::RenderSurfaceFilters::BuildImageFilter( - filter.AsCcFilterOperations(), gfx::SizeF())); + filter.AsCcFilterOperations())); EXPECT_THAT(output, ElementsAre(PaintOpIs<cc::SaveOp>(), PaintOpIs<cc::TranslateOp>(), // layer offset PaintOpEq<cc::SaveLayerOp>( @@ -1134,7 +1134,7 @@ cc::PaintFlags expected_flags; expected_flags.setImageFilter(cc::RenderSurfaceFilters::BuildImageFilter( - filter.AsCcFilterOperations(), gfx::SizeF())); + filter.AsCcFilterOperations())); EXPECT_THAT( output, ElementsAre(PaintOpIs<cc::SaveOp>(),
diff --git a/third_party/blink/renderer/platform/runtime_enabled_features.json5 b/third_party/blink/renderer/platform/runtime_enabled_features.json5 index 69c4846a..7675efc 100644 --- a/third_party/blink/renderer/platform/runtime_enabled_features.json5 +++ b/third_party/blink/renderer/platform/runtime_enabled_features.json5
@@ -1605,6 +1605,9 @@ name: "FedCmIdentityProviderInterface", implied_by: ["FedCmIdpSigninStatus", "FedCmUserInfo", "FedCmIdPRegistration", "FedCmAuthz"], base_feature: "none", + origin_trial_feature_name: "FedCmIdpSigninStatus", + origin_trial_allows_third_party: true, + origin_trial_os: ["Win", "ChromeOS", "Mac", "Linux"] }, { name: "FedCmIdPRegistration", @@ -1619,6 +1622,10 @@ public: true, status: "test", base_feature: "none", + origin_trial_feature_name: "FedCmIdpSigninStatus", + origin_trial_allows_third_party: true, + origin_trial_os: ["Win", "ChromeOS", "Mac", "Linux"], + browser_process_read_access: true, }, { name: "FedCmIdpSignout",
diff --git a/third_party/blink/tools/blinkpy/web_tests/fuzzy_diff_analyzer/fuzzy_diff_analyzer.py b/third_party/blink/tools/blinkpy/web_tests/fuzzy_diff_analyzer/fuzzy_diff_analyzer.py index 5db5820..bf9fa64 100644 --- a/third_party/blink/tools/blinkpy/web_tests/fuzzy_diff_analyzer/fuzzy_diff_analyzer.py +++ b/third_party/blink/tools/blinkpy/web_tests/fuzzy_diff_analyzer/fuzzy_diff_analyzer.py
@@ -16,6 +16,10 @@ import argparse +from blinkpy.web_tests.fuzzy_diff_analyzer import analyzer +from blinkpy.web_tests.fuzzy_diff_analyzer import queries +from blinkpy.web_tests.fuzzy_diff_analyzer import results + def ParseArgs() -> argparse.Namespace: parser = argparse.ArgumentParser(description=( @@ -40,4 +44,20 @@ def main() -> int: args = ParseArgs() + + querier_instance = queries.FuzzyDiffAnalyzerQuerier( + args.sample_period, args.project) + query_results = querier_instance.get_failed_image_comparison_ci_tests() + + results_processor = results.ResultProcessor() + aggregated_results = results_processor.aggregate_results(query_results) + + matching_analyzer = analyzer.FuzzyMatchingAnalyzer() + for test_name, test_data in aggregated_results.items(): + test_result = matching_analyzer.run_analyzer(test_data) + print('') + print('test_name: %s' % test_name) + print('test_result: %s' % test_result) + print('') + return 0
diff --git a/third_party/blink/tools/blinkpy/web_tests/port/server_process.py b/third_party/blink/tools/blinkpy/web_tests/port/server_process.py index 9ade702c..6eec48a 100644 --- a/third_party/blink/tools/blinkpy/web_tests/port/server_process.py +++ b/third_party/blink/tools/blinkpy/web_tests/port/server_process.py
@@ -50,9 +50,9 @@ else: import fcntl import os - import pipes + import shlex import select - _quote_cmd = lambda cmdline: ' '.join(pipes.quote(arg) for arg in cmdline) + _quote_cmd = lambda cmdline: ' '.join(shlex.quote(arg) for arg in cmdline) _log = logging.getLogger(__name__)
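Note on the pipes -> shlex change above: in Python 3, pipes.quote is the same function as shlex.quote (pipes simply re-exports it), and the pipes module is deprecated and scheduled for removal in Python 3.13, so the swap is behaviorally equivalent. A minimal sketch of the quoting behavior, standard library only (the command line below is a made-up example, not taken from this CL):

    # Hypothetical argv list; shlex.quote leaves plain arguments untouched and
    # single-quotes anything the shell would otherwise split or interpret.
    import shlex

    cmdline = ['out/Default/content_shell', '--run-web-tests', 'dir with spaces/test.html']
    print(' '.join(shlex.quote(arg) for arg in cmdline))
    # -> out/Default/content_shell --run-web-tests 'dir with spaces/test.html'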
diff --git a/third_party/blink/web_tests/TestExpectations b/third_party/blink/web_tests/TestExpectations index 3bc6af6..6419228 100644 --- a/third_party/blink/web_tests/TestExpectations +++ b/third_party/blink/web_tests/TestExpectations
@@ -6673,9 +6673,6 @@ # Flaky test on mac crbug.com/1451711 [ Mac10.15 ] http/tests/inspector-protocol/tracing/prerender.js [ Failure Pass ] -# Sheriff 2023-06-06 -crbug.com/1451810 [ Linux ] wpt_internal/css/css-contain/container-queries/state-initially-stuck.html [ Failure ] - # Sheriff 2023-06-07 crbug.com/1452003 [ Mac12 ] svg/W3C-SVG-1.1/text-fonts-01-t.svg [ Failure Pass ] crbug.com/1451210 [ Mac ] external/wpt/scroll-to-text-fragment/iframe-scroll.sub.html [ Failure Pass ] @@ -6692,6 +6689,9 @@ fast/js/array-some.html [ Failure Pass ] fast/js/iterable-object.html [ Failure Pass ] +# Since HTTPS upgrades run in the //chrome layer, these will fail when run in content shell. +external/wpt/https-upgrades/tentative/upgrade.https.sub.html [ Timeout ] + # Sherrif 2023-06-09 crbug.com/1358175 [ Mac12 ] media/controls/closed-captions-single-track.html [ Failure Pass ] crbug.com/1452003 [ Mac13 ] svg/W3C-SVG-1.1/text-fonts-01-t.svg [ Failure Pass ]
diff --git a/third_party/blink/web_tests/external/wpt/css/printing/monolithic-overflow-021-print-ref.html b/third_party/blink/web_tests/external/wpt/css/printing/monolithic-overflow-021-print-ref.html new file mode 100644 index 0000000..112b1eb --- /dev/null +++ b/third_party/blink/web_tests/external/wpt/css/printing/monolithic-overflow-021-print-ref.html
@@ -0,0 +1,8 @@ +<!DOCTYPE html> +<link rel="author" title="Morten Stenshorne" href="mailto:mstensho@chromium.org"> +<style> + body { + margin: 0; + } +</style> +<div style="height:200vh; background:green;"></div>
diff --git a/third_party/blink/web_tests/external/wpt/css/printing/monolithic-overflow-021-print.html b/third_party/blink/web_tests/external/wpt/css/printing/monolithic-overflow-021-print.html new file mode 100644 index 0000000..584c494 --- /dev/null +++ b/third_party/blink/web_tests/external/wpt/css/printing/monolithic-overflow-021-print.html
@@ -0,0 +1,14 @@ +<!DOCTYPE html> +<link rel="author" title="Morten Stenshorne" href="mailto:mstensho@chromium.org"> +<link rel="help" href="https://bugs.chromium.org/p/chromium/issues/detail?id=1451760"> +<link rel="match" href="monolithic-overflow-021-print-ref.html"> +<style> + body { + margin: 0; + } +</style> +<div style="break-before:page; background:red;"> + <div style="border-bottom:50vh solid green; background:red;"> + <div style="contain:size; height:150vh; background:green;"></div> + </div> +</div>
diff --git a/third_party/blink/web_tests/external/wpt/https-upgrades/resources/pass.html b/third_party/blink/web_tests/external/wpt/https-upgrades/resources/pass.html new file mode 100644 index 0000000..5c7ca34 --- /dev/null +++ b/third_party/blink/web_tests/external/wpt/https-upgrades/resources/pass.html
@@ -0,0 +1,10 @@ +<!DOCTYPE html> +<html> + <body> + <script> + window.onload = (event) => { + window.opener.postMessage('pass', '*'); + }; + </script> + </body> +</html>
diff --git a/third_party/blink/web_tests/external/wpt/https-upgrades/tentative/fallback.sub.html b/third_party/blink/web_tests/external/wpt/https-upgrades/tentative/fallback.sub.html new file mode 100644 index 0000000..8fdf0dc8 --- /dev/null +++ b/third_party/blink/web_tests/external/wpt/https-upgrades/tentative/fallback.sub.html
@@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html> + <head> + <title>HTTPS Upgrades: Fallback.</title> + <script src="/resources/testharness.js"></script> + <script src="/resources/testharnessreport.js"></script> + <script src="/common/get-host-info.sub.js"></script> + + </head> + <body> + <script> + setup({ single_test: true }); + // If HTTPS upgrades are enabled, this will fail to load since this test is http only, + // and should load properly once a fallback is triggered. + var url = new URL("http://{{host}}:{{ports[http][0]}}/https-upgrades/resources/pass.html") + window.onmessage = function(event) { + if (event.data === "pass") { + done(); + } + } + win = window.open(url) + </script> + </body> +</html>
diff --git a/third_party/blink/web_tests/external/wpt/https-upgrades/tentative/upgrade.https.sub.html b/third_party/blink/web_tests/external/wpt/https-upgrades/tentative/upgrade.https.sub.html new file mode 100644 index 0000000..71a1fe1dd --- /dev/null +++ b/third_party/blink/web_tests/external/wpt/https-upgrades/tentative/upgrade.https.sub.html
@@ -0,0 +1,24 @@ +<!DOCTYPE html> +<html> + <head> + <title>HTTPS Upgrades: Upgrade.</title> + <script src="/resources/testharness.js"></script> + <script src="/resources/testharnessreport.js"></script> + <script src="/common/get-host-info.sub.js"></script> + + </head> + <body> + <script> + setup({ single_test: true }); + // HTTPS upgrades don't change custom ports, so this will load correctly if an HTTPS upgrade is performed, + // and will fail to load otherwise (since the port will be wrong for http). + var url = new URL("http://{{host}}:{{ports[https][0]}}/https-upgrades/resources/pass.html") + window.onmessage = function(event) { + if (event.data === "pass") { + done(); + } + } + win = window.open(url) + </script> + </body> +</html>
diff --git a/third_party/blink/web_tests/fast/forms/file/file-appearance-no-default-width.html b/third_party/blink/web_tests/fast/forms/file/file-appearance-no-default-width.html index 3bfd26e..a1d8dce 100644 --- a/third_party/blink/web_tests/fast/forms/file/file-appearance-no-default-width.html +++ b/third_party/blink/web_tests/fast/forms/file/file-appearance-no-default-width.html
@@ -28,6 +28,12 @@ border: 5px solid red; } </style> + <script> + if (window.testRunner) { + testRunner.waitUntilDone(); + document.fonts.ready.then(() => testRunner.notifyDone()); + } + </script> </head> <body> <input type="file">
diff --git a/third_party/blink/web_tests/http/tests/.well-known/web-identity b/third_party/blink/web_tests/http/tests/.well-known/web-identity index 7987f8a..d6805e77 100644 --- a/third_party/blink/web_tests/http/tests/.well-known/web-identity +++ b/third_party/blink/web_tests/http/tests/.well-known/web-identity
@@ -1,5 +1,5 @@ { "provider_urls": [ - "https://devtools.test:8443/resources/fedcm/fedcm.json" + "https://127.0.0.1:8443/resources/fedcm/fedcm.json" ] }
diff --git a/third_party/blink/web_tests/http/tests/credentialmanagement/fedcm-idp-signin-status.https.html b/third_party/blink/web_tests/http/tests/credentialmanagement/fedcm-idp-signin-status.https.html new file mode 100644 index 0000000..f96fc1dc --- /dev/null +++ b/third_party/blink/web_tests/http/tests/credentialmanagement/fedcm-idp-signin-status.https.html
@@ -0,0 +1,37 @@ +<!DOCTYPE html> +<meta charset="utf-8"> +<!-- Generate token with the command: +generate_token.py https://127.0.0.1:8443 FedCmIdpSigninStatus --expire-timestamp=2000000000 +-- --> +<meta http-equiv="origin-trial" + content="A9a3fMLbgTmgdENpn0k3Xa5PyVf3Zuf7T93E0WkqOfannbR7I6EUCkuxOx/5x/vtou4iHIzov2F8huBcm/sFDgEAAABdeyJvcmlnaW4iOiAiaHR0cHM6Ly8xMjcuMC4wLjE6ODQ0MyIsICJmZWF0dXJlIjogIkZlZENtSWRwU2lnbmluU3RhdHVzIiwgImV4cGlyeSI6IDIwMDAwMDAwMDB9" /> +<title>FedCM IDP sign-in status API - check that the OT can be enabled</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script> +const prefix = 'https://127.0.0.1:8443/resources/fedcm'; +const provider = { + configURL: `${prefix}/fedcm.json`, + clientId: '123', + nonce: '2', +}; +const config = { + identity: { + providers: [provider] + }, + mediation: 'required', +}; + +promise_test(async t => { + await fetch(`${prefix}/mark-signout.php`, { mode: 'no-cors' }); + const result = navigator.credentials.get(config); + return promise_rejects_dom(t, 'NetworkError', result); +}, 'FedCM request should fail because we are marked as not logged in'); + +promise_test(async t => { + const prefix = 'https://127.0.0.1:8443/resources/fedcm'; + await fetch(`${prefix}/mark-signout.php`, { mode: 'no-cors' }); + const result = IdentityProvider.getUserInfo(provider); + return promise_rejects_dom(t, 'NetworkError', result); +}, 'User info request should fail because we are marked as not logged in'); +</script>
diff --git a/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-after-dialog-expected.txt b/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-after-dialog-expected.txt index 32e9070..b33b5ef8 100644 --- a/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-after-dialog-expected.txt +++ b/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-after-dialog-expected.txt
@@ -5,8 +5,8 @@ accountId : 1234 email : john_doe@idp.example givenName : John - idpConfigUrl : https://devtools.test:8443/resources/fedcm/fedcm.json - idpSigninUrl : https://devtools.test:8443/signin + idpConfigUrl : https://127.0.0.1:8443/resources/fedcm/fedcm.json + idpSigninUrl : https://127.0.0.1:8443/signin loginState : SignIn name : John Doe pictureUrl : https://idp.example/profile/123 @@ -15,8 +15,8 @@ accountId : 5678 email : aisha@idp.example givenName : Aisha - idpConfigUrl : https://devtools.test:8443/resources/fedcm/fedcm.json - idpSigninUrl : https://devtools.test:8443/signin + idpConfigUrl : https://127.0.0.1:8443/resources/fedcm/fedcm.json + idpSigninUrl : https://127.0.0.1:8443/signin loginState : SignUp name : Aisha Ahmad pictureUrl : https://idp.example/profile/567
diff --git a/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-before-dialog-expected.txt b/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-before-dialog-expected.txt index 3084f7d6..2dc7ecba 100644 --- a/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-before-dialog-expected.txt +++ b/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/fedcm-dialog-event-enable-before-dialog-expected.txt
@@ -5,8 +5,8 @@ accountId : 1234 email : john_doe@idp.example givenName : John - idpConfigUrl : https://devtools.test:8443/resources/fedcm/fedcm.json - idpSigninUrl : https://devtools.test:8443/signin + idpConfigUrl : https://127.0.0.1:8443/resources/fedcm/fedcm.json + idpSigninUrl : https://127.0.0.1:8443/signin loginState : SignIn name : John Doe pictureUrl : https://idp.example/profile/123 @@ -15,8 +15,8 @@ accountId : 5678 email : aisha@idp.example givenName : Aisha - idpConfigUrl : https://devtools.test:8443/resources/fedcm/fedcm.json - idpSigninUrl : https://devtools.test:8443/signin + idpConfigUrl : https://127.0.0.1:8443/resources/fedcm/fedcm.json + idpSigninUrl : https://127.0.0.1:8443/signin loginState : SignUp name : Aisha Ahmad pictureUrl : https://idp.example/profile/567
diff --git a/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/resources/dialog-shown-event.https.html b/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/resources/dialog-shown-event.https.html index da608de..de82437 100644 --- a/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/resources/dialog-shown-event.https.html +++ b/third_party/blink/web_tests/http/tests/inspector-protocol/fedcm/resources/dialog-shown-event.https.html
@@ -7,7 +7,7 @@ const result = await navigator.credentials.get({ identity: { providers: [{ - configURL: `https://${location.host}/resources/fedcm/fedcm.json`, + configURL: `https://127.0.0.1:8443/resources/fedcm/fedcm.json`, clientId: '123', nonce: '2', }]
diff --git a/third_party/blink/web_tests/http/tests/origin_trials/webexposed/fedcm-idp-signin-status-api-ot-interfaces.https.html b/third_party/blink/web_tests/http/tests/origin_trials/webexposed/fedcm-idp-signin-status-api-ot-interfaces.https.html new file mode 100644 index 0000000..b90b882 --- /dev/null +++ b/third_party/blink/web_tests/http/tests/origin_trials/webexposed/fedcm-idp-signin-status-api-ot-interfaces.https.html
@@ -0,0 +1,16 @@ +<!DOCTYPE html> +<meta charset="utf-8"> +<!-- Generate token with the command: +generate_token.py https://127.0.0.1:8443 FedCmIdpSigninStatus --expire-timestamp=2000000000 +-- --> +<meta http-equiv="origin-trial" + content="A9a3fMLbgTmgdENpn0k3Xa5PyVf3Zuf7T93E0WkqOfannbR7I6EUCkuxOx/5x/vtou4iHIzov2F8huBcm/sFDgEAAABdeyJvcmlnaW4iOiAiaHR0cHM6Ly8xMjcuMC4wLjE6ODQ0MyIsICJmZWF0dXJlIjogIkZlZENtSWRwU2lnbmluU3RhdHVzIiwgImV4cGlyeSI6IDIwMDAwMDAwMDB9" /> +<title>FedCM IDP sign-in status API - interfaces and properties exposed by origin trial</title> +<script src="/resources/testharness.js"></script> +<script src="/resources/testharnessreport.js"></script> +<script src="/resources/origin-trials-helper.js"></script> +<script> + test(t => { + OriginTrialsHelper.check_interfaces_exist(this, ['IdentityProvider']); + }, 'FedCM IDP sign-in status API interfaces and properties in Origin-Trial enabled document.'); +</script>
diff --git a/third_party/blink/web_tests/http/tests/resources/fedcm/mark-signout.php b/third_party/blink/web_tests/http/tests/resources/fedcm/mark-signout.php new file mode 100644 index 0000000..b81811a --- /dev/null +++ b/third_party/blink/web_tests/http/tests/resources/fedcm/mark-signout.php
@@ -0,0 +1,2 @@ +<?php header("Idp-Signin-Status: action=signout-all") ?> +Header sent.
diff --git a/third_party/crashpad/crashpad/build/run_tests.py b/third_party/crashpad/crashpad/build/run_tests.py index e03e06e..41368d06 100755 --- a/third_party/crashpad/crashpad/build/run_tests.py +++ b/third_party/crashpad/crashpad/build/run_tests.py
@@ -16,9 +16,9 @@ import argparse import os -import pipes import posixpath import re +import shlex import subprocess import sys import tempfile @@ -185,9 +185,9 @@ script_commands = [] for k, v in env.items(): script_commands.append('export %s=%s' % - (pipes.quote(k), pipes.quote(v))) + (shlex.quote(k), shlex.quote(v))) script_commands.extend([ - ' '.join(pipes.quote(x) for x in command_args), 'status=${?}', + ' '.join(shlex.quote(x) for x in command_args), 'status=${?}', 'echo "status=${status}"', 'exit ${status}' ]) adb_command.append('; '.join(script_commands))
diff --git a/third_party/crashpad/update.py b/third_party/crashpad/update.py index 3bf0082..bbf9856 100755 --- a/third_party/crashpad/update.py +++ b/third_party/crashpad/update.py
@@ -9,8 +9,8 @@ import argparse import os -import pipes import re +import shlex import subprocess import sys import tempfile @@ -139,7 +139,7 @@ '--force', '--index-filter', 'git rm -r --cached --ignore-unmatch ' + - ' '.join(pipes.quote(path) for path in parsed.exclude), + ' '.join(shlex.quote(path) for path in parsed.exclude), revision_old + '..UPDATE_TO'], cwd=toplevel, shell=IS_WINDOWS)
diff --git a/third_party/eigen3/README.chromium b/third_party/eigen3/README.chromium index 1f0af6c..ddd6b8f4 100644 --- a/third_party/eigen3/README.chromium +++ b/third_party/eigen3/README.chromium
@@ -1,8 +1,8 @@ Name: Eigen Short Name: eigen3 URL: http://eigen.tuxfamily.org/ -Version: 316eab8deb574d150f9cfc7f8b170156dc0cdd9f -Date: 2023/06/05 +Version: 7d7576f3262fa15c34d5575637bd8d7ff4a83f16 +Date: 2023/06/12 License: MPL 2 License File: LICENSE Security Critical: Yes
diff --git a/third_party/nearby/README.chromium b/third_party/nearby/README.chromium index 58e1d77..57bf2f454 100644 --- a/third_party/nearby/README.chromium +++ b/third_party/nearby/README.chromium
@@ -1,7 +1,7 @@ Name: Nearby Connections Library Short Name: Nearby URL: https://github.com/google/nearby -Version: 3a49499de49ca8c3fc5c26a8cee50b0d9cd69e63 +Version: 240159e8722c63f5be342ac8a6c07771478f8bb3 License: Apache 2.0 License File: LICENSE Security Critical: yes
diff --git a/third_party/protobuf/BUILD.gn b/third_party/protobuf/BUILD.gn index 22ee415..1444086 100644 --- a/third_party/protobuf/BUILD.gn +++ b/third_party/protobuf/BUILD.gn
@@ -112,10 +112,10 @@ configs += [ "//build/config/compiler:optimize_max" ] } - # Remove Sanitizers and coverage for a performance boost when fuzzing. This is - # OK because the only fuzzers that use protobuf are libprotobuf-mutator based - # fuzzers, and they don't actually target the protobuf code, they just use it. - configs -= not_fuzzed_remove_configs + # Remove coverage and Sanitizers other than ASan for a performance boost when + # fuzzing. ASan can't be removed here because of a bug preventing unsanitized + # code from using libc++, which protobuf_full uses. + configs -= not_fuzzed_remove_nonasan_configs configs += [ "//build/config/sanitizers:not_fuzzed" ] if (is_win) { @@ -149,6 +149,9 @@ visibility = [ ":*", + # Used for testing protobuf generation. + "//content/test:proto_test_support", + # requires descriptors & reflection; testonly. "//third_party/libprotobuf-mutator:*", @@ -206,17 +209,22 @@ # The Cast Core gRPC generator tool. "//third_party/cast_core/public/src/build/chromium:cast_core_grpc_generator", - - # Used for testing protobuf generation. - "//components/services/screen_ai:test_support", ] - # TODO(crbug.com/1338164): This ends up linking two copies of - # protobuf_lite_sources in some targets, which is an ODR violation. - sources = protobuf_lite_sources + protobuf_sources + protobuf_headers - deps = [ "//third_party/zlib" ] + # In component build, protobuf_full can't depend on protobuf_lite because + # it uses non-PROTOBUF_EXPORT symbols; in non-component build, protobuf_full + # must have protobuf_lite as a dependency instead of building + # protobuf_lite_sources to avoid ODR violations in targets that link both. + # See crbug.com/1338164. + if (is_component_build) { + sources = protobuf_lite_sources + protobuf_sources + protobuf_headers + } else { + sources = protobuf_sources + protobuf_headers + deps += [ ":protobuf_lite" ] + } + if (is_android) { libs = [ "log" ] # Used by src/google/protobuf/stubs/common.cc }
diff --git a/third_party/tensorflow-text/BUILD.gn b/third_party/tensorflow-text/BUILD.gn index 3d9f1de..9a8d5fe 100644 --- a/third_party/tensorflow-text/BUILD.gn +++ b/third_party/tensorflow-text/BUILD.gn
@@ -5,7 +5,10 @@ import("//build/config/sanitizers/sanitizers.gni") config("tensorflow-text-config") { - include_dirs = [ "src" ] + include_dirs = [ + "src", + "shims", + ] } config("tensorflow-text-flags") {
diff --git a/third_party/tensorflow-text/README.chromium b/third_party/tensorflow-text/README.chromium index addfb9b..861d684f 100644 --- a/third_party/tensorflow-text/README.chromium +++ b/third_party/tensorflow-text/README.chromium
@@ -1,8 +1,8 @@ Name: TensorFlow Text Short Name: tensorflow-text URL: https://github.com/tensorflow/text.git -Version: 2.7.3 -Date: 2021/12/08 +Version: 015d02a30c712c0d1dd31441db38c826ed8975af +Date: 2023/06/10 License: Apache 2.0 License File: LICENSE Security Critical: Yes
diff --git a/third_party/tensorflow-text/patches/0001-run-clang-format.patch b/third_party/tensorflow-text/patches/0001-run-clang-format.patch deleted file mode 100644 index 00f73d4..0000000 --- a/third_party/tensorflow-text/patches/0001-run-clang-format.patch +++ /dev/null
@@ -1,3432 +0,0 @@ -From ac56f05dfa07e5e2099f964ef87933d09c0d0f7f Mon Sep 17 00:00:00 2001 -From: Robert Ogden <robertogden@chromium.org> -Date: Wed, 8 Dec 2021 10:44:09 -0800 -Subject: [PATCH 1/5] run clang format - ---- - .../core/kernels/constrained_sequence.cc | 71 ++++++++------ - .../core/kernels/constrained_sequence.h | 34 ++++--- - .../kernels/constrained_sequence_kernel.cc | 44 +++++---- - .../core/kernels/darts_clone_trie_builder.cc | 3 +- - .../core/kernels/darts_clone_trie_builder.h | 3 +- - .../core/kernels/darts_clone_trie_wrapper.h | 3 +- - .../core/kernels/disjoint_set_forest.h | 23 +++-- - .../core/kernels/disjoint_set_forest_test.cc | 21 ++-- - .../core/kernels/edit_changes.proto | 1 - - .../core/kernels/fast_wordpiece_tokenizer.cc | 58 +++++++---- - .../core/kernels/fast_wordpiece_tokenizer.h | 37 ++++--- - ...fast_wordpiece_tokenizer_kernel_template.h | 10 +- - .../fast_wordpiece_tokenizer_model_builder.cc | 38 +++++--- - .../fast_wordpiece_tokenizer_model_builder.h | 9 +- - .../kernels/fast_wordpiece_tokenizer_test.cc | 6 +- - .../kernels/fast_wordpiece_tokenizer_utils.h | 6 +- - .../fast_wordpiece_tokenizer_utils_test.cc | 3 +- - ...iterbi_constrained_sequence_kernel_test.cc | 1 - - .../core/kernels/mst_op_kernels.cc | 20 ++-- - .../tensorflow_text/core/kernels/mst_solver.h | 72 ++++++++------ - .../mst_solver_random_comparison_test.cc | 35 ++++--- - .../core/kernels/mst_solver_test.cc | 16 ++-- - .../core/kernels/ngrams_kernel_template.h | 9 +- - .../core/kernels/ngrams_tflite_test.cc | 9 +- - .../kernels/ragged_tensor_to_tensor_tflite.cc | 49 ++++++---- - .../ragged_tensor_to_tensor_tflite_test.cc | 6 +- - .../core/kernels/regex_split.cc | 14 ++- - .../core/kernels/regex_split.h | 10 +- - .../core/kernels/regex_split_kernels.cc | 3 +- - .../core/kernels/rouge_l_kernel.cc | 61 ++++++------ - .../core/kernels/rouge_l_kernel_test.cc | 3 +- - .../core/kernels/sentence_breaking_kernels.cc | 3 +- - .../core/kernels/sentence_breaking_utils.cc | 24 +++-- - .../kernels/sentence_breaking_utils_test.cc | 15 ++- - .../core/kernels/sentence_fragmenter.cc | 73 +++++++------- - .../core/kernels/sentence_fragmenter.h | 44 +++++---- - .../core/kernels/sentence_fragmenter_v2.cc | 9 +- - .../core/kernels/sentence_fragmenter_v2.h | 6 +- - .../kernels/sentence_fragmenter_v2_test.cc | 18 ++-- - .../core/kernels/sentencepiece_kernels.cc | 16 ++-- - .../core/kernels/spanning_tree_iterator.cc | 39 +++++--- - .../core/kernels/spanning_tree_iterator.h | 8 +- - .../kernels/spanning_tree_iterator_test.cc | 13 ++- - .../kernels/split_merge_tokenize_kernel.cc | 10 +- - .../core/kernels/text_kernels_test_util.cc | 6 +- - .../core/kernels/text_kernels_test_util.h | 3 +- - .../kernels/tokenizer_from_logits_kernel.cc | 41 ++++---- - .../unicode_script_tokenize_kernel_test.cc | 6 +- - .../whitespace_tokenize_kernel_test.cc | 9 +- - .../core/kernels/whitespace_tokenizer.cc | 13 ++- - .../core/kernels/whitespace_tokenizer.h | 3 +- - .../whitespace_tokenizer_config_builder.h | 1 - - .../whitespace_tokenizer_kernel_template.h | 36 ++++--- - .../core/kernels/whitespace_tokenizer_test.cc | 4 +- - .../core/kernels/wordpiece_kernel.cc | 12 ++- - .../core/kernels/wordpiece_kernel_test.cc | 2 +- - .../core/kernels/wordpiece_tokenizer.cc | 96 ++++++++++++------- - .../core/kernels/wordpiece_tokenizer.h | 35 ++++--- - .../src/tensorflow_text/core/ops/mst_ops.cc | 2 +- - .../tensorflow_text/core/ops/rouge_l_op.cc | 4 +- - .../core/ops/split_merge_tokenize_op.cc | 2 +- - .../core/ops/tokenizer_from_logits_op.cc | 2 
+- - .../tensorflow_text/core/ops/wordpiece_op.cc | 10 +- - ...rap_whitespace_tokenizer_config_builder.cc | 11 +-- - .../core/pybinds/tflite_registrar.cc | 8 +- - 65 files changed, 732 insertions(+), 530 deletions(-) - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.cc -index 261f293a9da6b..07c1e68bc70d7 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.cc -@@ -33,8 +33,8 @@ namespace text { - // transition. - constexpr int kErrorState = -1; - --ScoreAccessor::ScoreAccessor(const Tensor &score_tensor, -- const Tensor &lengths_tensor) { -+ScoreAccessor::ScoreAccessor(const Tensor& score_tensor, -+ const Tensor& lengths_tensor) { - data_ = score_tensor.flat<float>().data(); - if (lengths_tensor.dtype() == DT_INT64) { - use_long_lengths_ = true; -@@ -58,7 +58,8 @@ ScoreAccessor::ScoreAccessor(const Tensor &score_tensor, - } - - // Get a score out of the data tensor. --float ScoreAccessor::GetScore(int batch_idx, int step_idx, -+float ScoreAccessor::GetScore(int batch_idx, -+ int step_idx, - int score_idx) const { - DCHECK_LE(batch_idx, batch_size_); - DCHECK_LE(step_idx, num_steps_); -@@ -75,18 +76,28 @@ int64 ScoreAccessor::GetLength(int batch_idx) const { - } - } - --int ScoreAccessor::batch_size() const { return batch_size_; } --int ScoreAccessor::num_steps() const { return num_steps_; } --int ScoreAccessor::num_scores() const { return num_scores_; } --bool ScoreAccessor::has_explicit_batch() const { return has_explicit_batch_; } -+int ScoreAccessor::batch_size() const { -+ return batch_size_; -+} -+int ScoreAccessor::num_steps() const { -+ return num_steps_; -+} -+int ScoreAccessor::num_scores() const { -+ return num_scores_; -+} -+bool ScoreAccessor::has_explicit_batch() const { -+ return has_explicit_batch_; -+} - - // Perform Viterbi analysis on a single batch item. 
- void ViterbiAnalysis( -- const ScoreAccessor &scores, -- const tensorflow::TTypes<const float>::Matrix &transition_weights, -- const tensorflow::TTypes<const bool>::Matrix &allowed_transitions, -- const int batch, bool use_log_space, bool use_start_end_states, -- int32 *output_data) { -+ const ScoreAccessor& scores, -+ const tensorflow::TTypes<const float>::Matrix& transition_weights, -+ const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, -+ const int batch, -+ bool use_log_space, -+ bool use_start_end_states, -+ int32* output_data) { - VLOG(2) << "Analyzing batch " << batch; - const bool has_transition_weights = transition_weights.size() != 0; - const bool has_allowed_transitions = allowed_transitions.size() != 0; -@@ -109,12 +120,12 @@ void ViterbiAnalysis( - num_steps, std::vector<int>(num_states, kErrorState)); - - // Set current and previous references for step 0 -- std::vector<double> *previous_scores = &scores_a; -- std::vector<double> *current_scores = &scores_b; -+ std::vector<double>* previous_scores = &scores_a; -+ std::vector<double>* current_scores = &scores_b; - - const bool vlog3 = VLOG_IS_ON(3); - for (int curr_state = 0; curr_state < num_states; ++curr_state) { -- std::vector<int> &current_bps = backpointers[0]; -+ std::vector<int>& current_bps = backpointers[0]; - if (use_start_end_states) { - // Initialize the zeroth step BPs to kOutOfBoundsIndex for all states - // where the OOB->state transition is valid, and set scores as needed. -@@ -174,19 +185,20 @@ - const double max_score = - *std::max_element(current_scores->begin(), current_scores->end()); - if (max_score > 0) { -- for (double &score : *current_scores) score /= max_score; -+ for (double& score : *current_scores) -+ score /= max_score; - } - } - - // Swap current and previous score arrays, as we are advancing a step. -- std::vector<double> *tmp = previous_scores; -+ std::vector<double>* tmp = previous_scores; - previous_scores = current_scores; - current_scores = tmp; - - // Handle all steps save for the first and last in this loop. - for (int step = 1; step < num_steps; ++step) { -- const std::vector<int> &previous_bps = backpointers[step - 1]; -- std::vector<int> &current_bps = backpointers[step]; -+ const std::vector<int>& previous_bps = backpointers[step - 1]; -+ std::vector<int>& current_bps = backpointers[step]; - - for (int curr_state = 0; curr_state < num_states; ++curr_state) { - int best_source_state = kErrorState; -@@ -206,7 +218,7 @@ - !allowed_transitions(prev_state, curr_state)) { - if (vlog3) { - LOG(INFO) << "(" << batch << ", " << step << ", " << prev_state -- << "->" << curr_state << "): disallowed."; -+ << "->" << curr_state << "): disallowed."; - } - continue; - } -@@ -258,20 +270,21 @@ - const double max_score = - *std::max_element(current_scores->begin(), current_scores->end()); - if (max_score > 0) { -- for (double &score : *current_scores) score /= max_score; -+ for (double& score : *current_scores) -+ score /= max_score; - } - } - - // After each step, switch the current scores to the previous scores and - // use the previous previous scores as the current scores. -- std::vector<double> *tmp = previous_scores; -+ std::vector<double>* tmp = previous_scores; - previous_scores = current_scores; - current_scores = tmp; - } - - // Handle the final transition out of the sequence.
- int final_state = out_of_bounds_index; -- const std::vector<int> &previous_bps = backpointers[num_steps - 1]; -+ const std::vector<int>& previous_bps = backpointers[num_steps - 1]; - int best_source_state = kErrorState; - float final_score = std::numeric_limits<float>::lowest(); - -@@ -354,11 +367,13 @@ void ViterbiAnalysis( - } - - void GreedyAnalysis( -- const ScoreAccessor &scores, -- const tensorflow::TTypes<const float>::Matrix &transition_weights, -- const tensorflow::TTypes<const bool>::Matrix &allowed_transitions, -- int batch, bool use_log_space, bool use_start_end_states, -- int32 *output_data) { -+ const ScoreAccessor& scores, -+ const tensorflow::TTypes<const float>::Matrix& transition_weights, -+ const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, -+ int batch, -+ bool use_log_space, -+ bool use_start_end_states, -+ int32* output_data) { - const bool has_transition_weights = transition_weights.size() != 0; - const bool has_allowed_transitions = allowed_transitions.size() != 0; - const int num_states = scores.num_scores(); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.h -index e9821489ad2a3..6e473af728f77 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.h -@@ -24,8 +24,8 @@ namespace text { - - class ScoreAccessor { - public: -- explicit ScoreAccessor(const Tensor &score_tensor, -- const Tensor &lengths_tensor); -+ explicit ScoreAccessor(const Tensor& score_tensor, -+ const Tensor& lengths_tensor); - - // Get a score out of the data tensor. - float GetScore(int batch_idx, int step_idx, int score_idx) const; -@@ -39,11 +39,11 @@ class ScoreAccessor { - - private: - // A pointer into the underlying data of the score tensor. Not owned. -- const float *data_; -+ const float* data_; - - // A pointer into the underlying data of the lengths tensor. Not owned. -- const int *lengths_; -- const int64 *long_lengths_; -+ const int* lengths_; -+ const int64* long_lengths_; - - // Whether the passed lengths tensor is int32 or int64. - bool use_long_lengths_; -@@ -72,19 +72,23 @@ class ScoreAccessor { - - // Perform Viterbi analysis on a single batch item. - void ViterbiAnalysis( -- const ScoreAccessor &scores, -- const tensorflow::TTypes<const float>::Matrix &transition_weights, -- const tensorflow::TTypes<const bool>::Matrix &allowed_transitions, -- const int batch, bool use_log_space, bool use_start_end_states, -- int32 *output_data); -+ const ScoreAccessor& scores, -+ const tensorflow::TTypes<const float>::Matrix& transition_weights, -+ const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, -+ const int batch, -+ bool use_log_space, -+ bool use_start_end_states, -+ int32* output_data); - - // Perform a greedy analysis on a single batch item. 
- void GreedyAnalysis( -- const ScoreAccessor &scores, -- const tensorflow::TTypes<const float>::Matrix &transition_weights, -- const tensorflow::TTypes<const bool>::Matrix &allowed_transitions, -- int batch, bool use_log_space, bool use_start_end_states, -- int32 *output_data); -+ const ScoreAccessor& scores, -+ const tensorflow::TTypes<const float>::Matrix& transition_weights, -+ const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, -+ int batch, -+ bool use_log_space, -+ bool use_start_end_states, -+ int32* output_data); - - } // namespace text - } // namespace tensorflow -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel.cc -index c2b2528bd6ebf..869a5c3e59371 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel.cc -@@ -57,10 +57,10 @@ namespace { - - // Validate that a given constraint tensor is the proper shape (dimension - // 2, with shape [num_states + 1, num_states + 1]. --tensorflow::Status ValidateConstraintTensor(const Tensor &tensor, -+tensorflow::Status ValidateConstraintTensor(const Tensor& tensor, - const int num_states, - const bool use_start_end_states, -- const string &name) { -+ const string& name) { - if (tensor.shape().dims() != 2) { - return InvalidArgument( - tensorflow::strings::StrCat(name, " must be of rank 2")); -@@ -86,7 +86,7 @@ tensorflow::Status ValidateConstraintTensor(const Tensor &tensor, - template <typename Tin, typename Tsplits> - class ConstrainedSequence : public OpKernel { - public: -- explicit ConstrainedSequence(OpKernelConstruction *context) -+ explicit ConstrainedSequence(OpKernelConstruction* context) - : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("use_viterbi", &use_viterbi_)); - OP_REQUIRES_OK(context, context->GetAttr("use_log_space", &use_log_space_)); -@@ -94,13 +94,13 @@ class ConstrainedSequence : public OpKernel { - &use_start_end_states_)); - } - -- void Compute(OpKernelContext *context) override { -- const auto &score_tensor = context->input(0); -+ void Compute(OpKernelContext* context) override { -+ const auto& score_tensor = context->input(0); - OP_REQUIRES(context, - (score_tensor.shape().dims() == 2) || - (score_tensor.shape().dims() == 3), - InvalidArgument("The score tensor must be of rank 2 or 3.")); -- const auto &lengths_tensor = context->input(1); -+ const auto& lengths_tensor = context->input(1); - - ScoreAccessor scores(score_tensor, lengths_tensor); - -@@ -137,7 +137,7 @@ class ConstrainedSequence : public OpKernel { - "The scores tensor is too short for the longest sequence length.")); - - // Validate the constraint tensors. 
-- const auto &allowed_transitions_tensor = context->input(2); -+ const auto& allowed_transitions_tensor = context->input(2); - bool has_allowed_transitions = - allowed_transitions_tensor.NumElements() != 0; - VLOG(4) << allowed_transitions_tensor.NumElements(); -@@ -148,7 +148,7 @@ class ConstrainedSequence : public OpKernel { - "allowed_transitions")); - } - -- const auto &transition_weights_tensor = context->input(3); -+ const auto& transition_weights_tensor = context->input(3); - - VLOG(4) << transition_weights_tensor.NumElements(); - bool has_transition_weights = transition_weights_tensor.NumElements() != 0; -@@ -171,23 +171,23 @@ class ConstrainedSequence : public OpKernel { - const tensorflow::Tensor empty_float(DT_FLOAT, TensorShape({0, 0})); - const tensorflow::Tensor empty_bool(DT_BOOL, TensorShape({0, 0})); - -- const auto &transition_weights = -+ const auto& transition_weights = - has_transition_weights ? transition_weights_tensor.matrix<float>() - : empty_float.matrix<float>(); - -- const auto &allowed_transitions = -+ const auto& allowed_transitions = - has_allowed_transitions ? allowed_transitions_tensor.matrix<bool>() - : empty_bool.matrix<bool>(); - -- Tensor *output; -+ Tensor* output; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({total_length}), &output)); -- int32 *output_data = output->flat<int32>().data(); -+ int32* output_data = output->flat<int32>().data(); - -- Tensor *offsets; -+ Tensor* offsets; - OP_REQUIRES_OK(context, context->allocate_output( - 1, TensorShape({batch_size + 1}), &offsets)); -- Tsplits *offset_data = offsets->flat<Tsplits>().data(); -+ Tsplits* offset_data = offsets->flat<Tsplits>().data(); - offset_data[0] = 0; - - for (int batch = 0; batch < batch_size; ++batch) { -@@ -207,18 +207,22 @@ class ConstrainedSequence : public OpKernel { - private: - // Perform Viterbi analysis on a single batch item. - void DoViterbiAnalysis( -- const tensorflow::TTypes<const float>::Matrix &transition_weights, -- const tensorflow::TTypes<const bool>::Matrix &allowed_transitions, -- const int batch, const ScoreAccessor &scores, int32 *output_data) { -+ const tensorflow::TTypes<const float>::Matrix& transition_weights, -+ const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, -+ const int batch, -+ const ScoreAccessor& scores, -+ int32* output_data) { - ViterbiAnalysis(scores, transition_weights, allowed_transitions, batch, - use_log_space_, use_start_end_states_, output_data); - } - - // Perform a greedy analysis on a single batch item. 
- void DoGreedyAnalysis( -- const tensorflow::TTypes<const float>::Matrix &transition_weights, -- const tensorflow::TTypes<const bool>::Matrix &allowed_transitions, -- int batch, const ScoreAccessor &scores, int32 *output_data) { -+ const tensorflow::TTypes<const float>::Matrix& transition_weights, -+ const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, -+ int batch, -+ const ScoreAccessor& scores, -+ int32* output_data) { - GreedyAnalysis(scores, transition_weights, allowed_transitions, batch, - use_log_space_, use_start_end_states_, output_data); - } -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc -index c1a1887da7467..87035a835ae5e 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc -@@ -32,7 +32,8 @@ absl::StatusOr<std::vector<uint32_t>> BuildDartsCloneTrie( - } - - absl::StatusOr<std::vector<uint32_t>> BuildDartsCloneTrie( -- const std::vector<std::string>& keys, const std::vector<int>& values) { -+ const std::vector<std::string>& keys, -+ const std::vector<int>& values) { - if (keys.size() != values.size()) { - return absl::InvalidArgumentError(absl::StrCat( - "The sizes of 'keys' and 'values' must be equal! Keys size: ", -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.h -index 171bd4f8d9b78..9b47debd0a2f4 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.h -@@ -39,7 +39,8 @@ namespace trie_utils { - // addition, the empty string "" should not be in `keys`, because darts_clone - // does not support that. Furthermore, all `values` should be non-negative. - absl::StatusOr<std::vector<uint32_t>> BuildDartsCloneTrie( -- const std::vector<std::string>& keys, const std::vector<int>& values); -+ const std::vector<std::string>& keys, -+ const std::vector<int>& values); - - // A variant where the values are indexes in the keys: i.e., the value for - // `keys[i]` is the index `i`. -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h -index a51bcec6d87e1..fce263372e8a4 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h -@@ -120,7 +120,8 @@ class DartsCloneTrieWrapper { - : trie_array_(trie_array) {} - - // The actual implementation of TryTraverseSeveralSteps. 
-- bool TryTraverseSeveralSteps(TraversalCursor& cursor, const char* ptr, -+ bool TryTraverseSeveralSteps(TraversalCursor& cursor, -+ const char* ptr, - int size) const { - uint32_t cur_id = cursor.node_id; - uint32_t cur_unit = cursor.unit; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest.h -index fcf5091434804..deff86c9f1f19 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest.h -@@ -92,19 +92,21 @@ template <class Index, bool kUseUnionByRank> - void DisjointSetForest<Index, kUseUnionByRank>::Init(Index size) { - size_ = size; - parents_.resize(size_); -- if (kUseUnionByRank) ranks_.resize(size_); -+ if (kUseUnionByRank) -+ ranks_.resize(size_); - - // Create singleton sets. - for (Index i = 0; i < size_; ++i) { - parents_[i] = i; -- if (kUseUnionByRank) ranks_[i] = 0; -+ if (kUseUnionByRank) -+ ranks_[i] = 0; - } - } - - template <class Index, bool kUseUnionByRank> - Index DisjointSetForest<Index, kUseUnionByRank>::FindRoot(Index element) { - DCHECK_LT(element, size()); -- Index *const __restrict parents = parents_.data(); -+ Index* const __restrict parents = parents_.data(); - - // Walk up to the root of the |element|. Unroll the first two comparisons - // because path compression ensures most FindRoot() calls end there. In -@@ -112,11 +114,13 @@ Index DisjointSetForest<Index, kUseUnionByRank>::FindRoot(Index element) { - // path compression updates can be skipped. - Index current = element; - Index parent = parents[current]; -- if (current == parent) return current; // |element| is a root -+ if (current == parent) -+ return current; // |element| is a root - current = parent; - parent = parents[current]; -- if (current == parent) return current; // |element| is the child of a root -- do { // otherwise, continue upwards until root -+ if (current == parent) -+ return current; // |element| is the child of a root -+ do { // otherwise, continue upwards until root - current = parent; - parent = parents[current]; - } while (current != parent); -@@ -147,12 +151,13 @@ void DisjointSetForest<Index, kUseUnionByRank>::UnionOfRoots(Index root1, - DCHECK_LT(root2, size()); - DCHECK_EQ(root1, parents_[root1]); - DCHECK_EQ(root2, parents_[root2]); -- if (root1 == root2) return; // already merged -- Index *const __restrict parents = parents_.data(); -+ if (root1 == root2) -+ return; // already merged -+ Index* const __restrict parents = parents_.data(); - - if (kUseUnionByRank) { - // Attach the lesser-rank root to the higher-rank root. -- Index *const __restrict ranks = ranks_.data(); -+ Index* const __restrict ranks = ranks_.data(); - const Index rank1 = ranks[root1]; - const Index rank2 = ranks[root2]; - if (rank2 < rank1) { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest_test.cc -index a93e31d213f3f..2d3e09bd2fd85 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest_test.cc -@@ -36,10 +36,10 @@ class DisjointSetForestTest : public ::testing::Test { - using Index = typename Forest::IndexType; - - // Expects that the |expected_sets| and |forest| match. 
-- void ExpectSets(const std::set<std::set<Index>> &expected_sets, -- Forest *forest) { -+ void ExpectSets(const std::set<std::set<Index>>& expected_sets, -+ Forest* forest) { - std::set<std::pair<Index, Index>> expected_pairs; -- for (const auto &expected_set : expected_sets) { -+ for (const auto& expected_set : expected_sets) { - for (auto it = expected_set.begin(); it != expected_set.end(); ++it) { - for (auto jt = expected_set.begin(); jt != expected_set.end(); ++jt) { - expected_pairs.emplace(*it, *jt); -@@ -61,11 +61,14 @@ class DisjointSetForestTest : public ::testing::Test { - } - }; - --using Forests = ::testing::Types< -- DisjointSetForest<uint8, false>, DisjointSetForest<uint8, true>, -- DisjointSetForest<uint16, false>, DisjointSetForest<uint16, true>, -- DisjointSetForest<uint32, false>, DisjointSetForest<uint32, true>, -- DisjointSetForest<uint64, false>, DisjointSetForest<uint64, true>>; -+using Forests = ::testing::Types<DisjointSetForest<uint8, false>, -+ DisjointSetForest<uint8, true>, -+ DisjointSetForest<uint16, false>, -+ DisjointSetForest<uint16, true>, -+ DisjointSetForest<uint32, false>, -+ DisjointSetForest<uint32, true>, -+ DisjointSetForest<uint64, false>, -+ DisjointSetForest<uint64, true>>; - TYPED_TEST_SUITE(DisjointSetForestTest, Forests); - - TYPED_TEST(DisjointSetForestTest, DefaultEmpty) { -@@ -114,7 +117,7 @@ class DisjointSetForestNoUnionByRankTest : public ::testing::Test { - using Forest = DisjointSetForest<uint32, false>; - - // Expects that the roots of the |forest| match |expected_roots|. -- void ExpectRoots(const std::vector<uint32> &expected_roots, Forest *forest) { -+ void ExpectRoots(const std::vector<uint32>& expected_roots, Forest* forest) { - ASSERT_EQ(expected_roots.size(), forest->size()); - for (uint32 i = 0; i < forest->size(); ++i) { - EXPECT_EQ(expected_roots[i], forest->FindRoot(i)); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/edit_changes.proto b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/edit_changes.proto -index 62d622b7a7c2d..08f62778c4f46 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/edit_changes.proto -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/edit_changes.proto -@@ -12,4 +12,3 @@ message EditChanges { - - repeated Change change = 1; - } -- -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.cc -index c1f0e4ea48c90..9dede81af00da 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.cc -@@ -155,7 +155,8 @@ absl::StatusOr<std::string> FastWordpieceTokenizer::Detokenize( - } - - int FastWordpieceTokenizer::SkipTheRemainingOfWordAndTrailingWhiteSpaces( -- absl::string_view input, int& cur_pos) const { -+ absl::string_view input, -+ int& cur_pos) const { - const int input_size = input.size(); - UChar32 cur_unicode_char; - int next_pos; -@@ -182,8 +183,10 @@ int FastWordpieceTokenizer::SkipTheRemainingOfWordAndTrailingWhiteSpaces( - - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void FastWordpieceTokenizer::TokenizeTextImpl( -- absl::string_view input_text, std::vector<std::string>* output_pieces, -- std::vector<int>* output_ids, std::vector<int>* output_start_offsets, -+ absl::string_view input_text, -+ std::vector<std::string>* 
output_pieces, -+ std::vector<int>* output_ids, -+ std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - static_assert(kGetPieces || kGetIds, - "At least one of `kGetPieces` and `kGetIds` should be true."); -@@ -266,7 +269,8 @@ void FastWordpieceTokenizer::TokenizeTextImpl( - cur_offset_in_input_word, output_pieces, output_ids, - output_start_offsets, output_end_offsets); - // Skip the whitespace. -- if (is_white_space) cur_pos = next_pos; -+ if (is_white_space) -+ cur_pos = next_pos; - // Continue in the outer while loop to process the remaining input. - continue; - } -@@ -336,8 +340,10 @@ void FastWordpieceTokenizer::TokenizeTextImpl( - // immediately identifies the next matching token as "##efz". - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void FastWordpieceTokenizer::TokenizeSingleWordImpl( -- absl::string_view input_word, int input_word_offset_in_text, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ absl::string_view input_word, -+ int input_word_offset_in_text, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - static_assert(kGetPieces || kGetIds, -@@ -480,10 +486,12 @@ void FastWordpieceTokenizer::TokenizeSingleWordImpl( - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - ABSL_ATTRIBUTE_ALWAYS_INLINE bool - FastWordpieceTokenizer::TryFollowFailureLinkAndCollectTokens( -- absl::string_view input_word, int input_word_offset_in_text, -+ absl::string_view input_word, -+ int input_word_offset_in_text, - int& cur_offset_in_input_word, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& node, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - int cur_node_data; -@@ -531,9 +539,12 @@ FastWordpieceTokenizer::TryFollowFailureLinkAndCollectTokens( - - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void FastWordpieceTokenizer::AppendTokenToOutput( -- absl::string_view input_word, int input_word_offset_in_text, -- int& cur_offset_in_input_word, int encoded_token_value, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ absl::string_view input_word, -+ int input_word_offset_in_text, -+ int& cur_offset_in_input_word, -+ int encoded_token_value, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - auto token_id = -@@ -584,10 +595,13 @@ void FastWordpieceTokenizer::AppendTokenToOutput( - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - ABSL_ATTRIBUTE_ALWAYS_INLINE void - FastWordpieceTokenizer::HandleTheRemainingStringOnTriePath( -- absl::string_view input_word, int input_word_offset_in_text, -+ absl::string_view input_word, -+ int input_word_offset_in_text, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, -- int& original_num_tokens, int& cur_offset_in_input_word, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ int& original_num_tokens, -+ int& cur_offset_in_input_word, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - if (cur_node.node_id == 
trie_utils::DartsCloneTrieWrapper::kRootNodeId) { -@@ -642,8 +656,11 @@ FastWordpieceTokenizer::HandleTheRemainingStringOnTriePath( - - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void FastWordpieceTokenizer::ResetOutputAppendUnknownToken( -- int input_word_offset_in_text, int input_size, int& original_num_tokens, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ int input_word_offset_in_text, -+ int input_size, -+ int& original_num_tokens, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - if constexpr (kGetPieces) { -@@ -669,10 +686,13 @@ void FastWordpieceTokenizer::ResetOutputAppendUnknownToken( - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - ABSL_ATTRIBUTE_ALWAYS_INLINE bool - FastWordpieceTokenizer::TryHandleTheInputWordBeingSuffixIndicatorItself( -- absl::string_view input_word, int input_word_offset_in_text, -+ absl::string_view input_word, -+ int input_word_offset_in_text, - const trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, -- int& cur_offset_in_input_word, int original_num_tokens, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ int& cur_offset_in_input_word, -+ int original_num_tokens, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - // Handle the special case where the input word is the suffix indicator (e.g., -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h -index c998f9567488e..4ab48f5537f0d 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h -@@ -81,13 +81,15 @@ class FastWordpieceTokenizer { - int input_word_offset_in_text = 0) const; - - // An override not returning `output_pieces`. -- void Tokenize(absl::string_view input, std::vector<int>* output_ids, -+ void Tokenize(absl::string_view input, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets, - int input_word_offset_in_text = 0) const; - - // An override only returning `output_ids`. -- void Tokenize(absl::string_view input, std::vector<int>* output_ids, -+ void Tokenize(absl::string_view input, -+ std::vector<int>* output_ids, - int input_word_offset_in_text = 0) const; - - // Detokenizes wordpiece ids into a vector of tokens. -@@ -151,10 +153,12 @@ class FastWordpieceTokenizer { - // after the new word piece tokens have been appended to the output. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - bool TryFollowFailureLinkAndCollectTokens( -- absl::string_view input_word, int input_word_offset_in_text, -+ absl::string_view input_word, -+ int input_word_offset_in_text, - int& cur_offset_in_input_word, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& node, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - -@@ -200,10 +204,13 @@ class FastWordpieceTokenizer { - // outputs and appends unk_token at the end as expected. 
- template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void HandleTheRemainingStringOnTriePath( -- absl::string_view input_word, int input_word_offset_in_text, -+ absl::string_view input_word, -+ int input_word_offset_in_text, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, -- int& original_num_tokens, int& cur_offset_in_input_word, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ int& original_num_tokens, -+ int& cur_offset_in_input_word, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - -@@ -222,8 +229,11 @@ class FastWordpieceTokenizer { - // after this method. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void ResetOutputAppendUnknownToken( -- int input_word_offset_in_text, int input_size, int& original_num_tokens, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ int input_word_offset_in_text, -+ int input_size, -+ int& original_num_tokens, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - -@@ -232,10 +242,13 @@ class FastWordpieceTokenizer { - // output_ids, and returns true. Otherwise, it does nothing and returns false. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - bool TryHandleTheInputWordBeingSuffixIndicatorItself( -- absl::string_view input_word, int input_word_offset_in_text, -+ absl::string_view input_word, -+ int input_word_offset_in_text, - const trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, -- int& cur_offset_in_input_word, int original_num_tokens, -- std::vector<std::string>* output_pieces, std::vector<int>* output_ids, -+ int& cur_offset_in_input_word, -+ int original_num_tokens, -+ std::vector<std::string>* output_pieces, -+ std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h -index 446edf835853e..d71b9bc98e466 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h -@@ -383,12 +383,12 @@ absl::Status FastWordpieceDetokenizeOp<Rt>::ShapeInference( - return absl::OkStatus(); - } - -- template <tflite::shim::Runtime Rt> -- const char FastWordpieceDetokenizeOp<Rt>::kOpName[] = -- "TFText>FastWordpieceDetokenize"; -+template <tflite::shim::Runtime Rt> -+const char FastWordpieceDetokenizeOp<Rt>::kOpName[] = -+ "TFText>FastWordpieceDetokenize"; - -- template <tflite::shim::Runtime Rt> -- const char FastWordpieceDetokenizeOp<Rt>::kDoc[] = R"doc( -+template <tflite::shim::Runtime Rt> -+const char FastWordpieceDetokenizeOp<Rt>::kDoc[] = R"doc( - Detokenizes sub-word ids into sentences. 
- - ### Example: -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.cc -index a7a939e8f528b..0f4d213547f83 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.cc -@@ -31,12 +31,12 @@ - #include "icu4c/source/common/unicode/umachine.h" - #include "icu4c/source/common/unicode/utf8.h" - #include "tensorflow/lite/kernels/shim/status_macros.h" --#include "tensorflow_text/core/kernels/sentence_fragmenter_v2.h" --#include "tensorflow_text/core/kernels/wordpiece_tokenizer.h" - #include "tensorflow_text/core/kernels/darts_clone_trie_builder.h" - #include "tensorflow_text/core/kernels/darts_clone_trie_wrapper.h" - #include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_generated.h" - #include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h" -+#include "tensorflow_text/core/kernels/sentence_fragmenter_v2.h" -+#include "tensorflow_text/core/kernels/wordpiece_tokenizer.h" - - namespace tensorflow { - namespace text { -@@ -49,7 +49,8 @@ static constexpr char kInvalidControlChar = 0x11; - // A wrapper of vocab tokens that will be used to build the trie. - class TrieVocabToken { - public: -- TrieVocabToken(absl::string_view token, int token_id, -+ TrieVocabToken(absl::string_view token, -+ int token_id, - absl::string_view suffix_indicator) - : token_(std::string(token)), token_id_(token_id) { - if (!suffix_indicator.empty() && token_ != suffix_indicator && -@@ -293,9 +294,12 @@ class FastWordpieceBuilder { - }; - - absl::Status FastWordpieceBuilder::BuildModel( -- const std::vector<std::string>& vocab, int max_bytes_per_token, -- absl::string_view suffix_indicator, absl::string_view unk_token, -- bool no_pretokenization, bool support_detokenization) { -+ const std::vector<std::string>& vocab, -+ int max_bytes_per_token, -+ absl::string_view suffix_indicator, -+ absl::string_view unk_token, -+ bool no_pretokenization, -+ bool support_detokenization) { - unk_token_ = std::string(unk_token); - suffix_indicator_ = std::string(suffix_indicator); - max_bytes_per_token_ = max_bytes_per_token; -@@ -781,7 +785,8 @@ absl::Status FastWordpieceBuilder::BuildFailureStructure( - } - - absl::Status FastWordpieceBuilder::AssignFailureLinkAndPops( -- uint32_t cur_node, uint32_t failure_link, -+ uint32_t cur_node, -+ uint32_t failure_link, - const std::vector<int>& one_step_pops, - int parent_failure_pops_offset_length) { - if (failure_link == fast_wordpiece_tokenizer_utils::kNullNode) { -@@ -833,7 +838,8 @@ absl::Status FastWordpieceBuilder::AssignFailureLinkAndPops( - } - - void FastWordpieceBuilder::GetFailurePopsAndAppendToOut( -- uint32_t failure_pops_offset_length, std::vector<int>& out_failure_pops) { -+ uint32_t failure_pops_offset_length, -+ std::vector<int>& out_failure_pops) { - if (failure_pops_offset_length == - fast_wordpiece_tokenizer_utils::kNullFailurePopsList) { - return; -@@ -950,14 +956,16 @@ absl::StatusOr<std::string> FastWordpieceBuilder::ExportToFlatBuffer() const { - } // namespace - - absl::StatusOr<std::string> BuildModelAndExportToFlatBuffer( -- const std::vector<std::string>& vocab, int max_bytes_per_token, -- absl::string_view suffix_indicator, absl::string_view unk_token, -- bool no_pretokenization, bool support_detokenization) { 
-+ const std::vector<std::string>& vocab, -+ int max_bytes_per_token, -+ absl::string_view suffix_indicator, -+ absl::string_view unk_token, -+ bool no_pretokenization, -+ bool support_detokenization) { - FastWordpieceBuilder builder; -- SH_RETURN_IF_ERROR(builder.BuildModel(vocab, max_bytes_per_token, -- suffix_indicator, unk_token, -- no_pretokenization, -- support_detokenization)); -+ SH_RETURN_IF_ERROR(builder.BuildModel( -+ vocab, max_bytes_per_token, suffix_indicator, unk_token, -+ no_pretokenization, support_detokenization)); - SH_ASSIGN_OR_RETURN(std::string flatbuffer, builder.ExportToFlatBuffer()); - return flatbuffer; - } -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h -index 808dfb7bf92ba..769e66c1460f3 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h -@@ -44,9 +44,12 @@ namespace text { - // Returns: - // The bytes of the flatbuffer that stores the model. - absl::StatusOr<std::string> BuildModelAndExportToFlatBuffer( -- const std::vector<std::string>& vocab, int max_bytes_per_token, -- absl::string_view suffix_indicator, absl::string_view unk_token, -- bool no_pretokenization = false, bool support_detokenization = false); -+ const std::vector<std::string>& vocab, -+ int max_bytes_per_token, -+ absl::string_view suffix_indicator, -+ absl::string_view unk_token, -+ bool no_pretokenization = false, -+ bool support_detokenization = false); - } // namespace text - } // namespace tensorflow - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc -index 5198348775315..8aa05e86531ad 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc -@@ -1228,7 +1228,8 @@ TEST_P(TestTokenizeSingleWord, TestNoOutputPiecesWithPositiveSentenceOffsets) { - } - - INSTANTIATE_TEST_SUITE_P( -- FastWordpieceTokenizerParameterizedTest, TestTokenizeSingleWord, -+ FastWordpieceTokenizerParameterizedTest, -+ TestTokenizeSingleWord, - testing::ValuesIn(GetTestSpecsForTokenizeSingleWord())); - - // Test End-to-end FastWordPieceTokenization for tokenizing general texts. -@@ -2479,7 +2480,8 @@ TEST_P(TestTokenizeDetokenize, Test) { - } - - INSTANTIATE_TEST_SUITE_P( -- FastWordpieceTokenizerDetokenizeParameterizedTest, TestTokenizeDetokenize, -+ FastWordpieceTokenizerDetokenizeParameterizedTest, -+ TestTokenizeDetokenize, - testing::ValuesIn(GetTestSpecsForTokenizeDetokenize())); - - } // namespace -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h -index 9f0122c85a3e0..ba2cf6ab40cf6 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h -@@ -133,7 +133,8 @@ static constexpr uint32_t kMaskToEncodeVocabTokenId = - // Encodes a token into the encoded value. 
`token_length` is without the suffix - // indicator. The result is always a non-negative integer. Only used in building - // the model (in flatbuffer), not in doing WordPiece tokenization. --inline absl::StatusOr<int> EncodeToken(int token_id, int token_length, -+inline absl::StatusOr<int> EncodeToken(int token_id, -+ int token_length, - bool is_suffix_token) { - const int encoded_value = (is_suffix_token << kBitToIndicateSuffixToken) | - (token_id << kBitsToEncodeVocabTokenLength) | -@@ -227,7 +228,8 @@ inline uint32_t EncodeFailurePopList(int offset, int length) { - // Decodes the offset (in the failure pop pool) and the length of a failure pop - // list from the compact representation (an integer). - inline void GetFailurePopsOffsetAndLength(uint32_t offset_and_length, -- int& out_offset, int& out_length) { -+ int& out_offset, -+ int& out_length) { - out_offset = offset_and_length >> kBitsToEncodeFailurePopsListSize; - out_length = (offset_and_length & kMaskToEncodeFailurePopsListSize) + 1; - } -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils_test.cc -index cc160dbf8eaa0..931438e6d7ab1 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils_test.cc -@@ -90,7 +90,8 @@ TEST_P(TokenEncodingDecodingTest, GeneralTest) { - EXPECT_THAT(IsSuffixToken(encoded_value), spec.is_suffix_token); - } - --INSTANTIATE_TEST_SUITE_P(TestTokenEncodingDecoding, TokenEncodingDecodingTest, -+INSTANTIATE_TEST_SUITE_P(TestTokenEncodingDecoding, -+ TokenEncodingDecodingTest, - testing::ValuesIn(GetTokenSpecs())); - - struct FailurePopListSpec { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_viterbi_constrained_sequence_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_viterbi_constrained_sequence_kernel_test.cc -index f789c094cf383..b7db6069ebeca 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_viterbi_constrained_sequence_kernel_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_viterbi_constrained_sequence_kernel_test.cc -@@ -37,7 +37,6 @@ using tensorflow::TensorShape; - using tensorflow::text_kernels_test_util::MatrixEq; - using tensorflow::text_kernels_test_util::VectorEq; - -- - // TODO(b/122968457): There are a bunch of tests that only validate !ok instead - // of looking for specific error messages; fix that. 
- -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_op_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_op_kernels.cc -index dc0adedecc8f4..d6b3a91dd4067 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_op_kernels.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_op_kernels.cc -@@ -33,14 +33,14 @@ namespace text { - template <class Index, class Score> - class MaxSpanningTreeOpKernel : public tensorflow::OpKernel { - public: -- explicit MaxSpanningTreeOpKernel(tensorflow::OpKernelConstruction *context) -+ explicit MaxSpanningTreeOpKernel(tensorflow::OpKernelConstruction* context) - : tensorflow::OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("forest", &forest_)); - } - -- void Compute(tensorflow::OpKernelContext *context) override { -- const tensorflow::Tensor &num_nodes_tensor = context->input(0); -- const tensorflow::Tensor &scores_tensor = context->input(1); -+ void Compute(tensorflow::OpKernelContext* context) override { -+ const tensorflow::Tensor& num_nodes_tensor = context->input(0); -+ const tensorflow::Tensor& scores_tensor = context->input(1); - - // Check ranks. - OP_REQUIRES(context, num_nodes_tensor.dims() == 1, -@@ -73,8 +73,8 @@ class MaxSpanningTreeOpKernel : public tensorflow::OpKernel { - " but expected ", shape_bxmxm.DebugString())); - - // Create outputs. -- tensorflow::Tensor *max_scores_tensor = nullptr; -- tensorflow::Tensor *argmax_sources_tensor = nullptr; -+ tensorflow::Tensor* max_scores_tensor = nullptr; -+ tensorflow::Tensor* argmax_sources_tensor = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, shape_b, &max_scores_tensor)); - OP_REQUIRES_OK(context, context->allocate_output(1, shape_bxm, -@@ -97,7 +97,7 @@ class MaxSpanningTreeOpKernel : public tensorflow::OpKernel { - max_scores_b, argmax_sources_bxm); - } - }); -- for (const tensorflow::Status &status : statuses) { -+ for (const tensorflow::Status& status : statuses) { - OP_REQUIRES_OK(context, status); - } - } -@@ -112,7 +112,8 @@ class MaxSpanningTreeOpKernel : public tensorflow::OpKernel { - // at index |problem| in |num_nodes_b| and |scores_bxmxm|. On success, sets - // the values at index |problem| in |max_scores_b| and |argmax_sources_bxm|. - // On error, returns non-OK. -- tensorflow::Status RunSolver(int problem, BatchedSizes num_nodes_b, -+ tensorflow::Status RunSolver(int problem, -+ BatchedSizes num_nodes_b, - BatchedScores scores_bxmxm, - BatchedMaxima max_scores_b, - BatchedSources argmax_sources_bxm) const { -@@ -140,7 +141,8 @@ class MaxSpanningTreeOpKernel : public tensorflow::OpKernel { - for (Index target = 0; target < num_nodes_index; ++target) { - for (Index source = 0; source < num_nodes_index; ++source) { - const Score score = scores_bxmxm(problem, target, source); -- if (!std::isfinite(static_cast<double>(score))) continue; -+ if (!std::isfinite(static_cast<double>(score))) -+ continue; - if (source == target) { // root - solver.AddRoot(target, score); - } else { // arc -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver.h -index b1ced8eab69c9..7d73435f9fa14 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver.h -@@ -187,7 +187,8 @@ class MstSolver { - - // Returns a string representation of this arc. 
- std::string DebugString() const { -- if (!Exists()) return "[null]"; -+ if (!Exists()) -+ return "[null]"; - if (IsRoot()) { - return absl::StrCat("[*->", target, "=", score, "]"); - } -@@ -213,7 +214,7 @@ class MstSolver { - void MaybePenalizeRootScoresForTree(); - - // Returns the maximum inbound arc of the |node|, or null if there is none. -- const Arc *MaximumInboundArc(Index node) const; -+ const Arc* MaximumInboundArc(Index node) const; - - // Merges the inbound arcs of the |cycle_node| into the inbound arcs of the - // |contracted_node|. Arcs are merged as follows: -@@ -225,7 +226,8 @@ class MstSolver { - // |contracted_node| has the better-scoring arc. - // The |score_offset| is added to the arc scores of the |cycle_node| before - // they are merged into the |contracted_node|. -- void MergeInboundArcs(Index cycle_node, Score score_offset, -+ void MergeInboundArcs(Index cycle_node, -+ Score score_offset, - Index contracted_node); - - // Contracts the cycle in |argmax_arcs_| that contains the |node|. -@@ -293,11 +295,11 @@ class MstSolver { - - // The maximum inbound arc for each node. The first element is null because - // the artificial root has no inbound arcs. -- std::vector<const Arc *> argmax_arcs_; -+ std::vector<const Arc*> argmax_arcs_; - - // Workspace for ContractCycle(), which records the nodes and arcs in the - // cycle being contracted. -- std::vector<std::pair<Index, const Arc *>> cycle_; -+ std::vector<std::pair<Index, const Arc*>> cycle_; - }; - - // Implementation details below. -@@ -344,7 +346,7 @@ template <class Index, class Score> - void MstSolver<Index, Score>::AddArc(Index source, Index target, Score score) { - DCHECK_NE(source, target); - DCHECK(std::isfinite(score)); -- Arc &arc = arcs_[ArcIndex(source + 1, target + 1)]; -+ Arc& arc = arcs_[ArcIndex(source + 1, target + 1)]; - arc.score = score; - arc.source = source + 1; - arc.target = target + 1; -@@ -353,7 +355,7 @@ void MstSolver<Index, Score>::AddArc(Index source, Index target, Score score) { - template <class Index, class Score> - void MstSolver<Index, Score>::AddRoot(Index root, Score score) { - DCHECK(std::isfinite(score)); -- Arc &arc = arcs_[ArcIndex(0, root + 1)]; -+ Arc& arc = arcs_[ArcIndex(0, root + 1)]; - arc.score = score; - arc.source = 0; - arc.target = root + 1; -@@ -361,14 +363,14 @@ void MstSolver<Index, Score>::AddRoot(Index root, Score score) { - - template <class Index, class Score> - Score MstSolver<Index, Score>::ArcScore(Index source, Index target) const { -- const Arc &arc = arcs_[ArcIndex(source + 1, target + 1)]; -+ const Arc& arc = arcs_[ArcIndex(source + 1, target + 1)]; - DCHECK(arc.Exists()); - return arc.score; - } - - template <class Index, class Score> - Score MstSolver<Index, Score>::RootScore(Index root) const { -- const Arc &arc = arcs_[ArcIndex(0, root + 1)]; -+ const Arc& arc = arcs_[ArcIndex(0, root + 1)]; - DCHECK(arc.Exists()); - return arc.score; - } -@@ -391,7 +393,8 @@ inline size_t MstSolver<Index, Score>::ArcIndex(size_t source, - - template <class Index, class Score> - void MstSolver<Index, Score>::MaybePenalizeRootScoresForTree() { -- if (forest_) return; -+ if (forest_) -+ return; - DCHECK_EQ(num_current_nodes_, num_initial_nodes_) - << "Root penalties must be applied before starting the algorithm."; - -@@ -399,36 +402,40 @@ void MstSolver<Index, Score>::MaybePenalizeRootScoresForTree() { - // of possible tree scores. 
- Score max_score = std::numeric_limits<Score>::lowest(); - Score min_score = std::numeric_limits<Score>::max(); -- for (const Arc &arc : arcs_) { -- if (!arc.Exists()) continue; -+ for (const Arc& arc : arcs_) { -+ if (!arc.Exists()) -+ continue; - max_score = std::max(max_score, arc.score); - min_score = std::min(min_score, arc.score); - } - - // Nothing to do, no existing arcs. -- if (max_score < min_score) return; -+ if (max_score < min_score) -+ return; - - // A spanning tree or forest contains n arcs. The penalty below ensures that - // every structure with one root has a higher score than every structure with - // two roots, and so on. - const Score root_penalty = 1 + num_initial_nodes_ * (max_score - min_score); - for (Index root = 1; root < num_initial_nodes_; ++root) { -- Arc &arc = arcs_[ArcIndex(0, root)]; -- if (!arc.Exists()) continue; -+ Arc& arc = arcs_[ArcIndex(0, root)]; -+ if (!arc.Exists()) -+ continue; - arc.score -= root_penalty; - } - } - - template <class Index, class Score> --const typename MstSolver<Index, Score>::Arc * -+const typename MstSolver<Index, Score>::Arc* - MstSolver<Index, Score>::MaximumInboundArc(Index node) const { -- const Arc *__restrict arc = &arcs_[ArcIndex(0, node)]; -- const Arc *arc_end = arc + num_initial_nodes_; -+ const Arc* __restrict arc = &arcs_[ArcIndex(0, node)]; -+ const Arc* arc_end = arc + num_initial_nodes_; - - Score max_score = std::numeric_limits<Score>::lowest(); -- const Arc *argmax_arc = nullptr; -+ const Arc* argmax_arc = nullptr; - for (; arc < arc_end; ++arc) { -- if (!arc->Exists()) continue; -+ if (!arc->Exists()) -+ continue; - const Score score = arc->score; - if (max_score <= score) { - max_score = score; -@@ -442,12 +449,13 @@ template <class Index, class Score> - void MstSolver<Index, Score>::MergeInboundArcs(Index cycle_node, - Score score_offset, - Index contracted_node) { -- const Arc *__restrict cycle_arc = &arcs_[ArcIndex(0, cycle_node)]; -- const Arc *cycle_arc_end = cycle_arc + num_initial_nodes_; -- Arc *__restrict contracted_arc = &arcs_[ArcIndex(0, contracted_node)]; -+ const Arc* __restrict cycle_arc = &arcs_[ArcIndex(0, cycle_node)]; -+ const Arc* cycle_arc_end = cycle_arc + num_initial_nodes_; -+ Arc* __restrict contracted_arc = &arcs_[ArcIndex(0, contracted_node)]; - - for (; cycle_arc < cycle_arc_end; ++cycle_arc, ++contracted_arc) { -- if (!cycle_arc->Exists()) continue; // nothing to merge -+ if (!cycle_arc->Exists()) -+ continue; // nothing to merge - - // Skip self-loops; they are useless because they cannot be used to break - // the cycle represented by the |contracted_node|. -@@ -480,7 +488,7 @@ void MstSolver<Index, Score>::ContractCycle(Index node) { - Index cycle_node = node; - do { - // Gather the nodes and arcs in |cycle_| for the second pass. -- const Arc *cycle_arc = argmax_arcs_[cycle_node]; -+ const Arc* cycle_arc = argmax_arcs_[cycle_node]; - DCHECK(!cycle_arc->IsRoot()) << cycle_arc->DebugString(); - cycle_.emplace_back(cycle_node, cycle_arc); - -@@ -500,7 +508,7 @@ void MstSolver<Index, Score>::ContractCycle(Index node) { - } while (cycle_node != contracted_node); - - // Merge the inbound arcs of each cycle node into the |contracted_node|. -- for (const auto &node_and_arc : cycle_) { -+ for (const auto& node_and_arc : cycle_) { - // Set the |score_offset| to the cost of breaking the cycle by replacing the - // arc currently directed into the |cycle_node|. 
- const Index cycle_node = node_and_arc.first; -@@ -514,7 +522,7 @@ tensorflow::Status MstSolver<Index, Score>::ContractionPhase() { - // Skip the artificial root since it has no inbound arcs. - for (Index target = 1; target < num_current_nodes_; ++target) { - // Find the maximum inbound arc for the current |target|, if any. -- const Arc *arc = MaximumInboundArc(target); -+ const Arc* arc = MaximumInboundArc(target); - if (arc == nullptr) { - return tensorflow::errors::FailedPrecondition("Infeasible digraph"); - } -@@ -522,7 +530,8 @@ tensorflow::Status MstSolver<Index, Score>::ContractionPhase() { - - // The articifial root cannot be part of a cycle, so we do not need to check - // for cycles or even update its membership in the connected components. -- if (arc->IsRoot()) continue; -+ if (arc->IsRoot()) -+ continue; - - // Since every node has at most one selected inbound arc, cycles can be - // detected using weakly-connected components. -@@ -555,11 +564,12 @@ tensorflow::Status MstSolver<Index, Score>::ExpansionPhase( - // this loop, entries [1,n] of |argmax_arcs_| provide the arcs of the maximum - // spanning tree. - for (Index i = num_current_nodes_ - 1; i >= num_initial_nodes_; --i) { -- if (contracted_into_[i] == kNullIndex) continue; // already deleted -+ if (contracted_into_[i] == kNullIndex) -+ continue; // already deleted - const Index root = i; // if not deleted, must be a root due to toposorting - - // Copy the cycle-breaking arc to its specified target. -- const Arc *arc = argmax_arcs_[root]; -+ const Arc* arc = argmax_arcs_[root]; - argmax_arcs_[arc->target] = arc; - - // The |arc| not only breaks the cycle associated with the |root|, but also -@@ -577,7 +587,7 @@ tensorflow::Status MstSolver<Index, Score>::ExpansionPhase( - // for validation below. - Index num_roots = 0; - for (Index target = 0; target < num_original_nodes_; ++target) { -- const Arc &arc = *argmax_arcs_[target + 1]; -+ const Arc& arc = *argmax_arcs_[target + 1]; - DCHECK_EQ(arc.target, target + 1) << arc.DebugString(); - if (arc.IsRoot()) { - ++num_roots; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_random_comparison_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_random_comparison_test.cc -index d345ce201b3a8..69e48c25ab245 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_random_comparison_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_random_comparison_test.cc -@@ -25,7 +25,9 @@ - #include "tensorflow_text/core/kernels/mst_solver.h" - #include "tensorflow_text/core/kernels/spanning_tree_iterator.h" - --ABSL_FLAG(int64, seed, 0, -+ABSL_FLAG(int64, -+ seed, -+ 0, - "Seed for random comparison tests, or 0 for a weak random seed."); - ABSL_FLAG(int, num_trials, 3, "Number of trials for random comparison tests."); - -@@ -35,10 +37,14 @@ namespace text { - using ::testing::Contains; - - // Returns the random seed, or 0 for a weak random seed. --int64 GetSeed() { return absl::GetFlag(FLAGS_seed); } -+int64 GetSeed() { -+ return absl::GetFlag(FLAGS_seed); -+} - - // Returns the number of trials to run for each random comparison. --int64 GetNumTrials() { return absl::GetFlag(FLAGS_num_trials); } -+int64 GetNumTrials() { -+ return absl::GetFlag(FLAGS_num_trials); -+} - - // Testing rig. Runs a comparison between a brute-force MST solver and the - // MstSolver<> on random digraphs. 
When the first test parameter is true, -@@ -64,7 +70,7 @@ class MstSolverRandomComparisonTest - uint32 num_nodes() const { return ::testing::get<1>(GetParam()); } - - // Returns the score of the arcs in |sources| based on the |scores|. -- int32 ScoreArcs(const ScoreMatrix &scores, const SourceList &sources) const { -+ int32 ScoreArcs(const ScoreMatrix& scores, const SourceList& sources) const { - CHECK_EQ(num_nodes() * num_nodes(), scores.size()); - int32 score = 0; - for (uint32 target = 0; target < num_nodes(); ++target) { -@@ -77,13 +83,13 @@ class MstSolverRandomComparisonTest - // Returns the score of the maximum spanning tree (or forest, if the first - // test parameter is true) of the dense digraph defined by the |scores|, and - // sets |argmax_trees| to contain all maximal trees. -- int32 RunBruteForceMstSolver(const ScoreMatrix &scores, -- std::set<SourceList> *argmax_trees) { -+ int32 RunBruteForceMstSolver(const ScoreMatrix& scores, -+ std::set<SourceList>* argmax_trees) { - CHECK_EQ(num_nodes() * num_nodes(), scores.size()); - int32 max_score; - argmax_trees->clear(); - -- iterator_.ForEachTree(num_nodes(), [&](const SourceList &sources) { -+ iterator_.ForEachTree(num_nodes(), [&](const SourceList& sources) { - const int32 score = ScoreArcs(scores, sources); - if (argmax_trees->empty() || max_score < score) { - max_score = score; -@@ -98,7 +104,7 @@ class MstSolverRandomComparisonTest - } - - // As above, but uses the |solver_| and extracts only one |argmax_tree|. -- int32 RunMstSolver(const ScoreMatrix &scores, SourceList *argmax_tree) { -+ int32 RunMstSolver(const ScoreMatrix& scores, SourceList* argmax_tree) { - CHECK_EQ(num_nodes() * num_nodes(), scores.size()); - TF_CHECK_OK(solver_.Init(forest(), num_nodes())); - -@@ -123,7 +129,8 @@ class MstSolverRandomComparisonTest - // Returns a random ScoreMatrix spanning num_nodes() nodes. - ScoreMatrix RandomScores() { - ScoreMatrix scores(num_nodes() * num_nodes()); -- for (int32 &value : scores) value = static_cast<int32>(prng_() % 201) - 100; -+ for (int32& value : scores) -+ value = static_cast<int32>(prng_() % 201) - 100; - return scores; - } - -@@ -133,7 +140,8 @@ class MstSolverRandomComparisonTest - // Seed the PRNG, possibly non-deterministically. Log the seed value so the - // test results can be reproduced, even when the seed is non-deterministic. 
- uint32 seed = GetSeed(); -- if (seed == 0) seed = time(nullptr); -+ if (seed == 0) -+ seed = time(nullptr); - prng_.seed(seed); - LOG(INFO) << "seed = " << seed; - -@@ -166,11 +174,14 @@ class MstSolverRandomComparisonTest - std::mt19937 prng_; - }; - --INSTANTIATE_TEST_SUITE_P(AllowForest, MstSolverRandomComparisonTest, -+INSTANTIATE_TEST_SUITE_P(AllowForest, -+ MstSolverRandomComparisonTest, - ::testing::Combine(::testing::Bool(), - ::testing::Range<uint32>(1, 9))); - --TEST_P(MstSolverRandomComparisonTest, Comparison) { RunComparison(); } -+TEST_P(MstSolverRandomComparisonTest, Comparison) { -+ RunComparison(); -+} - - } // namespace text - } // namespace tensorflow -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_test.cc -index 6d67b08081359..ef5327fe535e7 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_test.cc -@@ -40,7 +40,8 @@ class MstSolverTest : public ::testing::Test { - void AddAllArcs(Index num_nodes, Score score) { - for (Index source = 0; source < num_nodes; ++source) { - for (Index target = 0; target < num_nodes; ++target) { -- if (source == target) continue; -+ if (source == target) -+ continue; - solver_.AddArc(source, target, score); - } - } -@@ -57,7 +58,7 @@ class MstSolverTest : public ::testing::Test { - // Runs the |solver_| using an argmax array of size |argmax_array_size| and - // expects it to fail with an error message that matches |error_substr|. - void SolveAndExpectError(int argmax_array_size, -- const std::string &error_message_substr) { -+ const std::string& error_message_substr) { - std::vector<Index> argmax(argmax_array_size); - EXPECT_TRUE(absl::StrContains(solver_.Solve(&argmax).error_message(), - error_message_substr)); -@@ -72,7 +73,7 @@ class MstSolverTest : public ::testing::Test { - - // As above, but expects the solution to be |expected_argmax| and infers the - // argmax array size. 
-- void SolveAndExpectArgmax(const std::vector<Index> &expected_argmax) { -+ void SolveAndExpectArgmax(const std::vector<Index>& expected_argmax) { - std::vector<Index> actual_argmax(expected_argmax.size()); - TF_ASSERT_OK(solver_.Solve(&actual_argmax)); - EXPECT_EQ(expected_argmax, actual_argmax); -@@ -83,10 +84,11 @@ class MstSolverTest : public ::testing::Test { - Solver solver_; - }; - --using Solvers = -- ::testing::Types<MstSolver<uint8, int16>, MstSolver<uint16, int32>, -- MstSolver<uint32, int64>, MstSolver<uint16, float>, -- MstSolver<uint32, double>>; -+using Solvers = ::testing::Types<MstSolver<uint8, int16>, -+ MstSolver<uint16, int32>, -+ MstSolver<uint32, int64>, -+ MstSolver<uint16, float>, -+ MstSolver<uint32, double>>; - TYPED_TEST_SUITE(MstSolverTest, Solvers); - - TYPED_TEST(MstSolverTest, FailIfNoNodes) { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel_template.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel_template.h -index 790ebce2a67c6..c6f45988924fb 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel_template.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel_template.h -@@ -191,7 +191,8 @@ class NGramsStrJoin : public tflite::shim::OpKernelShim<NGramsStrJoin, Rt> { - std::vector<tensorflow::tstring> tokens; - for (int j = input_row_splits[i]; j < input_row_splits[i + 1]; ++j) { - tokens.emplace_back(input_values_data.at(j)); -- if (tokens.size() < width_) continue; -+ if (tokens.size() < width_) -+ continue; - tokens.erase(tokens.begin(), tokens.begin() + tokens.size() - width_); - buffer.push_back(absl::StrJoin(tokens, string_separator_)); - } -@@ -206,11 +207,13 @@ class NGramsStrJoin : public tflite::shim::OpKernelShim<NGramsStrJoin, Rt> { - output_values_or = - ctx->GetOutput(kValues, Shape({static_cast<int>(buffer.size())})); - } -- if (!output_values_or.ok()) return output_values_or.status(); -+ if (!output_values_or.ok()) -+ return output_values_or.status(); - auto& output_buffer = - output_values_or.value()->template Data<tensorflow::tstring>(); - int i = 0; -- for (const auto& v : buffer) output_buffer[i++] = v; -+ for (const auto& v : buffer) -+ output_buffer[i++] = v; - return absl::OkStatus(); - } - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc -index 7c98100d400b9..3c97969eb4e36 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc -@@ -51,7 +51,8 @@ using ::testing::ElementsAreArray; - class NgramsModel : public SingleOpModel { - public: - // Constructor for testing the op with a tf.Tensor -- NgramsModel(int width, const std::string& string_separator, -+ NgramsModel(int width, -+ const std::string& string_separator, - const std::vector<std::string>& input_values, - const std::vector<int>& input_shape) { - input_values_ = AddInput(TensorType_STRING); -@@ -67,7 +68,8 @@ class NgramsModel : public SingleOpModel { - // Constructor for the op with a tf.RaggedTensor - // Note: This interface uses row_lengths, as they're closer to the - // dimensions in a TensorShape, but internally everything is row_splits. 
-- NgramsModel(int width, const std::string& string_separator, -+ NgramsModel(int width, -+ const std::string& string_separator, - const std::vector<std::string>& input_values, - const std::vector<std::vector<int64_t>> nested_row_lengths) { - std::vector<std::vector<int>> input_shapes; -@@ -214,8 +216,7 @@ TEST(NgramsTest, TensorMultidimensionalInputWidthTwo) { - TEST(NgramsTest, RaggedTensorSingleSequenceWidthTwo) { - std::vector<std::vector<int64_t>> nested_row_lengths; - nested_row_lengths.push_back({4}); -- NgramsModel m(2, " ", {"this", "is", "a", "test"}, -- nested_row_lengths); -+ NgramsModel m(2, " ", {"this", "is", "a", "test"}, nested_row_lengths); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(3)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this is", "is a", "a test")); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc -index d68276f91d90b..a44e18f8e0534 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc -@@ -33,9 +33,9 @@ limitations under the License. - #include "flatbuffers/flexbuffers.h" - #include "tensorflow/core/util/ragged_to_dense_util_common.h" - #include "tensorflow/lite/c/common.h" --#include "tensorflow/lite/mutable_op_resolver.h" - #include "tensorflow/lite/kernels/internal/types.h" - #include "tensorflow/lite/kernels/kernel_util.h" -+#include "tensorflow/lite/mutable_op_resolver.h" - - namespace tflite { - namespace ops { -@@ -176,8 +176,10 @@ RuntimeShape TensorShapeFromTensor(const TfLiteTensor& tensor) { - } - - const TfLiteTensor* GetRowPartitionTensor( -- const ConversionAttributes& conversion_attributes, TfLiteContext* context, -- TfLiteNode* node, int dimension) { -+ const ConversionAttributes& conversion_attributes, -+ TfLiteContext* context, -+ TfLiteNode* node, -+ int dimension) { - if (conversion_attributes.partition_types.front() == - tensorflow::RowPartitionType::FIRST_DIM_SIZE) { - return &context->tensors[node->inputs->data[kFirstPartitionInputIndex + 1 + -@@ -247,7 +249,9 @@ int GetMaxWidthRowSplit(const TfLiteTensor* tensor) { - } - - int GetMaxWidth(const ConversionAttributes& conversion_attributes, -- TfLiteContext* context, TfLiteNode* node, int dimension) { -+ TfLiteContext* context, -+ TfLiteNode* node, -+ int dimension) { - const TfLiteTensor* tensor = GetRowPartitionTensor( - conversion_attributes, context, node, dimension - 1); - switch (conversion_attributes.GetRowPartitionTypeByDimension(dimension - 1)) { -@@ -262,7 +266,8 @@ int GetMaxWidth(const ConversionAttributes& conversion_attributes, - } - - RuntimeShape CombineRaggedTensorToTensorShapes( -- int ragged_rank, const RuntimeShape& output_shape, -+ int ragged_rank, -+ const RuntimeShape& output_shape, - const RuntimeShape& value_shape) { - // TODO(mgubin): No checks, see - // third_party/tensorflow/core/ops/ragged_to_dense_util.cc -@@ -283,9 +288,13 @@ RuntimeShape CombineRaggedTensorToTensorShapes( - } - - RuntimeShape CalculateOutputSize( -- const ConversionAttributes& conversion_attributes, TfLiteContext* context, -- TfLiteNode* node, int first_dimension, int ragged_rank, -- const TfLiteTensor& values, const TfLiteTensor& default_value, -+ const ConversionAttributes& conversion_attributes, -+ TfLiteContext* context, -+ TfLiteNode* node, -+ int 
first_dimension, -+ int ragged_rank, -+ const TfLiteTensor& values, -+ const TfLiteTensor& default_value, - const TfLiteTensor& output_shape) { - RuntimeShape values_shape(values.dims->size, values.dims->data); - RuntimeShape default_value_shape(default_value.dims->size, -@@ -367,7 +376,8 @@ void CalculateFirstParentOutputIndex(int first_dimension, - void CalculateOutputIndexValueRowID(const TfLiteTensor& value_rowids, - const std::vector<int>& parent_output_index, - int output_index_multiplier, -- int output_size, std::vector<int>* result) { -+ int output_size, -+ std::vector<int>* result) { - const RuntimeShape tensor_shape(value_rowids.dims->size, - value_rowids.dims->data); - const int index_size = tensor_shape.FlatSize(); -@@ -416,7 +426,8 @@ void CalculateOutputIndexValueRowID(const TfLiteTensor& value_rowids, - - void CalculateOutputIndexRowSplit(const TfLiteTensor& row_split, - const std::vector<int>& parent_output_index, -- int output_index_multiplier, int output_size, -+ int output_index_multiplier, -+ int output_size, - std::vector<int>* result) { - const RuntimeShape row_split_shape(row_split.dims->size, - row_split.dims->data); -@@ -457,10 +468,14 @@ void CalculateOutputIndexRowSplit(const TfLiteTensor& row_split, - } - - TfLiteStatus CalculateOutputIndex( -- const ConversionAttributes& conversion_attributes, TfLiteContext* context, -- TfLiteNode* node, int dimension, -- const std::vector<int>& parent_output_index, int output_index_multiplier, -- int output_size, std::vector<int>* result) { -+ const ConversionAttributes& conversion_attributes, -+ TfLiteContext* context, -+ TfLiteNode* node, -+ int dimension, -+ const std::vector<int>& parent_output_index, -+ int output_index_multiplier, -+ int output_size, -+ std::vector<int>* result) { - const TfLiteTensor* row_partition_tensor = - GetRowPartitionTensor(conversion_attributes, context, node, dimension); - auto partition_type = -@@ -483,7 +498,8 @@ TfLiteStatus CalculateOutputIndex( - } - - template <typename VALUE_TYPE> --void SetOutputT(TfLiteContext* context, int ragged_rank, -+void SetOutputT(TfLiteContext* context, -+ int ragged_rank, - const std::vector<int>& output_index, - const TfLiteTensor& values_tensor, - const TfLiteTensor& default_value_tensor, -@@ -558,7 +574,8 @@ void SetOutputT(TfLiteContext* context, int ragged_rank, - } - } - --void SetOutput(TfLiteContext* context, int ragged_rank, -+void SetOutput(TfLiteContext* context, -+ int ragged_rank, - const std::vector<int>& output_index, - const TfLiteTensor& values_tensor, - const TfLiteTensor& default_value_tensor, -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc -index 9044797e70568..5f74f683c4e36 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc -@@ -98,7 +98,8 @@ class RaggedTensorToTensorOpModel : public SingleOpModel { - std::vector<int32> GetOutputInt() { return ExtractVector<int32>(output_); } - - void InvokeFloat(const std::vector<int>& shape, -- const std::vector<float>& values, float default_value, -+ const std::vector<float>& values, -+ float default_value, - const std::vector<std::vector<int>>& partition_values) { - PopulateTensor(input_shape_, shape); - PopulateTensor(input_values_, values); -@@ -109,7 +110,8 @@ class 
RaggedTensorToTensorOpModel : public SingleOpModel { - SingleOpModel::Invoke(); - } - void InvokeInt(const std::vector<int>& shape, -- const std::vector<int32>& values, int32 default_value, -+ const std::vector<int32>& values, -+ int32 default_value, - const std::vector<std::vector<int>>& partition_values) { - PopulateTensor(input_shape_, shape); - PopulateTensor(input_values_, values); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc -index 311dc52e7b3dc..aa17d772dcfc3 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc -@@ -21,8 +21,10 @@ namespace text { - namespace { - - template <typename T> --void RegexSplitImpl(absl::string_view input, const RE2& re2, -- bool include_delimiter, const RE2& include_delim_regex, -+void RegexSplitImpl(absl::string_view input, -+ const RE2& re2, -+ bool include_delimiter, -+ const RE2& include_delim_regex, - std::vector<absl::string_view>* tokens, - std::vector<T>* begin_offsets, - std::vector<T>* end_offsets) { -@@ -68,7 +70,9 @@ void RegexSplitImpl(absl::string_view input, const RE2& re2, - - } // namespace - --void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, -+void RegexSplit(absl::string_view input, -+ const RE2& re2, -+ bool include_delimiter, - const RE2& include_delim_regex, - std::vector<absl::string_view>* tokens, - std::vector<long>* begin_offsets, // NOLINT -@@ -77,7 +81,9 @@ void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, - begin_offsets, end_offsets); - } - --void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, -+void RegexSplit(absl::string_view input, -+ const RE2& re2, -+ bool include_delimiter, - const RE2& include_delim_regex, - std::vector<absl::string_view>* tokens, - std::vector<long long>* begin_offsets, // NOLINT -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h -index de7294ce535b9..e9df4727594df 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h -@@ -24,17 +24,21 @@ - namespace tensorflow { - namespace text { - --void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, -+void RegexSplit(absl::string_view input, -+ const RE2& re2, -+ bool include_delimiter, - const RE2& include_delim_regex, - std::vector<absl::string_view>* tokens, - std::vector<long>* begin_offsets, // NOLINT - std::vector<long>* end_offsets); // NOLINT - --void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, -+void RegexSplit(absl::string_view input, -+ const RE2& re2, -+ bool include_delimiter, - const RE2& include_delim_regex, - std::vector<absl::string_view>* tokens, - std::vector<long long>* begin_offsets, // NOLINT -- std::vector<long long>* end_offsets); // NOLINT -+ std::vector<long long>* end_offsets); // NOLINT - - } // namespace text - } // namespace tensorflow -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_kernels.cc -index b563482d1be08..f7ee942676bc3 100644 ---- 
a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_kernels.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_kernels.cc -@@ -193,7 +193,8 @@ class RegexSplitOp : public tensorflow::OpKernel { - }; - - REGISTER_KERNEL_BUILDER( -- Name("RegexSplitWithOffsets").Device(tensorflow::DEVICE_CPU), RegexSplitOp); -+ Name("RegexSplitWithOffsets").Device(tensorflow::DEVICE_CPU), -+ RegexSplitOp); - - } // namespace text - } // namespace tensorflow -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel.cc -index b30d4bc89a216..db53bc9326b81 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel.cc -@@ -32,9 +32,7 @@ - namespace tensorflow { - namespace text { - --namespace { --} // namespace -- -+namespace {} // namespace - - // ROUGE-L implementation based on - // https://www.microsoft.com/en-us/research/publication/ -@@ -110,15 +108,12 @@ class RougeLOp : public OpKernel { - // Iterate over the splits, skipping the first split as it is always zero. - for (int i = 1; i < hyp_splits_flat.size(); i++) { - // Length of hyp and ref. -- SPLITS_TYPE lhyp = hyp_splits_flat(i) - hyp_splits_flat(i-1); -- SPLITS_TYPE lref = ref_splits_flat(i) - ref_splits_flat(i-1); -+ SPLITS_TYPE lhyp = hyp_splits_flat(i) - hyp_splits_flat(i - 1); -+ SPLITS_TYPE lref = ref_splits_flat(i) - ref_splits_flat(i - 1); - // Length of longest common substring. -- int32 llcs = LongestCommonSubsequenceLength(hyp_splits_flat(i-1), -- hyp_splits_flat(i), -- hyp_tensor_flat, -- ref_splits_flat(i-1), -- ref_splits_flat(i), -- ref_tensor_flat); -+ int32 llcs = LongestCommonSubsequenceLength( -+ hyp_splits_flat(i - 1), hyp_splits_flat(i), hyp_tensor_flat, -+ ref_splits_flat(i - 1), ref_splits_flat(i), ref_tensor_flat); - auto measures = ComputeMeasures(lhyp, lref, llcs, alpha); - f_measures_flat(i - 1) = std::get<0>(measures); - p_measures_flat(i - 1) = std::get<1>(measures); -@@ -129,13 +124,12 @@ class RougeLOp : public OpKernel { - private: - // By using LCS, the ROUGE-L algorithm does not require consecutive matches - // but rather credits the order of N-grams. -- int32 LongestCommonSubsequenceLength( -- const SPLITS_TYPE hyp_i, -- const SPLITS_TYPE hyp_j, -- const ConstFlatValues& hyp, -- const SPLITS_TYPE ref_i, -- const SPLITS_TYPE ref_j, -- const ConstFlatValues& ref) { -+ int32 LongestCommonSubsequenceLength(const SPLITS_TYPE hyp_i, -+ const SPLITS_TYPE hyp_j, -+ const ConstFlatValues& hyp, -+ const SPLITS_TYPE ref_i, -+ const SPLITS_TYPE ref_j, -+ const ConstFlatValues& ref) { - SPLITS_TYPE lhyp = hyp_j - hyp_i; - SPLITS_TYPE lref = ref_j - ref_i; - // Create a scratch matrix to keep track of the LCS seen so far using DP. -@@ -149,7 +143,8 @@ class RougeLOp : public OpKernel { - if (a == 0 || b == 0) { - // If in first row or column, we write a zero to the table. - scratch2d(a, b) = 0; -- } else if (x == hyp_j+1 || y == ref_j+1 || hyp(x-1) != ref(y-1)) { -+ } else if (x == hyp_j + 1 || y == ref_j + 1 || -+ hyp(x - 1) != ref(y - 1)) { - // If in the last row or column, or if the tokens are not equal, - // carry the largest score seen in the cell above or to the left of - // the current cell. 
-@@ -176,9 +171,8 @@ class RougeLOp : public OpKernel { - const float r_lcs = llcs / (lref + 1e-12); - // Use the tensor2tensor formulation if the alpha value is <0, - // which does not make sense as a weighted average term. -- const float f_lcs = alpha < 0 ? -- ComputeTensor2TensorF(p_lcs, r_lcs) : -- ComputeOfficialF(p_lcs, r_lcs, alpha); -+ const float f_lcs = alpha < 0 ? ComputeTensor2TensorF(p_lcs, r_lcs) -+ : ComputeOfficialF(p_lcs, r_lcs, alpha); - return std::make_tuple(f_lcs, p_lcs, r_lcs); - } - -@@ -192,7 +186,8 @@ class RougeLOp : public OpKernel { - return 0; - } - -- float ComputeOfficialF(const float p_lcs, const float r_lcs, -+ float ComputeOfficialF(const float p_lcs, -+ const float r_lcs, - const float alpha) { - float denominator = (alpha * r_lcs + (1 - alpha) * p_lcs); - if (denominator > 0) { -@@ -204,16 +199,16 @@ class RougeLOp : public OpKernel { - TF_DISALLOW_COPY_AND_ASSIGN(RougeLOp); - }; - --#define REGISTER(VALUES_TYPE) \ -- REGISTER_KERNEL_BUILDER(Name("RougeL") \ -- .Device(DEVICE_CPU) \ -- .TypeConstraint<int32>("Tsplits") \ -- .TypeConstraint<VALUES_TYPE>("Tvalues"), \ -- RougeLOp<int32, VALUES_TYPE>); \ -- REGISTER_KERNEL_BUILDER(Name("RougeL") \ -- .Device(DEVICE_CPU) \ -- .TypeConstraint<int64>("Tsplits") \ -- .TypeConstraint<VALUES_TYPE>("Tvalues"), \ -+#define REGISTER(VALUES_TYPE) \ -+ REGISTER_KERNEL_BUILDER(Name("RougeL") \ -+ .Device(DEVICE_CPU) \ -+ .TypeConstraint<int32>("Tsplits") \ -+ .TypeConstraint<VALUES_TYPE>("Tvalues"), \ -+ RougeLOp<int32, VALUES_TYPE>); \ -+ REGISTER_KERNEL_BUILDER(Name("RougeL") \ -+ .Device(DEVICE_CPU) \ -+ .TypeConstraint<int64>("Tsplits") \ -+ .TypeConstraint<VALUES_TYPE>("Tvalues"), \ - RougeLOp<int64, VALUES_TYPE>); - - TF_CALL_int32(REGISTER); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel_test.cc -index bfc748b7638db..e218b416ba826 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel_test.cc -@@ -38,8 +38,7 @@ TEST(RougeLFMeasureOpTest, ShapeFn) { - INFER_OK(op, "?;?;?;?;?", "[?];[?];[?]"); - INFER_ERROR("Dimension 0 in both shapes must be equal, but are 3 and 2.", op, - "[5];[3];[8];[2];[]"); -- INFER_ERROR("Shape must be rank 0 but is rank 1", op, -- "[5];[3];[8];[3];[1]"); -+ INFER_ERROR("Shape must be rank 0 but is rank 1", op, "[5];[3];[8];[3];[1]"); - } - - } // namespace -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels.cc -index 77e583418446c..180a82cba9895 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels.cc -@@ -120,7 +120,8 @@ Status GetErrorOptions(OpKernelConstruction* context, ErrorOptions* out) { - } - - inline bool ShouldHandleFormatError(const ErrorOptions& error_options, -- UChar32 ch, bool format_error) { -+ UChar32 ch, -+ bool format_error) { - return ((error_options.replace_control_chars && ch <= 0x1F) || format_error); - } - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.cc -index 7131cbcf4d383..2937fe2f12316 
100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.cc -@@ -59,13 +59,16 @@ Status UnicodeUtil::IsTerminalPunc(const absl::string_view& input, - *result = false; - const auto& ellipsis_status = IsEllipsis(input, result); - // If there was a error decoding, or if we found an ellipsis, then return. -- if (!ellipsis_status.ok()) return ellipsis_status; -- if (*result) return Status::OK(); -+ if (!ellipsis_status.ok()) -+ return ellipsis_status; -+ if (*result) -+ return Status::OK(); - - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); -- if (!status.ok()) return status; -+ if (!status.ok()) -+ return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); -@@ -100,7 +103,8 @@ Status UnicodeUtil::IsClosePunc(const absl::string_view& input, - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); -- if (!status.ok()) return status; -+ if (!status.ok()) -+ return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); -@@ -134,7 +138,8 @@ Status UnicodeUtil::IsOpenParen(const absl::string_view& input, - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); -- if (!status.ok()) return status; -+ if (!status.ok()) -+ return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); -@@ -161,7 +166,8 @@ Status UnicodeUtil::IsCloseParen(const absl::string_view& input, - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); -- if (!status.ok()) return status; -+ if (!status.ok()) -+ return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); -@@ -189,7 +195,8 @@ Status UnicodeUtil::IsPunctuationWord(const absl::string_view& input, - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); -- if (!status.ok()) return status; -+ if (!status.ok()) -+ return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); -@@ -224,7 +231,8 @@ Status UnicodeUtil::IsEllipsis(const absl::string_view& input, - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); -- if (!status.ok()) return status; -+ if (!status.ok()) -+ return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils_test.cc -index 6c12cbff55264..14fc095d0cccb 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils_test.cc -@@ -121,7 +121,8 @@ TEST_P(IsTerminalPuncParamTest, IsTerminalPunc) { - EXPECT_TRUE(result); - } - --INSTANTIATE_TEST_SUITE_P(IsTerminalPuncTest, IsTerminalPuncParamTest, -+INSTANTIATE_TEST_SUITE_P(IsTerminalPuncTest, -+ IsTerminalPuncParamTest, - ::testing::ValuesIn(is_terminal_punc_test_cases)); - 
- TEST_F(IsTerminalPuncTest, IsMultiCharEllipseTerminalPunc) { -@@ -220,7 +221,8 @@ TEST_P(ClosePuncParamTest, IsClosePunc) { - EXPECT_TRUE(result); - } - --INSTANTIATE_TEST_SUITE_P(IsClosePuncParamTest, ClosePuncParamTest, -+INSTANTIATE_TEST_SUITE_P(IsClosePuncParamTest, -+ ClosePuncParamTest, - ::testing::ValuesIn(close_punc_test_cases)); - - class OpenParenParamTest : public SentenceBreakingUtilsParamTest {}; -@@ -269,7 +271,8 @@ TEST_P(OpenParenParamTest, IsOpenParen) { - EXPECT_TRUE(result); - } - --INSTANTIATE_TEST_SUITE_P(IsOpenParenParamTest, OpenParenParamTest, -+INSTANTIATE_TEST_SUITE_P(IsOpenParenParamTest, -+ OpenParenParamTest, - ::testing::ValuesIn(open_paren_test_cases)); - - class CloseParenParamTest : public SentenceBreakingUtilsParamTest {}; -@@ -318,7 +321,8 @@ TEST_P(CloseParenParamTest, IsCloseParen) { - EXPECT_TRUE(result); - } - --INSTANTIATE_TEST_SUITE_P(IsCloseParenParamTest, CloseParenParamTest, -+INSTANTIATE_TEST_SUITE_P(IsCloseParenParamTest, -+ CloseParenParamTest, - ::testing::ValuesIn(close_paren_test_cases)); - - class IsPunctuationWordParamTest : public SentenceBreakingUtilsParamTest {}; -@@ -543,7 +547,8 @@ TEST_P(IsPunctuationWordParamTest, IsPunctuation) { - EXPECT_TRUE(result); - } - --INSTANTIATE_TEST_SUITE_P(IsPuncWordParamTest, IsPunctuationWordParamTest, -+INSTANTIATE_TEST_SUITE_P(IsPuncWordParamTest, -+ IsPunctuationWordParamTest, - ::testing::ValuesIn(punc_word_test_cases)); - - class IsEllipsisTest : public SentenceBreakingUtilsTest, -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.cc -index 99a16b7c2914a..e0224f606015c 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.cc -@@ -25,27 +25,28 @@ namespace { - - // Sets a property of a sentence fragment. - void SetFragmentProperty(SentenceFragment::Property property, -- SentenceFragment *fragment) { -+ SentenceFragment* fragment) { - fragment->properties = fragment->properties | property; - } - - // Returns true iff a token has any of the given properties. --bool TokenHasProperty(uint32 properties, const Token &token) { -+bool TokenHasProperty(uint32 properties, const Token& token) { - return token.text_properties() & properties; - } - - // Returns true iff a token has the ACRONYM text property and token.word() - // ends with a period. --bool IsPeriodSeparatedAcronym(const Token &token) { -+bool IsPeriodSeparatedAcronym(const Token& token) { - return TokenHasProperty(Token::ACRONYM, token) && - (!token.word().empty() && token.word().back() == '.'); - } - - // Returns true iff the token can appear after a space in a sentence-terminal - // token sequence. --Status SpaceAllowedBeforeToken(const UnicodeUtil *util, const Token &token, -- bool *result) { -- const tstring &word = token.word(); -+Status SpaceAllowedBeforeToken(const UnicodeUtil* util, -+ const Token& token, -+ bool* result) { -+ const tstring& word = token.word(); - bool is_ellipsis = false; - TF_RETURN_IF_ERROR(util->IsEllipsis(word, &is_ellipsis)); - -@@ -63,9 +64,7 @@ Status SpaceAllowedBeforeToken(const UnicodeUtil *util, const Token &token, - - class SentenceFragmenter::FragmentBoundaryMatch { - public: -- FragmentBoundaryMatch() { -- Reset(); -- } -+ FragmentBoundaryMatch() { Reset(); } - - // Goes to initial state. 
- void Reset() { -@@ -77,10 +76,12 @@ class SentenceFragmenter::FragmentBoundaryMatch { - - // Follows the state transition for the token at the given index. Returns - // true for success, or false if there was no valid transition. -- Status Advance(const UnicodeUtil *util, const Document &document, int index, -- bool *result) { -- const Token &token = document.tokens()[index]; -- const tstring &word = token.word(); -+ Status Advance(const UnicodeUtil* util, -+ const Document& document, -+ int index, -+ bool* result) { -+ const Token& token = document.tokens()[index]; -+ const tstring& word = token.word(); - bool no_transition = false; - - bool is_terminal_punc = false; -@@ -141,20 +142,12 @@ class SentenceFragmenter::FragmentBoundaryMatch { - - // Returns true iff we have matched at least one terminal punctuation - // character. -- bool GotTerminalPunc() const { -- return first_terminal_punc_index_ >= 0; -- } -+ bool GotTerminalPunc() const { return first_terminal_punc_index_ >= 0; } - - // Field accessors. -- int first_terminal_punc_index() const { -- return first_terminal_punc_index_; -- } -- int first_close_punc_index() const { -- return first_close_punc_index_; -- } -- int limit_index() const { -- return limit_index_; -- } -+ int first_terminal_punc_index() const { return first_terminal_punc_index_; } -+ int first_close_punc_index() const { return first_close_punc_index_; } -+ int limit_index() const { return limit_index_; } - - private: - // Match state. -@@ -177,7 +170,7 @@ class SentenceFragmenter::FragmentBoundaryMatch { - }; - - Status SentenceFragmenter::FindFragments( -- std::vector<SentenceFragment> *result) { -+ std::vector<SentenceFragment>* result) { - // Partition tokens into sentence fragments. - for (int i_start = 0; i_start < document_->tokens().size();) { - SentenceFragment fragment; -@@ -216,12 +209,13 @@ Status SentenceFragmenter::FindFragments( - // time, we'll fail again this time and therefore continue past "y" to find the - // next boundary. We will not try to scan "!!!" a third time. - Status SentenceFragmenter::FindNextFragmentBoundary( -- int i_start, SentenceFragmenter::FragmentBoundaryMatch *result) const { -+ int i_start, -+ SentenceFragmenter::FragmentBoundaryMatch* result) const { - FragmentBoundaryMatch current_match; - FragmentBoundaryMatch previous_match; - - for (int i = i_start; i < static_cast<int>(document_->tokens().size()); ++i) { -- const auto &token = document_->tokens()[i]; -+ const auto& token = document_->tokens()[i]; - if (current_match.GotTerminalPunc() && i > i_start && - token.break_level() >= Token::SPACE_BREAK) { - // Got terminal punctuation and a space delimiter, so match is valid. -@@ -279,7 +273,7 @@ Status SentenceFragmenter::FindNextFragmentBoundary( - Status SentenceFragmenter::UpdateLatestOpenParenForFragment(int i_start, - int i_end) { - for (int i = i_end; i > i_start; --i) { -- const auto &token = document_->tokens()[i - 1]; -+ const auto& token = document_->tokens()[i - 1]; - bool is_open_paren = false; - TF_RETURN_IF_ERROR(util_->IsOpenParen(token.word(), &is_open_paren)); - if (is_open_paren) { -@@ -294,8 +288,9 @@ Status SentenceFragmenter::UpdateLatestOpenParenForFragment(int i_start, - } - - Status SentenceFragmenter::FillInFragmentFields( -- int i_start, const FragmentBoundaryMatch &match, -- SentenceFragment *fragment) const { -+ int i_start, -+ const FragmentBoundaryMatch& match, -+ SentenceFragment* fragment) const { - // Set the fragment's boundaries. 
- fragment->start = i_start; - fragment->limit = match.limit_index(); -@@ -344,7 +339,8 @@ Status SentenceFragmenter::FillInFragmentFields( - // We treat "!" as the first terminal punctuation mark; the ellipsis acts as - // left context. - Status SentenceFragmenter::GetAdjustedFirstTerminalPuncIndex( -- const FragmentBoundaryMatch &match, int *result) const { -+ const FragmentBoundaryMatch& match, -+ int* result) const { - // Get terminal punctuation span. - int i1 = match.first_terminal_punc_index(); - if (i1 < 0) { -@@ -354,7 +350,7 @@ Status SentenceFragmenter::GetAdjustedFirstTerminalPuncIndex( - int i2 = match.first_close_punc_index(); - - for (int i = i2; i > i1; --i) { -- const auto &token = document_->tokens()[i - 1]; -+ const auto& token = document_->tokens()[i - 1]; - bool is_ellipsis = false; - TF_RETURN_IF_ERROR(util_->IsEllipsis(token.word(), &is_ellipsis)); - if (is_ellipsis || TokenHasProperty(Token::EMOTICON, token)) { -@@ -386,7 +382,8 @@ Status SentenceFragmenter::GetAdjustedFirstTerminalPuncIndex( - // (.!?), as ambiguous ones (ellipsis/emoticon) do not necessarily imply a - // sentence boundary. - Status SentenceFragmenter::HasUnattachableTerminalPunc( -- const FragmentBoundaryMatch &match, bool *result) const { -+ const FragmentBoundaryMatch& match, -+ bool* result) const { - *result = false; - // Get terminal punctuation span. - int i1 = match.first_terminal_punc_index(); -@@ -398,7 +395,7 @@ Status SentenceFragmenter::HasUnattachableTerminalPunc( - - // Iterate over the second and later punctuation marks. - for (int i = i1 + 1; i < i2; ++i) { -- const auto &token = document_->tokens()[i]; -+ const auto& token = document_->tokens()[i]; - bool is_punctuation = false; - TF_RETURN_IF_ERROR(util_->IsPunctuationWord(token.word(), &is_punctuation)); - bool is_ellipsis = false; -@@ -415,8 +412,8 @@ Status SentenceFragmenter::HasUnattachableTerminalPunc( - return Status::OK(); - } - --Status SentenceFragmenter::HasCloseParen(const FragmentBoundaryMatch &match, -- bool *result) const { -+Status SentenceFragmenter::HasCloseParen(const FragmentBoundaryMatch& match, -+ bool* result) const { - *result = false; - // Get close punctuation span. 
- int i1 = match.first_close_punc_index(); -@@ -427,7 +424,7 @@ Status SentenceFragmenter::HasCloseParen(const FragmentBoundaryMatch &match, - int i2 = match.limit_index(); - - for (int i = i1; i < i2; ++i) { -- const auto &token = document_->tokens()[i]; -+ const auto& token = document_->tokens()[i]; - bool is_close_paren = false; - TF_RETURN_IF_ERROR(util_->IsCloseParen(token.word(), &is_close_paren)); - if (is_close_paren) { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.h -index 8ca5acd2197fe..88b81988e601d 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.h -@@ -106,7 +106,10 @@ class Token { - HYPERLINK = 0x200, - }; - -- Token(const tstring &word, uint32 start, uint32 end, BreakLevel break_level, -+ Token(const tstring& word, -+ uint32 start, -+ uint32 end, -+ BreakLevel break_level, - TextProperty text_properties) - : word_(word), - start_(start), -@@ -114,14 +117,14 @@ class Token { - break_level_(break_level), - text_properties_(text_properties) {} - -- const tstring &word() const { return word_; } -+ const tstring& word() const { return word_; } - const uint32 start() const { return start_; } - const uint32 end() const { return end_; } - const BreakLevel break_level() const { return break_level_; } - const TextProperty text_properties() const { return text_properties_; } - - private: -- const tstring &word_; -+ const tstring& word_; - uint32 start_; - uint32 end_; - BreakLevel break_level_; -@@ -131,19 +134,21 @@ class Token { - class Document { - public: - // Does NOT take ownership of 'tokens'. -- Document(std::vector<Token> *tokens) : tokens_(tokens) {} -+ Document(std::vector<Token>* tokens) : tokens_(tokens) {} - -- void AddToken(const tstring &word, uint32 start, uint32 end, -+ void AddToken(const tstring& word, -+ uint32 start, -+ uint32 end, - Token::BreakLevel break_level, - Token::TextProperty text_properties) { - tokens_->emplace_back(word, start, end, break_level, text_properties); - } - -- const std::vector<Token> &tokens() const { return *tokens_; } -+ const std::vector<Token>& tokens() const { return *tokens_; } - - private: - // not owned -- std::vector<Token> *tokens_; -+ std::vector<Token>* tokens_; - }; - - struct SentenceFragment { -@@ -165,12 +170,12 @@ struct SentenceFragment { - class SentenceFragmenter { - public: - // Constructs a fragmenter to process a specific part of a document. -- SentenceFragmenter(const Document *document, UnicodeUtil *util) -+ SentenceFragmenter(const Document* document, UnicodeUtil* util) - : document_(document), util_(util) {} - - // Finds sentence fragments in the [start_, limit_) range of the associated - // document. -- ::tensorflow::Status FindFragments(std::vector<SentenceFragment> *result); -+ ::tensorflow::Status FindFragments(std::vector<SentenceFragment>* result); - - private: - // State for matching a fragment-boundary regexp against a token sequence. -@@ -181,7 +186,8 @@ class SentenceFragmenter { - // 'i_start'. Returns the longest match found; will be non-empty as long as - // 'i_start' was not already at the end of the associated token range. 
- ::tensorflow::Status FindNextFragmentBoundary( -- int i_start, FragmentBoundaryMatch *result) const; -+ int i_start, -+ FragmentBoundaryMatch* result) const; - - // Updates 'latest_open_paren_is_sentential_' for the tokens in the given - // fragment. -@@ -190,30 +196,32 @@ class SentenceFragmenter { - // Populates a sentence fragment with the tokens from 'i_start' to the end - // of the given FragmentBoundaryMatch. - ::tensorflow::Status FillInFragmentFields(int i_start, -- const FragmentBoundaryMatch &match, -- SentenceFragment *fragment) const; -+ const FragmentBoundaryMatch& match, -+ SentenceFragment* fragment) const; - - // Returns the adjusted first terminal punctuation index in a - // FragmentBoundaryMatch. - ::tensorflow::Status GetAdjustedFirstTerminalPuncIndex( -- const FragmentBoundaryMatch &match, int *result) const; -+ const FragmentBoundaryMatch& match, -+ int* result) const; - - // Returns true iff a FragmentBoundaryMatch has an "unattachable" terminal - // punctuation mark. - ::tensorflow::Status HasUnattachableTerminalPunc( -- const FragmentBoundaryMatch &match, bool *result) const; -+ const FragmentBoundaryMatch& match, -+ bool* result) const; - - // Returns true iff a FragmentBoundaryMatch has a close paren in its closing - // punctuation. -- ::tensorflow::Status HasCloseParen(const FragmentBoundaryMatch &match, -- bool *result) const; -+ ::tensorflow::Status HasCloseParen(const FragmentBoundaryMatch& match, -+ bool* result) const; - - // Whether the latest open paren seen so far appears to be sentence-initial. - // See UpdateLatestOpenParenForFragment() in the .cc file for details. - bool latest_open_paren_is_sentential_ = false; - -- const Document *document_ = nullptr; // not owned -- UnicodeUtil *util_ = nullptr; // not owned -+ const Document* document_ = nullptr; // not owned -+ UnicodeUtil* util_ = nullptr; // not owned - - // TODO(thuang513): DISALLOW_COPY_AND_ASSIGN(SentenceFragmenter); - }; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.cc -index 33a8ccbcd84cd..6c6786d83d795 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.cc -@@ -25,7 +25,8 @@ - namespace tensorflow { - namespace text { - --void ConsumeOneUChar(const absl::string_view& input, UChar32* result, -+void ConsumeOneUChar(const absl::string_view& input, -+ UChar32* result, - int* offset) { - const char* source = input.data(); - -@@ -36,7 +37,8 @@ void ConsumeOneUChar(const absl::string_view& input, UChar32* result, - bool IsTerminalPunc(const absl::string_view& input, int* offset) { - *offset = 0; - bool is_ellipsis = IsEllipsis(input, offset); -- if (is_ellipsis) return true; -+ if (is_ellipsis) -+ return true; - - *offset = 0; - UChar32 char_value; -@@ -561,7 +563,8 @@ void SentenceFragmenterV2::UpdateLatestOpenParenForFragment(int i_start, - } - - void SentenceFragmenterV2::FillInFragmentFields( -- int i_start, const FragmentBoundaryMatch& match, -+ int i_start, -+ const FragmentBoundaryMatch& match, - SentenceFragment* fragment) const { - // Set the fragment's boundaries. 
- fragment->start = i_start; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.h -index 2a63b13055d0b..94903e36022e6 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.h -@@ -73,7 +73,8 @@ bool IsEmoticon(const absl::string_view& input, int* offset); - - bool SpaceAllowedBeforeChar(const absl::string_view& input); - --void ConsumeOneUChar(const absl::string_view& input, UChar32* result, -+void ConsumeOneUChar(const absl::string_view& input, -+ UChar32* result, - int* offset); - - // Returns true iff a string is white space. -@@ -168,7 +169,8 @@ class SentenceFragmenterV2 { - - // Populates a sentence fragment with the text from 'i_start' to the end - // of the given FragmentBoundaryMatch. -- void FillInFragmentFields(int i_start, const FragmentBoundaryMatch& match, -+ void FillInFragmentFields(int i_start, -+ const FragmentBoundaryMatch& match, - SentenceFragment* fragment) const; - - // Returns the adjusted first terminal punctuation index in a -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2_test.cc -index e5942d77cd1b4..32f45ea952439 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2_test.cc -@@ -89,7 +89,8 @@ TEST_P(IsTerminalPuncParamTest, IsTerminalPunc) { - EXPECT_TRUE(IsTerminalPunc(test_string, &offset)); - } - --INSTANTIATE_TEST_SUITE_P(IsTerminalPuncTest, IsTerminalPuncParamTest, -+INSTANTIATE_TEST_SUITE_P(IsTerminalPuncTest, -+ IsTerminalPuncParamTest, - ::testing::ValuesIn(is_terminal_punc_test_cases)); - - TEST_F(IsTerminalPuncTest, IsMultiCharEllipseTerminalPunc) { -@@ -194,7 +195,8 @@ TEST_P(ClosePuncParamTest, IsClosePunc) { - EXPECT_EQ(offset, expected_offset); - } - --INSTANTIATE_TEST_SUITE_P(IsClosePuncParamTest, ClosePuncParamTest, -+INSTANTIATE_TEST_SUITE_P(IsClosePuncParamTest, -+ ClosePuncParamTest, - ::testing::ValuesIn(close_punc_test_cases)); - - class OpenParenParamTest : public SentenceBreakingUtilsParamTest {}; -@@ -240,7 +242,8 @@ TEST_P(OpenParenParamTest, IsOpenParen) { - EXPECT_TRUE(IsOpenParen(test_string)); - } - --INSTANTIATE_TEST_SUITE_P(IsOpenParenParamTest, OpenParenParamTest, -+INSTANTIATE_TEST_SUITE_P(IsOpenParenParamTest, -+ OpenParenParamTest, - ::testing::ValuesIn(open_paren_test_cases)); - - class CloseParenParamTest : public SentenceBreakingUtilsParamTest {}; -@@ -286,7 +289,8 @@ TEST_P(CloseParenParamTest, IsCloseParen) { - EXPECT_TRUE(IsCloseParen(test_string)); - } - --INSTANTIATE_TEST_SUITE_P(IsCloseParenParamTest, CloseParenParamTest, -+INSTANTIATE_TEST_SUITE_P(IsCloseParenParamTest, -+ CloseParenParamTest, - ::testing::ValuesIn(close_paren_test_cases)); - - class IsPunctuationWordParamTest : public SentenceBreakingUtilsParamTest {}; -@@ -508,7 +512,8 @@ TEST_P(IsPunctuationWordParamTest, IsPunctuation) { - EXPECT_TRUE(IsPunctuationWord(test_string)); - } - --INSTANTIATE_TEST_SUITE_P(IsPuncWordParamTest, IsPunctuationWordParamTest, -+INSTANTIATE_TEST_SUITE_P(IsPuncWordParamTest, -+ IsPunctuationWordParamTest, - ::testing::ValuesIn(punc_word_test_cases)); - - class IsEllipsisTest : 
public ::testing::Test {}; -@@ -718,7 +723,8 @@ TEST_P(EmoticonParamTest, IsEmoticon) { - EXPECT_TRUE(IsEmoticon(GetParam(), &offset)); - } - --INSTANTIATE_TEST_SUITE_P(IsEmoticonParamTest, EmoticonParamTest, -+INSTANTIATE_TEST_SUITE_P(IsEmoticonParamTest, -+ EmoticonParamTest, - ::testing::ValuesIn(emoticon_test_cases)); - - class IsEmoticonTest : public ::testing::Test {}; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentencepiece_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentencepiece_kernels.cc -index 887b51d29ebad..a7ee974174bd6 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentencepiece_kernels.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentencepiece_kernels.cc -@@ -20,8 +20,8 @@ - #include "absl/strings/string_view.h" - #include "absl/synchronization/mutex.h" - #include "absl/types/span.h" --#include "src/sentencepiece_model.pb.h" - #include "src/sentencepiece.pb.h" -+#include "src/sentencepiece_model.pb.h" - #include "src/sentencepiece_processor.h" - #include "tensorflow/core/framework/bounds_check.h" - #include "tensorflow/core/framework/dataset_stateful_op_allowlist.h" -@@ -77,12 +77,11 @@ struct SentencepieceResource : public ResourceBase { - std::string unique_node_name = strings::StrCat( - "SentencepieceResourceFromGraphDef", "/", counter.fetch_add(1)); - std::string model = processor.model_proto().SerializeAsString(); -- *out = ops::SourceOp( -- "SentencepieceOp", -- builder->opts() -- .WithName(unique_node_name) -- .WithAttr("model", model) -- .WithAttr("use_node_name_sharing", true)); -+ *out = ops::SourceOp("SentencepieceOp", -+ builder->opts() -+ .WithName(unique_node_name) -+ .WithAttr("model", model) -+ .WithAttr("use_node_name_sharing", true)); - return Status::OK(); - } - }; -@@ -94,7 +93,8 @@ struct SentencepieceResource : public ResourceBase { - constexpr int64 kCostPerUnit = 10000; - - ::tensorflow::Status ToTFStatus(const ::util::Status& s) { -- if (s.ok()) return ::tensorflow::Status(); -+ if (s.ok()) -+ return ::tensorflow::Status(); - return ::tensorflow::Status(static_cast<::tensorflow::error::Code>(s.code()), - ::tensorflow::string(s.message())); - } -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.cc -index ea54de5fa5fdf..1cae7680ba838 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.cc -@@ -19,7 +19,7 @@ namespace text { - - SpanningTreeIterator::SpanningTreeIterator(bool forest) : forest_(forest) {} - --bool SpanningTreeIterator::HasCycle(const SourceList &sources) { -+bool SpanningTreeIterator::HasCycle(const SourceList& sources) { - // Flags for whether each node has already been searched. - searched_.assign(sources.size(), false); - -@@ -31,30 +31,35 @@ bool SpanningTreeIterator::HasCycle(const SourceList &sources) { - // Search upwards to try to find a cycle. 
- uint32 current_node = initial_node; - while (true) { -- if (searched_[current_node]) break; // already searched -- if (visiting_[current_node]) return true; // revisiting implies cycle -+ if (searched_[current_node]) -+ break; // already searched -+ if (visiting_[current_node]) -+ return true; // revisiting implies cycle - visiting_[current_node] = true; // mark as being currently visited - const uint32 source_node = sources[current_node]; -- if (source_node == current_node) break; // self-loops are roots -- current_node = source_node; // advance upwards -+ if (source_node == current_node) -+ break; // self-loops are roots -+ current_node = source_node; // advance upwards - } - - // No cycle; search upwards again to update flags. - current_node = initial_node; - while (true) { -- if (searched_[current_node]) break; // already searched -+ if (searched_[current_node]) -+ break; // already searched - searched_[current_node] = true; - visiting_[current_node] = false; - const uint32 source_node = sources[current_node]; -- if (source_node == current_node) break; // self-loops are roots -- current_node = source_node; // advance upwards -+ if (source_node == current_node) -+ break; // self-loops are roots -+ current_node = source_node; // advance upwards - } - } - - return false; - } - --uint32 SpanningTreeIterator::NumRoots(const SourceList &sources) { -+uint32 SpanningTreeIterator::NumRoots(const SourceList& sources) { - uint32 num_roots = 0; - for (uint32 node = 0; node < sources.size(); ++node) { - num_roots += (node == sources[node]); -@@ -62,29 +67,33 @@ uint32 SpanningTreeIterator::NumRoots(const SourceList &sources) { - return num_roots; - } - --bool SpanningTreeIterator::NextSourceList(SourceList *sources) { -+bool SpanningTreeIterator::NextSourceList(SourceList* sources) { - const uint32 num_nodes = sources->size(); - for (uint32 i = 0; i < num_nodes; ++i) { - const uint32 new_source = ++(*sources)[i]; -- if (new_source < num_nodes) return true; // absorbed in this digit -+ if (new_source < num_nodes) -+ return true; // absorbed in this digit - (*sources)[i] = 0; // overflowed this digit, carry to next digit - } - return false; // overflowed the last digit - } - --bool SpanningTreeIterator::NextTree(SourceList *sources) { -+bool SpanningTreeIterator::NextTree(SourceList* sources) { - // Iterate source lists, skipping non-trees. - while (NextSourceList(sources)) { - // Check the number of roots. - const uint32 num_roots = NumRoots(*sources); - if (forest_) { -- if (num_roots == 0) continue; -+ if (num_roots == 0) -+ continue; - } else { -- if (num_roots != 1) continue; -+ if (num_roots != 1) -+ continue; - } - - // Check for cycles. -- if (HasCycle(*sources)) continue; -+ if (HasCycle(*sources)) -+ continue; - - // Acyclic and rooted, therefore tree. - return true; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.h -index ef7543e91a82b..89edc95a72fe2 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.h -@@ -51,18 +51,18 @@ class SpanningTreeIterator { - - private: - // Returns true if the |sources| contains a cycle. -- bool HasCycle(const SourceList &sources); -+ bool HasCycle(const SourceList& sources); - - // Returns the number of roots in the |sources|. 
-- static uint32 NumRoots(const SourceList &sources); -+ static uint32 NumRoots(const SourceList& sources); - - // Advances |sources| to the next source list, or returns false if there are - // no more source lists. -- static bool NextSourceList(SourceList *sources); -+ static bool NextSourceList(SourceList* sources); - - // Advances |sources| to the next tree (or forest, if |forest_| is true), or - // returns false if there are no more trees. -- bool NextTree(SourceList *sources); -+ bool NextTree(SourceList* sources); - - // If true, iterate over spanning forests instead of spanning trees. - const bool forest_; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator_test.cc -index ddd7bfc1a83cb..4000117bad460 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator_test.cc -@@ -45,7 +45,7 @@ class SpanningTreeIteratorTest : public ::testing::TestWithParam<bool> { - void ExpectNumTrees(int num_nodes, int expected_num_trees) { - int actual_num_trees = 0; - iterator_.ForEachTree( -- num_nodes, [&](const SourceList &sources) { ++actual_num_trees; }); -+ num_nodes, [&](const SourceList& sources) { ++actual_num_trees; }); - LOG(INFO) << "num_nodes=" << num_nodes - << " expected_num_trees=" << expected_num_trees - << " actual_num_trees=" << actual_num_trees; -@@ -54,9 +54,9 @@ class SpanningTreeIteratorTest : public ::testing::TestWithParam<bool> { - - // Expects that the set of possible spanning trees for a complete digraph of - // |num_nodes| nodes is |expected_trees|. -- void ExpectTrees(int num_nodes, const std::set<SourceList> &expected_trees) { -+ void ExpectTrees(int num_nodes, const std::set<SourceList>& expected_trees) { - std::set<SourceList> actual_trees; -- iterator_.ForEachTree(num_nodes, [&](const SourceList &sources) { -+ iterator_.ForEachTree(num_nodes, [&](const SourceList& sources) { - CHECK(actual_trees.insert(sources).second); - }); - EXPECT_EQ(expected_trees, actual_trees); -@@ -66,7 +66,8 @@ class SpanningTreeIteratorTest : public ::testing::TestWithParam<bool> { - SpanningTreeIterator iterator_{GetParam()}; - }; - --INSTANTIATE_TEST_SUITE_P(AllowForest, SpanningTreeIteratorTest, -+INSTANTIATE_TEST_SUITE_P(AllowForest, -+ SpanningTreeIteratorTest, - ::testing::Bool()); - - TEST_P(SpanningTreeIteratorTest, NumberOfTrees) { -@@ -94,7 +95,9 @@ TEST_P(SpanningTreeIteratorTest, NumberOfTrees) { - } - } - --TEST_P(SpanningTreeIteratorTest, OneNodeDigraph) { ExpectTrees(1, {{0}}); } -+TEST_P(SpanningTreeIteratorTest, OneNodeDigraph) { -+ ExpectTrees(1, {{0}}); -+} - - TEST_P(SpanningTreeIteratorTest, TwoNodeDigraph) { - if (GetParam()) { // forest -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc -index 8a1e1a7f0c2f6..b0ab1dffafbe0 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc -@@ -69,7 +69,8 @@ Status TokenizeByLabel(const absl::string_view& text, - bool force_split_at_break_character, - std::vector<std::string>* tokens, - std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_tokens) { 
-+ std::vector<int>* end_offset, -+ int* num_tokens) { - std::vector<absl::string_view> chars; - if (!GetUTF8Chars(text, &chars)) { - return Status(error::Code::INVALID_ARGUMENT, -@@ -130,10 +131,9 @@ class SplitMergeTokenizeWithOffsetsOp : public OpKernel { - const Tensor* row_splits; - OP_REQUIRES_OK(ctx, ctx->input("row_splits", &row_splits)); - OP_REQUIRES(ctx, input_values->dim_size(0) == row_splits->dim_size(0) - 1, -- errors::InvalidArgument("Expecting row_splits have ", -- input_values->dim_size(0) + 1, -- " elements, got ", -- row_splits->dim_size(0))); -+ errors::InvalidArgument( -+ "Expecting row_splits have ", input_values->dim_size(0) + 1, -+ " elements, got ", row_splits->dim_size(0))); - - std::vector<string> tokens; - std::vector<int> begin_offset; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.cc -index 39c7d832c8671..f0f5e9931185c 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.cc -@@ -22,7 +22,8 @@ namespace tensorflow { - namespace text_kernels_test_util { - - bool TensorEqMatcher::MatchAndExplain( -- Tensor actual, ::testing::MatchResultListener* listener) const { -+ Tensor actual, -+ ::testing::MatchResultListener* listener) const { - string expect_values = expect_.SummarizeValue(expect_.NumElements()); - string actual_values = actual.SummarizeValue(actual.NumElements()); - if (expect_.dtype() != actual.dtype() || expect_.shape() != actual.shape() || -@@ -47,7 +48,8 @@ void TensorEqMatcher::DescribeNegationTo(::std::ostream* gmock_os) const { - } - - bool TensorHasShapeMatcher::MatchAndExplain( -- Tensor actual, ::testing::MatchResultListener* listener) const { -+ Tensor actual, -+ ::testing::MatchResultListener* listener) const { - if (expect_ != actual.shape()) { - *listener << "\n shape=" << actual.shape().DebugString(); - return false; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.h -index 9e2194cf8d264..89b885b1725e8 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.h -@@ -63,7 +63,8 @@ class TensorHasShapeMatcher : public ::testing::MatcherInterface<Tensor> { - // TensorHasShapeAndValues<int64>({3, 2}, {1, 2, 3, 4, 5, 6}); - template <typename DTYPE> - ::testing::Matcher<Tensor> TensorHasShapeAndValues( -- const TensorShape& shape, const std::vector<DTYPE>& values) { -+ const TensorShape& shape, -+ const std::vector<DTYPE>& values) { - Tensor expect = test::AsTensor<DTYPE>(values, shape); - // MakeMatcher takes ownership of the TensorEqMatcher. 
- return ::testing::MakeMatcher(new TensorEqMatcher(expect)); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc -index b4bcbdb2ed704..65099251edc22 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc -@@ -74,7 +74,8 @@ Status TokenizeByLogits(const absl::string_view& text, - bool force_split_at_break_character, - std::vector<std::string>* tokens, - std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_tokens) { -+ std::vector<int>* end_offset, -+ int* num_tokens) { - std::vector<absl::string_view> chars; - if (!GetUTF8Chars(text, &chars)) { - return Status(error::Code::INVALID_ARGUMENT, -@@ -84,8 +85,7 @@ Status TokenizeByLogits(const absl::string_view& text, - if (chars.size() > logits.dimension(1)) { - return Status(error::Code::INVALID_ARGUMENT, - absl::StrCat("Number of logits, ", logits.dimension(1), -- ", is insufficient for text \"", text, -- "\"")); -+ ", is insufficient for text \"", text, "\"")); - } - - bool last_character_is_break_character = false; -@@ -96,8 +96,7 @@ Status TokenizeByLogits(const absl::string_view& text, - if (!is_break_character) { - const float logit_split = logits(batch_index, i, 0); - const float logit_merge = logits(batch_index, i, 1); -- if ((logit_split > logit_merge) || -- !has_new_token_generated_for_text || -+ if ((logit_split > logit_merge) || !has_new_token_generated_for_text || - (last_character_is_break_character && - force_split_at_break_character)) { - tokens->emplace_back(chars[i].data(), chars[i].length()); -@@ -122,8 +121,7 @@ Status TokenizeByLogits(const absl::string_view& text, - - class TokenizerFromLogitsOp : public OpKernel { - public: -- explicit TokenizerFromLogitsOp(OpKernelConstruction* ctx) -- : OpKernel(ctx) {} -+ explicit TokenizerFromLogitsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - const Tensor* strings; -@@ -132,8 +130,7 @@ class TokenizerFromLogitsOp : public OpKernel { - OP_REQUIRES_OK(ctx, ctx->input("logits", &logits)); - OP_REQUIRES(ctx, strings->dim_size(0) == logits->dim_size(0), - errors::InvalidArgument("Expecting logits to have ", -- strings->dim_size(0), -- " rows, got ", -+ strings->dim_size(0), " rows, got ", - logits->dim_size(0))); - const Tensor* force_split_at_break_character; - OP_REQUIRES_OK(ctx, ctx->input("force_split_at_break_character", -@@ -153,9 +150,9 @@ class TokenizerFromLogitsOp : public OpKernel { - // Iterate through all the values and tokenize them. - const auto& strings_vec = strings->flat<tstring>(); - OP_REQUIRES(ctx, logits_tensor.dimension(0) >= strings_vec.size(), -- errors::Internal("Bad logits dimension #0: ", -- logits_tensor.dimension(0), " < ", -- strings_vec.size())); -+ errors::Internal( -+ "Bad logits dimension #0: ", logits_tensor.dimension(0), -+ " < ", strings_vec.size())); - // Dimension #1 of logits will be checked inside TokenizeByLogits. - OP_REQUIRES(ctx, logits_tensor.dimension(2) == 2, - errors::Internal("Bad logits dimension #2: ", -@@ -164,11 +161,9 @@ class TokenizerFromLogitsOp : public OpKernel { - // Tokenize into tokens and record the offset locations. 
- int num_tokens = 0; - OP_REQUIRES_OK( -- ctx, TokenizeByLogits( -- strings_vec(i), -- logits_tensor, i, -- force_split_at_break_character_bool, -- &tokens, &begin_offset, &end_offset, &num_tokens)); -+ ctx, TokenizeByLogits(strings_vec(i), logits_tensor, i, -+ force_split_at_break_character_bool, &tokens, -+ &begin_offset, &end_offset, &num_tokens)); - - // Record the row splits. - output_row_splits.push_back(num_tokens + output_row_splits.back()); -@@ -187,10 +182,9 @@ class TokenizerFromLogitsOp : public OpKernel { - auto output_values_vec = output_values->vec<tstring>(); - - Tensor* output_row_splits_tensor; -- OP_REQUIRES_OK(ctx, -- ctx->allocate_output("row_splits", -- TensorShape(output_row_splits_shape), -- &output_row_splits_tensor)); -+ OP_REQUIRES_OK(ctx, ctx->allocate_output( -+ "row_splits", TensorShape(output_row_splits_shape), -+ &output_row_splits_tensor)); - auto output_row_splits_vec = output_row_splits_tensor->vec<int64>(); - - Tensor* start_values; -@@ -226,9 +220,8 @@ class TokenizerFromLogitsOp : public OpKernel { - TF_DISALLOW_COPY_AND_ASSIGN(TokenizerFromLogitsOp); - }; - --REGISTER_KERNEL_BUILDER( -- Name("TokenizerFromLogits").Device(DEVICE_CPU), -- TokenizerFromLogitsOp); -+REGISTER_KERNEL_BUILDER(Name("TokenizerFromLogits").Device(DEVICE_CPU), -+ TokenizerFromLogitsOp); - - } // namespace text - } // namespace tensorflow -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel_test.cc -index 310f8f77ab439..ec712e85adddc 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel_test.cc -@@ -39,9 +39,9 @@ class UnicodeScriptTokenizeWithOffsetsKernelTest - public: - void MakeOp() { - TF_ASSERT_OK(NodeDefBuilder("tested_op", "UnicodeScriptTokenizeWithOffsets") -- .Input(FakeInput()) -- .Input(FakeInput()) -- .Finalize(node_def())); -+ .Input(FakeInput()) -+ .Input(FakeInput()) -+ .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } - }; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel_test.cc -index c1670263fa278..86a3be8198e1c 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel_test.cc -@@ -34,14 +34,13 @@ using tensorflow::Status; - using tensorflow::TensorShape; - using tensorflow::text_kernels_test_util::VectorEq; - --class WhitespaceTokenizeWithOffsetsKernelTest -- : public tensorflow::OpsTestBase { -+class WhitespaceTokenizeWithOffsetsKernelTest : public tensorflow::OpsTestBase { - public: - void MakeOp() { - TF_ASSERT_OK(NodeDefBuilder("tested_op", "WhitespaceTokenizeWithOffsets") -- .Input(FakeInput()) -- .Input(FakeInput()) -- .Finalize(node_def())); -+ .Input(FakeInput()) -+ .Input(FakeInput()) -+ .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } - }; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.cc -index 45503fe3d08ac..10aed7da5c882 100644 ---- 
a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.cc -@@ -19,7 +19,12 @@ - - #include "absl/strings/string_view.h" - #include "icu4c/source/common/unicode/appendable.h" -+#include "icu4c/source/common/unicode/bytestream.h" -+#include "icu4c/source/common/unicode/edits.h" -+#include "icu4c/source/common/unicode/normalizer2.h" - #include "icu4c/source/common/unicode/schriter.h" -+#include "icu4c/source/common/unicode/stringoptions.h" -+#include "icu4c/source/common/unicode/stringpiece.h" - #include "icu4c/source/common/unicode/uchar.h" - #include "icu4c/source/common/unicode/ucnv.h" - #include "icu4c/source/common/unicode/ucnv_err.h" -@@ -27,15 +32,9 @@ - #include "icu4c/source/common/unicode/uniset.h" - #include "icu4c/source/common/unicode/unistr.h" - #include "icu4c/source/common/unicode/uset.h" --#include "icu4c/source/common/unicode/utypes.h" --#include "icu4c/source/common/unicode/bytestream.h" --#include "icu4c/source/common/unicode/edits.h" --#include "icu4c/source/common/unicode/normalizer2.h" --#include "icu4c/source/common/unicode/stringoptions.h" --#include "icu4c/source/common/unicode/stringpiece.h" - #include "icu4c/source/common/unicode/utf.h" - #include "icu4c/source/common/unicode/utf8.h" -- -+#include "icu4c/source/common/unicode/utypes.h" - - namespace tensorflow { - namespace text { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.h -index 26fcf20c1d862..4fd41d5caef93 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.h -@@ -61,8 +61,7 @@ class WhitespaceTokenizer { - // Args: - // * config: A WhitespaceTokenizerConfig which should be created using the - // WhitespaceTokenizerConfigBuilder -- WhitespaceTokenizer(const WhitespaceTokenizerConfig& cfg) -- : config_(cfg) { } -+ WhitespaceTokenizer(const WhitespaceTokenizerConfig& cfg) : config_(cfg) {} - - // Tokenizes a string (or series of character codepoints) by whitespace. 
- // -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h -index 353bcddb644a3..1d41210c248e7 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h -@@ -17,7 +17,6 @@ - - #include <string> - -- - namespace tensorflow { - namespace text { - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h -index db6b1ac094b66..6a8b7c06e6d1b 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h -@@ -34,10 +34,7 @@ template <tflite::shim::Runtime Rt> - class WhitespaceTokenizeWithOffsetsV2Op - : public tflite::shim::OpKernelShim<WhitespaceTokenizeWithOffsetsV2Op, Rt> { - private: -- enum Inputs { -- kInputValues = 0, -- kInputConfig -- }; -+ enum Inputs { kInputValues = 0, kInputConfig }; - enum Outputs { - kOutputTokens = 0, - kOutputRowSplits, -@@ -114,8 +111,8 @@ absl::Status WhitespaceTokenizeWithOffsetsV2Op<Rt>::ShapeInference( - } - - template <tflite::shim::Runtime Rt> -- absl::Status WhitespaceTokenizeWithOffsetsV2Op<Rt> -- ::Invoke(InvokeContext* context) { -+absl::Status WhitespaceTokenizeWithOffsetsV2Op<Rt>::Invoke( -+ InvokeContext* context) { - // Inputs - const auto values_statusor = context->GetInput(kInputValues); - if (!values_statusor.ok()) { -@@ -151,15 +148,12 @@ template <tflite::shim::Runtime Rt> - // Allocate output & fill output tensors. 
- SH_RETURN_IF_ERROR(FillOutputTensor<std::string, tensorflow::tstring>( - tokens, kOutputTokens, context)); -- SH_RETURN_IF_ERROR(FillOutputTensor<int64_t, int64_t>(row_splits, -- kOutputRowSplits, -- context)); -- SH_RETURN_IF_ERROR(FillOutputTensor<int32_t, int32_t>(start_offsets, -- kOutputStartOffsets, -- context)); -- SH_RETURN_IF_ERROR(FillOutputTensor<int32_t, int32_t>(end_offsets, -- kOutputEndOffsets, -- context)); -+ SH_RETURN_IF_ERROR(FillOutputTensor<int64_t, int64_t>( -+ row_splits, kOutputRowSplits, context)); -+ SH_RETURN_IF_ERROR(FillOutputTensor<int32_t, int32_t>( -+ start_offsets, kOutputStartOffsets, context)); -+ SH_RETURN_IF_ERROR(FillOutputTensor<int32_t, int32_t>( -+ end_offsets, kOutputEndOffsets, context)); - - return absl::OkStatus(); - } -@@ -167,13 +161,17 @@ template <tflite::shim::Runtime Rt> - template <tflite::shim::Runtime Rt> - template <typename BufferType, typename DType> - absl::Status WhitespaceTokenizeWithOffsetsV2Op<Rt>::FillOutputTensor( -- const std::vector<BufferType>& buffer, const int index, -+ const std::vector<BufferType>& buffer, -+ const int index, - InvokeContext* context) { -- SH_ASSIGN_OR_RETURN(const auto tensorview, context->GetOutput( -- index, tflite::shim::Shape({static_cast<int>(buffer.size())}))); -+ SH_ASSIGN_OR_RETURN( -+ const auto tensorview, -+ context->GetOutput( -+ index, tflite::shim::Shape({static_cast<int>(buffer.size())}))); - auto data = tensorview->template As<DType, 1>(); - // TODO(broken): investigate using memcpy like previous WST -- for (int i = 0; i < buffer.size(); ++i) data(i) = buffer.at(i); -+ for (int i = 0; i < buffer.size(); ++i) -+ data(i) = buffer.at(i); - return absl::OkStatus(); - } - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_test.cc -index 8030d410b45c7..e7be52e8b305d 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_test.cc -@@ -17,10 +17,10 @@ - #include <gmock/gmock.h> - #include <gtest/gtest.h> - #include "absl/flags/flag.h" --#include "tensorflow/core/platform/env.h" --#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" - #include "absl/status/status.h" - #include "absl/status/statusor.h" -+#include "tensorflow/core/platform/env.h" -+#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" - - namespace tensorflow { - namespace text { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel.cc -index 4042f1855c4d1..535c69559c8a5 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel.cc -@@ -81,8 +81,10 @@ bool GetSplitUnknownCharacters(OpKernelConstruction* ctx) { - return split_unknown_characters; - } - --Status GetTableHandle(const string& input_name, OpKernelContext* ctx, -- string* container, string* table_handle) { -+Status GetTableHandle(const string& input_name, -+ OpKernelContext* ctx, -+ string* container, -+ string* table_handle) { - { - mutex* mu; - TF_RETURN_IF_ERROR(ctx->input_ref_mutex(input_name, &mu)); -@@ -104,7 +106,8 @@ Status GetTableHandle(const string& input_name, OpKernelContext* ctx, - // Gets the 
LookupTable stored in the ctx->resource_manager() with key - // passed by attribute with name input_name, returns null if the table - // doesn't exist. --Status GetLookupTable(const string& input_name, OpKernelContext* ctx, -+Status GetLookupTable(const string& input_name, -+ OpKernelContext* ctx, - lookup::LookupInterface** table) { - string container; - string table_handle; -@@ -159,7 +162,8 @@ LookupStatus LookupTableVocab::Contains(const absl::string_view key, - keys.flat<tstring>()(0) = tstring(key.data(), key.size()); - Tensor values(DT_INT64, TensorShape({1})); - auto status = table_->Find(ctx_, keys, &values, default_value_); -- if (!status.ok()) return LookupStatus(status.error_message()); -+ if (!status.ok()) -+ return LookupStatus(status.error_message()); - - if (static_cast<int64>(values.flat<int64>()(0)) != kOutOfVocabValue) { - *value = true; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel_test.cc -index b063da82b52e4..31bf958dadad0 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel_test.cc -@@ -29,7 +29,7 @@ TEST(WordpieceTokenizeWithOffsetsOpTest, ShapeFn) { - // WordpieceTokenizeWithOffsets(input_values, vocab_lookup_table) -> - // [output_values, output_row_lengths, start_values, limit_values] - ShapeInferenceTestOp op("WordpieceTokenizeWithOffsets"); -- auto &attr = *op.node_def.mutable_attr(); -+ auto& attr = *op.node_def.mutable_attr(); - - attr["output_row_partition_type"].set_s("row_lengths"); - INFER_OK(op, "?;?", "[?];[?];[?];[?]"); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc -index a5a0690618161..e1a791b7963ab 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc -@@ -24,10 +24,12 @@ namespace text { - - namespace { - --LookupStatus Lookup(int byte_start, int byte_end, -+LookupStatus Lookup(int byte_start, -+ int byte_end, - const absl::string_view& token, - const std::string& suffix_indicator, -- const WordpieceVocab* vocab_map, bool* in_vocab) { -+ const WordpieceVocab* vocab_map, -+ bool* in_vocab) { - int byte_len = byte_end - byte_start; - absl::string_view substr(token.data() + byte_start, byte_len); - std::string lookup_value; -@@ -45,11 +47,15 @@ LookupStatus Lookup(int byte_start, int byte_end, - // 2) is in the vocab OR if split_unknown_characters is true, is a single - // UTF8 character. - // If no match is found, found_match is set to false. 
--LookupStatus LongestMatchStartingAt( -- int byte_start, const absl::string_view& token, -- const std::string& suffix_indicator, const int max_chars_per_subtoken, -- bool split_unknown_characters, const WordpieceVocab* vocab_map, -- int* byte_end, bool* found_match, bool* match_is_unknown_character) { -+LookupStatus LongestMatchStartingAt(int byte_start, -+ const absl::string_view& token, -+ const std::string& suffix_indicator, -+ const int max_chars_per_subtoken, -+ bool split_unknown_characters, -+ const WordpieceVocab* vocab_map, -+ int* byte_end, -+ bool* found_match, -+ bool* match_is_unknown_character) { - *match_is_unknown_character = false; - *found_match = false; - const char* token_bytes = token.data(); -@@ -72,7 +78,8 @@ LookupStatus LongestMatchStartingAt( - bool in_vocab; - auto status = Lookup(byte_start, byte_ends[i], token, suffix_indicator, - vocab_map, &in_vocab); -- if (!status.success) return status; -+ if (!status.success) -+ return status; - if (in_vocab) { - *byte_end = byte_ends[i]; - *found_match = true; -@@ -95,7 +102,8 @@ LookupStatus NoTokenFound(const absl::string_view& token, - const std::string& unknown_token, - std::vector<std::string>* subwords, - std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_word_pieces) { -+ std::vector<int>* end_offset, -+ int* num_word_pieces) { - begin_offset->push_back(0); - if (use_unknown_token) { - subwords->push_back(unknown_token); -@@ -111,9 +119,12 @@ LookupStatus NoTokenFound(const absl::string_view& token, - - // When a subword is found, this helper function will add the outputs to - // 'subwords', 'begin_offset' and 'end_offset'. --void AddWord(const absl::string_view& token, int byte_start, int byte_end, -+void AddWord(const absl::string_view& token, -+ int byte_start, -+ int byte_end, - const std::string& suffix_indicator, -- std::vector<std::string>* subwords, std::vector<int>* begin_offset, -+ std::vector<std::string>* subwords, -+ std::vector<int>* begin_offset, - std::vector<int>* end_offset) { - begin_offset->push_back(byte_start); - int len = byte_end - byte_start; -@@ -130,8 +141,10 @@ void AddWord(const absl::string_view& token, int byte_start, int byte_end, - - // Adds a single unknown character subword, found when split_unknown_characters - // is true. 
--void AddUnknownCharacter(const absl::string_view& token, int byte_start, -- int byte_end, const std::string& suffix_indicator, -+void AddUnknownCharacter(const absl::string_view& token, -+ int byte_start, -+ int byte_end, -+ const std::string& suffix_indicator, - bool use_unknown_token, - const std::string& unknown_token, - std::vector<std::string>* subwords, -@@ -158,13 +171,18 @@ void AddUnknownCharacter(const absl::string_view& token, int byte_start, - } - } - --LookupStatus TokenizeL2RGreedy( -- const absl::string_view& token, const int max_bytes_per_token, -- const int max_chars_per_subtoken, const std::string& suffix_indicator, -- bool use_unknown_token, const std::string& unknown_token, -- bool split_unknown_characters, const WordpieceVocab* vocab_map, -- std::vector<std::string>* subwords, std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_word_pieces) { -+LookupStatus TokenizeL2RGreedy(const absl::string_view& token, -+ const int max_bytes_per_token, -+ const int max_chars_per_subtoken, -+ const std::string& suffix_indicator, -+ bool use_unknown_token, -+ const std::string& unknown_token, -+ bool split_unknown_characters, -+ const WordpieceVocab* vocab_map, -+ std::vector<std::string>* subwords, -+ std::vector<int>* begin_offset, -+ std::vector<int>* end_offset, -+ int* num_word_pieces) { - std::vector<std::string> candidate_subwords; - std::vector<int> candidate_begin_offsets; - std::vector<int> candidate_end_offsets; -@@ -177,7 +195,8 @@ LookupStatus TokenizeL2RGreedy( - byte_start, token, suffix_indicator, max_chars_per_subtoken, - split_unknown_characters, vocab_map, &byte_end, &found_subword, - &match_is_unknown_character); -- if (!status.success) return status; -+ if (!status.success) -+ return status; - if (found_subword) { - if (match_is_unknown_character) { - AddUnknownCharacter(token, byte_start, byte_end, suffix_indicator, -@@ -208,13 +227,18 @@ LookupStatus TokenizeL2RGreedy( - - } // namespace - --LookupStatus WordpieceTokenize( -- const absl::string_view& token, const int max_bytes_per_token, -- const int max_chars_per_subtoken, const std::string& suffix_indicator, -- bool use_unknown_token, const std::string& unknown_token, -- bool split_unknown_characters, const WordpieceVocab* vocab_map, -- std::vector<std::string>* subwords, std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_word_pieces) { -+LookupStatus WordpieceTokenize(const absl::string_view& token, -+ const int max_bytes_per_token, -+ const int max_chars_per_subtoken, -+ const std::string& suffix_indicator, -+ bool use_unknown_token, -+ const std::string& unknown_token, -+ bool split_unknown_characters, -+ const WordpieceVocab* vocab_map, -+ std::vector<std::string>* subwords, -+ std::vector<int>* begin_offset, -+ std::vector<int>* end_offset, -+ int* num_word_pieces) { - int token_len = token.size(); - if (token_len > max_bytes_per_token) { - begin_offset->push_back(0); -@@ -234,12 +258,16 @@ LookupStatus WordpieceTokenize( - begin_offset, end_offset, num_word_pieces); - } - --LookupStatus WordpieceTokenize( -- const absl::string_view& token, const int max_bytes_per_token, -- const std::string& suffix_indicator, bool use_unknown_token, -- const std::string& unknown_token, const WordpieceVocab* vocab_map, -- std::vector<std::string>* subwords, std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_word_pieces) { -+LookupStatus WordpieceTokenize(const absl::string_view& token, -+ const int max_bytes_per_token, -+ const std::string& 
suffix_indicator, -+ bool use_unknown_token, -+ const std::string& unknown_token, -+ const WordpieceVocab* vocab_map, -+ std::vector<std::string>* subwords, -+ std::vector<int>* begin_offset, -+ std::vector<int>* end_offset, -+ int* num_word_pieces) { - return WordpieceTokenize(token, max_bytes_per_token, - /* max_chars_per_subtoken= */ 0, suffix_indicator, - use_unknown_token, unknown_token, -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h -index 0547198a50f5b..464386c7db2b1 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h -@@ -39,22 +39,31 @@ class WordpieceVocab { - bool* value) const = 0; - }; - --LookupStatus WordpieceTokenize( -- const absl::string_view& token, const int max_bytes_per_token, -- const int max_chars_per_subtoken, const std::string& suffix_indicator, -- bool use_unknown_token, const std::string& unknown_token, -- bool split_unknown_characters, const WordpieceVocab* vocab_map, -- std::vector<std::string>* subwords, std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_word_pieces); -+LookupStatus WordpieceTokenize(const absl::string_view& token, -+ const int max_bytes_per_token, -+ const int max_chars_per_subtoken, -+ const std::string& suffix_indicator, -+ bool use_unknown_token, -+ const std::string& unknown_token, -+ bool split_unknown_characters, -+ const WordpieceVocab* vocab_map, -+ std::vector<std::string>* subwords, -+ std::vector<int>* begin_offset, -+ std::vector<int>* end_offset, -+ int* num_word_pieces); - - // As above but with `max_bytes_per_subtoken` unknown, - // and split_unknown_characters=false. (For backwards compatability.) 
--LookupStatus WordpieceTokenize( -- const absl::string_view& token, const int max_bytes_per_token, -- const std::string& suffix_indicator, bool use_unknown_token, -- const std::string& unknown_token, const WordpieceVocab* vocab_map, -- std::vector<std::string>* subwords, std::vector<int>* begin_offset, -- std::vector<int>* end_offset, int* num_word_pieces); -+LookupStatus WordpieceTokenize(const absl::string_view& token, -+ const int max_bytes_per_token, -+ const std::string& suffix_indicator, -+ bool use_unknown_token, -+ const std::string& unknown_token, -+ const WordpieceVocab* vocab_map, -+ std::vector<std::string>* subwords, -+ std::vector<int>* begin_offset, -+ std::vector<int>* end_offset, -+ int* num_word_pieces); - - } // namespace text - } // namespace tensorflow -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/mst_ops.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/mst_ops.cc -index 0d51b4d60f89d..1c271abc32858 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/mst_ops.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/ops/mst_ops.cc -@@ -25,7 +25,7 @@ REGISTER_OP("MaxSpanningTree") - .Input("scores: T") - .Output("max_scores: T") - .Output("argmax_sources: int32") -- .SetShapeFn([](tensorflow::shape_inference::InferenceContext *context) { -+ .SetShapeFn([](tensorflow::shape_inference::InferenceContext* context) { - tensorflow::shape_inference::ShapeHandle num_nodes; - tensorflow::shape_inference::ShapeHandle scores; - TF_RETURN_IF_ERROR(context->WithRank(context->input(0), 1, &num_nodes)); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/rouge_l_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/rouge_l_op.cc -index b896a47f94cbe..ac9a3ff90175b 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/rouge_l_op.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/ops/rouge_l_op.cc -@@ -84,8 +84,8 @@ Status RougeLShapeFn(InferenceContext* c) { - TF_RETURN_IF_ERROR(c->WithRank(beta_shape, 0, &unused)); - - ShapeHandle output_nrows_plus_one; -- TF_RETURN_IF_ERROR(c->Merge(hyp_splits_shape, ref_splits_shape, -- &output_nrows_plus_one)); -+ TF_RETURN_IF_ERROR( -+ c->Merge(hyp_splits_shape, ref_splits_shape, &output_nrows_plus_one)); - - // Output shape is a 1-D tensor with size equal to number of splits minus 1. 
- DimensionHandle dim; -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/split_merge_tokenize_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/split_merge_tokenize_op.cc -index f04bf70b6a701..718ca926375d9 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/split_merge_tokenize_op.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/ops/split_merge_tokenize_op.cc -@@ -86,7 +86,7 @@ Status SplitMergeTokenizeWithOffsetsShapeFn(InferenceContext* c) { - c->set_output(0, c->UnknownShapeOfRank(1)); // output_values - DimensionHandle num_splits; - TF_RETURN_IF_ERROR(c->Add(num_input_values, 1, &num_splits)); -- c->set_output(1, c->Vector(num_splits)); // row_splits -+ c->set_output(1, c->Vector(num_splits)); // row_splits - c->set_output(2, c->UnknownShapeOfRank(1)); // start_values - c->set_output(3, c->UnknownShapeOfRank(1)); // limit_values - return Status::OK(); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/tokenizer_from_logits_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/tokenizer_from_logits_op.cc -index b34a46e71ae93..93971119f22b4 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/tokenizer_from_logits_op.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/ops/tokenizer_from_logits_op.cc -@@ -112,7 +112,7 @@ Status TokenizerFromLogitsShapeFn(InferenceContext* c) { - c->set_output(0, c->UnknownShapeOfRank(1)); // output_values - DimensionHandle num_splits; - TF_RETURN_IF_ERROR(c->Add(num_strings, 1, &num_splits)); -- c->set_output(1, c->Vector(num_splits)); // row_splits -+ c->set_output(1, c->Vector(num_splits)); // row_splits - c->set_output(2, c->UnknownShapeOfRank(1)); // start_values - c->set_output(3, c->UnknownShapeOfRank(1)); // limit_values - return Status::OK(); -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/wordpiece_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/wordpiece_op.cc -index 6a34995237fdc..aac35a6d6f7ca 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/wordpiece_op.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/ops/wordpiece_op.cc -@@ -32,8 +32,9 @@ REGISTER_OP("WordpieceTokenizeWithOffsets") - .Attr("use_unknown_token: bool") - .Attr("unknown_token: string") - .Attr("split_unknown_characters: bool = false") -- .Attr("output_row_partition_type: {'row_lengths', 'row_splits'}" -- " = 'row_lengths'") -+ .Attr( -+ "output_row_partition_type: {'row_lengths', 'row_splits'}" -+ " = 'row_lengths'") - .Output("output_values: string") - .Output("output_row_lengths: int64") - .Output("start_values: int64") -@@ -96,8 +97,8 @@ Status WordpieceTokenizeWithOffsetsShapeFn(InferenceContext* c) { - string output_row_partition_type; - TF_RETURN_IF_ERROR(c->WithRank(input_values, 1, &input_values)); - TF_RETURN_IF_ERROR(c->WithRank(vocab_lookup_table, 0, &vocab_lookup_table)); -- TF_RETURN_IF_ERROR(c->GetAttr("output_row_partition_type", -- &output_row_partition_type)); -+ TF_RETURN_IF_ERROR( -+ c->GetAttr("output_row_partition_type", &output_row_partition_type)); - DimensionHandle num_input_values = c->Dim(input_values, 0); - c->set_output(0, c->UnknownShapeOfRank(1)); // output_values - if (output_row_partition_type == "row_lengths") { -@@ -112,5 +113,4 @@ Status WordpieceTokenizeWithOffsetsShapeFn(InferenceContext* c) { - return Status::OK(); - } - -- - } // namespace tensorflow -diff --git 
a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc -index 116d420dad2d5..6cd95d3eb865f 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc -@@ -12,8 +12,8 @@ - // See the License for the specific language governing permissions and - // limitations under the License. - --#include <stdexcept> - #include <iostream> -+#include <stdexcept> - #include "include/pybind11/pybind11.h" - #include "include/pybind11/stl.h" - #include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" -@@ -24,11 +24,10 @@ namespace text { - namespace py = pybind11; - - PYBIND11_MODULE(pywrap_whitespace_tokenizer_config_builder, m) { -- m.def("build_whitespace_tokenizer_config", -- []() { -- const auto result = BuildWhitespaceTokenizerConfig(); -- return py::bytes(result); -- }); -+ m.def("build_whitespace_tokenizer_config", []() { -+ const auto result = BuildWhitespaceTokenizerConfig(); -+ return py::bytes(result); -+ }); - } - - } // namespace text -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc -index 4b7792058367d..138ef9c3c542b 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc -@@ -12,7 +12,6 @@ - // See the License for the specific language governing permissions and - // limitations under the License. - -- - #include "include/pybind11/pybind11.h" - #include "include/pybind11/pytypes.h" - #include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h" -@@ -26,11 +25,8 @@ PYBIND11_MODULE(tflite_registrar, m) { - A module with a Python wrapper for TFLite TFText ops. - )pbdoc"; - m.attr("_allowed_symbols") = pybind11::make_tuple( -- "AddFastWordpieceTokenize", -- "AddFastWordpieceDetokenize", -- "AddNgramsStringJoin", -- "AddRaggedTensorToTensor", -- "AddWhitespaceTokenize", -+ "AddFastWordpieceTokenize", "AddFastWordpieceDetokenize", -+ "AddNgramsStringJoin", "AddRaggedTensorToTensor", "AddWhitespaceTokenize", - "SELECT_TFTEXT_OPS"); - m.def( - "AddFastWordpieceTokenize", --- -2.34.1.400.ga245620fadb-goog -
diff --git a/third_party/tensorflow-text/patches/0002-use-chromium-third_party-icu.patch b/third_party/tensorflow-text/patches/0002-use-chromium-third_party-icu.patch
deleted file mode 100644
index 23d88d5..0000000
--- a/third_party/tensorflow-text/patches/0002-use-chromium-third_party-icu.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From b07d6a5ff66c3a4ffcc03c7ff24e888228d2b5fb Mon Sep 17 00:00:00 2001
-From: Robert Ogden <robertogden@chromium.org>
-Date: Wed, 8 Dec 2021 10:45:07 -0800
-Subject: [PATCH 2/5] use chromium third_party icu
-
----
- .../src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc
-index e1a791b7963ab..b125daa2efdb3 100644
---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc
-+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc
-@@ -17,7 +17,7 @@
- #include "absl/strings/str_cat.h"
- #include "absl/strings/str_join.h"
- #include "absl/strings/string_view.h"
--#include "icu4c/source/common/unicode/utf8.h"
-+#include "third_party/icu/source/common/unicode/utf8.h"
-
- namespace tensorflow {
- namespace text {
---
-2.34.1.400.ga245620fadb-goog
-
diff --git a/third_party/tensorflow-text/patches/0004-rm-deps-in-non-built-files.patch b/third_party/tensorflow-text/patches/0004-rm-deps-in-non-built-files.patch
deleted file mode 100644
index bb241196..0000000
--- a/third_party/tensorflow-text/patches/0004-rm-deps-in-non-built-files.patch
+++ /dev/null
@@ -1,115 +0,0 @@ -From 0fcdbd30395dd9310bb86f0adb9790ae9240c9a8 Mon Sep 17 00:00:00 2001 -From: Robert Ogden <robertogden@chromium.org> -Date: Wed, 8 Dec 2021 10:47:16 -0800 -Subject: [PATCH 4/5] rm deps in non-built files - ---- - .../tensorflow_text/core/kernels/darts_clone_trie_builder.cc | 2 +- - .../src/tensorflow_text/core/kernels/ngrams_tflite_test.cc | 2 +- - .../core/kernels/ragged_tensor_to_tensor_tflite.cc | 2 +- - .../core/kernels/ragged_tensor_to_tensor_tflite_test.cc | 2 +- - .../pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc | 4 ++-- - .../pybinds/pywrap_whitespace_tokenizer_config_builder.cc | 4 ++-- - .../src/tensorflow_text/core/pybinds/tflite_registrar.cc | 4 ++-- - 7 files changed, 10 insertions(+), 10 deletions(-) - -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc -index 87035a835ae5e..73ac94f34e058 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc -@@ -18,7 +18,7 @@ - #include <numeric> - - #include "absl/container/flat_hash_set.h" --#include "include/darts.h" -+// #include "include/darts.h" - - namespace tensorflow { - namespace text { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc -index 3c97969eb4e36..af43e36ed3f2e 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc -@@ -34,7 +34,7 @@ limitations under the License. - - #include <gmock/gmock.h> - #include <gtest/gtest.h> --#include "flatbuffers/flexbuffers.h" -+// #include "flatbuffers/flexbuffers.h" - #include "tensorflow/lite/kernels/test_util.h" - #include "tensorflow/lite/schema/schema_generated.h" - #include "tensorflow/lite/string_util.h" -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc -index a44e18f8e0534..141147a7713e9 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc -@@ -30,7 +30,7 @@ limitations under the License. - #include <cstdint> - #include <memory> - --#include "flatbuffers/flexbuffers.h" -+// #include "flatbuffers/flexbuffers.h" - #include "tensorflow/core/util/ragged_to_dense_util_common.h" - #include "tensorflow/lite/c/common.h" - #include "tensorflow/lite/kernels/internal/types.h" -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc -index 5f74f683c4e36..8e10c51ce48db 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc -@@ -33,7 +33,7 @@ limitations under the License. 
- - #include <gmock/gmock.h> - #include <gtest/gtest.h> --#include "flatbuffers/flexbuffers.h" -+// #include "flatbuffers/flexbuffers.h" - #include "tensorflow/lite/c/common.h" - #include "tensorflow/lite/interpreter.h" - #include "tensorflow/lite/kernels/internal/tensor.h" -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc -index c1bd1ce2c8066..43fa6497fbed4 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc -@@ -14,8 +14,8 @@ - - #include <stdexcept> - --#include "include/pybind11/pybind11.h" --#include "include/pybind11/stl.h" -+// #include "include/pybind11/pybind11.h" -+// #include "include/pybind11/stl.h" - #include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h" - - namespace tensorflow { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc -index 6cd95d3eb865f..66b4062fbf2e7 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc -@@ -14,8 +14,8 @@ - - #include <iostream> - #include <stdexcept> --#include "include/pybind11/pybind11.h" --#include "include/pybind11/stl.h" -+// #include "include/pybind11/pybind11.h" -+// #include "include/pybind11/stl.h" - #include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" - - namespace tensorflow { -diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc -index 138ef9c3c542b..99b5cc847b58e 100644 ---- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc -+++ b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc -@@ -12,8 +12,8 @@ - // See the License for the specific language governing permissions and - // limitations under the License. - --#include "include/pybind11/pybind11.h" --#include "include/pybind11/pytypes.h" -+// #include "include/pybind11/pybind11.h" -+// #include "include/pybind11/pytypes.h" - #include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h" - #include "tensorflow_text/core/kernels/ngrams_tflite.h" - #include "tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.h" --- -2.34.1.400.ga245620fadb-goog -
diff --git a/third_party/tensorflow-text/patches/0005-set-exec-bit-on-file-with-shebang.patch b/third_party/tensorflow-text/patches/0005-set-exec-bit-on-file-with-shebang.patch
deleted file mode 100644
index 77c5d12..0000000
--- a/third_party/tensorflow-text/patches/0005-set-exec-bit-on-file-with-shebang.patch
+++ /dev/null
@@ -1,16 +0,0 @@
-From 8bab46f45e929c4a540cbc2337ead3217bc8f5c0 Mon Sep 17 00:00:00 2001
-From: Robert Ogden <robertogden@chromium.org>
-Date: Wed, 8 Dec 2021 10:54:30 -0800
-Subject: [PATCH 5/5] set exec bit on file with shebang
-
----
- third_party/tensorflow-text/src/oss_scripts/run_tests.sh | 0
- 1 file changed, 0 insertions(+), 0 deletions(-)
- mode change 100644 => 100755 third_party/tensorflow-text/src/oss_scripts/run_tests.sh
-
-diff --git a/third_party/tensorflow-text/src/oss_scripts/run_tests.sh b/third_party/tensorflow-text/src/oss_scripts/run_tests.sh
-old mode 100644
-new mode 100755
---
-2.34.1.400.ga245620fadb-goog
-
diff --git a/third_party/tensorflow-text/shims/icu4c/source/common/unicode/utf8.h b/third_party/tensorflow-text/shims/icu4c/source/common/unicode/utf8.h
new file mode 100644
index 0000000..aea2be0
--- /dev/null
+++ b/third_party/tensorflow-text/shims/icu4c/source/common/unicode/utf8.h
@@ -0,0 +1,10 @@
+// Copyright 2023 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef THIRD_PARTY_TENSORFLOW_TEXT_SHIMS_ICU4C_SOURCE_COMMON_UNICODE_UTF8_H_
+#define THIRD_PARTY_TENSORFLOW_TEXT_SHIMS_ICU4C_SOURCE_COMMON_UNICODE_UTF8_H_
+
+#include "third_party/icu/source/common/unicode/utf8.h"
+
+#endif  // THIRD_PARTY_TENSORFLOW_TEXT_SHIMS_ICU4C_SOURCE_COMMON_UNICODE_UTF8_H_
diff --git a/third_party/tensorflow-text/src/.gitignore b/third_party/tensorflow-text/src/.gitignore
deleted file mode 100644
index f0f9ef7..0000000
--- a/third_party/tensorflow-text/src/.gitignore
+++ /dev/null
@@ -1,8 +0,0 @@
-# The .bazelrc file is autogenerated by oss_scripts/configure.sh.
-.bazelrc
-
-# Do not track bazels output directories.
-bazel-*
-
-# Do not track generated wheels.
-*.whl
diff --git a/third_party/tensorflow-text/src/CONTRIBUTING.md b/third_party/tensorflow-text/src/CONTRIBUTING.md
deleted file mode 100644
index db177d4a..0000000
--- a/third_party/tensorflow-text/src/CONTRIBUTING.md
+++ /dev/null
@@ -1,28 +0,0 @@ -# How to Contribute - -We'd love to accept your patches and contributions to this project. There are -just a few small guidelines you need to follow. - -## Contributor License Agreement - -Contributions to this project must be accompanied by a Contributor License -Agreement. You (or your employer) retain the copyright to your contribution; -this simply gives us permission to use and redistribute your contributions as -part of the project. Head over to <https://cla.developers.google.com/> to see -your current agreements on file or to sign a new one. - -You generally only need to submit a CLA once, so if you've already submitted one -(even if it was for a different project), you probably don't need to do it -again. - -## Code reviews - -All submissions, including submissions by project members, require review. We -use GitHub pull requests for this purpose. Consult -[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more -information on using pull requests. - -## Community Guidelines - -This project follows -[Google's Open Source Community Guidelines](https://opensource.google.com/conduct/).
diff --git a/third_party/tensorflow-text/src/README.md b/third_party/tensorflow-text/src/README.md
deleted file mode 100644
index adb81c84..0000000
--- a/third_party/tensorflow-text/src/README.md
+++ /dev/null
@@ -1,345 +0,0 @@ -<div align="center"> - <img src="https://raw.githubusercontent.com/tensorflow/text/master/docs/include/tftext.png" width="60%"><br><br> -</div> - ------------------ - -[](https://badge.fury.io/py/tensorflow-text) -[](https://badge.fury.io/py/tensorflow-text-nightly) -[](https://pypi.org/project/tensorflow-text/) -[](https://github.com/tensorflow/text/blob/master/docs/api_docs/python/index.md) -[](CONTRIBUTING.md) -[](https://opensource.org/licenses/Apache-2.0) - -<!-- TODO(broken): Uncomment when badges are made public. -### Continuous Integration Test Status - -| Build | Status | -| --- | --- | -| **Linux** | [] | -| **MacOS** | [] | -| **Windows** | [] | ---> - -# TensorFlow Text - Text processing in Tensorflow - -IMPORTANT: When installing TF Text with `pip install`, please note the version -of TensorFlow you are running, as you should specify the corresponding minor -version of TF Text (eg. for tensorflow==2.3.x use tensorflow_text==2.3.x). - -## INDEX -* [Introduction](#introduction) -* [Documentation](#documentation) -* [Unicode](#unicode) -* [Normalization](#normalization) -* [Tokenization](#tokenization) - * [Whitespace Tokenizer](#whitespacetokenizer) - * [UnicodeScript Tokenizer](#unicodescripttokenizer) - * [Unicode split](#unicode-split) - * [Offsets](#offsets) - * [TF.Data Example](#tfdata-example) - * [Keras API](#keras-api) -* [Other Text Ops](#other-text-ops) - * [Wordshape](#wordshape) - * [N-grams & Sliding Window](#n-grams--sliding-window) -* [Installation](#installation) - * [Install using PIP](#install-using-pip) - * [Build from source steps:](#build-from-source-steps) - -## Introduction - -TensorFlow Text provides a collection of text related classes and ops ready to -use with TensorFlow 2.0. The library can perform the preprocessing regularly -required by text-based models, and includes other features useful for sequence -modeling not provided by core TensorFlow. - -The benefit of using these ops in your text preprocessing is that they are done -in the TensorFlow graph. You do not need to worry about tokenization in -training being different than the tokenization at inference, or managing -preprocessing scripts. - -## Documentation - -Please visit [http://tensorflow.org/text](http://tensorflow.org/text) for all -documentation. This site includes API docs, guides for working with TensorFlow -Text, as well as tutorials for building specific models. - -## Unicode - -Most ops expect that the strings are in UTF-8. If you're using a different -encoding, you can use the core tensorflow transcode op to transcode into UTF-8. -You can also use the same op to coerce your string to structurally valid UTF-8 -if your input could be invalid. - -```python -docs = tf.constant([u'Everything not saved will be lost.'.encode('UTF-16-BE'), - u'Sad☹'.encode('UTF-16-BE')]) -utf8_docs = tf.strings.unicode_transcode(docs, input_encoding='UTF-16-BE', - output_encoding='UTF-8') -``` - -## Normalization - -When dealing with different sources of text, it's important that the same words -are recognized to be identical. A common technique for case-insensitive matching -in Unicode is case folding (similar to lower-casing). (Note that case folding -internally applies NFKC normalization.) - -We also provide Unicode normalization ops for transforming strings into a -canonical representation of characters, with Normalization Form KC being the -default ([NFKC](http://unicode.org/reports/tr15/)). 
- -```python -print(text.case_fold_utf8(['Everything not saved will be lost.'])) -print(text.normalize_utf8(['Äffin'])) -print(text.normalize_utf8(['Äffin'], 'nfkd')) -``` - -```sh -tf.Tensor(['everything not saved will be lost.'], shape=(1,), dtype=string) -tf.Tensor(['\xc3\x84ffin'], shape=(1,), dtype=string) -tf.Tensor(['A\xcc\x88ffin'], shape=(1,), dtype=string) -``` - -## Tokenization - -Tokenization is the process of breaking up a string into tokens. Commonly, these -tokens are words, numbers, and/or punctuation. - -The main interfaces are `Tokenizer` and `TokenizerWithOffsets` which each have a -single method `tokenize` and `tokenizeWithOffsets` respectively. There are -multiple implementing tokenizers available now. Each of these implement -`TokenizerWithOffsets` (which extends `Tokenizer`) which includes an option for -getting byte offsets into the original string. This allows the caller to know -the bytes in the original string the token was created from. - -All of the tokenizers return RaggedTensors with the inner-most dimension of -tokens mapping to the original individual strings. As a result, the resulting -shape's rank is increased by one. Please review the ragged tensor guide if you -are unfamiliar with them. https://www.tensorflow.org/guide/ragged_tensors - -### WhitespaceTokenizer - -This is a basic tokenizer that splits UTF-8 strings on ICU defined whitespace -characters (eg. space, tab, new line). - -```python -tokenizer = text.WhitespaceTokenizer() -tokens = tokenizer.tokenize(['everything not saved will be lost.', u'Sad☹'.encode('UTF-8')]) -print(tokens.to_list()) -``` - -```sh -[['everything', 'not', 'saved', 'will', 'be', 'lost.'], ['Sad\xe2\x98\xb9']] -``` - -### UnicodeScriptTokenizer - -This tokenizer splits UTF-8 strings based on Unicode script boundaries. The -script codes used correspond to International Components for Unicode (ICU) -UScriptCode values. See: http://icu-project.org/apiref/icu4c/uscript_8h.html - -In practice, this is similar to the `WhitespaceTokenizer` with the most apparent -difference being that it will split punctuation (USCRIPT_COMMON) from language -texts (eg. USCRIPT_LATIN, USCRIPT_CYRILLIC, etc) while also separating language -texts from each other. - -```python -tokenizer = text.UnicodeScriptTokenizer() -tokens = tokenizer.tokenize(['everything not saved will be lost.', - u'Sad☹'.encode('UTF-8')]) -print(tokens.to_list()) -``` - -```sh -[['everything', 'not', 'saved', 'will', 'be', 'lost', '.'], - ['Sad', '\xe2\x98\xb9']] -``` - -### Unicode split - -When tokenizing languages without whitespace to segment words, it is common to -just split by character, which can be accomplished using the -[unicode_split](https://www.tensorflow.org/api_docs/python/tf/strings/unicode_split) -op found in core. - -```python -tokens = tf.strings.unicode_split([u"仅今年前".encode('UTF-8')], 'UTF-8') -print(tokens.to_list()) -``` - -```sh -[['\xe4\xbb\x85', '\xe4\xbb\x8a', '\xe5\xb9\xb4', '\xe5\x89\x8d']] -``` - -### Offsets - -When tokenizing strings, it is often desired to know where in the original -string the token originated from. For this reason, each tokenizer which -implements `TokenizerWithOffsets` has a *tokenize_with_offsets* method that will -return the byte offsets along with the tokens. The start_offsets lists the bytes -in the original string each token starts at (inclusive), and the end_offsets -lists the bytes where each token ends at (exclusive, i.e., first byte *after* -the token). 
- -```python -tokenizer = text.UnicodeScriptTokenizer() -(tokens, start_offsets, end_offsets) = tokenizer.tokenize_with_offsets( - ['everything not saved will be lost.', u'Sad☹'.encode('UTF-8')]) -print(tokens.to_list()) -print(start_offsets.to_list()) -print(end_offsets.to_list()) -``` - -```sh -[['everything', 'not', 'saved', 'will', 'be', 'lost', '.'], - ['Sad', '\xe2\x98\xb9']] -[[0, 11, 15, 21, 26, 29, 33], [0, 3]] -[[10, 14, 20, 25, 28, 33, 34], [3, 6]] -``` - -### TF.Data Example - -Tokenizers work as expected with the tf.data API. A simple example is provided -below. - -```python -docs = tf.data.Dataset.from_tensor_slices([['Never tell me the odds.'], - ["It's a trap!"]]) -tokenizer = text.WhitespaceTokenizer() -tokenized_docs = docs.map(lambda x: tokenizer.tokenize(x)) -iterator = tokenized_docs.make_one_shot_iterator() -print(iterator.get_next().to_list()) -print(iterator.get_next().to_list()) -``` - -```sh -[['Never', 'tell', 'me', 'the', 'odds.']] -[["It's", 'a', 'trap!']] -``` - -### Keras API - -When you use different tokenizers and ops to preprocess your data, the resulting -outputs are Ragged Tensors. The Keras API makes it easy now to train a model -using Ragged Tensors without having to worry about padding or masking the data, -by either using the ToDense layer which handles all of these for you or relying -on Keras built-in layers support for natively working on ragged data. - -```python -model = tf.keras.Sequential([ - tf.keras.layers.InputLayer(input_shape=(None,), dtype='int32', ragged=True) - text.keras.layers.ToDense(pad_value=0, mask=True), - tf.keras.layers.Embedding(100, 16), - tf.keras.layers.LSTM(32), - tf.keras.layers.Dense(32, activation='relu'), - tf.keras.layers.Dense(1, activation='sigmoid') -]) -``` - -## Other Text Ops - -TF.Text packages other useful preprocessing ops. We will review a couple below. - -### Wordshape - -A common feature used in some natural language understanding models is to see -if the text string has a certain property. For example, a sentence breaking -model might contain features which check for word capitalization or if a -punctuation character is at the end of a string. - -Wordshape defines a variety of useful regular expression based helper functions -for matching various relevant patterns in your input text. Here are a few -examples. - -```python -tokenizer = text.WhitespaceTokenizer() -tokens = tokenizer.tokenize(['Everything not saved will be lost.', - u'Sad☹'.encode('UTF-8')]) - -# Is capitalized? -f1 = text.wordshape(tokens, text.WordShape.HAS_TITLE_CASE) -# Are all letters uppercased? -f2 = text.wordshape(tokens, text.WordShape.IS_UPPERCASE) -# Does the token contain punctuation? -f3 = text.wordshape(tokens, text.WordShape.HAS_SOME_PUNCT_OR_SYMBOL) -# Is the token a number? -f4 = text.wordshape(tokens, text.WordShape.IS_NUMERIC_VALUE) - -print(f1.to_list()) -print(f2.to_list()) -print(f3.to_list()) -print(f4.to_list()) -``` - -```sh -[[True, False, False, False, False, False], [True]] -[[False, False, False, False, False, False], [False]] -[[False, False, False, False, False, True], [True]] -[[False, False, False, False, False, False], [False]] -``` - -### N-grams & Sliding Window - -N-grams are sequential words given a sliding window size of *n*. When combining -the tokens, there are three reduction mechanisms supported. For text, you would -want to use `Reduction.STRING_JOIN` which appends the strings to each other. -The default separator character is a space, but this can be changed with the -string_separater argument. 
- -The other two reduction methods are most often used with numerical values, and -these are `Reduction.SUM` and `Reduction.MEAN`. - -```python -tokenizer = text.WhitespaceTokenizer() -tokens = tokenizer.tokenize(['Everything not saved will be lost.', - u'Sad☹'.encode('UTF-8')]) - -# Ngrams, in this case bi-gram (n = 2) -bigrams = text.ngrams(tokens, 2, reduction_type=text.Reduction.STRING_JOIN) - -print(bigrams.to_list()) -``` - -```sh -[['Everything not', 'not saved', 'saved will', 'will be', 'be lost.'], []] -``` - -## Installation - -### Install using PIP - -When installing TF Text with `pip install`, please note the version -of TensorFlow you are running, as you should specify the corresponding version -of TF Text. For example, if you're using TF 2.0, install the 2.0 version of TF -Text, and if you're using TF 1.15, install the 1.15 version of TF Text. - -```bash -pip install -U tensorflow-text==<version> -``` - -### Build from source steps: - -Note that TF Text needs to be built in the same environment as TensorFlow. Thus, -if you manually build TF Text, it is highly recommended that you also build -TensorFlow. - -If building on MacOS, you must have coreutils installed. It is probably easiest -to do with Homebrew. - -1. [build and install TensorFlow](https://www.tensorflow.org/install/source). -1. Clone the TF Text repo: - ```Shell - git clone https://github.com/tensorflow/text.git - cd text - ``` -1. Run the build script to create a pip package: - ```Shell - ./oss_scripts/run_build.sh - ``` - After this step, there should be a `*.whl` file in current directory. File name similar to `tensorflow_text-2.5.0rc0-cp38-cp38-linux_x86_64.whl`. -1. Install the package to environment: - ```Shell - pip install ./tensorflow_text-*-*-*-os_platform.whl - ```
diff --git a/third_party/tensorflow-text/src/WORKSPACE b/third_party/tensorflow-text/src/WORKSPACE
deleted file mode 100644
index 9228f59..0000000
--- a/third_party/tensorflow-text/src/WORKSPACE
+++ /dev/null
@@ -1,110 +0,0 @@ -workspace(name = "org_tensorflow_text") - -load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") - -http_archive( - name = "icu", - strip_prefix = "icu-release-64-2", - sha256 = "dfc62618aa4bd3ca14a3df548cd65fe393155edd213e49c39f3a30ccd618fc27", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/unicode-org/icu/archive/release-64-2.zip", - "https://github.com/unicode-org/icu/archive/release-64-2.zip", - ], - build_file = "//third_party/icu:BUILD.bzl", - patches = ["//third_party/icu:udata.patch"], - patch_args = ["-p1"], -) - -http_archive( - name = "com_google_sentencepiece", - strip_prefix = "sentencepiece-1.0.0", - sha256 = "c05901f30a1d0ed64cbcf40eba08e48894e1b0e985777217b7c9036cac631346", - urls = [ - "https://github.com/google/sentencepiece/archive/1.0.0.zip" - ], - patches = ["//third_party/sentencepiece:processor.patch"], - patch_args = ["-p1"], -) - -http_archive( - name = "darts_clone", - build_file = "//third_party/darts_clone:BUILD.bzl", - sha256 = "c97f55d05c98da6fcaf7f9ecc6a6dc6bc5b18b8564465f77abff8879d446491c", - strip_prefix = "darts-clone-e40ce4627526985a7767444b6ed6893ab6ff8983", - urls = [ - "https://github.com/s-yata/darts-clone/archive/e40ce4627526985a7767444b6ed6893ab6ff8983.zip", - ], -) - -http_archive( - name = "io_bazel_rules_closure", - sha256 = "5b00383d08dd71f28503736db0500b6fb4dda47489ff5fc6bed42557c07c6ba9", - strip_prefix = "rules_closure-308b05b2419edb5c8ee0471b67a40403df940149", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", - "https://github.com/bazelbuild/rules_closure/archive/308b05b2419edb5c8ee0471b67a40403df940149.tar.gz", # 2019-06-13 - ], -) - -# NOTE: according to -# https://docs.bazel.build/versions/master/external.html#transitive-dependencies -# we should list the transitive dependencies of @org_tensorflow_hub in this -# WORKSPACE file. Still, all of them are already listed by tf_workspace() which -# is called later in this file. -http_archive( - name = "org_tensorflow_hub", - strip_prefix = "hub-0.8.0", - sha256 = "968af30c448d51c36501b68df2c916fb4a61007db3240adc9248fa3a9be2da6f", - urls = [ - "https://github.com/tensorflow/hub/archive/v0.8.0.zip" - ], -) - -http_archive( - name = "org_tensorflow", - strip_prefix = "tensorflow-2.7.0", - sha256 = "249b48ddee927801c7a4f8e5442cf1a3c860f6f46b85a2ff7a78b501507dd561", - urls = [ - "https://github.com/tensorflow/tensorflow/archive/v2.7.0.zip" - ], -) - -http_archive( - name = "org_tensorflow_datasets", - sha256 = "c6ff4e2306387f0ca45d4f616d9a1c5e79e02ef16d0a8958230a8049ea07fc98", - strip_prefix = "datasets-3.2.0", - urls = [ - "https://github.com/tensorflow/datasets/archive/v3.2.0.zip", - ], -) - -http_archive( - name = "pybind11", - strip_prefix = "pybind11-2.6.0", - sha256 = "90b705137b69ee3b5fc655eaca66d0dc9862ea1759226f7ccd3098425ae69571", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/pybind/pybind11/archive/v2.6.0.tar.gz", - "https://github.com/pybind/pybind11/archive/v2.6.0.tar.gz", - ], - build_file = "//third_party/pybind11:BUILD.bzl", -) - -# Initialize TensorFlow dependencies. 
-load("@org_tensorflow//tensorflow:workspace3.bzl", "tf_workspace3") -tf_workspace3() -load("@org_tensorflow//tensorflow:workspace2.bzl", "tf_workspace2") -tf_workspace2() -load("@org_tensorflow//tensorflow:workspace1.bzl", "tf_workspace1") -tf_workspace1() -load("@org_tensorflow//tensorflow:workspace0.bzl", "tf_workspace0") -tf_workspace0() - -load("//third_party/tensorflow:tf_configure.bzl", "tf_configure") - -tf_configure(name = "local_config_tf") - -# Set up Android. -load("@org_tensorflow//third_party/android:android_configure.bzl", "android_configure") -android_configure(name="local_config_android") -load("@local_config_android//:android.bzl", "android_workspace") -android_workspace()
diff --git a/third_party/tensorflow-text/src/docs/_book.yaml b/third_party/tensorflow-text/src/docs/_book.yaml
deleted file mode 100644
index 9421c7d3..0000000
--- a/third_party/tensorflow-text/src/docs/_book.yaml
+++ /dev/null
@@ -1,29 +0,0 @@ -upper_tabs: -- include: /_upper_tabs_left.yaml -- include: /api_docs/_upper_tabs_api.yaml -# Dropdown menu -- name: Resources - path: /resources - is_default: true - menu: - - include: /resources/_menu_toc.yaml - lower_tabs: - # Subsite tabs - other: - - name: Tutorials - contents: - - include: /text/tutorials/_toc.yaml - - - name: Guide - contents: - - include: /text/guide/_toc.yaml - - - name: API - skip_translation: true - contents: - - heading: TensorFlow Text - - include: /text/api_docs/python/text/_toc.yaml -# - heading: TensorFlow NLP Modeling -# - include: /text/api_docs/python/tfnlp/_toc.yaml - -- include: /_upper_tabs_right.yaml
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/_toc.yaml b/third_party/tensorflow-text/src/docs/api_docs/python/_toc.yaml
deleted file mode 100644
index 7930fd9..0000000
--- a/third_party/tensorflow-text/src/docs/api_docs/python/_toc.yaml
+++ /dev/null
@@ -1,80 +0,0 @@ -toc: -- title: text - section: - - title: Overview - path: /text/api_docs/python/text - - title: BertTokenizer - path: /text/api_docs/python/text/BertTokenizer - - title: case_fold_utf8 - path: /text/api_docs/python/text/case_fold_utf8 - - title: coerce_to_structurally_valid_utf8 - path: /text/api_docs/python/text/coerce_to_structurally_valid_utf8 - - title: Detokenizer - path: /text/api_docs/python/text/Detokenizer - - title: gather_with_default - path: /text/api_docs/python/text/gather_with_default - - title: greedy_constrained_sequence - path: /text/api_docs/python/text/greedy_constrained_sequence - - title: max_spanning_tree - path: /text/api_docs/python/text/max_spanning_tree - - title: max_spanning_tree_gradient - path: /text/api_docs/python/text/max_spanning_tree_gradient - - title: ngrams - path: /text/api_docs/python/text/ngrams - - title: normalize_utf8 - path: /text/api_docs/python/text/normalize_utf8 - - title: pad_along_dimension - path: /text/api_docs/python/text/pad_along_dimension - - title: Reduction - path: /text/api_docs/python/text/Reduction - - title: regex_split - path: /text/api_docs/python/text/regex_split - - title: regex_split_with_offsets - path: /text/api_docs/python/text/regex_split_with_offsets - - title: SentencepieceTokenizer - path: /text/api_docs/python/text/SentencepieceTokenizer - - title: sentence_fragments - status: deprecated - path: /text/api_docs/python/text/sentence_fragments - - title: sliding_window - path: /text/api_docs/python/text/sliding_window - - title: span_alignment - path: /text/api_docs/python/text/span_alignment - - title: span_overlaps - path: /text/api_docs/python/text/span_overlaps - - title: SplitMergeTokenizer - path: /text/api_docs/python/text/SplitMergeTokenizer - - title: Tokenizer - path: /text/api_docs/python/text/Tokenizer - - title: TokenizerWithOffsets - path: /text/api_docs/python/text/TokenizerWithOffsets - - title: UnicodeCharTokenizer - path: /text/api_docs/python/text/UnicodeCharTokenizer - - title: UnicodeScriptTokenizer - path: /text/api_docs/python/text/UnicodeScriptTokenizer - - title: viterbi_constrained_sequence - path: /text/api_docs/python/text/viterbi_constrained_sequence - - title: WhitespaceTokenizer - path: /text/api_docs/python/text/WhitespaceTokenizer - - title: WordpieceTokenizer - path: /text/api_docs/python/text/WordpieceTokenizer - - title: WordShape - path: /text/api_docs/python/text/WordShape - - title: wordshape - path: /text/api_docs/python/text/wordshape -- title: text.keras - section: - - title: Overview - path: /text/api_docs/python/text/keras - - title: layers - section: - - title: Overview - path: /text/api_docs/python/text/keras/layers - - title: ToDense - path: /text/api_docs/python/text/keras/layers/ToDense -- title: text.metrics - section: - - title: Overview - path: /text/api_docs/python/text/metrics - - title: rouge_l - path: /text/api_docs/python/text/metrics/rouge_l
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/index.md b/third_party/tensorflow-text/src/docs/api_docs/python/index.md
deleted file mode 100644
index a182c83..0000000
--- a/third_party/tensorflow-text/src/docs/api_docs/python/index.md
+++ /dev/null
@@ -1,41 +0,0 @@ -# All symbols in TensorFlow Text - -<!-- Insert buttons and diff --> - -## Primary symbols - -* <a href="./text.md"><code>text</code></a> -* <a href="./text/BertTokenizer.md"><code>text.BertTokenizer</code></a> -* <a href="./text/Detokenizer.md"><code>text.Detokenizer</code></a> -* <a href="./text/Reduction.md"><code>text.Reduction</code></a> -* <a href="./text/SentencepieceTokenizer.md"><code>text.SentencepieceTokenizer</code></a> -* <a href="./text/SplitMergeTokenizer.md"><code>text.SplitMergeTokenizer</code></a> -* <a href="./text/Tokenizer.md"><code>text.Tokenizer</code></a> -* <a href="./text/TokenizerWithOffsets.md"><code>text.TokenizerWithOffsets</code></a> -* <a href="./text/UnicodeCharTokenizer.md"><code>text.UnicodeCharTokenizer</code></a> -* <a href="./text/UnicodeScriptTokenizer.md"><code>text.UnicodeScriptTokenizer</code></a> -* <a href="./text/WhitespaceTokenizer.md"><code>text.WhitespaceTokenizer</code></a> -* <a href="./text/WordShape_cls.md"><code>text.WordShape</code></a> -* <a href="./text/WordpieceTokenizer.md"><code>text.WordpieceTokenizer</code></a> -* <a href="./text/case_fold_utf8.md"><code>text.case_fold_utf8</code></a> -* <a href="./text/coerce_to_structurally_valid_utf8.md"><code>text.coerce_to_structurally_valid_utf8</code></a> -* <a href="./text/gather_with_default.md"><code>text.gather_with_default</code></a> -* <a href="./text/greedy_constrained_sequence.md"><code>text.greedy_constrained_sequence</code></a> -* <a href="./text/keras.md"><code>text.keras</code></a> -* <a href="./text/keras/layers.md"><code>text.keras.layers</code></a> -* <a href="./text/keras/layers/ToDense.md"><code>text.keras.layers.ToDense</code></a> -* <a href="./text/max_spanning_tree.md"><code>text.max_spanning_tree</code></a> -* <a href="./text/max_spanning_tree_gradient.md"><code>text.max_spanning_tree_gradient</code></a> -* <a href="./text/metrics.md"><code>text.metrics</code></a> -* <a href="./text/metrics/rouge_l.md"><code>text.metrics.rouge_l</code></a> -* <a href="./text/ngrams.md"><code>text.ngrams</code></a> -* <a href="./text/normalize_utf8.md"><code>text.normalize_utf8</code></a> -* <a href="./text/pad_along_dimension.md"><code>text.pad_along_dimension</code></a> -* <a href="./text/regex_split.md"><code>text.regex_split</code></a> -* <a href="./text/regex_split_with_offsets.md"><code>text.regex_split_with_offsets</code></a> -* <a href="./text/sentence_fragments.md"><code>text.sentence_fragments</code></a> -* <a href="./text/sliding_window.md"><code>text.sliding_window</code></a> -* <a href="./text/span_alignment.md"><code>text.span_alignment</code></a> -* <a href="./text/span_overlaps.md"><code>text.span_overlaps</code></a> -* <a href="./text/viterbi_constrained_sequence.md"><code>text.viterbi_constrained_sequence</code></a> -* <a href="./text/wordshape.md"><code>text.wordshape</code></a>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text.md b/third_party/tensorflow-text/src/docs/api_docs/python/text.md
deleted file mode 100644
index 78492a7..0000000
--- a/third_party/tensorflow-text/src/docs/api_docs/python/text.md
+++ /dev/null
@@ -1,172 +0,0 @@ -description: Various tensorflow ops related to text-processing. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__version__"/> -</div> - -# Module: text - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/__init__.py">View -source</a> - -Various tensorflow ops related to text-processing. - -## Modules - -[`keras`](./text/keras.md) module: Tensorflow Text Layers for Keras API. - -[`metrics`](./text/metrics.md) module: Tensorflow text-processing metrics. - -## Classes - -[`class BertTokenizer`](./text/BertTokenizer.md): Tokenizer used for BERT. - -[`class Detokenizer`](./text/Detokenizer.md): Base class for detokenizer -implementations. - -[`class FirstNItemSelector`](./text/FirstNItemSelector.md): An `ItemSelector` -that selects the first `n` items in the batch. - -[`class HubModuleSplitter`](./text/HubModuleSplitter.md): Splitter that uses a -Hub module. - -[`class HubModuleTokenizer`](./text/HubModuleTokenizer.md): Tokenizer that uses -a Hub module. - -[`class MaskValuesChooser`](./text/MaskValuesChooser.md): Assigns values to the -items chosen for masking. - -[`class RandomItemSelector`](./text/RandomItemSelector.md): An `ItemSelector` -implementation that randomly selects items in a batch. - -[`class Reduction`](./text/Reduction.md): Type of reduction to be done by the -n-gram op. - -[`class RegexSplitter`](./text/RegexSplitter.md): `RegexSplitter` splits text on -the given regular expression. - -[`class RoundRobinTrimmer`](./text/RoundRobinTrimmer.md): A `Trimmer` that -allocates a length budget to segments via round robin. - -[`class SentencepieceTokenizer`](./text/SentencepieceTokenizer.md): Tokenizes a -tensor of UTF-8 strings. - -[`class SplitMergeFromLogitsTokenizer`](./text/SplitMergeFromLogitsTokenizer.md): -Tokenizes a tensor of UTF-8 string into words according to logits. - -[`class SplitMergeTokenizer`](./text/SplitMergeTokenizer.md): Tokenizes a tensor -of UTF-8 string into words according to labels. - -[`class Splitter`](./text/Splitter.md): An abstract base class for splitting -text. - -[`class SplitterWithOffsets`](./text/SplitterWithOffsets.md): An abstract base -class for splitters that return offsets. - -[`class StateBasedSentenceBreaker`](./text/StateBasedSentenceBreaker.md): A -`Splitter` that uses a state machine to determine sentence breaks. - -[`class Tokenizer`](./text/Tokenizer.md): Base class for tokenizer -implementations. - -[`class TokenizerWithOffsets`](./text/TokenizerWithOffsets.md): Base class for -tokenizer implementations that return offsets. - -[`class UnicodeCharTokenizer`](./text/UnicodeCharTokenizer.md): Tokenizes a -tensor of UTF-8 strings on Unicode character boundaries. - -[`class UnicodeScriptTokenizer`](./text/UnicodeScriptTokenizer.md): Tokenizes -UTF-8 by splitting when there is a change in Unicode script. - -[`class WaterfallTrimmer`](./text/WaterfallTrimmer.md): A `Trimmer` that -allocates a length budget to segments in order. - -[`class WhitespaceTokenizer`](./text/WhitespaceTokenizer.md): Tokenizes a tensor -of UTF-8 strings on whitespaces. - -[`class WordShape`](./text/WordShape_cls.md): Values for the 'pattern' arg of the -wordshape op. 
- -[`class WordpieceTokenizer`](./text/WordpieceTokenizer.md): Tokenizes a tensor -of UTF-8 string tokens into subword pieces. - -## Functions - -[`case_fold_utf8(...)`](./text/case_fold_utf8.md): Applies case folding to every -UTF-8 string in the input. - -[`coerce_to_structurally_valid_utf8(...)`](./text/coerce_to_structurally_valid_utf8.md): Coerce UTF-8 input strings to structurally valid UTF-8. - -[`combine_segments(...)`](./text/combine_segments.md): Combine one or more input -segments for a model's input sequence. - -[`find_source_offsets(...)`](./text/find_source_offsets.md): Maps the input -post-normalized string offsets to pre-normalized offsets. - -[`gather_with_default(...)`](./text/gather_with_default.md): Gather slices with `indices=-1` mapped to `default`. - -[`greedy_constrained_sequence(...)`](./text/greedy_constrained_sequence.md): Performs greedy constrained sequence on a batch of examples. - -[`mask_language_model(...)`](./text/mask_language_model.md): Applies dynamic -language model masking. - -[`max_spanning_tree(...)`](./text/max_spanning_tree.md): Finds the maximum -directed spanning tree of a digraph. - -[`max_spanning_tree_gradient(...)`](./text/max_spanning_tree_gradient.md): -Returns a subgradient of the MaximumSpanningTree op. - -[`ngrams(...)`](./text/ngrams.md): Create a tensor of n-grams based on the input data `data`. - -[`normalize_utf8(...)`](./text/normalize_utf8.md): Normalizes each UTF-8 string -in the input tensor using the specified rule. - -[`normalize_utf8_with_offsets_map(...)`](./text/normalize_utf8_with_offsets_map.md): -Normalizes each UTF-8 string in the input tensor using the specified rule. - -[`pad_along_dimension(...)`](./text/pad_along_dimension.md): Add padding to the beginning and end of data in a specific dimension. - -[`pad_model_inputs(...)`](./text/pad_model_inputs.md): Pad model input and -generate corresponding input masks. - -[`regex_split(...)`](./text/regex_split.md): Split `input` by delimiters that -match a regex pattern. - -[`regex_split_with_offsets(...)`](./text/regex_split_with_offsets.md): Split -`input` by delimiters that match a regex pattern; returns offsets. - -[`sentence_fragments(...)`](./text/sentence_fragments.md): Find the sentence -fragments in a given text. (deprecated) - -[`sliding_window(...)`](./text/sliding_window.md): Builds a sliding window for `data` with a specified width. - -[`span_alignment(...)`](./text/span_alignment.md): Return an alignment from a set of source spans to a set of target spans. - -[`span_overlaps(...)`](./text/span_overlaps.md): Returns a boolean tensor indicating which source and target spans overlap. - -[`viterbi_constrained_sequence(...)`](./text/viterbi_constrained_sequence.md): Performs greedy constrained sequence on a batch of examples. - -[`wordshape(...)`](./text/wordshape.md): Determine wordshape features for each input string. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Other Members</h2></th></tr> - -<tr> -<td> -**version**<a id="__version__"></a> -</td> -<td> -`'2.7.0-rc0'` -</td> -</tr> -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/BertTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/BertTokenizer.md deleted file mode 100644 index 01aa3019..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/BertTokenizer.md +++ /dev/null
@@ -1,375 +0,0 @@ -description: Tokenizer used for BERT. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.BertTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="detokenize"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.BertTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/bert_tokenizer.py">View -source</a> - -Tokenizer used for BERT. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md), [`Detokenizer`](../text/Detokenizer.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.BertTokenizer( - vocab_lookup_table, suffix_indicator='##', max_bytes_per_word=100, - max_chars_per_token=None, token_out_type=dtypes.int64, - unknown_token='[UNK]', split_unknown_characters=False, - lower_case=False, keep_whitespace=False, normalization_form=None, - preserve_unused_token=False, basic_tokenizer_class=BasicTokenizer -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -This tokenizer applies an end-to-end, text string to wordpiece tokenization. It -first applies basic tokenization, followed by wordpiece tokenization. - -See `WordpieceTokenizer` for details on the subword tokenization. - -For an example of use, see -https://www.tensorflow.org/text/guide/bert_preprocessing_guide - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Attributes</h2></th></tr> - -<tr> -<td> -`vocab_lookup_table` -</td> -<td> -A lookup table implementing the LookupInterface -containing the vocabulary of subwords or a string which is the file path -to the vocab.txt file. -</td> -</tr><tr> -<td> -`suffix_indicator` -</td> -<td> -(optional) The characters prepended to a wordpiece to -indicate that it is a suffix to another subword. Default is '##'. -</td> -</tr><tr> -<td> -`max_bytes_per_word` -</td> -<td> -(optional) Max size of input token. Default is 100. -</td> -</tr><tr> -<td> -`max_chars_per_token` -</td> -<td> -(optional) Max size of subwords, excluding suffix -indicator. If known, providing this improves the efficiency of decoding -long words. -</td> -</tr><tr> -<td> -`token_out_type` -</td> -<td> -(optional) The type of the token to return. This can be -`tf.int64` IDs, or `tf.string` subwords. The default is `tf.int64`. -</td> -</tr><tr> -<td> -`unknown_token` -</td> -<td> -(optional) The value to use when an unknown token is found. -Default is "[UNK]". If this is set to a string, and `token_out_type` is -`tf.int64`, the `vocab_lookup_table` is used to convert the -`unknown_token` to an integer. If this is set to `None`, out-of-vocabulary -tokens are left as is. -</td> -</tr><tr> -<td> -`split_unknown_characters` -</td> -<td> -(optional) Whether to split out single unknown -characters as subtokens. If False (default), words containing unknown -characters will be treated as single unknown tokens. 
-</td> -</tr><tr> -<td> -`lower_case` -</td> -<td> -bool - If true, a preprocessing step is added to lowercase the -text, apply NFD normalization, and strip accents characters. -</td> -</tr><tr> -<td> -`keep_whitespace` -</td> -<td> -bool - If true, preserves whitespace characters instead of -stripping them away. -</td> -</tr><tr> -<td> -`normalization_form` -</td> -<td> -If set to a valid value and lower_case=False, the input -text will be normalized to `normalization_form`. See normalize_utf8() op -for a list of valid values. -</td> -</tr><tr> -<td> -`preserve_unused_token` -</td> -<td> -If true, text in the regex format `\\[unused\\d+\\]` -will be treated as a token and thus remain preserved as is to be looked up -in the vocabulary. -</td> -</tr><tr> -<td> -`basic_tokenizer_class` -</td> -<td> -If set, the class to use instead of BasicTokenizer -</td> -</tr> -</table> - -## Methods - -<h3 id="detokenize"><code>detokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/bert_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>detokenize( - token_ids -) -</code></pre> - -Convert a `Tensor` or `RaggedTensor` of wordpiece IDs to string-words. - -See -<a href="../text/WordpieceTokenizer.md#detokenize"><code>WordpieceTokenizer.detokenize</code></a> -for details. - -Note: -<a href="../text/BertTokenizer.md#tokenize"><code>BertTokenizer.tokenize</code></a>/<a href="../text/BertTokenizer.md#detokenize"><code>BertTokenizer.detokenize</code></a> -does not round trip losslessly. The result of `detokenize` will not, in general, -have the same content or offsets as the input to `tokenize`. This is because the -"basic tokenization" step, that splits the strings into words before applying -the `WordpieceTokenizer`, includes irreversible steps like lower-casing and -splitting on punctuation. `WordpieceTokenizer` on the other hand **is** -reversible. - -Note: This method assumes wordpiece IDs are dense on the interval `[0, -vocab_size)`. - -#### Example: - -``` ->>> import pathlib ->>> pathlib.Path('/tmp/tok_vocab.txt').write_text( -... "they ##' ##re the great ##est".replace(' ', '\n')) ->>> tokenizer = BertTokenizer( -... vocab_lookup_table='/tmp/tok_vocab.txt') ->>> text_inputs = tf.constant(['greatest'.encode('utf-8')]) ->>> tokenizer.detokenize([[4, 5]]) -<tf.RaggedTensor [[b'greatest']]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`token_ids` -</td> -<td> -A `RaggedTensor` or `Tensor` with an int dtype. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` with dtype `string` and the same rank as the input -`token_ids`. -</td> -</tr> - -</table> - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. 
- -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/bert_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - text_input -) -</code></pre> - -Tokenizes a tensor of string tokens into subword tokens for BERT. - -#### Example: - -``` ->>> import pathlib ->>> pathlib.Path('/tmp/tok_vocab.txt').write_text( -... "they ##' ##re the great ##est".replace(' ', '\n')) ->>> tokenizer = BertTokenizer( -... vocab_lookup_table='/tmp/tok_vocab.txt') ->>> text_inputs = tf.constant(['greatest'.encode('utf-8') ]) ->>> tokenizer.tokenize(text_inputs) -<tf.RaggedTensor [[[4, 5]]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`text_input` -</td> -<td> -input: A `Tensor` or `RaggedTensor` of untokenized UTF-8 -strings. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of tokens where `tokens[i1...iN, j]` is the string -contents (or ID in the vocab_lookup_table representing that string) -of the `jth` token in `input[i1...iN]` -</td> -</tr> - -</table> - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/bert_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - text_input -) -</code></pre> - -Tokenizes a tensor of string tokens into subword tokens for BERT. - -#### Example: - -``` ->>> import pathlib ->>> pathlib.Path('/tmp/tok_vocab.txt').write_text( -... "they ##' ##re the great ##est".replace(' ', '\n')) ->>> tokenizer = BertTokenizer( -... vocab_lookup_table='/tmp/tok_vocab.txt') ->>> text_inputs = tf.constant(['greatest'.encode('utf-8')]) ->>> tokenizer.tokenize_with_offsets(text_inputs) -(<tf.RaggedTensor [[[4, 5]]]>, - <tf.RaggedTensor [[[0, 5]]]>, - <tf.RaggedTensor [[[5, 8]]]>) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`text_input` -</td> -<td> -input: A `Tensor` or `RaggedTensor` of untokenized UTF-8 -strings. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple of `RaggedTensor`s where the first element is the tokens where -`tokens[i1...iN, j]`, the second element is the starting offsets, the -third element is the end offset. (Please look at `tokenize` for details -on tokens.) -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/Detokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/Detokenizer.md deleted file mode 100644 index ac2fea4..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/Detokenizer.md +++ /dev/null
@@ -1,97 +0,0 @@ -description: Base class for detokenizer implementations. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.Detokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="detokenize"/> -</div> - -# text.Detokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -Base class for detokenizer implementations. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.Detokenizer( - name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -A Detokenizer is a module that combines tokens to form strings. Generally, -subclasses of `Detokenizer` will also be subclasses of `Tokenizer`; and the -`detokenize` method will be the inverse of the `tokenize` method. I.e., -`tokenizer.detokenize(tokenizer.tokenize(s)) == s`. - -Each Detokenizer subclass must implement a `detokenize` method, which combines -tokens together to form strings. E.g.: - -``` ->>> class SimpleDetokenizer(tf_text.Detokenizer): -... def detokenize(self, input): -... return tf.strings.reduce_join(input, axis=-1, separator=" ") ->>> text = tf.ragged.constant([["hello", "world"], ["a", "b", "c"]]) ->>> print(SimpleDetokenizer().detokenize(text)) -tf.Tensor([b'hello world' b'a b c'], shape=(2,), dtype=string) -``` - -## Methods - -<h3 id="detokenize"><code>detokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>@abc.abstractmethod</code> -<code>detokenize( - input -) -</code></pre> - -Assembles the tokens in the input tensor into a string. - -Generally, `detokenize` is the inverse of the `tokenize` method, and can be used -to reconstrct a string from a set of tokens. This is especially helpful in cases -where the tokens are integer ids, such as indexes into a vocabulary table -- in -that case, the tokenized encoding is not very human-readable (since it's just a -list of integers), so the `detokenize` method can be used to turn it back into -something that's more readable. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string or integer `Tensor` or -`RaggedTensor`. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -An (N-1)-dimensional UTF-8 string `Tensor` or `RaggedTensor`. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/FirstNItemSelector.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/FirstNItemSelector.md deleted file mode 100644 index 1452c3d..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/FirstNItemSelector.md +++ /dev/null
@@ -1,151 +0,0 @@ -description: An ItemSelector that selects the first n items in the batch. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.FirstNItemSelector" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="get_selectable"/> -<meta itemprop="property" content="get_selection_mask"/> -</div> - -# text.FirstNItemSelector - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/item_selector_ops.py">View source</a> - - - -An `ItemSelector` that selects the first `n` items in the batch. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.FirstNItemSelector( - num_to_select, unselectable_ids=None -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`num_to_select` -</td> -<td> -An int which is the leading number of items to select. -</td> -</tr><tr> -<td> -`unselectable_ids` -</td> -<td> -(optional) A list of int ids that cannot be selected. -Default is empty list. -</td> -</tr> -</table> - - - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Attributes</h2></th></tr> - -<tr> -<td> -`unselectable_ids` -</td> -<td> - -</td> -</tr> -</table> - - - -## Methods - -<h3 id="get_selectable"><code>get_selectable</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/item_selector_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>get_selectable( - input_ids, axis -) -</code></pre> - -See `get_selectable()` in superclass. - - -<h3 id="get_selection_mask"><code>get_selection_mask</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/item_selector_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>get_selection_mask( - input_ids, axis=1 -) -</code></pre> - -Returns a mask of items that have been selected. - -The default implementation simply returns all items not excluded by -`get_selectable`. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input_ids` -</td> -<td> -A `RaggedTensor`. -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -(optional) An int detailing the dimension to apply selection on. -Default is the 1st dimension. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -a `RaggedTensor` with shape `input_ids.shape[:axis]`. Its values are True -if the corresponding item (or broadcasted subitems) should be selected. -</td> -</tr> - -</table> - - - - -
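A short usage sketch for the selector above (assuming `tensorflow` and `tensorflow_text` are available; the token ids are illustrative):

```python
import tensorflow as tf
import tensorflow_text as tf_text

# Select at most the first 2 selectable items per row; id 0 can never be chosen.
selector = tf_text.FirstNItemSelector(num_to_select=2, unselectable_ids=[0])

input_ids = tf.ragged.constant([[0, 5, 6, 7], [8, 9]])

# Boolean RaggedTensor shaped like `input_ids`; True marks the selected items
# (the leading selectable ones, up to `num_to_select`).
mask = selector.get_selection_mask(input_ids, axis=1)
```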
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/HubModuleSplitter.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/HubModuleSplitter.md deleted file mode 100644 index 939ce2d5..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/HubModuleSplitter.md +++ /dev/null
@@ -1,214 +0,0 @@ -description: Splitter that uses a Hub module. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.HubModuleSplitter" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -</div> - -# text.HubModuleSplitter - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/hub_module_splitter.py">View source</a> - - - -Splitter that uses a Hub module. - -Inherits From: [`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.HubModuleSplitter( - hub_module_handle -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -The TensorFlow graph from the module performs the real work. The Python code -from this class handles the details of interfacing with that module, as well -as the support for ragged tensors and high-rank tensors. - -The Hub module should be supported by `hub.load() -<https://www.tensorflow.org/hub/api_docs/python/hub/load>`_ If a v1 module, it -should have a graph variant with an empty set of tags; we consider that graph -variant to be the module and ignore everything else. The module should have a -signature named `default` that takes a <a href="../text.md"><code>text</code></a> input (a rank-1 tensor of -strings to split into pieces) and returns a dictionary of tensors, let's say -`output_dict`, such that: - -* `output_dict['num_pieces']` is a rank-1 tensor of integers, where -num_pieces[i] is the number of pieces that text[i] was split into. - -* `output_dict['pieces']` is a rank-1 tensor of strings containing all pieces -for text[0] (in order), followed by all pieces for text[1] (in order) and so -on. - -* `output_dict['starts']` is a rank-1 tensor of integers with the byte offsets -where the pieces start (relative to the beginning of the corresponding input -string). - -* `output_dict['end']` is a rank-1 tensor of integers with the byte offsets -right after the end of the tokens (relative to the beginning of the -corresponding input string). - -The output dictionary may contain other tensors (e.g., for debugging) but this -class is not using them. - -#### Example: - -``` ->>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" ->>> segmenter = HubModuleSplitter(HUB_MODULE) ->>> segmenter.split(["新华社北京"]) -<tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> -``` - -You can also use this tokenizer to return the split strings and their offsets: - -``` ->>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" ->>> segmenter = HubModuleSplitter(HUB_MODULE) ->>> pieces, starts, ends = segmenter.split_with_offsets(["新华社北京"]) ->>> print("pieces: %s starts: %s ends: %s" % (pieces, starts, ends)) -pieces: <tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> -starts: <tf.RaggedTensor [[0, 9]]> -ends: <tf.RaggedTensor [[9, 15]]> -``` - -Currently, this class also supports an older API, which uses slightly -different key names for the output dictionary. For new Hub modules, please -use the API described above. 
- -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`hub_module_handle` -</td> -<td> -A string handle accepted by hub.load(). Supported -cases include (1) a local path to a directory containing a module, and -(2) a handle to a module uploaded to e.g., https://tfhub.dev. The -module should implement the signature described in the docstring for -this class. -</td> -</tr> -</table> - - - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/hub_module_splitter.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input_strs -) -</code></pre> - -Splits a tensor of UTF-8 strings into pieces. - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input_strs` -</td> -<td> -An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of segmented text. The returned shape is the shape of the -input tensor with an added ragged dimension for the pieces of each string. -</td> -</tr> - -</table> - - - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/hub_module_splitter.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input_strs -) -</code></pre> - -Splits a tensor of UTF-8 strings into pieces with [start,end) offsets. - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input_strs` -</td> -<td> -An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(pieces, start_offsets, end_offsets)` where: -* `pieces` is a `RaggedTensor` of strings where `pieces[i1...iN, j]` is - the string content of the `j-th` piece in `input_strs[i1...iN]` -* `start_offsets` is a `RaggedTensor` of int64s where - `start_offsets[i1...iN, j]` is the byte offset for the start of the - `j-th` piece in `input_strs[i1...iN]`. -* `end_offsets` is a `RaggedTensor` of int64s where - `end_offsets[i1...iN, j]` is the byte offset immediately after the - end of the `j-th` piece in `input_strs[i...iN]`. -</td> -</tr> - -</table> - - - - -
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/HubModuleTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/HubModuleTokenizer.md deleted file mode 100644 index d373c7c3..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/HubModuleTokenizer.md +++ /dev/null
@@ -1,212 +0,0 @@ -description: Tokenizer that uses a Hub module. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.HubModuleTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.HubModuleTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/hub_module_tokenizer.py">View source</a> - - - -Tokenizer that uses a Hub module. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.HubModuleTokenizer( - hub_module_handle -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -This class is just a wrapper around an internal HubModuleSplitter. It offers -the same functionality, but with 'token'-based method names: e.g., one can use -tokenize() instead of the more general and less informatively named split(). - -#### Example: - -``` ->>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" ->>> segmenter = HubModuleTokenizer(HUB_MODULE) ->>> segmenter.tokenize(["新华社北京"]) -<tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> -``` - -You can also use this tokenizer to return the split strings and their offsets: - -``` ->>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" ->>> segmenter = HubModuleTokenizer(HUB_MODULE) ->>> pieces, starts, ends = segmenter.tokenize_with_offsets(["新华社北京"]) ->>> print("pieces: %s starts: %s ends: %s" % (pieces, starts, ends)) -pieces: <tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> -starts: <tf.RaggedTensor [[0, 9]]> -ends: <tf.RaggedTensor [[9, 15]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`hub_module_handle` -</td> -<td> -A string handle accepted by hub.load(). Supported -cases include (1) a local path to a directory containing a module, and -(2) a handle to a module uploaded to e.g., https://tfhub.dev -</td> -</tr> -</table> - - - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. 
- -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/hub_module_tokenizer.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - input_strs -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings into words. - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input_strs` -</td> -<td> -An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of segmented text. The returned shape is the shape of the -input tensor with an added ragged dimension for tokens of each string. -</td> -</tr> - -</table> - - - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/hub_module_tokenizer.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - input_strs -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings into words with [start,end) offsets. - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input_strs` -</td> -<td> -An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: -* `tokens` is a `RaggedTensor` of strings where `tokens[i1...iN, j]` is - the string content of the `j-th` token in `input_strs[i1...iN]` -* `start_offsets` is a `RaggedTensor` of int64s where - `start_offsets[i1...iN, j]` is the byte offset for the start of the - `j-th` token in `input_strs[i1...iN]`. -* `end_offsets` is a `RaggedTensor` of int64s where - `end_offsets[i1...iN, j]` is the byte offset immediately after the - end of the `j-th` token in `input_strs[i...iN]`. -</td> -</tr> - -</table> - - - - -
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/MaskValuesChooser.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/MaskValuesChooser.md deleted file mode 100644 index 1546fca5..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/MaskValuesChooser.md +++ /dev/null
@@ -1,190 +0,0 @@ -description: Assigns values to the items chosen for masking. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.MaskValuesChooser" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="get_mask_values"/> -</div> - -# text.MaskValuesChooser - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/masking_ops.py">View source</a> - - - -Assigns values to the items chosen for masking. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.MaskValuesChooser( - vocab_size, mask_token, mask_token_rate=0.8, random_token_rate=0.1 -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -`MaskValuesChooser` encapsulates the logic for deciding the value to assign -items that where chosen for masking. The following are the behavior in the -default implementation: - -For `mask_token_rate` of the time, replace the item with the `[MASK]` token: - -``` -my dog is hairy -> my dog is [MASK] -``` - -For `random_token_rate` of the time, replace the item with a random word: - -``` -my dog is hairy -> my dog is apple -``` - -For `1 - mask_token_rate - random_token_rate` of the time, keep the item -unchanged: - -``` -my dog is hairy -> my dog is hairy. -``` - -The default behavior is consistent with the methodology specified in -`Masked LM and Masking Procedure` described in `BERT: Pre-training of Deep -Bidirectional Transformers for Language Understanding` -(https://arxiv.org/pdf/1810.04805.pdf). - -Users may further customize this with behavior through subclassing and -overriding `get_mask_values()`. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`vocab_size` -</td> -<td> -size of vocabulary. -</td> -</tr><tr> -<td> -`mask_token` -</td> -<td> -The id of the mask token. -</td> -</tr><tr> -<td> -`mask_token_rate` -</td> -<td> -(optional) A float between 0 and 1 which indicates how -often the `mask_token` is substituted for tokens selected for masking. -Default is 0.8, NOTE: `mask_token_rate` + `random_token_rate` <= 1. -</td> -</tr><tr> -<td> -`random_token_rate` -</td> -<td> -A float between 0 and 1 which indicates how often a -random token is substituted for tokens selected for masking. Default is -0.1. NOTE: `mask_token_rate` + `random_token_rate` <= 1. -</td> -</tr> -</table> - - - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Attributes</h2></th></tr> - -<tr> -<td> -`mask_token` -</td> -<td> - -</td> -</tr><tr> -<td> -`random_token_rate` -</td> -<td> - -</td> -</tr><tr> -<td> -`vocab_size` -</td> -<td> - -</td> -</tr> -</table> - - - -## Methods - -<h3 id="get_mask_values"><code>get_mask_values</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/masking_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>get_mask_values( - masked_lm_ids -) -</code></pre> - -Get the values used for masking, random injection or no-op. 
-
-
-<!-- Tabular view -->
- <table class="responsive fixed orange">
-<colgroup><col width="214px"><col></colgroup>
-<tr><th colspan="2">Args</th></tr>
-
-<tr>
-<td>
-`masked_lm_ids`
-</td>
-<td>
-A `RaggedTensor` of n dimensions and dtype int32 or int64 whose values are
-the ids of items that have been selected for masking.
-</td>
-</tr>
-</table>
-
-<!-- Tabular view -->
- <table class="responsive fixed orange">
-<colgroup><col width="214px"><col></colgroup>
-<tr><th colspan="2">Returns</th></tr>
-<tr class="alt">
-<td colspan="2">
-A `RaggedTensor` of the same dtype and shape as `masked_lm_ids` whose values
-contain either the mask token, a randomly injected token, or the original
-value.
-</td>
-</tr>
-
-</table>
-
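A short sketch of `get_mask_values` used on its own (assuming `tensorflow` and `tensorflow_text` are available; the vocabulary size, mask token id, and selected ids are illustrative):

```python
import tensorflow as tf
import tensorflow_text as tf_text

# Vocabulary of 100 ids, with id 1 standing in for the [MASK] token.
chooser = tf_text.MaskValuesChooser(
    vocab_size=100, mask_token=1,
    mask_token_rate=0.8, random_token_rate=0.1)

# Ids of the items that were selected for masking.
selected_ids = tf.ragged.constant([[11, 12], [13]], dtype=tf.int64)

# Each value becomes the mask token, a random vocabulary id, or is kept
# unchanged, according to the configured rates.
mask_values = chooser.get_mask_values(selected_ids)
```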
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/RandomItemSelector.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/RandomItemSelector.md deleted file mode 100644 index 6adc2280..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/RandomItemSelector.md +++ /dev/null
@@ -1,262 +0,0 @@ -description: An ItemSelector implementation that randomly selects items in a batch. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.RandomItemSelector" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="get_selectable"/> -<meta itemprop="property" content="get_selection_mask"/> -</div> - -# text.RandomItemSelector - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/item_selector_ops.py">View source</a> - - - -An `ItemSelector` implementation that randomly selects items in a batch. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.RandomItemSelector( - max_selections_per_batch, selection_rate, unselectable_ids=None, shuffle_fn=None -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -`RandomItemSelector` randomly selects items in a batch subject to -restrictions given (max_selections_per_batch, selection_rate and -unselectable_ids). - -#### Example: - -``` ->>> vocab = ["[UNK]", "[MASK]", "[RANDOM]", "[CLS]", "[SEP]", -... "abc", "def", "ghi"] ->>> # Note that commonly in masked language model work, there are ->>> # special tokens we don't want to mask, like CLS, SEP, and probably ->>> # any OOV (out-of-vocab) tokens here called UNK. ->>> # Note that if e.g. there are bucketed OOV tokens in the code, ->>> # that might be a use case for overriding `get_selectable()` to ->>> # exclude a range of IDs rather than enumerating them. ->>> tf.random.set_seed(1234) ->>> selector = tf_text.RandomItemSelector( -... max_selections_per_batch=2, -... selection_rate=0.2, -... unselectable_ids=[0, 3, 4]) # indices of UNK, CLS, SEP ->>> selection = selector.get_selection_mask( -... tf.ragged.constant([[3, 5, 7, 7], [4, 6, 7, 5]]), axis=1) ->>> print(selection) -<tf.RaggedTensor [[False, False, False, True], [False, False, True, False]]> -``` - -The selection has skipped the first elements (the CLS and SEP token codings) and -picked random elements from the other elements of the segments -- if run with a -different random seed the selections might be different. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`max_selections_per_batch` -</td> -<td> -An int of the max number of items to mask out. -</td> -</tr><tr> -<td> -`selection_rate` -</td> -<td> -The rate at which items are randomly selected. -</td> -</tr><tr> -<td> -`unselectable_ids` -</td> -<td> -(optional) A list of python ints or 1D `Tensor` of ints -which are ids that will be not be masked. -</td> -</tr><tr> -<td> -`shuffle_fn` -</td> -<td> -(optional) A function that shuffles a 1D `Tensor`. Default -uses `tf.random.shuffle`. 
-</td> -</tr> -</table> - - - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Attributes</h2></th></tr> - -<tr> -<td> -`max_selections_per_batch` -</td> -<td> - -</td> -</tr><tr> -<td> -`selection_rate` -</td> -<td> - -</td> -</tr><tr> -<td> -`shuffle_fn` -</td> -<td> - -</td> -</tr><tr> -<td> -`unselectable_ids` -</td> -<td> - -</td> -</tr> -</table> - - - -## Methods - -<h3 id="get_selectable"><code>get_selectable</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/item_selector_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>get_selectable( - input_ids, axis -) -</code></pre> - -Return a boolean mask of items that can be chosen for selection. - -The default implementation marks all items whose IDs are not in the -`unselectable_ids` list. This can be overridden if there is a need for a more -complex or algorithmic approach for selectability. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input_ids` -</td> -<td> -a `RaggedTensor`. -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -axis to apply selection on. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -a `RaggedTensor` with dtype of bool and with shape -`input_ids.shape[:axis]`. Its values are True if the -corresponding item (or broadcasted subitems) should be considered for -masking. In the default implementation, all `input_ids` items that are not -listed in `unselectable_ids` (from the class arg) are considered -selectable. -</td> -</tr> - -</table> - - - -<h3 id="get_selection_mask"><code>get_selection_mask</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/item_selector_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>get_selection_mask( - input_ids, axis -) -</code></pre> - -Returns a mask of items that have been selected. - -The default implementation simply returns all items not excluded by -`get_selectable`. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input_ids` -</td> -<td> -A `RaggedTensor`. -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -(optional) An int detailing the dimension to apply selection on. -Default is the 1st dimension. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -a `RaggedTensor` with shape `input_ids.shape[:axis]`. Its values are True -if the corresponding item (or broadcasted subitems) should be selected. -</td> -</tr> - -</table> - - - - -
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/Reduction.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/Reduction.md deleted file mode 100644 index 75ba38b..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/Reduction.md +++ /dev/null
@@ -1,62 +0,0 @@ -description: Type of reduction to be done by the n-gram op. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.Reduction" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="MEAN"/> -<meta itemprop="property" content="STRING_JOIN"/> -<meta itemprop="property" content="SUM"/> -</div> - -# text.Reduction - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/ngrams_op.py">View -source</a> - -Type of reduction to be done by the n-gram op. - -<!-- Placeholder for "Used in" --> - -The supported reductions are as follows: - -* <a href="../text/Reduction.md#SUM"><code>Reduction.SUM</code></a>: Add - values in the window. -* <a href="../text/Reduction.md#MEAN"><code>Reduction.MEAN</code></a>: Average - values in the window. -* <a href="../text/Reduction.md#STRING_JOIN"><code>Reduction.STRING_JOIN</code></a>: - Join strings in the window. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Class Variables</h2></th></tr> - -<tr> -<td> -MEAN<a id="MEAN"></a> -</td> -<td> -`<Reduction.MEAN: 2>` -</td> -</tr><tr> -<td> -STRING_JOIN<a id="STRING_JOIN"></a> -</td> -<td> -`<Reduction.STRING_JOIN: 3>` -</td> -</tr><tr> -<td> -SUM<a id="SUM"></a> -</td> -<td> -`<Reduction.SUM: 1>` -</td> -</tr> -</table>
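A short sketch of the three reductions driving the n-gram op (assuming `tensorflow` and `tensorflow_text` are available; the inputs are illustrative):

```python
import tensorflow as tf
import tensorflow_text as tf_text

words = tf.ragged.constant([["a", "b", "c"], ["d", "e"]])
values = tf.ragged.constant([[1.0, 2.0, 3.0], [4.0, 5.0]])

# STRING_JOIN concatenates the strings in each window of width 2.
bigrams = tf_text.ngrams(words, width=2,
                         reduction_type=tf_text.Reduction.STRING_JOIN,
                         string_separator=" ")

# SUM and MEAN reduce numeric windows instead.
window_sums = tf_text.ngrams(values, width=2,
                             reduction_type=tf_text.Reduction.SUM)
window_means = tf_text.ngrams(values, width=2,
                              reduction_type=tf_text.Reduction.MEAN)
```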
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/RegexSplitter.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/RegexSplitter.md deleted file mode 100644 index 7c9a1d9..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/RegexSplitter.md +++ /dev/null
@@ -1,196 +0,0 @@
-description: RegexSplitter splits text on the given regular expression.
-
-<div itemscope itemtype="http://developers.google.com/ReferenceObject">
-<meta itemprop="name" content="text.RegexSplitter" />
-<meta itemprop="path" content="Stable" />
-<meta itemprop="property" content="__init__"/>
-<meta itemprop="property" content="split"/>
-<meta itemprop="property" content="split_with_offsets"/>
-</div>
-
-# text.RegexSplitter
-
-<!-- Insert buttons and diff -->
-
-<table class="tfo-notebook-buttons tfo-api nocontent" align="left">
-
-</table>
-
-<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/regex_split_ops.py">View source</a>
-
-`RegexSplitter` splits text on the given regular expression.
-
-Inherits From: [`SplitterWithOffsets`](../text/SplitterWithOffsets.md),
-[`Splitter`](../text/Splitter.md)
-
-<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link">
-<code>text.RegexSplitter(
-    split_regex=None
-)
-</code></pre>
-
-<!-- Placeholder for "Used in" -->
-
-The default pattern splits on newline characters. The splitter can also return
-the beginning and ending byte offsets of each piece.
-
-By default, this splitter will break on newlines, ignoring any trailing ones.
-
-```
->>> splitter = RegexSplitter()
->>> text_input=[
-...       b"Hi there.\nWhat time is it?\nIt is gametime.",
-...       b"Who let the dogs out?\nWho?\nWho?\nWho?\n\n",
-...     ]
->>> splitter.split(text_input)
-<tf.RaggedTensor [[b'Hi there.', b'What time is it?', b'It is gametime.'], [b'Who let the dogs out?', b'Who?', b'Who?', b'Who?']]>
-```
-
-The splitter can be passed a custom split pattern, as well. The pattern can be
-any string, but we're using a single character (tab) in this example.
-
-```
->>> splitter = RegexSplitter(split_regex='\t')
->>> text_input=[
-...       b"Hi there.\tWhat time is it?\tIt is gametime.",
-...       b"Who let the dogs out?\tWho?\tWho?\tWho?\t\t",
-...     ]
->>> splitter.split(text_input)
-<tf.RaggedTensor [[b'Hi there.', b'What time is it?', b'It is gametime.'], [b'Who let the dogs out?', b'Who?', b'Who?', b'Who?']]>
-```
-
-<!-- Tabular view -->
- <table class="responsive fixed orange">
-<colgroup><col width="214px"><col></colgroup>
-<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr>
-
-<tr>
-<td>
-`split_regex`
-</td>
-<td>
-(optional) A string containing the regex pattern of a
-delimiter to split on. Default is '\r?\n'.
-</td>
-</tr>
-</table>
-
-## Methods
-
-<h3 id="split"><code>split</code></h3>
-
-<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/regex_split_ops.py">View source</a>
-
-<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link">
-<code>split(
-    input
-)
-</code></pre>
-
-Splits the input tensor into pieces.
-
-Generally, the pieces returned by a splitter correspond to substrings of the
-original string, and can be encoded using either strings or integer ids.
-
-#### Example:
-
-```
->>> print(tf_text.WhitespaceTokenizer().split("small medium large"))
-tf.Tensor([b'small' b'medium' b'large'], shape=(3,), dtype=string)
-```
-
-<!-- Tabular view -->
- <table class="responsive fixed orange">
-<colgroup><col width="214px"><col></colgroup>
-<tr><th colspan="2">Args</th></tr>
-
-<tr>
-<td>
-`input`
-</td>
-<td>
-An N-dimensional UTF-8 string (or optionally integer) `Tensor` or
-`RaggedTensor`.
-</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -An N+1-dimensional UTF-8 string or integer `Tensor` or `RaggedTensor`. -For each string from the input tensor, the final, extra dimension contains -the pieces that string was split into. -</td> -</tr> - -</table> - - - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/regex_split_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Splits the input tensor, and returns the resulting pieces with offsets. - -#### Example: - -``` ->>> splitter = tf_text.WhitespaceTokenizer() ->>> pieces, starts, ends = splitter.split_with_offsets("a bb ccc") ->>> print(pieces.numpy(), starts.numpy(), ends.numpy()) -[b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string (or optionally integer) `Tensor` or -`RaggedTensor`. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(pieces, start_offsets, end_offsets)` where: - -* `pieces` is an N+1-dimensional UTF-8 string or integer `Tensor` or - `RaggedTensor`. -* `start_offsets` is an N+1-dimensional integer `Tensor` or `RaggedTensor` - containing the starting indices of each piece (byte indices for input - strings). -* `end_offsets` is an N+1-dimensional integer `Tensor` or `RaggedTensor` - containing the exclusive ending indices of each piece (byte indices for - input strings). </td> </tr> - -</table> - - - - -
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/RoundRobinTrimmer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/RoundRobinTrimmer.md deleted file mode 100644 index 299da26d..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/RoundRobinTrimmer.md +++ /dev/null
@@ -1,155 +0,0 @@
-description: A Trimmer that allocates a length budget to segments via round
-robin.
-
-<div itemscope itemtype="http://developers.google.com/ReferenceObject">
-<meta itemprop="name" content="text.RoundRobinTrimmer" />
-<meta itemprop="path" content="Stable" />
-<meta itemprop="property" content="__init__"/>
-<meta itemprop="property" content="generate_mask"/>
-<meta itemprop="property" content="trim"/>
-</div>
-
-# text.RoundRobinTrimmer
-
-<!-- Insert buttons and diff -->
-
-<table class="tfo-notebook-buttons tfo-api nocontent" align="left">
-
-</table>
-
-<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/trimmer_ops.py">View
-source</a>
-
-A `Trimmer` that allocates a length budget to segments via round robin.
-
-<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link">
-<code>text.RoundRobinTrimmer(
-    max_seq_length, axis=-1
-)
-</code></pre>
-
-<!-- Placeholder for "Used in" -->
-
-A `Trimmer` that allocates a length budget to segments using a round robin
-strategy, then drops elements outside of the segment's allocated budget. See
-`generate_mask()` for more details.
-
-<!-- Tabular view -->
- <table class="responsive fixed orange">
-<colgroup><col width="214px"><col></colgroup>
-<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr>
-
-<tr>
-<td>
-`max_seq_length`
-</td>
-<td>
-A scalar int32 `Tensor` that describes the max number of elements allowed
-in a batch.
-</td>
-</tr><tr>
-<td>
-`axis`
-</td>
-<td>
-Axis to apply trimming on.
-</td>
-</tr>
-</table>
-
-## Methods
-
-<h3 id="generate_mask"><code>generate_mask</code></h3>
-
-<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/trimmer_ops.py">View
-source</a>
-
-<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link">
-<code>generate_mask(
-    segments
-)
-</code></pre>
-
-Calculates a truncation mask given a per-batch budget.
-
-Calculates a truncation mask given a budget of the max number of items for
-each batch row. The allocation of the budget is done using a 'round robin'
-algorithm. This algorithm allocates quota to each bucket, left-to-right
-repeatedly until all the buckets are filled.
-
-For example, if the budget is [5] and we have segments of size [3, 4, 2], the
-truncation budget will be allocated as [2, 2, 1].
-
-<!-- Tabular view -->
- <table class="responsive fixed orange">
-<colgroup><col width="214px"><col></colgroup>
-<tr><th colspan="2">Args</th></tr>
-
-<tr>
-<td>
-`segments`
-</td>
-<td>
-A list of `RaggedTensor`s, each with a shape of [num_batch,
-(num_items)].
-</td>
-</tr>
-</table>
-
-<!-- Tabular view -->
- <table class="responsive fixed orange">
-<colgroup><col width="214px"><col></colgroup>
-<tr><th colspan="2">Returns</th></tr>
-<tr class="alt">
-<td colspan="2">
-A list with len(segments) of `RaggedTensor`s; see superclass for details.
-</td>
-</tr>
-
-</table>
-
-<h3 id="trim"><code>trim</code></h3>
-
-<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/trimmer_ops.py">View
-source</a>
-
-<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link">
-<code>trim(
-    segments
-)
-</code></pre>
-
-Truncates the list of `segments`.
-
-Truncates the list of `segments` using the truncation strategy defined by
-`generate_mask`.
- -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`segments` -</td> -<td> -A list of `RaggedTensor`s w/ shape [num_batch, (num_items)]. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -a list of `RaggedTensor`s with len(segments) number of items and where -each item has the same shape as its counterpart in `segments` and -with unwanted values dropped. The values are dropped according to the -`TruncationStrategy` defined. -</td> -</tr> - -</table>
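A short sketch of round-robin trimming across two segments (assuming `tensorflow` and `tensorflow_text` are available; the segment contents and the budget are illustrative):

```python
import tensorflow as tf
import tensorflow_text as tf_text

# Keep at most 5 elements per batch row overall, splitting the budget
# across the segments round-robin.
trimmer = tf_text.RoundRobinTrimmer(max_seq_length=5)

seg_a = tf.ragged.constant([[1, 2, 3], [10, 20, 30, 40]])
seg_b = tf.ragged.constant([[4, 5, 6, 7], [50, 60]])

# Returns one trimmed RaggedTensor per input segment, with elements outside
# each segment's allocated budget dropped.
trimmed_a, trimmed_b = trimmer.trim([seg_a, seg_b])
```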
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/SentencepieceTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/SentencepieceTokenizer.md deleted file mode 100644 index 6feac38ac..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/SentencepieceTokenizer.md +++ /dev/null
@@ -1,480 +0,0 @@ -description: Tokenizes a tensor of UTF-8 strings. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.SentencepieceTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="detokenize"/> -<meta itemprop="property" content="id_to_string"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="string_to_id"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -<meta itemprop="property" content="vocab_size"/> -</div> - -# text.SentencepieceTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentencepiece_tokenizer.py">View -source</a> - -Tokenizes a tensor of UTF-8 strings. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md), [`Detokenizer`](../text/Detokenizer.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.SentencepieceTokenizer( - model=None, out_type=dtypes.int32, nbest_size=0, alpha=1.0, reverse=False, - add_bos=False, add_eos=False, return_nbest=False, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -SentencePiece is an unsupervised text tokenizer and detokenizer. It is used -mainly for Neural Network-based text generation systems where the vocabulary -size is predetermined prior to the neural model training. SentencePiece -implements subword units with the extension of direct training from raw -sentences. - -Before using the tokenizer, you will need to train a vocabulary and build a -model configuration for it. Please visit the -[Sentencepiece repository](https://github.com/google/sentencepiece#train-sentencepiece-model) -for the most up-to-date instructions on this process. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`model` -</td> -<td> -The sentencepiece model serialized proto. -</td> -</tr><tr> -<td> -`out_type` -</td> -<td> -output type. tf.int32 or tf.string (Default = tf.int32) Setting -tf.int32 directly encodes the string into an id sequence. -</td> -</tr><tr> -<td> -`nbest_size` -</td> -<td> -A scalar for sampling. -* `nbest_size = {0,1}`: No sampling is performed. (default) -* `nbest_size > 1`: samples from the nbest_size results. -* `nbest_size < 0`: assuming that nbest_size is infinite and samples - from the all hypothesis (lattice) using - forward-filtering-and-backward-sampling algorithm. -</td> -</tr><tr> -<td> -`alpha` -</td> -<td> -A scalar for a smoothing parameter. Inverse temperature for -probability rescaling. -</td> -</tr><tr> -<td> -`reverse` -</td> -<td> -Reverses the tokenized sequence (Default = false) -</td> -</tr><tr> -<td> -`add_bos` -</td> -<td> -Add beginning of sentence token to the result (Default = false) -</td> -</tr><tr> -<td> -`add_eos` -</td> -<td> -Add end of sentence token to the result (Default = false). When -`reverse=True` beginning/end of sentence tokens are added after -reversing. 
-</td> -</tr><tr> -<td> -`return_nbest` -</td> -<td> -If True requires that `nbest_size` is a scalar and `> 1`. -Returns the `nbest_size` best tokenizations for each sentence instead -of a single one. The returned tensor has shape -`[batch * nbest, (tokens)]`. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. -</td> -</tr> -</table> - -## Methods - -<h3 id="detokenize"><code>detokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentencepiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>detokenize( - input, name=None -) -</code></pre> - -Detokenizes tokens into preprocessed text. - -This function accepts tokenized text, and reforms it back into sentences. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor` or `Tensor` of UTF-8 string tokens with a rank of -at least 1. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A N-1 dimensional string Tensor or RaggedTensor of the detokenized text. -</td> -</tr> - -</table> - -<h3 id="id_to_string"><code>id_to_string</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentencepiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>id_to_string( - input, name=None -) -</code></pre> - -Converts vocabulary id into a token. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An arbitrary tensor of int32 representing the token IDs. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tensor of string with the same shape as input. -</td> -</tr> - -</table> - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. 
- -<h3 id="string_to_id"><code>string_to_id</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentencepiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>string_to_id( - input, name=None -) -</code></pre> - -Converts token into a vocabulary id. - -This function is particularly helpful for determining the IDs for any special -tokens whose ID could not be determined through normal tokenization. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An arbitrary tensor of string tokens. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tensor of int32 representing the IDs with the same shape as input. -</td> -</tr> - -</table> - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentencepiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - input, name=None -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of tokenized text. The returned shape is the shape of the -input tensor with an added ragged dimension for tokens of each string. -</td> -</tr> - -</table> - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentencepiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - input, name=None -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings. - -This function returns a tuple containing the tokens along with start and end -byte offsets that mark where in the original string each token was located. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. 
-</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: -</td> -</tr> -<tr> -<td> -`tokens` -</td> -<td> -is an N+1-dimensional UTF-8 string or integer `Tensor` or -`RaggedTensor`. -</td> -</tr><tr> -<td> -`start_offsets` -</td> -<td> -is an N+1-dimensional integer `Tensor` or -`RaggedTensor` containing the starting indices of each token (byte -indices for input strings). -</td> -</tr><tr> -<td> -`end_offsets` -</td> -<td> -is an N+1-dimensional integer `Tensor` or -`RaggedTensor` containing the exclusive ending indices of each token -(byte indices for input strings). -</td> -</tr> -</table> - -<h3 id="vocab_size"><code>vocab_size</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentencepiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>vocab_size( - name=None -) -</code></pre> - -Returns the vocabulary size. - -The number of tokens from within the Sentencepiece vocabulary provided at the -time of initialization. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A scalar representing the vocabulary size. -</td> -</tr> - -</table>
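Reviewer note: with SentencepieceTokenizer.md removed here, a minimal usage sketch of the API it documented, assuming `tensorflow_text` is installed and `sp.model` is a hypothetical, separately trained SentencePiece model file:

```python
# Illustrative sketch only; "sp.model" is a placeholder for a pre-trained
# SentencePiece model serialized proto.
import tensorflow as tf
import tensorflow_text as tf_text

with open("sp.model", "rb") as f:
    model = f.read()

tokenizer = tf_text.SentencepieceTokenizer(model=model, out_type=tf.int32)
ids = tokenizer.tokenize(["Hello world.", "SentencePiece in action."])
print(ids)                        # RaggedTensor of int32 token ids
print(tokenizer.detokenize(ids))  # round-trips back to the input strings
print(tokenizer.vocab_size())     # scalar vocabulary size
```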
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitMergeFromLogitsTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitMergeFromLogitsTokenizer.md deleted file mode 100644 index 7fcdb463..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitMergeFromLogitsTokenizer.md +++ /dev/null
@@ -1,394 +0,0 @@ -description: Tokenizes a tensor of UTF-8 string into words according to logits. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.SplitMergeFromLogitsTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.SplitMergeFromLogitsTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/split_merge_from_logits_tokenizer.py">View source</a> - - - -Tokenizes a tensor of UTF-8 string into words according to logits. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.SplitMergeFromLogitsTokenizer( - force_split_at_break_character=True -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> <td> `force_split_at_break_character` </td> <td> a bool that indicates -whether to force start a new word after an ICU-defined whitespace character. -Regardless of this parameter, we never include a whitespace into a token, and we -always ignore the split/merge action for the whitespace character itself. This -parameter indicates what happens after a whitespace. * if -force_split_at_break_character is true, create a new word starting at the first -non-space character, regardless of the 0/1 label for that character, for -instance: - -~~~ -```python -s = [2.0, 1.0] # sample pair of logits indicating a split action -m = [1.0, 3.0] # sample pair of logits indicating a merge action - -strings=["New York"] -logits=[[s, m, m, s, m, m, m, m]] -output tokens=[["New", "York"]] - -strings=["New York"] -logits=[[s, m, m, m, m, m, m, m]] -output tokens=[["New", "York"]] - -strings=["New York"], -logits=[[s, m, m, m, s, m, m, m]] -output tokens=[["New", "York"]] -``` -~~~ - -* otherwise, create a new word / continue the current one depending on the - action for the first non-whitespace character. - - ```python - s = [2.0, 1.0] # sample pair of logits indicating a split action - m = [1.0, 3.0] # sample pair of logits indicating a merge action - - strings=["New York"], - logits=[[s, m, m, s, m, m, m, m]] - output tokens=[["NewYork"]] - - strings=["New York"], - logits=[[s, m, m, m, m, m, m, m]] - output tokens=[["NewYork"]] - - strings=["New York"], - logits=[[s, m, m, m, s, m, m, m]] - output tokens=[["New", "York"]] - ``` - - </td> - </tr> - </table> - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. 
- -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/split_merge_from_logits_tokenizer.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - strings, logits -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings according to logits. - -The logits refer to the split / merge action we should take for each -character. For more info, see the doc for the logits argument below. - -### Example: - -``` ->>> strings = ['IloveFlume!', 'and tensorflow'] ->>> logits = [ -... [ -... # 'I' -... [5.0, -3.2], # I: split -... # 'love' -... [2.2, -1.0], # l: split -... [0.2, 12.0], # o: merge -... [0.0, 11.0], # v: merge -... [-3.0, 3.0], # e: merge -... # 'Flume' -... [10.0, 0.0], # F: split -... [0.0, 11.0], # l: merge -... [0.0, 11.0], # u: merge -... [0.0, 12.0], # m: merge -... [0.0, 12.0], # e: merge -... # '!' -... [5.2, -7.0], # !: split -... # padding: -... [1.0, 0.0], [1.0, 1.0], [1.0, 0.0], -... ], [ -... # 'and' -... [2.0, 0.7], # a: split -... [0.2, 1.5], # n: merge -... [0.5, 2.3], # d: merge -... # ' ' -... [1.7, 7.0], # <space>: merge -... # 'tensorflow' -... [2.2, 0.1], # t: split -... [0.2, 3.1], # e: merge -... [1.1, 2.5], # n: merge -... [0.7, 0.9], # s: merge -... [0.6, 1.0], # o: merge -... [0.3, 1.0], # r: merge -... [0.2, 2.2], # f: merge -... [0.7, 3.1], # l: merge -... [0.4, 5.0], # o: merge -... [0.8, 6.0], # w: merge -... ]] ->>> tokenizer = SplitMergeFromLogitsTokenizer() ->>> tokenizer.tokenize(strings, logits) -<tf.RaggedTensor [[b'I', b'love', b'Flume', b'!'], [b'and', b'tensorflow']]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`strings` -</td> -<td> -a 1D `Tensor` of UTF-8 strings. -</td> -</tr><tr> -<td> -`logits` -</td> -<td> -3D Tensor; logits[i,j,0] is the logit for the split action for -j-th character of strings[i]. logits[i,j,1] is the logit for the merge -action for that same character. For each character, we pick the action -with the greatest logit. Split starts a new word at this character and -merge adds this character to the previous word. The shape of this -tensor should be (n, m, 2) where n is the number of strings, and m is -greater or equal with the number of characters from each strings[i]. As -the elements of the strings tensor may have different lengths (in UTF-8 -chars), padding may be required to get a dense vector; for each row, the -extra (padding) pairs of logits are ignored. 
-</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of strings where `tokens[i, k]` is the string -content of the `k-th` token in `strings[i]` -</td> -</tr> - -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Raises</th></tr> - -<tr> -<td> -`InvalidArgumentError` -</td> -<td> -if one of the input Tensors has the wrong shape. -E.g., if the logits tensor does not have enough elements for one of the -strings. -</td> -</tr> -</table> - - - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/split_merge_from_logits_tokenizer.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - strings, logits -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings into tokens with [start,end) offsets. - -### Example: - -``` ->>> strings = ['IloveFlume!', 'and tensorflow'] ->>> logits = [ -... [ -... # 'I' -... [5.0, -3.2], # I: split -... # 'love' -... [2.2, -1.0], # l: split -... [0.2, 12.0], # o: merge -... [0.0, 11.0], # v: merge -... [-3.0, 3.0], # e: merge -... # 'Flume' -... [10.0, 0.0], # F: split -... [0.0, 11.0], # l: merge -... [0.0, 11.0], # u: merge -... [0.0, 12.0], # m: merge -... [0.0, 12.0], # e: merge -... # '!' -... [5.2, -7.0], # !: split -... # padding: -... [1.0, 0.0], [1.0, 1.0], [1.0, 0.0], -... ], [ -... # 'and' -... [2.0, 0.7], # a: split -... [0.2, 1.5], # n: merge -... [0.5, 2.3], # d: merge -... # ' ' -... [1.7, 7.0], # <space>: merge -... # 'tensorflow' -... [2.2, 0.1], # t: split -... [0.2, 3.1], # e: merge -... [1.1, 2.5], # n: merge -... [0.7, 0.9], # s: merge -... [0.6, 1.0], # o: merge -... [0.3, 1.0], # r: merge -... [0.2, 2.2], # f: merge -... [0.7, 3.1], # l: merge -... [0.4, 5.0], # o: merge -... [0.8, 6.0], # w: merge -... ]] ->>> tokenizer = SplitMergeFromLogitsTokenizer() ->>> tokens, starts, ends = tokenizer.tokenize_with_offsets(strings, logits) ->>> tokens -<tf.RaggedTensor [[b'I', b'love', b'Flume', b'!'], [b'and', b'tensorflow']]> ->>> starts -<tf.RaggedTensor [[0, 1, 5, 10], [0, 4]]> ->>> ends -<tf.RaggedTensor [[1, 5, 10, 11], [3, 14]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`strings` -</td> -<td> -A 1D `Tensor` of UTF-8 strings. -</td> -</tr><tr> -<td> -`logits` -</td> -<td> -3D Tensor; logits[i,j,0] is the logit for the split action for -j-th character of strings[i]. logits[i,j,1] is the logit for the merge -action for that same character. For each character, we pick the action -with the greatest logit. Split starts a new word at this character and -merge adds this character to the previous word. The shape of this -tensor should be (n, m, 2) where n is the number of strings, and m is -greater or equal with the number of characters from each strings[i]. As -the elements of the strings tensor may have different lengths (in UTF-8 -chars), padding may be required to get a dense vector; for each row, the -extra (padding) pairs of logits are ignored. 
-</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: -* `tokens` is a `RaggedTensor` of strings where `tokens[i, k]` is - the string content of the `k-th` token in `strings[i]` -* `start_offsets` is a `RaggedTensor` of int64s where - `start_offsets[i, k]` is the byte offset for the start of the - `k-th` token in `strings[i]`. -* `end_offsets` is a `RaggedTensor` of int64s where - `end_offsets[i, k]` is the byte offset immediately after the - end of the `k-th` token in `strings[i]`. -</td> -</tr> - -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Raises</th></tr> - -<tr> -<td> -`InvalidArgumentError` -</td> -<td> -if one of the input Tensors has the wrong shape. -E.g., if the tensor logits does not have enough elements for one of the -strings. -</td> -</tr> -</table> - - - - -
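Reviewer note: the split/merge-from-logits behavior documented in the deleted file, condensed into a sketch with hand-written logits (the `s`/`m` pairs are illustrative values, not model output):

```python
# logits[i, j, 0] is the "split" logit and logits[i, j, 1] the "merge" logit
# for the j-th character of strings[i]; the larger one wins.
import tensorflow_text as tf_text

s, m = [5.0, -5.0], [-5.0, 5.0]      # strong split / strong merge
strings = ["NewYork"]
logits = [[s, m, m, s, m, m, m]]     # split at 'N' and 'Y'

tokenizer = tf_text.SplitMergeFromLogitsTokenizer()
print(tokenizer.tokenize(strings, logits))
# <tf.RaggedTensor [[b'New', b'York']]>
```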
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitMergeTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitMergeTokenizer.md deleted file mode 100644 index fe36550..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitMergeTokenizer.md +++ /dev/null
@@ -1,292 +0,0 @@ -description: Tokenizes a tensor of UTF-8 string into words according to labels. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.SplitMergeTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.SplitMergeTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/split_merge_tokenizer.py">View -source</a> - -Tokenizes a tensor of UTF-8 string into words according to labels. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.SplitMergeTokenizer() -</code></pre> - -<!-- Placeholder for "Used in" --> - - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/split_merge_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - input, labels, force_split_at_break_character=True -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings according to labels. - -### Example: - -``` ->>> strings = ["HelloMonday", "DearFriday"] ->>> labels = [[0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], -... [0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0]] ->>> tokenizer = SplitMergeTokenizer() ->>> tokenizer.tokenize(strings, labels) -<tf.RaggedTensor [[b'Hello', b'Monday'], [b'Dear', b'Friday']]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> <td> `input` </td> <td> An N-dimensional `Tensor` or `RaggedTensor` of -UTF-8 strings. </td> </tr><tr> <td> `labels` </td> <td> An (N+1)-dimensional -`Tensor` or `RaggedTensor` of `int32`, with `labels[i1...iN, j]` being the -split(0)/merge(1) label of the j-th character for `input[i1...iN]`. Here split -means create a new word with this character and merge means adding this -character to the previous word. 
</td> </tr><tr> <td> -`force_split_at_break_character` </td> <td> bool indicates whether to force -start a new word after seeing a ICU defined whitespace character. When seeing -one or more ICU defined whitespace character: * if -`force_split_at_break_character` is set true, then create a new word at the -first non-space character, regardless of the label of that character, for -instance: - -```python - input="New York" - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["New", "York"] -``` - -```python - input="New York" - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["New", "York"] -``` - -```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] -``` - -* otherwise, whether to create a new word or not for the first non-space - character depends on the label of that character, for instance: - - ```python - input="New York", - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] - ``` - - </td> - </tr> - </table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of strings where `tokens[i1...iN, j]` is the string -content of the `j-th` token in `input[i1...iN]` -</td> -</tr> - -</table> - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/split_merge_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - input, labels, force_split_at_break_character=True -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings into tokens with [start,end) offsets. - -### Example: - -``` ->>> strings = ["HelloMonday", "DearFriday"] ->>> labels = [[0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], -... [0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0]] ->>> tokenizer = SplitMergeTokenizer() ->>> tokens, starts, ends = tokenizer.tokenize_with_offsets(strings, labels) ->>> tokens -<tf.RaggedTensor [[b'Hello', b'Monday'], [b'Dear', b'Friday']]> ->>> starts -<tf.RaggedTensor [[0, 5], [0, 4]]> ->>> ends -<tf.RaggedTensor [[5, 11], [4, 10]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> <td> `input` </td> <td> An N-dimensional `Tensor` or `RaggedTensor` of -UTF-8 strings. </td> </tr><tr> <td> `labels` </td> <td> An (N+1)-dimensional -`Tensor` or `RaggedTensor` of int32, with labels[i1...iN, j] being the -split(0)/merge(1) label of the j-th character for input[i1...iN]. Here split -means create a new word with this character and merge means adding this -character to the previous word. </td> </tr><tr> <td> -`force_split_at_break_character` </td> <td> bool indicates whether to force -start a new word after seeing a ICU defined whitespace character. 
When seeing -one or more ICU defined whitespace character: * if -`force_split_at_break_character` is set true, then create a new word at the -first non-space character, regardless of the label of that character, for -instance: - -```python - input="New York" - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["New", "York"] -``` - -```python - input="New York" - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["New", "York"] -``` - -```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] -``` - -* otherwise, whether to create a new word or not for the first non-space - character depends on the label of that character, for instance: - - ```python - input="New York", - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] - ``` - - </td> - </tr> - </table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: -</td> -</tr> -<tr> -<td> -`tokens` -</td> -<td> -is a `RaggedTensor` of strings where `tokens[i1...iN, j]` is -the string content of the `j-th` token in `input[i1...iN]` -</td> -</tr><tr> -<td> -`start_offsets` -</td> -<td> -is a `RaggedTensor` of int64s where -`start_offsets[i1...iN, j]` is the byte offset for the start of the -`j-th` token in `input[i1...iN]`. -</td> -</tr><tr> -<td> -`end_offsets` -</td> -<td> -is a `RaggedTensor` of int64s where -`end_offsets[i1...iN, j]` is the byte offset immediately after the -end of the `j-th` token in `input[i...iN]`. -</td> -</tr> -</table>
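Reviewer note: a short sketch of the label-driven tokenizer documented in the deleted SplitMergeTokenizer.md, using the file's own example data (0 starts a new word, 1 merges with the previous word):

```python
import tensorflow_text as tf_text

strings = ["HelloMonday"]
labels = [[0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]]  # new word at 'H' and at 'M'

tokenizer = tf_text.SplitMergeTokenizer()
print(tokenizer.tokenize(strings, labels))
# <tf.RaggedTensor [[b'Hello', b'Monday']]>
```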
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/Splitter.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/Splitter.md deleted file mode 100644 index e06a204..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/Splitter.md +++ /dev/null
@@ -1,111 +0,0 @@ -description: An abstract base class for splitting text. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.Splitter" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -</div> - -# text.Splitter - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/splitter.py">View source</a> - - - -An abstract base class for splitting text. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.Splitter( - name=None -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -A Splitter is a module that splits strings into pieces. Generally, the pieces -returned by a splitter correspond to substrings of the original string, and can -be encoded using either strings or integer ids (where integer ids could be -created by hashing strings or by looking them up in a fixed vocabulary table -that maps strings to ids). - -Each Splitter subclass must implement a `split` method, which subdivides each -string in an input Tensor into pieces. E.g.: - -``` ->>> class SimpleSplitter(tf_text.Splitter): -... def split(self, input): -... return tf.strings.split(input) ->>> print(SimpleSplitter().split(["hello world", "this is a test"])) -<tf.RaggedTensor [[b'hello', b'world'], [b'this', b'is', b'a', b'test']]> -``` - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/splitter.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>@abc.abstractmethod</code> -<code>split( - input -) -</code></pre> - -Splits the input tensor into pieces. - -Generally, the pieces returned by a splitter correspond to substrings of the -original string, and can be encoded using either strings or integer ids. - -#### Example: - -``` ->>> print(tf_text.WhitespaceTokenizer().split("small medium large")) -tf.Tensor([b'small' b'medium' b'large'], shape=(3,), dtype=string) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string (or optionally integer) `Tensor` or -`RaggedTensor`. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -An N+1-dimensional UTF-8 string or integer `Tensor` or `RaggedTensor`. -For each string from the input tensor, the final, extra dimension contains -the pieces that string was split into. -</td> -</tr> - -</table> - - - - -
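Reviewer note: the abstract `Splitter` contract removed above, sketched as a subclass that mirrors the doc's own SimpleSplitter example:

```python
import tensorflow as tf
import tensorflow_text as tf_text

class WhitespaceSplitter(tf_text.Splitter):
    def split(self, input):
        # Delegate to tf.strings.split, which returns a RaggedTensor of pieces.
        return tf.strings.split(input)

print(WhitespaceSplitter().split(["hello world", "this is a test"]))
# <tf.RaggedTensor [[b'hello', b'world'], [b'this', b'is', b'a', b'test']]>
```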
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitterWithOffsets.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitterWithOffsets.md deleted file mode 100644 index 64f4371..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/SplitterWithOffsets.md +++ /dev/null
@@ -1,164 +0,0 @@ -description: An abstract base class for splitters that return offsets. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.SplitterWithOffsets" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -</div> - -# text.SplitterWithOffsets - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/splitter.py">View -source</a> - -An abstract base class for splitters that return offsets. - -Inherits From: [`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.SplitterWithOffsets( - name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Each SplitterWithOffsets subclass must implement the `split_with_offsets` -method, which returns a tuple containing both the pieces and the offsets where -those pieces occurred in the input string. E.g.: - -``` ->>> class CharSplitter(SplitterWithOffsets): -... def split_with_offsets(self, input): -... chars, starts = tf.strings.unicode_split_with_offsets(input, 'UTF-8') -... lengths = tf.expand_dims(tf.strings.length(input), -1) -... ends = tf.concat([starts[..., 1:], tf.cast(lengths, tf.int64)], -1) -... return chars, starts, ends -... def split(self, input): -... return self.split_with_offsets(input)[0] ->>> pieces, starts, ends = CharSplitter().split_with_offsets("a😊c") ->>> print(pieces.numpy(), starts.numpy(), ends.numpy()) -[b'a' b'\xf0\x9f\x98\x8a' b'c'] [0 1 5] [1 5 6] -``` - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/splitter.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>@abc.abstractmethod</code> -<code>split( - input -) -</code></pre> - -Splits the input tensor into pieces. - -Generally, the pieces returned by a splitter correspond to substrings of the -original string, and can be encoded using either strings or integer ids. - -#### Example: - -``` ->>> print(tf_text.WhitespaceTokenizer().split("small medium large")) -tf.Tensor([b'small' b'medium' b'large'], shape=(3,), dtype=string) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string (or optionally integer) `Tensor` or -`RaggedTensor`. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -An N+1-dimensional UTF-8 string or integer `Tensor` or `RaggedTensor`. -For each string from the input tensor, the final, extra dimension contains -the pieces that string was split into. 
-</td> -</tr> - -</table> - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/splitter.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>@abc.abstractmethod</code> -<code>split_with_offsets( - input -) -</code></pre> - -Splits the input tensor, and returns the resulting pieces with offsets. - -#### Example: - -``` ->>> splitter = tf_text.WhitespaceTokenizer() ->>> pieces, starts, ends = splitter.split_with_offsets("a bb ccc") ->>> print(pieces.numpy(), starts.numpy(), ends.numpy()) -[b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string (or optionally integer) `Tensor` or -`RaggedTensor`. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(pieces, start_offsets, end_offsets)` where: - -* `pieces` is an N+1-dimensional UTF-8 string or integer `Tensor` or - `RaggedTensor`. -* `start_offsets` is an N+1-dimensional integer `Tensor` or `RaggedTensor` - containing the starting indices of each piece (byte indices for input - strings). -* `end_offsets` is an N+1-dimensional integer `Tensor` or `RaggedTensor` - containing the exclusive ending indices of each piece (byte indices for - input strings). </td> </tr> - -</table>
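Reviewer note: the `split_with_offsets` contract documented in the deleted file, shown via `WhitespaceTokenizer` (which implements SplitterWithOffsets), reusing the doc's own example values:

```python
import tensorflow_text as tf_text

splitter = tf_text.WhitespaceTokenizer()
pieces, starts, ends = splitter.split_with_offsets("a bb ccc")
print(pieces.numpy(), starts.numpy(), ends.numpy())
# [b'a' b'bb' b'ccc'] [0 2 5] [1 4 8]
```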
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/StateBasedSentenceBreaker.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/StateBasedSentenceBreaker.md deleted file mode 100644 index e895498..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/StateBasedSentenceBreaker.md +++ /dev/null
@@ -1,169 +0,0 @@ -description: A Splitter that uses a state machine to determine sentence breaks. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.StateBasedSentenceBreaker" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="break_sentences"/> -<meta itemprop="property" content="break_sentences_with_offsets"/> -</div> - -# text.StateBasedSentenceBreaker - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/state_based_sentence_breaker_op.py">View source</a> - - - -A `Splitter` that uses a state machine to determine sentence breaks. - -<!-- Placeholder for "Used in" --> - -`StateBasedSentenceBreaker` splits text into sentences by using a state -machine to determine when a sequence of characters indicates a potential -sentence break. - -The state machine consists of an `initial state`, then transitions to a -`collecting terminal punctuation state` once an acronym, an emoticon, or -terminal punctuation (ellipsis, question mark, exclamation point, etc.), is -encountered. - -It transitions to the `collecting close punctuation state` when a close -punctuation (close bracket, end quote, etc.) is found. - -If non-punctuation is encountered in the collecting terminal punctuation or -collecting close punctuation states, then the state machine exits, returning -false, indicating it has moved past the end of a potential sentence fragment. - -## Methods - -<h3 id="break_sentences"><code>break_sentences</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/state_based_sentence_breaker_op.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>break_sentences( - doc -) -</code></pre> - -Splits `doc` into sentence fragments and returns the fragments' text. - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`doc` -</td> -<td> -A string `Tensor` of shape [batch] with a batch of documents. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> - -<tr> -<td> -`results` -</td> -<td> -A string `RaggedTensor` of shape [batch, (num_sentences)] -with each input broken up into its constituent sentence fragments. -</td> -</tr> -</table> - - - -<h3 id="break_sentences_with_offsets"><code>break_sentences_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/state_based_sentence_breaker_op.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>break_sentences_with_offsets( - doc -) -</code></pre> - -Splits `doc` into sentence fragments, returns text, start & end offsets. - - -#### Example: - -``` - 1 1 2 3 - 012345678901234 01234567890123456789012345678901234567 -doc: 'Hello...foo bar', 'Welcome to the U.S. don't be surprised' - -fragment_text: [ - ['Hello...', 'foo bar'], - ['Welcome to the U.S.' 
, 'don't be surprised'] -] -start: [[0, 8],[0, 20]] -end: [[8, 15],[19, 38]] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`doc` -</td> -<td> -A string `Tensor` of shape `[batch]` or `[batch, 1]`. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple of `(fragment_text, start, end)` where: -</td> -</tr> -<tr> -<td> -`fragment_text` -</td> -<td> -A string `RaggedTensor` of shape [batch, (num_sentences)] -with each input broken up into its constituent sentence fragments. -</td> -</tr><tr> -<td> -`start` -</td> -<td> -A int64 `RaggedTensor` of shape [batch, (num_sentences)] -where each entry is the inclusive beginning byte offset of a sentence. -</td> -</tr><tr> -<td> -`end` -</td> -<td> -A int64 `RaggedTensor` of shape [batch, (num_sentences)] -where each entry is the exclusive ending byte offset of a sentence. -</td> -</tr> -</table>
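Reviewer note: a sketch of the sentence-breaking API removed above, assuming the no-argument constructor shown in the deleted doc; the printed fragments follow the doc's example, not verified output:

```python
import tensorflow_text as tf_text

breaker = tf_text.StateBasedSentenceBreaker()
fragments, starts, ends = breaker.break_sentences_with_offsets(
    ["Hello...foo bar"])
print(fragments)        # e.g. <tf.RaggedTensor [[b'Hello...', b'foo bar']]>
print(starts, ends)     # inclusive start / exclusive end byte offsets
```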
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/Tokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/Tokenizer.md deleted file mode 100644 index a0381f2..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/Tokenizer.md +++ /dev/null
@@ -1,122 +0,0 @@ -description: Base class for tokenizer implementations. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.Tokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="tokenize"/> -</div> - -# text.Tokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -Base class for tokenizer implementations. - -Inherits From: [`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.Tokenizer( - name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -A Tokenizer is a <a href="../text/Splitter.md"><code>text.Splitter</code></a> -that splits strings into *tokens*. Tokens generally correspond to short -substrings of the source string. Tokens can be encoded using either strings or -integer ids (where integer ids could be created by hashing strings or by looking -them up in a fixed vocabulary table that maps strings to ids). - -Each Tokenizer subclass must implement a `tokenize` method, which splits each -string in a Tensor into tokens. E.g.: - -``` ->>> class SimpleTokenizer(tf_text.Tokenizer): -... def tokenize(self, input): -... return tf.strings.split(input) ->>> print(SimpleTokenizer().tokenize(["hello world", "this is a test"])) -<tf.RaggedTensor [[b'hello', b'world'], [b'this', b'is', b'a', b'test']]> -``` - -By default, the `split` method simply delegates to `tokenize`. - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>@abc.abstractmethod</code> -<code>tokenize( - input -) -</code></pre> - -Tokenizes the input tensor. - -Splits each string in the input tensor into a sequence of tokens. Tokens -generally correspond to short substrings of the source string. Tokens can be -encoded using either strings or integer ids. - -#### Example: - -``` ->>> print(tf_text.WhitespaceTokenizer().tokenize("small medium large")) -tf.Tensor([b'small' b'medium' b'large'], shape=(3,), dtype=string) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string (or optionally integer) `Tensor` or -`RaggedTensor`. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -An N+1-dimensional UTF-8 string or integer `Tensor` or `RaggedTensor`. 
-For each string from the input tensor, the final, extra dimension contains -the tokens that string was split into. -</td> -</tr> - -</table>
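Reviewer note: the abstract `Tokenizer` base class removed above, sketched as a subclass mirroring the doc's SimpleTokenizer example; `split()` delegates to `tokenize()` by default:

```python
import tensorflow as tf
import tensorflow_text as tf_text

class SimpleTokenizer(tf_text.Tokenizer):
    def tokenize(self, input):
        return tf.strings.split(input)

t = SimpleTokenizer()
print(t.tokenize(["hello world"]))  # <tf.RaggedTensor [[b'hello', b'world']]>
print(t.split(["hello world"]))     # split() is an alias for tokenize()
```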
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/TokenizerWithOffsets.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/TokenizerWithOffsets.md deleted file mode 100644 index 04d376eb..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/TokenizerWithOffsets.md +++ /dev/null
@@ -1,215 +0,0 @@ -description: Base class for tokenizer implementations that return offsets. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.TokenizerWithOffsets" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.TokenizerWithOffsets - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -Base class for tokenizer implementations that return offsets. - -Inherits From: [`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.TokenizerWithOffsets( - name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -The offsets indicate which substring from the input string was used to generate -each token. E.g., if `input` is a single string, then each token `token[i]` was -generated from the substring `input[starts[i]:ends[i]]`. - -Each TokenizerWithOffsets subclass must implement the `tokenize_with_offsets` -method, which returns a tuple containing both the pieces and the start and end -offsets where those pieces occurred in the input string. I.e., if `tokens, -starts, ends = tokenize_with_offsets(s)`, then each token `token[i]` corresponds -with `tf.strings.substr(s, starts[i], ends[i] - starts[i])`. - -If the tokenizer encodes tokens as strings (and not token ids), then it will -usually be the case that these corresponding strings are equal; but that is not -technically required. For example, a tokenizer might choose to downcase strings - -#### Example: - -``` ->>> class CharTokenizer(TokenizerWithOffsets): -... def tokenize_with_offsets(self, input): -... chars, starts = tf.strings.unicode_split_with_offsets(input, 'UTF-8') -... lengths = tf.expand_dims(tf.strings.length(input), -1) -... ends = tf.concat([starts[..., 1:], tf.cast(lengths, tf.int64)], -1) -... return chars, starts, ends -... def tokenize(self, input): -... return self.tokenize_with_offsets(input)[0] ->>> pieces, starts, ends = CharTokenizer().split_with_offsets("a😊c") ->>> print(pieces.numpy(), starts.numpy(), ends.numpy()) -[b'a' b'\xf0\x9f\x98\x8a' b'c'] [0 1 5] [1 5 6] -``` - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. 
- -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>@abc.abstractmethod</code> -<code>tokenize( - input -) -</code></pre> - -Tokenizes the input tensor. - -Splits each string in the input tensor into a sequence of tokens. Tokens -generally correspond to short substrings of the source string. Tokens can be -encoded using either strings or integer ids. - -#### Example: - -``` ->>> print(tf_text.WhitespaceTokenizer().tokenize("small medium large")) -tf.Tensor([b'small' b'medium' b'large'], shape=(3,), dtype=string) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string (or optionally integer) `Tensor` or -`RaggedTensor`. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -An N+1-dimensional UTF-8 string or integer `Tensor` or `RaggedTensor`. -For each string from the input tensor, the final, extra dimension contains -the tokens that string was split into. -</td> -</tr> - -</table> - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>@abc.abstractmethod</code> -<code>tokenize_with_offsets( - input -) -</code></pre> - -Tokenizes the input tensor and returns the result with offsets. - -The offsets indicate which substring from the input string was used to generate -each token. E.g., if `input` is a single string, then each token `token[i]` was -generated from the substring `input[starts[i]:ends[i]]`. - -#### Example: - -``` ->>> splitter = tf_text.WhitespaceTokenizer() ->>> pieces, starts, ends = splitter.tokenize_with_offsets("a bb ccc") ->>> print(pieces.numpy(), starts.numpy(), ends.numpy()) -[b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] ->>> print(tf.strings.substr("a bb ccc", starts, ends-starts)) -tf.Tensor([b'a' b'bb' b'ccc'], shape=(3,), dtype=string) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional UTF-8 string (or optionally integer) `Tensor` or -`RaggedTensor`. 
-</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: - -* `tokens` is an N+1-dimensional UTF-8 string or integer `Tensor` or - `RaggedTensor`. -* `start_offsets` is an N+1-dimensional integer `Tensor` or `RaggedTensor` - containing the starting indices of each token (byte indices for input - strings). -* `end_offsets` is an N+1-dimensional integer `Tensor` or `RaggedTensor` - containing the exclusive ending indices of each token (byte indices for - input strings). </td> </tr> - -</table>
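Reviewer note: the offset invariant documented in the deleted TokenizerWithOffsets.md (each token equals the substring delimited by its start/end byte offsets), demonstrated with `WhitespaceTokenizer` and the doc's example values:

```python
import tensorflow as tf
import tensorflow_text as tf_text

tok = tf_text.WhitespaceTokenizer()
tokens, starts, ends = tok.tokenize_with_offsets("a bb ccc")
print(tokens.numpy(), starts.numpy(), ends.numpy())
# [b'a' b'bb' b'ccc'] [0 2 5] [1 4 8]
print(tf.strings.substr("a bb ccc", starts, ends - starts))
# tf.Tensor([b'a' b'bb' b'ccc'], shape=(3,), dtype=string)
```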
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/UnicodeCharTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/UnicodeCharTokenizer.md deleted file mode 100644 index 960404f..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/UnicodeCharTokenizer.md +++ /dev/null
@@ -1,275 +0,0 @@ -description: Tokenizes a tensor of UTF-8 strings on Unicode character -boundaries. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.UnicodeCharTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="detokenize"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.UnicodeCharTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/unicode_char_tokenizer.py">View -source</a> - -Tokenizes a tensor of UTF-8 strings on Unicode character boundaries. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md), [`Detokenizer`](../text/Detokenizer.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.UnicodeCharTokenizer() -</code></pre> - -<!-- Placeholder for "Used in" --> - -Resulting tokens are integers (unicode codepoints). Scalar input will produce a -`Tensor` output containing the codepoints. Tensor inputs will produce -`RaggedTensor` outputs. - -#### Example: - -``` ->>> tokenizer = tf_text.UnicodeCharTokenizer() ->>> tokens = tokenizer.tokenize("abc") ->>> print(tokens) -tf.Tensor([97 98 99], shape=(3,), dtype=int32) -``` - -``` ->>> tokens = tokenizer.tokenize(["abc", "de"]) ->>> print(tokens) -<tf.RaggedTensor [[97, 98, 99], [100, 101]]> -``` - -Note: any remaining illegal and special UTF-8 characters (like BOM characters) -in the input string will not be treated specially by the tokenizer and show up -in the output tokens. These should be normalized out before or after -tokenization if they are unwanted in the application. - -``` ->>> t = ["abc" + chr(0xfffe) + chr(0x1fffe) ] ->>> tokens = tokenizer.tokenize(t) ->>> print(tokens.to_list()) -[[97, 98, 99, 65534, 131070]] -``` - -Passing malformed UTF-8 will result in unpredictable behavior. Make sure inputs -conform to UTF-8. - -## Methods - -<h3 id="detokenize"><code>detokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/unicode_char_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>detokenize( - input, name=None -) -</code></pre> - -Detokenizes input codepoints (integers) to UTF-8 strings. - -#### Example: - -``` ->>> tokenizer = tf_text.UnicodeCharTokenizer() ->>> tokens = tokenizer.tokenize(["abc", "de"]) ->>> s = tokenizer.detokenize(tokens) ->>> print(s) -tf.Tensor([b'abc' b'de'], shape=(2,), dtype=string) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor` or `Tensor` of codepoints (ints) with a rank of at -least 1. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. 
-</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A N-1 dimensional string tensor of the text corresponding to the UTF-8 -codepoints in the input. -</td> -</tr> - -</table> - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/unicode_char_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - input -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings on Unicode character boundaries. - -Input strings are split on character boundaries using -unicode_decode_with_offsets. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of tokenized text. The returned shape is the shape of the -input tensor with an added ragged dimension for tokens (characters) of -each string. -</td> -</tr> - -</table> - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/unicode_char_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - input -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings to Unicode characters. - -#### Example: - -``` ->>> tokenizer = tf_text.UnicodeCharTokenizer() ->>> tokens = tokenizer.tokenize_with_offsets("a"+chr(8364)+chr(10340)) ->>> print(tokens[0]) -tf.Tensor([ 97 8364 10340], shape=(3,), dtype=int32) ->>> print(tokens[1]) -tf.Tensor([0 1 4], shape=(3,), dtype=int64) ->>> print(tokens[2]) -tf.Tensor([1 4 7], shape=(3,), dtype=int64) -``` - -The `start_offsets` and `end_offsets` are in byte indices of the original -string. 
When calling with multiple string inputs, the offset indices will be -relative to the individual source strings: - -``` ->>> toks = tokenizer.tokenize_with_offsets(["a"+chr(8364), "b"+chr(10300) ]) ->>> print(toks[0]) -<tf.RaggedTensor [[97, 8364], [98, 10300]]> ->>> print(toks[1]) -<tf.RaggedTensor [[0, 1], [0, 1]]> ->>> print(toks[2]) -<tf.RaggedTensor [[1, 4], [1, 4]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: - -* `tokens`: A `RaggedTensor` of code points (integer type). -* `start_offsets`: A `RaggedTensor` of the tokens' starting byte offset. -* `end_offsets`: A `RaggedTensor` of the tokens' ending byte offset. </td> - </tr> - -</table>
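For quick reference, a minimal usage sketch of the `text.UnicodeCharTokenizer` API documented in the file removed above; it assumes TensorFlow and TensorFlow Text are installed and importable as `tf` and `tf_text`, and the calls and expected outputs follow the examples in that doc.

```python
import tensorflow as tf
import tensorflow_text as tf_text

tokenizer = tf_text.UnicodeCharTokenizer()

# Scalar input -> Tensor of codepoints; batched input -> RaggedTensor.
print(tokenizer.tokenize("abc"))          # tf.Tensor([97 98 99], ...)
print(tokenizer.tokenize(["abc", "de"]))  # <tf.RaggedTensor [[97, 98, 99], [100, 101]]>

# detokenize() rebuilds UTF-8 strings from the codepoints.
tokens = tokenizer.tokenize(["abc", "de"])
print(tokenizer.detokenize(tokens))       # tf.Tensor([b'abc' b'de'], ...)

# tokenize_with_offsets() also returns start/end byte offsets per codepoint.
codepoints, starts, ends = tokenizer.tokenize_with_offsets("a" + chr(8364))
print(codepoints.numpy(), starts.numpy(), ends.numpy())
```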
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/UnicodeScriptTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/UnicodeScriptTokenizer.md deleted file mode 100644 index a161c29f..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/UnicodeScriptTokenizer.md +++ /dev/null
@@ -1,240 +0,0 @@ -description: Tokenizes UTF-8 by splitting when there is a change in Unicode -script. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.UnicodeScriptTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.UnicodeScriptTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/unicode_script_tokenizer.py">View -source</a> - -Tokenizes UTF-8 by splitting when there is a change in Unicode script. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.UnicodeScriptTokenizer( - keep_whitespace=False -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -By default, this tokenizer leaves out scripts matching the whitespace unicode -property (use the `keep_whitespace` argument to keep it), so in this case the -results are similar to the `WhitespaceTokenizer`. Any punctuation will get its -own token (since it is in a different script), and any script change in the -input string will be the location of a split. - -#### Example: - -``` ->>> tokenizer = tf_text.UnicodeScriptTokenizer() ->>> tokens = tokenizer.tokenize(["xy.,z de", "fg?h", "abαβ"]) ->>> print(tokens.to_list()) -[[b'xy', b'.,', b'z', b'de'], [b'fg', b'?', b'h'], - [b'ab', b'\xce\xb1\xce\xb2']] -``` - -``` ->>> tokens = tokenizer.tokenize(u"累計7239人") ->>> print(tokens) -tf.Tensor([b'\xe7\xb4\xaf\xe8\xa8\x88' b'7239' b'\xe4\xba\xba'], shape=(3,), - dtype=string) -``` - -Both the punctuation and the whitespace in the first string have been split, but -the punctuation run is present as a token while the whitespace isn't emitted (by -default). The third example shows the case of a script change without any -whitespace. This results in a split at that boundary point. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`keep_whitespace` -</td> -<td> -A boolean that specifices whether to emit whitespace -tokens (default `False`). -</td> -</tr> -</table> - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. 
- -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/unicode_script_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - input -) -</code></pre> - -Tokenizes UTF-8 by splitting when there is a change in Unicode script. - -The strings are split when successive tokens change their Unicode script or -change being whitespace or not. The script codes used correspond to -International Components for Unicode (ICU) UScriptCode values. See: -http://icu-project.org/apiref/icu4c/uscript_8h.html - -ICU-defined whitespace characters are dropped, unless the `keep_whitespace` -option was specified at construction time. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of tokenized text. The returned shape is the shape of the -input tensor with an added ragged dimension for tokens of each string. -</td> -</tr> - -</table> - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/unicode_script_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - input -) -</code></pre> - -Tokenizes UTF-8 by splitting when there is a change in Unicode script. - -The strings are split when a change in the Unicode script is detected between -sequential tokens. The script codes used correspond to International Components -for Unicode (ICU) UScriptCode values. See: -http://icu-project.org/apiref/icu4c/uscript_8h.html - -ICU defined whitespace characters are dropped, unless the keep_whitespace option -was specified at construction time. - -#### Example: - -``` ->>> tokenizer = tf_text.UnicodeScriptTokenizer() ->>> tokens = tokenizer.tokenize_with_offsets(["xy.,z de", "abαβ"]) ->>> print(tokens[0].to_list()) -[[b'xy', b'.,', b'z', b'de'], [b'ab', b'\xce\xb1\xce\xb2']] ->>> print(tokens[1].to_list()) -[[0, 2, 4, 6], [0, 2]] ->>> print(tokens[2].to_list()) -[[2, 4, 5, 8], [2, 6]] -``` - -``` ->>> tokens = tokenizer.tokenize_with_offsets(u"累計7239人") ->>> print(tokens[0]) -tf.Tensor([b'\xe7\xb4\xaf\xe8\xa8\x88' b'7239' b'\xe4\xba\xba'], - shape=(3,), dtype=string) ->>> print(tokens[1]) -tf.Tensor([ 0 6 10], shape=(3,), dtype=int64) ->>> print(tokens[2]) -tf.Tensor([ 6 10 13], shape=(3,), dtype=int64) -``` - -The start_offsets and end_offsets are in byte indices of the original string. 
-When calling with multiple string inputs, the offset indices will be relative to -the individual source strings. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: - -* `tokens`: A `RaggedTensor` of tokenized text. -* `start_offsets`: A `RaggedTensor` of the tokens' starting byte offset. -* `end_offsets`: A `RaggedTensor` of the tokens' ending byte offset. </td> - </tr> - -</table>
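A short sketch of the `text.UnicodeScriptTokenizer` behavior described in the file removed above, again assuming `tensorflow_text` is importable as `tf_text`; inputs and outputs mirror the deleted doc's examples.

```python
import tensorflow_text as tf_text

# Splits occur on changes of Unicode script; ICU whitespace is dropped
# unless keep_whitespace=True is passed at construction time.
tokenizer = tf_text.UnicodeScriptTokenizer()
print(tokenizer.tokenize(["xy.,z de", "abαβ"]).to_list())
# [[b'xy', b'.,', b'z', b'de'], [b'ab', b'\xce\xb1\xce\xb2']]

# Offsets are byte indices into each source string.
tokens, starts, ends = tokenizer.tokenize_with_offsets(u"累計7239人")
print(tokens.numpy(), starts.numpy(), ends.numpy())

# With keep_whitespace=True, whitespace runs appear as their own tokens.
ws_tokenizer = tf_text.UnicodeScriptTokenizer(keep_whitespace=True)
print(ws_tokenizer.tokenize(["a b"]).to_list())
```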
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/WaterfallTrimmer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/WaterfallTrimmer.md deleted file mode 100644 index 7399ed1..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/WaterfallTrimmer.md +++ /dev/null
@@ -1,199 +0,0 @@ -description: A Trimmer that allocates a length budget to segments in order. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.WaterfallTrimmer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="generate_mask"/> -<meta itemprop="property" content="trim"/> -</div> - -# text.WaterfallTrimmer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/trimmer_ops.py">View source</a> - - - -A `Trimmer` that allocates a length budget to segments in order. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.WaterfallTrimmer( - max_seq_length, axis=-1 -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -A `Trimmer` that allocates a length budget to segments in order. It selects -elements to drop, according to a max sequence length budget, and then applies -this mask to actually drop the elements. See `generate_mask()` for more details. - -#### Example: - -``` ->>> a = tf.ragged.constant([['a', 'b', 'c'], [], ['d']]) ->>> b = tf.ragged.constant([['1', '2', '3'], [], ['4', '5', '6', '7']]) ->>> trimmer = tf_text.WaterfallTrimmer(4) ->>> trimmer.trim([a, b]) -[<tf.RaggedTensor [[b'a', b'b', b'c'], [], [b'd']]>, - <tf.RaggedTensor [[b'1'], [], [b'4', b'5', b'6']]>] -``` - -Here, for the first pair of elements, `['a', 'b', 'c']` and `['1', '2', '3']`, -the `'2'` and `'3'` are dropped to fit the sequence within the max sequence -length budget. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`max_seq_length` -</td> -<td> -a scalar `Tensor` or a 1D `Tensor` of type int32 that -describes the number max number of elements allowed in a batch. If a -scalar is provided, the value is broadcasted and applied to all values -across the batch. -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -Axis to apply trimming on. -</td> -</tr> -</table> - - - -## Methods - -<h3 id="generate_mask"><code>generate_mask</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/trimmer_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>generate_mask( - segments -) -</code></pre> - -Calculates a truncation mask given a per-batch budget. - -Calculate a truncation mask given a budget of the max number of items for -each or all batch row. The allocation of the budget is done using a -'waterfall' algorithm. This algorithm allocates quota in a left-to-right -manner and fill up the buckets until we run out of budget. - -For example if the budget of [5] and we have segments of size -[3, 4, 2], the truncate budget will be allocated as [3, 2, 0]. - -The budget can be a scalar, in which case the same budget is broadcasted -and applied to all batch rows. It can also be a 1D `Tensor` of size -`batch_size`, in which each batch row i will have a budget corresponding to -`per_batch_quota[i]`. 
- -#### Example: - -``` ->>> a = tf.ragged.constant([['a', 'b', 'c'], [], ['d']]) ->>> b = tf.ragged.constant([['1', '2', '3'], [], ['4', '5', '6', '7']]) ->>> trimmer = tf_text.WaterfallTrimmer(4) ->>> trimmer.generate_mask([a, b]) -[<tf.RaggedTensor [[True, True, True], [], [True]]>, - <tf.RaggedTensor [[True, False, False], [], [True, True, True, False]]>] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`segments` -</td> -<td> -A list of `RaggedTensor` each w/ a shape of [num_batch, -(num_items)]. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -a list with len(segments) of `RaggedTensor`s, see superclass for details. -</td> -</tr> - -</table> - - - -<h3 id="trim"><code>trim</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/trimmer_ops.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>trim( - segments -) -</code></pre> - -Truncate the list of `segments`. - -Truncate the list of `segments` using the truncation strategy defined by -`generate_mask`. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`segments` -</td> -<td> -A list of `RaggedTensor`s w/ shape [num_batch, (num_items)]. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -a list of `RaggedTensor`s with len(segments) number of items and where -each item has the same shape as its counterpart in `segments` and -with unwanted values dropped. The values are dropped according to the -`TruncationStrategy` defined. -</td> -</tr> - -</table> - - - - -
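A minimal sketch of the `text.WaterfallTrimmer` workflow covered by the file removed above, assuming `tensorflow` and `tensorflow_text` imports; the budget allocation and outputs follow the deleted doc's example.

```python
import tensorflow as tf
import tensorflow_text as tf_text

# Two aligned segments; the waterfall budget of 4 is spent left to right.
a = tf.ragged.constant([['a', 'b', 'c'], [], ['d']])
b = tf.ragged.constant([['1', '2', '3'], [], ['4', '5', '6', '7']])

trimmer = tf_text.WaterfallTrimmer(4)

# generate_mask() marks which items fit within the per-row budget...
masks = trimmer.generate_mask([a, b])
# ...and trim() applies that mask, dropping everything else.
trimmed = trimmer.trim([a, b])
print(trimmed)
# [<tf.RaggedTensor [[b'a', b'b', b'c'], [], [b'd']]>,
#  <tf.RaggedTensor [[b'1'], [], [b'4', b'5', b'6']]>]
```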
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/WhitespaceTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/WhitespaceTokenizer.md deleted file mode 100644 index 4838903..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/WhitespaceTokenizer.md +++ /dev/null
@@ -1,172 +0,0 @@ -description: Tokenizes a tensor of UTF-8 strings on whitespaces. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.WhitespaceTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -</div> - -# text.WhitespaceTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/whitespace_tokenizer.py">View -source</a> - -Tokenizes a tensor of UTF-8 strings on whitespaces. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.WhitespaceTokenizer() -</code></pre> - -<!-- Placeholder for "Used in" --> - -## Methods - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/whitespace_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - input -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings on whitespaces. - -The strings are split on ICU defined whitespace characters. These whitespace -characters are dropped. - -#### Example: - -``` ->>> WhitespaceTokenizer().tokenize("small medium large") -<tf.Tensor: shape=(3,), dtype=string, numpy=array([b'small', b'medium', -b'large'], dtype=object)> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of tokenized text. The returned shape is the shape of the -input tensor with an added ragged dimension for tokens of each string. 
-</td> -</tr> - -</table> - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/whitespace_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - input -) -</code></pre> - -Tokenizes a tensor of UTF-8 strings on whitespaces. - -The strings are split on ICU defined whitespace characters. These whitespace -characters are dropped. - -#### Example: - -``` ->>> splitter = WhitespaceTokenizer() ->>> pieces, starts, ends = splitter.tokenize_with_offsets("a bb ccc") ->>> print(pieces.numpy(), starts.numpy(), ends.numpy()) -[b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: - -* `tokens`: A `RaggedTensor` of tokenized text. -* `start_offsets`: A `RaggedTensor` of the tokens' starting byte offset. -* `end_offsets`: A `RaggedTensor` of the tokens' ending byte offset. </td> - </tr> - -</table>
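A minimal sketch of the `text.WhitespaceTokenizer` API from the file removed above, assuming `tensorflow_text` is importable as `tf_text`; calls and outputs follow the deleted doc's examples.

```python
import tensorflow_text as tf_text

tokenizer = tf_text.WhitespaceTokenizer()

# ICU-defined whitespace characters split the input and are dropped.
print(tokenizer.tokenize("small medium large"))
# [b'small' b'medium' b'large']

# Byte offsets locate each token in the original string.
pieces, starts, ends = tokenizer.tokenize_with_offsets("a bb ccc")
print(pieces.numpy(), starts.numpy(), ends.numpy())
# [b'a' b'bb' b'ccc'] [0 2 5] [1 4 8]
```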
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/WordShape_cls.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/WordShape_cls.md deleted file mode 100644 index fadc02af9..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/WordShape_cls.md +++ /dev/null
@@ -1,446 +0,0 @@ -description: Values for the 'pattern' arg of the wordshape op. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.WordShape" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="BEGINS_WITH_OPEN_QUOTE"/> -<meta itemprop="property" content="BEGINS_WITH_PUNCT_OR_SYMBOL"/> -<meta itemprop="property" content="ENDS_WITH_CLOSE_QUOTE"/> -<meta itemprop="property" content="ENDS_WITH_ELLIPSIS"/> -<meta itemprop="property" content="ENDS_WITH_EMOTICON"/> -<meta itemprop="property" content="ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL"/> -<meta itemprop="property" content="ENDS_WITH_MULTIPLE_TERMINAL_PUNCT"/> -<meta itemprop="property" content="ENDS_WITH_PUNCT_OR_SYMBOL"/> -<meta itemprop="property" content="ENDS_WITH_SENTENCE_TERMINAL"/> -<meta itemprop="property" content="ENDS_WITH_TERMINAL_PUNCT"/> -<meta itemprop="property" content="HAS_CURRENCY_SYMBOL"/> -<meta itemprop="property" content="HAS_EMOJI"/> -<meta itemprop="property" content="HAS_MATH_SYMBOL"/> -<meta itemprop="property" content="HAS_MIXED_CASE"/> -<meta itemprop="property" content="HAS_NON_LETTER"/> -<meta itemprop="property" content="HAS_NO_DIGITS"/> -<meta itemprop="property" content="HAS_NO_PUNCT_OR_SYMBOL"/> -<meta itemprop="property" content="HAS_NO_QUOTES"/> -<meta itemprop="property" content="HAS_ONLY_DIGITS"/> -<meta itemprop="property" content="HAS_PUNCTUATION_DASH"/> -<meta itemprop="property" content="HAS_QUOTE"/> -<meta itemprop="property" content="HAS_SOME_DIGITS"/> -<meta itemprop="property" content="HAS_SOME_PUNCT_OR_SYMBOL"/> -<meta itemprop="property" content="HAS_TITLE_CASE"/> -<meta itemprop="property" content="IS_ACRONYM_WITH_PERIODS"/> -<meta itemprop="property" content="IS_EMOTICON"/> -<meta itemprop="property" content="IS_LOWERCASE"/> -<meta itemprop="property" content="IS_MIXED_CASE_LETTERS"/> -<meta itemprop="property" content="IS_NUMERIC_VALUE"/> -<meta itemprop="property" content="IS_PUNCT_OR_SYMBOL"/> -<meta itemprop="property" content="IS_UPPERCASE"/> -<meta itemprop="property" content="IS_WHITESPACE"/> -</div> - -# text.WordShape - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/wordshape_ops.py">View -source</a> - -Values for the 'pattern' arg of the wordshape op. - -<!-- Placeholder for "Used in" --> - -The supported wordshape identifiers are: - -* <a href="../text/WordShape_cls.md#BEGINS_WITH_OPEN_QUOTE"><code>WordShape.BEGINS_WITH_OPEN_QUOTE</code></a>: - The input begins with an open quote. 
- - The following strings are considered open quotes: - - ``` - " QUOTATION MARK - ' APOSTROPHE - ` GRAVE ACCENT - `` Pair of GRAVE ACCENTs - \uFF02 FULLWIDTH QUOTATION MARK - \uFF07 FULLWIDTH APOSTROPHE - \u00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - \u2018 LEFT SINGLE QUOTATION MARK - \u201A SINGLE LOW-9 QUOTATION MARK - \u201B SINGLE HIGH-REVERSED-9 QUOTATION MARK - \u201C LEFT DOUBLE QUOTATION MARK - \u201E DOUBLE LOW-9 QUOTATION MARK - \u201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK - \u2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - \u300C LEFT CORNER BRACKET - \u300E LEFT WHITE CORNER BRACKET - \u301D REVERSED DOUBLE PRIME QUOTATION MARK - \u2E42 DOUBLE LOW-REVERSED-9 QUOTATION MARK - \uFF62 HALFWIDTH LEFT CORNER BRACKET - \uFE41 PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET - \uFE43 PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET - ``` - - Note: U+B4 (acute accent) not included. - -* <a href="../text/WordShape_cls.md#BEGINS_WITH_PUNCT_OR_SYMBOL"><code>WordShape.BEGINS_WITH_PUNCT_OR_SYMBOL</code></a>: - The input starts with a punctuation or symbol character. - -* <a href="../text/WordShape_cls.md#ENDS_WITH_CLOSE_QUOTE"><code>WordShape.ENDS_WITH_CLOSE_QUOTE</code></a>: - The input ends witha closing quote character. - - The following strings are considered close quotes: - - ``` - " QUOTATION MARK - ' APOSTROPHE - ` GRAVE ACCENT - '' Pair of APOSTROPHEs - \uFF02 FULLWIDTH QUOTATION MARK - \uFF07 FULLWIDTH APOSTROPHE - \u00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - \u2019 RIGHT SINGLE QUOTATION MARK - \u201D RIGHT DOUBLE QUOTATION MARK - \u203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - \u300D RIGHT CORNER BRACKET - \u300F RIGHT WHITE CORNER BRACKET - \u301E DOUBLE PRIME QUOTATION MARK - \u301F LOW DOUBLE PRIME QUOTATION MARK - \uFE42 PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET - \uFE44 PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET - \uFF63 HALFWIDTH RIGHT CORNER BRACKET - ``` - - Note: U+B4 (ACUTE ACCENT) is not included. - -* <a href="../text/WordShape_cls.md#ENDS_WITH_ELLIPSIS"><code>WordShape.ENDS_WITH_ELLIPSIS</code></a>: - The input ends with an ellipsis (i.e. with three or more periods or a - unicode ellipsis character). - -* <a href="../text/WordShape_cls.md#ENDS_WITH_EMOTICON"><code>WordShape.ENDS_WITH_EMOTICON</code></a>: - The input ends with an emoticon. - -* <a href="../text/WordShape_cls.md#ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL"><code>WordShape.ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL</code></a>: - The input ends with multiple sentence-terminal characters. - -* <a href="../text/WordShape_cls.md#ENDS_WITH_MULTIPLE_TERMINAL_PUNCT"><code>WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT</code></a>: - The input ends with multiple terminal-punctuation characters. - -* <a href="../text/WordShape_cls.md#ENDS_WITH_PUNCT_OR_SYMBOL"><code>WordShape.ENDS_WITH_PUNCT_OR_SYMBOL</code></a>: - The input ends with a punctuation or symbol character. - -* <a href="../text/WordShape_cls.md#ENDS_WITH_SENTENCE_TERMINAL"><code>WordShape.ENDS_WITH_SENTENCE_TERMINAL</code></a>: - The input ends with a sentence-terminal character. - -* <a href="../text/WordShape_cls.md#ENDS_WITH_TERMINAL_PUNCT"><code>WordShape.ENDS_WITH_TERMINAL_PUNCT</code></a>: - The input ends with a terminal-punctuation character. - -* <a href="../text/WordShape_cls.md#HAS_CURRENCY_SYMBOL"><code>WordShape.HAS_CURRENCY_SYMBOL</code></a>: - The input contains a currency symbol. 
- -* <a href="../text/WordShape_cls.md#HAS_EMOJI"><code>WordShape.HAS_EMOJI</code></a>: - The input contains an emoji character. - - See http://www.unicode.org/Public/emoji/1.0//emoji-data.txt. Emojis are in - unicode ranges `2600-26FF`, `1F300-1F6FF`, and `1F900-1F9FF`. - -* <a href="../text/WordShape_cls.md#HAS_MATH_SYMBOL"><code>WordShape.HAS_MATH_SYMBOL</code></a>: - The input contains a mathematical symbol. - -* <a href="../text/WordShape_cls.md#HAS_MIXED_CASE"><code>WordShape.HAS_MIXED_CASE</code></a>: - The input contains both uppercase and lowercase letterforms. - -* <a href="../text/WordShape_cls.md#HAS_NON_LETTER"><code>WordShape.HAS_NON_LETTER</code></a>: - The input contains a non-letter character. - -* <a href="../text/WordShape_cls.md#HAS_NO_DIGITS"><code>WordShape.HAS_NO_DIGITS</code></a>: - The input contains no digit characters. - -* <a href="../text/WordShape_cls.md#HAS_NO_PUNCT_OR_SYMBOL"><code>WordShape.HAS_NO_PUNCT_OR_SYMBOL</code></a>: - The input contains no unicode punctuation or symbol characters. - -* <a href="../text/WordShape_cls.md#HAS_NO_QUOTES"><code>WordShape.HAS_NO_QUOTES</code></a>: - The input string contains no quote characters. - -* <a href="../text/WordShape_cls.md#HAS_ONLY_DIGITS"><code>WordShape.HAS_ONLY_DIGITS</code></a>: - The input consists entirely of unicode digit characters. - -* <a href="../text/WordShape_cls.md#HAS_PUNCTUATION_DASH"><code>WordShape.HAS_PUNCTUATION_DASH</code></a>: - The input contains at least one unicode dash character. - - Note that this uses the Pd (Dash) unicode property. This property will not - match to soft-hyphens and katakana middle dot characters. - -* <a href="../text/WordShape_cls.md#HAS_QUOTE"><code>WordShape.HAS_QUOTE</code></a>: - The input starts or ends with a unicode quotation mark. - -* <a href="../text/WordShape_cls.md#HAS_SOME_DIGITS"><code>WordShape.HAS_SOME_DIGITS</code></a>: - The input contains a mix of digit characters and non-digit characters. - -* <a href="../text/WordShape_cls.md#HAS_SOME_PUNCT_OR_SYMBOL"><code>WordShape.HAS_SOME_PUNCT_OR_SYMBOL</code></a>: - The input contains a mix of punctuation or symbol characters, and - non-punctuation non-symbol characters. - -* <a href="../text/WordShape_cls.md#HAS_TITLE_CASE"><code>WordShape.HAS_TITLE_CASE</code></a>: - The input has title case (i.e. the first character is upper or title case, - and the remaining characters are lowercase). - -* <a href="../text/WordShape_cls.md#IS_ACRONYM_WITH_PERIODS"><code>WordShape.IS_ACRONYM_WITH_PERIODS</code></a>: - The input is a period-separated acronym. This matches for strings of the - form "I.B.M." but not "IBM". - -* <a href="../text/WordShape_cls.md#IS_EMOTICON"><code>WordShape.IS_EMOTICON</code></a>: - The input is a single emoticon. - -* <a href="../text/WordShape_cls.md#IS_LOWERCASE"><code>WordShape.IS_LOWERCASE</code></a>: - The input contains only lowercase letterforms. - -* <a href="../text/WordShape_cls.md#IS_MIXED_CASE_LETTERS"><code>WordShape.IS_MIXED_CASE_LETTERS</code></a>: - The input contains only uppercase and lowercase letterforms. - -* <a href="../text/WordShape_cls.md#IS_NUMERIC_VALUE"><code>WordShape.IS_NUMERIC_VALUE</code></a>: - The input is parseable as a numeric value. This will match a fairly broad - set of floating point and integer representations (but not Nan or Inf). - -* <a href="../text/WordShape_cls.md#IS_PUNCT_OR_SYMBOL"><code>WordShape.IS_PUNCT_OR_SYMBOL</code></a>: - The input contains only punctuation and symbol characters. 
- -* <a href="../text/WordShape_cls.md#IS_UPPERCASE"><code>WordShape.IS_UPPERCASE</code></a>: - The input contains only uppercase letterforms. - -* <a href="../text/WordShape_cls.md#IS_WHITESPACE"><code>WordShape.IS_WHITESPACE</code></a>: - The input consists entirely of whitespace. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Class Variables</h2></th></tr> - -<tr> -<td> -BEGINS_WITH_OPEN_QUOTE<a id="BEGINS_WITH_OPEN_QUOTE"></a> -</td> -<td> -``<WordShape.BEGINS_WITH_OPEN_QUOTE: '\`\`.*|["\'\`'"‘‚‛“«„‟‹「『〝⹂「﹁﹃][^"\'\`'"‘‚‛“«„‟‹「『〝⹂「﹁﹃]*'>`` -</td> -</tr><tr> -<td> -BEGINS_WITH_PUNCT_OR_SYMBOL<a id="BEGINS_WITH_PUNCT_OR_SYMBOL"></a> -</td> -<td> -`<WordShape.BEGINS_WITH_PUNCT_OR_SYMBOL: '[\\p{P}\\p{S}].*'>` -</td> -</tr><tr> -<td> -ENDS_WITH_CLOSE_QUOTE<a id="ENDS_WITH_CLOSE_QUOTE"></a> -</td> -<td> -``<WordShape.ENDS_WITH_CLOSE_QUOTE: '.*\'\'|[^"\'\`'"»’”›」』〞〟﹂﹄」]*["\'\`'"»’”›」』〞〟﹂﹄」]'>`` -</td> -</tr><tr> -<td> -ENDS_WITH_ELLIPSIS<a id="ENDS_WITH_ELLIPSIS"></a> -</td> -<td> -`<WordShape.ENDS_WITH_ELLIPSIS: '.*(\\.{3}|[…⋯])'>` -</td> -</tr><tr> -<td> -ENDS_WITH_EMOTICON<a id="ENDS_WITH_EMOTICON"></a> -</td> -<td> -`<WordShape.ENDS_WITH_EMOTICON: ".*(:\\-\\)|:\\)|:o\\)|:\\]|:3|:>|=\\]|=\\)|:\\}|:\\^\\)|:\\-D|:\\-\\)\\)|:\\-\\)\\)\\)|:\\-\\)\\)\\)\\)|:\\-\\)\\)\\)\\)\\)|>:\\[|:\\-\\(|:\\(|:\\-c|:c|:\\-<|:<|:\\-\\[|:\\[|:\\{|;\\(|:\\-\\|\\||:@|>:\\(|:'\\-\\(|:'\\(|:'\\-\\)|:'\\)|D:<|>:O|:\\-O|:\\-o|:\\*|:\\-\\*|:\\^\\*|;\\-\\)|;\\)|\\*\\-\\)|\\*\\)|;\\-\\]|;\\]|;\\^\\)|:\\-,|>:P|:\\-P|:p|=p|:\\-p|=p|:P|=P|;p|;\\-p|;P|;\\-P>:\\\\|>:/|:\\-/|:\\-\\.|:/|:\\\\|=/|=\\\\|:\\||:\\-\\||:\\$|:\\-\\#|:\\#|O:\\-\\)|0:\\-\\)|0:\\)|0;\\^\\)|>:\\)|>;\\)|>:\\-\\)|\\}:\\-\\)|\\}:\\)|3:\\-\\)|>_>\\^|\\^<_<|\\|;\\-\\)|\\|\\-O|:\\-J|:\\-\\&|:\\&|\\#\\-\\)|%\\-\\)|%\\)|<:\\-\\||\\~:\\-\\\\|\\*<\\|:\\-\\)|=:o\\]|,:\\-\\)|7:\\^\\]|</3|<3|8\\-\\)|\\^_\\^|:D|:\\-D|=D|\\^_\\^;;|O=\\)|\\}=\\)|B\\)|B\\-\\)|=\\||\\-_\\-|o_o;|u_u|:\\-\\\\|:s|:S|:\\-s|:\\-S|;\\*|;\\-\\*:\\(|=\\(|>\\.<|>:\\-\\(|>:\\(|>=\\(|;_;|T_T|='\\(|>_<|D:|:o|:\\-o|=o|o\\.o|:O|:\\-O|=O|O\\.O|x_x|X\\-\\(|X\\(|X\\-o|X\\-O|:X\\)|\\(=\\^\\.\\^=\\)|\\(=\\^\\.\\.\\^=\\)|=\\^_\\^=|\\-<@%|:\\(\\|\\)|:\\(:\\)|\\(\\]:\\{|<\\\\3|\\~@\\~|8'\\(|XD|DX\\:っ\\)|\\:っC|ಠ\\_ಠ)$">` -</td> -</tr><tr> -<td> -ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL<a id="ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL"></a> -</td> -<td> -`<WordShape.ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL: '.*[\\p{Sentence_Terminal}]{2}'>` -</td> -</tr><tr> -<td> -ENDS_WITH_MULTIPLE_TERMINAL_PUNCT<a id="ENDS_WITH_MULTIPLE_TERMINAL_PUNCT"></a> -</td> -<td> -`<WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT: '.*[\\p{Terminal_Punctuation}]{2}'>` -</td> -</tr><tr> -<td> -ENDS_WITH_PUNCT_OR_SYMBOL<a id="ENDS_WITH_PUNCT_OR_SYMBOL"></a> -</td> -<td> -`<WordShape.ENDS_WITH_PUNCT_OR_SYMBOL: '.*[\\p{P}\\p{S}]'>` -</td> -</tr><tr> -<td> -ENDS_WITH_SENTENCE_TERMINAL<a id="ENDS_WITH_SENTENCE_TERMINAL"></a> -</td> -<td> -`<WordShape.ENDS_WITH_SENTENCE_TERMINAL: '.*[\\p{Sentence_Terminal}]'>` -</td> -</tr><tr> -<td> -ENDS_WITH_TERMINAL_PUNCT<a id="ENDS_WITH_TERMINAL_PUNCT"></a> -</td> -<td> -`<WordShape.ENDS_WITH_TERMINAL_PUNCT: '.*[\\p{Terminal_Punctuation}]'>` -</td> -</tr><tr> -<td> -HAS_CURRENCY_SYMBOL<a id="HAS_CURRENCY_SYMBOL"></a> -</td> -<td> -`<WordShape.HAS_CURRENCY_SYMBOL: '.*\\p{Sc}.*'>` -</td> -</tr><tr> -<td> -HAS_EMOJI<a id="HAS_EMOJI"></a> -</td> -<td> -`<WordShape.HAS_EMOJI: 
'.*(.*[‼⁉ℹ↔-↙↩↪⌚⌛⌨⏏⏩-⏳⏸-⏺Ⓜ▪▫▶◀◻-◾☀-⛿✂✅✈-✍✏✒✔✖✝✡✨✳✴❄❇❌❎❓-❕❗❣❤➕-➗⤴⤵⬅-⬇⬛⬜⭐⭕〰〽㊗㊙🀄🃏🅰🅱🅾🅿🆎🆑-🆚🇦-🇿🈁🈂🈚🈯🈲-🈺🉐🉑🌀-\U0001f6ff🤀-🧿\U0001fa70-\U0001fa74\U0001fa78-\U0001fa7a\U0001fa80-\U0001fa86\U0001fa90-\U0001faa8\U0001fab0-\U0001fab6\U0001fac0-\U0001fac2\U0001fad0-\U0001fad6].*)$'>` -</td> -</tr><tr> -<td> -HAS_MATH_SYMBOL<a id="HAS_MATH_SYMBOL"></a> -</td> -<td> -`<WordShape.HAS_MATH_SYMBOL: '.*\\p{Sm}.*'>` -</td> -</tr><tr> -<td> -HAS_MIXED_CASE<a id="HAS_MIXED_CASE"></a> -</td> -<td> -`<WordShape.HAS_MIXED_CASE: '.*\\p{Lu}.*\\p{Ll}.*|.*\\p{Ll}.*\\p{Lu}.*'>` -</td> -</tr><tr> -<td> -HAS_NON_LETTER<a id="HAS_NON_LETTER"></a> -</td> -<td> -`<WordShape.HAS_NON_LETTER: '.*\\P{L}.*'>` -</td> -</tr><tr> -<td> -HAS_NO_DIGITS<a id="HAS_NO_DIGITS"></a> -</td> -<td> -`<WordShape.HAS_NO_DIGITS: '\\P{Nd}*'>` -</td> -</tr><tr> -<td> -HAS_NO_PUNCT_OR_SYMBOL<a id="HAS_NO_PUNCT_OR_SYMBOL"></a> -</td> -<td> -`<WordShape.HAS_NO_PUNCT_OR_SYMBOL: '[^\\p{P}\\p{S}]*'>` -</td> -</tr><tr> -<td> -HAS_NO_QUOTES<a id="HAS_NO_QUOTES"></a> -</td> -<td> -``<WordShape.HAS_NO_QUOTES: '[^"\'\`\\p{Quotation_Mark}]*'>`` -</td> -</tr><tr> -<td> -HAS_ONLY_DIGITS<a id="HAS_ONLY_DIGITS"></a> -</td> -<td> -`<WordShape.HAS_ONLY_DIGITS: '\\p{Nd}+'>` -</td> -</tr><tr> -<td> -HAS_PUNCTUATION_DASH<a id="HAS_PUNCTUATION_DASH"></a> -</td> -<td> -`<WordShape.HAS_PUNCTUATION_DASH: '.*\\p{Pd}+.*'>` -</td> -</tr><tr> -<td> -HAS_QUOTE<a id="HAS_QUOTE"></a> -</td> -<td> -``<WordShape.HAS_QUOTE: '^[\`\\p{Quotation_Mark}].*|.*[\`\\p{Quotation_Mark}]$'>`` -</td> -</tr><tr> -<td> -HAS_SOME_DIGITS<a id="HAS_SOME_DIGITS"></a> -</td> -<td> -`<WordShape.HAS_SOME_DIGITS: '.*\\P{Nd}\\p{Nd}.*|.*\\p{Nd}\\P{Nd}.*'>` -</td> -</tr><tr> -<td> -HAS_SOME_PUNCT_OR_SYMBOL<a id="HAS_SOME_PUNCT_OR_SYMBOL"></a> -</td> -<td> -`<WordShape.HAS_SOME_PUNCT_OR_SYMBOL: '.*[^\\p{P}\\p{S}][\\p{P}\\p{S}].*|.*[\\p{P}\\p{S}][^\\p{P}\\p{S}].*'>` -</td> -</tr><tr> -<td> -HAS_TITLE_CASE<a id="HAS_TITLE_CASE"></a> -</td> -<td> -`<WordShape.HAS_TITLE_CASE: '\\P{L}*[\\p{Lu}\\p{Lt}]\\p{Ll}+.*'>` -</td> -</tr><tr> -<td> -IS_ACRONYM_WITH_PERIODS<a id="IS_ACRONYM_WITH_PERIODS"></a> -</td> -<td> -`<WordShape.IS_ACRONYM_WITH_PERIODS: '(\\p{Lu}\\.)+'>` -</td> -</tr><tr> -<td> -IS_EMOTICON<a id="IS_EMOTICON"></a> -</td> -<td> -`<WordShape.IS_EMOTICON: ":\\-\\)|:\\)|:o\\)|:\\]|:3|:>|=\\]|=\\)|:\\}|:\\^\\)|:\\-D|:\\-\\)\\)|:\\-\\)\\)\\)|:\\-\\)\\)\\)\\)|:\\-\\)\\)\\)\\)\\)|>:\\[|:\\-\\(|:\\(|:\\-c|:c|:\\-<|:<|:\\-\\[|:\\[|:\\{|;\\(|:\\-\\|\\||:@|>:\\(|:'\\-\\(|:'\\(|:'\\-\\)|:'\\)|D:<|>:O|:\\-O|:\\-o|:\\*|:\\-\\*|:\\^\\*|;\\-\\)|;\\)|\\*\\-\\)|\\*\\)|;\\-\\]|;\\]|;\\^\\)|:\\-,|>:P|:\\-P|:p|=p|:\\-p|=p|:P|=P|;p|;\\-p|;P|;\\-P>:\\\\|>:/|:\\-/|:\\-\\.|:/|:\\\\|=/|=\\\\|:\\||:\\-\\||:\\$|:\\-\\#|:\\#|O:\\-\\)|0:\\-\\)|0:\\)|0;\\^\\)|>:\\)|>;\\)|>:\\-\\)|\\}:\\-\\)|\\}:\\)|3:\\-\\)|>_>\\^|\\^<_<|\\|;\\-\\)|\\|\\-O|:\\-J|:\\-\\&|:\\&|\\#\\-\\)|%\\-\\)|%\\)|<:\\-\\||\\~:\\-\\\\|\\*<\\|:\\-\\)|=:o\\]|,:\\-\\)|7:\\^\\]|</3|<3|8\\-\\)|\\^_\\^|:D|:\\-D|=D|\\^_\\^;;|O=\\)|\\}=\\)|B\\)|B\\-\\)|=\\||\\-_\\-|o_o;|u_u|:\\-\\\\|:s|:S|:\\-s|:\\-S|;\\*|;\\-\\*:\\(|=\\(|>\\.<|>:\\-\\(|>:\\(|>=\\(|;_;|T_T|='\\(|>_<|D:|:o|:\\-o|=o|o\\.o|:O|:\\-O|=O|O\\.O|x_x|X\\-\\(|X\\(|X\\-o|X\\-O|:X\\)|\\(=\\^\\.\\^=\\)|\\(=\\^\\.\\.\\^=\\)|=\\^_\\^=|\\-<@%|:\\(\\|\\)|:\\(:\\)|\\(\\]:\\{|<\\\\3|\\~@\\~|8'\\(|XD|DX\\:っ\\)|\\:っC|ಠ\\_ಠ">` -</td> -</tr><tr> -<td> -IS_LOWERCASE<a id="IS_LOWERCASE"></a> -</td> -<td> -`<WordShape.IS_LOWERCASE: '\\p{Ll}+'>` -</td> -</tr><tr> -<td> -IS_MIXED_CASE_LETTERS<a id="IS_MIXED_CASE_LETTERS"></a> -</td> 
-<td> -`<WordShape.IS_MIXED_CASE_LETTERS: '\\p{L}*\\p{Lu}\\p{L}*\\p{Ll}\\p{L}*|\\p{L}*\\p{Ll}\\p{L}*\\p{Lu}\\p{L}*'>` -</td> -</tr><tr> -<td> -IS_NUMERIC_VALUE<a id="IS_NUMERIC_VALUE"></a> -</td> -<td> -`<WordShape.IS_NUMERIC_VALUE: '([+-]?((\\p{Nd}+\\.?\\p{Nd}*)|(\\.\\p{Nd}+)))([eE]-?\\p{Nd}+)?'>` -</td> -</tr><tr> -<td> -IS_PUNCT_OR_SYMBOL<a id="IS_PUNCT_OR_SYMBOL"></a> -</td> -<td> -`<WordShape.IS_PUNCT_OR_SYMBOL: '[\\p{P}|\\p{S}]+'>` -</td> -</tr><tr> -<td> -IS_UPPERCASE<a id="IS_UPPERCASE"></a> -</td> -<td> -`<WordShape.IS_UPPERCASE: '\\p{Lu}+'>` -</td> -</tr><tr> -<td> -IS_WHITESPACE<a id="IS_WHITESPACE"></a> -</td> -<td> -`<WordShape.IS_WHITESPACE: '\\p{Whitespace}+'>` -</td> -</tr> -</table>
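A brief sketch of how the `text.WordShape` values from the file removed above are typically consumed. The deleted doc describes them only as "values for the 'pattern' arg of the wordshape op", so the `tf_text.wordshape(tokens, pattern)` call below is an assumption about that companion op rather than something spelled out in the removed file.

```python
import tensorflow_text as tf_text

tokens = ["Hello", "ibm", "I.B.M.", "1234", ":-)"]

# Each WordShape value is a regex-backed predicate evaluated per token;
# the result is a boolean tensor aligned with the input tokens.
print(tf_text.wordshape(tokens, tf_text.WordShape.HAS_TITLE_CASE))
print(tf_text.wordshape(tokens, tf_text.WordShape.IS_ACRONYM_WITH_PERIODS))
print(tf_text.wordshape(tokens, tf_text.WordShape.HAS_ONLY_DIGITS))
```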
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/WordpieceTokenizer.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/WordpieceTokenizer.md deleted file mode 100644 index dcc7e87a..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/WordpieceTokenizer.md +++ /dev/null
@@ -1,436 +0,0 @@ -description: Tokenizes a tensor of UTF-8 string tokens into subword pieces. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.WordpieceTokenizer" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="detokenize"/> -<meta itemprop="property" content="split"/> -<meta itemprop="property" content="split_with_offsets"/> -<meta itemprop="property" content="tokenize"/> -<meta itemprop="property" content="tokenize_with_offsets"/> -<meta itemprop="property" content="vocab_size"/> -</div> - -# text.WordpieceTokenizer - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/wordpiece_tokenizer.py">View -source</a> - -Tokenizes a tensor of UTF-8 string tokens into subword pieces. - -Inherits From: [`TokenizerWithOffsets`](../text/TokenizerWithOffsets.md), -[`Tokenizer`](../text/Tokenizer.md), -[`SplitterWithOffsets`](../text/SplitterWithOffsets.md), -[`Splitter`](../text/Splitter.md), [`Detokenizer`](../text/Detokenizer.md) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.WordpieceTokenizer( - vocab_lookup_table, suffix_indicator='##', max_bytes_per_word=100, - max_chars_per_token=None, token_out_type=dtypes.int64, - unknown_token='[UNK]', split_unknown_characters=False -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Each UTF-8 string token in the input is split into its corresponding wordpieces, -drawing from the list in the file `vocab_lookup_table`. - -Algorithm summary: For each token, the longest token prefix that is in the -vocabulary is split off. Any part of the token that remains is prefixed using -the `suffix_indicator`, and the process of removing the longest token prefix -continues. The `unknown_token` (UNK) is used when what remains of the token is -not in the vocabulary, or if the token is too long. - -When `token_out_type` is tf.string, the output tensor contains strings in the -vocabulary (or UNK). When it is an integer type, the output tensor contains -indices into the vocabulary list (with UNK being after the last entry). - -#### Example: - -``` ->>> import pathlib ->>> pathlib.Path('/tmp/tok_vocab.txt').write_text( -... "they ##' ##re the great ##est".replace(' ', '\n')) ->>> tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', -... token_out_type=tf.string) -``` - -``` ->>> tokenizer.tokenize(["they're", "the", "greatest"]) -<tf.RaggedTensor [[b'they', b"##'", b'##re'], [b'the'], [b'great', b'##est']]> -``` - -``` ->>> tokenizer.tokenize(["they", "are", "great"]) -<tf.RaggedTensor [[b'they'], [b'[UNK]'], [b'great']]> -``` - -``` ->>> int_tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', -... token_out_type=tf.int32) -``` - -``` ->>> int_tokenizer.tokenize(["the", "greatest"]) -<tf.RaggedTensor [[3], [4, 5]]> -``` - -``` ->>> int_tokenizer.tokenize(["really", "the", "greatest"]) -<tf.RaggedTensor [[6], [3], [4, 5]]> -``` - -Tensor or ragged tensor inputs result in ragged tensor outputs. Scalar inputs -(which are just a single token) result in tensor outputs. 
- -``` ->>> tokenizer.tokenize("they're") -<tf.Tensor: shape=(3,), dtype=string, numpy=array([b'they', b"##'", b'##re'], -dtype=object)> ->>> tokenizer.tokenize(["they're"]) -<tf.RaggedTensor [[b'they', b"##'", b'##re']]> ->>> tokenizer.tokenize(tf.ragged.constant([["they're"]])) -<tf.RaggedTensor [[[b'they', b"##'", b'##re']]]> -``` - -Empty strings are tokenized into empty (ragged) tensors. - -``` ->>> tokenizer.tokenize([""]) -<tf.RaggedTensor [[]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`vocab_lookup_table` -</td> -<td> -A lookup table implementing the LookupInterface -containing the vocabulary of subwords or a string which is the file path -to the vocab.txt file. -</td> -</tr><tr> -<td> -`suffix_indicator` -</td> -<td> -(optional) The characters prepended to a wordpiece to -indicate that it is a suffix to another subword. Default is '##'. -</td> -</tr><tr> -<td> -`max_bytes_per_word` -</td> -<td> -(optional) Max size of input token. Default is 100. -</td> -</tr><tr> -<td> -`max_chars_per_token` -</td> -<td> -(optional) Max size of subwords, excluding suffix -indicator. If known, providing this improves the efficiency of decoding -long words. -</td> -</tr><tr> -<td> -`token_out_type` -</td> -<td> -(optional) The type of the token to return. This can be -`tf.int64` or `tf.int32` IDs, or `tf.string` subwords. The default is -`tf.int64`. -</td> -</tr><tr> -<td> -`unknown_token` -</td> -<td> -(optional) The string value to substitute for an unknown -token. Default is "[UNK]". If set to `None`, no substitution occurs. -If `token_out_type` is `tf.int32`/`tf.int64`, the `vocab_lookup_table` -is used (after substitution) to convert the unknown token to an integer. -</td> -</tr><tr> -<td> -`split_unknown_characters` -</td> -<td> -(optional) Whether to split out single unknown -characters as subtokens. If False (default), words containing unknown -characters will be treated as single unknown tokens. -</td> -</tr> -</table> - -## Methods - -<h3 id="detokenize"><code>detokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/wordpiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>detokenize( - token_ids -) -</code></pre> - -Convert a `Tensor` or `RaggedTensor` of wordpiece IDs to string-words. - -``` ->>> import pathlib ->>> pathlib.Path('/tmp/detok_vocab.txt').write_text( -... 'a b c ##a ##b ##c'.replace(' ', '\n')) ->>> wordpiece = WordpieceTokenizer('/tmp/detok_vocab.txt') ->>> token_ids = [[0, 4, 5, 2, 5, 5, 5]] ->>> wordpiece.detokenize(token_ids) -<tf.RaggedTensor [[b'abc', b'cccc']]> -``` - -The word pieces are joined along the innermost axis to make words. So the result -has the same rank as the input, but the innermost axis of the result indexes -words instead of word pieces. - -The shape transformation is: `[..., wordpieces] => [..., words]` - -When the input shape is `[..., words, wordpieces]` (like the output of -<a href="../text/WordpieceTokenizer.md#tokenize"><code>WordpieceTokenizer.tokenize</code></a>) -the result's shape is `[..., words, 1]`. The additional ragged axis can be -removed using `words.merge_dims(-2, -1)`. - -Note: This method assumes wordpiece IDs are dense on the interval `[0, -vocab_size)`. 
- -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`token_ids` -</td> -<td> -A `RaggedTensor` or `Tensor` with an int dtype. Must have -`ndims >= 2` -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` with dtype `string` and the rank as the input -`token_ids`. -</td> -</tr> - -</table> - -<h3 id="split"><code>split</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split( - input -) -</code></pre> - -Alias for -<a href="../text/Tokenizer.md#tokenize"><code>Tokenizer.tokenize</code></a>. - -<h3 id="split_with_offsets"><code>split_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/tokenization.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>split_with_offsets( - input -) -</code></pre> - -Alias for -<a href="../text/TokenizerWithOffsets.md#tokenize_with_offsets"><code>TokenizerWithOffsets.tokenize_with_offsets</code></a>. - -<h3 id="tokenize"><code>tokenize</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/wordpiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize( - input -) -</code></pre> - -Tokenizes a tensor of UTF-8 string tokens further into subword tokens. - -### Example: - -``` ->>> import pathlib ->>> pathlib.Path('/tmp/tok_vocab.txt').write_text( -... "they ##' ##re the great ##est".replace(' ', '\n')) ->>> tokens = [["they're", 'the', 'greatest']] ->>> tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', -... token_out_type=tf.string) ->>> tokenizer.tokenize(tokens) -<tf.RaggedTensor [[[b'they', b"##'", b'##re'], [b'the'], - [b'great', b'##est']]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A `RaggedTensor` of tokens where `tokens[i1...iN, j]` is the string -contents (or ID in the vocab_lookup_table representing that string) -of the `jth` token in `input[i1...iN]` -</td> -</tr> - -</table> - - - -<h3 id="tokenize_with_offsets"><code>tokenize_with_offsets</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/wordpiece_tokenizer.py">View source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>tokenize_with_offsets( - input -) -</code></pre> - -Tokenizes a tensor of UTF-8 string tokens further into subword tokens. - -### Example: - -``` ->>> import pathlib ->>> pathlib.Path('/tmp/tok_vocab.txt').write_text( -... 
"they ##' ##re the great ##est".replace(' ', '\n')) ->>> tokens = [["they're", 'the', 'greatest']] ->>> tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', -... token_out_type=tf.string) ->>> subtokens, starts, ends = tokenizer.tokenize_with_offsets(tokens) ->>> subtokens -<tf.RaggedTensor [[[b'they', b"##'", b'##re'], [b'the'], - [b'great', b'##est']]]> ->>> starts -<tf.RaggedTensor [[[0, 4, 5], [0], [0, 5]]]> ->>> ends -<tf.RaggedTensor [[[4, 5, 7], [3], [5, 8]]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`input` -</td> -<td> -An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple `(tokens, start_offsets, end_offsets)` where: - -tokens[i1...iN, j]: is a `RaggedTensor` of the string contents (or ID in the -vocab_lookup_table representing that string) of the `jth` token in -`input[i1...iN]`. start_offsets[i1...iN, j]: is a `RaggedTensor` of the byte -offsets for the inclusive start of the `jth` token in `input[i1...iN]`. -end_offsets[i1...iN, j]: is a `RaggedTensor` of the byte offsets for the -exclusive end of the `jth` token in `input[i`...iN]` (exclusive, i.e., first -byte after the end of the token). </td> </tr> - -</table> - -<h3 id="vocab_size"><code>vocab_size</code></h3> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/wordpiece_tokenizer.py">View -source</a> - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>vocab_size( - name=None -) -</code></pre> - -Returns the vocabulary size. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Args</th></tr> - -<tr> -<td> -`name` -</td> -<td> -The name argument that is passed to the op function. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2">Returns</th></tr> -<tr class="alt"> -<td colspan="2"> -A scalar representing the vocabulary size. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/_api_cache.json b/third_party/tensorflow-text/src/docs/api_docs/python/text/_api_cache.json deleted file mode 100644 index 87ccf99..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/_api_cache.json +++ /dev/null
@@ -1,539 +0,0 @@ -{ - "duplicate_of": { - "text.BertTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.BertTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.BertTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.BertTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.BertTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.BertTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.BertTokenizer.split": "text.Tokenizer.split", - "text.BertTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.Detokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.Detokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.Detokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.Detokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.Detokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.Detokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.Detokenizer.__new__": "text.BertTokenizer.__new__", - "text.FirstNItemSelector.__eq__": "text.keras.layers.ToDense.__eq__", - "text.FirstNItemSelector.__ge__": "text.keras.layers.ToDense.__ge__", - "text.FirstNItemSelector.__gt__": "text.keras.layers.ToDense.__gt__", - "text.FirstNItemSelector.__le__": "text.keras.layers.ToDense.__le__", - "text.FirstNItemSelector.__lt__": "text.keras.layers.ToDense.__lt__", - "text.FirstNItemSelector.__ne__": "text.keras.layers.ToDense.__ne__", - "text.FirstNItemSelector.__new__": "text.BertTokenizer.__new__", - "text.HubModuleSplitter.__eq__": "text.keras.layers.ToDense.__eq__", - "text.HubModuleSplitter.__ge__": "text.keras.layers.ToDense.__ge__", - "text.HubModuleSplitter.__gt__": "text.keras.layers.ToDense.__gt__", - "text.HubModuleSplitter.__le__": "text.keras.layers.ToDense.__le__", - "text.HubModuleSplitter.__lt__": "text.keras.layers.ToDense.__lt__", - "text.HubModuleSplitter.__ne__": "text.keras.layers.ToDense.__ne__", - "text.HubModuleSplitter.__new__": "text.BertTokenizer.__new__", - "text.HubModuleTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.HubModuleTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.HubModuleTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.HubModuleTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.HubModuleTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.HubModuleTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.HubModuleTokenizer.__new__": "text.BertTokenizer.__new__", - "text.HubModuleTokenizer.split": "text.Tokenizer.split", - "text.HubModuleTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.MaskValuesChooser.__eq__": "text.keras.layers.ToDense.__eq__", - "text.MaskValuesChooser.__ge__": "text.keras.layers.ToDense.__ge__", - "text.MaskValuesChooser.__gt__": "text.keras.layers.ToDense.__gt__", - "text.MaskValuesChooser.__le__": "text.keras.layers.ToDense.__le__", - "text.MaskValuesChooser.__lt__": "text.keras.layers.ToDense.__lt__", - "text.MaskValuesChooser.__ne__": "text.keras.layers.ToDense.__ne__", - "text.MaskValuesChooser.__new__": "text.BertTokenizer.__new__", - "text.RandomItemSelector.__eq__": "text.keras.layers.ToDense.__eq__", - "text.RandomItemSelector.__ge__": "text.keras.layers.ToDense.__ge__", - "text.RandomItemSelector.__gt__": "text.keras.layers.ToDense.__gt__", - "text.RandomItemSelector.__le__": "text.keras.layers.ToDense.__le__", - "text.RandomItemSelector.__lt__": "text.keras.layers.ToDense.__lt__", - 
"text.RandomItemSelector.__ne__": "text.keras.layers.ToDense.__ne__", - "text.RandomItemSelector.__new__": "text.BertTokenizer.__new__", - "text.RandomItemSelector.unselectable_ids": "text.FirstNItemSelector.unselectable_ids", - "text.RegexSplitter.__eq__": "text.keras.layers.ToDense.__eq__", - "text.RegexSplitter.__ge__": "text.keras.layers.ToDense.__ge__", - "text.RegexSplitter.__gt__": "text.keras.layers.ToDense.__gt__", - "text.RegexSplitter.__le__": "text.keras.layers.ToDense.__le__", - "text.RegexSplitter.__lt__": "text.keras.layers.ToDense.__lt__", - "text.RegexSplitter.__ne__": "text.keras.layers.ToDense.__ne__", - "text.RegexSplitter.__new__": "text.BertTokenizer.__new__", - "text.RoundRobinTrimmer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.RoundRobinTrimmer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.RoundRobinTrimmer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.RoundRobinTrimmer.__le__": "text.keras.layers.ToDense.__le__", - "text.RoundRobinTrimmer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.RoundRobinTrimmer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.RoundRobinTrimmer.__new__": "text.BertTokenizer.__new__", - "text.SentencepieceTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.SentencepieceTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.SentencepieceTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.SentencepieceTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.SentencepieceTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.SentencepieceTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.SentencepieceTokenizer.__new__": "text.BertTokenizer.__new__", - "text.SentencepieceTokenizer.split": "text.Tokenizer.split", - "text.SentencepieceTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.SplitMergeFromLogitsTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.SplitMergeFromLogitsTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.SplitMergeFromLogitsTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.SplitMergeFromLogitsTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.SplitMergeFromLogitsTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.SplitMergeFromLogitsTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.SplitMergeFromLogitsTokenizer.__new__": "text.BertTokenizer.__new__", - "text.SplitMergeFromLogitsTokenizer.split": "text.Tokenizer.split", - "text.SplitMergeFromLogitsTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.SplitMergeTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.SplitMergeTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.SplitMergeTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.SplitMergeTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.SplitMergeTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.SplitMergeTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.SplitMergeTokenizer.__new__": "text.BertTokenizer.__new__", - "text.SplitMergeTokenizer.split": "text.Tokenizer.split", - "text.SplitMergeTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.Splitter.__eq__": "text.keras.layers.ToDense.__eq__", - "text.Splitter.__ge__": "text.keras.layers.ToDense.__ge__", - "text.Splitter.__gt__": "text.keras.layers.ToDense.__gt__", - "text.Splitter.__init__": "text.Detokenizer.__init__", - 
"text.Splitter.__le__": "text.keras.layers.ToDense.__le__", - "text.Splitter.__lt__": "text.keras.layers.ToDense.__lt__", - "text.Splitter.__ne__": "text.keras.layers.ToDense.__ne__", - "text.Splitter.__new__": "text.BertTokenizer.__new__", - "text.SplitterWithOffsets.__eq__": "text.keras.layers.ToDense.__eq__", - "text.SplitterWithOffsets.__ge__": "text.keras.layers.ToDense.__ge__", - "text.SplitterWithOffsets.__gt__": "text.keras.layers.ToDense.__gt__", - "text.SplitterWithOffsets.__init__": "text.Detokenizer.__init__", - "text.SplitterWithOffsets.__le__": "text.keras.layers.ToDense.__le__", - "text.SplitterWithOffsets.__lt__": "text.keras.layers.ToDense.__lt__", - "text.SplitterWithOffsets.__ne__": "text.keras.layers.ToDense.__ne__", - "text.SplitterWithOffsets.__new__": "text.BertTokenizer.__new__", - "text.SplitterWithOffsets.split": "text.Splitter.split", - "text.StateBasedSentenceBreaker.__eq__": "text.keras.layers.ToDense.__eq__", - "text.StateBasedSentenceBreaker.__ge__": "text.keras.layers.ToDense.__ge__", - "text.StateBasedSentenceBreaker.__gt__": "text.keras.layers.ToDense.__gt__", - "text.StateBasedSentenceBreaker.__le__": "text.keras.layers.ToDense.__le__", - "text.StateBasedSentenceBreaker.__lt__": "text.keras.layers.ToDense.__lt__", - "text.StateBasedSentenceBreaker.__ne__": "text.keras.layers.ToDense.__ne__", - "text.StateBasedSentenceBreaker.__new__": "text.BertTokenizer.__new__", - "text.Tokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.Tokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.Tokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.Tokenizer.__init__": "text.Detokenizer.__init__", - "text.Tokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.Tokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.Tokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.Tokenizer.__new__": "text.BertTokenizer.__new__", - "text.TokenizerWithOffsets.__eq__": "text.keras.layers.ToDense.__eq__", - "text.TokenizerWithOffsets.__ge__": "text.keras.layers.ToDense.__ge__", - "text.TokenizerWithOffsets.__gt__": "text.keras.layers.ToDense.__gt__", - "text.TokenizerWithOffsets.__init__": "text.Detokenizer.__init__", - "text.TokenizerWithOffsets.__le__": "text.keras.layers.ToDense.__le__", - "text.TokenizerWithOffsets.__lt__": "text.keras.layers.ToDense.__lt__", - "text.TokenizerWithOffsets.__ne__": "text.keras.layers.ToDense.__ne__", - "text.TokenizerWithOffsets.__new__": "text.BertTokenizer.__new__", - "text.TokenizerWithOffsets.split": "text.Tokenizer.split", - "text.TokenizerWithOffsets.tokenize": "text.Tokenizer.tokenize", - "text.UnicodeCharTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.UnicodeCharTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.UnicodeCharTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.UnicodeCharTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.UnicodeCharTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.UnicodeCharTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.UnicodeCharTokenizer.__new__": "text.BertTokenizer.__new__", - "text.UnicodeCharTokenizer.split": "text.Tokenizer.split", - "text.UnicodeCharTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.UnicodeScriptTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.UnicodeScriptTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.UnicodeScriptTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - 
"text.UnicodeScriptTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.UnicodeScriptTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.UnicodeScriptTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.UnicodeScriptTokenizer.__new__": "text.BertTokenizer.__new__", - "text.UnicodeScriptTokenizer.split": "text.Tokenizer.split", - "text.UnicodeScriptTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.WaterfallTrimmer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.WaterfallTrimmer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.WaterfallTrimmer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.WaterfallTrimmer.__le__": "text.keras.layers.ToDense.__le__", - "text.WaterfallTrimmer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.WaterfallTrimmer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.WaterfallTrimmer.__new__": "text.BertTokenizer.__new__", - "text.WaterfallTrimmer.trim": "text.RoundRobinTrimmer.trim", - "text.WhitespaceTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.WhitespaceTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.WhitespaceTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.WhitespaceTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.WhitespaceTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.WhitespaceTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.WhitespaceTokenizer.__new__": "text.BertTokenizer.__new__", - "text.WhitespaceTokenizer.split": "text.Tokenizer.split", - "text.WhitespaceTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets", - "text.WordpieceTokenizer.__eq__": "text.keras.layers.ToDense.__eq__", - "text.WordpieceTokenizer.__ge__": "text.keras.layers.ToDense.__ge__", - "text.WordpieceTokenizer.__gt__": "text.keras.layers.ToDense.__gt__", - "text.WordpieceTokenizer.__le__": "text.keras.layers.ToDense.__le__", - "text.WordpieceTokenizer.__lt__": "text.keras.layers.ToDense.__lt__", - "text.WordpieceTokenizer.__ne__": "text.keras.layers.ToDense.__ne__", - "text.WordpieceTokenizer.__new__": "text.BertTokenizer.__new__", - "text.WordpieceTokenizer.split": "text.Tokenizer.split", - "text.WordpieceTokenizer.split_with_offsets": "text.TokenizerWithOffsets.split_with_offsets" - }, - "is_fragment": { - "text": false, - "text.BertTokenizer": false, - "text.BertTokenizer.__eq__": true, - "text.BertTokenizer.__ge__": true, - "text.BertTokenizer.__gt__": true, - "text.BertTokenizer.__init__": true, - "text.BertTokenizer.__le__": true, - "text.BertTokenizer.__lt__": true, - "text.BertTokenizer.__ne__": true, - "text.BertTokenizer.__new__": true, - "text.BertTokenizer.detokenize": true, - "text.BertTokenizer.split": true, - "text.BertTokenizer.split_with_offsets": true, - "text.BertTokenizer.tokenize": true, - "text.BertTokenizer.tokenize_with_offsets": true, - "text.Detokenizer": false, - "text.Detokenizer.__eq__": true, - "text.Detokenizer.__ge__": true, - "text.Detokenizer.__gt__": true, - "text.Detokenizer.__init__": true, - "text.Detokenizer.__le__": true, - "text.Detokenizer.__lt__": true, - "text.Detokenizer.__ne__": true, - "text.Detokenizer.__new__": true, - "text.Detokenizer.detokenize": true, - "text.FirstNItemSelector": false, - "text.FirstNItemSelector.__eq__": true, - "text.FirstNItemSelector.__ge__": true, - "text.FirstNItemSelector.__gt__": true, - "text.FirstNItemSelector.__init__": true, - "text.FirstNItemSelector.__le__": true, - "text.FirstNItemSelector.__lt__": true, - 
"text.FirstNItemSelector.__ne__": true, - "text.FirstNItemSelector.__new__": true, - "text.FirstNItemSelector.get_selectable": true, - "text.FirstNItemSelector.get_selection_mask": true, - "text.FirstNItemSelector.unselectable_ids": true, - "text.HubModuleSplitter": false, - "text.HubModuleSplitter.__eq__": true, - "text.HubModuleSplitter.__ge__": true, - "text.HubModuleSplitter.__gt__": true, - "text.HubModuleSplitter.__init__": true, - "text.HubModuleSplitter.__le__": true, - "text.HubModuleSplitter.__lt__": true, - "text.HubModuleSplitter.__ne__": true, - "text.HubModuleSplitter.__new__": true, - "text.HubModuleSplitter.split": true, - "text.HubModuleSplitter.split_with_offsets": true, - "text.HubModuleTokenizer": false, - "text.HubModuleTokenizer.__eq__": true, - "text.HubModuleTokenizer.__ge__": true, - "text.HubModuleTokenizer.__gt__": true, - "text.HubModuleTokenizer.__init__": true, - "text.HubModuleTokenizer.__le__": true, - "text.HubModuleTokenizer.__lt__": true, - "text.HubModuleTokenizer.__ne__": true, - "text.HubModuleTokenizer.__new__": true, - "text.HubModuleTokenizer.split": true, - "text.HubModuleTokenizer.split_with_offsets": true, - "text.HubModuleTokenizer.tokenize": true, - "text.HubModuleTokenizer.tokenize_with_offsets": true, - "text.MaskValuesChooser": false, - "text.MaskValuesChooser.__eq__": true, - "text.MaskValuesChooser.__ge__": true, - "text.MaskValuesChooser.__gt__": true, - "text.MaskValuesChooser.__init__": true, - "text.MaskValuesChooser.__le__": true, - "text.MaskValuesChooser.__lt__": true, - "text.MaskValuesChooser.__ne__": true, - "text.MaskValuesChooser.__new__": true, - "text.MaskValuesChooser.get_mask_values": true, - "text.MaskValuesChooser.mask_token": true, - "text.MaskValuesChooser.random_token_rate": true, - "text.MaskValuesChooser.vocab_size": true, - "text.RandomItemSelector": false, - "text.RandomItemSelector.__eq__": true, - "text.RandomItemSelector.__ge__": true, - "text.RandomItemSelector.__gt__": true, - "text.RandomItemSelector.__init__": true, - "text.RandomItemSelector.__le__": true, - "text.RandomItemSelector.__lt__": true, - "text.RandomItemSelector.__ne__": true, - "text.RandomItemSelector.__new__": true, - "text.RandomItemSelector.get_selectable": true, - "text.RandomItemSelector.get_selection_mask": true, - "text.RandomItemSelector.max_selections_per_batch": true, - "text.RandomItemSelector.selection_rate": true, - "text.RandomItemSelector.shuffle_fn": true, - "text.RandomItemSelector.unselectable_ids": true, - "text.Reduction": false, - "text.Reduction.MEAN": true, - "text.Reduction.STRING_JOIN": true, - "text.Reduction.SUM": true, - "text.RegexSplitter": false, - "text.RegexSplitter.__eq__": true, - "text.RegexSplitter.__ge__": true, - "text.RegexSplitter.__gt__": true, - "text.RegexSplitter.__init__": true, - "text.RegexSplitter.__le__": true, - "text.RegexSplitter.__lt__": true, - "text.RegexSplitter.__ne__": true, - "text.RegexSplitter.__new__": true, - "text.RegexSplitter.split": true, - "text.RegexSplitter.split_with_offsets": true, - "text.RoundRobinTrimmer": false, - "text.RoundRobinTrimmer.__eq__": true, - "text.RoundRobinTrimmer.__ge__": true, - "text.RoundRobinTrimmer.__gt__": true, - "text.RoundRobinTrimmer.__init__": true, - "text.RoundRobinTrimmer.__le__": true, - "text.RoundRobinTrimmer.__lt__": true, - "text.RoundRobinTrimmer.__ne__": true, - "text.RoundRobinTrimmer.__new__": true, - "text.RoundRobinTrimmer.generate_mask": true, - "text.RoundRobinTrimmer.trim": true, - "text.SentencepieceTokenizer": false, - 
"text.SentencepieceTokenizer.__eq__": true, - "text.SentencepieceTokenizer.__ge__": true, - "text.SentencepieceTokenizer.__gt__": true, - "text.SentencepieceTokenizer.__init__": true, - "text.SentencepieceTokenizer.__le__": true, - "text.SentencepieceTokenizer.__lt__": true, - "text.SentencepieceTokenizer.__ne__": true, - "text.SentencepieceTokenizer.__new__": true, - "text.SentencepieceTokenizer.detokenize": true, - "text.SentencepieceTokenizer.id_to_string": true, - "text.SentencepieceTokenizer.split": true, - "text.SentencepieceTokenizer.split_with_offsets": true, - "text.SentencepieceTokenizer.string_to_id": true, - "text.SentencepieceTokenizer.tokenize": true, - "text.SentencepieceTokenizer.tokenize_with_offsets": true, - "text.SentencepieceTokenizer.vocab_size": true, - "text.SplitMergeFromLogitsTokenizer": false, - "text.SplitMergeFromLogitsTokenizer.__eq__": true, - "text.SplitMergeFromLogitsTokenizer.__ge__": true, - "text.SplitMergeFromLogitsTokenizer.__gt__": true, - "text.SplitMergeFromLogitsTokenizer.__init__": true, - "text.SplitMergeFromLogitsTokenizer.__le__": true, - "text.SplitMergeFromLogitsTokenizer.__lt__": true, - "text.SplitMergeFromLogitsTokenizer.__ne__": true, - "text.SplitMergeFromLogitsTokenizer.__new__": true, - "text.SplitMergeFromLogitsTokenizer.split": true, - "text.SplitMergeFromLogitsTokenizer.split_with_offsets": true, - "text.SplitMergeFromLogitsTokenizer.tokenize": true, - "text.SplitMergeFromLogitsTokenizer.tokenize_with_offsets": true, - "text.SplitMergeTokenizer": false, - "text.SplitMergeTokenizer.__eq__": true, - "text.SplitMergeTokenizer.__ge__": true, - "text.SplitMergeTokenizer.__gt__": true, - "text.SplitMergeTokenizer.__init__": true, - "text.SplitMergeTokenizer.__le__": true, - "text.SplitMergeTokenizer.__lt__": true, - "text.SplitMergeTokenizer.__ne__": true, - "text.SplitMergeTokenizer.__new__": true, - "text.SplitMergeTokenizer.split": true, - "text.SplitMergeTokenizer.split_with_offsets": true, - "text.SplitMergeTokenizer.tokenize": true, - "text.SplitMergeTokenizer.tokenize_with_offsets": true, - "text.Splitter": false, - "text.Splitter.__eq__": true, - "text.Splitter.__ge__": true, - "text.Splitter.__gt__": true, - "text.Splitter.__init__": true, - "text.Splitter.__le__": true, - "text.Splitter.__lt__": true, - "text.Splitter.__ne__": true, - "text.Splitter.__new__": true, - "text.Splitter.split": true, - "text.SplitterWithOffsets": false, - "text.SplitterWithOffsets.__eq__": true, - "text.SplitterWithOffsets.__ge__": true, - "text.SplitterWithOffsets.__gt__": true, - "text.SplitterWithOffsets.__init__": true, - "text.SplitterWithOffsets.__le__": true, - "text.SplitterWithOffsets.__lt__": true, - "text.SplitterWithOffsets.__ne__": true, - "text.SplitterWithOffsets.__new__": true, - "text.SplitterWithOffsets.split": true, - "text.SplitterWithOffsets.split_with_offsets": true, - "text.StateBasedSentenceBreaker": false, - "text.StateBasedSentenceBreaker.__eq__": true, - "text.StateBasedSentenceBreaker.__ge__": true, - "text.StateBasedSentenceBreaker.__gt__": true, - "text.StateBasedSentenceBreaker.__init__": true, - "text.StateBasedSentenceBreaker.__le__": true, - "text.StateBasedSentenceBreaker.__lt__": true, - "text.StateBasedSentenceBreaker.__ne__": true, - "text.StateBasedSentenceBreaker.__new__": true, - "text.StateBasedSentenceBreaker.break_sentences": true, - "text.StateBasedSentenceBreaker.break_sentences_with_offsets": true, - "text.Tokenizer": false, - "text.Tokenizer.__eq__": true, - "text.Tokenizer.__ge__": true, - 
"text.Tokenizer.__gt__": true, - "text.Tokenizer.__init__": true, - "text.Tokenizer.__le__": true, - "text.Tokenizer.__lt__": true, - "text.Tokenizer.__ne__": true, - "text.Tokenizer.__new__": true, - "text.Tokenizer.split": true, - "text.Tokenizer.tokenize": true, - "text.TokenizerWithOffsets": false, - "text.TokenizerWithOffsets.__eq__": true, - "text.TokenizerWithOffsets.__ge__": true, - "text.TokenizerWithOffsets.__gt__": true, - "text.TokenizerWithOffsets.__init__": true, - "text.TokenizerWithOffsets.__le__": true, - "text.TokenizerWithOffsets.__lt__": true, - "text.TokenizerWithOffsets.__ne__": true, - "text.TokenizerWithOffsets.__new__": true, - "text.TokenizerWithOffsets.split": true, - "text.TokenizerWithOffsets.split_with_offsets": true, - "text.TokenizerWithOffsets.tokenize": true, - "text.TokenizerWithOffsets.tokenize_with_offsets": true, - "text.UnicodeCharTokenizer": false, - "text.UnicodeCharTokenizer.__eq__": true, - "text.UnicodeCharTokenizer.__ge__": true, - "text.UnicodeCharTokenizer.__gt__": true, - "text.UnicodeCharTokenizer.__init__": true, - "text.UnicodeCharTokenizer.__le__": true, - "text.UnicodeCharTokenizer.__lt__": true, - "text.UnicodeCharTokenizer.__ne__": true, - "text.UnicodeCharTokenizer.__new__": true, - "text.UnicodeCharTokenizer.detokenize": true, - "text.UnicodeCharTokenizer.split": true, - "text.UnicodeCharTokenizer.split_with_offsets": true, - "text.UnicodeCharTokenizer.tokenize": true, - "text.UnicodeCharTokenizer.tokenize_with_offsets": true, - "text.UnicodeScriptTokenizer": false, - "text.UnicodeScriptTokenizer.__eq__": true, - "text.UnicodeScriptTokenizer.__ge__": true, - "text.UnicodeScriptTokenizer.__gt__": true, - "text.UnicodeScriptTokenizer.__init__": true, - "text.UnicodeScriptTokenizer.__le__": true, - "text.UnicodeScriptTokenizer.__lt__": true, - "text.UnicodeScriptTokenizer.__ne__": true, - "text.UnicodeScriptTokenizer.__new__": true, - "text.UnicodeScriptTokenizer.split": true, - "text.UnicodeScriptTokenizer.split_with_offsets": true, - "text.UnicodeScriptTokenizer.tokenize": true, - "text.UnicodeScriptTokenizer.tokenize_with_offsets": true, - "text.WaterfallTrimmer": false, - "text.WaterfallTrimmer.__eq__": true, - "text.WaterfallTrimmer.__ge__": true, - "text.WaterfallTrimmer.__gt__": true, - "text.WaterfallTrimmer.__init__": true, - "text.WaterfallTrimmer.__le__": true, - "text.WaterfallTrimmer.__lt__": true, - "text.WaterfallTrimmer.__ne__": true, - "text.WaterfallTrimmer.__new__": true, - "text.WaterfallTrimmer.generate_mask": true, - "text.WaterfallTrimmer.trim": true, - "text.WhitespaceTokenizer": false, - "text.WhitespaceTokenizer.__eq__": true, - "text.WhitespaceTokenizer.__ge__": true, - "text.WhitespaceTokenizer.__gt__": true, - "text.WhitespaceTokenizer.__init__": true, - "text.WhitespaceTokenizer.__le__": true, - "text.WhitespaceTokenizer.__lt__": true, - "text.WhitespaceTokenizer.__ne__": true, - "text.WhitespaceTokenizer.__new__": true, - "text.WhitespaceTokenizer.split": true, - "text.WhitespaceTokenizer.split_with_offsets": true, - "text.WhitespaceTokenizer.tokenize": true, - "text.WhitespaceTokenizer.tokenize_with_offsets": true, - "text.WordShape": false, - "text.WordShape.BEGINS_WITH_OPEN_QUOTE": true, - "text.WordShape.BEGINS_WITH_PUNCT_OR_SYMBOL": true, - "text.WordShape.ENDS_WITH_CLOSE_QUOTE": true, - "text.WordShape.ENDS_WITH_ELLIPSIS": true, - "text.WordShape.ENDS_WITH_EMOTICON": true, - "text.WordShape.ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL": true, - "text.WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT": true, - 
"text.WordShape.ENDS_WITH_PUNCT_OR_SYMBOL": true, - "text.WordShape.ENDS_WITH_SENTENCE_TERMINAL": true, - "text.WordShape.ENDS_WITH_TERMINAL_PUNCT": true, - "text.WordShape.HAS_CURRENCY_SYMBOL": true, - "text.WordShape.HAS_EMOJI": true, - "text.WordShape.HAS_MATH_SYMBOL": true, - "text.WordShape.HAS_MIXED_CASE": true, - "text.WordShape.HAS_NON_LETTER": true, - "text.WordShape.HAS_NO_DIGITS": true, - "text.WordShape.HAS_NO_PUNCT_OR_SYMBOL": true, - "text.WordShape.HAS_NO_QUOTES": true, - "text.WordShape.HAS_ONLY_DIGITS": true, - "text.WordShape.HAS_PUNCTUATION_DASH": true, - "text.WordShape.HAS_QUOTE": true, - "text.WordShape.HAS_SOME_DIGITS": true, - "text.WordShape.HAS_SOME_PUNCT_OR_SYMBOL": true, - "text.WordShape.HAS_TITLE_CASE": true, - "text.WordShape.IS_ACRONYM_WITH_PERIODS": true, - "text.WordShape.IS_EMOTICON": true, - "text.WordShape.IS_LOWERCASE": true, - "text.WordShape.IS_MIXED_CASE_LETTERS": true, - "text.WordShape.IS_NUMERIC_VALUE": true, - "text.WordShape.IS_PUNCT_OR_SYMBOL": true, - "text.WordShape.IS_UPPERCASE": true, - "text.WordShape.IS_WHITESPACE": true, - "text.WordpieceTokenizer": false, - "text.WordpieceTokenizer.__eq__": true, - "text.WordpieceTokenizer.__ge__": true, - "text.WordpieceTokenizer.__gt__": true, - "text.WordpieceTokenizer.__init__": true, - "text.WordpieceTokenizer.__le__": true, - "text.WordpieceTokenizer.__lt__": true, - "text.WordpieceTokenizer.__ne__": true, - "text.WordpieceTokenizer.__new__": true, - "text.WordpieceTokenizer.detokenize": true, - "text.WordpieceTokenizer.split": true, - "text.WordpieceTokenizer.split_with_offsets": true, - "text.WordpieceTokenizer.tokenize": true, - "text.WordpieceTokenizer.tokenize_with_offsets": true, - "text.WordpieceTokenizer.vocab_size": true, - "text.__version__": true, - "text.case_fold_utf8": false, - "text.coerce_to_structurally_valid_utf8": false, - "text.combine_segments": false, - "text.find_source_offsets": false, - "text.gather_with_default": false, - "text.greedy_constrained_sequence": false, - "text.keras": false, - "text.keras.layers": false, - "text.keras.layers.ToDense": false, - "text.keras.layers.ToDense.__eq__": true, - "text.keras.layers.ToDense.__ge__": true, - "text.keras.layers.ToDense.__gt__": true, - "text.keras.layers.ToDense.__init__": true, - "text.keras.layers.ToDense.__le__": true, - "text.keras.layers.ToDense.__lt__": true, - "text.keras.layers.ToDense.__ne__": true, - "text.keras.layers.ToDense.__new__": true, - "text.mask_language_model": false, - "text.max_spanning_tree": false, - "text.max_spanning_tree_gradient": false, - "text.metrics": false, - "text.metrics.rouge_l": false, - "text.ngrams": false, - "text.normalize_utf8": false, - "text.normalize_utf8_with_offsets_map": false, - "text.pad_along_dimension": false, - "text.pad_model_inputs": false, - "text.regex_split": false, - "text.regex_split_with_offsets": false, - "text.sentence_fragments": false, - "text.sliding_window": false, - "text.span_alignment": false, - "text.span_overlaps": false, - "text.viterbi_constrained_sequence": false, - "text.wordshape": false - }, - "py_module_names": [ - "text" - ], - "site_link": null -}
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/_toc.yaml b/third_party/tensorflow-text/src/docs/api_docs/python/text/_toc.yaml deleted file mode 100644 index 9d3da62..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/_toc.yaml +++ /dev/null
@@ -1,114 +0,0 @@ -toc: -- title: text - section: - - title: Overview - path: /text/api_docs/python/text - - title: BertTokenizer - path: /text/api_docs/python/text/BertTokenizer - - title: Detokenizer - path: /text/api_docs/python/text/Detokenizer - - title: FirstNItemSelector - path: /text/api_docs/python/text/FirstNItemSelector - - title: HubModuleSplitter - path: /text/api_docs/python/text/HubModuleSplitter - - title: HubModuleTokenizer - path: /text/api_docs/python/text/HubModuleTokenizer - - title: MaskValuesChooser - path: /text/api_docs/python/text/MaskValuesChooser - - title: RandomItemSelector - path: /text/api_docs/python/text/RandomItemSelector - - title: Reduction - path: /text/api_docs/python/text/Reduction - - title: RegexSplitter - path: /text/api_docs/python/text/RegexSplitter - - title: RoundRobinTrimmer - path: /text/api_docs/python/text/RoundRobinTrimmer - - title: SentencepieceTokenizer - path: /text/api_docs/python/text/SentencepieceTokenizer - - title: SplitMergeFromLogitsTokenizer - path: /text/api_docs/python/text/SplitMergeFromLogitsTokenizer - - title: SplitMergeTokenizer - path: /text/api_docs/python/text/SplitMergeTokenizer - - title: Splitter - path: /text/api_docs/python/text/Splitter - - title: SplitterWithOffsets - path: /text/api_docs/python/text/SplitterWithOffsets - - title: StateBasedSentenceBreaker - path: /text/api_docs/python/text/StateBasedSentenceBreaker - - title: Tokenizer - path: /text/api_docs/python/text/Tokenizer - - title: TokenizerWithOffsets - path: /text/api_docs/python/text/TokenizerWithOffsets - - title: UnicodeCharTokenizer - path: /text/api_docs/python/text/UnicodeCharTokenizer - - title: UnicodeScriptTokenizer - path: /text/api_docs/python/text/UnicodeScriptTokenizer - - title: WaterfallTrimmer - path: /text/api_docs/python/text/WaterfallTrimmer - - title: WhitespaceTokenizer - path: /text/api_docs/python/text/WhitespaceTokenizer - - title: WordShape - path: /text/api_docs/python/text/WordShape - - title: WordpieceTokenizer - path: /text/api_docs/python/text/WordpieceTokenizer - - title: case_fold_utf8 - path: /text/api_docs/python/text/case_fold_utf8 - - title: coerce_to_structurally_valid_utf8 - path: /text/api_docs/python/text/coerce_to_structurally_valid_utf8 - - title: combine_segments - path: /text/api_docs/python/text/combine_segments - - title: find_source_offsets - path: /text/api_docs/python/text/find_source_offsets - - title: gather_with_default - path: /text/api_docs/python/text/gather_with_default - - title: greedy_constrained_sequence - path: /text/api_docs/python/text/greedy_constrained_sequence - - title: mask_language_model - path: /text/api_docs/python/text/mask_language_model - - title: max_spanning_tree - path: /text/api_docs/python/text/max_spanning_tree - - title: max_spanning_tree_gradient - path: /text/api_docs/python/text/max_spanning_tree_gradient - - title: ngrams - path: /text/api_docs/python/text/ngrams - - title: normalize_utf8 - path: /text/api_docs/python/text/normalize_utf8 - - title: normalize_utf8_with_offsets_map - path: /text/api_docs/python/text/normalize_utf8_with_offsets_map - - title: pad_along_dimension - path: /text/api_docs/python/text/pad_along_dimension - - title: pad_model_inputs - path: /text/api_docs/python/text/pad_model_inputs - - title: regex_split - path: /text/api_docs/python/text/regex_split - - title: regex_split_with_offsets - path: /text/api_docs/python/text/regex_split_with_offsets - - title: sentence_fragments - status: deprecated - path: 
/text/api_docs/python/text/sentence_fragments - - title: sliding_window - path: /text/api_docs/python/text/sliding_window - - title: span_alignment - path: /text/api_docs/python/text/span_alignment - - title: span_overlaps - path: /text/api_docs/python/text/span_overlaps - - title: viterbi_constrained_sequence - path: /text/api_docs/python/text/viterbi_constrained_sequence - - title: wordshape - path: /text/api_docs/python/text/wordshape -- title: text.keras - section: - - title: Overview - path: /text/api_docs/python/text/keras - - title: layers - section: - - title: Overview - path: /text/api_docs/python/text/keras/layers - - title: ToDense - path: /text/api_docs/python/text/keras/layers/ToDense -- title: text.metrics - section: - - title: Overview - path: /text/api_docs/python/text/metrics - - title: rouge_l - path: /text/api_docs/python/text/metrics/rouge_l
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/all_symbols.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/all_symbols.md deleted file mode 100644 index 64269d3..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/all_symbols.md +++ /dev/null
@@ -1,58 +0,0 @@ -# All symbols in TensorFlow Text - -<!-- Insert buttons and diff --> - -## Primary symbols - -* <a href="../text.md"><code>text</code></a> -* <a href="../text/BertTokenizer.md"><code>text.BertTokenizer</code></a> -* <a href="../text/Detokenizer.md"><code>text.Detokenizer</code></a> -* <a href="../text/FirstNItemSelector.md"><code>text.FirstNItemSelector</code></a> -* <a href="../text/HubModuleSplitter.md"><code>text.HubModuleSplitter</code></a> -* <a href="../text/HubModuleTokenizer.md"><code>text.HubModuleTokenizer</code></a> -* <a href="../text/MaskValuesChooser.md"><code>text.MaskValuesChooser</code></a> -* <a href="../text/RandomItemSelector.md"><code>text.RandomItemSelector</code></a> -* <a href="../text/Reduction.md"><code>text.Reduction</code></a> -* <a href="../text/RegexSplitter.md"><code>text.RegexSplitter</code></a> -* <a href="../text/RoundRobinTrimmer.md"><code>text.RoundRobinTrimmer</code></a> -* <a href="../text/SentencepieceTokenizer.md"><code>text.SentencepieceTokenizer</code></a> -* <a href="../text/SplitMergeFromLogitsTokenizer.md"><code>text.SplitMergeFromLogitsTokenizer</code></a> -* <a href="../text/SplitMergeTokenizer.md"><code>text.SplitMergeTokenizer</code></a> -* <a href="../text/Splitter.md"><code>text.Splitter</code></a> -* <a href="../text/SplitterWithOffsets.md"><code>text.SplitterWithOffsets</code></a> -* <a href="../text/StateBasedSentenceBreaker.md"><code>text.StateBasedSentenceBreaker</code></a> -* <a href="../text/Tokenizer.md"><code>text.Tokenizer</code></a> -* <a href="../text/TokenizerWithOffsets.md"><code>text.TokenizerWithOffsets</code></a> -* <a href="../text/UnicodeCharTokenizer.md"><code>text.UnicodeCharTokenizer</code></a> -* <a href="../text/UnicodeScriptTokenizer.md"><code>text.UnicodeScriptTokenizer</code></a> -* <a href="../text/WaterfallTrimmer.md"><code>text.WaterfallTrimmer</code></a> -* <a href="../text/WhitespaceTokenizer.md"><code>text.WhitespaceTokenizer</code></a> -* <a href="../text/WordShape_cls.md"><code>text.WordShape</code></a> -* <a href="../text/WordpieceTokenizer.md"><code>text.WordpieceTokenizer</code></a> -* <a href="../text/case_fold_utf8.md"><code>text.case_fold_utf8</code></a> -* <a href="../text/coerce_to_structurally_valid_utf8.md"><code>text.coerce_to_structurally_valid_utf8</code></a> -* <a href="../text/combine_segments.md"><code>text.combine_segments</code></a> -* <a href="../text/find_source_offsets.md"><code>text.find_source_offsets</code></a> -* <a href="../text/gather_with_default.md"><code>text.gather_with_default</code></a> -* <a href="../text/greedy_constrained_sequence.md"><code>text.greedy_constrained_sequence</code></a> -* <a href="../text/keras.md"><code>text.keras</code></a> -* <a href="../text/keras/layers.md"><code>text.keras.layers</code></a> -* <a href="../text/keras/layers/ToDense.md"><code>text.keras.layers.ToDense</code></a> -* <a href="../text/mask_language_model.md"><code>text.mask_language_model</code></a> -* <a href="../text/max_spanning_tree.md"><code>text.max_spanning_tree</code></a> -* <a href="../text/max_spanning_tree_gradient.md"><code>text.max_spanning_tree_gradient</code></a> -* <a href="../text/metrics.md"><code>text.metrics</code></a> -* <a href="../text/metrics/rouge_l.md"><code>text.metrics.rouge_l</code></a> -* <a href="../text/ngrams.md"><code>text.ngrams</code></a> -* <a href="../text/normalize_utf8.md"><code>text.normalize_utf8</code></a> -* <a href="../text/normalize_utf8_with_offsets_map.md"><code>text.normalize_utf8_with_offsets_map</code></a> -* <a 
href="../text/pad_along_dimension.md"><code>text.pad_along_dimension</code></a> -* <a href="../text/pad_model_inputs.md"><code>text.pad_model_inputs</code></a> -* <a href="../text/regex_split.md"><code>text.regex_split</code></a> -* <a href="../text/regex_split_with_offsets.md"><code>text.regex_split_with_offsets</code></a> -* <a href="../text/sentence_fragments.md"><code>text.sentence_fragments</code></a> -* <a href="../text/sliding_window.md"><code>text.sliding_window</code></a> -* <a href="../text/span_alignment.md"><code>text.span_alignment</code></a> -* <a href="../text/span_overlaps.md"><code>text.span_overlaps</code></a> -* <a href="../text/viterbi_constrained_sequence.md"><code>text.viterbi_constrained_sequence</code></a> -* <a href="../text/wordshape.md"><code>text.wordshape</code></a>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/api_report.pb b/third_party/tensorflow-text/src/docs/api_docs/python/text/api_report.pb deleted file mode 100644 index e6b83c49..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/api_report.pb +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/case_fold_utf8.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/case_fold_utf8.md deleted file mode 100644 index dcd20ac..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/case_fold_utf8.md +++ /dev/null
@@ -1,78 +0,0 @@ -description: Applies case folding to every UTF-8 string in the input. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.case_fold_utf8" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.case_fold_utf8 - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/normalize_ops.py">View -source</a> - -Applies case folding to every UTF-8 string in the input. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.case_fold_utf8( - input, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -The input is a `Tensor` or `RaggedTensor` of any shape, and the resulting output -has the same shape as the input. Note that NFKC normalization is implicitly -applied to the strings. - -#### Examples: - -``` ->>> # input: <string>[num_strings] ->>> case_fold_utf8(['The Quick-Brown', -... 'CAT jumped over', -... 'the lazy dog !! ']) ->>> # output: <string>[num_strings] -<tf.Tensor: shape=(3,), dtype=string, numpy= - array([b'the quick-brown', b'cat jumped over', b'the lazy dog !! '], - dtype=object)> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `Tensor` or `RaggedTensor` of UTF-8 encoded strings. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name for this op (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A `Tensor` or `RaggedTensor` of type string, with case-folded contents. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/coerce_to_structurally_valid_utf8.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/coerce_to_structurally_valid_utf8.md deleted file mode 100644 index 2ce5f69..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/coerce_to_structurally_valid_utf8.md +++ /dev/null
@@ -1,88 +0,0 @@ -description: Coerce UTF-8 input strings to structurally valid UTF-8. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.coerce_to_structurally_valid_utf8" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.coerce_to_structurally_valid_utf8 - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/string_ops.py">View -source</a> - -Coerce UTF-8 input strings to structurally valid UTF-8. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.coerce_to_structurally_valid_utf8( - input, replacement_char=_unichr(65533), name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Any bytes which cause the input string to be invalid UTF-8 are substituted with -the provided replacement character codepoint (default 65533). If you plan on -overriding the default, use a single byte replacement character codepoint to -preserve alignment to the source input string. - -In this example, the character \xDEB2 is an invalid UTF-8 bit sequence; the call -to `coerce_to_structurally_valid_utf8` replaces it with \xef\xbf\xbd, which is -the default replacement character encoding. ``` - -> > > input_data = ["A", b"\xDEB2", "C"] -> > > coerce_to_structurally_valid_utf8(input_data) -> > > <tf.Tensor: shape=(3,), dtype=string, numpy=array([b'A', b'\xef\xbf\xbdB2', b'C'], dtype=object)> -> > > ``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input` -</td> -<td> -UTF-8 string tensor to coerce to valid UTF-8. -</td> -</tr><tr> -<td> -`replacement_char` -</td> -<td> -The replacement character to be used in place of any -invalid byte in the input. Any valid Unicode character may be used. The -default value is the default Unicode replacement character which is -0xFFFD (or U+65533). Note that passing a replacement character -expressible in 1 byte, such as ' ' or '?', will preserve string -alignment to the source since individual invalid bytes will be replaced -with a 1-byte replacement. (optional) -</td> -</tr><tr> -<td> -`name` -</td> -<td> -A name for the operation (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A tensor of type string with the same shape as the input. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/combine_segments.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/combine_segments.md deleted file mode 100644 index f87c133..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/combine_segments.md +++ /dev/null
@@ -1,144 +0,0 @@ -description: Combine one or more input segments for a model's input sequence. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.combine_segments" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.combine_segments - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/segment_combiner_ops.py">View source</a> - - - -Combine one or more input segments for a model's input sequence. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.combine_segments( - segments, start_of_sequence_id, end_of_segment_id -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -`combine_segments` combines the tokens of one or more input segments to a -single sequence of token values and generates matching segment ids. -`combine_segments` can follow a `Trimmer`, who limit segment lengths and -emit `RaggedTensor` outputs, and can be followed up by `ModelInputPacker`. - -See `Detailed Experimental Setup` in `BERT: Pre-training of Deep Bidirectional -Transformers for Language Understanding` -(https://arxiv.org/pdf/1810.04805.pdf) for more examples of combined -segments. - - -`combine_segments` first flattens and combines a list of one or more -segments -(`RaggedTensor`s of n dimensions) together along the 1st axis, then packages -any special tokens into a final n dimensional `RaggedTensor`. - -And finally `combine_segments` generates another `RaggedTensor` (with the -same rank as the final combined `RaggedTensor`) that contains a distinct int -id for each segment. - -#### Example usage: - - - -``` -segment_a = [[1, 2], - [3, 4,], - [5, 6, 7, 8, 9]] - -segment_b = [[10, 20,], - [30, 40, 50, 60,], - [70, 80]] -expected_combined, expected_ids = combine_segments([segment_a, segment_b]) - -# segment_a and segment_b have been combined w/ special tokens describing -# the beginning of a sequence and end of a sequence inserted. -expected_combined=[ - [101, 1, 2, 102, 10, 20, 102], - [101, 3, 4, 102, 30, 40, 50, 60, 102], - [101, 5, 6, 7, 8, 9, 102, 70, 80, 102], -] - -# ids describing which items belong to which segment. -expected_ids=[ - [0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 0, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`segments` -</td> -<td> -A list of `RaggedTensor`s with the tokens of the input segments. -All elements must have the same dtype (int32 or int64), same rank, and -same dimension 0 (namely batch size). Slice `segments[i][j, ...]` -contains the tokens of the i-th input segment to the j-th example in the -batch. -</td> -</tr><tr> -<td> -`start_of_sequence_id` -</td> -<td> -a python int or scalar Tensor containing the id used -to denote the start of a sequence (e.g. `[CLS]` token in BERT -terminology). -</td> -</tr><tr> -<td> -`end_of_segment_id` -</td> -<td> -a python int or scalar Tensor containing the id used to -denote end of a segment (e.g. the `[SEP]` token in BERT terminology). 
-</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -a tuple of (combined_segments, segment_ids), where: -</td> -</tr> -<tr> -<td> -`combined_segments` -</td> -<td> -A `RaggedTensor` with segments combined and special -tokens inserted. -</td> -</tr><tr> -<td> -`segment_ids` -</td> -<td> - A `RaggedTensor` w/ the same shape as `combined_segments` -and containing int ids for each item detailing the segment that they -correspond to. -</td> -</tr> -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/find_source_offsets.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/find_source_offsets.md deleted file mode 100644 index dbf3453b..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/find_source_offsets.md +++ /dev/null
@@ -1,101 +0,0 @@ -description: Maps the input post-normalized string offsets to pre-normalized offsets. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.find_source_offsets" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.find_source_offsets - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/normalize_ops.py">View source</a> - - - -Maps the input post-normalized string offsets to pre-normalized offsets. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.find_source_offsets( - offsets_map, input_offsets, name=None -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -Returns the source (i.e. pre-normalized) string offsets mapped from the input -post-normalized string offsets using the input offsets_map, which is an output -from the `normalize_utf8_with_offsets_map` op. offsets_map can be indexed or -sliced along with the input_offsets. - -#### Examples: - -``` ->>> # input: <string>[num_strings] ->>> post_normalized_str, offsets_map = normalize_utf8_with_offsets_map( -... ["株式会社", "KADOKAWA"]) ->>> # input: <variant>[num_strings], <int64>[num_strings, num_offsets] ->>> find_source_offsets(offsets_map, [[0, 1, 2], [0, 1, 2]]) ->>> # output: <int64>[num_strings, num_offsets] -<tf.Tensor: shape=(2, 3), dtype=int64, numpy=array([[0, 1, 2], [0, 3, 6]])> ->>> # Offsets map can be indexed. ->>> find_source_offsets(offsets_map[1], [[0, 1, 2]]) -<tf.Tensor: shape=(1, 3), dtype=int64, numpy=array([[0, 3, 6]])> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`offsets_map` -</td> -<td> -A `Tensor` or `RaggedTensor` of type `variant`, used to map the -post-normalized string offsets to pre-normalized string offsets. -offsets_map is an output from `normalize_utf8_with_offsets_map` function. -</td> -</tr><tr> -<td> -`input_offsets` -</td> -<td> -A `Tensor` or `RaggedTensor` of type int64 representing the -the post-normalized string offsets, -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name for this op (optional). -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> - -<tr> -<td> -`results` -</td> -<td> -A `Tensor` or `RaggedTensor` of type int64, with pre-normalized -string offsets. -</td> -</tr> -</table> -
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/gather_with_default.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/gather_with_default.md deleted file mode 100644 index 552de11..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/gather_with_default.md +++ /dev/null
@@ -1,96 +0,0 @@ -description: Gather slices with indices=-1 mapped to default. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.gather_with_default" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.gather_with_default - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/pointer_ops.py">View -source</a> - -Gather slices with `indices=-1` mapped to `default`. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.gather_with_default( - params, indices, default, name=None, axis=0 -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -This operation is similar to `tf.gather()`, except that any value of `-1` -in `indices` will be mapped to `default`. Example: - -``` ->>> gather_with_default(['a', 'b', 'c', 'd'], [2, 0, -1, 2, -1], '_') -<tf.Tensor: shape=(5,), dtype=string, - numpy=array([b'c', b'a', b'_', b'c', b'_'], dtype=object)> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`params` -</td> -<td> -The `Tensor` from which to gather values. Must be at least rank -`axis + 1`. -</td> -</tr><tr> -<td> -`indices` -</td> -<td> -The index `Tensor`. Must have dtype `int32` or `int64`, and values -must be in the range `[-1, params.shape[axis])`. -</td> -</tr><tr> -<td> -`default` -</td> -<td> -The value to use when `indices` is `-1`. `default.shape` must -be equal to `params.shape[axis + 1:]`. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -A name for the operation (optional). -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -The axis in `params` to gather `indices` from. Must be a scalar -`int32` or `int64`. Supports negative indices. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A `Tensor` with the same type as `param`, and with shape -`params.shape[:axis] + indices.shape + params.shape[axis + 1:]`. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/greedy_constrained_sequence.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/greedy_constrained_sequence.md deleted file mode 100644 index 5f3476a4..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/greedy_constrained_sequence.md +++ /dev/null
@@ -1,159 +0,0 @@ -description: Performs greedy constrained sequence on a batch of examples. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.greedy_constrained_sequence" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.greedy_constrained_sequence - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/greedy_constrained_sequence_op.py">View -source</a> - -Performs greedy constrained sequence on a batch of examples. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.greedy_constrained_sequence( - scores, sequence_length=None, allowed_transitions=None, transition_weights=None, - use_log_space=False, use_start_and_end_states=False, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Constrains a set of predictions based on a set of legal transitions and/or a set -of transition weights, returning the legal sequence that maximizes the product -or sum of the state scores and the transition weights at each step. If -`use_log_space` is true, the sum is used; if false, the product is used. - -This op also takes a parameter `use_start_and_end_states`, which when true will -add an implicit start and end state to each sequence. These implicit states -allow the user to specify additional weights and permitted transitions to start -and end a sequence (so, for instance, if you wanted to forbid your output from -ending in a certain set of states you could do so). - -Inputs to this op can take one of three forms: a single TensorFlow tensor of -scores with no sequence lengths, a TensorFlow tensor of scores along with a -TensorFlow tensor of sequence lengths, or a RaggedTensor. If only the scores -tensor is passed, this op will assume that the sequence lengths are equal to the -size of the tensor (and so use all the data provided). If a scores tensor and -sequence_lengths tensor is provided, the op will only use the data in the scores -tensor as specified by the sequence_lengths tensor. Finally, if a RaggedTensor -is provided, the sequence_lengths will be ignored and the variable length -sequences in the RaggedTensor will be used. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`scores` -</td> -<td> -`<float32> [batch_size, num_steps, |num_states|]` -A tensor of scores, where `scores[b, t, s]` is the predicted score for -transitioning to state `s` at step `t` for batch `b`. The |num_states| -dimension must correspond to the num_states attribute for this op. This -input may be ragged; if it is ragged, the ragged tensor should have the -same structure [b, t, s] and only axis 1 should be ragged. -</td> -</tr><tr> -<td> -`sequence_length` -</td> -<td> -`<{int32, int64}>[batch_size]` -A rank-1 tensor representing the length of the output sequence. If None, -and the 'scores' input is not ragged, sequence lengths will be assumed -to be the length of the score tensor. -</td> -</tr><tr> -<td> -`allowed_transitions` -</td> -<td> - if use_start_and_end_states is TRUE: - `<bool>[num_states+1, num_states+1]` -if use_start_and_end_states is FALSE: - `<bool>[num_states, num_states]` -A rank-2 tensor representing allowed transitions. 
-- allowed_transitions[i][j] is true if the transition from state i to - state j is allowed for i and j in 0...(num_states). -- allowed_transitions[num_states][num_states] is ignored. -If use_start_and_end_states is TRUE: - - allowed_transitions[num_states][j] is true if the sequence is allowed - to start from state j. - - allowed_transitions[i][num_states] is true if the sequence is allowed - to end on state i. -Default - An empty tensor. This allows all sequence states to transition - to all other sequence states. -</td> -</tr><tr> -<td> -`transition_weights` -</td> -<td> - if use_start_and_end_states is TRUE: - `<float32>[num_states+1, num_states+1]` -if use_start_and_end_states is FALSE: - `<float32>[num_states, num_states]` -A rank-2 tensor representing transition weights. -- transition_weights[i][j] is the coefficient that a candidate transition - score will be multiplied by if that transition is from state i to - state j. -- transition_weights[num_states][num_states] is ignored. -If use_start_and_end_states is TRUE: - - transition_weights[num_states][j] is the coefficient that will be used - if the transition starts with state j. - - transition_weights[i][num_states] is the coefficient that will be used - if the final state in the sequence is state i. -Default - An empty tensor. This assigns a wieght of 1.0 all transitions -</td> -</tr><tr> -<td> -`use_log_space` -</td> -<td> -Whether to use log space for the calculation. If false, -calculations will be done in exp-space. -</td> -</tr><tr> -<td> -`use_start_and_end_states` -</td> -<td> -If True, sequences will have an implicit start -and end state added. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name scope within which this op should be constructed. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -An <int32>[batch_size, (num_steps)] ragged tensor containing the appropriate -sequence of transitions. If a sequence is impossible, the value of the -RaggedTensor for that and all following transitions in that sequence shall -be '-1'. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/keras.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/keras.md deleted file mode 100644 index cac1437..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/keras.md +++ /dev/null
@@ -1,24 +0,0 @@ -description: Tensorflow Text Layers for Keras API. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.keras" /> -<meta itemprop="path" content="Stable" /> -</div> - -# Module: text.keras - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/keras/__init__.py">View -source</a> - -Tensorflow Text Layers for Keras API. - -## Modules - -[`layers`](../text/keras/layers.md) module: Tensorflow Text layers for Keras -API.
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/keras/layers.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/keras/layers.md deleted file mode 100644 index e74b51a..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/keras/layers.md +++ /dev/null
@@ -1,24 +0,0 @@ -description: Tensorflow Text layers for Keras API. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.keras.layers" /> -<meta itemprop="path" content="Stable" /> -</div> - -# Module: text.keras.layers - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/keras/layers/__init__.py">View -source</a> - -Tensorflow Text layers for Keras API. - -## Classes - -[`class ToDense`](../../text/keras/layers/ToDense.md): Layer that makes padding -and masking a Composite Tensors effortless.
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/keras/layers/ToDense.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/keras/layers/ToDense.md deleted file mode 100644 index a8f05d8..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/keras/layers/ToDense.md +++ /dev/null
@@ -1,97 +0,0 @@ -description: Layer that makes padding and masking a Composite Tensors -effortless. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.keras.layers.ToDense" /> -<meta itemprop="path" content="Stable" /> -<meta itemprop="property" content="__init__"/> -<meta itemprop="property" content="__new__"/> -</div> - -# text.keras.layers.ToDense - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/keras/layers/todense.py">View -source</a> - -Layer that makes padding and masking a Composite Tensors effortless. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.keras.layers.ToDense( - pad_value=0, mask=False, shape=None, **kwargs -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -The layer takes a RaggedTensor or a SparseTensor and converts it to a uniform -tensor by right-padding it or filling in missing values. - -#### Example: - -```python -x = tf.keras.layers.Input(shape=(None, None), ragged=True) -y = tf_text.keras.layers.ToDense(mask=True)(x) -model = tf.keras.Model(x, y) - -rt = tf.RaggedTensor.from_nested_row_splits( - flat_values=[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - nested_row_splits=([0, 1, 1, 5], [0, 3, 3, 5, 9, 10])) -model.predict(rt) - -[[[10, 11, 12, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0]], - [[ 0, 0, 0, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0]], - [[ 0, 0, 0, 0], [13, 14, 0, 0], [15, 16, 17, 18], [19, 0, 0, 0]]] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`pad_value` -</td> -<td> -A value used to pad and fill in the missing values. Should be a -meaningless value for the input data. Default is '0'. -</td> -</tr><tr> -<td> -`mask` -</td> -<td> -A Boolean value representing whether to mask the padded values. If -true, no any downstream Masking layer or Embedding layer with -mask_zero=True should be added. Default is 'False'. -</td> -</tr><tr> -<td> -`shape` -</td> -<td> -If not `None`, the resulting dense tensor will be guaranteed to have -this shape. For RaggedTensor inputs, this is passed to `tf.RaggedTensor`'s -`to_tensor` method. For other tensor types, a `tf.ensure_shape` call is -added to assert that the output has this shape. -</td> -</tr><tr> -<td> -`**kwargs` -</td> -<td> -kwargs of parent class. -</td> -</tr> -</table> - -Input shape: Any Ragged or Sparse Tensor is accepted, but it requires the type -of input to be specified via the Input or InputLayer from the Keras API. Output -shape: The output is a uniform tensor having the same shape, in case of a ragged -input or the same dense shape, in case of a sparse input.
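The docstring above notes that `ToDense` also accepts `SparseTensor` inputs; a minimal sketch of that path, assuming `tensorflow_text` is imported as `tf_text` and using made-up values:

```python
import tensorflow as tf
import tensorflow_text as tf_text

# Sparse input: absent entries are filled with pad_value by ToDense.
x = tf.keras.layers.Input(shape=(None,), sparse=True, dtype=tf.int32)
y = tf_text.keras.layers.ToDense(pad_value=0, mask=True)(x)
model = tf.keras.Model(x, y)

sp = tf.sparse.SparseTensor(indices=[[0, 0], [0, 2], [1, 1]],
                            values=[10, 11, 12],
                            dense_shape=[2, 3])
model.predict(sp)
# Expected (under these assumptions):
# [[10  0 11]
#  [ 0 12  0]]
```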
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/mask_language_model.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/mask_language_model.md deleted file mode 100644 index b062077..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/mask_language_model.md +++ /dev/null
@@ -1,184 +0,0 @@ -description: Applies dynamic language model masking. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.mask_language_model" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.mask_language_model - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/masking_ops.py">View source</a> - - - -Applies dynamic language model masking. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.mask_language_model( - input_ids, item_selector, mask_values_chooser, axis=1 -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -`mask_language_model` implements the `Masked LM and Masking Procedure` -described in `BERT: Pre-training of Deep Bidirectional Transformers for -Language Understanding` (https://arxiv.org/pdf/1810.04805.pdf). -`mask_language_model` uses an `ItemSelector` to select the items for masking, -and a `MaskValuesChooser` to assign the values to the selected items. -The purpose of this is to bias the representation towards the actual -observed item. - -Masking is performed on items in an axis. A decision is taken independently at -random to mask with [MASK], mask with random tokens from the full vocab, or -not mask at all. Note that the masking decision is broadcasted to the -sub-dimensions. - -For example, in a RaggedTensor of shape `[batch, (wordpieces)]` and if axis=1, -each wordpiece independently gets masked (or not). - -With the following input: - -``` -[[b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" ], -[b"Bar", b"##ack", b"Ob", b"##ama"], -[b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], -``` - -`mask_language_model` could end up masking individual wordpieces: - -``` -[[b"[MASK]", b"##onge", b"bob", b"Sq", b"[MASK]", b"##pants" ], -[b"Bar", b"##ack", b"[MASK]", b"##ama"], -[b"[MASK]", b"##vel", b"A", b"##ven", b"##gers"]] -``` - -Or with random token inserted: - -``` -[[b"[MASK]", b"##onge", b"bob", b"Sq", b"[MASK]", b"##pants" ], -[b"Bar", b"##ack", b"Sq", b"##ama"], # random token inserted for 'Ob' -[b"Bar", b"##vel", b"A", b"##ven", b"##gers"]] # random token inserted for - # 'Mar' -``` - -In a RaggedTensor of shape `[batch, (words), (wordpieces)]`, whole words get -masked (or not). If a word gets masked, all its tokens are independently -either replaced by `[MASK]`, by random tokens, or no substitution occurs. -Note that any arbitrary spans that can be constructed by a `RaggedTensor` can -be masked in the same way. 
- -For example, if we have an `RaggedTensor` with shape -`[batch, (token), (wordpieces)]`: - -``` -[[[b"Sp", "##onge"], [b"bob"], [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"Ob", b"##ama"]], - [[b"Mar", "##vel"], [b"A", b"##ven", b"##gers"]]] -``` - -`mask_language_model` could mask whole spans (items grouped together -by the same 1st dimension): - -``` -[[[b"[MASK]", "[MASK]"], [b"bob"], [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"[MASK]", b"[MASK]"]], - [[b"[MASK]", "[MASK]"], [b"A", b"##ven", b"##gers"]]] -``` - -or insert random items in spans: - -``` - [[[b"Mar", "##ama"], [b"bob"], [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"##onge", b"##gers"]], - [[b"Ob", "Sp"], [b"A", b"##ven", b"##gers"]]] -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input_ids` -</td> -<td> -A `RaggedTensor` of n dimensions (where n >= 2) on which -masking will be applied to items up to dimension 1. -</td> -</tr><tr> -<td> -`item_selector` -</td> -<td> -An instance of `ItemSelector` that is used for selecting -items to be masked. -</td> -</tr><tr> -<td> -`mask_values_chooser` -</td> -<td> -An instance of `MaskValuesChooser` which determines the -values assigned to the ids chosen for masking. -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -the axis where items will be treated atomically for masking. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple of (masked_input_ids, masked_positions, masked_ids) where: -</td> -</tr> -<tr> -<td> -`masked_input_ids` -</td> -<td> -A `RaggedTensor` in the same shape and dtype as -`input_ids`, but with items in `masked_positions` possibly replaced -with `mask_token`, random id, or no change. -</td> -</tr><tr> -<td> -`masked_positions` -</td> -<td> -A `RaggedTensor` of ints with shape -[batch, (num_masked)] containing the positions of items selected for -masking. -</td> -</tr><tr> -<td> -`masked_ids` -</td> -<td> -A `RaggedTensor` with shape [batch, (num_masked)] and same -type as `input_ids` containing the original values before masking -and thus used as labels for the task. -</td> -</tr> -</table> -
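To make the selection/masking split concrete, here is a minimal, hedged sketch that pairs `mask_language_model` with a `RandomItemSelector` and a `MaskValuesChooser`; the token ids, vocabulary size, and special-token ids are all hypothetical:

```python
import tensorflow as tf
import tensorflow_text as tf_text

# Hypothetical wordpiece ids; 101/102 stand in for [CLS]/[SEP], 103 for [MASK].
input_ids = tf.ragged.constant([[101, 2054, 2003, 2023, 102],
                                [101, 7592, 2088, 102]])

item_selector = tf_text.RandomItemSelector(
    max_selections_per_batch=3,
    selection_rate=0.2,
    unselectable_ids=[101, 102])  # never select the special tokens

mask_values_chooser = tf_text.MaskValuesChooser(
    vocab_size=30522, mask_token=103,
    mask_token_rate=0.8, random_token_rate=0.1)

masked_input_ids, masked_positions, masked_lm_ids = tf_text.mask_language_model(
    input_ids,
    item_selector=item_selector,
    mask_values_chooser=mask_values_chooser)
# masked_input_ids: ids with selected items replaced by [MASK], a random id, or
# left unchanged; masked_positions/masked_lm_ids record where masking happened
# and the original ids to use as labels.
```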
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/max_spanning_tree.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/max_spanning_tree.md deleted file mode 100644 index 6347ca25..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/max_spanning_tree.md +++ /dev/null
@@ -1,120 +0,0 @@ -description: Finds the maximum directed spanning tree of a digraph. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.max_spanning_tree" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.max_spanning_tree - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/gen_mst_ops.py">View -source</a> - -Finds the maximum directed spanning tree of a digraph. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.max_spanning_tree( - num_nodes, scores, forest=False, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Given a batch of directed graphs with scored arcs and root selections, solves -for the maximum spanning tree of each digraph, where the score of a tree is -defined as the sum of the scores of the arcs and roots making up the tree. - -Returns the score of the maximum spanning tree of each digraph, as well as the -arcs and roots in that tree. Each digraph in a batch may contain a different -number of nodes, so the sizes of the digraphs must be provided as an input. - -Note that this operation is only differentiable w.r.t. its |scores| input and -its |max_scores| output. - -The code here is intended for NLP applications, but attempts to remain agnostic -to particular NLP tasks (such as dependency parsing). - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`num_nodes` -</td> -<td> -A `Tensor` of type `int32`. -[B] vector where entry b is number of nodes in the b'th digraph. -</td> -</tr><tr> -<td> -`scores` -</td> -<td> -A `Tensor`. Must be one of the following types: `int32`, `float32`, `float64`. -[B,M,M] tensor where entry b,t,s is the score of the arc from node s to -node t in the b'th directed graph if s!=t, or the score of selecting -node t as a root in the b'th digraph if s==t. This uniform tenosor -requires that M is >= num_nodes[b] for all b (ie. all graphs in the -batch), and ignores entries b,s,t where s or t is >= num_nodes[b]. -Arcs or root selections with non-finite score are treated as -nonexistent. -</td> -</tr><tr> -<td> -`forest` -</td> -<td> -An optional `bool`. Defaults to `False`. -If true, solves for a maximum spanning forest instead of a maximum -spanning tree, where a spanning forest is a set of disjoint trees that -span the nodes of the digraph. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -A name for the operation (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple of `Tensor` objects (max_scores, argmax_sources). -</td> -</tr> -<tr> -<td> -`max_scores` -</td> -<td> -A `Tensor`. Has the same type as `scores`. [B] vector where entry b is the score of the maximum spanning tree -of the b'th digraph. -</td> -</tr><tr> -<td> -`argmax_sources` -</td> -<td> -A `Tensor` of type `int32`. [B,M] matrix where entry b,t is the source of the arc inbound to -t in the maximum spanning tree of the b'th digraph, or t if t is -a root. Entries b,t where t is >= num_nodes[b] are set to -1. 
-Quickly finding the roots can be done as: -tf.equal(tf.map_fn(lambda x: tf.range(tf.size(x)), -argmax_sources), argmax_sources) -</td> -</tr> -</table>
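A small worked example of the call and of the root-finding trick quoted above; the scores are made up and the expected outputs are hand-computed, so treat this as a sketch:

```python
import tensorflow as tf
import tensorflow_text as tf_text

num_nodes = tf.constant([3], tf.int32)
# scores[b, t, s]: score of the arc s -> t, or of picking t as root when s == t.
scores = tf.constant([[[9, 1, 1],
                       [3, 2, 8],
                       [7, 5, 2]]], tf.int32)

max_scores, argmax_sources = tf_text.max_spanning_tree(num_nodes, scores)
# Hand-computed expectation: root 0 (9) + arc 0->2 (7) + arc 2->1 (8) = 24,
# i.e. max_scores == [24] and argmax_sources == [[0, 2, 0]].

# Roots are the nodes that are their own source:
roots = tf.equal(
    tf.map_fn(lambda x: tf.range(tf.size(x)), argmax_sources), argmax_sources)
```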
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/max_spanning_tree_gradient.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/max_spanning_tree_gradient.md deleted file mode 100644 index e93091b..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/max_spanning_tree_gradient.md +++ /dev/null
@@ -1,75 +0,0 @@ -description: Returns a subgradient of the MaximumSpanningTree op. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.max_spanning_tree_gradient" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.max_spanning_tree_gradient - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/mst_ops.py">View -source</a> - -Returns a subgradient of the MaximumSpanningTree op. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.max_spanning_tree_gradient( - mst_op, d_loss_d_max_scores, *_ -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Note that MaximumSpanningTree is only differentiable w.r.t. its |scores| input -and its |max_scores| output. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`mst_op` -</td> -<td> -The MaximumSpanningTree op being differentiated. -</td> -</tr><tr> -<td> -`d_loss_d_max_scores` -</td> -<td> -[B] vector where entry b is the gradient of the network -loss w.r.t. entry b of the |max_scores| output of the |mst_op|. -</td> -</tr><tr> -<td> -`*_` -</td> -<td> -The gradients w.r.t. the other outputs; ignored. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -1. None, since the op is not differentiable w.r.t. its |num_nodes| input. -2. [B,M,M] tensor where entry b,t,s is a subgradient of the network loss - w.r.t. entry b,t,s of the |scores| input, with the same dtype as - |d_loss_d_max_scores|. -</td> -</tr> - -</table>
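Since the op is only differentiable w.r.t. `scores`, the subgradient described above is what a `tf.GradientTape` should surface; a hedged sketch with float scores and made-up values:

```python
import tensorflow as tf
import tensorflow_text as tf_text

num_nodes = tf.constant([3], tf.int32)
scores = tf.constant([[[9., 1., 1.],
                       [3., 2., 8.],
                       [7., 5., 2.]]])

with tf.GradientTape() as tape:
    tape.watch(scores)
    max_scores, _ = tf_text.max_spanning_tree(num_nodes, scores)
    loss = tf.reduce_sum(max_scores)

# d_loss/d_scores should be an indicator-like [B, M, M] tensor that is non-zero
# only on the arcs and root selections of the argmax tree.
grad = tape.gradient(loss, scores)
```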
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/metrics.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/metrics.md deleted file mode 100644 index 31e3ca2..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/metrics.md +++ /dev/null
@@ -1,24 +0,0 @@ -description: Tensorflow text-processing metrics. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.metrics" /> -<meta itemprop="path" content="Stable" /> -</div> - -# Module: text.metrics - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/metrics/__init__.py">View -source</a> - -Tensorflow text-processing metrics. - -## Functions - -[`rouge_l(...)`](../text/metrics/rouge_l.md): Computes LCS-based similarity -score between the hypotheses and references.
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/metrics/rouge_l.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/metrics/rouge_l.md deleted file mode 100644 index 7c555f7..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/metrics/rouge_l.md +++ /dev/null
@@ -1,101 +0,0 @@ -description: Computes LCS-based similarity score between the hypotheses and -references. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.metrics.rouge_l" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.metrics.rouge_l - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/metrics/text_similarity_metric_ops.py">View -source</a> - -Computes LCS-based similarity score between the hypotheses and references. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.metrics.rouge_l( - hypotheses, references, alpha=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -The Rouge-L metric is a score from 0 to 1 indicating how similar two sequences -are, based on the length of the longest common subsequence (LCS). In particular, -Rouge-L is the weighted harmonic mean (or f-measure) combining the LCS precision -(the percentage of the hypothesis sequence covered by the LCS) and the LCS -recall (the percentage of the reference sequence covered by the LCS). - -Source: https://www.microsoft.com/en-us/research/publication/ -rouge-a-package-for-automatic-evaluation-of-summaries/ - -This method returns the F-measure, Precision, and Recall for each (hypothesis, -reference) pair. - -Alpha is used as a weight for the harmonic mean of precision and recall. A value -of 0 means recall is more important and 1 means precision is more important. -Leaving alpha unset implies alpha=.5, which is the default in the official -ROUGE-1.5.5.pl script. Setting alpha to a negative number triggers a -compatibility mode with the tensor2tensor implementation of ROUGE-L. - -``` ->>> hypotheses = tf.ragged.constant([["a","b"]]) ->>> references = tf.ragged.constant([["b"]]) ->>> f, p, r = rouge_l(hypotheses, references, alpha=1) ->>> print("f: %s, p: %s, r: %s" % (f, p, r)) -f: tf.Tensor([0.5], shape=(1,), dtype=float32), -p: tf.Tensor([0.5], shape=(1,), dtype=float32), -r: tf.Tensor([1.], shape=(1,), dtype=float32) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`hypotheses` -</td> -<td> -A RaggedTensor with shape [N, (hyp_sentence_len)] and integer or -string values. -</td> -</tr><tr> -<td> -`references` -</td> -<td> -A RaggedTensor with shape [N, (ref_sentence_len)] and integer or -string values. -</td> -</tr><tr> -<td> -`alpha` -</td> -<td> -optional float parameter for weighting -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -an (f_measure, p_measure, r_measure) tuple, where each element is a -vector of floats with shape [N]. The i-th float in each vector contains -the similarity measure of hypotheses[i] and references[i]. -</td> -</tr> - -</table>
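To illustrate the role of `alpha` described above, a small sketch with made-up token sequences; precision and recall stay fixed, and only the f-measure moves with `alpha`:

```python
import tensorflow as tf
import tensorflow_text as tf_text

hypotheses = tf.ragged.constant([["the", "cat", "sat", "on", "the", "mat"]])
references = tf.ragged.constant([["the", "mat"]])

# The LCS is ["the", "mat"], so precision = 2/6 and recall = 2/2 for any alpha.
for alpha in (0.0, 0.5, 1.0):
    f, p, r = tf_text.metrics.rouge_l(hypotheses, references, alpha=alpha)
    print(alpha, f.numpy(), p.numpy(), r.numpy())
```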
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/ngrams.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/ngrams.md deleted file mode 100644 index 37b813b..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/ngrams.md +++ /dev/null
@@ -1,114 +0,0 @@ -description: Create a tensor of n-grams based on the input data data. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.ngrams" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.ngrams - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/ngrams_op.py">View -source</a> - -Create a tensor of n-grams based on the input data `data`. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.ngrams( - data, width, axis=-1, reduction_type=None, string_separator=' ', - name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Creates a tensor of n-grams based on `data`. The n-grams are of width `width` -and are created along axis `axis`; the n-grams are created by combining -windows of `width` adjacent elements from `data` using `reduction_type`. This -op is intended to cover basic use cases; more complex combinations can be -created using the sliding_window op. - -``` ->>> input_data = tf.ragged.constant([["e", "f", "g"], ["dd", "ee"]]) ->>> ngrams( -... input_data, -... width=2, -... axis=-1, -... reduction_type=Reduction.STRING_JOIN, -... string_separator="|") -<tf.RaggedTensor [[b'e|f', b'f|g'], [b'dd|ee']]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> <td> `data` </td> <td> The data to reduce. </td> </tr><tr> <td> `width` -</td> <td> The width of the ngram window. If there is not sufficient data to -fill out the ngram window, the resulting ngram will be empty. </td> </tr><tr> -<td> `axis` </td> <td> The axis to create ngrams along. Note that for string -join reductions, only axis '-1' is supported; for other reductions, any positive -or negative axis can be used. Should be a constant. </td> </tr><tr> <td> -`reduction_type` </td> <td> A member of the Reduction enum. Should be a -constant. Currently supports: - -* <a href="../text/Reduction.md#SUM"><code>Reduction.SUM</code></a>: Add - values in the window. -* <a href="../text/Reduction.md#MEAN"><code>Reduction.MEAN</code></a>: Average - values in the window. -* <a href="../text/Reduction.md#STRING_JOIN"><code>Reduction.STRING_JOIN</code></a>: Join strings in the window. - Note that axis must be -1 here. - </td> - </tr><tr> - <td> - `string_separator` - </td> - <td> - The separator string used for <a href="../text/Reduction.md#STRING_JOIN"><code>Reduction.STRING_JOIN</code></a>. - Ignored otherwise. Must be a string constant, not a Tensor. - </td> - </tr><tr> - <td> - `name` - </td> - <td> - The op name. - </td> - </tr> - </table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A tensor of ngrams. If the input is a tf.Tensor, the output will also -be a tf.Tensor; if the input is a tf.RaggedTensor, the output will be -a tf.RaggedTensor. 
-</td> -</tr> - -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Raises</h2></th></tr> - -<tr> -<td> -`InvalidArgumentError` -</td> -<td> -if `reduction_type` is either None or not a Reduction, -or if `reduction_type` is STRING_JOIN and `axis` is not -1. -</td> -</tr> -</table>
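The examples above cover `Reduction.STRING_JOIN`; a short sketch of the numeric reductions (values are made up, expected outputs hand-computed):

```python
import tensorflow as tf
import tensorflow_text as tf_text

data = tf.ragged.constant([[1.0, 2.0, 3.0, 4.0], [10.0, 20.0]])

sums = tf_text.ngrams(data, width=2, reduction_type=tf_text.Reduction.SUM)
# Expected: [[3.0, 5.0, 7.0], [30.0]]

means = tf_text.ngrams(data, width=3, reduction_type=tf_text.Reduction.MEAN)
# Expected: [[2.0, 3.0], []]  -- the second row is shorter than the window.
```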
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/normalize_utf8.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/normalize_utf8.md deleted file mode 100644 index 82fbf35..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/normalize_utf8.md +++ /dev/null
@@ -1,83 +0,0 @@ -description: Normalizes each UTF-8 string in the input tensor using the -specified rule. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.normalize_utf8" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.normalize_utf8 - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/normalize_ops.py">View -source</a> - -Normalizes each UTF-8 string in the input tensor using the specified rule. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.normalize_utf8( - input, normalization_form='NFKC', name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -See http://unicode.org/reports/tr15/ - -#### Examples: - -``` ->>> # input: <string>[num_strings] ->>> normalize_utf8(["株式会社", "KADOKAWA"]) ->>> # output: <string>[num_strings] -<tf.Tensor: shape=(2,), dtype=string, numpy= -array([b'\xe6\xa0\xaa\xe5\xbc\x8f\xe4\xbc\x9a\xe7\xa4\xbe', b'KADOKAWA'], - dtype=object)> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `Tensor` or `RaggedTensor` of type string. (Must be UTF-8.) -</td> -</tr><tr> -<td> -`normalization_form` -</td> -<td> -One of the following string values ('NFC', 'NFKC', -'NFD', 'NFKD'). Default is 'NFKC'. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name for this op (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A `Tensor` or `RaggedTensor` of type string, with normalized contents. -</td> -</tr> - -</table>
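A short sketch contrasting two normalization forms; outputs are hedged, the point being that NFKC folds compatibility characters (such as full-width Latin letters) while NFC does not:

```python
import tensorflow as tf
import tensorflow_text as tf_text

strings = tf.constant(["ＴｅｎｓｏｒＦｌｏｗ", "①"])

tf_text.normalize_utf8(strings, "NFKC")  # full-width letters -> ASCII, ① -> 1
tf_text.normalize_utf8(strings, "NFC")   # compatibility characters left intact
```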
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/normalize_utf8_with_offsets_map.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/normalize_utf8_with_offsets_map.md deleted file mode 100644 index a0de05e..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/normalize_utf8_with_offsets_map.md +++ /dev/null
@@ -1,105 +0,0 @@ -description: Normalizes each UTF-8 string in the input tensor using the specified rule. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.normalize_utf8_with_offsets_map" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.normalize_utf8_with_offsets_map - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/normalize_ops.py">View source</a> - - - -Normalizes each UTF-8 string in the input tensor using the specified rule. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.normalize_utf8_with_offsets_map( - input, normalization_form='NFKC', name=None -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -Returns normalized strings and an offset map used by another operation to map -post-normalized string offsets to pre-normalized string offsets. - -See http://unicode.org/reports/tr15/ - -#### Examples: - -``` ->>> # input: <string>[num_strings] ->>> normalize_utf8_with_offsets_map(["株式会社", "KADOKAWA"]) ->>> # output: <string>[num_strings], <variant>[num_strings] -NormalizeUTF8WithOffsetsMap(output=<tf.Tensor: shape=(2,), dtype=string, -numpy= -array([b'\xe6\xa0\xaa\xe5\xbc\x8f\xe4\xbc\x9a\xe7\xa4\xbe', b'KADOKAWA'], - dtype=object)>, offsets_map=<tf.Tensor: shape=(2,), dtype=variant, - numpy=<unprintable>>) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `Tensor` or `RaggedTensor` of type string. (Must be UTF-8.) -normalization_form: One of the following string values ('NFC', 'NFKC', -'NFD', 'NFKD'). Default is 'NFKC'. NOTE: `NFD` and `NFKD` for - `normalize_utf8_with_offsets_map` will not be available until the - tf.text release w/ ICU 69 (scheduled after 4/2021). -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name for this op (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple of (results, offsets_map) where: -</td> -</tr> -<tr> -<td> -`results` -</td> -<td> -A `Tensor` or `RaggedTensor` of type string, with normalized -contents. -</td> -</tr><tr> -<td> -`offsets_map` -</td> -<td> -A `Tensor` or `RaggedTensor` of type `variant`, used to map -the post-normalized string offsets to pre-normalized string offsets. It -has the same shape as the results tensor. offsets_map is an input to -`find_source_offsets` op. -</td> -</tr> -</table> -
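The `offsets_map` output is only useful together with `find_source_offsets`; a minimal sketch of that round trip, with hypothetical post-normalization offsets:

```python
import tensorflow as tf
import tensorflow_text as tf_text

texts = tf.constant(["株式会社ＫＡＤＯＫＡＷＡ"])
normalized, offsets_map = tf_text.normalize_utf8_with_offsets_map(texts, "NFKC")

# Hypothetical byte offsets into the normalized strings (e.g. token boundaries
# produced by a downstream tokenizer).
post_offsets = tf.constant([[0, 12, 20]], dtype=tf.int64)
pre_offsets = tf_text.find_source_offsets(offsets_map, post_offsets)
# pre_offsets points at the corresponding bytes of the original, un-normalized
# strings.
```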
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/pad_along_dimension.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/pad_along_dimension.md deleted file mode 100644 index 83c6194..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/pad_along_dimension.md +++ /dev/null
@@ -1,106 +0,0 @@ -description: Add padding to the beginning and end of data in a specific -dimension. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.pad_along_dimension" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.pad_along_dimension - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/pad_along_dimension_op.py">View -source</a> - -Add padding to the beginning and end of data in a specific dimension. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.pad_along_dimension( - data, axis=-1, left_pad=None, right_pad=None, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Returns a tensor constructed from `data`, where each row in dimension `axis` -is replaced by the concatenation of the left padding followed by the row -followed by the right padding. I.e., if `L=left_pad.shape[0]` and -`R=right_pad.shape[0]`, then: - -```python -result[i1...iaxis, 0:L] = left_pad -result[i1...iaxis, L:-R] = data[i0...iaxis] -result[i1...iaxis, -R:] = right_pad -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`data` -</td> -<td> -`<dtype>[O1...ON, A, I1...IM]` A potentially ragged `K` dimensional -tensor with outer dimensions of size `O1...ON`; axis dimension of size -`A`; and inner dimensions of size `I1...IM`. I.e. `K = N + 1 + M`, where -`N>=0` and `M>=0`. -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -An integer constant specifying the axis along which padding is added. -Negative axis values from `-K` to `-1` are supported. -</td> -</tr><tr> -<td> -`left_pad` -</td> -<td> -`<dtype>[L, I1...IM]` An `M+1` dimensional tensor that should be -prepended to each row along dimension `axis`; or `None` if no padding -should be added to the left side. -</td> -</tr><tr> -<td> -`right_pad` -</td> -<td> -`<dtype>[R, I1...IM]` An `M+1` dimensional tensor that should be -appended to each row along dimension `axis`; or `None` if no padding -should be added to the right side. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name of this op (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -`<dtype>[O1...ON, L + A + R, I1...IM]` -A potentially ragged `K` dimensional tensor with outer dimensions of size -`O1...ON`; padded axis dimension size `L+A+R`; and inner dimensions of -size `I1...IM`. If `data` is a `RaggedTensor`, then the returned tensor -is a `RaggedTensor` with the same `ragged_rank`. -</td> -</tr> - -</table>
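A minimal usage sketch of the padding described above, adding invented boundary markers around each row of a ragged batch:

```python
import tensorflow as tf
import tensorflow_text as tf_text

tokens = tf.ragged.constant([["hello", "there"], ["how", "are", "you"]])

padded = tf_text.pad_along_dimension(
    tokens, axis=-1, left_pad=["[BOS]"], right_pad=["[EOS]"])
# Expected:
# [['[BOS]', 'hello', 'there', '[EOS]'],
#  ['[BOS]', 'how', 'are', 'you', '[EOS]']]
```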
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/pad_model_inputs.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/pad_model_inputs.md deleted file mode 100644 index 2e2205d4..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/pad_model_inputs.md +++ /dev/null
@@ -1,122 +0,0 @@ -description: Pad model input and generate corresponding input masks. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.pad_model_inputs" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.pad_model_inputs - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/pad_model_inputs_ops.py">View source</a> - - - -Pad model input and generate corresponding input masks. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.pad_model_inputs( - input, max_seq_length, pad_value=0 -) -</code></pre> - - - -<!-- Placeholder for "Used in" --> - -`pad_model_inputs` performs the final packaging of a model's inputs commonly -found in text models. This includes padding out (or simply truncating) to a -fixed-size, 2-dimensional `Tensor` and generating mask `Tensor`s (of the same 2D -shape) with values of 0 if the corresponding item is a pad value and 1 if it is -part of the original input. - -Note that a simple truncation strategy (drop everything after max sequence -length) is used to force the inputs to the specified shape. This may be -incorrect and users should instead apply a `Trimmer` upstream to safely truncate -large inputs. - -``` ->>> input_data = tf.ragged.constant([ -... [101, 1, 2, 102, 10, 20, 102], -... [101, 3, 4, 102, 30, 40, 50, 60, 70, 80], -... [101, 5, 6, 7, 8, 9, 102, 70], -... ], np.int32) ->>> data, mask = pad_model_inputs(input=input_data, max_seq_length=9) ->>> print("data: %s, mask: %s" % (data, mask)) - data: tf.Tensor( - [[101 1 2 102 10 20 102 0 0] - [101 3 4 102 30 40 50 60 70] - [101 5 6 7 8 9 102 70 0]], shape=(3, 9), dtype=int32), - mask: tf.Tensor( - [[1 1 1 1 1 1 1 0 0] - [1 1 1 1 1 1 1 1 1] - [1 1 1 1 1 1 1 1 0]], shape=(3, 9), dtype=int32) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input` -</td> -<td> -A `RaggedTensor` with rank >= 2. -</td> -</tr><tr> -<td> -`max_seq_length` -</td> -<td> -An int, or scalar `Tensor`. The "input" `Tensor` will be -flattened down to 2 dimensions and then have its 2nd dimension either -padded out or truncated to this size. -</td> -</tr><tr> -<td> -`pad_value` -</td> -<td> -An int or scalar `Tensor` specifying the value used for padding. -</td> -</tr> -</table> - - - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple of (padded_input, pad_mask) where: -</td> -</tr> -<tr> -<td> -`padded_input` -</td> -<td> -A `Tensor` corresponding to `inputs` that has been -padded/truncated out to a fixed size and flattened to 2 -dimensions. -</td> -</tr><tr> -<td> -`pad_mask` -</td> -<td> -A `Tensor` corresponding to `padded_input` whose values are -0 if the corresponding item is a pad value and 1 if it is not. -</td> -</tr> -</table> -
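The note above recommends trimming before padding; one way to wire that up, sketched with a `WaterfallTrimmer` and an assumed budget of 5 items per sequence:

```python
import tensorflow as tf
import tensorflow_text as tf_text

segments = tf.ragged.constant([[101, 1, 2, 3, 4, 5, 102],
                               [101, 6, 102]])

# Budget-aware truncation first, then fixed-shape padding plus its mask.
trimmer = tf_text.WaterfallTrimmer(max_seq_length=5)
trimmed = trimmer.trim([segments])[0]
data, mask = tf_text.pad_model_inputs(trimmed, max_seq_length=5)
```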
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/regex_split.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/regex_split.md deleted file mode 100644 index 1c5cad1..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/regex_split.md +++ /dev/null
@@ -1,116 +0,0 @@ -description: Split input by delimiters that match a regex pattern. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.regex_split" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.regex_split - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/regex_split_ops.py">View source</a> - - - -Split `input` by delimiters that match a regex pattern. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.regex_split( - input, delim_regex_pattern, keep_delim_regex_pattern='', name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -`regex_split` will split `input` using delimiters that match a -regex pattern in `delim_regex_pattern`. Here is an example: - -``` ->>> text_input=["hello there"] ->>> # split by whitespace ->>> regex_split(input=text_input, -... delim_regex_pattern="\s") -<tf.RaggedTensor [[b'hello', b'there']]> -``` - -By default, delimiters are not included in the split string results. -Delimiters may be included by specifying a regex pattern -`keep_delim_regex_pattern`. For example: - -``` ->>> text_input=["hello there"] ->>> # split by whitespace ->>> regex_split(input=text_input, -... delim_regex_pattern="\s", -... keep_delim_regex_pattern="\s") -<tf.RaggedTensor [[b'hello', b' ', b'there']]> -``` - -If there are multiple delimiters in a row, there are no empty splits emitted. -For example: - -``` ->>> text_input=["hello there"] # Note the two spaces between the words. ->>> # split by whitespace ->>> regex_split(input=text_input, -... delim_regex_pattern="\s") -<tf.RaggedTensor [[b'hello', b'there']]> -``` - -See https://github.com/google/re2/wiki/Syntax for the full list of supported -expressions. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input` -</td> -<td> -A Tensor or RaggedTensor of string input. -</td> -</tr><tr> -<td> -`delim_regex_pattern` -</td> -<td> -A string containing the regex pattern of a delimiter. -</td> -</tr><tr> -<td> -`keep_delim_regex_pattern` -</td> -<td> -(optional) Regex pattern of delimiters that should -be kept in the result. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -(optional) Name of the op. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A RaggedTensors containing of type string containing the split string -pieces. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/regex_split_with_offsets.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/regex_split_with_offsets.md deleted file mode 100644 index 1062878..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/regex_split_with_offsets.md +++ /dev/null
@@ -1,134 +0,0 @@ -description: Split input by delimiters that match a regex pattern; returns -offsets. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.regex_split_with_offsets" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.regex_split_with_offsets - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/regex_split_ops.py">View source</a> - - - -Split `input` by delimiters that match a regex pattern; returns offsets. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.regex_split_with_offsets( - input, delim_regex_pattern, keep_delim_regex_pattern='', name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -`regex_split_with_offsets` will split `input` using delimiters that match a -regex pattern in `delim_regex_pattern`. It will return three tensors: one -containing the split substrings ('result' in the examples below), one containing -the offsets of the starts of each substring ('begin' in the examples below), and -one containing the offsets of the ends of each substring ('end' in the examples -below). - -#### Here is an example: - -``` ->>> text_input=["hello there"] ->>> # split by whitespace ->>> result, begin, end = regex_split_with_offsets(input=text_input, -... delim_regex_pattern="\s") ->>> print("result: %s\nbegin: %s\nend: %s" % (result, begin, end)) -result: <tf.RaggedTensor [[b'hello', b'there']]> -begin: <tf.RaggedTensor [[0, 6]]> -end: <tf.RaggedTensor [[5, 11]]> -``` - -By default, delimiters are not included in the split string results. -Delimiters may be included by specifying a regex pattern -`keep_delim_regex_pattern`. For example: - -``` ->>> text_input=["hello there"] ->>> # split by whitespace ->>> result, begin, end = regex_split_with_offsets(input=text_input, -... delim_regex_pattern="\s", -... keep_delim_regex_pattern="\s") ->>> print("result: %s\nbegin: %s\nend: %s" % (result, begin, end)) -result: <tf.RaggedTensor [[b'hello', b' ', b'there']]> -begin: <tf.RaggedTensor [[0, 5, 6]]> -end: <tf.RaggedTensor [[5, 6, 11]]> -``` - -If there are multiple delimiters in a row, there are no empty splits emitted. -For example: - -``` ->>> text_input=["hello there"] # Note the two spaces between the words. ->>> # split by whitespace ->>> result, begin, end = regex_split_with_offsets(input=text_input, -... delim_regex_pattern="\s") ->>> print("result: %s\nbegin: %s\nend: %s" % (result, begin, end)) -result: <tf.RaggedTensor [[b'hello', b'there']]> -begin: <tf.RaggedTensor [[0, 7]]> -end: <tf.RaggedTensor [[5, 12]]> -``` - -See https://github.com/google/re2/wiki/Syntax for the full list of supported -expressions. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input` -</td> -<td> -A Tensor or RaggedTensor of string input. -</td> -</tr><tr> -<td> -`delim_regex_pattern` -</td> -<td> -A string containing the regex pattern of a delimiter. -</td> -</tr><tr> -<td> -`keep_delim_regex_pattern` -</td> -<td> -(optional) Regex pattern of delimiters that should -be kept in the result. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -(optional) Name of the op. 
-</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A tuple of RaggedTensors containing: - (split_results, begin_offsets, end_offsets) -where split_results is of type string, and begin_offsets and end_offsets are -of type int64. -</td> -</tr> - -</table>
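One reason to prefer this variant over `regex_split` is that the byte offsets let callers point back into the original string; a small sketch (indexing into row 0 is just for illustration):

```python
import tensorflow as tf
import tensorflow_text as tf_text

text_input = tf.constant(["hello there"])
pieces, begin, end = tf_text.regex_split_with_offsets(
    input=text_input, delim_regex_pattern=r"\s")

# Recover the first piece of the first string directly from the source bytes.
first = tf.strings.substr(text_input[0],
                          pos=begin[0][0],
                          len=end[0][0] - begin[0][0])
# first should be b'hello'.
```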
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/sentence_fragments.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/sentence_fragments.md deleted file mode 100644 index d9ccc5e..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/sentence_fragments.md +++ /dev/null
@@ -1,121 +0,0 @@ -description: Find the sentence fragments in a given text. (deprecated) - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.sentence_fragments" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.sentence_fragments - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sentence_breaking_ops.py">View -source</a> - -Find the sentence fragments in a given text. (deprecated) - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.sentence_fragments( - token_word, token_starts, token_ends, token_properties, - input_encoding='UTF-8', errors='replace', - replacement_char=65533, replace_control_characters=False -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Warning: THIS FUNCTION IS DEPRECATED. It will be removed in a future version. -Instructions for updating: Deprecated, use 'StateBasedSentenceBreaker' instead. - -A sentence fragment is a potential next sentence determined using -deterministic heuristics based on punctuation, capitalization, and similar -text attributes. - -NOTE: This op is deprecated. Use `StateBasedSentenceBreaker` instead. - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> <td> `token_word` </td> <td> A Tensor (w/ rank=2) or a RaggedTensor (w/ -ragged_rank=1) containing the token strings. </td> </tr><tr> <td> `token_starts` -</td> <td> A Tensor (w/ rank=2) or a RaggedTensor (w/ ragged_rank=1) containing -offsets where the token starts. </td> </tr><tr> <td> `token_ends` </td> <td> A -Tensor (w/ rank=2) or a RaggedTensor (w/ ragged_rank=1) containing offsets where -the token ends. </td> </tr><tr> <td> `token_properties` </td> <td> A Tensor (w/ -rank=2) or a RaggedTensor (w/ ragged_rank=1) containing a bitmask. - -The values of the bitmask are: - -* 0x01 (ILL_FORMED) - Text is ill-formed: typically applies to all tokens of a - paragraph that is too short or lacks terminal punctuation. -* 0x02 (HEADING) -* 0x04 (BOLD) -* 0x10 (UNDERLINED) -* 0x20 (LIST) -* 0x40 (TITLE) -* 0x80 (EMOTICON) -* 0x100 (ACRONYM) - Token was identified as an acronym. Period-, hyphen-, and - space-separated acronyms: "U.S.", "U-S", and "U S". -* 0x200 (HYPERLINK) - Indicates that the token (or part of the token) is - covered by at least one hyperlink. </td> </tr><tr> <td> `input_encoding` - </td> <td> String name for the unicode encoding that should be used to - decode each string. </td> </tr><tr> <td> `errors` </td> <td> Specifies the - response when an input string can't be converted using the indicated - encoding. One of: - -* `'strict'`: Raise an exception for any illegal substrings. - -* `'replace'`: Replace illegal substrings with `replacement_char`. - -* `'ignore'`: Skip illegal substrings. - </td> - </tr><tr> - <td> - `replacement_char` - </td> - <td> - The replacement codepoint to be used in place of invalid - substrings in `input` when `errors='replace'`; and in place of C0 control - characters in `input` when `replace_control_characters=True`. - </td> - </tr><tr> - <td> - `replace_control_characters` - </td> - <td> - Whether to replace the C0 control characters - `(U+0000 - U+001F)` with the `replacement_char`. 
- </td> - </tr> - </table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A RaggedTensor of `fragment_start`, `fragment_end`, `fragment_properties` -and `terminal_punc_token`. - -`fragment_properties` is an int32 bitmask whose values may contain: - -* 1 = fragment ends with terminal punctuation -* 2 = fragment ends with multiple terminal punctuations (e.g. "She said - what?!") -* 3 = Has close parenthesis (e.g. "Mushrooms (they're fungi).") -* 4 = Has sentential close parenthesis (e.g. "(Mushrooms are fungi!)") - - `terminal_punc_token` is a RaggedTensor containing the index of the terminal - punctuation token immediately following the last word in the fragment -- or - the index of the last word itself, if it's an acronym (since acronyms include - the terminal punctuation). </td> - </tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/sliding_window.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/sliding_window.md deleted file mode 100644 index 15e383b..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/sliding_window.md +++ /dev/null
@@ -1,162 +0,0 @@ -description: Builds a sliding window for data with a specified width. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.sliding_window" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.sliding_window - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/sliding_window_op.py">View -source</a> - -Builds a sliding window for `data` with a specified width. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.sliding_window( - data, width, axis=-1, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Returns a tensor constructed from `data`, where each element in -dimension `axis` is a slice of `data` starting at the corresponding -position, with the given width and step size. I.e.: - -* `result.shape.ndims = data.shape.ndims + 1` -* `result[i1..iaxis, a] = data[i1..iaxis, a:a+width]` - (where `0 <= a < data[i1...iaxis].shape[0] - (width - 1)`). - -Note that each result row (along dimension `axis`) has `width - 1` fewer items -than the corresponding `data` row. If a `data` row has fewer than `width` -items, then the corresponding `result` row will be empty. If you wish for -the `result` rows to be the same size as the `data` rows, you can use -`pad_along_dimension` to add `width - 1` padding elements before calling -this op. - -#### Examples: - -Sliding window (width=3) across a sequence of tokens: - -``` ->>> # input: <string>[sequence_length] ->>> input = tf.constant(["one", "two", "three", "four", "five", "six"]) ->>> # output: <string>[sequence_length-2, 3] ->>> sliding_window(data=input, width=3, axis=0) -<tf.Tensor: shape=(4, 3), dtype=string, numpy= - array([[b'one', b'two', b'three'], - [b'two', b'three', b'four'], - [b'three', b'four', b'five'], - [b'four', b'five', b'six']], dtype=object)> -``` - -Sliding window (width=2) across the inner dimension of a ragged matrix -containing a batch of token sequences: - -``` ->>> # input: <string>[num_sentences, (num_words)] ->>> input = tf.ragged.constant( -... [['Up', 'high', 'in', 'the', 'air'], -... ['Down', 'under', 'water'], -... ['Away', 'to', 'outer', 'space']]) ->>> # output: <string>[num_sentences, (num_word-1), 2] ->>> sliding_window(input, width=2, axis=-1) -<tf.RaggedTensor [[[b'Up', b'high'], [b'high', b'in'], [b'in', b'the'], - [b'the', b'air']], [[b'Down', b'under'], - [b'under', b'water']], - [[b'Away', b'to'], [b'to', b'outer'], - [b'outer', b'space']]]> -``` - -Sliding window across the second dimension of a 3-D tensor containing batches of -sequences of embedding vectors: - -``` ->>> # input: <int32>[num_sequences, sequence_length, embedding_size] ->>> input = tf.constant([ -... [[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], [5, 5, 1]], -... 
[[1, 1, 2], [2, 2, 2], [3, 3, 2], [4, 4, 2], [5, 5, 2]]]) ->>> # output: <int32>[num_sequences, sequence_length-1, 2, embedding_size] ->>> sliding_window(data=input, width=2, axis=1) -<tf.Tensor: shape=(2, 4, 2, 3), dtype=int32, numpy= - array([[[[1, 1, 1], - [2, 2, 1]], - [[2, 2, 1], - [3, 3, 1]], - [[3, 3, 1], - [4, 4, 1]], - [[4, 4, 1], - [5, 5, 1]]], - [[[1, 1, 2], - [2, 2, 2]], - [[2, 2, 2], - [3, 3, 2]], - [[3, 3, 2], - [4, 4, 2]], - [[4, 4, 2], - [5, 5, 2]]]], dtype=int32)> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`data` -</td> -<td> -`<dtype> [O1...ON, A, I1...IM]` -A potentially ragged K-dimensional tensor with outer dimensions of size -`O1...ON`; axis dimension of size `A`; and inner dimensions of size -`I1...IM`. I.e. `K = N + 1 + M`, where `N>=0` and `M>=0`. -</td> -</tr><tr> -<td> -`width` -</td> -<td> -An integer constant specifying the width of the window. Must be -greater than zero. -</td> -</tr><tr> -<td> -`axis` -</td> -<td> -An integer constant specifying the axis along which sliding window -is computed. Negative axis values from `-K` to `-1` are supported. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name for this op (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A `K+1` dimensional tensor with the same dtype as `data`, where: - -* `result[i1..iaxis, a]` = `data[i1..iaxis, a:a+width]` -* `result.shape[:axis]` = `data.shape[:axis]` -* `result.shape[axis]` = `data.shape[axis] - (width - 1)` -* `result.shape[axis + 1]` = `width` -* `result.shape[axis + 2:]` = `data.shape[axis + 1:]` </td> </tr> - -</table>
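The note above about combining `pad_along_dimension` with this op can be made concrete; a sketch using an invented `[PAD]` marker so every input item starts a window:

```python
import tensorflow as tf
import tensorflow_text as tf_text

tokens = tf.ragged.constant([["one", "two", "three"], ["four", "five"]])
width = 3

# Pad each row with width - 1 trailing items before windowing.
padded = tf_text.pad_along_dimension(
    tokens, axis=-1, right_pad=["[PAD]"] * (width - 1))
windows = tf_text.sliding_window(padded, width=width, axis=-1)
# Each row of `windows` now has exactly as many windows as `tokens` had items.
```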
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/span_alignment.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/span_alignment.md deleted file mode 100644 index 4142a60..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/span_alignment.md +++ /dev/null
@@ -1,183 +0,0 @@ -description: Return an alignment from a set of source spans to a set of target -spans. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.span_alignment" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.span_alignment - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/pointer_ops.py">View -source</a> - -Return an alignment from a set of source spans to a set of target spans. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.span_alignment( - source_start, source_limit, target_start, target_limit, contains=False, - contained_by=False, partial_overlap=False, multivalent_result=False, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -The source and target spans are specified using B+1 dimensional tensors, -with `B>=0` batch dimensions followed by a final dimension that lists the -span offsets for each span in the batch: - -* The `i`th source span in batch `b1...bB` starts at - `source_start[b1...bB, i]` (inclusive), and extends to just before - `source_limit[b1...bB, i]` (exclusive). -* The `j`th target span in batch `b1...bB` starts at - `target_start[b1...bB, j]` (inclusive), and extends to just before - `target_limit[b1...bB, j]` (exclusive). - -`result[b1...bB, i]` contains the index (or indices) of the target span that -overlaps with the `i`th source span in batch `b1...bB`. The -`multivalent_result` parameter indicates whether the result should contain -a single span that aligns with the source span, or all spans that align with -the source span. - -* If `multivalent_result` is false (the default), then `result[b1...bB, i]=j` - indicates that the `j`th target span overlaps with the `i`th source span - in batch `b1...bB`. If no target spans overlap with the `i`th target span, - then `result[b1...bB, i]=-1`. - -* If `multivalent_result` is true, then `result[b1...bB, i, n]=j` indicates - that the `j`th target span is the `n`th span that overlaps with the `i`th - source span in in batch `b1...bB`. - -For a definition of span overlap, see the docstring for `span_overlaps()`. - -#### Examples: - -Given the following source and target spans (with no batch dimensions): - -``` ->>> # 0 5 10 15 20 25 30 35 40 45 50 55 60 ->>> # |====|====|====|====|====|====|====|====|====|====|====|====| ->>> # Source: [-0-] [-1-] [2] [3] [4][-5-][-6-][-7-][-8-][-9-] ->>> # Target: [-0-][-1-] [-2-][-3-][-4-] [5] [6] [7] [-8-][-9-][10] ->>> # |====|====|====|====|====|====|====|====|====|====|====|====| ->>> source_starts = [0, 10, 16, 20, 27, 30, 35, 40, 45, 50] ->>> source_limits = [5, 15, 19, 23, 30, 35, 40, 45, 50, 55] ->>> target_starts = [0, 5, 15, 20, 25, 31, 35, 42, 47, 52, 57] ->>> target_limits = [5, 10, 20, 25, 30, 34, 38, 45, 52, 57, 61] ->>> span_alignment(source_starts, source_limits, target_starts, target_limits) -<tf.Tensor: shape=(10,), dtype=int64, - numpy=array([ 0, -1, -1, -1, -1, -1, -1, -1, -1, -1])> ->>> span_alignment(source_starts, source_limits, target_starts, target_limits, -... multivalent_result=True) -<tf.RaggedTensor [[0], [], [], [], [], [], [], [], [], []]> ->>> span_alignment(source_starts, source_limits, target_starts, target_limits, -... 
contains=True) -<tf.Tensor: shape=(10,), dtype=int64, - numpy=array([ 0, -1, -1, -1, -1, 5, 6, 7, -1, -1])> ->>> span_alignment(source_starts, source_limits, target_starts, target_limits, -... partial_overlap=True, multivalent_result=True) -<tf.RaggedTensor [[0], [], [2], [3], [4], [5], [6], [7], [8], [8, 9]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`source_start` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, source_size]`: the start offset of each source span. -</td> -</tr><tr> -<td> -`source_limit` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, source_size]`: the limit offset of each source span. -</td> -</tr><tr> -<td> -`target_start` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, target_size]`: the start offset of each target span. -</td> -</tr><tr> -<td> -`target_limit` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, target_size]`: the limit offset of each target span. -</td> -</tr><tr> -<td> -`contains` -</td> -<td> -If true, then a source span is considered to overlap a target span -when the source span contains the target span. -</td> -</tr><tr> -<td> -`contained_by` -</td> -<td> -If true, then a source span is considered to overlap a target -span when the source span is contained by the target span. -</td> -</tr><tr> -<td> -`partial_overlap` -</td> -<td> -If true, then a source span is considered to overlap a -target span when the source span partially overlaps the target span. -</td> -</tr><tr> -<td> -`multivalent_result` -</td> -<td> -Whether the result should contain a single target span -index (if `multivalent_result=False`) or a list of target span indices (if -`multivalent_result=True`) for each source span. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -A name for the operation (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -An int64 tensor with values in the range: `-1 <= result < target_size`. -If `multivalent_result=False`, then the returned tensor has shape - `[source_size]`, where `source_size` is the length of the `source_start` - and `source_limit` input tensors. If `multivalent_result=True`, then the - returned tensor has shape `[source_size, (num_aligned_target_spans)]. -</td> -</tr> - -</table> -
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/span_overlaps.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/span_overlaps.md deleted file mode 100644 index 1c20fd1..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/span_overlaps.md +++ /dev/null
@@ -1,178 +0,0 @@ -description: Returns a boolean tensor indicating which source and target spans -overlap. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.span_overlaps" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.span_overlaps - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/pointer_ops.py">View -source</a> - -Returns a boolean tensor indicating which source and target spans overlap. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.span_overlaps( - source_start, source_limit, target_start, target_limit, contains=False, - contained_by=False, partial_overlap=False, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -The source and target spans are specified using B+1 dimensional tensors, -with `B>=0` batch dimensions followed by a final dimension that lists the -span offsets for each span in the batch: - -* The `i`th source span in batch `b1...bB` starts at - `source_start[b1...bB, i]` (inclusive), and extends to just before - `source_limit[b1...bB, i]` (exclusive). -* The `j`th target span in batch `b1...bB` starts at - `target_start[b1...bB, j]` (inclusive), and extends to just before - `target_limit[b1...bB, j]` (exclusive). - -`result[b1...bB, i, j]` is true if the `i`th source span overlaps with the -`j`th target span in batch `b1...bB`, where a source span overlaps a target -span if any of the following are true: - - * The spans are identical. - * `contains` is true, and the source span contains the target span. - * `contained_by` is true, and the source span is contained by the target - span. - * `partial_overlap` is true, and there is a non-zero overlap between the - source span and the target span. - -#### Example: - -Given the following source and target spans (with no batch dimensions): - -``` - >>> # 0 5 10 15 20 25 30 35 40 - >>> # |====|====|====|====|====|====|====|====| - >>> # Source: [-0-] [-1-] [2] [-3-][-4-][-5-] - >>> # Target: [-0-][-1-] [-2-] [3] [-4-][-5-] - >>> # |====|====|====|====|====|====|====|====| - >>> source_start = [0, 10, 16, 20, 25, 30] - >>> source_limit = [5, 15, 19, 25, 30, 35] - >>> target_start = [0, 5, 15, 21, 27, 31] - >>> target_limit = [5, 10, 20, 24, 32, 37] -``` - -`result[i, j]` will be true at the following locations: - -``` -* `[0, 0]` (always) -* `[2, 2]` (if contained_by=True or partial_overlaps=True) -* `[3, 3]` (if contains=True or partial_overlaps=True) -* `[4, 4]` (if partial_overlaps=True) -* `[5, 4]` (if partial_overlaps=True) -* `[5, 5]` (if partial_overlaps=True) -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`source_start` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, source_size]`: the start offset of each source span. -</td> -</tr><tr> -<td> -`source_limit` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, source_size]`: the limit offset of each source span. -</td> -</tr><tr> -<td> -`target_start` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, target_size]`: the start offset of each target span. 
-</td> -</tr><tr> -<td> -`target_limit` -</td> -<td> -A B+1 dimensional potentially ragged tensor with shape -`[D1...DB, target_size]`: the limit offset of each target span. -</td> -</tr><tr> -<td> -`contains` -</td> -<td> -If true, then a source span is considered to overlap a target span -when the source span contains the target span. -</td> -</tr><tr> -<td> -`contained_by` -</td> -<td> -If true, then a source span is considered to overlap a target -span when the source span is contained by the target span. -</td> -</tr><tr> -<td> -`partial_overlap` -</td> -<td> -If true, then a source span is considered to overlap a -target span when the source span partially overlaps the target span. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -A name for the operation (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -A B+2 dimensional potentially ragged boolean tensor with shape -`[D1...DB, source_size, target_size]`. -</td> -</tr> - -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Raises</h2></th></tr> - -<tr> -<td> -`ValueError` -</td> -<td> -If the span tensors are incompatible. -</td> -</tr> -</table> -
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/viterbi_constrained_sequence.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/viterbi_constrained_sequence.md deleted file mode 100644 index 881aa4d..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/viterbi_constrained_sequence.md +++ /dev/null
@@ -1,183 +0,0 @@ -description: Performs greedy constrained sequence on a batch of examples. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.viterbi_constrained_sequence" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.viterbi_constrained_sequence - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/viterbi_constrained_sequence_op.py">View -source</a> - -Performs greedy constrained sequence on a batch of examples. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.viterbi_constrained_sequence( - scores, sequence_length=None, allowed_transitions=None, transition_weights=None, - use_log_space=False, use_start_and_end_states=True, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -Constrains a set of predictions based on a set of legal transitions and/or a set -of transition weights, returning the legal sequence that maximizes the product -of the state scores and the transition weights according to the Viterbi -algorithm. If `use_log_space` is True, the Viterbi calculation will be performed -in log space (with sums); if it is False, the Viterbi calculation will be -performed in exp space (with normalized products). - -This op also takes a parameter `use_start_and_end_states`, which when true will -add an implicit start and end state to each sequence. These implicit states -allow the user to specify additional weights and permitted transitions to start -and end a sequence (so, for instance, if you wanted to forbid your output from -ending in a certain set of states you could do so). - -Inputs to this op can take one of three forms: a single TensorFlow tensor of -scores with no sequence lengths, a TensorFlow tensor of scores along with a -TensorFlow tensor of sequence lengths, or a RaggedTensor. If only the scores -tensor is passed, this op will assume that the sequence lengths are equal to the -size of the tensor (and so use all the data provided). If a scores tensor and -sequence_lengths tensor is provided, the op will only use the data in the scores -tensor as specified by the sequence_lengths tensor. Finally, if a RaggedTensor -is provided, the sequence_lengths will be ignored and the variable length -sequences in the RaggedTensor will be used. - -``` ->>> scores = np.array([[10.0, 12.0, 6.0, 4.0], -... [13.0, 12.0, 11.0, 10.0]], dtype=np.float32) ->>> sequence_length = np.array([2]) ->>> transition_weights = np.array([[ .1, .2, .3, .4], -... [ .5, .6, .7, .8], -... [ .9, .1, .15, .2], -... [.25, .35, .45, .55]], dtype=np.float32) ->>> allowed_transitions = np.array([[True, True, True, True], -... [True, True, True, True], -... [True, False, True, False], -... [True, True, True, True]]) ->>> viterbi_constrained_sequence( -... scores=scores, -... sequence_length=sequence_length, -... allowed_transitions=allowed_transitions, -... transition_weights=transition_weights, -... use_log_space=False, -... 
use_start_and_end_states=False) -<tf.RaggedTensor [[1, 3]]> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`scores` -</td> -<td> -`<float32> [batch_size, num_steps, |num_states|]` -A tensor of scores, where `scores[b, t, s]` is the predicted score for -transitioning to state `s` at step `t` for batch `b`. The |num_states| -dimension must correspond to the num_states attribute for this op. This -input may be ragged; if it is ragged, the ragged tensor should have the -same structure [b, t, s] and only axis 1 should be ragged. -</td> -</tr><tr> -<td> -`sequence_length` -</td> -<td> -`<{int32, int64}>[batch_size]` -A rank-1 tensor representing the length of the output sequence. If None, -and the 'scores' input is not ragged, sequence lengths will be assumed -to be the length of the score tensor. -</td> -</tr><tr> -<td> -`allowed_transitions` -</td> -<td> - if use_start_and_end_states is TRUE: - `<bool>[num_states+1, num_states+1]` -if use_start_and_end_states is FALSE: - `<bool>[num_states, num_states]` -A rank-2 tensor representing allowed transitions. -- allowed_transitions[i][j] is true if the transition from state i to - state j is allowed for i and j in 0...(num_states). -- allowed_transitions[num_states][num_states] is ignored. -If use_start_and_end_states is TRUE: - - allowed_transitions[num_states][j] is true if the sequence is allowed - to start from state j. - - allowed_transitions[i][num_states] is true if the sequence is allowed - to end on state i. -Default - An empty tensor. This allows all sequence states to transition - to all other sequence states. -</td> -</tr><tr> -<td> -`transition_weights` -</td> -<td> - if use_start_and_end_states is TRUE: - `<float32>[num_states+1, num_states+1]` -if use_start_and_end_states is FALSE: - `<float32>[num_states, num_states]` -A rank-2 tensor representing transition weights. -- transition_weights[i][j] is the coefficient that a candidate transition - score will be multiplied by if that transition is from state i to - state j. -- transition_weights[num_states][num_states] is ignored. -If use_start_and_end_states is TRUE: - - transition_weights[num_states][j] is the coefficient that will be used - if the transition starts with state j. - - transition_weights[i][num_states] is the coefficient that will be used - if the final state in the sequence is state i. -Default - An empty tensor. This assigns a weight of 1.0 to all transitions. -</td> -</tr><tr> -<td> -`use_log_space` -</td> -<td> -Whether to use log space for the calculation. If false, -calculations will be done in exp-space. -</td> -</tr><tr> -<td> -`use_start_and_end_states` -</td> -<td> -If True, sequences will have an implicit start -and end state added. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -The name scope within which this op should be constructed. -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -An <int32>[batch_size, (num_steps)] ragged tensor containing the appropriate -sequence of transitions. If a sequence is impossible, the value of the -RaggedTensor for that and all following transitions in that sequence shall -be '-1'. -</td> -</tr> - -</table>
diff --git a/third_party/tensorflow-text/src/docs/api_docs/python/text/wordshape.md b/third_party/tensorflow-text/src/docs/api_docs/python/text/wordshape.md deleted file mode 100644 index 043c946..0000000 --- a/third_party/tensorflow-text/src/docs/api_docs/python/text/wordshape.md +++ /dev/null
@@ -1,94 +0,0 @@ -description: Determine wordshape features for each input string. - -<div itemscope itemtype="http://developers.google.com/ReferenceObject"> -<meta itemprop="name" content="text.wordshape" /> -<meta itemprop="path" content="Stable" /> -</div> - -# text.wordshape - -<!-- Insert buttons and diff --> - -<table class="tfo-notebook-buttons tfo-api nocontent" align="left"> - -</table> - -<a target="_blank" href="https://github.com/tensorflow/text/tree/master/tensorflow_text/python/ops/wordshape_ops.py">View -source</a> - -Determine wordshape features for each input string. - -<pre class="devsite-click-to-copy prettyprint lang-py tfo-signature-link"> -<code>text.wordshape( - input_tensor, pattern, name=None -) -</code></pre> - -<!-- Placeholder for "Used in" --> - -In this example, we test for title case (the first character is upper or title -case, and the remaining characters are lowercase). - -``` ->>> input = [ -...     u"abc", u"ABc", u"ABC", u"Abc", u"aBcd", u"\u01c8bc".encode("utf-8") -... ] ->>> wordshape(input, WordShape.HAS_TITLE_CASE) -<tf.Tensor: shape=(6,), dtype=bool, numpy=array([False, False, False, True, False, True])> -``` - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Args</h2></th></tr> - -<tr> -<td> -`input_tensor` -</td> -<td> -string `Tensor` with any shape. -</td> -</tr><tr> -<td> -`pattern` -</td> -<td> -A `tftext.WordShape` or a list of WordShapes. -</td> -</tr><tr> -<td> -`name` -</td> -<td> -A name for the operation (optional). -</td> -</tr> -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Returns</h2></th></tr> -<tr class="alt"> -<td colspan="2"> -`<bool>[input_tensor.shape + pattern.shape]`: A tensor where -`result[i1...iN, j]` is true if `input_tensor[i1...iN]` has the wordshape -specified by `pattern[j]`. -</td> -</tr> - -</table> - -<!-- Tabular view --> - <table class="responsive fixed orange"> -<colgroup><col width="214px"><col></colgroup> -<tr><th colspan="2"><h2 class="add-link">Raises</h2></th></tr> - -<tr> -<td> -`ValueError` -</td> -<td> -If `pattern` contains an unknown identifier. -</td> -</tr> -</table>
diff --git a/third_party/tensorflow-text/src/docs/guide/BUILD b/third_party/tensorflow-text/src/docs/guide/BUILD deleted file mode 100644 index cf700c9..0000000 --- a/third_party/tensorflow-text/src/docs/guide/BUILD +++ /dev/null
@@ -1,27 +0,0 @@ -load("//third_party/py/tensorflow_docs/google:tf_org.bzl", "tf_org_check_links", "tf_org_notebook_test") - -licenses(["notice"]) - -tf_org_check_links(name = "check_links") - -# We couldn't get internal notebook tests working for these: -# Usually the reason is the use of external data. -# -# * tokenizers.ipynb -# * word_embeddings.ipynb -# * subwords_tokenizer.ipynb -# * decoding_api.ipynb - -tf_org_notebook_test( - name = "unicode", - ipynb = "unicode.ipynb", - deps = [], -) - -tf_org_notebook_test( - name = "bert_preprocessing_guide", - ipynb = "bert_preprocessing_guide.ipynb", - deps = [ - "//third_party/py/tensorflow_text", - ], -)
diff --git a/third_party/tensorflow-text/src/docs/guide/_toc.yaml b/third_party/tensorflow-text/src/docs/guide/_toc.yaml deleted file mode 100644 index 0bcec11..0000000 --- a/third_party/tensorflow-text/src/docs/guide/_toc.yaml +++ /dev/null
@@ -1,22 +0,0 @@ -toc: -- title: "Get started with TF-Text" - path: /text/guide/tf_text_intro -- heading: Concepts - style: divider -- title: "Work with Unicode" - path: /text/guide/unicode -- title: "Word embeddings" - path: /text/guide/word_embeddings -- title: "Decoding API" - path: /text/guide/decoding_api -- title: "Convert to TF Lite" - path: /text/guide/text_tf_lite - status: nightly -- heading: Pre-processing - style: divider -- title: "BERT preprocessing" - path: /text/guide/bert_preprocessing_guide -- title: "Tokenize strings" - path: /text/guide/tokenizers -- title: "Subword Tokenization" - path: /text/guide/subwords_tokenizer
diff --git a/third_party/tensorflow-text/src/docs/guide/bert_preprocessing_guide.ipynb b/third_party/tensorflow-text/src/docs/guide/bert_preprocessing_guide.ipynb deleted file mode 100644 index 56b34cb..0000000 --- a/third_party/tensorflow-text/src/docs/guide/bert_preprocessing_guide.ipynb +++ /dev/null
@@ -1,777 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "##### Copyright 2021 The TensorFlow Authors." - ], - "metadata": { - "id": "Tce3stUlHN0L" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ], - "outputs": [], - "metadata": { - "cellView": "form", - "id": "tuOe1ymfHZPu" - } - }, - { - "cell_type": "markdown", - "source": [ - "# BERT Preprocessing with TF Text" - ], - "metadata": { - "id": "qFdPvlXBOdUN" - } - }, - { - "cell_type": "markdown", - "source": [ - "<table class=\"tfo-notebook-buttons\" align=\"left\">\n", - " <td>\n", - " <a target=\"_blank\" href=\"https://www.tensorflow.org/text/guide/bert_preprocessing_guide\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n", - " </td>\n", - " <td>\n", - " <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/guide/bert_preprocessing_guide.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n", - " </td>\n", - " <td>\n", - " <a target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/guide/bert_preprocessing_guide.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View on GitHub</a>\n", - " </td>\n", - " <td>\n", - " <a href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/guide/bert_preprocessing_guide.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" />Download notebook</a>\n", - " </td>\n", - "</table>" - ], - "metadata": { - "id": "MfBg1C5NB3X0" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Overview\n", - "\n", - "Text preprocessing is the end-to-end transformation of raw text into a model’s integer inputs. NLP models are often accompanied by several hundreds (if not thousands) of lines of Python code for preprocessing text. Text preprocessing is often a challenge for models because:\n", - "\n", - "* **Training-serving skew.** It becomes increasingly difficult to ensure that the preprocessing logic of the model's inputs are consistent at all stages of model development (e.g. pretraining, fine-tuning, evaluation, inference). \n", - "Using different hyperparameters, tokenization, string preprocessing algorithms or simply packaging model inputs inconsistently at different stages could yield hard-to-debug and disastrous effects to the model. \n", - "\n", - "* **Efficiency and flexibility.** While preprocessing can be done offline (e.g. by writing out processed outputs to files on disk and then reconsuming said preprocessed data in the input pipeline), this method incurs an additional file read and write cost. Preprocessing offline is also inconvenient if there are preprocessing decisions that need to happen dynamically. 
Experimenting with a different option would require regenerating the dataset again.\n", - "\n", - "* **Complex model interface.** Text models are much more understandable when their inputs are pure text. It's hard to understand a model when its inputs require an extra, indirect encoding step. Reducing the preprocessing complexity is especially appreciated for model debugging, serving, and evaluation. \n", - "\n", - "Additionally, simpler model interfaces also make it more convenient to try the model (e.g. inference or training) on different, unexplored datasets.\n" - ], - "metadata": { - "id": "xHxb-dlhMIzW" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Text preprocessing with TF.Text\n", - "\n", - "Using TF.Text's text preprocessing APIs, we can construct a preprocessing\n", - "function that can transform a user's text dataset into the model's\n", - "integer inputs. Users can package preprocessing directly as part of their model to alleviate the above mentioned problems.\n", - "\n", - "This tutorial will show how to use TF.Text preprocessing ops to transform text data into inputs for the BERT model and inputs for language masking pretraining task described in \"Masked LM and Masking Procedure\" of [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/pdf/1810.04805.pdf). The process involves tokenizing text into subword units, combining sentences, trimming content to a fixed size and extracting labels for the masked language modeling task." - ], - "metadata": { - "id": "Y6DTHtXbxPgw" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Setup" - ], - "metadata": { - "id": "MUXex9ctTuDB" - } - }, - { - "cell_type": "markdown", - "source": [ - "Let's import the packages and libraries we need first." - ], - "metadata": { - "id": "pmIjNKsfeTpm" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "!pip install -q -U tensorflow-text" - ], - "outputs": [], - "metadata": { - "id": "gTWQ5swI7FRJ" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "import tensorflow as tf\n", - "import tensorflow_text as text\n", - "import functools" - ], - "outputs": [], - "metadata": { - "id": "IqR2PQG4ZaZ0" - } - }, - { - "cell_type": "markdown", - "source": [ - "Our data contains two text features and we can create a example `tf.data.Dataset`. Our goal is to create a function that we can supply `Dataset.map()` with to be used in training." - ], - "metadata": { - "id": "-brDHSrRaMii" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "examples = {\n", - " \"text_a\": [\n", - " b\"Sponge bob Squarepants is an Avenger\",\n", - " b\"Marvel Avengers\"\n", - " ],\n", - " \"text_b\": [\n", - " b\"Barack Obama is the President.\",\n", - " b\"President is the highest office\"\n", - " ],\n", - "}\n", - "\n", - "dataset = tf.data.Dataset.from_tensor_slices(examples)\n", - "next(iter(dataset))" - ], - "outputs": [], - "metadata": { - "id": "DQyj7OQ9yk7K" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Tokenizing\n", - "\n", - "Our first step is to run any string preprocessing and tokenize our dataset. 
This can be done using the [`text.BertTokenizer`](https://tensorflow.org/text/api_docs/python/text/BertTokenizer), which is a [`text.Splitter`](https://tensorflow.org/text/api_docs/python/text/Splitter) that can tokenize sentences into subwords or wordpieces for the [BERT model](https://github.com/google-research/bert) given a vocabulary generated from the [Wordpiece algorithm](https://www.tensorflow.org/text/guide/subwords_tokenizer#optional_the_algorithm). You can learn more about other subword tokenizers available in TF.Text from [here](https://www.tensorflow.org/text/guide/subwords_tokenizer). \n", - "\n", - "\n", - "The vocabulary can be from a previously generated BERT checkpoint, or you can generate one yourself on your own data. For the purposes of this example, let's create a toy vocabulary:" - ], - "metadata": { - "id": "1laUIs3g5Qsz" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "_VOCAB = [\n", - " # Special tokens\n", - " b\"[UNK]\", b\"[MASK]\", b\"[RANDOM]\", b\"[CLS]\", b\"[SEP]\",\n", - " # Suffixes\n", - " b\"##ack\", b\"##ama\", b\"##ger\", b\"##gers\", b\"##onge\", b\"##pants\", b\"##uare\",\n", - " b\"##vel\", b\"##ven\", b\"an\", b\"A\", b\"Bar\", b\"Hates\", b\"Mar\", b\"Ob\",\n", - " b\"Patrick\", b\"President\", b\"Sp\", b\"Sq\", b\"bob\", b\"box\", b\"has\", b\"highest\",\n", - " b\"is\", b\"office\", b\"the\",\n", - "]\n", - "\n", - "_START_TOKEN = _VOCAB.index(b\"[CLS]\")\n", - "_END_TOKEN = _VOCAB.index(b\"[SEP]\")\n", - "_MASK_TOKEN = _VOCAB.index(b\"[MASK]\")\n", - "_RANDOM_TOKEN = _VOCAB.index(b\"[RANDOM]\")\n", - "_UNK_TOKEN = _VOCAB.index(b\"[UNK]\")\n", - "_MAX_SEQ_LEN = 8\n", - "_MAX_PREDICTIONS_PER_BATCH = 5\n", - " \n", - "_VOCAB_SIZE = len(_VOCAB)\n", - "\n", - "lookup_table = tf.lookup.StaticVocabularyTable(\n", - " tf.lookup.KeyValueTensorInitializer(\n", - " keys=_VOCAB,\n", - " key_dtype=tf.string,\n", - " values=tf.range(\n", - " tf.size(_VOCAB, out_type=tf.int64), dtype=tf.int64),\n", - " value_dtype=tf.int64),\n", - " num_oov_buckets=1\n", - ")" - ], - "outputs": [], - "metadata": { - "id": "ChpIFy515S1z" - } - }, - { - "cell_type": "markdown", - "source": [ - "Let's construct a [`text.BertTokenizer`](https://tensorflow.org/text/api_docs/python/text/BertTokenizer) using the above vocabulary and tokenize the text inputs into a [`RaggedTensor`](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor).`." - ], - "metadata": { - "id": "7t2tgbSn6nvX" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "bert_tokenizer = text.BertTokenizer(lookup_table, token_out_type=tf.string)\n", - "bert_tokenizer.tokenize(examples[\"text_a\"])" - ], - "outputs": [], - "metadata": { - "id": "564UPrFB5Zm6" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "bert_tokenizer.tokenize(examples[\"text_b\"])" - ], - "outputs": [], - "metadata": { - "id": "AiTs3_FHHBlR" - } - }, - { - "cell_type": "markdown", - "source": [ - "Text output from [`text.BertTokenizer`](https://tensorflow.org/text/api_docs/python/text/BertTokenizer) allows us see how the text is being tokenized, but the model requires integer IDs. We can set the `token_out_type` param to `tf.int64` to obtain integer IDs (which are the indices into the vocabulary)." 
- ], - "metadata": { - "id": "cK6DHjio65MV" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "bert_tokenizer = text.BertTokenizer(lookup_table, token_out_type=tf.int64)\n", - "segment_a = bert_tokenizer.tokenize(examples[\"text_a\"])\n", - "segment_a" - ], - "outputs": [], - "metadata": { - "id": "odeosiPz58Qu" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "segment_b = bert_tokenizer.tokenize(examples[\"text_b\"])\n", - "segment_b" - ], - "outputs": [], - "metadata": { - "id": "v4IP2P4EHQpa" - } - }, - { - "cell_type": "markdown", - "source": [ - "[`text.BertTokenizer`](https://tensorflow.org/text/api_docs/python/text/BertTokenizer) returns a `RaggedTensor` with shape `[batch, num_tokens, num_wordpieces]`. Because we don't need the extra `num_tokens` dimensions for our current use case, we can merge the last two dimensions to obtain a `RaggedTensor` with shape `[batch, num_wordpieces]`:" - ], - "metadata": { - "id": "TU3GJ0jx94fx" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "segment_a = segment_a.merge_dims(-2, -1)\n", - "segment_a" - ], - "outputs": [], - "metadata": { - "id": "Fb5vt5dA-Rwf" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "segment_b = segment_b.merge_dims(-2, -1)\n", - "segment_b" - ], - "outputs": [], - "metadata": { - "id": "NyEW0sjhHoPM" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Content Trimming\n", - "\n", - "The main input to BERT is a concatenation of two sentences. However, BERT requires inputs to be in a fixed-size and shape and we may have content which exceed our budget. \n", - "\n", - "We can tackle this by using a [`text.Trimmer`](https://tensorflow.org/text/api_docs/python/text/Trimmer) to trim our content down to a predetermined size (once concatenated along the last axis). There are different `text.Trimmer` types which select content to preserve using different algorithms. [`text.RoundRobinTrimmer`](https://tensorflow.org/text/api_docs/python/text/RoundRobinTrimmer) for example will allocate quota equally for each segment but may trim the ends of sentences. [`text.WaterfallTrimmer`](https://tensorflow.org/text/api_docs/python/text/WaterfallTrimmer) will trim starting from the end of the last sentence.\n", - "\n", - "For our example, we will use `RoundRobinTrimmer` which selects items from each segment in a left-to-right manner.\n" - ], - "metadata": { - "id": "R9YicLN5UFkz" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "trimmer = text.RoundRobinTrimmer(max_seq_length=[_MAX_SEQ_LEN])\n", - "trimmed = trimmer.trim([segment_a, segment_b])\n", - "trimmed" - ], - "outputs": [], - "metadata": { - "id": "aLV-1uDgwFnr" - } - }, - { - "cell_type": "markdown", - "source": [ - "`trimmed` now contains the segments where the number of elements across a batch is 8 elements (when concatenated along axis=-1)." - ], - "metadata": { - "id": "zPj7jM9oQ-P3" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Combining segments\n", - "\n", - "Now that we have segments trimmed, we can combine them together to get a single `RaggedTensor`. BERT uses special tokens to indicate the beginning (`[CLS]`) and end of a segment (`[SEP]`). We also need a `RaggedTensor` indicating which items in the combined `Tensor` belong to which segment. 
We can use [`text.combine_segments()`](https://tensorflow.org/text/api_docs/python/text/combine_segments) to get both of these `Tensor` with special tokens inserted." - ], - "metadata": { - "id": "3J2AWfmAUio8" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "segments_combined, segments_ids = text.combine_segments(\n", - " [segment_a, segment_b],\n", - " start_of_sequence_id=_START_TOKEN, end_of_segment_id=_END_TOKEN)\n", - "segments_combined, segments_ids" - ], - "outputs": [], - "metadata": { - "id": "L-5nMh5pk8x1" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Masked Language Model Task\n", - "\n", - "Now that we have our basic inputs, we can begin to extract the inputs needed for the \"Masked LM and Masking Procedure\" task described in [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/pdf/1810.04805.pdf)\n", - "\n", - "The masked language model task has two sub-problems for us to think about: (1) what items to select for masking and (2) what values are they assigned? \n" - ], - "metadata": { - "id": "hSKla2OxUOWl" - } - }, - { - "cell_type": "markdown", - "source": [ - "#### Item Selection\n", - "Because we will choose to select items randomly for masking, we will use a [`text.RandomItemSelector`](https://tensorflow.org/text/api_docs/python/text/RandomItemSelector). `RandomItemSelector` randomly selects items in a batch subject to restrictions given (`max_selections_per_batch`, `selection_rate` and `unselectable_ids`) and returns a boolean mask indicating which items were selected." - ], - "metadata": { - "id": "mkx4w9-3DT0p" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "random_selector = text.RandomItemSelector(\n", - " max_selections_per_batch=_MAX_PREDICTIONS_PER_BATCH,\n", - " selection_rate=0.2,\n", - " unselectable_ids=[_START_TOKEN, _END_TOKEN, _UNK_TOKEN]\n", - ")\n", - "selected = random_selector.get_selection_mask(\n", - " segments_combined, axis=1)\n", - "selected\n" - ], - "outputs": [], - "metadata": { - "id": "94BncqVkVJT2" - } - }, - { - "cell_type": "markdown", - "source": [ - "#### Choosing the Masked Value\n", - "\n", - "The methodology described the original BERT paper for choosing the value for masking is as follows:\n", - "\n", - "For `mask_token_rate` of the time, replace the item with the `[MASK]` token:\n", - "\n", - " \"my dog is hairy\" -> \"my dog is [MASK]\"\n", - " \n", - "For `random_token_rate` of the time, replace the item with a random word:\n", - "\n", - " \"my dog is hairy\" -> \"my dog is apple\"\n", - " \n", - "For `1 - mask_token_rate - random_token_rate` of the time, keep the item\n", - "unchanged:\n", - "\n", - " \"my dog is hairy\" -> \"my dog is hairy.\"\n", - "\n", - "[`text.MaskedValuesChooser`](https://tensorflow.org/text/api_docs/python/text/MaskValuesChooser) encapsulates this logic and can be used for our preprocessing function. 
Here's an example of what `MaskValuesChooser` returns given a `mask_token_rate` of 80% and default `random_token_rate`:\n" - ], - "metadata": { - "id": "p4NAHL_GUi-C" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "input_ids = tf.ragged.constant([[19, 7, 21, 20, 9, 8], [13, 4, 16, 5], [15, 10, 12, 11, 6]])\n", - "mask_values_chooser = text.MaskValuesChooser(_VOCAB_SIZE, _MASK_TOKEN, 0.8)\n", - "mask_values_chooser.get_mask_values(input_ids)" - ], - "outputs": [], - "metadata": { - "id": "Amk0Lqd5VJ4n" - } - }, - { - "cell_type": "markdown", - "source": [ - "When supplied with a `RaggedTensor` input, `text.MaskValuesChooser` returns a `RaggedTensor` of the same shape with either `_MASK_VALUE` (0), a random ID, or the same unchanged id." - ], - "metadata": { - "id": "UCp1CQcPC6IT" - } - }, - { - "cell_type": "markdown", - "source": [ - "#### Generating Inputs for Masked Language Model Task\n", - "\n", - "Now that we have a `RandomItemSelector` to help us select items for masking and `text.MaskValuesChooser` to assign the values, we can use [`text.mask_language_model()`](https://tensorflow.org/text/api_docs/python/text/mask_language_model) to assemble all the inputs of this task for our BERT model.\n" - ], - "metadata": { - "id": "EYpKg_sLUi1B" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "masked_token_ids, masked_pos, masked_lm_ids = text.mask_language_model(\n", - " segments_combined,\n", - " item_selector=random_selector, mask_values_chooser=mask_values_chooser)" - ], - "outputs": [], - "metadata": { - "id": "Q0fqQzXGUrkM" - } - }, - { - "cell_type": "markdown", - "source": [ - "Let's dive deeper and examine the outputs of `mask_language_model()`. The output of `masked_token_ids` is:" - ], - "metadata": { - "id": "pJqcbOJ0AYBX" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "masked_token_ids" - ], - "outputs": [], - "metadata": { - "id": "PavYEhmN_tHa" - } - }, - { - "cell_type": "markdown", - "source": [ - "Remember that our input is encoded using a vocabulary. If we decode `masked_token_ids` using our vocabulary, we get:" - ], - "metadata": { - "id": "Q0c2wkC9AnUX" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "tf.gather(_VOCAB, masked_token_ids)" - ], - "outputs": [], - "metadata": { - "id": "5axqrUOc_0h1" - } - }, - { - "cell_type": "markdown", - "source": [ - "Notice that some wordpiece tokens have been replaced with either `[MASK]`, `[RANDOM]` or a different ID value. `masked_pos` output gives us the indices (in the respective batch) of the tokens that have been replaced." - ], - "metadata": { - "id": "v8DCOtEAiz_E" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "masked_pos" - ], - "outputs": [], - "metadata": { - "id": "d-nc5m5Y_wP_" - } - }, - { - "cell_type": "markdown", - "source": [ - "`masked_lm_ids` gives us the original value of the token." - ], - "metadata": { - "id": "6fua7ANijN3_" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - " masked_lm_ids" - ], - "outputs": [], - "metadata": { - "id": "azzxmO_f_xJp" - } - }, - { - "cell_type": "markdown", - "source": [ - "We can again decode the IDs here to get human readable values." 
- ], - "metadata": { - "id": "5bW0rdX9jYh-" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "tf.gather(_VOCAB, masked_lm_ids)" - ], - "outputs": [], - "metadata": { - "id": "F-RP-paUjUuP" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Padding Model Inputs\n", - "\n", - "Now that we have all the inputs for our model, the last step in our preprocessing is to package them into fixed 2-dimensional `Tensor`s with padding and also generate a mask `Tensor` indicating the values which are pad values. We can use [`text.pad_model_inputs()`](https://tensorflow.org/text/api_docs/python/text/pad_model_inputs) to help us with this task." - ], - "metadata": { - "id": "3-P0PTiCUz2J" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "# Prepare and pad combined segment inputs\n", - "input_word_ids, input_mask = text.pad_model_inputs(\n", - " masked_token_ids, max_seq_length=_MAX_SEQ_LEN)\n", - "input_type_ids, _ = text.pad_model_inputs(\n", - " masked_token_ids, max_seq_length=_MAX_SEQ_LEN)\n", - "\n", - "# Prepare and pad masking task inputs\n", - "masked_lm_positions, masked_lm_weights = text.pad_model_inputs(\n", - " masked_token_ids, max_seq_length=_MAX_PREDICTIONS_PER_BATCH)\n", - "masked_lm_ids, _ = text.pad_model_inputs(\n", - " masked_lm_ids, max_seq_length=_MAX_PREDICTIONS_PER_BATCH)\n", - "\n", - "model_inputs = {\n", - " \"input_word_ids\": input_word_ids,\n", - " \"input_mask\": input_mask,\n", - " \"input_type_ids\": input_type_ids,\n", - " \"masked_lm_ids\": masked_lm_ids,\n", - " \"masked_lm_positions\": masked_lm_positions,\n", - " \"masked_lm_weights\": masked_lm_weights,\n", - "}\n", - "model_inputs" - ], - "outputs": [], - "metadata": { - "id": "FGE7XuXRwsYF" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Review" - ], - "metadata": { - "id": "KIWy4nVyT6gf" - } - }, - { - "cell_type": "markdown", - "source": [ - "Let's review what we have so far and assemble our preprocessing function. 
Here's what we have:" - ], - "metadata": { - "id": "TwCdO1Z5yjS-" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "def bert_pretrain_preprocess(vocab_table, features):\n", - " # Input is a string Tensor of documents, shape [batch, 1].\n", - " text_a = features[\"text_a\"]\n", - " text_b = features[\"text_b\"]\n", - "\n", - " # Tokenize segments to shape [num_sentences, (num_words)] each.\n", - " tokenizer = text.BertTokenizer(\n", - " vocab_table,\n", - " token_out_type=tf.int64)\n", - " segments = [tokenizer.tokenize(text).merge_dims(\n", - " 1, -1) for text in (text_a, text_b)]\n", - "\n", - " # Truncate inputs to a maximum length.\n", - " trimmer = text.RoundRobinTrimmer(max_seq_length=6)\n", - " trimmed_segments = trimmer.trim(segments)\n", - "\n", - " # Combine segments, get segment ids and add special tokens.\n", - " segments_combined, segment_ids = text.combine_segments(\n", - " trimmed_segments,\n", - " start_of_sequence_id=_START_TOKEN,\n", - " end_of_segment_id=_END_TOKEN)\n", - "\n", - " # Apply dynamic masking task.\n", - " masked_input_ids, masked_lm_positions, masked_lm_ids = (\n", - " text.mask_language_model(\n", - " segments_combined,\n", - " random_selector,\n", - " mask_values_chooser,\n", - " )\n", - " )\n", - " \n", - " # Prepare and pad combined segment inputs\n", - " input_word_ids, input_mask = text.pad_model_inputs(\n", - " masked_input_ids, max_seq_length=_MAX_SEQ_LEN)\n", - " input_type_ids, _ = text.pad_model_inputs(\n", - " masked_input_ids, max_seq_length=_MAX_SEQ_LEN)\n", - "\n", - " # Prepare and pad masking task inputs\n", - " masked_lm_positions, masked_lm_weights = text.pad_model_inputs(\n", - " masked_input_ids, max_seq_length=_MAX_PREDICTIONS_PER_BATCH)\n", - " masked_lm_ids, _ = text.pad_model_inputs(\n", - " masked_lm_ids, max_seq_length=_MAX_PREDICTIONS_PER_BATCH)\n", - "\n", - " model_inputs = {\n", - " \"input_word_ids\": input_word_ids,\n", - " \"input_mask\": input_mask,\n", - " \"input_type_ids\": input_type_ids,\n", - " \"masked_lm_ids\": masked_lm_ids,\n", - " \"masked_lm_positions\": masked_lm_positions,\n", - " \"masked_lm_weights\": masked_lm_weights,\n", - " }\n", - " return model_inputs" - ], - "outputs": [], - "metadata": { - "id": "7jKtbVCYTsIC" - } - }, - { - "cell_type": "markdown", - "source": [ - "We previously constructed a `tf.data.Dataset` and we can now use our assembled preprocessing function `bert_pretrain_preprocess()` in `Dataset.map()`. This allows us to create an input pipeline for transforming our raw string data into integer inputs and feed directly into our model." - ], - "metadata": { - "id": "bOAeo97VyfQg" - } - }, - { - "cell_type": "code", - "execution_count": null, - "source": [ - "dataset = tf.data.Dataset.from_tensors(examples)\n", - "dataset = dataset.map(functools.partial(\n", - " bert_pretrain_preprocess, lookup_table))\n", - "\n", - "next(iter(dataset))" - ], - "outputs": [], - "metadata": { - "id": "xm4gTLEgjTa3" - } - }, - { - "cell_type": "markdown", - "source": [ - "## Related Tutorials\n", - "\n", - "* [Classify text with BERT](https://www.tensorflow.org/text/tutorials/classify_text_with_bert) - A tutorial on how to use a pretrained BERT model to classify text. 
This is a nice follow up now that you are familiar with how to preprocess the inputs used by the BERT model.\n", - "\n", - "* [Tokenizing with TF Text](https://www.tensorflow.org/text/guide/tokenizers) - Tutorial detailing the different types of tokenizers that exist in TF.Text.\n", - "\n", - "* [Handling Text with `RaggedTensor`](https://www.tensorflow.org/guide/ragged_tensor) - Detailed guide on how to create, use and manipulate `RaggedTensor`s.\n" - ], - "metadata": { - "id": "FyiMxeEp0m2O" - } - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "bert_preprocessing_guide.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file
diff --git a/third_party/tensorflow-text/src/docs/guide/decoding_api.ipynb b/third_party/tensorflow-text/src/docs/guide/decoding_api.ipynb deleted file mode 100644 index ea898fb1..0000000 --- a/third_party/tensorflow-text/src/docs/guide/decoding_api.ipynb +++ /dev/null
@@ -1,502 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Tce3stUlHN0L" - }, - "source": [ - "##### Copyright 2021 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "tuOe1ymfHZPu" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qFdPvlXBOdUN" - }, - "source": [ - "# Decoding API" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfBg1C5NB3X0" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/guide/decoding_api\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/guide/decoding_api.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/guide/decoding_api.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/guide/decoding_api.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xHxb-dlhMIzW" - }, - "source": [ - "## Overview\n", - "In the recent past, there has been a lot of research in language generation with auto-regressive models. In auto-regressive language generation, the probability distribution of token at time step *K* is dependent on the model's token-predictions till step *K-1*. For these models, decoding strategies such as Beam search, Greedy, Top-p, and Top-k are critical components of the model and largely influence the style/nature of the generated output token at a given time step *K*. \n", - "\n", - "For example, **Beam search** reduces the risk of missing hidden high probability tokens by\n", - "keeping the most likely num_beams of hypotheses at each time step and eventually\n", - "choosing the hypothesis that has the overall highest probability.\n", - "[Murray et al. (2018)](https://arxiv.org/abs/1808.10006) and\n", - "[Yang et al. 
(2018)](https://arxiv.org/abs/1808.09582) show that beam search\n", - "works well in Machine Translation tasks.\n", - "Both **Beam search** \u0026 **Greedy** strategies have a possibility of generating\n", - "repeating tokens.\n", - "\n", - "[Fan et. al (2018)](https://arxiv.org/pdf/1805.04833.pdf) introduced **Top-K\n", - "sampling**, in which the K most likely tokens are filtered and the probability mass\n", - "is redistributed among only those K tokens.\n", - "\n", - "[Ari Holtzman et. al (2019)](https://arxiv.org/pdf/1904.09751.pdf) introduced\n", - "**Top-p sampling**, which chooses from the smallest possible set of tokens with\n", - "cumulative probability that adds upto the probability *p*. The probability mass is then\n", - "redistributed among this set. This way, the size of the set of tokens can\n", - "dynamically increase and decrease.\n", - "**Top-p, Top-k** are generally used in tasks such as story-generation.\n", - "\n", - "The Decoding API provides an interface to experiment with different decoding strategies on auto-regressive models.\n", - "\n", - "1. The following sampling strategies are provided in sampling_module.py, which inherits from the base Decoding class:\n", - " * [top_p](https://arxiv.org/abs/1904.09751) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L65) \n", - "\n", - " * [top_k](https://arxiv.org/pdf/1805.04833.pdf) : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L48)\n", - "\n", - " * Greedy : [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/sampling_module.py#L26)\n", - "\n", - "2. Beam search is provided in beam_search.py. [github](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search.py)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MUXex9ctTuDB" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "60_D9NLa9KhJ" - }, - "outputs": [], - "source": [ - "!pip install -q -U tensorflow-text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FJV1ttb8dZyQ" - }, - "outputs": [], - "source": [ - "!pip install -q tf-models-nightly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IqR2PQG4ZaZ0" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import tensorflow as tf\n", - "\n", - "from official import nlp\n", - "from official.nlp.modeling.ops import sampling_module\n", - "from official.nlp.modeling.ops import beam_search" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j9r8-Q_CekYK" - }, - "source": [ - "## Initialize Sampling Module in TF-NLP.\n", - "\n", - "\n", - "* **symbols_to_logits_fn** : Use this closure to call the model to predict the logits for the `index+1` step. Inputs and outputs for this closure are as follows: \n", - "```\n", - "Args:\n", - " 1] ids : Current decoded sequences. int tensor with shape (batch_size, index + 1 or 1 if padded_decode is True)],\n", - " 2] index [scalar] : current decoded step,\n", - " 3] cache [nested dictionary of tensors] : Only used for faster decoding to store pre-computed attention hidden states for keys and values. 
More explanation in the cell below.\n", - "Returns:\n", - " 1] tensor for next-step logits [batch_size, vocab]\n", - " 2] the updated_cache [nested dictionary of tensors].\n", - "```\n", - "The cache is used for faster decoding.\n", - "Here is a [reference](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/ops/beam_search_test.py#L88) implementation for the above closure.\n", - "\n", - "\n", - "* **length_normalization_fn** : Use this closure for returning length normalization parameter.\n", - "```\n", - "Args: \n", - " 1] length : scalar for decoded step index.\n", - " 2] dtype : data-type of output tensor\n", - "Returns:\n", - " 1] value of length normalization factor.\n", - "```\n", - "\n", - "* **vocab_size** : Output vocabulary size.\n", - "\n", - "* **max_decode_length** : Scalar for total number of decoding steps.\n", - "\n", - "* **eos_id** : Decoding will stop if all output decoded ids in the batch have this eos_id.\n", - "\n", - "* **padded_decode** : Set this to True if running on TPU. Tensors are padded to max_decoding_length if this is True.\n", - "\n", - "* **top_k** : top_k is enabled if this value is \u003e 1.\n", - "\n", - "* **top_p** : top_p is enabled if this value is \u003e 0 and \u003c 1.0\n", - "\n", - "* **sampling_temperature** : This is used to re-estimate the softmax output. Temperature skews the distribution towards high probability tokens and lowers the mass in tail distribution. Value has to be positive. Low temperature is equivalent to greedy and makes the distribution sharper, while high temperature makes it more flat.\n", - "\n", - "* **enable_greedy** : By default, this is True and greedy decoding is enabled. To experiment with other strategies, please set this to False." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xqpGECmAeu7Q" - }, - "source": [ - "## Initialize the Model hyperparameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KtylpxOmceaC" - }, - "outputs": [], - "source": [ - "params = {}\n", - "params['num_heads'] = 2\n", - "params['num_layers'] = 2\n", - "params['batch_size'] = 2\n", - "params['n_dims'] = 256\n", - "params['max_decode_length'] = 4" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pwdM2pl3RSPb" - }, - "source": [ - "In auto-regressive architectures like Transformer based [Encoder-Decoder](https://arxiv.org/abs/1706.03762) models, \n", - "Cache is used for fast sequential decoding.\n", - "It is a nested dictionary storing pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention blocks) for every layer." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A_xX-fbze8_S" - }, - "source": [ - "## Initialize cache. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mMOeXVmbdilM" - }, - "outputs": [], - "source": [ - "cache = {\n", - " 'layer_%d' % layer: {\n", - " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], int(params['n_dims']/params['num_heads'])], dtype=tf.float32),\n", - " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], int(params['n_dims']/params['num_heads'])], dtype=tf.float32)\n", - " } for layer in range(params['num_layers'])\n", - " }\n", - "print(\"cache key shape for layer 1 :\", cache['layer_1']['k'].shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-9BGL3gOe-2K" - }, - "source": [ - "## Define closure for length normalization if needed.\n", - "This is used for normalizing the final scores of generated sequences and is optional\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "U82B_tH2d294" - }, - "outputs": [], - "source": [ - "def length_norm(length, dtype):\n", - " \"\"\"Return length normalization factor.\"\"\"\n", - " return tf.pow(((5. + tf.cast(length, dtype)) / 6.), 0.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TJdqBNBbS78n" - }, - "source": [ - "## Create model_fn\n", - " In practice, this will be replaced by an actual model implementation such as [here](https://github.com/tensorflow/models/blob/master/official/nlp/transformer/transformer.py#L236)\n", - "```\n", - "Args:\n", - "i : Step that is being decoded.\n", - "Returns:\n", - " logit probabilities of size [batch_size, 1, vocab_size]\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xVeis7YZfaQM" - }, - "outputs": [], - "source": [ - "probabilities = tf.constant([[[0.3, 0.4, 0.3], [0.3, 0.3, 0.4],\n", - " [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],\n", - " [[0.2, 0.5, 0.3], [0.2, 0.7, 0.1],\n", - " [0.1, 0.1, 0.8], [0.1, 0.1, 0.8]]])\n", - "def model_fn(i):\n", - " return probabilities[:, i, :]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_G2rkaCPfcka" - }, - "source": [ - "## Initialize symbols_to_logits_fn\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1B6T3629fdKJ" - }, - "outputs": [], - "source": [ - "def _symbols_to_logits_fn():\n", - " \"\"\"Calculates logits of the next tokens.\"\"\"\n", - " def symbols_to_logits_fn(ids, i, temp_cache):\n", - " del ids\n", - " logits = tf.cast(tf.math.log(model_fn(i)), tf.float32)\n", - " return logits, temp_cache\n", - " return symbols_to_logits_fn" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rhosGmvZffke" - }, - "source": [ - "## Greedy \n", - "Greedy decoding selects the token id with the highest probability as its next id: $id_t = argmax_{w}P(id | id_{1:t-1})$ at each timestep $t$. The following sketch shows greedy decoding. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JZ-p0JdbfyJ7" - }, - "outputs": [], - "source": [ - "greedy_obj = sampling_module.SamplingModule(\n", - " length_normalization_fn=None,\n", - " dtype=tf.float32,\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " vocab_size=3,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " padded_decode=False)\n", - "ids, _ = greedy_obj.generate(\n", - " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", - "print(\"Greedy Decoded Ids:\", ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pOmG0IE6ff40" - }, - "source": [ - "## top_k sampling\n", - "In *Top-K* sampling, the *K* most likely next token ids are filtered and the probability mass is redistributed among only those *K* ids. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qkIDv7VZfuzr" - }, - "outputs": [], - "source": [ - "top_k_obj = sampling_module.SamplingModule(\n", - " length_normalization_fn=length_norm,\n", - " dtype=tf.float32,\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " vocab_size=3,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " sample_temperature=tf.constant(1.0),\n", - " top_k=tf.constant(3),\n", - " padded_decode=False,\n", - " enable_greedy=False)\n", - "ids, _ = top_k_obj.generate(\n", - " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", - "print(\"top-k sampled Ids:\", ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PaEv2c_cflsE" - }, - "source": [ - "## top_p sampling\n", - "Instead of sampling only from the most likely *K* token ids, in *Top-p* sampling chooses from the smallest possible set of ids whose cumulative probability exceeds the probability *p*." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WzHslibyfs6K" - }, - "outputs": [], - "source": [ - "top_p_obj = sampling_module.SamplingModule(\n", - " length_normalization_fn=length_norm,\n", - " dtype=tf.float32,\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " vocab_size=3,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " sample_temperature=tf.constant(1.0),\n", - " top_p=tf.constant(0.9),\n", - " padded_decode=False,\n", - " enable_greedy=False)\n", - "ids, _ = top_p_obj.generate(\n", - " initial_ids=tf.constant([9, 1]), initial_cache=cache)\n", - "print(\"top-p sampled Ids:\", ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hTSdHdTjfoPV" - }, - "source": [ - "## Beam search decoding\n", - "Beam search reduces the risk of missing hidden high probability token ids by keeping the most likely num_beams of hypotheses at each time step and eventually choosing the hypothesis that has the overall highest probability. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "U1jIPF_qfqcO" - }, - "outputs": [], - "source": [ - "beam_size = 2\n", - "params['batch_size'] = 1\n", - "beam_cache = {\n", - " 'layer_%d' % layer: {\n", - " 'k': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32),\n", - " 'v': tf.zeros([params['batch_size'], params['max_decode_length'], params['num_heads'], params['n_dims']], dtype=tf.float32)\n", - " } for layer in range(params['num_layers'])\n", - " }\n", - "print(\"cache key shape for layer 1 :\", beam_cache['layer_1']['k'].shape)\n", - "ids, _ = beam_search.sequence_beam_search(\n", - " symbols_to_logits_fn=_symbols_to_logits_fn(),\n", - " initial_ids=tf.constant([9], tf.int32),\n", - " initial_cache=beam_cache,\n", - " vocab_size=3,\n", - " beam_size=beam_size,\n", - " alpha=0.6,\n", - " max_decode_length=params['max_decode_length'],\n", - " eos_id=10,\n", - " padded_decode=False,\n", - " dtype=tf.float32)\n", - "print(\"Beam search ids:\", ids)" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "decoding_api.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/guide/images/embedding.jpg b/third_party/tensorflow-text/src/docs/guide/images/embedding.jpg deleted file mode 100644 index 3f9cf1e..0000000 --- a/third_party/tensorflow-text/src/docs/guide/images/embedding.jpg +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/guide/images/embedding2.png b/third_party/tensorflow-text/src/docs/guide/images/embedding2.png deleted file mode 100644 index a0f1666..0000000 --- a/third_party/tensorflow-text/src/docs/guide/images/embedding2.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/guide/images/embeddings_classifier_accuracy.png b/third_party/tensorflow-text/src/docs/guide/images/embeddings_classifier_accuracy.png deleted file mode 100644 index 3a45f54..0000000 --- a/third_party/tensorflow-text/src/docs/guide/images/embeddings_classifier_accuracy.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/guide/images/one-hot.png b/third_party/tensorflow-text/src/docs/guide/images/one-hot.png deleted file mode 100644 index adfe9b9..0000000 --- a/third_party/tensorflow-text/src/docs/guide/images/one-hot.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/guide/subwords_tokenizer.ipynb b/third_party/tensorflow-text/src/docs/guide/subwords_tokenizer.ipynb deleted file mode 100644 index c830312..0000000 --- a/third_party/tensorflow-text/src/docs/guide/subwords_tokenizer.ipynb +++ /dev/null
@@ -1,1241 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "s_qNSzzyaCbD" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "jmjh290raIky" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AOpGoE2T-YXS" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/guide/subwords_tokenizer\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/guide/subwords_tokenizer.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/guide/subwords_tokenizer.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/guide/subwords_tokenizer.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ES8iTKcdPCLt" - }, - "source": [ - "# Subword tokenizers\n", - "\n", - "This tutorial demonstrates how to generate a subword vocabulary from a dataset, and use it to build a `text.BertTokenizer` from the vocabulary.\n", - "\n", - "The main advantage of a subword tokenizer is that it interpolates between word-based and character-based tokenization. Common words get a slot in the vocabulary, but the tokenizer can fall back to word pieces and individual characters for unknown words.\n", - "\n", - "Objective: At the end of this tutorial you'll have built a complete end-to-end wordpiece tokenizer and detokenizer from scratch, and saved it as a `saved_model` that you can load and use in this [translation tutorial](https://tensorflow.org/text/tutorials/transformer)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BHfrtG1YPJdR" - }, - "source": [ - "## Overview" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iIMuBnQO6ZoV" - }, - "source": [ - "The `tensorflow_text` package includes TensorFlow implementations of many common tokenizers. 
This includes three subword-style tokenizers:\n", - "\n", - "* `text.BertTokenizer` - The `BertTokenizer` class is a higher level interface. It includes BERT's token splitting algorithm and a `WordPieceTokenizer`. It takes **sentences** as input and returns **token-IDs**.\n", - "* `text.WordpieceTokenizer` - The `WordPieceTokenizer` class is a lower level interface. It only implements the [WordPiece algorithm](#applying_wordpiece). You must standardize and split the text into words before calling it. It takes **words** as input and returns token-IDs.\n", - "* `text.SentencepieceTokenizer` - The `SentencepieceTokenizer` requires a more complex setup. Its initializer requires a pre-trained sentencepiece model. See the [google/sentencepiece repository](https://github.com/google/sentencepiece#train-sentencepiece-model) for instructions on how to build one of these models. It can accept **sentences** as input when tokenizing.\n", - "\n", - "This tutorial builds a Wordpiece vocabulary in a top down manner, starting from existing words. This process doesn't work for Japanese, Chinese, or Korean since these languages don't have clear multi-character units. To tokenize these languages conside using `text.SentencepieceTokenizer`, `text.UnicodeCharTokenizer` or [this approach](https://tfhub.dev/google/zh_segmentation/1). " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "swymtxpl7W7w" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rJTYbk1E9QOk" - }, - "outputs": [], - "source": [ - "!pip install -q -U tensorflow-text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XFG0NDRu5mYQ" - }, - "outputs": [], - "source": [ - "!pip install -q tensorflow_datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JjJJyJTZYebt" - }, - "outputs": [], - "source": [ - "import collections\n", - "import os\n", - "import pathlib\n", - "import re\n", - "import string\n", - "import sys\n", - "import tempfile\n", - "import time\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import tensorflow_datasets as tfds\n", - "import tensorflow_text as text\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QZi9RstHxO_Z" - }, - "outputs": [], - "source": [ - "tf.get_logger().setLevel('ERROR')\n", - "pwd = pathlib.Path.cwd()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wzJbGA5N5mXr" - }, - "source": [ - "## Download the dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kC9TeTd47j8p" - }, - "source": [ - "Fetch the Portuguese/English translation dataset from [tfds](https://tensorflow.org/datasets):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qDaAOTKHNy8e" - }, - "outputs": [], - "source": [ - "examples, metadata = tfds.load('ted_hrlr_translate/pt_to_en', with_info=True,\n", - " as_supervised=True)\n", - "train_examples, val_examples = examples['train'], examples['validation'] " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5GHc3O2W8Hgg" - }, - "source": [ - "This dataset produces Portuguese/English sentence pairs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-_ezZT8w8GqD" - }, - "outputs": [], - "source": [ - "for pt, en in train_examples.take(1):\n", - " print(\"Portuguese: \", pt.numpy().decode('utf-8'))\n", - 
" print(\"English: \", en.numpy().decode('utf-8'))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nNGwm45vKttj" - }, - "source": [ - "Note a few things about the example sentences above:\n", - "* They're lower case.\n", - "* There are spaces around the punctuation.\n", - "* It's not clear if or what unicode normalization is being used." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Pm5Eah5F6B1I" - }, - "outputs": [], - "source": [ - "train_en = train_examples.map(lambda pt, en: en)\n", - "train_pt = train_examples.map(lambda pt, en: pt)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VCD57yALsF0D" - }, - "source": [ - "## Generate the vocabulary\n", - "\n", - "This section generates a wordpiece vocabulary from a dataset. If you already have a vocabulary file and just want to see how to build a `text.BertTokenizer` or `text.Wordpiece` tokenizer with it then you can skip ahead to the [Build the tokenizer](#build_the_tokenizer) section." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "v4CX7_KlO8lX" - }, - "source": [ - "Note: The vocabulary generation code used in this tutorial is optimized for **simplicity**. If you need a more scalable solution consider using the Apache Beam implementation available in [tools/wordpiece_vocab/generate_vocab.py](https://github.com/tensorflow/text/blob/master/tensorflow_text/tools/wordpiece_vocab/generate_vocab.py)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "R74W3QabgWmX" - }, - "source": [ - "The vocabulary generation code is included in the `tensorflow_text` pip package. It is not imported by default , you need to manually import it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iqX1fYdpnLS2" - }, - "outputs": [], - "source": [ - "from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset as bert_vocab" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HaWSnj8xFgI7" - }, - "source": [ - "The `bert_vocab.bert_vocab_from_dataset` function will generate the vocabulary. \n", - "\n", - "There are many arguments you can set to adjust its behavior. For this tutorial, you'll mostly use the defaults. If you want to learn more about the options, first read about [the algorithm](#algorithm), and then have a look at [the code](https://github.com/tensorflow/text/blob/master/tensorflow_text/tools/wordpiece_vocab/bert_vocab_from_dataset.py).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6gTty2Wh-dHm" - }, - "source": [ - "This takes about 2 minutes." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FwFzYjBy-h8W" - }, - "outputs": [], - "source": [ - "bert_tokenizer_params=dict(lower_case=True)\n", - "reserved_tokens=[\"[PAD]\", \"[UNK]\", \"[START]\", \"[END]\"]\n", - "\n", - "bert_vocab_args = dict(\n", - " # The target vocabulary size\n", - " vocab_size = 8000,\n", - " # Reserved tokens that must be included in the vocabulary\n", - " reserved_tokens=reserved_tokens,\n", - " # Arguments for `text.BertTokenizer`\n", - " bert_tokenizer_params=bert_tokenizer_params,\n", - " # Arguments for `wordpiece_vocab.wordpiece_tokenizer_learner_lib.learn`\n", - " learn_params={},\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PMN6Lli_3sJW" - }, - "outputs": [], - "source": [ - "%%time\n", - "pt_vocab = bert_vocab.bert_vocab_from_dataset(\n", - " train_pt.batch(1000).prefetch(2),\n", - " **bert_vocab_args\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3Cl4d2O34gkH" - }, - "source": [ - "Here are some slices of the resulting vocabulary." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mfaPmX54FvhW" - }, - "outputs": [], - "source": [ - "print(pt_vocab[:10])\n", - "print(pt_vocab[100:110])\n", - "print(pt_vocab[1000:1010])\n", - "print(pt_vocab[-10:])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "owkP3wbYVQv0" - }, - "source": [ - "Write a vocabulary file:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VY6v1ThkKDyZ" - }, - "outputs": [], - "source": [ - "def write_vocab_file(filepath, vocab):\n", - " with open(filepath, 'w') as f:\n", - " for token in vocab:\n", - " print(token, file=f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "X_TR5U1xWvAV" - }, - "outputs": [], - "source": [ - "write_vocab_file('pt_vocab.txt', pt_vocab)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0ag3qcx54nii" - }, - "source": [ - "Use that function to generate a vocabulary from the english data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "R3cMumvHWWtl" - }, - "outputs": [], - "source": [ - "%%time\n", - "en_vocab = bert_vocab.bert_vocab_from_dataset(\n", - " train_en.batch(1000).prefetch(2),\n", - " **bert_vocab_args\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NxOpzMd8ol5B" - }, - "outputs": [], - "source": [ - "print(en_vocab[:10])\n", - "print(en_vocab[100:110])\n", - "print(en_vocab[1000:1010])\n", - "print(en_vocab[-10:])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ck3LG_f34wCs" - }, - "source": [ - "Here are the two vocabulary files:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xfc2jxPznM6H" - }, - "outputs": [], - "source": [ - "write_vocab_file('en_vocab.txt', en_vocab)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "djehfEL6Zn-I" - }, - "outputs": [], - "source": [ - "!ls *.txt" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Vb5ddYLTBJhk" - }, - "source": [ - "## Build the tokenizer\n", - "\u003ca id=\"build_the_tokenizer\"\u003e\u003c/a\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_qgp5gvR-2tQ" - }, - "source": [ - "The `text.BertTokenizer` can be initialized by passing the vocabulary file's path as the first argument (see the section on 
[tf.lookup](#tf.lookup) for other options): " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gdMpt9ZEjVGu" - }, - "outputs": [], - "source": [ - "pt_tokenizer = text.BertTokenizer('pt_vocab.txt', **bert_tokenizer_params)\n", - "en_tokenizer = text.BertTokenizer('en_vocab.txt', **bert_tokenizer_params)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BhPZafCUds86" - }, - "source": [ - "Now you can use it to encode some text. Take a batch of 3 examples from the english data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NKF0QJjtUm9T" - }, - "outputs": [], - "source": [ - "for pt_examples, en_examples in train_examples.batch(3).take(1):\n", - " for ex in en_examples:\n", - " print(ex.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "k9OEIBWopMxW" - }, - "source": [ - "Run it through the `BertTokenizer.tokenize` method. Initially, this returns a `tf.RaggedTensor` with axes `(batch, word, word-piece)`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AeTM81lAc8q1" - }, - "outputs": [], - "source": [ - "# Tokenize the examples -\u003e (batch, word, word-piece)\n", - "token_batch = en_tokenizer.tokenize(en_examples)\n", - "# Merge the word and word-piece axes -\u003e (batch, tokens)\n", - "token_batch = token_batch.merge_dims(-2,-1)\n", - "\n", - "for ex in token_batch.to_list():\n", - " print(ex)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UbdIaW6kX8hu" - }, - "source": [ - "If you replace the token IDs with their text representations (using `tf.gather`) you can see that in the first example the words `\"searchability\"` and `\"serendipity\"` have been decomposed into `\"search ##ability\"` and `\"s ##ere ##nd ##ip ##ity\"`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "FA6nKYx5U3Nj" - }, - "outputs": [], - "source": [ - "# Lookup each token id in the vocabulary.\n", - "txt_tokens = tf.gather(en_vocab, token_batch)\n", - "# Join with spaces.\n", - "tf.strings.reduce_join(txt_tokens, separator=' ', axis=-1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wY2XrhyRem2O" - }, - "source": [ - "To re-assemble words from the extracted tokens, use the `BertTokenizer.detokenize` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "toBXQSrgemRw" - }, - "outputs": [], - "source": [ - "words = en_tokenizer.detokenize(token_batch)\n", - "tf.strings.reduce_join(words, separator=' ', axis=-1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WIZWWy_iueQY" - }, - "source": [ - "\u003e Note: `BertTokenizer.tokenize`/`BertTokenizer.detokenize` does not round\n", - "trip losslessly. The result of `detokenize` will not, in general, have the\n", - "same content or offsets as the input to `tokenize`. This is because of the\n", - "\"basic tokenization\" step, that splits the strings into words before\n", - "applying the `WordpieceTokenizer`, includes irreversible\n", - "steps like lower-casing and splitting on punctuation. `WordpieceTokenizer`\n", - "on the other hand **is** reversible." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_bN30iCexTPY" - }, - "source": [ - "## Customization and export\n", - "\n", - "This tutorial builds the text tokenizer and detokenizer used by the [Transformer](https://tensorflow.org/text/tutorials/transformer) tutorial. 
This section adds methods and processing steps to simplify that tutorial, and exports the tokenizers using `tf.saved_model` so they can be imported by the other tutorials." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5wpc7oFkwgni" - }, - "source": [ - "### Custom tokenization" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NaUR9hHj0PUy" - }, - "source": [ - "The downstream tutorials both expect the tokenized text to include `[START]` and `[END]` tokens.\n", - "\n", - "The `reserved_tokens` reserve space at the beginning of the vocabulary, so `[START]` and `[END]` have the same indexes for both languages:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gyyoa5De0WQu" - }, - "outputs": [], - "source": [ - "START = tf.argmax(tf.constant(reserved_tokens) == \"[START]\")\n", - "END = tf.argmax(tf.constant(reserved_tokens) == \"[END]\")\n", - "\n", - "def add_start_end(ragged):\n", - " count = ragged.bounding_shape()[0]\n", - " starts = tf.fill([count,1], START)\n", - " ends = tf.fill([count,1], END)\n", - " return tf.concat([starts, ragged, ends], axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MrZjQIwZ6NHu" - }, - "outputs": [], - "source": [ - "words = en_tokenizer.detokenize(add_start_end(token_batch))\n", - "tf.strings.reduce_join(words, separator=' ', axis=-1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WMmHS5VT_suH" - }, - "source": [ - "### Custom detokenization\n", - "\n", - "Before exporting the tokenizers there are a couple of things you can cleanup for the downstream tutorials:\n", - "\n", - "1. They want to generate clean text output, so drop reserved tokens like `[START]`, `[END]` and `[PAD]`.\n", - "2. They're interested in complete strings, so apply a string join along the `words` axis of the result. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "x9vXUQPX1ZFA" - }, - "outputs": [], - "source": [ - "def cleanup_text(reserved_tokens, token_txt):\n", - " # Drop the reserved tokens, except for \"[UNK]\".\n", - " bad_tokens = [re.escape(tok) for tok in reserved_tokens if tok != \"[UNK]\"]\n", - " bad_token_re = \"|\".join(bad_tokens)\n", - " \n", - " bad_cells = tf.strings.regex_full_match(token_txt, bad_token_re)\n", - " result = tf.ragged.boolean_mask(token_txt, ~bad_cells)\n", - "\n", - " # Join them into strings.\n", - " result = tf.strings.reduce_join(result, separator=' ', axis=-1)\n", - "\n", - " return result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NMSpZUV7sQYw" - }, - "outputs": [], - "source": [ - "en_examples.numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yB3MJhNvkuBb" - }, - "outputs": [], - "source": [ - "token_batch = en_tokenizer.tokenize(en_examples).merge_dims(-2,-1)\n", - "words = en_tokenizer.detokenize(token_batch)\n", - "words" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ED5rMeZE6HT3" - }, - "outputs": [], - "source": [ - "cleanup_text(reserved_tokens, words).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HEfEdRi11Re4" - }, - "source": [ - "### Export" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uFuo1KZjpEPR" - }, - "source": [ - "The following code block builds a `CustomTokenizer` class to contain the `text.BertTokenizer` instances, the custom logic, and the `@tf.function` wrappers required for export. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "f1q1hCpH72Vj" - }, - "outputs": [], - "source": [ - "class CustomTokenizer(tf.Module):\n", - " def __init__(self, reserved_tokens, vocab_path):\n", - " self.tokenizer = text.BertTokenizer(vocab_path, lower_case=True)\n", - " self._reserved_tokens = reserved_tokens\n", - " self._vocab_path = tf.saved_model.Asset(vocab_path)\n", - "\n", - " vocab = pathlib.Path(vocab_path).read_text().splitlines()\n", - " self.vocab = tf.Variable(vocab)\n", - "\n", - " ## Create the signatures for export: \n", - "\n", - " # Include a tokenize signature for a batch of strings. 
\n", - " self.tokenize.get_concrete_function(\n", - " tf.TensorSpec(shape=[None], dtype=tf.string))\n", - " \n", - " # Include `detokenize` and `lookup` signatures for:\n", - " # * `Tensors` with shapes [tokens] and [batch, tokens]\n", - " # * `RaggedTensors` with shape [batch, tokens]\n", - " self.detokenize.get_concrete_function(\n", - " tf.TensorSpec(shape=[None, None], dtype=tf.int64))\n", - " self.detokenize.get_concrete_function(\n", - " tf.RaggedTensorSpec(shape=[None, None], dtype=tf.int64))\n", - "\n", - " self.lookup.get_concrete_function(\n", - " tf.TensorSpec(shape=[None, None], dtype=tf.int64))\n", - " self.lookup.get_concrete_function(\n", - " tf.RaggedTensorSpec(shape=[None, None], dtype=tf.int64))\n", - "\n", - " # These `get_*` methods take no arguments\n", - " self.get_vocab_size.get_concrete_function()\n", - " self.get_vocab_path.get_concrete_function()\n", - " self.get_reserved_tokens.get_concrete_function()\n", - " \n", - " @tf.function\n", - " def tokenize(self, strings):\n", - " enc = self.tokenizer.tokenize(strings)\n", - " # Merge the `word` and `word-piece` axes.\n", - " enc = enc.merge_dims(-2,-1)\n", - " enc = add_start_end(enc)\n", - " return enc\n", - "\n", - " @tf.function\n", - " def detokenize(self, tokenized):\n", - " words = self.tokenizer.detokenize(tokenized)\n", - " return cleanup_text(self._reserved_tokens, words)\n", - "\n", - " @tf.function\n", - " def lookup(self, token_ids):\n", - " return tf.gather(self.vocab, token_ids)\n", - "\n", - " @tf.function\n", - " def get_vocab_size(self):\n", - " return tf.shape(self.vocab)[0]\n", - "\n", - " @tf.function\n", - " def get_vocab_path(self):\n", - " return self._vocab_path\n", - "\n", - " @tf.function\n", - " def get_reserved_tokens(self):\n", - " return tf.constant(self._reserved_tokens)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RHzEnTQM6nBD" - }, - "source": [ - "Build a `CustomTokenizer` for each language:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cU8yFBCSruz4" - }, - "outputs": [], - "source": [ - "tokenizers = tf.Module()\n", - "tokenizers.pt = CustomTokenizer(reserved_tokens, 'pt_vocab.txt')\n", - "tokenizers.en = CustomTokenizer(reserved_tokens, 'en_vocab.txt')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZYfrmDhy6syT" - }, - "source": [ - "Export the tokenizers as a `saved_model`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aieDGooa9ms7" - }, - "outputs": [], - "source": [ - "model_name = 'ted_hrlr_translate_pt_en_converter'\n", - "tf.saved_model.save(tokenizers, model_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XoCMz2Fm61v6" - }, - "source": [ - "Reload the `saved_model` and test the methods:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9SB_BHwqsHkb" - }, - "outputs": [], - "source": [ - "reloaded_tokenizers = tf.saved_model.load(model_name)\n", - "reloaded_tokenizers.en.get_vocab_size().numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "W_Ze3WL3816x" - }, - "outputs": [], - "source": [ - "tokens = reloaded_tokenizers.en.tokenize(['Hello TensorFlow!'])\n", - "tokens.numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "v9o93bzcuhyC" - }, - "outputs": [], - "source": [ - "text_tokens = reloaded_tokenizers.en.lookup(tokens)\n", - "text_tokens" - ] - }, - { - "cell_type": "code", - "execution_count": null, 
- "metadata": { - "id": "Y0205N_8dDT5" - }, - "outputs": [], - "source": [ - "round_trip = reloaded_tokenizers.en.detokenize(tokens)\n", - "\n", - "print(round_trip.numpy()[0].decode('utf-8'))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pSKFDQoBjnNp" - }, - "source": [ - "Archive it for the [translation tutorials](https://tensorflow.org/text/tutorials/transformer):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eY0SoE3Yj2it" - }, - "outputs": [], - "source": [ - "!zip -r {model_name}.zip {model_name}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0Synq0RekAXe" - }, - "outputs": [], - "source": [ - "!du -h *.zip" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AtmGkGBuGHa2" - }, - "source": [ - "\u003ca id=\"algorithm\"\u003e\u003c/a\u003e\n", - "\n", - "## Optional: The algorithm\n", - "\n", - "\n", - "It's worth noting here that there are two versions of the WordPiece algorithm: Bottom-up and top-down. In both cases goal is the same: \"Given a training corpus and a number of desired\n", - "tokens D, the optimization problem is to select D wordpieces such that the resulting corpus is minimal in the\n", - "number of wordpieces when segmented according to the chosen wordpiece model.\"\n", - "\n", - "The original [bottom-up WordPiece algorithm](https://static.googleusercontent.com/media/research.google.com/ja//pubs/archive/37842.pdf), is based on [byte-pair encoding](https://towardsdatascience.com/byte-pair-encoding-the-dark-horse-of-modern-nlp-eb36c7df4f10). Like BPE, It starts with the alphabet, and iteratively combines common bigrams to form word-pieces and words.\n", - "\n", - "TensorFlow Text's vocabulary generator follows the top-down implementation from [BERT](https://arxiv.org/pdf/1810.04805.pdf). Starting with words and breaking them down into smaller components until they hit the frequency threshold, or can't be broken down further. The next section describes this in detail. For Japanese, Chinese and Korean this top-down approach doesn't work since there are no explicit word units to start with. For those you need a [different approach](https://tfhub.dev/google/zh_segmentation/1).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FLA2QhffYEo0" - }, - "source": [ - "### Choosing the vocabulary\n", - "\n", - "The top-down WordPiece generation algorithm takes in a set of (word, count) pairs and a threshold `T`, and returns a vocabulary `V`.\n", - "\n", - "The algorithm is iterative. It is run for `k` iterations, where typically `k = 4`, but only the first two are really important. The third and fourth (and beyond) are just identical to the second. Note that each step of the binary search runs the algorithm from scratch for `k` iterations.\n", - "\n", - "The iterations described below:\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZqfY0p3PYIKr" - }, - "source": [ - "#### First iteration\n", - "\n", - "1. Iterate over every word and count pair in the input, denoted as `(w, c)`.\n", - "2. For each word `w`, generate every substring, denoted as `s`. E.g., for the\n", - " word `human`, we generate `{h, hu, hum, huma,\n", - " human, ##u, ##um, ##uma, ##uman, ##m, ##ma, ##man, #a, ##an, ##n}`.\n", - "3. Maintain a substring-to-count hash map, and increment the count of each `s`\n", - " by `c`. E.g., if we have `(human, 113)` and `(humas, 3)` in our input, the\n", - " count of `s = huma` will be `113+3=116`.\n", - "4. 
Once we've collected the counts of every substring, iterate over the `(s,\n", - " c)` pairs *starting with the longest `s` first*.\n", - "5. Keep any `s` that has a `c \u003e T`. E.g., if `T = 100` and we have `(pers,\n", - " 231); (dogs, 259); (##rint; 76)`, then we would keep `pers` and `dogs`.\n", - "6. When an `s` is kept, subtract off its count from all of its prefixes. This\n", - " is the reason for sorting all of the `s` by length in step 4. This is a\n", - " critical part of the algorithm, because otherwise words would be double\n", - " counted. For example, let's say that we've kept `human` and we get to\n", - " `(huma, 116)`. We know that `113` of those `116` came from `human`, and `3`\n", - " came from `humas`. However, now that `human` is in our vocabulary, we know\n", - " we will never segment `human` into `huma ##n`. So once `human` has been\n", - " kept, then `huma` only has an *effective* count of `3`.\n", - "\n", - "This algorithm will generate a set of word pieces `s` (many of which will be\n", - "whole words `w`), which we *could* use as our WordPiece vocabulary.\n", - "\n", - "However, there is a problem: This algorithm will severely overgenerate word\n", - "pieces. The reason is that we only subtract off counts of prefix tokens.\n", - "Therefore, if we keep the word `human`, we will subtract off the count for `h,\n", - "hu, hu, huma`, but not for `##u, ##um, ##uma, ##uman` and so on. So we might\n", - "generate both `human` and `##uman` as word pieces, even though `##uman` will\n", - "never be applied.\n", - "\n", - "So why not subtract off the counts for every *substring*, not just every\n", - "*prefix*? Because then we could end up subtracting off the counts multiple\n", - "times. Let's say that we're processing `s` of length 5 and we keep both\n", - "`(##denia, 129)` and `(##eniab, 137)`, where `65` of those counts came from the\n", - "word `undeniable`. If we subtract off from *every* substring, we would subtract\n", - "`65` from the substring `##enia` twice, even though we should only subtract\n", - "once. However, if we only subtract off from prefixes, it will correctly only be\n", - "subtracted once." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NNCtKR8xT9wX" - }, - "source": [ - "#### Second (and third ...) iteration\n", - "\n", - "To solve the overgeneration issue mentioned above, we perform multiple\n", - "iterations of the algorithm.\n", - "\n", - "Subsequent iterations are identical to the first, with one important\n", - "distinction: In step 2, instead of considering *every* substring, we apply the\n", - "WordPiece tokenization algorithm using the vocabulary from the previous\n", - "iteration, and only consider substrings which *start* on a split point.\n", - "\n", - "For example, let's say that we're performing step 2 of the algorithm and\n", - "encounter the word `undeniable`. In the first iteration, we would consider every\n", - "substring, e.g., `{u, un, und, ..., undeniable, ##n, ##nd, ..., ##ndeniable,\n", - "...}`.\n", - "\n", - "Now, for the second iteration, we will only consider a subset of these. Let's\n", - "say that after the first iteration, the relevant word pieces are:\n", - "\n", - "`un, ##deni, ##able, ##ndeni, ##iable`\n", - "\n", - "The WordPiece algorithm will segment this into `un ##deni ##able` (see the\n", - "section [Applying WordPiece](#applying-wordpiece) for more information). In this\n", - "case, we will only consider substrings that *start* at a segmentation point. 
We\n", - "will still consider every possible *end* position. So during the second\n", - "iteration, the set of `s` for `undeniable` is:\n", - "\n", - "`{u, un, und, unden, undeni, undenia, undeniab, undeniabl,\n", - "undeniable, ##d, ##de, ##den, ##deni, ##denia, ##deniab, ##deniabl\n", - ", ##deniable, ##a, ##ab, ##abl, ##able}`\n", - "\n", - "The algorithm is otherwise identical. In this example, in the first iteration,\n", - "the algorithm produces the suprious tokens `##ndeni` and `##iable`. Now, these\n", - "tokens are never considered, so they will not be generated by the second\n", - "iteration. We perform several iterations just to make sure the results converge\n", - "(although there is no literal convergence guarantee).\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AdUkqe84YQA5" - }, - "source": [ - "### Applying WordPiece\n", - "\n", - "\u003ca id=\"applying_wordpiece\"\u003e\u003c/a\u003e\n", - "\n", - "Once a WordPiece vocabulary has been generated, we need to be able to apply it\n", - "to new data. The algorithm is a simple greedy longest-match-first application.\n", - "\n", - "For example, consider segmenting the word `undeniable`.\n", - "\n", - "We first lookup `undeniable` in our WordPiece dictionary, and if it's present,\n", - "we're done. If not, we decrement the end point by one character, and repeat,\n", - "e.g., `undeniabl`.\n", - "\n", - "Eventually, we will either find a subtoken in our vocabulary, or get down to a\n", - "single character subtoken. (In general, we assume that every character is in our\n", - "vocabulary, although this might not be the case for rare Unicode characters. If\n", - "we encounter a rare Unicode character that's not in the vocabulary we simply map\n", - "the entire word to `\u003cunk\u003e`).\n", - "\n", - "In this case, we find `un` in our vocabulary. So that's our first word piece.\n", - "Then we jump to the end of `un` and repeat the processing, e.g., try to find\n", - "`##deniable`, then `##deniabl`, etc. This is repeated until we've segmented the\n", - "entire word." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rjRQKQzpYMl2" - }, - "source": [ - "### Intuition\n", - "\n", - "Intuitively, WordPiece tokenization is trying to satisfy two different\n", - "objectives:\n", - "\n", - "1. Tokenize the data into the *least* number of pieces as possible. It is\n", - " important to keep in mind that the WordPiece algorithm does not \"want\" to\n", - " split words. Otherwise, it would just split every word into its characters,\n", - " e.g., `human -\u003e {h, ##u, ##m, ##a, #n}`. This is one critical thing that\n", - " makes WordPiece different from morphological splitters, which will split\n", - " linguistic morphemes even for common words (e.g., `unwanted -\u003e {un, want,\n", - " ed}`).\n", - "\n", - "2. When a word does have to be split into pieces, split it into pieces that\n", - " have maximal counts in the training data. For example, the reason why the\n", - " word `undeniable` would be split into `{un, ##deni, ##able}` rather than\n", - " alternatives like `{unde, ##niab, ##le}` is that the counts for `un` and\n", - " `##able` in particular will be very high, since these are common prefixes\n", - " and suffixes. Even though the count for `##le` must be higher than `##able`,\n", - " the low counts of `unde` and `##niab` will make this a less \"desirable\"\n", - " tokenization to the algorithm." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KQZ38Uus-Xv1" - }, - "source": [ - "## Optional: tf.lookup\n", - "\n", - "\u003ca id=\"tf.lookup\"\u003e\u003c/a\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NreDSRmJNG_h" - }, - "source": [ - "If you need access to, or more control over the vocabulary it's worth noting that you can build the lookup table yourself and pass that to `BertTokenizer`.\n", - "\n", - "When you pass a string, `BertTokenizer` does the following:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "thAF1DzQOQXl" - }, - "outputs": [], - "source": [ - "pt_lookup = tf.lookup.StaticVocabularyTable(\n", - " num_oov_buckets=1,\n", - " initializer=tf.lookup.TextFileInitializer(\n", - " filename='pt_vocab.txt',\n", - " key_dtype=tf.string,\n", - " key_index = tf.lookup.TextFileIndex.WHOLE_LINE,\n", - " value_dtype = tf.int64,\n", - " value_index=tf.lookup.TextFileIndex.LINE_NUMBER)) \n", - "pt_tokenizer = text.BertTokenizer(pt_lookup)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ERY4FYN7O66R" - }, - "source": [ - "Now you have direct access to the lookup table used in the tokenizer." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "337_DcAMOs6N" - }, - "outputs": [], - "source": [ - "pt_lookup.lookup(tf.constant(['é', 'um', 'uma', 'para', 'não']))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BdZ82x5mPDE9" - }, - "source": [ - "You don't need to use a vocabulary file, `tf.lookup` has other initializer options. If you have the vocabulary in memory you can use `lookup.KeyValueTensorInitializer`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mzkrmO9H-b9i" - }, - "outputs": [], - "source": [ - "pt_lookup = tf.lookup.StaticVocabularyTable(\n", - " num_oov_buckets=1,\n", - " initializer=tf.lookup.KeyValueTensorInitializer(\n", - " keys=pt_vocab,\n", - " values=tf.range(len(pt_vocab), dtype=tf.int64))) \n", - "pt_tokenizer = text.BertTokenizer(pt_lookup)" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "subwords_tokenizer.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
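The "Applying WordPiece" section of the deleted `subwords_tokenizer.ipynb` describes segmentation as a greedy longest-match-first scan over the word. A minimal sketch of that scan in plain Python, assuming a toy in-memory vocabulary and the `##` continuation prefix used throughout the guide (the real implementation is `text.WordpieceTokenizer`, not this helper):

```python
# Minimal sketch of greedy longest-match-first WordPiece application.
# Assumption: `vocab` is a plain Python set and "##" marks word-internal pieces.
def wordpiece_segment(word, vocab, unk="[UNK]"):
    pieces, start = [], 0
    while start < len(word):
        end, match = len(word), None
        while start < end:
            piece = word[start:end]
            if start > 0:
                piece = "##" + piece
            if piece in vocab:          # longest matching piece wins
                match = piece
                break
            end -= 1                    # shrink the candidate by one character
        if match is None:               # nothing matched: map the whole word to <unk>
            return [unk]
        pieces.append(match)
        start = end                     # continue from the end of the match
    return pieces

vocab = {"un", "##deni", "##able", "##d", "##n"}
print(wordpiece_segment("undeniable", vocab))   # ['un', '##deni', '##able']
```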
diff --git a/third_party/tensorflow-text/src/docs/guide/text_tf_lite.ipynb b/third_party/tensorflow-text/src/docs/guide/text_tf_lite.ipynb deleted file mode 100644 index 15213e2..0000000 --- a/third_party/tensorflow-text/src/docs/guide/text_tf_lite.ipynb +++ /dev/null
@@ -1,283 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "TensorFlow Text On Device Guide", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "_as3tyDPAvzM" - }, - "source": [ - "##### Copyright 2021 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "-CoWjX1EBXJX" - }, - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7hQmWrtkBBQB" - }, - "source": [ - "# Converting TensorFlow Text operators to TensorFlow Lite" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qmGnheU8BPKN" - }, - "source": [ - "<table class=\"tfo-notebook-buttons\" align=\"left\">\n", - " <td>\n", - " <a target=\"_blank\" href=\"https://www.tensorflow.org/text/guide/text_tf_lite\"><img src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" />View on TensorFlow.org</a>\n", - " </td>\n", - " <td>\n", - " <a target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/guide/text_tf_lite.ipynb\"><img src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" />Run in Google Colab</a>\n", - " </td>\n", - " <td>\n", - " <a target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/guide/text_tf_lite.ipynb\"><img src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" />View on GitHub</a>\n", - " </td>\n", - " <td>\n", - " <a href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/guide/text_tf_lite.ipynb\"><img src=\"https://www.tensorflow.org/images/download_logo_32px.png\" />Download notebook</a>\n", - " </td>\n", - "</table>" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hz1hOEHPTF2n" - }, - "source": [ - "## Overview\n", - "\n", - "Machine learning models are frequently deployed using TensorFlow Lite to mobile, embedded, and IoT devices to improve data privacy and lower response times. These models often require support for text processing operations. 
TensorFlow Text version 2.7 and higher provides improved performance, reduced binary sizes, and operations specifically optimized for use in these environments.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_mdIyFfqTMjc" - }, - "source": [ - "## Text operators\n", - "\n", - "The following TensorFlow Text classes can be used from within a TensorFlow Lite model.\n", - "\n", - "* `FastWordpieceTokenizer`\n", - "* `WhitespaceTokenizer`\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x6NAs1fcUwUn" - }, - "source": [ - "## Model Example" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "8ZalFZQvTJf5", - "outputId": "9e7ba769-9b2c-401f-d26a-353fcf3ced3f" - }, - "source": [ - "!pip install -q -U tf-nightly\n", - "!pip install -q -U tensorflow-text-nightly" - ], - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "uL-I0CyPTXnN" - }, - "source": [ - "from absl import app\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "import tensorflow_text as tf_text\n", - "\n", - "from tensorflow.lite.python import interpreter" - ], - "execution_count": 2, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qj_bJ-xVTfU1" - }, - "source": [ - "The following code example shows the conversion process and interpretation in Python using a simple test model. Note that the output of a model cannot be a `tf.RaggedTensor` object when you are using TensorFlow Lite. However, you can return the components of a `tf.RaggedTensor` object or convert it using its `to_tensor` function. See [the RaggedTensor guide](https://www.tensorflow.org/guide/ragged_tensor) for more details." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nqQjBcXqTf_0" - }, - "source": [ - "class TokenizerModel(tf.keras.Model):\n", - "\n", - " def __init__(self, **kwargs):\n", - " super().__init__(**kwargs)\n", - " self.tokenizer = tf_text.WhitespaceTokenizer()\n", - "\n", - " @tf.function(input_signature=[\n", - " tf.TensorSpec(shape=[None], dtype=tf.string, name='input')\n", - " ])\n", - " def call(self, input_tensor):\n", - " return { 'tokens': self.tokenizer.tokenize(input_tensor).flat_values }" - ], - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "jsPFI-55TiF_", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "721b9c61-feab-4a17-e09f-c9f4775a1ed9" - }, - "source": [ - "# Test input data.\n", - "input_data = np.array(['Some minds are better kept apart'])\n", - "\n", - "# Define a Keras model.\n", - "model = TokenizerModel()\n", - "\n", - "# Perform TensorFlow Text inference.\n", - "tf_result = model(tf.constant(input_data))\n", - "print('TensorFlow result = ', tf_result['tokens'])" - ], - "execution_count": 4, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YKpFsvJGTlPq" - }, - "source": [ - "\n", - "## Convert the TensorFlow model to TensorFlow Lite\n", - "\n", - "When converting a TensorFlow model with TensorFlow Text operators to TensorFlow Lite, you need to\n", - "indicate to the `TFLiteConverter` that there are custom operators using the\n", - "`allow_custom_ops` attribute as in the example below. You can then run the model conversion as you normally would. Review the [TensorFlow Lite converter](https://www.tensorflow.org/lite/convert) documentation for a detailed guide on the basics of model conversion." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6hYWezs1Tndo", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "7af5faf6-96a5-41ae-b876-e3934756cd7f" - }, - "source": [ - "# Convert to TensorFlow Lite.\n", - "converter = tf.lite.TFLiteConverter.from_keras_model(model)\n", - "converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]\n", - "converter.allow_custom_ops = True\n", - "tflite_model = converter.convert()" - ], - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cxCdhrHATpSR" - }, - "source": [ - "## Inference\n", - "\n", - "For the TensorFlow Lite interpreter to properly read your model containing TensorFlow Text operators, you must configure it to use these custom operators, and provide registration methods for them. Use `tf_text.tflite_registrar.SELECT_TFTEXT_OPS` to provide the full suite of registration functions for the supported TensorFlow Text operators to `InterpreterWithCustomOps`.\n", - "\n", - "Note, that while the example below shows inference in Python, the steps are similar in other languages with some minor API translations, and the necessity to build the `tflite_registrar` into your binary. See [TensorFlow Lite Inference](https://www.tensorflow.org/lite/guide/inference) for more details." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "kykFg2pXTriw" - }, - "source": [ - "# Perform TensorFlow Lite inference.\n", - "interp = interpreter.InterpreterWithCustomOps(\n", - " model_content=tflite_model,\n", - " custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS)\n", - "interp.get_signature_list()" - ], - "execution_count": 6, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rNGPpHCCTxVX" - }, - "source": [ - "Next, the TensorFlow Lite interpreter is invoked with the input, providing a result which matches the TensorFlow result from above." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "vmSbfbgJTyKY" - }, - "source": [ - "tokenize = interp.get_signature_runner('serving_default')\n", - "output = tokenize(input=input_data)\n", - "print('TensorFlow Lite result = ', output['tokens'])" - ], - "execution_count": 7, - "outputs": [] - } - ] -} \ No newline at end of file
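The deleted `text_tf_lite.ipynb` splits model definition, conversion and inference across several cells. Condensed into one sketch, under the same assumptions the guide makes (tf-nightly and tensorflow-text-nightly installed, and a model whose outputs avoid `tf.RaggedTensor`), the whole flow looks roughly like this:

```python
# Condensed sketch of the conversion/inference flow from the deleted guide.
import numpy as np
import tensorflow as tf
import tensorflow_text as tf_text
from tensorflow.lite.python import interpreter

class TokenizerModel(tf.keras.Model):
  def __init__(self, **kwargs):
    super().__init__(**kwargs)
    self.tokenizer = tf_text.WhitespaceTokenizer()

  @tf.function(input_signature=[
      tf.TensorSpec(shape=[None], dtype=tf.string, name='input')])
  def call(self, input_tensor):
    # TFLite outputs cannot be tf.RaggedTensor, so return the flat values.
    return {'tokens': self.tokenizer.tokenize(input_tensor).flat_values}

model = TokenizerModel()

# Convert, flagging the TF Text ops as custom operators.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS]
converter.allow_custom_ops = True
tflite_model = converter.convert()

# Run inference with the TF Text op registrars supplied to the interpreter.
interp = interpreter.InterpreterWithCustomOps(
    model_content=tflite_model,
    custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS)
tokenize = interp.get_signature_runner('serving_default')
print(tokenize(input=np.array(['Some minds are better kept apart']))['tokens'])
```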
diff --git a/third_party/tensorflow-text/src/docs/guide/tf_text_intro.md b/third_party/tensorflow-text/src/docs/guide/tf_text_intro.md deleted file mode 100644 index 22f1e1a7..0000000 --- a/third_party/tensorflow-text/src/docs/guide/tf_text_intro.md +++ /dev/null
@@ -1,44 +0,0 @@ -# Introduction to TensorFlow Text - -TensorFlow Text provides a collection of text related classes and ops ready to -use with TensorFlow 2.0. The library can perform the preprocessing regularly -required by text-based models, and includes other features useful for sequence -modeling not provided by core TensorFlow. - -The benefit of using these ops in your text preprocessing is that they are done -in the TensorFlow graph. You do not need to worry about tokenization in training -being different than the tokenization at inference, or managing preprocessing -scripts. - -## Install TensorFlow Text - -### Install using pip - -When installing TF Text with pip install, note the version of TensorFlow you are -running, as you should specify the corresponding version of TF Text. - -```python -pip install -U tensorflow-text==<version> -``` - -### Build from source - -TensorFlow Text must be built in the same environment as TensorFlow. Thus, if -you manually build TF Text, it is highly recommended that you also build -TensorFlow. - -If building on MacOS, you must have coreutils installed. It is probably easiest -to do with Homebrew. First, build TensorFlow -[from source](https://www.tensorflow.org/install/source). - -Clone the TF Text repo. - -```shell -git clone https://github.com/tensorflow/text.git -``` - -Finally, run the build script to create a pip package. - -```shell -./oss_scripts/run_build.sh -```
diff --git a/third_party/tensorflow-text/src/docs/guide/tokenizers.ipynb b/third_party/tensorflow-text/src/docs/guide/tokenizers.ipynb deleted file mode 100644 index 397d6ea4..0000000 --- a/third_party/tensorflow-text/src/docs/guide/tokenizers.ipynb +++ /dev/null
@@ -1,643 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Tce3stUlHN0L" - }, - "source": [ - "##### Copyright 2020 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "tuOe1ymfHZPu" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qFdPvlXBOdUN" - }, - "source": [ - "# Tokenizing with TF Text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfBg1C5NB3X0" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/guide/tokenizers\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/guide/tokenizers.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/guide/tokenizers.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/guide/tokenizers.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://tfhub.dev/google/zh_segmentation/1\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub models\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xHxb-dlhMIzW" - }, - "source": [ - "## Overview\n", - "\n", - "Tokenization is the process of breaking up a string into tokens. Commonly, these tokens are words, numbers, and/or punctuation. The `tensorflow_text` package provides a number of tokenizers available for preprocessing text required by your text-based models. By performing the tokenization in the TensorFlow graph, you will not need to worry about differences between the training and inference workflows and managing preprocessing scripts.\n", - "\n", - "This guide discusses the many tokenization options provided by TensorFlow Text, when you might want to use one option over another, and how these tokenizers are called from within your model." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MUXex9ctTuDB" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "z0oj4HS26x05" - }, - "outputs": [], - "source": [ - "!pip install -q tensorflow-text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "alf2kDHJ60rO" - }, - "outputs": [], - "source": [ - "import requests\n", - "import tensorflow as tf\n", - "import tensorflow_text as tf_text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i4rfKxVvBBu0" - }, - "source": [ - "## Splitter API\n", - "\n", - "The main interfaces are `Splitter` and `SplitterWithOffsets` which have single methods `split` and `split_with_offsets`. The `SplitterWithOffsets` variant (which extends `Splitter`) includes an option for getting byte offsets. This allows the caller to know which bytes in the original string the created token was created from.\n", - "\n", - "The `Tokenizer` and `TokenizerWithOffsets` are specialized versions of the `Splitter` that provide the convenience methods `tokenize` and `tokenize_with_offsets` respectively.\n", - "\n", - "Generally, for any N-dimensional input, the returned tokens are in a N+1-dimensional [RaggedTensor](https://www.tensorflow.org/guide/ragged_tensor) with the inner-most dimension of tokens mapping to the original individual strings.\n", - "\n", - "```python\n", - "class Splitter {\n", - " @abstractmethod\n", - " def split(self, input)\n", - "}\n", - "\n", - "class SplitterWithOffsets(Splitter) {\n", - " @abstractmethod\n", - " def split_with_offsets(self, input)\n", - "}\n", - "```\n", - "\n", - "There is also a `Detokenizer` interface. Any tokenizer implementing this interface can accept a N-dimensional ragged tensor of tokens, and normally returns a N-1-dimensional tensor or ragged tensor that has the given tokens assembled together.\n", - "\n", - "```python\n", - "class Detokenizer {\n", - " @abstractmethod\n", - " def detokenize(self, input)\n", - "}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BhviJXy0BDoa" - }, - "source": [ - "## Tokenizers\n", - "\n", - "Below is the suite of tokenizers provided by TensorFlow Text. String inputs are assumed to be UTF-8. Please review the [Unicode guide](https://www.tensorflow.org/text/guide/unicode) for converting strings to UTF-8." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eWFisXk-68BQ" - }, - "source": [ - "### Whole word tokenizers\n", - "\n", - "These tokenizers attempt to split a string by words, and is the most intuitive way to split text.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-CxjAs5wOYKh" - }, - "source": [ - "#### WhitespaceTokenizer\n", - "\n", - "The `text.WhitespaceTokenizer` is the most basic tokenizer which splits strings on ICU defined whitespace characters (eg. space, tab, new line). This is often good for quickly building out prototype models." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "k4a11Hlm7C4k" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.WhitespaceTokenizer()\n", - "tokens = tokenizer.tokenize([\"What you know you can't explain, but you feel it.\"])\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "VHS6dEQ7cR9E" - }, - "source": [ - "You may notice a shortcome of this tokenizer is that punctuation is included with the word to make up a token. 
To split the words and punctuation into separate tokens, the `UnicodeScriptTokenizer` should be used." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-xohhm0Q7AmN" - }, - "source": [ - "#### UnicodeScriptTokenizer\n", - "\n", - "The `UnicodeScriptTokenizer` splits strings based on Unicode script boundaries. The script codes used correspond to International Components for Unicode (ICU) UScriptCode values. See: http://icu-project.org/apiref/icu4c/uscript_8h.html\n", - "\n", - "In practice, this is similar to the `WhitespaceTokenizer` with the most apparent difference being that it will split punctuation (USCRIPT_COMMON) from language texts (e.g. USCRIPT_LATIN, USCRIPT_CYRILLIC, etc.) while also separating language texts from each other. Note that this will also split contraction words into separate tokens." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "68u0XF3L6-ay" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.UnicodeScriptTokenizer()\n", - "tokens = tokenizer.tokenize([\"What you know you can't explain, but you feel it.\"])\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J0Ja_h1qO7P0" - }, - "source": [ - "### Subword tokenizers\n", - "\n", - "Subword tokenizers can be used with a smaller vocabulary, and allow the model to have some information about novel words from the subwords that make it up.\n", - "\n", - "We briefly discuss the Subword tokenization options below, but the [Subword Tokenization tutorial](https://www.tensorflow.org/text/guide/subwords_tokenizer) goes more in depth and also explains how to generate the vocab files." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BLif2owYPBos" - }, - "source": [ - "#### WordpieceTokenizer\n", - "\n", - "WordPiece tokenization is a data-driven tokenization scheme which generates a set of sub-tokens. These sub-tokens may correspond to linguistic morphemes, but this is often not the case.\n", - "\n", - "The WordpieceTokenizer expects the input to already be split into tokens. Because of this prerequisite, you will often want to split using the `WhitespaceTokenizer` or `UnicodeScriptTokenizer` beforehand." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "srIHtzU2fxCi" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.WhitespaceTokenizer()\n", - "tokens = tokenizer.tokenize([\"What you know you can't explain, but you feel it.\"])\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uUZe66RngCGU" - }, - "source": [ - "After the string is split into tokens, the `WordpieceTokenizer` can be used to split into subtokens."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ISEUjIsYAl2S" - }, - "outputs": [], - "source": [ - "url = \"https://github.com/tensorflow/text/blob/master/tensorflow_text/python/ops/test_data/test_wp_en_vocab.txt?raw=true\"\n", - "r = requests.get(url)\n", - "filepath = \"vocab.txt\"\n", - "open(filepath, 'wb').write(r.content)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uU8wJlUfsskU" - }, - "outputs": [], - "source": [ - "subtokenizer = tf_text.UnicodeScriptTokenizer(filepath)\n", - "subtokens = tokenizer.tokenize(tokens)\n", - "print(subtokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ncBcigHAPFBd" - }, - "source": [ - "#### BertTokenizer\n", - "\n", - "The BertTokenizer mirrors the original implementation of tokenization from the BERT paper. This is backed by the WordpieceTokenizer, but also performs additional tasks such as normalization and tokenizing to words first." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2tOz1hNhtdV2" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.BertTokenizer(filepath, token_out_type=tf.string, lower_case=True)\n", - "tokens = tokenizer.tokenize([\"What you know you can't explain, but you feel it.\"])\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-rb_dORMO-3t" - }, - "source": [ - "#### SentencepieceTokenizer\n", - "\n", - "The SentencepieceTokenizer is a sub-token tokenizer that is highly configurable. This is backed by the Sentencepiece library. Like the BertTokenizer, it can include normalization and token splitting before splitting into sub-tokens.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0dUbFCfDCojr" - }, - "outputs": [], - "source": [ - "url = \"https://github.com/tensorflow/text/blob/master/tensorflow_text/python/ops/test_data/test_oss_model.model?raw=true\"\n", - "sp_model = requests.get(url).content" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uvsm6iuNupEZ" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.SentencepieceTokenizer(sp_model, out_type=tf.string)\n", - "tokens = tokenizer.tokenize([\"What you know you can't explain, but you feel it.\"])\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1TatehW0Q0qV" - }, - "source": [ - "### Other splitters\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wqNgtoFPQ1sG" - }, - "source": [ - "#### UnicodeCharTokenizer\n", - "\n", - "This splits a string into UTF-8 characters. It is useful for CJK languages that do not have spaces between words." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4GjiAnQFvIhW" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.UnicodeCharTokenizer()\n", - "tokens = tokenizer.tokenize([\"What you know you can't explain, but you feel it.\"])\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XHyWQcJZGOwL" - }, - "source": [ - "The output is Unicode codepoints. This can be also useful for creating character ngrams, such as bigrams. To convert back into UTF-8 characters." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_uuyz3XC0NdU" - }, - "outputs": [], - "source": [ - "characters = tf.strings.unicode_encode(tf.expand_dims(tokens, -1), \"UTF-8\")\n", - "bigrams = tf_text.ngrams(characters, 2, reduction_type=tf_text.Reduction.STRING_JOIN, string_separator='')\n", - "print(bigrams.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oCmTbCnkQ4At" - }, - "source": [ - "#### HubModuleTokenizer\n", - "\n", - "This is a wrapper around models deployed to TF Hub to make the calls easier since TF Hub currently does not support ragged tensors. Having a model perform tokenization is particularly useful for CJK languages when you want to split into words, but do not have spaces to provide a heuristic guide. At this time, we have a single segmentation model for Chinese." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "R8rWv3rAv_cb" - }, - "outputs": [], - "source": [ - "MODEL_HANDLE = \"https://tfhub.dev/google/zh_segmentation/1\"\n", - "segmenter = tf_text.HubModuleTokenizer(MODEL_HANDLE)\n", - "tokens = segmenter.tokenize([\"新华社北京\"])\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cRXOToXTVCep" - }, - "source": [ - "It may be difficult to view the results of the UTF-8 encoded byte strings. Decode the list values to make viewing easier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XeJHbr8XVctR" - }, - "outputs": [], - "source": [ - "def decode_list(x):\n", - " if type(x) is list:\n", - " return list(map(decode_list, x))\n", - " return x.decode(\"UTF-8\")\n", - "\n", - "def decode_utf8_tensor(x):\n", - " return list(map(decode_list, x.to_list()))\n", - "\n", - "print(decode_utf8_tensor(tokens))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eCnKgtjYRhOK" - }, - "source": [ - "#### SplitMergeTokenizer\n", - "\n", - "The `SplitMergeTokenizer` \u0026 `SplitMergeFromLogitsTokenizer` have a targeted purpose of splitting a string based on provided values that indicate where the string should be split. This is useful when building your own segmentation models like the previous Segmentation example.\n", - "\n", - "For the `SplitMergeTokenizer`, a value of 0 is used to indicate the start of a new string, and the value of 1 indicates the character is part of the current string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3c-2iBiuWgjP" - }, - "outputs": [], - "source": [ - "strings = [\"新华社北京\"]\n", - "labels = [[0, 1, 1, 0, 1]]\n", - "tokenizer = tf_text.SplitMergeTokenizer()\n", - "tokens = tokenizer.tokenize(strings, labels)\n", - "print(decode_utf8_tensor(tokens))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l5F0zPFDwmcb" - }, - "source": [ - "The `SplitMergeFromLogitsTokenizer` is similar, but it instead accepts logit value pairs from a neural network that predict if each character should be split into a new string or merged into the current one." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JRWtRYMxw3oc" - }, - "outputs": [], - "source": [ - "strings = [[\"新华社北京\"]]\n", - "labels = [[[5.0, -3.2], [0.2, 12.0], [0.0, 11.0], [2.2, -1.0], [-3.0, 3.0]]]\n", - "tokenizer = tf_text.SplitMergeFromLogitsTokenizer()\n", - "tokenizer.tokenize(strings, labels)\n", - "print(decode_utf8_tensor(tokens))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mWrGTOzbVb8U" - }, - "source": [ - "#### RegexSplitter\n", - "\n", - "The `RegexSplitter` is able to segment strings at arbitrary breakpoints defined by a provided regular expression." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Szw0QZ6ecExC" - }, - "outputs": [], - "source": [ - "splitter = tf_text.RegexSplitter(\"\\s\")\n", - "tokens = splitter.split([\"What you know you can't explain, but you feel it.\"], )\n", - "print(tokens.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uPIMvyot7GFv" - }, - "source": [ - "## Offsets\n", - "\n", - "When tokenizing strings, it is often desired to know where in the original string the token originated from. For this reason, each tokenizer which implements `TokenizerWithOffsets` has a *tokenize_with_offsets* method that will return the byte offsets along with the tokens. The start_offsets lists the bytes in the original string each token starts at, and the end_offsets lists the bytes immediately after the point where each token ends. To refrase, the start offsets are inclusive and the end offsets are exclusive." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UmZ91zl87J7y" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.UnicodeScriptTokenizer()\n", - "(tokens, start_offsets, end_offsets) = tokenizer.tokenize_with_offsets(['Everything not saved will be lost.'])\n", - "print(tokens.to_list())\n", - "print(start_offsets.to_list())\n", - "print(end_offsets.to_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mVGbkB-80819" - }, - "source": [ - "## Detokenization\n", - "\n", - "Tokenizers which implement the `Detokenizer` provide a `detokenize` method which attempts to combine the strings. This has the chance of being lossy, so the detokenized string may not always match exactly the original, pre-tokenized string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iyThnPPQ0_6Q" - }, - "outputs": [], - "source": [ - "tokenizer = tf_text.UnicodeCharTokenizer()\n", - "tokens = tokenizer.tokenize([\"What you know you can't explain, but you feel it.\"])\n", - "print(tokens.to_list())\n", - "strings = tokenizer.detokenize(tokens)\n", - "print(strings.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iVNFPYSZ7sf1" - }, - "source": [ - "## TF Data\n", - "\n", - "TF Data is a powerful API for creating an input pipeline for training models. Tokenizers work as expected with the API." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YSykDr1d7uxr" - }, - "outputs": [], - "source": [ - "docs = tf.data.Dataset.from_tensor_slices([['Never tell me the odds.'], [\"It's a trap!\"]])\n", - "tokenizer = tf_text.WhitespaceTokenizer()\n", - "tokenized_docs = docs.map(lambda x: tokenizer.tokenize(x))\n", - "iterator = iter(tokenized_docs)\n", - "print(next(iterator).to_list())\n", - "print(next(iterator).to_list())" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "tokenizers.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/guide/unicode.ipynb b/third_party/tensorflow-text/src/docs/guide/unicode.ipynb deleted file mode 100644 index 5c51632..0000000 --- a/third_party/tensorflow-text/src/docs/guide/unicode.ipynb +++ /dev/null
@@ -1,735 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "oL9KopJirB2g" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "SKaX3Hd3ra6C" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AXH1bmUctMld" - }, - "source": [ - "# Unicode strings\n", - "\n", - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/guide/unicode\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/guide/unicode.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/guide/unicode.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/guide/unicode.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LrHJrKYis06U" - }, - "source": [ - "## Introduction\n", - "\n", - "NLP models often handle different languages with different character sets. *Unicode* is a standard encoding system that is used to represent characters from almost all languages. Every Unicode character is encoded using a unique integer [code point](https://en.wikipedia.org/wiki/Code_point) between `0` and `0x10FFFF`. A *Unicode string* is a sequence of zero or more code points.\n", - "\n", - "This tutorial shows how to represent Unicode strings in TensorFlow and manipulate them using Unicode equivalents of standard string ops. It separates Unicode strings into tokens based on script detection." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OIKHl5Lvn4gh" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "n-LkcI-vtWNj" - }, - "source": [ - "## The `tf.string` data type\n", - "\n", - "The basic TensorFlow `tf.string` `dtype` allows you to build tensors of byte strings.\n", - "Unicode strings are [utf-8](https://en.wikipedia.org/wiki/UTF-8) encoded by default." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3yo-Qv6ntaFr" - }, - "outputs": [], - "source": [ - "tf.constant(u\"Thanks 😊\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2kA1ziG2tyCT" - }, - "source": [ - "A `tf.string` tensor treats byte strings as atomic units. This enables it to store byte strings of varying lengths. The string length is not included in the tensor dimensions.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eyINCmTztyyS" - }, - "outputs": [], - "source": [ - "tf.constant([u\"You're\", u\"welcome!\"]).shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jsMPnjb6UDJ1" - }, - "source": [ - "If you use Python to construct strings, note that [string literals](https://docs.python.org/3/reference/lexical_analysis.html) are Unicode-encoded by default." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hUFZ7B1Lk-uj" - }, - "source": [ - "## Representing Unicode\n", - "\n", - "There are two standard ways to represent a Unicode string in TensorFlow:\n", - "\n", - "* `string` scalar — where the sequence of code points is encoded using a known [character encoding](https://en.wikipedia.org/wiki/Character_encoding).\n", - "* `int32` vector — where each position contains a single code point.\n", - "\n", - "For example, the following three values all represent the Unicode string `\"语言处理\"` (which means \"language processing\" in Chinese):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cjQIkfJWvC_u" - }, - "outputs": [], - "source": [ - "# Unicode string, represented as a UTF-8 encoded string scalar.\n", - "text_utf8 = tf.constant(u\"语言处理\")\n", - "text_utf8" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yQqcUECcvF2r" - }, - "outputs": [], - "source": [ - "# Unicode string, represented as a UTF-16-BE encoded string scalar.\n", - "text_utf16be = tf.constant(u\"语言处理\".encode(\"UTF-16-BE\"))\n", - "text_utf16be" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ExdBr1t7vMuS" - }, - "outputs": [], - "source": [ - "# Unicode string, represented as a vector of Unicode code points.\n", - "text_chars = tf.constant([ord(char) for char in u\"语言处理\"])\n", - "text_chars" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B8czv4JNpBnZ" - }, - "source": [ - "### Converting between representations\n", - "\n", - "TensorFlow provides operations to convert between these different representations:\n", - "\n", - "* `tf.strings.unicode_decode`: Converts an encoded string scalar to a vector of code points.\n", - "* `tf.strings.unicode_encode`: Converts a vector of code points to an encoded string scalar.\n", - "* `tf.strings.unicode_transcode`: Converts an encoded string scalar to a different encoding." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qb-UQ_oLpAJg" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_decode(text_utf8,\n", - " input_encoding='UTF-8')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kEBUcunnp-9n" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_encode(text_chars,\n", - " output_encoding='UTF-8')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0MLhWcLZrph-" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_transcode(text_utf8,\n", - " input_encoding='UTF8',\n", - " output_encoding='UTF-16-BE')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QVeLeVohqN7I" - }, - "source": [ - "### Batch dimensions\n", - "\n", - "When decoding multiple strings, the number of characters in each string may not be equal. The return result is a [`tf.RaggedTensor`](../../guide/ragged_tensor.ipynb), where the innermost dimension length varies depending on the number of characters in each string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N2jVzPymr_Mm" - }, - "outputs": [], - "source": [ - "# A batch of Unicode strings, each represented as a UTF8-encoded string.\n", - "batch_utf8 = [s.encode('UTF-8') for s in\n", - " [u'hÃllo', u'What is the weather tomorrow', u'Göödnight', u'😊']]\n", - "batch_chars_ragged = tf.strings.unicode_decode(batch_utf8,\n", - " input_encoding='UTF-8')\n", - "for sentence_chars in batch_chars_ragged.to_list():\n", - " print(sentence_chars)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iRh3n1hPsJ9v" - }, - "source": [ - "You can use this `tf.RaggedTensor` directly, or convert it to a dense `tf.Tensor` with padding or a `tf.SparseTensor` using the methods `tf.RaggedTensor.to_tensor` and `tf.RaggedTensor.to_sparse`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yz17yeSMsUid" - }, - "outputs": [], - "source": [ - "batch_chars_padded = batch_chars_ragged.to_tensor(default_value=-1)\n", - "print(batch_chars_padded.numpy())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kBjsPQp3rhfm" - }, - "outputs": [], - "source": [ - "batch_chars_sparse = batch_chars_ragged.to_sparse()\n", - "\n", - "nrows, ncols = batch_chars_sparse.dense_shape.numpy()\n", - "elements = [['_' for i in range(ncols)] for j in range(nrows)]\n", - "for (row, col), value in zip(batch_chars_sparse.indices.numpy(), batch_chars_sparse.values.numpy()):\n", - " elements[row][col] = str(value)\n", - "# max_width = max(len(value) for row in elements for value in row)\n", - "value_lengths = []\n", - "for row in elements:\n", - " for value in row:\n", - " value_lengths.append(len(value))\n", - "max_width = max(value_lengths)\n", - "print('[%s]' % '\\n '.join(\n", - " '[%s]' % ', '.join(value.rjust(max_width) for value in row)\n", - " for row in elements))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GCCkZh-nwlbL" - }, - "source": [ - "When encoding multiple strings with the same lengths, use a `tf.Tensor` as the input." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_lP62YUAwjK9" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_encode([[99, 97, 116], [100, 111, 103], [99, 111, 119]],\n", - " output_encoding='UTF-8')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "w58CMRg9tamW" - }, - "source": [ - "When encoding multiple strings with varying length, use a `tf.RaggedTensor` as the input." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "d7GtOtrltaMl" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_encode(batch_chars_ragged, output_encoding='UTF-8')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "T2Nh5Aj9xob3" - }, - "source": [ - "If you have a tensor with multiple strings in padded or sparse format, convert it first into a `tf.RaggedTensor` before calling `tf.strings.unicode_encode`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "R2bYCYl0u-Ue" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_encode(\n", - " tf.RaggedTensor.from_sparse(batch_chars_sparse),\n", - " output_encoding='UTF-8')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UlV2znh_u_zm" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_encode(\n", - " tf.RaggedTensor.from_tensor(batch_chars_padded, padding=-1),\n", - " output_encoding='UTF-8')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hQOOGkscvDpc" - }, - "source": [ - "## Unicode operations" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NkmtsA_yvMB0" - }, - "source": [ - "### Character length\n", - "\n", - "Use the `unit` parameter of the `tf.strings.length` op to indicate how character lengths should be computed. `unit` defaults to `\"BYTE\"`, but it can be set to other values, such as `\"UTF8_CHAR\"` or `\"UTF16_CHAR\"`, to determine the number of Unicode codepoints in each encoded string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1ZzMe59mvLHr" - }, - "outputs": [], - "source": [ - "# Note that the final character takes up 4 bytes in UTF8.\n", - "thanks = u'Thanks 😊'.encode('UTF-8')\n", - "num_bytes = tf.strings.length(thanks).numpy()\n", - "num_chars = tf.strings.length(thanks, unit='UTF8_CHAR').numpy()\n", - "print('{} bytes; {} UTF-8 characters'.format(num_bytes, num_chars))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fHG85gxlvVU0" - }, - "source": [ - "### Character substrings\n", - "\n", - "The `tf.strings.substr` op accepts the `unit` parameter, and uses it to determine what kind of offsets the `pos` and `len` paremeters contain." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WlWRLV-4xWYq" - }, - "outputs": [], - "source": [ - "# Here, unit='BYTE' (default). Returns a single byte with len=1\n", - "tf.strings.substr(thanks, pos=7, len=1).numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JfNUVDPwxkCS" - }, - "outputs": [], - "source": [ - "# Specifying unit='UTF8_CHAR', returns a single 4 byte character in this case\n", - "print(tf.strings.substr(thanks, pos=7, len=1, unit='UTF8_CHAR').numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zJUEsVSyeIa3" - }, - "source": [ - "### Split Unicode strings\n", - "\n", - "The `tf.strings.unicode_split` operation splits unicode strings into substrings of individual characters." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dDjkh5G1ejMt" - }, - "outputs": [], - "source": [ - "tf.strings.unicode_split(thanks, 'UTF-8').numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HQqEEZEbdG9O" - }, - "source": [ - "### Byte offsets for characters\n", - "\n", - "To align the character tensor generated by `tf.strings.unicode_decode` with the original string, it's useful to know the offset for where each character begins. The method `tf.strings.unicode_decode_with_offsets` is similar to `unicode_decode`, except that it returns a second tensor containing the start offset of each character." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Cug7cmwYdowd" - }, - "outputs": [], - "source": [ - "codepoints, offsets = tf.strings.unicode_decode_with_offsets(u'🎈🎉🎊', 'UTF-8')\n", - "\n", - "for (codepoint, offset) in zip(codepoints.numpy(), offsets.numpy()):\n", - " print('At byte offset {}: codepoint {}'.format(offset, codepoint))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2ZnCNxOvx66T" - }, - "source": [ - "## Unicode scripts" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nRRHqkqNyGZ6" - }, - "source": [ - "Each Unicode code point belongs to a single collection of codepoints known as a [script](https://en.wikipedia.org/wiki/Script_%28Unicode%29) . A character's script is helpful in determining which language the character might be in. For example, knowing that 'Б' is in Cyrillic script indicates that modern text containing that character is likely from a Slavic language such as Russian or Ukrainian.\n", - "\n", - "TensorFlow provides the `tf.strings.unicode_script` operation to determine which script a given codepoint uses. The script codes are `int32` values corresponding to [International Components for\n", - "Unicode](http://site.icu-project.org/home) (ICU) [`UScriptCode`](http://icu-project.org/apiref/icu4c/uscript_8h.html) values.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "K7DeYHrRyFPy" - }, - "outputs": [], - "source": [ - "uscript = tf.strings.unicode_script([33464, 1041]) # ['芸', 'Б']\n", - "\n", - "print(uscript.numpy()) # [17, 8] == [USCRIPT_HAN, USCRIPT_CYRILLIC]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2fW992a1lIY6" - }, - "source": [ - "The `tf.strings.unicode_script` operation can also be applied to multidimensional `tf.Tensor`s or `tf.RaggedTensor`s of codepoints:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uR7b8meLlFnp" - }, - "outputs": [], - "source": [ - "print(tf.strings.unicode_script(batch_chars_ragged))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mx7HEFpBzEsB" - }, - "source": [ - "## Example: Simple segmentation\n", - "\n", - "Segmentation is the task of splitting text into word-like units. This is often easy when space characters are used to separate words, but some languages (like Chinese and Japanese) do not use spaces, and some languages (like German) contain long compounds that must be split in order to analyze their meaning. In web text, different languages and scripts are frequently mixed together, as in \"NY株価\" (New York Stock Exchange).\n", - "\n", - "We can perform very rough segmentation (without implementing any ML models) by using changes in script to approximate word boundaries. This will work for strings like the \"NY株価\" example above. 
It will also work for most languages that use spaces, as the space characters of various scripts are all classified as USCRIPT_COMMON, a special script code that differs from that of any actual text." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "grsvFiC4BoPb" - }, - "outputs": [], - "source": [ - "# dtype: string; shape: [num_sentences]\n", - "#\n", - "# The sentences to process. Edit this line to try out different inputs!\n", - "sentence_texts = [u'Hello, world.', u'世界こんにちは']" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CapnbShuGU8i" - }, - "source": [ - "First, decode the sentences into character codepoints, and find the script identifier for each character." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ReQVcDQh1MB8" - }, - "outputs": [], - "source": [ - "# dtype: int32; shape: [num_sentences, (num_chars_per_sentence)]\n", - "#\n", - "# sentence_char_codepoint[i, j] is the codepoint for the j'th character in\n", - "# the i'th sentence.\n", - "sentence_char_codepoint = tf.strings.unicode_decode(sentence_texts, 'UTF-8')\n", - "print(sentence_char_codepoint)\n", - "\n", - "# dtype: int32; shape: [num_sentences, (num_chars_per_sentence)]\n", - "#\n", - "# sentence_char_script[i, j] is the Unicode script of the j'th character in\n", - "# the i'th sentence.\n", - "sentence_char_script = tf.strings.unicode_script(sentence_char_codepoint)\n", - "print(sentence_char_script)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "O2fapF5UGcUc" - }, - "source": [ - "Use the script identifiers to determine where word boundaries should be added. Add a word boundary at the beginning of each sentence, and for each character whose script differs from the previous character." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7v5W6MOr1Rlc" - }, - "outputs": [], - "source": [ - "# dtype: bool; shape: [num_sentences, (num_chars_per_sentence)]\n", - "#\n", - "# sentence_char_starts_word[i, j] is True if the j'th character in the i'th\n", - "# sentence is the start of a word.\n", - "sentence_char_starts_word = tf.concat(\n", - " [tf.fill([sentence_char_script.nrows(), 1], True),\n", - " tf.not_equal(sentence_char_script[:, 1:], sentence_char_script[:, :-1])],\n", - " axis=1)\n", - "\n", - "# dtype: int64; shape: [num_words]\n", - "#\n", - "# word_starts[i] is the index of the character that starts the i'th word (in\n", - "# the flattened list of characters from all sentences).\n", - "word_starts = tf.squeeze(tf.where(sentence_char_starts_word.values), axis=1)\n", - "print(word_starts)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LAwh-1QkGuC9" - }, - "source": [ - "You can then use those start offsets to build a `RaggedTensor` containing the list of words from all batches."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bNiA1O_eBBCL" - }, - "outputs": [], - "source": [ - "# dtype: int32; shape: [num_words, (num_chars_per_word)]\n", - "#\n", - "# word_char_codepoint[i, j] is the codepoint for the j'th character in the\n", - "# i'th word.\n", - "word_char_codepoint = tf.RaggedTensor.from_row_starts(\n", - " values=sentence_char_codepoint.values,\n", - " row_starts=word_starts)\n", - "print(word_char_codepoint)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "66a2ZnYmG2ao" - }, - "source": [ - "To finish, segment the word codepoints `RaggedTensor` back into sentences and encode into UTF-8 strings for readability." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NCfwcqLSEjZb" - }, - "outputs": [], - "source": [ - "# dtype: int64; shape: [num_sentences]\n", - "#\n", - "# sentence_num_words[i] is the number of words in the i'th sentence.\n", - "sentence_num_words = tf.reduce_sum(\n", - " tf.cast(sentence_char_starts_word, tf.int64),\n", - " axis=1)\n", - "\n", - "# dtype: int32; shape: [num_sentences, (num_words_per_sentence), (num_chars_per_word)]\n", - "#\n", - "# sentence_word_char_codepoint[i, j, k] is the codepoint for the k'th character\n", - "# in the j'th word in the i'th sentence.\n", - "sentence_word_char_codepoint = tf.RaggedTensor.from_row_lengths(\n", - " values=word_char_codepoint,\n", - " row_lengths=sentence_num_words)\n", - "print(sentence_word_char_codepoint)\n", - "\n", - "tf.strings.unicode_encode(sentence_word_char_codepoint, 'UTF-8').to_list()" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "oL9KopJirB2g" - ], - "name": "unicode.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/guide/word_embeddings.ipynb b/third_party/tensorflow-text/src/docs/guide/word_embeddings.ipynb deleted file mode 100644 index 51d075dc..0000000 --- a/third_party/tensorflow-text/src/docs/guide/word_embeddings.ipynb +++ /dev/null
@@ -1,738 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "IZBRUaiBBEpa" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "YS3NA-i6nAFC" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7SN5USFEIIK3" - }, - "source": [ - "# Word embeddings" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Aojnnc7sXrab" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/guide/word_embeddings\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003e\n", - " View on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/guide/word_embeddings.ipynb\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e\n", - " Run in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/guide/word_embeddings.ipynb\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e\n", - " View source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/guide/word_embeddings.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q6mJg1g3apaz" - }, - "source": [ - "This tutorial contains an introduction to word embeddings. You will train your own word embeddings using a simple Keras model for a sentiment classification task, and then visualize them in the [Embedding Projector](http://projector.tensorflow.org) (shown in the image below). \n", - "\n", - "\u003cimg src=\"images/embedding.jpg\" alt=\"Screenshot of the embedding projector\" width=\"400\"/\u003e\n", - "\n", - "## Representing text as numbers\n", - "\n", - "Machine learning models take vectors (arrays of numbers) as input. When working with text, the first thing you must do is come up with a strategy to convert strings to numbers (or to \"vectorize\" the text) before feeding it to the model. In this section, you will look at three strategies for doing so.\n", - "\n", - "### One-hot encodings\n", - "\n", - "As a first idea, you might \"one-hot\" encode each word in your vocabulary. Consider the sentence \"The cat sat on the mat\". 
The vocabulary (or unique words) in this sentence is (cat, mat, on, sat, the). To represent each word, you will create a zero vector with length equal to the vocabulary, then place a one in the index that corresponds to the word. This approach is shown in the following diagram.\n", - "\n", - "\u003cimg src=\"images/one-hot.png\" alt=\"Diagram of one-hot encodings\" width=\"400\" /\u003e\n", - "\n", - "To create a vector that contains the encoding of the sentence, you could then concatenate the one-hot vectors for each word.\n", - "\n", - "Key point: This approach is inefficient. A one-hot encoded vector is sparse (meaning, most indices are zero). Imagine you have 10,000 words in the vocabulary. To one-hot encode each word, you would create a vector where 99.99% of the elements are zero.\n", - "\n", - "### Encode each word with a unique number\n", - "\n", - "A second approach you might try is to encode each word using a unique number. Continuing the example above, you could assign 1 to \"cat\", 2 to \"mat\", and so on. You could then encode the sentence \"The cat sat on the mat\" as a dense vector like [5, 1, 4, 3, 5, 2]. This approach is efficient. Instead of a sparse vector, you now have a dense one (where all elements are full).\n", - "\n", - "There are two downsides to this approach, however:\n", - "\n", - "* The integer-encoding is arbitrary (it does not capture any relationship between words).\n", - "\n", - "* An integer-encoding can be challenging for a model to interpret. A linear classifier, for example, learns a single weight for each feature. Because there is no relationship between the similarity of any two words and the similarity of their encodings, this feature-weight combination is not meaningful.\n", - "\n", - "### Word embeddings\n", - "\n", - "Word embeddings give us a way to use an efficient, dense representation in which similar words have a similar encoding. Importantly, you do not have to specify this encoding by hand. An embedding is a dense vector of floating point values (the length of the vector is a parameter you specify). Instead of specifying the values for the embedding manually, they are trainable parameters (weights learned by the model during training, in the same way a model learns weights for a dense layer). It is common to see word embeddings that are 8-dimensional (for small datasets), up to 1024-dimensions when working with large datasets. A higher dimensional embedding can capture fine-grained relationships between words, but takes more data to learn.\n", - "\n", - "\u003cimg src=\"images/embedding2.png\" alt=\"Diagram of an embedding\" width=\"400\"/\u003e\n", - "\n", - "Above is a diagram for a word embedding. Each word is represented as a 4-dimensional vector of floating point values. Another way to think of an embedding is as \"lookup table\". After these weights have been learned, you can encode each word by looking up the dense vector it corresponds to in the table." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SZUQErGewZxE" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RutaI-Tpev3T" - }, - "outputs": [], - "source": [ - "import io\n", - "import os\n", - "import re\n", - "import shutil\n", - "import string\n", - "import tensorflow as tf\n", - "\n", - "from tensorflow.keras import Sequential\n", - "from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D\n", - "from tensorflow.keras.layers import TextVectorization" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SBFctV8-JZOc" - }, - "source": [ - "### Download the IMDb Dataset\n", - "You will use the [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/) through the tutorial. You will train a sentiment classifier model on this dataset and in the process learn embeddings from scratch. To read more about loading a dataset from scratch, see the [Loading text tutorial](https://www.tensorflow.org/tutorials/load_data/text). \n", - "\n", - "Download the dataset using Keras file utility and take a look at the directories." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aPO4_UmfF0KH" - }, - "outputs": [], - "source": [ - "url = \"https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz\"\n", - "\n", - "dataset = tf.keras.utils.get_file(\"aclImdb_v1.tar.gz\", url,\n", - " untar=True, cache_dir='.',\n", - " cache_subdir='')\n", - "\n", - "dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')\n", - "os.listdir(dataset_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eY6yROZNKvbd" - }, - "source": [ - "Take a look at the `train/` directory. It has `pos` and `neg` folders with movie reviews labelled as positive and negative respectively. You will use reviews from `pos` and `neg` folders to train a binary classification model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9-iOHJGN6SDu" - }, - "outputs": [], - "source": [ - "train_dir = os.path.join(dataset_dir, 'train')\n", - "os.listdir(train_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9O59BdioK8jY" - }, - "source": [ - "The `train` directory also has additional folders which should be removed before creating training dataset." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1_Vfi9oWMSh-" - }, - "outputs": [], - "source": [ - "remove_dir = os.path.join(train_dir, 'unsup')\n", - "shutil.rmtree(remove_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oFoJjiEyJz9u" - }, - "source": [ - "Next, create a `tf.data.Dataset` using `tf.keras.preprocessing.text_dataset_from_directory`. You can read more about using this utility in this [text classification tutorial](https://www.tensorflow.org/tutorials/keras/text_classification). \n", - "\n", - "Use the `train` directory to create both train and validation datasets with a split of 20% for validation." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ItYD3TLkCOP1" - }, - "outputs": [], - "source": [ - "batch_size = 1024\n", - "seed = 123\n", - "train_ds = tf.keras.preprocessing.text_dataset_from_directory(\n", - " 'aclImdb/train', batch_size=batch_size, validation_split=0.2,\n", - " subset='training', seed=seed)\n", - "val_ds = tf.keras.preprocessing.text_dataset_from_directory(\n", - " 'aclImdb/train', batch_size=batch_size, validation_split=0.2,\n", - " subset='validation', seed=seed)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eHa6cq0-Ym0g" - }, - "source": [ - "Take a look at a few movie reviews and their labels `(1: positive, 0: negative)` from the train dataset.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aTCbSkvkYmTT" - }, - "outputs": [], - "source": [ - "for text_batch, label_batch in train_ds.take(1):\n", - " for i in range(5):\n", - " print(label_batch[i].numpy(), text_batch.numpy()[i])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FHV2pchDhzDn" - }, - "source": [ - "### Configure the dataset for performance\n", - "\n", - "These are two important methods you should use when loading data to make sure that I/O does not become blocking.\n", - "\n", - "`.cache()` keeps data in memory after it's loaded off disk. This will ensure the dataset does not become a bottleneck while training your model. If your dataset is too large to fit into memory, you can also use this method to create a performant on-disk cache, which is more efficient to read than many small files.\n", - "\n", - "`.prefetch()` overlaps data preprocessing and model execution while training. \n", - "\n", - "You can learn more about both methods, as well as how to cache data to disk in the [data performance guide](https://www.tensorflow.org/guide/data_performance)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Oz6k1IW7h1TO" - }, - "outputs": [], - "source": [ - "AUTOTUNE = tf.data.AUTOTUNE\n", - "\n", - "train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)\n", - "val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eqBazMiVQkj1" - }, - "source": [ - "## Using the Embedding layer\n", - "\n", - "Keras makes it easy to use word embeddings. Take a look at the [Embedding](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Embedding) layer.\n", - "\n", - "The Embedding layer can be understood as a lookup table that maps from integer indices (which stand for specific words) to dense vectors (their embeddings). The dimensionality (or width) of the embedding is a parameter you can experiment with to see what works well for your problem, much in the same way you would experiment with the number of neurons in a Dense layer.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-OjxLVrMvWUE" - }, - "outputs": [], - "source": [ - "# Embed a 1,000 word vocabulary into 5 dimensions.\n", - "embedding_layer = tf.keras.layers.Embedding(1000, 5)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2dKKV1L2Rk7e" - }, - "source": [ - "When you create an Embedding layer, the weights for the embedding are randomly initialized (just like any other layer). During training, they are gradually adjusted via backpropagation. 
Once trained, the learned word embeddings will roughly encode similarities between words (as they were learned for the specific problem your model is trained on).\n", - "\n", - "If you pass an integer to an embedding layer, the result replaces each integer with the vector from the embedding table:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0YUjPgP7w0PO" - }, - "outputs": [], - "source": [ - "result = embedding_layer(tf.constant([1, 2, 3]))\n", - "result.numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "O4PC4QzsxTGx" - }, - "source": [ - "For text or sequence problems, the Embedding layer takes a 2D tensor of integers, of shape `(samples, sequence_length)`, where each entry is a sequence of integers. It can embed sequences of variable lengths. You could feed into the embedding layer above batches with shapes `(32, 10)` (batch of 32 sequences of length 10) or `(64, 15)` (batch of 64 sequences of length 15).\n", - "\n", - "The returned tensor has one more axis than the input, the embedding vectors are aligned along the new last axis. Pass it a `(2, 3)` input batch and the output is `(2, 3, N)`\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vwSYepRjyRGy" - }, - "outputs": [], - "source": [ - "result = embedding_layer(tf.constant([[0, 1, 2], [3, 4, 5]]))\n", - "result.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WGQp2N92yOyB" - }, - "source": [ - "When given a batch of sequences as input, an embedding layer returns a 3D floating point tensor, of shape `(samples, sequence_length, embedding_dimensionality)`. To convert from this sequence of variable length to a fixed representation there are a variety of standard approaches. You could use an RNN, Attention, or pooling layer before passing it to a Dense layer. This tutorial uses pooling because it's the simplest. The [Text Classification with an RNN](https://www.tensorflow.org/text/tutorials/text_classification_rnn) tutorial is a good next step." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aGicgV5qT0wh" - }, - "source": [ - "## Text preprocessing" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "N6NZSqIIoU0Y" - }, - "source": [ - "Next, define the dataset preprocessing steps required for your sentiment classification model. Initialize a TextVectorization layer with the desired parameters to vectorize movie reviews. You can learn more about using this layer in the [Text Classification](https://www.tensorflow.org/tutorials/keras/text_classification) tutorial." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2MlsXzo-ZlfK" - }, - "outputs": [], - "source": [ - "# Create a custom standardization function to strip HTML break tags '\u003cbr /\u003e'.\n", - "def custom_standardization(input_data):\n", - " lowercase = tf.strings.lower(input_data)\n", - " stripped_html = tf.strings.regex_replace(lowercase, '\u003cbr /\u003e', ' ')\n", - " return tf.strings.regex_replace(stripped_html,\n", - " '[%s]' % re.escape(string.punctuation), '')\n", - "\n", - "\n", - "# Vocabulary size and number of words in a sequence.\n", - "vocab_size = 10000\n", - "sequence_length = 100\n", - "\n", - "# Use the text vectorization layer to normalize, split, and map strings to\n", - "# integers. 
Note that the layer uses the custom standardization defined above.\n", - "# Set the maximum sequence length, as all samples are not of the same length.\n", - "vectorize_layer = TextVectorization(\n", - " standardize=custom_standardization,\n", - " max_tokens=vocab_size,\n", - " output_mode='int',\n", - " output_sequence_length=sequence_length)\n", - "\n", - "# Make a text-only dataset (no labels) and call adapt to build the vocabulary.\n", - "text_ds = train_ds.map(lambda x, y: x)\n", - "vectorize_layer.adapt(text_ds)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zI9_wLIiWO8Z" - }, - "source": [ - "## Create a classification model\n", - "\n", - "Use the [Keras Sequential API](https://www.tensorflow.org/guide/keras/sequential_model) to define the sentiment classification model. In this case it is a \"Continuous bag of words\" style model.\n", - "* The [`TextVectorization`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/experimental/preprocessing/TextVectorization) layer transforms strings into vocabulary indices. You have already initialized `vectorize_layer` as a TextVectorization layer and built its vocabulary by calling `adapt` on `text_ds`. Now vectorize_layer can be used as the first layer of your end-to-end classification model, feeding transformed strings into the Embedding layer.\n", - "* The [`Embedding`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Embedding) layer takes the integer-encoded vocabulary and looks up the embedding vector for each word-index. These vectors are learned as the model trains. The vectors add a dimension to the output array. The resulting dimensions are: `(batch, sequence, embedding)`.\n", - "\n", - "* The [`GlobalAveragePooling1D`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GlobalAveragePooling1D) layer returns a fixed-length output vector for each example by averaging over the sequence dimension. This allows the model to handle input of variable length, in the simplest way possible.\n", - "\n", - "* The fixed-length output vector is piped through a fully-connected ([`Dense`](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense)) layer with 16 hidden units.\n", - "\n", - "* The last layer is densely connected with a single output node. \n", - "\n", - "Caution: This model doesn't use masking, so the zero-padding is used as part of the input and hence the padding length may affect the output. To fix this, see the [masking and padding guide](https://www.tensorflow.org/guide/keras/masking_and_padding)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pHLcFtn5Wsqj" - }, - "outputs": [], - "source": [ - "embedding_dim=16\n", - "\n", - "model = Sequential([\n", - " vectorize_layer,\n", - " Embedding(vocab_size, embedding_dim, name=\"embedding\"),\n", - " GlobalAveragePooling1D(),\n", - " Dense(16, activation='relu'),\n", - " Dense(1)\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JjLNgKO7W2fe" - }, - "source": [ - "## Compile and train the model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jpX9etB6IOQd" - }, - "source": [ - "You will use [TensorBoard](https://www.tensorflow.org/tensorboard) to visualize metrics including loss and accuracy. Create a `tf.keras.callbacks.TensorBoard`."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "W4Hg3IHFt4Px" - }, - "outputs": [], - "source": [ - "tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=\"logs\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7OrKAKAKIbuH" - }, - "source": [ - "Compile and train the model using the `Adam` optimizer and `BinaryCrossentropy` loss. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lCUgdP69Wzix" - }, - "outputs": [], - "source": [ - "model.compile(optimizer='adam',\n", - " loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n", - " metrics=['accuracy'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5mQehiQyv8rP" - }, - "outputs": [], - "source": [ - "model.fit(\n", - " train_ds,\n", - " validation_data=val_ds,\n", - " epochs=15,\n", - " callbacks=[tensorboard_callback])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1wYnVedSPfmX" - }, - "source": [ - "With this approach the model reaches a validation accuracy of around 78% (note that the model is overfitting since training accuracy is higher).\n", - "\n", - "Note: Your results may be a bit different, depending on how weights were randomly initialized before training the embedding layer. \n", - "\n", - "You can look into the model summary to learn more about each layer of the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mDCgjWyq_0dc" - }, - "outputs": [], - "source": [ - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hiQbOJZ2WBFY" - }, - "source": [ - "Visualize the model metrics in TensorBoard." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_Uanp2YH8RzU" - }, - "outputs": [], - "source": [ - "#docs_infra: no_execute\n", - "%load_ext tensorboard\n", - "%tensorboard --logdir logs" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QvURkGVpXDOy" - }, - "source": [ - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KCoA6qwqP836" - }, - "source": [ - "## Retrieve the trained word embeddings and save them to disk\n", - "\n", - "Next, retrieve the word embeddings learned during training. The embeddings are weights of the Embedding layer in the model. The weights matrix is of shape `(vocab_size, embedding_dimension)`." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Zp5rv01WG2YA" - }, - "source": [ - "Obtain the weights from the model using `get_layer()` and `get_weights()`. The `get_vocabulary()` function provides the vocabulary to build a metadata file with one token per line. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_Uamp1YH8RzU" - }, - "outputs": [], - "source": [ - "weights = model.get_layer('embedding').get_weights()[0]\n", - "vocab = vectorize_layer.get_vocabulary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J8MiCA77X8B8" - }, - "source": [ - "Write the weights to disk. To use the [Embedding Projector](http://projector.tensorflow.org), you will upload two files in tab separated format: a file of vectors (containing the embedding), and a file of meta data (containing the words)." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VLIahl9s53XT" - }, - "outputs": [], - "source": [ - "out_v = io.open('vectors.tsv', 'w', encoding='utf-8')\n", - "out_m = io.open('metadata.tsv', 'w', encoding='utf-8')\n", - "\n", - "for index, word in enumerate(vocab):\n", - " if index == 0:\n", - " continue # skip 0, it's padding.\n", - " vec = weights[index]\n", - " out_v.write('\\t'.join([str(x) for x in vec]) + \"\\n\")\n", - " out_m.write(word + \"\\n\")\n", - "out_v.close()\n", - "out_m.close()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JQyMZWyxYjMr" - }, - "source": [ - "If you are running this tutorial in [Colaboratory](https://colab.research.google.com), you can use the following snippet to download these files to your local machine (or use the file browser, *View -\u003e Table of contents -\u003e File browser*)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lUsjQOKMIV2z" - }, - "outputs": [], - "source": [ - "try:\n", - " from google.colab import files\n", - " files.download('vectors.tsv')\n", - " files.download('metadata.tsv')\n", - "except Exception:\n", - " pass" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PXLfFA54Yz-o" - }, - "source": [ - "## Visualize the embeddings\n", - "\n", - "To visualize the embeddings, upload them to the embedding projector.\n", - "\n", - "Open the [Embedding Projector](http://projector.tensorflow.org/) (this can also run in a local TensorBoard instance).\n", - "\n", - "* Click on \"Load data\".\n", - "\n", - "* Upload the two files you created above: `vectors.tsv` and `metadata.tsv`.\n", - "\n", - "The embeddings you have trained will now be displayed. You can search for words to find their closest neighbors. For example, try searching for \"beautiful\". You may see neighbors like \"wonderful\". \n", - "\n", - "Note: Experimentally, you may be able to produce more interpretable embeddings by using a simpler model. Try deleting the `Dense(16)` layer, retraining the model, and visualizing the embeddings again.\n", - "\n", - "Note: Typically, a much larger dataset is needed to train more interpretable word embeddings. This tutorial uses a small IMDb dataset for the purpose of demonstration.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wvKiEHjramNh" - }, - "source": [ - "## Next Steps" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BSgAZpwF5xF_" - }, - "source": [ - "This tutorial has shown you how to train and visualize word embeddings from scratch on a small dataset.\n", - "\n", - "* To train word embeddings using the Word2Vec algorithm, try the [Word2Vec](https://www.tensorflow.org/tutorials/text/word2vec) tutorial. \n", - "\n", - "* To learn more about advanced text processing, read the [Transformer model for language understanding](https://www.tensorflow.org/text/tutorials/transformer)." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "word_embeddings.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/include/architecture-with-tf-text.png b/third_party/tensorflow-text/src/docs/include/architecture-with-tf-text.png deleted file mode 100644 index 071b2b0..0000000 --- a/third_party/tensorflow-text/src/docs/include/architecture-with-tf-text.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/include/architecture-without-tf-text.png b/third_party/tensorflow-text/src/docs/include/architecture-without-tf-text.png deleted file mode 100644 index 0aea83a..0000000 --- a/third_party/tensorflow-text/src/docs/include/architecture-without-tf-text.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/include/architecture.gif b/third_party/tensorflow-text/src/docs/include/architecture.gif deleted file mode 100644 index 911a697..0000000 --- a/third_party/tensorflow-text/src/docs/include/architecture.gif +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/include/default.css b/third_party/tensorflow-text/src/docs/include/default.css deleted file mode 100644 index 870dc14..0000000 --- a/third_party/tensorflow-text/src/docs/include/default.css +++ /dev/null
@@ -1,3 +0,0 @@ -code { - color: #188038; -}
diff --git a/third_party/tensorflow-text/src/docs/include/tftext.png b/third_party/tensorflow-text/src/docs/include/tftext.png deleted file mode 100644 index 1a99878..0000000 --- a/third_party/tensorflow-text/src/docs/include/tftext.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/BUILD b/third_party/tensorflow-text/src/docs/tutorials/BUILD deleted file mode 100644 index bec764f..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/BUILD +++ /dev/null
@@ -1,55 +0,0 @@ -load("//third_party/py/tensorflow_docs/google:tf_org.bzl", "tf_org_check_links", "tf_org_notebook_test") - -licenses(["notice"]) - -tf_org_check_links(name = "check_links") - -# We couldn't get internal notebook tests working for these: -# Usually the reason is the use of external data. -# -# * bert_glue.ipynb -# * classify_text_with_bert.ipynb -# * nmt_with_attention.ipynb -# * fine_tune_bert.ipynb -# * uncertainty_quantification_with_sngp_bert.ipynb - -tf_org_notebook_test( - name = "text_generation", - ipynb = "text_generation.ipynb", - deps = [ - "//third_party/py/matplotlib", - # numpy dep, - # tensorflow datasets dep, - ], -) - -tf_org_notebook_test( - name = "text_similarity", - ipynb = "text_similarity.ipynb", - deps = [ - "//third_party/py/tensorflow_text", - ], -) - -tf_org_notebook_test( - name = "transformer", - size = "large", - ipynb = "transformer.ipynb", - deps = [ - "//third_party/py/matplotlib", - # numpy dep, - # tensorflow datasets dep, - "//third_party/py/tensorflow_text", - ], -) - -tf_org_notebook_test( - name = "text_classification_rnn", - size = "large", - ipynb = "text_classification_rnn.ipynb", - deps = [ - "//third_party/py/matplotlib", - # numpy dep, - # tensorflow datasets dep, - ], -)
diff --git a/third_party/tensorflow-text/src/docs/tutorials/_toc.yaml b/third_party/tensorflow-text/src/docs/tutorials/_toc.yaml deleted file mode 100644 index 1db7b24..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/_toc.yaml +++ /dev/null
@@ -1,27 +0,0 @@ -toc: -- heading: Text Generation - style: divider -- title: "Generate Text with RNNs" - path: /text/tutorials/text_generation -- title: "Translate text with seq2seq models" - path: /text/tutorials/nmt_with_attention -- title: "Translate text with transformer models" - path: /text/tutorials/transformer - -- heading: Text Classification - style: divider -- title: "Text classification with BERT" - path: /text/tutorials/classify_text_with_bert -- title: "Text classification with RNNs" - path: /text/tutorials/text_classification_rnn -- title: "Compute Similarity Metrics" - path: /text/tutorials/text_similarity - -- heading: NLP with BERT - style: divider -- title: "Fine tune BERT" - path: /text/tutorials/fine_tune_bert -- title: "Fine tune BERT with GLUE" - path: /text/tutorials/bert_glue -- title: "Quantify uncertainty with BERT" - path: /text/tutorials/uncertainty_quantification_with_sngp_bert
diff --git a/third_party/tensorflow-text/src/docs/tutorials/bert_glue.ipynb b/third_party/tensorflow-text/src/docs/tutorials/bert_glue.ipynb deleted file mode 100644 index 30a2fc6..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/bert_glue.ipynb +++ /dev/null
@@ -1,1150 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "58MWWgq75lMh" - }, - "source": [ - "##### Copyright 2020 The TensorFlow Hub Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "jM3hCI1UUzar" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4_NEJlxKKjyI" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/bert_glue\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/bert_glue.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/bert_glue.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/bert_glue.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://tfhub.dev/google/collections/bert/1\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub model\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U5POcTVNB_dv" - }, - "source": [ - "# Solve GLUE tasks using BERT on TPU\n", - "\n", - "BERT can be used to solve many problems in natural language processing. You will learn how to fine-tune BERT for many tasks from the [GLUE benchmark](https://gluebenchmark.com/):\n", - "\n", - "1. [CoLA](https://nyu-mll.github.io/CoLA/) (Corpus of Linguistic Acceptability): Is the sentence grammatically correct?\n", - "\n", - "1. [SST-2](https://nlp.stanford.edu/sentiment/index.html) (Stanford Sentiment Treebank): The task is to predict the sentiment of a given sentence.\n", - "\n", - "1. [MRPC](https://www.microsoft.com/en-us/download/details.aspx?id=52398) (Microsoft Research Paraphrase Corpus): Determine whether a pair of sentences are semantically equivalent.\n", - "\n", - "1. [QQP](https://data.quora.com/First-Quora-Dataset-Release-Question-Pairs) (Quora Question Pairs2): Determine whether a pair of questions are semantically equivalent.\n", - "\n", - "1. 
[MNLI](http://www.nyu.edu/projects/bowman/multinli/) (Multi-Genre Natural Language Inference): Given a premise sentence and a hypothesis sentence, the task is to predict whether the premise entails the hypothesis (entailment), contradicts the hypothesis (contradiction), or neither (neutral).\n", - "\n", - "1. [QNLI](https://rajpurkar.github.io/SQuAD-explorer/)(Question-answering Natural Language Inference): The task is to determine whether the context sentence contains the answer to the question.\n", - "\n", - "1. [RTE](https://aclweb.org/aclwiki/Recognizing_Textual_Entailment)(Recognizing Textual Entailment): Determine if a sentence entails a given hypothesis or not.\n", - "\n", - "1. [WNLI](https://cs.nyu.edu/faculty/davise/papers/WinogradSchemas/WS.html)(Winograd Natural Language Inference): The task is to predict if the sentence with the pronoun substituted is entailed by the original sentence.\n", - "\n", - "This tutorial contains complete end-to-end code to train these models on a TPU. You can also run this notebook on a GPU, by changing one line (described below).\n", - "\n", - "In this notebook, you will:\n", - "\n", - "- Load a BERT model from TensorFlow Hub\n", - "- Choose one of GLUE tasks and download the dataset\n", - "- Preprocess the text\n", - "- Fine-tune BERT (examples are given for single-sentence and multi-sentence datasets)\n", - "- Save the trained model and use it\n", - "\n", - "Key point: The model you develop will be end-to-end. The preprocessing logic will be included in the model itself, making it capable of accepting raw strings as input.\n", - "\n", - "Note: This notebook should be run using a TPU. In Colab, choose **Runtime -\u003e Change runtime type** and verify that a **TPU** is selected.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SCjmX4zTCkRK" - }, - "source": [ - "## Setup\n", - "\n", - "You will use a separate model to preprocess text before using it to fine-tune BERT. This model depends on [tensorflow/text](https://github.com/tensorflow/text), which you will install below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rE2dA1XO9clA" - }, - "outputs": [], - "source": [ - "!pip install -q -U tensorflow-text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WMaudPO1a2Hx" - }, - "source": [ - "You will use the AdamW optimizer from [tensorflow/models](https://github.com/tensorflow/models) to fine-tune BERT, which you will install as well." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5zwJyopqa3uH" - }, - "outputs": [], - "source": [ - "!pip install -q -U tf-models-official" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Kx_Acxvo1nje" - }, - "outputs": [], - "source": [ - "!pip install -U tfds-nightly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_XgTpm9ZxoN9" - }, - "outputs": [], - "source": [ - "import os\n", - "import tensorflow as tf\n", - "import tensorflow_hub as hub\n", - "import tensorflow_datasets as tfds\n", - "import tensorflow_text as text # A dependency of the preprocessing model\n", - "import tensorflow_addons as tfa\n", - "from official.nlp import optimization\n", - "import numpy as np\n", - "\n", - "tf.get_logger().setLevel('ERROR')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sv7A19G32Kfw" - }, - "source": [ - "Next, configure TFHub to read checkpoints directly from TFHub's Cloud Storage buckets. 
This is only recommended when running TFHub models on TPU.\n", - "\n", - "Without this setting TFHub would download the compressed file and extract the checkpoint locally. Attempting to load from these local files will fail with the following error:\n", - "\n", - "```\n", - "InvalidArgumentError: Unimplemented: File system scheme '[local]' not implemented\n", - "```\n", - "\n", - "This is because the [TPU can only read directly from Cloud Storage buckets](https://cloud.google.com/tpu/docs/troubleshooting#cannot_use_local_filesystem).\n", - "\n", - "Note: This setting is automatic in Colab." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Sz6P5pK3ldxQ" - }, - "outputs": [], - "source": [ - "os.environ[\"TFHUB_MODEL_LOAD_FORMAT\"]=\"UNCOMPRESSED\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "__Uqe2vNETAu" - }, - "source": [ - "### Connect to the TPU worker\n", - "\n", - "The following code connects to the TPU worker and changes TensorFlow's default device to the CPU device on the TPU worker. It also defines a TPU distribution strategy that you will use to distribute model training onto the 8 separate TPU cores available on this one TPU worker. See TensorFlow's [TPU guide](https://www.tensorflow.org/guide/tpu) for more information." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cpHWNs1nV0Zn" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "if os.environ['COLAB_TPU_ADDR']:\n", - " cluster_resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')\n", - " tf.config.experimental_connect_to_cluster(cluster_resolver)\n", - " tf.tpu.experimental.initialize_tpu_system(cluster_resolver)\n", - " strategy = tf.distribute.TPUStrategy(cluster_resolver)\n", - " print('Using TPU')\n", - "elif tf.config.list_physical_devices('GPU'):\n", - " strategy = tf.distribute.MirroredStrategy()\n", - " print('Using GPU')\n", - "else:\n", - " raise ValueError('Running on CPU is not recommended.')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UVtEyxDFpKE1" - }, - "source": [ - "## Loading models from TensorFlow Hub\n", - "\n", - "Here you can choose which BERT model you will load from TensorFlow Hub and fine-tune.\n", - "There are multiple BERT models available to choose from.\n", - "\n", - " - [BERT-Base](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3), [Uncased](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3) and [seven more models](https://tfhub.dev/google/collections/bert/1) with trained weights released by the original BERT authors.\n", - " - [Small BERTs](https://tfhub.dev/google/collections/bert/1) have the same general architecture but fewer and/or smaller Transformer blocks, which lets you explore tradeoffs between speed, size and quality.\n", - " - [ALBERT](https://tfhub.dev/google/collections/albert/1): four different sizes of \"A Lite BERT\" that reduces model size (but not computation time) by sharing parameters between layers.\n", - " - [BERT Experts](https://tfhub.dev/google/collections/experts/bert/1): eight models that all have the BERT-base architecture but offer a choice between different pre-training domains, to align more closely with the target task.\n", - " - [Electra](https://tfhub.dev/google/collections/electra/1) has the same architecture as BERT (in three different sizes), but gets pre-trained as a discriminator in a set-up that resembles a Generative Adversarial Network (GAN).\n", - " - BERT with Talking-Heads 
Attention and Gated GELU [[base](https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1), [large](https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_large/1)] has two improvements to the core of the Transformer architecture.\n", - "\n", - "See the model documentation linked above for more details.\n", - "\n", - "In this tutorial, you will start with BERT-base. You can use larger and more recent models for higher accuracy, or smaller models for faster training times. To change the model, you only need to switch a single line of code (shown below). All the differences are encapsulated in the SavedModel you will download from TensorFlow Hub." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "y8_ctG55-uTX" - }, - "outputs": [], - "source": [ - "#@title Choose a BERT model to fine-tune\n", - "\n", - "bert_model_name = 'bert_en_uncased_L-12_H-768_A-12' #@param [\"bert_en_uncased_L-12_H-768_A-12\", \"bert_en_uncased_L-24_H-1024_A-16\", \"bert_en_wwm_uncased_L-24_H-1024_A-16\", \"bert_en_cased_L-12_H-768_A-12\", \"bert_en_cased_L-24_H-1024_A-16\", \"bert_en_wwm_cased_L-24_H-1024_A-16\", \"bert_multi_cased_L-12_H-768_A-12\", \"small_bert/bert_en_uncased_L-2_H-128_A-2\", \"small_bert/bert_en_uncased_L-2_H-256_A-4\", \"small_bert/bert_en_uncased_L-2_H-512_A-8\", \"small_bert/bert_en_uncased_L-2_H-768_A-12\", \"small_bert/bert_en_uncased_L-4_H-128_A-2\", \"small_bert/bert_en_uncased_L-4_H-256_A-4\", \"small_bert/bert_en_uncased_L-4_H-512_A-8\", \"small_bert/bert_en_uncased_L-4_H-768_A-12\", \"small_bert/bert_en_uncased_L-6_H-128_A-2\", \"small_bert/bert_en_uncased_L-6_H-256_A-4\", \"small_bert/bert_en_uncased_L-6_H-512_A-8\", \"small_bert/bert_en_uncased_L-6_H-768_A-12\", \"small_bert/bert_en_uncased_L-8_H-128_A-2\", \"small_bert/bert_en_uncased_L-8_H-256_A-4\", \"small_bert/bert_en_uncased_L-8_H-512_A-8\", \"small_bert/bert_en_uncased_L-8_H-768_A-12\", \"small_bert/bert_en_uncased_L-10_H-128_A-2\", \"small_bert/bert_en_uncased_L-10_H-256_A-4\", \"small_bert/bert_en_uncased_L-10_H-512_A-8\", \"small_bert/bert_en_uncased_L-10_H-768_A-12\", \"small_bert/bert_en_uncased_L-12_H-128_A-2\", \"small_bert/bert_en_uncased_L-12_H-256_A-4\", \"small_bert/bert_en_uncased_L-12_H-512_A-8\", \"small_bert/bert_en_uncased_L-12_H-768_A-12\", \"albert_en_base\", \"albert_en_large\", \"albert_en_xlarge\", \"albert_en_xxlarge\", \"electra_small\", \"electra_base\", \"experts_pubmed\", \"experts_wiki_books\", \"talking-heads_base\", \"talking-heads_large\"]\n", - "\n", - "map_name_to_handle = {\n", - " 'bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',\n", - " 'bert_en_uncased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/3',\n", - " 'bert_en_wwm_uncased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_wwm_uncased_L-24_H-1024_A-16/3',\n", - " 'bert_en_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3',\n", - " 'bert_en_cased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_cased_L-24_H-1024_A-16/3',\n", - " 'bert_en_wwm_cased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_wwm_cased_L-24_H-1024_A-16/3',\n", - " 'bert_multi_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1',\n", - " 
'small_bert/bert_en_uncased_L-2_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-2_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-2_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-6_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-6_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-6_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-6_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-768_A-12/1',\n", - " 'albert_en_base':\n", - " 'https://tfhub.dev/tensorflow/albert_en_base/2',\n", - " 'albert_en_large':\n", - " 'https://tfhub.dev/tensorflow/albert_en_large/2',\n", - " 'albert_en_xlarge':\n", - " 'https://tfhub.dev/tensorflow/albert_en_xlarge/2',\n", - " 'albert_en_xxlarge':\n", - " 'https://tfhub.dev/tensorflow/albert_en_xxlarge/2',\n", - " 'electra_small':\n", - " 'https://tfhub.dev/google/electra_small/2',\n", - " 
'electra_base':\n", - " 'https://tfhub.dev/google/electra_base/2',\n", - " 'experts_pubmed':\n", - " 'https://tfhub.dev/google/experts/bert/pubmed/2',\n", - " 'experts_wiki_books':\n", - " 'https://tfhub.dev/google/experts/bert/wiki_books/2',\n", - " 'talking-heads_base':\n", - " 'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1',\n", - " 'talking-heads_large':\n", - " 'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_large/1',\n", - "}\n", - "\n", - "map_model_to_preprocess = {\n", - " 'bert_en_uncased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'bert_en_wwm_cased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',\n", - " 'bert_en_cased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',\n", - " 'bert_en_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',\n", - " 'bert_en_wwm_uncased_L-24_H-1024_A-16':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-6_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-6_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-6_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-6_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-768_A-12':\n", - " 
'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'bert_multi_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3',\n", - " 'albert_en_base':\n", - " 'https://tfhub.dev/tensorflow/albert_en_preprocess/3',\n", - " 'albert_en_large':\n", - " 'https://tfhub.dev/tensorflow/albert_en_preprocess/3',\n", - " 'albert_en_xlarge':\n", - " 'https://tfhub.dev/tensorflow/albert_en_preprocess/3',\n", - " 'albert_en_xxlarge':\n", - " 'https://tfhub.dev/tensorflow/albert_en_preprocess/3',\n", - " 'electra_small':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'electra_base':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'experts_pubmed':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'experts_wiki_books':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'talking-heads_base':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'talking-heads_large':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - "}\n", - "\n", - "tfhub_handle_encoder = map_name_to_handle[bert_model_name]\n", - "tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]\n", - "\n", - "print('BERT model selected :', tfhub_handle_encoder)\n", - "print('Preprocessing model auto-selected:', tfhub_handle_preprocess)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7WrcxxTRDdHi" - }, - "source": [ - "## Preprocess the text\n", - "\n", - "On the [Classify text with BERT colab](https://www.tensorflow.org/text/tutorials/classify_text_with_bert) the preprocessing model is used directly embedded with the BERT encoder.\n", - "\n", - "This tutorial demonstrates how to do preprocessing as part of your input pipeline for training, using Dataset.map, and then merge it into the model that gets exported for inference. That way, both training and inference can work from raw text inputs, although the TPU itself requires numeric inputs.\n", - "\n", - "TPU requirements aside, it can help performance have preprocessing done asynchronously in an input pipeline (you can learn more in the [tf.data performance guide](https://www.tensorflow.org/guide/data_performance)).\n", - "\n", - "This tutorial also demonstrates how to build multi-input models, and how to adjust the sequence length of the inputs to BERT.\n", - "\n", - "Let's demonstrate the preprocessing model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-ePjboKOPmv4" - }, - "outputs": [], - "source": [ - "bert_preprocess = hub.load(tfhub_handle_preprocess)\n", - "tok = bert_preprocess.tokenize(tf.constant(['Hello TensorFlow!']))\n", - "print(tok)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tRMCuruaQb5X" - }, - "source": [ - "Each preprocessing model also provides a method, `.bert_pack_inputs(tensors, seq_length)`, which takes a list of tokens (like `tok` above) and a sequence length argument. 
This packs the inputs to create a dictionary of tensors in the format expected by the BERT model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lraoc4csP0lY" - }, - "outputs": [], - "source": [ - "text_preprocessed = bert_preprocess.bert_pack_inputs([tok, tok], tf.constant(20))\n", - "\n", - "print('Shape Word Ids : ', text_preprocessed['input_word_ids'].shape)\n", - "print('Word Ids : ', text_preprocessed['input_word_ids'][0, :16])\n", - "print('Shape Mask : ', text_preprocessed['input_mask'].shape)\n", - "print('Input Mask : ', text_preprocessed['input_mask'][0, :16])\n", - "print('Shape Type Ids : ', text_preprocessed['input_type_ids'].shape)\n", - "print('Type Ids : ', text_preprocessed['input_type_ids'][0, :16])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KyBsEhoa0_7r" - }, - "source": [ - "Here are some details to pay attention to:\n", - "- `input_mask` The mask allows the model to cleanly differentiate between the content and the padding. The mask has the same shape as the `input_word_ids`, and contains a 1 anywhere the `input_word_ids` is not padding.\n", - "- `input_type_ids` has the same shape as `input_mask`, but inside the non-padded region, contains a 0 or a 1 indicating which sentence the token is a part of." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "H63KFuKcRwjO" - }, - "source": [ - "Next, you will create a preprocessing model that encapsulates all this logic. Your model will take strings as input, and return appropriately formatted objects which can be passed to BERT.\n", - "\n", - "Each BERT model has a specific preprocessing model, make sure to use the proper one described on the BERT's model documentation.\n", - "\n", - "Note: BERT adds a \"position embedding\" to the token embedding of each input, and these come from a fixed-size lookup table. That imposes a max seq length of 512 (which is also a practical limit, due to the quadratic growth of attention computation). For this Colab 128 is good enough." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KeHEYKXGqjAZ" - }, - "outputs": [], - "source": [ - "def make_bert_preprocess_model(sentence_features, seq_length=128):\n", - " \"\"\"Returns Model mapping string features to BERT inputs.\n", - "\n", - " Args:\n", - " sentence_features: a list with the names of string-valued features.\n", - " seq_length: an integer that defines the sequence length of BERT inputs.\n", - "\n", - " Returns:\n", - " A Keras Model that can be called on a list or dict of string Tensors\n", - " (with the order or names, resp., given by sentence_features) and\n", - " returns a dict of tensors for input to BERT.\n", - " \"\"\"\n", - "\n", - " input_segments = [\n", - " tf.keras.layers.Input(shape=(), dtype=tf.string, name=ft)\n", - " for ft in sentence_features]\n", - "\n", - " # Tokenize the text to word pieces.\n", - " bert_preprocess = hub.load(tfhub_handle_preprocess)\n", - " tokenizer = hub.KerasLayer(bert_preprocess.tokenize, name='tokenizer')\n", - " segments = [tokenizer(s) for s in input_segments]\n", - "\n", - " # Optional: Trim segments in a smart way to fit seq_length.\n", - " # Simple cases (like this example) can skip this step and let\n", - " # the next step apply a default truncation to approximately equal lengths.\n", - " truncated_segments = segments\n", - "\n", - " # Pack inputs. 
The details (start/end token ids, dict of output tensors)\n", - " # are model-dependent, so this gets loaded from the SavedModel.\n", - " packer = hub.KerasLayer(bert_preprocess.bert_pack_inputs,\n", - " arguments=dict(seq_length=seq_length),\n", - " name='packer')\n", - " model_inputs = packer(truncated_segments)\n", - " return tf.keras.Model(input_segments, model_inputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kk5SS1bStmfP" - }, - "source": [ - "Let's demonstrate the preprocessing model. You will create a test with two sentences input (input1 and input2). The output is what a BERT model would expect as input: `input_word_ids`, `input_masks` and `input_type_ids`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BehJu3wLtAg-" - }, - "outputs": [], - "source": [ - "test_preprocess_model = make_bert_preprocess_model(['my_input1', 'my_input2'])\n", - "test_text = [np.array(['some random test sentence']),\n", - " np.array(['another sentence'])]\n", - "text_preprocessed = test_preprocess_model(test_text)\n", - "\n", - "print('Keys : ', list(text_preprocessed.keys()))\n", - "print('Shape Word Ids : ', text_preprocessed['input_word_ids'].shape)\n", - "print('Word Ids : ', text_preprocessed['input_word_ids'][0, :16])\n", - "print('Shape Mask : ', text_preprocessed['input_mask'].shape)\n", - "print('Input Mask : ', text_preprocessed['input_mask'][0, :16])\n", - "print('Shape Type Ids : ', text_preprocessed['input_type_ids'].shape)\n", - "print('Type Ids : ', text_preprocessed['input_type_ids'][0, :16])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qXU6bQWmNfhp" - }, - "source": [ - "Let's take a look at the model's structure, paying attention to the two inputs you just defined." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a2_XrcVPFiz_" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(test_preprocess_model, show_shapes=True, show_dtype=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GRVVol5G9i0b" - }, - "source": [ - "To apply the preprocessing in all the inputs from the dataset, you will use the `map` function from the dataset. The result is then cached for [performance](https://www.tensorflow.org/guide/data_performance#top_of_page)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1zhR-SVwx4_J" - }, - "outputs": [], - "source": [ - "AUTOTUNE = tf.data.AUTOTUNE\n", - "\n", - "\n", - "def load_dataset_from_tfds(in_memory_ds, info, split, batch_size,\n", - " bert_preprocess_model):\n", - " is_training = split.startswith('train')\n", - " dataset = tf.data.Dataset.from_tensor_slices(in_memory_ds[split])\n", - " num_examples = info.splits[split].num_examples\n", - "\n", - " if is_training:\n", - " dataset = dataset.shuffle(num_examples)\n", - " dataset = dataset.repeat()\n", - " dataset = dataset.batch(batch_size)\n", - " dataset = dataset.map(lambda ex: (bert_preprocess_model(ex), ex['label']))\n", - " dataset = dataset.cache().prefetch(buffer_size=AUTOTUNE)\n", - " return dataset, num_examples" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pDNKfAXbDnJH" - }, - "source": [ - "## Define your model\n", - "\n", - "You are now ready to define your model for sentence or sentence pair classification by feeding the preprocessed inputs through the BERT encoder and putting a linear classifier on top (or other arrangement of layers as you prefer), and using dropout for regularization." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aksj743St9ga" - }, - "outputs": [], - "source": [ - "def build_classifier_model(num_classes):\n", - "\n", - " class Classifier(tf.keras.Model):\n", - " def __init__(self, num_classes):\n", - " super(Classifier, self).__init__(name=\"prediction\")\n", - " self.encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True)\n", - " self.dropout = tf.keras.layers.Dropout(0.1)\n", - " self.dense = tf.keras.layers.Dense(num_classes)\n", - "\n", - " def call(self, preprocessed_text):\n", - " encoder_outputs = self.encoder(preprocessed_text)\n", - " pooled_output = encoder_outputs[\"pooled_output\"]\n", - " x = self.dropout(pooled_output)\n", - " x = self.dense(x)\n", - " return x\n", - "\n", - " model = Classifier(num_classes)\n", - " return model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TTa5VZssizDm" - }, - "source": [ - "Let's try running the model on some preprocessed inputs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e6mJ3WXhwUa8" - }, - "outputs": [], - "source": [ - "test_classifier_model = build_classifier_model(2)\n", - "bert_raw_result = test_classifier_model(text_preprocessed)\n", - "print(tf.sigmoid(bert_raw_result))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1s0xpHS-XQcP" - }, - "source": [ - "## Choose a task from GLUE\n", - "\n", - "You are going to use a TensorFlow DataSet from the [GLUE](https://www.tensorflow.org/datasets/catalog/glue) benchmark suite.\n", - "\n", - "Colab lets you download these small datasets to the local filesystem, and the code below reads them entirely into memory, because the separate TPU worker host cannot access the local filesystem of the colab runtime.\n", - "\n", - "For bigger datasets, you'll need to create your own [Google Cloud Storage](https://cloud.google.com/storage) bucket and have the TPU worker read the data from there. You can learn more in the [TPU guide](https://www.tensorflow.org/guide/tpu#input_datasets).\n", - "\n", - "It's recommended to start with the CoLa dataset (for single sentence) or MRPC (for multi sentence) since these are small and don't take long to fine tune." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "RhL__V2mwRNH" - }, - "outputs": [], - "source": [ - "tfds_name = 'glue/cola' #@param ['glue/cola', 'glue/sst2', 'glue/mrpc', 'glue/qqp', 'glue/mnli', 'glue/qnli', 'glue/rte', 'glue/wnli']\n", - "\n", - "tfds_info = tfds.builder(tfds_name).info\n", - "\n", - "sentence_features = list(tfds_info.features.keys())\n", - "sentence_features.remove('idx')\n", - "sentence_features.remove('label')\n", - "\n", - "available_splits = list(tfds_info.splits.keys())\n", - "train_split = 'train'\n", - "validation_split = 'validation'\n", - "test_split = 'test'\n", - "if tfds_name == 'glue/mnli':\n", - " validation_split = 'validation_matched'\n", - " test_split = 'test_matched'\n", - "\n", - "num_classes = tfds_info.features['label'].num_classes\n", - "num_examples = tfds_info.splits.total_num_examples\n", - "\n", - "print(f'Using {tfds_name} from TFDS')\n", - "print(f'This dataset has {num_examples} examples')\n", - "print(f'Number of classes: {num_classes}')\n", - "print(f'Features {sentence_features}')\n", - "print(f'Splits {available_splits}')\n", - "\n", - "with tf.device('/job:localhost'):\n", - " # batch_size=-1 is a way to load the dataset into memory\n", - " in_memory_ds = tfds.load(tfds_name, batch_size=-1, shuffle_files=True)\n", - "\n", - "# The code below is just to show some samples from the selected dataset\n", - "print(f'Here are some sample rows from {tfds_name} dataset')\n", - "sample_dataset = tf.data.Dataset.from_tensor_slices(in_memory_ds[train_split])\n", - "\n", - "labels_names = tfds_info.features['label'].names\n", - "print(labels_names)\n", - "print()\n", - "\n", - "sample_i = 1\n", - "for sample_row in sample_dataset.take(5):\n", - " samples = [sample_row[feature] for feature in sentence_features]\n", - " print(f'sample row {sample_i}')\n", - " for sample in samples:\n", - " print(sample.numpy())\n", - " sample_label = sample_row['label']\n", - "\n", - " print(f'label: {sample_label} ({labels_names[sample_label]})')\n", - " print()\n", - " sample_i += 1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lFhjoYtsoVNF" - }, - "source": [ - "The dataset also determines the problem type (classification or regression) and the appropriate loss function for training." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OWPOZE-L3AgE" - }, - "outputs": [], - "source": [ - "def get_configuration(glue_task):\n", - "\n", - " loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", - " if glue_task == 'glue/cola':\n", - " metrics = tfa.metrics.MatthewsCorrelationCoefficient(num_classes=2)\n", - " else:\n", - " metrics = tf.keras.metrics.SparseCategoricalAccuracy(\n", - " 'accuracy', dtype=tf.float32)\n", - "\n", - " return metrics, loss" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EgJoTvo2DsWq" - }, - "source": [ - "## Train your model\n", - "\n", - "Finally, you can train the model end-to-end on the dataset you chose.\n", - "\n", - "### Distribution\n", - "\n", - "Recall the set-up code at the top, which has connected the colab runtime to\n", - "a TPU worker with multiple TPU devices. To distribute training onto them, you will create and compile your main Keras model within the scope of the TPU distribution strategy. 
(For details, see [Distributed training with Keras](https://www.tensorflow.org/tutorials/distribute/keras).)\n", - "\n", - "Preprocessing, on the other hand, runs on the CPU of the worker host, not the TPUs, so the Keras model for preprocessing as well as the training and validation datasets mapped with it are built outside the distribution strategy scope. The call to `Model.fit()` will take care of distributing the passed-in dataset to the model replicas.\n", - "\n", - "Note: The single TPU worker host already has the resource objects (think: a lookup table) needed for tokenization. Scaling up to multiple workers requires use of `Strategy.experimental_distribute_datasets_from_function` with a function that loads the preprocessing model separately onto each worker.\n", - "\n", - "### Optimizer\n", - "\n", - "Fine-tuning follows the optimizer set-up from BERT pre-training (as in [Classify text with BERT](https://www.tensorflow.org/text/tutorials/classify_text_with_bert)): It uses the AdamW optimizer with a linear decay of a notional initial learning rate, prefixed with a linear warm-up phase over the first 10% of training steps (`num_warmup_steps`). In line with the BERT paper, the initial learning rate is smaller for fine-tuning (best of 5e-5, 3e-5, 2e-5)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AiU5_ioh_fEr" - }, - "outputs": [], - "source": [ - "epochs = 3\n", - "batch_size = 32\n", - "init_lr = 2e-5\n", - "\n", - "print(f'Fine tuning {tfhub_handle_encoder} model')\n", - "bert_preprocess_model = make_bert_preprocess_model(sentence_features)\n", - "\n", - "with strategy.scope():\n", - "\n", - " # metric have to be created inside the strategy scope\n", - " metrics, loss = get_configuration(tfds_name)\n", - "\n", - " train_dataset, train_data_size = load_dataset_from_tfds(\n", - " in_memory_ds, tfds_info, train_split, batch_size, bert_preprocess_model)\n", - " steps_per_epoch = train_data_size // batch_size\n", - " num_train_steps = steps_per_epoch * epochs\n", - " num_warmup_steps = num_train_steps // 10\n", - "\n", - " validation_dataset, validation_data_size = load_dataset_from_tfds(\n", - " in_memory_ds, tfds_info, validation_split, batch_size,\n", - " bert_preprocess_model)\n", - " validation_steps = validation_data_size // batch_size\n", - "\n", - " classifier_model = build_classifier_model(num_classes)\n", - "\n", - " optimizer = optimization.create_optimizer(\n", - " init_lr=init_lr,\n", - " num_train_steps=num_train_steps,\n", - " num_warmup_steps=num_warmup_steps,\n", - " optimizer_type='adamw')\n", - "\n", - " classifier_model.compile(optimizer=optimizer, loss=loss, metrics=[metrics])\n", - "\n", - " classifier_model.fit(\n", - " x=train_dataset,\n", - " validation_data=validation_dataset,\n", - " steps_per_epoch=steps_per_epoch,\n", - " epochs=epochs,\n", - " validation_steps=validation_steps)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Rtn7jewb6dg4" - }, - "source": [ - "## Export for inference\n", - "\n", - "You will create a final model that has the preprocessing part and the fine-tuned BERT we've just created.\n", - "\n", - "At inference time, preprocessing needs to be part of the model (because there is no longer a separate input queue as for training data that does it). 
Preprocessing is not just computation; it has its own resources (the vocab table) that must be attached to the Keras Model that is saved for export.\n", - "This final assembly is what will be saved.\n", - "\n", - "You are going to save the model on colab and later you can download to keep it for the future (**View -\u003e Table of contents -\u003e Files**).\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ShcvqJAgVera" - }, - "outputs": [], - "source": [ - "main_save_path = './my_models'\n", - "bert_type = tfhub_handle_encoder.split('/')[-2]\n", - "saved_model_name = f'{tfds_name.replace(\"/\", \"_\")}_{bert_type}'\n", - "\n", - "saved_model_path = os.path.join(main_save_path, saved_model_name)\n", - "\n", - "preprocess_inputs = bert_preprocess_model.inputs\n", - "bert_encoder_inputs = bert_preprocess_model(preprocess_inputs)\n", - "bert_outputs = classifier_model(bert_encoder_inputs)\n", - "model_for_export = tf.keras.Model(preprocess_inputs, bert_outputs)\n", - "\n", - "print('Saving', saved_model_path)\n", - "\n", - "# Save everything on the Colab host (even the variables from TPU memory)\n", - "save_options = tf.saved_model.SaveOptions(experimental_io_device='/job:localhost')\n", - "model_for_export.save(saved_model_path, include_optimizer=False,\n", - " options=save_options)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C2qyM9Q9z12v" - }, - "source": [ - "## Test the model\n", - "\n", - "The final step is testing the results of your exported model.\n", - "\n", - "Just to make some comparison, let's reload the model and test it using some inputs from the test split from the dataset.\n", - "\n", - "Note: The test is done on the colab host, not the TPU worker that it has connected to, so it appears below with explicit device placements. You can omit those when loading the SavedModel elsewhere." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BhI0_W0kbXji" - }, - "outputs": [], - "source": [ - "with tf.device('/job:localhost'):\n", - " reloaded_model = tf.saved_model.load(saved_model_path)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "4yl-CEcDDXzX" - }, - "outputs": [], - "source": [ - "#@title Utility methods\n", - "\n", - "def prepare(record):\n", - " model_inputs = [[record[ft]] for ft in sentence_features]\n", - " return model_inputs\n", - "\n", - "\n", - "def prepare_serving(record):\n", - " model_inputs = {ft: record[ft] for ft in sentence_features}\n", - " return model_inputs\n", - "\n", - "\n", - "def print_bert_results(test, bert_result, dataset_name):\n", - "\n", - " bert_result_class = tf.argmax(bert_result, axis=1)[0]\n", - "\n", - " if dataset_name == 'glue/cola':\n", - " print('sentence:', test[0].numpy())\n", - " if bert_result_class == 1:\n", - " print('This sentence is acceptable')\n", - " else:\n", - " print('This sentence is unacceptable')\n", - "\n", - " elif dataset_name == 'glue/sst2':\n", - " print('sentence:', test[0])\n", - " if bert_result_class == 1:\n", - " print('This sentence has POSITIVE sentiment')\n", - " else:\n", - " print('This sentence has NEGATIVE sentiment')\n", - "\n", - " elif dataset_name == 'glue/mrpc':\n", - " print('sentence1:', test[0])\n", - " print('sentence2:', test[1])\n", - " if bert_result_class == 1:\n", - " print('Are a paraphrase')\n", - " else:\n", - " print('Are NOT a paraphrase')\n", - "\n", - " elif dataset_name == 'glue/qqp':\n", - " print('question1:', test[0])\n", - " print('question2:', test[1])\n", - " if bert_result_class == 1:\n", - " print('Questions are similar')\n", - " else:\n", - " print('Questions are NOT similar')\n", - "\n", - " elif dataset_name == 'glue/mnli':\n", - " print('premise :', test[0])\n", - " print('hypothesis:', test[1])\n", - " if bert_result_class == 1:\n", - " print('This premise is NEUTRAL to the hypothesis')\n", - " elif bert_result_class == 2:\n", - " print('This premise CONTRADICTS the hypothesis')\n", - " else:\n", - " print('This premise ENTAILS the hypothesis')\n", - "\n", - " elif dataset_name == 'glue/qnli':\n", - " print('question:', test[0])\n", - " print('sentence:', test[1])\n", - " if bert_result_class == 1:\n", - " print('The question is NOT answerable by the sentence')\n", - " else:\n", - " print('The question is answerable by the sentence')\n", - "\n", - " elif dataset_name == 'glue/rte':\n", - " print('sentence1:', test[0])\n", - " print('sentence2:', test[1])\n", - " if bert_result_class == 1:\n", - " print('Sentence1 DOES NOT entails sentence2')\n", - " else:\n", - " print('Sentence1 entails sentence2')\n", - "\n", - " elif dataset_name == 'glue/wnli':\n", - " print('sentence1:', test[0])\n", - " print('sentence2:', test[1])\n", - " if bert_result_class == 1:\n", - " print('Sentence1 DOES NOT entails sentence2')\n", - " else:\n", - " print('Sentence1 entails sentence2')\n", - "\n", - " print('BERT raw results:', bert_result[0])\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "12VA4BcKuR7n" - }, - "source": [ - "### Test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dt-O94gcwbIi" - }, - "outputs": [], - "source": [ - "with tf.device('/job:localhost'):\n", - " test_dataset = tf.data.Dataset.from_tensor_slices(in_memory_ds[test_split])\n", - " for test_row in 
test_dataset.shuffle(1000).map(prepare).take(5):\n", - " if len(sentence_features) == 1:\n", - " result = reloaded_model(test_row[0])\n", - " else:\n", - " result = reloaded_model(list(test_row))\n", - "\n", - " print_bert_results(test_row, result, tfds_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3cOmih754Y_M" - }, - "source": [ - "If you want to use your model on [TF Serving](https://www.tensorflow.org/tfx/guide/serving), remember that it will call your SavedModel through one of its named signatures. Notice there are some small differences in the input. In Python, you can test them as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "b0vTQAXKN_K0" - }, - "outputs": [], - "source": [ - "with tf.device('/job:localhost'):\n", - " serving_model = reloaded_model.signatures['serving_default']\n", - " for test_row in test_dataset.shuffle(1000).map(prepare_serving).take(5):\n", - " result = serving_model(**test_row)\n", - " # The 'prediction' key is the classifier's defined model name.\n", - " print_bert_results(list(test_row.values()), result['prediction'], tfds_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GOA5bX2g3wCW" - }, - "source": [ - "You did it! Your saved model can be used for serving or for simple in-process inference, through a simpler API that needs less code and is easier to maintain.\n", - "\n", - "## Next Steps\n", - "\n", - "Now that you've tried one of the base BERT models, you can try other ones for higher accuracy, or smaller versions for faster fine-tuning.\n", - "\n", - "You can also try the model on other datasets." - ] - } - ], - "metadata": { - "accelerator": "TPU", - "colab": { - "collapsed_sections": [], - "name": "bert_glue.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
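The deleted notebook above only calls the `serving_default` signature in-process. As a minimal client-side sketch of what the same exported SavedModel could look like behind TensorFlow Serving's REST API: the model name (`bert_glue`), host/port, and the single `sentence` feature (as for `glue/cola`) are assumptions for illustration, not details from the notebook.

```python
# Hypothetical client sketch, assuming TF Serving runs on localhost:8501 with
# --model_name=bert_glue pointing at the exported saved_model_path, and that
# the chosen task is glue/cola (whose only feature is 'sentence').
import json
import urllib.request

SERVER_URL = 'http://localhost:8501/v1/models/bert_glue:predict'

payload = json.dumps({
    'instances': [{'sentence': 'The boy quickly ran across the finish line.'}]
}).encode('utf-8')

request = urllib.request.Request(
    SERVER_URL, data=payload, headers={'Content-Type': 'application/json'})

with urllib.request.urlopen(request) as response:
    # Each prediction holds the classifier logits for one input sentence;
    # argmax over the logits gives the predicted class index.
    predictions = json.load(response)['predictions']

print(predictions)
```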
diff --git a/third_party/tensorflow-text/src/docs/tutorials/classify_text_with_bert.ipynb b/third_party/tensorflow-text/src/docs/tutorials/classify_text_with_bert.ipynb deleted file mode 100644 index 0ff6132..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/classify_text_with_bert.ipynb +++ /dev/null
@@ -1,1000 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Cb4espuLKJiA" - }, - "source": [ - "##### Copyright 2020 The TensorFlow Hub Authors.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "jM3hCI1UUzar" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4_NEJlxKKjyI" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/classify_text_with_bert\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/classify_text_with_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/classify_text_with_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/classify_text_with_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://tfhub.dev/google/collections/bert/1\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub model\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IZ6SNYq_tVVC" - }, - "source": [ - "# Classify text with BERT\n", - "\n", - "This tutorial contains complete code to fine-tune BERT to perform sentiment analysis on a dataset of plain-text IMDB movie reviews.\n", - "In addition to training a model, you will learn how to preprocess text into an appropriate format.\n", - "\n", - "In this notebook, you will:\n", - "\n", - "- Load the IMDB dataset\n", - "- Load a BERT model from TensorFlow Hub\n", - "- Build your own model by combining BERT with a classifier\n", - "- Train your own model, fine-tuning BERT as part of that\n", - "- Save your model and use it to classify sentences\n", - "\n", - "If you're new to working with the IMDB dataset, please see [Basic text classification](https://www.tensorflow.org/tutorials/keras/text_classification) for more details." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2PHBpLPuQdmK" - }, - "source": [ - "## About BERT\n", - "\n", - "[BERT](https://arxiv.org/abs/1810.04805) and other Transformer encoder architectures have been wildly successful on a variety of tasks in NLP (natural language processing). They compute vector-space representations of natural language that are suitable for use in deep learning models. The BERT family of models uses the Transformer encoder architecture to process each token of input text in the full context of all tokens before and after, hence the name: Bidirectional Encoder Representations from Transformers. \n", - "\n", - "BERT models are usually pre-trained on a large corpus of text, then fine-tuned for specific tasks.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SCjmX4zTCkRK" - }, - "source": [ - "## Setup\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "q-YbjCkzw0yU" - }, - "outputs": [], - "source": [ - "# A dependency of the preprocessing for BERT inputs\n", - "!pip install -q -U tensorflow-text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5w_XlxN1IsRJ" - }, - "source": [ - "You will use the AdamW optimizer from [tensorflow/models](https://github.com/tensorflow/models)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "b-P1ZOA0FkVJ" - }, - "outputs": [], - "source": [ - "!pip install -q tf-models-official" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_XgTpm9ZxoN9" - }, - "outputs": [], - "source": [ - "import os\n", - "import shutil\n", - "\n", - "import tensorflow as tf\n", - "import tensorflow_hub as hub\n", - "import tensorflow_text as text\n", - "from official.nlp import optimization # to create AdamW optimizer\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "tf.get_logger().setLevel('ERROR')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "q6MugfEgDRpY" - }, - "source": [ - "## Sentiment analysis\n", - "\n", - "This notebook trains a sentiment analysis model to classify movie reviews as *positive* or *negative*, based on the text of the review.\n", - "\n", - "You'll use the [Large Movie Review Dataset](https://ai.stanford.edu/~amaas/data/sentiment/) that contains the text of 50,000 movie reviews from the [Internet Movie Database](https://www.imdb.com/)." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Vnvd4mrtPHHV" - }, - "source": [ - "### Download the IMDB dataset\n", - "\n", - "Let's download and extract the dataset, then explore the directory structure.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pOdqCMoQDRJL" - }, - "outputs": [], - "source": [ - "url = 'https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'\n", - "\n", - "dataset = tf.keras.utils.get_file('aclImdb_v1.tar.gz', url,\n", - " untar=True, cache_dir='.',\n", - " cache_subdir='')\n", - "\n", - "dataset_dir = os.path.join(os.path.dirname(dataset), 'aclImdb')\n", - "\n", - "train_dir = os.path.join(dataset_dir, 'train')\n", - "\n", - "# remove unused folders to make it easier to load the data\n", - "remove_dir = os.path.join(train_dir, 'unsup')\n", - "shutil.rmtree(remove_dir)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lN9lWCYfPo7b" - }, - "source": [ - "Next, you will use the `text_dataset_from_directory` utility to create a labeled `tf.data.Dataset`.\n", - "\n", - "The IMDB dataset has already been divided into train and test, but it lacks a validation set. Let's create a validation set using an 80:20 split of the training data by using the `validation_split` argument below.\n", - "\n", - "Note: When using the `validation_split` and `subset` arguments, make sure to either specify a random seed, or to pass `shuffle=False`, so that the validation and training splits have no overlap." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6IwI_2bcIeX8" - }, - "outputs": [], - "source": [ - "AUTOTUNE = tf.data.AUTOTUNE\n", - "batch_size = 32\n", - "seed = 42\n", - "\n", - "raw_train_ds = tf.keras.preprocessing.text_dataset_from_directory(\n", - " 'aclImdb/train',\n", - " batch_size=batch_size,\n", - " validation_split=0.2,\n", - " subset='training',\n", - " seed=seed)\n", - "\n", - "class_names = raw_train_ds.class_names\n", - "train_ds = raw_train_ds.cache().prefetch(buffer_size=AUTOTUNE)\n", - "\n", - "val_ds = tf.keras.preprocessing.text_dataset_from_directory(\n", - " 'aclImdb/train',\n", - " batch_size=batch_size,\n", - " validation_split=0.2,\n", - " subset='validation',\n", - " seed=seed)\n", - "\n", - "val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)\n", - "\n", - "test_ds = tf.keras.preprocessing.text_dataset_from_directory(\n", - " 'aclImdb/test',\n", - " batch_size=batch_size)\n", - "\n", - "test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HGm10A5HRGXp" - }, - "source": [ - "Let's take a look at a few reviews." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JuxDkcvVIoev" - }, - "outputs": [], - "source": [ - "for text_batch, label_batch in train_ds.take(1):\n", - " for i in range(3):\n", - " print(f'Review: {text_batch.numpy()[i]}')\n", - " label = label_batch.numpy()[i]\n", - " print(f'Label : {label} ({class_names[label]})')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dX8FtlpGJRE6" - }, - "source": [ - "## Loading models from TensorFlow Hub\n", - "\n", - "Here you can choose which BERT model you will load from TensorFlow Hub and fine-tune. 
There are multiple BERT models available.\n", - "\n", - " - [BERT-Base](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3), [Uncased](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3) and [seven more models](https://tfhub.dev/google/collections/bert/1) with trained weights released by the original BERT authors.\n", - " - [Small BERTs](https://tfhub.dev/google/collections/bert/1) have the same general architecture but fewer and/or smaller Transformer blocks, which lets you explore tradeoffs between speed, size and quality.\n", - " - [ALBERT](https://tfhub.dev/google/collections/albert/1): four different sizes of \"A Lite BERT\" that reduces model size (but not computation time) by sharing parameters between layers.\n", - " - [BERT Experts](https://tfhub.dev/google/collections/experts/bert/1): eight models that all have the BERT-base architecture but offer a choice between different pre-training domains, to align more closely with the target task.\n", - " - [Electra](https://tfhub.dev/google/collections/electra/1) has the same architecture as BERT (in three different sizes), but gets pre-trained as a discriminator in a set-up that resembles a Generative Adversarial Network (GAN).\n", - " - BERT with Talking-Heads Attention and Gated GELU [[base](https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1), [large](https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_large/1)] has two improvements to the core of the Transformer architecture.\n", - "\n", - "The model documentation on TensorFlow Hub has more details and references to the\n", - "research literature. Follow the links above, or click on the [`tfhub.dev`](http://tfhub.dev) URL\n", - "printed after the next cell execution.\n", - "\n", - "The suggestion is to start with a Small BERT (with fewer parameters) since they are faster to fine-tune. If you like a small model but with higher accuracy, ALBERT might be your next option. If you want even better accuracy, choose\n", - "one of the classic BERT sizes or their recent refinements like Electra, Talking Heads, or a BERT Expert.\n", - "\n", - "Aside from the models available below, there are [multiple versions](https://tfhub.dev/google/collections/transformer_encoders_text/1) of the models that are larger and can yield even better accuracy, but they are too big to be fine-tuned on a single GPU. You will be able to do that on the [Solve GLUE tasks using BERT on a TPU colab](https://www.tensorflow.org/text/tutorials/bert_glue).\n", - "\n", - "You'll see in the code below that switching the tfhub.dev URL is enough to try any of these models, because all the differences between them are encapsulated in the SavedModels from TF Hub." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "y8_ctG55-uTX" - }, - "outputs": [], - "source": [ - "#@title Choose a BERT model to fine-tune\n", - "\n", - "bert_model_name = 'small_bert/bert_en_uncased_L-4_H-512_A-8' #@param [\"bert_en_uncased_L-12_H-768_A-12\", \"bert_en_cased_L-12_H-768_A-12\", \"bert_multi_cased_L-12_H-768_A-12\", \"small_bert/bert_en_uncased_L-2_H-128_A-2\", \"small_bert/bert_en_uncased_L-2_H-256_A-4\", \"small_bert/bert_en_uncased_L-2_H-512_A-8\", \"small_bert/bert_en_uncased_L-2_H-768_A-12\", \"small_bert/bert_en_uncased_L-4_H-128_A-2\", \"small_bert/bert_en_uncased_L-4_H-256_A-4\", \"small_bert/bert_en_uncased_L-4_H-512_A-8\", \"small_bert/bert_en_uncased_L-4_H-768_A-12\", \"small_bert/bert_en_uncased_L-6_H-128_A-2\", \"small_bert/bert_en_uncased_L-6_H-256_A-4\", \"small_bert/bert_en_uncased_L-6_H-512_A-8\", \"small_bert/bert_en_uncased_L-6_H-768_A-12\", \"small_bert/bert_en_uncased_L-8_H-128_A-2\", \"small_bert/bert_en_uncased_L-8_H-256_A-4\", \"small_bert/bert_en_uncased_L-8_H-512_A-8\", \"small_bert/bert_en_uncased_L-8_H-768_A-12\", \"small_bert/bert_en_uncased_L-10_H-128_A-2\", \"small_bert/bert_en_uncased_L-10_H-256_A-4\", \"small_bert/bert_en_uncased_L-10_H-512_A-8\", \"small_bert/bert_en_uncased_L-10_H-768_A-12\", \"small_bert/bert_en_uncased_L-12_H-128_A-2\", \"small_bert/bert_en_uncased_L-12_H-256_A-4\", \"small_bert/bert_en_uncased_L-12_H-512_A-8\", \"small_bert/bert_en_uncased_L-12_H-768_A-12\", \"albert_en_base\", \"electra_small\", \"electra_base\", \"experts_pubmed\", \"experts_wiki_books\", \"talking-heads_base\"]\n", - "\n", - "map_name_to_handle = {\n", - " 'bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3',\n", - " 'bert_en_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3',\n", - " 'bert_multi_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-2_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-2_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-2_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-4_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-6_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-6_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-6_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-512_A-8/1',\n", - " 
'small_bert/bert_en_uncased_L-6_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-6_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-8_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-8_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-10_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-10_H-768_A-12/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-128_A-2/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-256_A-4/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-512_A-8/1',\n", - " 'small_bert/bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-12_H-768_A-12/1',\n", - " 'albert_en_base':\n", - " 'https://tfhub.dev/tensorflow/albert_en_base/2',\n", - " 'electra_small':\n", - " 'https://tfhub.dev/google/electra_small/2',\n", - " 'electra_base':\n", - " 'https://tfhub.dev/google/electra_base/2',\n", - " 'experts_pubmed':\n", - " 'https://tfhub.dev/google/experts/bert/pubmed/2',\n", - " 'experts_wiki_books':\n", - " 'https://tfhub.dev/google/experts/bert/wiki_books/2',\n", - " 'talking-heads_base':\n", - " 'https://tfhub.dev/tensorflow/talkheads_ggelu_bert_en_base/1',\n", - "}\n", - "\n", - "map_model_to_preprocess = {\n", - " 'bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'bert_en_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_cased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-2_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-4_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 
'small_bert/bert_en_uncased_L-6_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-6_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-6_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-6_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-8_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-10_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-128_A-2':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-256_A-4':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-512_A-8':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'small_bert/bert_en_uncased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'bert_multi_cased_L-12_H-768_A-12':\n", - " 'https://tfhub.dev/tensorflow/bert_multi_cased_preprocess/3',\n", - " 'albert_en_base':\n", - " 'https://tfhub.dev/tensorflow/albert_en_preprocess/3',\n", - " 'electra_small':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'electra_base':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'experts_pubmed':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'experts_wiki_books':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - " 'talking-heads_base':\n", - " 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3',\n", - "}\n", - "\n", - "tfhub_handle_encoder = map_name_to_handle[bert_model_name]\n", - "tfhub_handle_preprocess = map_model_to_preprocess[bert_model_name]\n", - "\n", - "print(f'BERT model selected : {tfhub_handle_encoder}')\n", - "print(f'Preprocess model auto-selected: {tfhub_handle_preprocess}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7WrcxxTRDdHi" - }, - "source": [ - "## The preprocessing model\n", - "\n", - "Text inputs need to be transformed to numeric token ids and arranged in several Tensors before being input to BERT. TensorFlow Hub provides a matching preprocessing model for each of the BERT models discussed above, which implements this transformation using TF ops from the TF.text library. 
It is not necessary to run pure Python code outside your TensorFlow model to preprocess text.\n", - "\n", - "The preprocessing model must be the one referenced by the documentation of the BERT model, which you can read at the URL printed above. For BERT models from the drop-down above, the preprocessing model is selected automatically.\n", - "\n", - "Note: You will load the preprocessing model into a [hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) to compose your fine-tuned model. This is the preferred API to load a TF2-style SavedModel from TF Hub into a Keras model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0SQi-jWd_jzq" - }, - "outputs": [], - "source": [ - "bert_preprocess_model = hub.KerasLayer(tfhub_handle_preprocess)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "x4naBiEE_cZX" - }, - "source": [ - "Let's try the preprocessing model on some text and see the output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "r9-zCzJpnuwS" - }, - "outputs": [], - "source": [ - "text_test = ['this is such an amazing movie!']\n", - "text_preprocessed = bert_preprocess_model(text_test)\n", - "\n", - "print(f'Keys : {list(text_preprocessed.keys())}')\n", - "print(f'Shape : {text_preprocessed[\"input_word_ids\"].shape}')\n", - "print(f'Word Ids : {text_preprocessed[\"input_word_ids\"][0, :12]}')\n", - "print(f'Input Mask : {text_preprocessed[\"input_mask\"][0, :12]}')\n", - "print(f'Type Ids : {text_preprocessed[\"input_type_ids\"][0, :12]}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EqL7ihkN_862" - }, - "source": [ - "As you can see, you now have the 3 outputs from the preprocessing that a BERT model would use (`input_word_ids`, `input_mask` and `input_type_ids`).\n", - "\n", - "Some other important points:\n", - "- The input is truncated to 128 tokens. The number of tokens can be customized, and you can see more details on the [Solve GLUE tasks using BERT on a TPU colab](https://www.tensorflow.org/text/tutorials/bert_glue).\n", - "- The `input_type_ids` only have one value (0) because this is a single sentence input. For a multiple sentence input, it would have one number for each input.\n", - "\n", - "Since this text preprocessor is a TensorFlow model, it can be included in your model directly." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DKnLPSEmtp9i" - }, - "source": [ - "## Using the BERT model\n", - "\n", - "Before putting BERT into your own model, let's take a look at its outputs. You will load it from TF Hub and see the returned values."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tXxYpK8ixL34" - }, - "outputs": [], - "source": [ - "bert_model = hub.KerasLayer(tfhub_handle_encoder)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_OoF9mebuSZc" - }, - "outputs": [], - "source": [ - "bert_results = bert_model(text_preprocessed)\n", - "\n", - "print(f'Loaded BERT: {tfhub_handle_encoder}')\n", - "print(f'Pooled Outputs Shape:{bert_results[\"pooled_output\"].shape}')\n", - "print(f'Pooled Outputs Values:{bert_results[\"pooled_output\"][0, :12]}')\n", - "print(f'Sequence Outputs Shape:{bert_results[\"sequence_output\"].shape}')\n", - "print(f'Sequence Outputs Values:{bert_results[\"sequence_output\"][0, :12]}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sm61jDrezAll" - }, - "source": [ - "The BERT models return a map with 3 important keys: `pooled_output`, `sequence_output`, `encoder_outputs`:\n", - "\n", - "- `pooled_output` represents each input sequence as a whole. The shape is `[batch_size, H]`. You can think of this as an embedding for the entire movie review.\n", - "- `sequence_output` represents each input token in the context. The shape is `[batch_size, seq_length, H]`. You can think of this as a contextual embedding for every token in the movie review.\n", - "- `encoder_outputs` are the intermediate activations of the `L` Transformer blocks. `outputs[\"encoder_outputs\"][i]` is a Tensor of shape `[batch_size, seq_length, 1024]` with the outputs of the i-th Transformer block, for `0 \u003c= i \u003c L`. The last value of the list is equal to `sequence_output`.\n", - "\n", - "For the fine-tuning you are going to use the `pooled_output` array." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pDNKfAXbDnJH" - }, - "source": [ - "## Define your model\n", - "\n", - "You will create a very simple fine-tuned model, with the preprocessing model, the selected BERT model, one Dense and a Dropout layer.\n", - "\n", - "Note: for more information about the base model's input and output you can follow the model's URL for documentation. Here specifically, you don't need to worry about it because the preprocessing model will take care of that for you.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aksj743St9ga" - }, - "outputs": [], - "source": [ - "def build_classifier_model():\n", - " text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')\n", - " preprocessing_layer = hub.KerasLayer(tfhub_handle_preprocess, name='preprocessing')\n", - " encoder_inputs = preprocessing_layer(text_input)\n", - " encoder = hub.KerasLayer(tfhub_handle_encoder, trainable=True, name='BERT_encoder')\n", - " outputs = encoder(encoder_inputs)\n", - " net = outputs['pooled_output']\n", - " net = tf.keras.layers.Dropout(0.1)(net)\n", - " net = tf.keras.layers.Dense(1, activation=None, name='classifier')(net)\n", - " return tf.keras.Model(text_input, net)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Zs4yhFraBuGQ" - }, - "source": [ - "Let's check that the model runs with the output of the preprocessing model." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mGMF8AZcB2Zy" - }, - "outputs": [], - "source": [ - "classifier_model = build_classifier_model()\n", - "bert_raw_result = classifier_model(tf.constant(text_test))\n", - "print(tf.sigmoid(bert_raw_result))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZTUzNV2JE2G3" - }, - "source": [ - "The output is meaningless, of course, because the model has not been trained yet.\n", - "\n", - "Let's take a look at the model's structure." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0EmzyHZXKIpm" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(classifier_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WbUWoZMwc302" - }, - "source": [ - "## Model training\n", - "\n", - "You now have all the pieces to train a model, including the preprocessing module, BERT encoder, data, and classifier." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WpJ3xcwDT56v" - }, - "source": [ - "### Loss function\n", - "\n", - "Since this is a binary classification problem and the model outputs a probability (a single-unit layer), you'll use `losses.BinaryCrossentropy` loss function.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OWPOZE-L3AgE" - }, - "outputs": [], - "source": [ - "loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)\n", - "metrics = tf.metrics.BinaryAccuracy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "77psrpfzbxtp" - }, - "source": [ - "### Optimizer\n", - "\n", - "For fine-tuning, let's use the same optimizer that BERT was originally trained with: the \"Adaptive Moments\" (Adam). This optimizer minimizes the prediction loss and does regularization by weight decay (not using moments), which is also known as [AdamW](https://arxiv.org/abs/1711.05101).\n", - "\n", - "For the learning rate (`init_lr`), you will use the same schedule as BERT pre-training: linear decay of a notional initial learning rate, prefixed with a linear warm-up phase over the first 10% of training steps (`num_warmup_steps`). In line with the BERT paper, the initial learning rate is smaller for fine-tuning (best of 5e-5, 3e-5, 2e-5)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P9eP2y9dbw32" - }, - "outputs": [], - "source": [ - "epochs = 5\n", - "steps_per_epoch = tf.data.experimental.cardinality(train_ds).numpy()\n", - "num_train_steps = steps_per_epoch * epochs\n", - "num_warmup_steps = int(0.1*num_train_steps)\n", - "\n", - "init_lr = 3e-5\n", - "optimizer = optimization.create_optimizer(init_lr=init_lr,\n", - " num_train_steps=num_train_steps,\n", - " num_warmup_steps=num_warmup_steps,\n", - " optimizer_type='adamw')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SqlarlpC_v0g" - }, - "source": [ - "### Loading the BERT model and training\n", - "\n", - "Using the `classifier_model` you created earlier, you can compile the model with the loss, metric and optimizer." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-7GPDhR98jsD" - }, - "outputs": [], - "source": [ - "classifier_model.compile(optimizer=optimizer,\n", - " loss=loss,\n", - " metrics=metrics)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CpBuV5j2cS_b" - }, - "source": [ - "Note: training time will vary depending on the complexity of the BERT model you have selected." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "HtfDFAnN_Neu" - }, - "outputs": [], - "source": [ - "print(f'Training model with {tfhub_handle_encoder}')\n", - "history = classifier_model.fit(x=train_ds,\n", - " validation_data=val_ds,\n", - " epochs=epochs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uBthMlTSV8kn" - }, - "source": [ - "### Evaluate the model\n", - "\n", - "Let's see how the model performs. Two values will be returned. Loss (a number which represents the error, lower values are better), and accuracy." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "slqB-urBV9sP" - }, - "outputs": [], - "source": [ - "loss, accuracy = classifier_model.evaluate(test_ds)\n", - "\n", - "print(f'Loss: {loss}')\n", - "print(f'Accuracy: {accuracy}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uttWpgmSfzq9" - }, - "source": [ - "### Plot the accuracy and loss over time\n", - "\n", - "Based on the `History` object returned by `model.fit()`. You can plot the training and validation loss for comparison, as well as the training and validation accuracy:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fiythcODf0xo" - }, - "outputs": [], - "source": [ - "history_dict = history.history\n", - "print(history_dict.keys())\n", - "\n", - "acc = history_dict['binary_accuracy']\n", - "val_acc = history_dict['val_binary_accuracy']\n", - "loss = history_dict['loss']\n", - "val_loss = history_dict['val_loss']\n", - "\n", - "epochs = range(1, len(acc) + 1)\n", - "fig = plt.figure(figsize=(10, 6))\n", - "fig.tight_layout()\n", - "\n", - "plt.subplot(2, 1, 1)\n", - "# r is for \"solid red line\"\n", - "plt.plot(epochs, loss, 'r', label='Training loss')\n", - "# b is for \"solid blue line\"\n", - "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", - "plt.title('Training and validation loss')\n", - "# plt.xlabel('Epochs')\n", - "plt.ylabel('Loss')\n", - "plt.legend()\n", - "\n", - "plt.subplot(2, 1, 2)\n", - "plt.plot(epochs, acc, 'r', label='Training acc')\n", - "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n", - "plt.title('Training and validation accuracy')\n", - "plt.xlabel('Epochs')\n", - "plt.ylabel('Accuracy')\n", - "plt.legend(loc='lower right')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WzJZCo-cf-Jf" - }, - "source": [ - "In this plot, the red lines represent the training loss and accuracy, and the blue lines are the validation loss and accuracy." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Rtn7jewb6dg4" - }, - "source": [ - "## Export for inference\n", - "\n", - "Now you just save your fine-tuned model for later use." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ShcvqJAgVera" - }, - "outputs": [], - "source": [ - "dataset_name = 'imdb'\n", - "saved_model_path = './{}_bert'.format(dataset_name.replace('/', '_'))\n", - "\n", - "classifier_model.save(saved_model_path, include_optimizer=False)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PbI25bS1vD7s" - }, - "source": [ - "Let's reload the model, so you can try it side by side with the model that is still in memory." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gUEWVskZjEF0" - }, - "outputs": [], - "source": [ - "reloaded_model = tf.saved_model.load(saved_model_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oyTappHTvNCz" - }, - "source": [ - "Here you can test your model on any sentence you want, just add to the examples variable below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VBWzH6exlCPS" - }, - "outputs": [], - "source": [ - "def print_my_examples(inputs, results):\n", - " result_for_printing = \\\n", - " [f'input: {inputs[i]:\u003c30} : score: {results[i][0]:.6f}'\n", - " for i in range(len(inputs))]\n", - " print(*result_for_printing, sep='\\n')\n", - " print()\n", - "\n", - "\n", - "examples = [\n", - " 'this is such an amazing movie!', # this is the same sentence tried earlier\n", - " 'The movie was great!',\n", - " 'The movie was meh.',\n", - " 'The movie was okish.',\n", - " 'The movie was terrible...'\n", - "]\n", - "\n", - "reloaded_results = tf.sigmoid(reloaded_model(tf.constant(examples)))\n", - "original_results = tf.sigmoid(classifier_model(tf.constant(examples)))\n", - "\n", - "print('Results from the saved model:')\n", - "print_my_examples(examples, reloaded_results)\n", - "print('Results from the model in memory:')\n", - "print_my_examples(examples, original_results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3cOmih754Y_M" - }, - "source": [ - "If you want to use your model on [TF Serving](https://www.tensorflow.org/tfx/guide/serving), remember that it will call your SavedModel through one of its named signatures. In Python, you can test them as follows:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0FdVD3973S-O" - }, - "outputs": [], - "source": [ - "serving_results = reloaded_model \\\n", - " .signatures['serving_default'](tf.constant(examples))\n", - "\n", - "serving_results = tf.sigmoid(serving_results['classifier'])\n", - "\n", - "print_my_examples(examples, serving_results)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B4gN1KwReLPN" - }, - "source": [ - "## Next steps\n", - "\n", - "As a next step, you can try [Solve GLUE tasks using BERT on a TPU tutorial](https://www.tensorflow.org/text/tutorials/bert_glue), which runs on a TPU and shows you how to work with multiple inputs." - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [], - "name": "classify_text_with_bert.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
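The notebook above reads serving results through the `serving_default` signature. As a small complementary sketch, not part of the original notebook, the exported classifier's signature can be inspected before deployment; it assumes `saved_model_path` still points at the `./imdb_bert` directory saved earlier.

```python
# Sketch: list the inputs and outputs of the SavedModel's serving signature.
# Assumes saved_model_path points at the './imdb_bert' export created above.
import tensorflow as tf

reloaded = tf.saved_model.load(saved_model_path)
serving_fn = reloaded.signatures['serving_default']

# The structured signature shows the expected input tensors (for this
# classifier, a string tensor named 'text') and the named outputs
# (here 'classifier', the key used when reading serving results).
print('Inputs :', serving_fn.structured_input_signature)
print('Outputs:', serving_fn.structured_outputs)
```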
diff --git a/third_party/tensorflow-text/src/docs/tutorials/fine_tune_bert.ipynb b/third_party/tensorflow-text/src/docs/tutorials/fine_tune_bert.ipynb deleted file mode 100644 index 68952a0..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/fine_tune_bert.ipynb +++ /dev/null
@@ -1,1685 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "vXLA5InzXydn" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "RuRlpLL-X0R_" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1mLJmVotXs64" - }, - "source": [ - "# Fine-tuning a BERT model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hYEwGTeCXnnX" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/fine_tune_bert\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/fine_tune_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/fine_tune_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/fine_tune_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub model\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YN2ACivEPxgD" - }, - "source": [ - "In this example, we will work through fine-tuning a BERT model using the tensorflow-models PIP package.\n", - "\n", - "The pretrained BERT model this tutorial is based on is also available on [TensorFlow Hub](https://tensorflow.org/hub), to see how to use it refer to the [Hub Appendix](#hub_bert)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s2d9S2CSSO1z" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fsACVQpVSifi" - }, - "source": [ - "### Install the TensorFlow Model Garden pip package\n", - "\n", - "* `tf-models-official` is the stable Model Garden package. Note that it may not include the latest changes in the `tensorflow_models` github repo. 
To include latest changes, you may install `tf-models-nightly`,\n", - "which is the nightly Model Garden package created daily automatically.\n", - "* pip will install all models and dependencies automatically." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yic2y7_o-BCC" - }, - "outputs": [], - "source": [ - "!pip install -q -U tensorflow-text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "NvNr2svBM-p3" - }, - "outputs": [], - "source": [ - "!pip install -q tf-models-official==2.4.0" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U-7qPCjWUAyy" - }, - "source": [ - "### Imports" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lXsXev5MNr20" - }, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import tensorflow as tf\n", - "\n", - "import tensorflow_hub as hub\n", - "import tensorflow_datasets as tfds\n", - "tfds.disable_progress_bar()\n", - "\n", - "from official.modeling import tf_utils\n", - "from official import nlp\n", - "from official.nlp import bert\n", - "\n", - "# Load the required submodules\n", - "import official.nlp.optimization\n", - "import official.nlp.bert.bert_models\n", - "import official.nlp.bert.configs\n", - "import official.nlp.bert.run_classifier\n", - "import official.nlp.bert.tokenization\n", - "import official.nlp.data.classifier_data_lib\n", - "import official.nlp.modeling.losses\n", - "import official.nlp.modeling.models\n", - "import official.nlp.modeling.networks\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mbanlzTvJBsz" - }, - "source": [ - "### Resources" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PpW0x8TpR8DT" - }, - "source": [ - "This directory contains the configuration, vocabulary, and a pre-trained checkpoint used in this tutorial:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vzRHOLciR8eq" - }, - "outputs": [], - "source": [ - "gs_folder_bert = \"gs://cloud-tpu-checkpoints/bert/v3/uncased_L-12_H-768_A-12\"\n", - "tf.io.gfile.listdir(gs_folder_bert)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9uFskufsR2LT" - }, - "source": [ - "You can get a pre-trained BERT encoder from [TensorFlow Hub](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "e0dAkUttJAzj" - }, - "outputs": [], - "source": [ - "hub_url_bert = \"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Qv6abtRvH4xO" - }, - "source": [ - "## The data\n", - "For this example we used the [GLUE MRPC dataset from TFDS](https://www.tensorflow.org/datasets/catalog/glue#gluemrpc).\n", - "\n", - "This dataset is not set up so that it can be directly fed into the BERT model, so this section also handles the necessary preprocessing." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "28DvUhC1YUiB" - }, - "source": [ - "### Get the dataset from TensorFlow Datasets\n", - "\n", - "The Microsoft Research Paraphrase Corpus (Dolan \u0026 Brockett, 2005) is a corpus of sentence pairs automatically extracted from online news sources, with human annotations for whether the sentences in the pair are semantically equivalent.\n", - "\n", - "* Number of labels: 2.\n", - "* Size of training dataset: 3668.\n", - "* Size of evaluation dataset: 408.\n", - "* Maximum sequence length of training and evaluation dataset: 128.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ijikx5OsH9AT" - }, - "outputs": [], - "source": [ - "glue, info = tfds.load('glue/mrpc', with_info=True,\n", - " # It's small, load the whole dataset\n", - " batch_size=-1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xf9zz4vLYXjr" - }, - "outputs": [], - "source": [ - "list(glue.keys())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZgBg2r2nYT-K" - }, - "source": [ - "The `info` object describes the dataset and it's features:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IQrHxv7W7jH5" - }, - "outputs": [], - "source": [ - "info.features" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vhsVWYNxazz5" - }, - "source": [ - "The two classes are:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n0gfc_VTayfQ" - }, - "outputs": [], - "source": [ - "info.features['label'].names" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "38zJcap6xkbC" - }, - "source": [ - "Here is one example from the training set:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xON_i6SkwApW" - }, - "outputs": [], - "source": [ - "glue_train = glue['train']\n", - "\n", - "for key, value in glue_train.items():\n", - " print(f\"{key:9s}: {value[0].numpy()}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9fbTyfJpNr7x" - }, - "source": [ - "### The BERT tokenizer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wqeN54S61ZKQ" - }, - "source": [ - "To fine tune a pre-trained model you need to be sure that you're using exactly the same tokenization, vocabulary, and index mapping as you used during training.\n", - "\n", - "The BERT tokenizer used in this tutorial is written in pure Python (It's not built out of TensorFlow ops). 
So you can't just plug it into your model as a `keras.layer` like you can with `preprocessing.TextVectorization`.\n", - "\n", - "The following code rebuilds the tokenizer that was used by the base model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "idxyhmrCQcw5" - }, - "outputs": [], - "source": [ - "# Set up tokenizer to generate Tensorflow dataset\n", - "tokenizer = bert.tokenization.FullTokenizer(\n", - " vocab_file=os.path.join(gs_folder_bert, \"vocab.txt\"),\n", - " do_lower_case=True)\n", - "\n", - "print(\"Vocab size:\", len(tokenizer.vocab))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zYHDSquU2lDU" - }, - "source": [ - "Tokenize a sentence:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "L_OfOYPg853R" - }, - "outputs": [], - "source": [ - "tokens = tokenizer.tokenize(\"Hello TensorFlow!\")\n", - "print(tokens)\n", - "ids = tokenizer.convert_tokens_to_ids(tokens)\n", - "print(ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kkAXLtuyWWDI" - }, - "source": [ - "### Preprocess the data\n", - "\n", - "The section manually preprocessed the dataset into the format expected by the model.\n", - "\n", - "This dataset is small, so preprocessing can be done quickly and easily in memory. For larger datasets the `tf_models` library includes some tools for preprocessing and re-serializing a dataset. See [Appendix: Re-encoding a large dataset](#re_encoding_tools) for details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "62UTWLQd9-LB" - }, - "source": [ - "#### Encode the sentences\n", - "\n", - "The model expects its two inputs sentences to be concatenated together. This input is expected to start with a `[CLS]` \"This is a classification problem\" token, and each sentence should end with a `[SEP]` \"Separator\" token:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bdL-dRNRBRJT" - }, - "outputs": [], - "source": [ - "tokenizer.convert_tokens_to_ids(['[CLS]', '[SEP]'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UrPktnqpwqie" - }, - "source": [ - "Start by encoding all the sentences while appending a `[SEP]` token, and packing them into ragged-tensors:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BR7BmtU498Bh" - }, - "outputs": [], - "source": [ - "def encode_sentence(s):\n", - " tokens = list(tokenizer.tokenize(s.numpy()))\n", - " tokens.append('[SEP]')\n", - " return tokenizer.convert_tokens_to_ids(tokens)\n", - "\n", - "sentence1 = tf.ragged.constant([\n", - " encode_sentence(s) for s in glue_train[\"sentence1\"]])\n", - "sentence2 = tf.ragged.constant([\n", - " encode_sentence(s) for s in glue_train[\"sentence2\"]])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "has42aUdfky-" - }, - "outputs": [], - "source": [ - "print(\"Sentence1 shape:\", sentence1.shape.as_list())\n", - "print(\"Sentence2 shape:\", sentence2.shape.as_list())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MU9lTWy_xXbb" - }, - "source": [ - "Now prepend a `[CLS]` token, and concatenate the ragged tensors to form a single `input_word_ids` tensor for each example. `RaggedTensor.to_tensor()` zero pads to the longest sequence." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "USD8uihw-g4J" - }, - "outputs": [], - "source": [ - "cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])]*sentence1.shape[0]\n", - "input_word_ids = tf.concat([cls, sentence1, sentence2], axis=-1)\n", - "_ = plt.pcolormesh(input_word_ids.to_tensor())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xmNv4l4k-dBZ" - }, - "source": [ - "#### Mask and input type" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DIWjNIKq-ldh" - }, - "source": [ - "The model expects two additional inputs:\n", - "\n", - "* The input mask\n", - "* The input type" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ulNZ4U96-8JZ" - }, - "source": [ - "The mask allows the model to cleanly differentiate between the content and the padding. The mask has the same shape as the `input_word_ids`, and contains a `1` anywhere the `input_word_ids` is not padding." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EezOO9qj91kP" - }, - "outputs": [], - "source": [ - "input_mask = tf.ones_like(input_word_ids).to_tensor()\n", - "\n", - "plt.pcolormesh(input_mask)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rxLenwAvCkBf" - }, - "source": [ - "The \"input type\" also has the same shape, but inside the non-padded region, contains a `0` or a `1` indicating which sentence the token is a part of. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2CetH_5C9P2m" - }, - "outputs": [], - "source": [ - "type_cls = tf.zeros_like(cls)\n", - "type_s1 = tf.zeros_like(sentence1)\n", - "type_s2 = tf.ones_like(sentence2)\n", - "input_type_ids = tf.concat([type_cls, type_s1, type_s2], axis=-1).to_tensor()\n", - "\n", - "plt.pcolormesh(input_type_ids)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "P5UBnCn8Ii6s" - }, - "source": [ - "#### Put it all together\n", - "\n", - "Collect the above text parsing code into a single function, and apply it to each split of the `glue/mrpc` dataset." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sDGiWYPLEd5a" - }, - "outputs": [], - "source": [ - "def encode_sentence(s, tokenizer):\n", - " tokens = list(tokenizer.tokenize(s))\n", - " tokens.append('[SEP]')\n", - " return tokenizer.convert_tokens_to_ids(tokens)\n", - "\n", - "def bert_encode(glue_dict, tokenizer):\n", - " num_examples = len(glue_dict[\"sentence1\"])\n", - " \n", - " sentence1 = tf.ragged.constant([\n", - " encode_sentence(s, tokenizer)\n", - " for s in np.array(glue_dict[\"sentence1\"])])\n", - " sentence2 = tf.ragged.constant([\n", - " encode_sentence(s, tokenizer)\n", - " for s in np.array(glue_dict[\"sentence2\"])])\n", - "\n", - " cls = [tokenizer.convert_tokens_to_ids(['[CLS]'])]*sentence1.shape[0]\n", - " input_word_ids = tf.concat([cls, sentence1, sentence2], axis=-1)\n", - "\n", - " input_mask = tf.ones_like(input_word_ids).to_tensor()\n", - "\n", - " type_cls = tf.zeros_like(cls)\n", - " type_s1 = tf.zeros_like(sentence1)\n", - " type_s2 = tf.ones_like(sentence2)\n", - " input_type_ids = tf.concat(\n", - " [type_cls, type_s1, type_s2], axis=-1).to_tensor()\n", - "\n", - " inputs = {\n", - " 'input_word_ids': input_word_ids.to_tensor(),\n", - " 'input_mask': input_mask,\n", - " 'input_type_ids': input_type_ids}\n", - "\n", - " return inputs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yuLKxf6zHxw-" - }, - "outputs": [], - "source": [ - "glue_train = bert_encode(glue['train'], tokenizer)\n", - "glue_train_labels = glue['train']['label']\n", - "\n", - "glue_validation = bert_encode(glue['validation'], tokenizer)\n", - "glue_validation_labels = glue['validation']['label']\n", - "\n", - "glue_test = bert_encode(glue['test'], tokenizer)\n", - "glue_test_labels = glue['test']['label']" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7FC5aLVxKVKK" - }, - "source": [ - "Each subset of the data has been converted to a dictionary of features, and a set of labels. 
Each feature in the input dictionary has the same shape, and the number of labels should match:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jyjTdGpFhO_1" - }, - "outputs": [], - "source": [ - "for key, value in glue_train.items():\n", - " print(f'{key:15s} shape: {value.shape}')\n", - "\n", - "print(f'glue_train_labels shape: {glue_train_labels.shape}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FSwymsbkbLDA" - }, - "source": [ - "## The model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Efrj3Cn1kLAp" - }, - "source": [ - "### Build the model\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xxpOY5r2Ayq6" - }, - "source": [ - "The first step is to download the configuration for the pre-trained model.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ujapVfZ_AKW7" - }, - "outputs": [], - "source": [ - "import json\n", - "\n", - "bert_config_file = os.path.join(gs_folder_bert, \"bert_config.json\")\n", - "config_dict = json.loads(tf.io.gfile.GFile(bert_config_file).read())\n", - "\n", - "bert_config = bert.configs.BertConfig.from_dict(config_dict)\n", - "\n", - "config_dict" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "96ldxDSwkVkj" - }, - "source": [ - "The `config` defines the core BERT Model, which is a Keras model to predict the outputs of `num_classes` from the inputs with maximum sequence length `max_seq_length`.\n", - "\n", - "This function returns both the encoder and the classifier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cH682__U0FBv" - }, - "outputs": [], - "source": [ - "bert_classifier, bert_encoder = bert.bert_models.classifier_model(\n", - " bert_config, num_labels=2)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XqKp3-5GIZlw" - }, - "source": [ - "The classifier has three inputs and one output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bAQblMIjwkvx" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(bert_classifier, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sFmVG4SKZAw8" - }, - "source": [ - "Run it on a test batch of data 10 examples from the training set. The output is the logits for the two classes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VTjgPbp4ZDKo" - }, - "outputs": [], - "source": [ - "glue_batch = {key: val[:10] for key, val in glue_train.items()}\n", - "\n", - "bert_classifier(\n", - " glue_batch, training=True\n", - ").numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q0NTdwZsQK8n" - }, - "source": [ - "The `TransformerEncoder` in the center of the classifier above **is** the `bert_encoder`.\n", - "\n", - "Inspecting the encoder, we see its stack of `Transformer` layers connected to those same three inputs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8L__-erBwLIQ" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(bert_encoder, show_shapes=True, dpi=48)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mKAvkQc3heSy" - }, - "source": [ - "### Restore the encoder weights\n", - "\n", - "When built the encoder is randomly initialized. 
Restore the encoder's weights from the checkpoint:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "97Ll2Gichd_Y" - }, - "outputs": [], - "source": [ - "checkpoint = tf.train.Checkpoint(encoder=bert_encoder)\n", - "checkpoint.read(\n", - "    os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2oHOql35k3Dd" - }, - "source": [ - "Note: The pretrained `TransformerEncoder` is also available on [TensorFlow Hub](https://tensorflow.org/hub). See the [Hub appendix](#hub_bert) for details. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "115caFLMk-_l" - }, - "source": [ - "### Set up the optimizer\n", - "\n", - "BERT adopts the Adam optimizer with weight decay (aka \"[AdamW](https://arxiv.org/abs/1711.05101)\").\n", - "It also employs a learning rate schedule that first warms up from 0 and then decays to 0." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w8qXKRZuCwW4" - }, - "outputs": [], - "source": [ - "# Set up epochs and steps\n", - "epochs = 3\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "train_data_size = len(glue_train_labels)\n", - "steps_per_epoch = int(train_data_size / batch_size)\n", - "num_train_steps = steps_per_epoch * epochs\n", - "warmup_steps = int(epochs * train_data_size * 0.1 / batch_size)\n", - "\n", - "# creates an optimizer with learning rate schedule\n", - "optimizer = nlp.optimization.create_optimizer(\n", - "    2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pXRGxiRNEHS2" - }, - "source": [ - "This returns an `AdamWeightDecay` optimizer with the learning rate schedule set:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eQNA16bhDpky" - }, - "outputs": [], - "source": [ - "type(optimizer)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xqu_K71fJQB8" - }, - "source": [ - "To see an example of how to customize the optimizer and its schedule, see the [Optimizer schedule appendix](#optimizer_schedule)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "78FEUOOEkoP0" - }, - "source": [ - "### Train the model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OTNcA0O0nSq9" - }, - "source": [ - "The metric is accuracy and we use sparse categorical cross-entropy as loss."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nzi8hjeTQTRs" - }, - "outputs": [], - "source": [ - "metrics = [tf.keras.metrics.SparseCategoricalAccuracy('accuracy', dtype=tf.float32)]\n", - "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", - "bert_classifier.compile(\n", - " optimizer=optimizer,\n", - " loss=loss,\n", - " metrics=metrics)\n", - "\n", - "bert_classifier.fit(\n", - " glue_train, glue_train_labels,\n", - " validation_data=(glue_validation, glue_validation_labels),\n", - " batch_size=32,\n", - " epochs=epochs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IFtKFWbNKb0u" - }, - "source": [ - "Now run the fine-tuned model on a custom example to see that it works.\n", - "\n", - "Start by encoding some sentence pairs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9ZoUgDUNJPz3" - }, - "outputs": [], - "source": [ - "my_examples = bert_encode(\n", - " glue_dict = {\n", - " 'sentence1':[\n", - " 'The rain in Spain falls mainly on the plain.',\n", - " 'Look I fine tuned BERT.'],\n", - " 'sentence2':[\n", - " 'It mostly rains on the flat lands of Spain.',\n", - " 'Is it working? This does not match.']\n", - " },\n", - " tokenizer=tokenizer)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7ynJibkBRTJF" - }, - "source": [ - "The model should report class `1` \"match\" for the first example and class `0` \"no-match\" for the second:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "umo0ttrgRYIM" - }, - "outputs": [], - "source": [ - "result = bert_classifier(my_examples, training=False)\n", - "\n", - "result = tf.argmax(result).numpy()\n", - "result" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "utGl0M3aZCE4" - }, - "outputs": [], - "source": [ - "np.array(info.features['label'].names)[result]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fVo_AnT0l26j" - }, - "source": [ - "### Save the model\n", - "\n", - "Often the goal of training a model is to _use_ it for something, so export the model and then restore it to be sure that it works." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Nl5x6nElZqkP" - }, - "outputs": [], - "source": [ - "export_dir='./saved_model'\n", - "tf.saved_model.save(bert_classifier, export_dir=export_dir)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "y_ACvKPsVUXC" - }, - "outputs": [], - "source": [ - "reloaded = tf.saved_model.load(export_dir)\n", - "reloaded_result = reloaded([my_examples['input_word_ids'],\n", - " my_examples['input_mask'],\n", - " my_examples['input_type_ids']], training=False)\n", - "\n", - "original_result = bert_classifier(my_examples, training=False)\n", - "\n", - "# The results are (nearly) identical:\n", - "print(original_result.numpy())\n", - "print()\n", - "print(reloaded_result.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eQceYqRFT_Eg" - }, - "source": [ - "## Appendix" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SaC1RlFawUpc" - }, - "source": [ - "\u003ca id=re_encoding_tools\u003e\u003c/a\u003e\n", - "### Re-encoding a large dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CwUdjFBkzUgh" - }, - "source": [ - "This tutorial you re-encoded the dataset in memory, for clarity.\n", - "\n", - "This was only possible because `glue/mrpc` is a very small dataset. To deal with larger datasets `tf_models` library includes some tools for processing and re-encoding a dataset for efficient training." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2UTQrkyOT5wD" - }, - "source": [ - "The first step is to describe which features of the dataset should be transformed:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XQeDFOzYR9Z9" - }, - "outputs": [], - "source": [ - "processor = nlp.data.classifier_data_lib.TfdsProcessor(\n", - " tfds_params=\"dataset=glue/mrpc,text_key=sentence1,text_b_key=sentence2\",\n", - " process_text_fn=bert.tokenization.convert_to_unicode)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XrFQbfErUWxa" - }, - "source": [ - "Then apply the transformation to generate new TFRecord files." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ymw7GOHpSHKU" - }, - "outputs": [], - "source": [ - "# Set up output of training and evaluation Tensorflow dataset\n", - "train_data_output_path=\"./mrpc_train.tf_record\"\n", - "eval_data_output_path=\"./mrpc_eval.tf_record\"\n", - "\n", - "max_seq_length = 128\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "# Generate and save training data into a tf record file\n", - "input_meta_data = (\n", - " nlp.data.classifier_data_lib.generate_tf_record_from_data_file(\n", - " processor=processor,\n", - " data_dir=None, # It is `None` because data is from tfds, not local dir.\n", - " tokenizer=tokenizer,\n", - " train_data_output_path=train_data_output_path,\n", - " eval_data_output_path=eval_data_output_path,\n", - " max_seq_length=max_seq_length))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uX_Sp-wTUoRm" - }, - "source": [ - "Finally create `tf.data` input pipelines from those TFRecord files:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rkHxIK57SQ_r" - }, - "outputs": [], - "source": [ - "training_dataset = bert.run_classifier.get_dataset_fn(\n", - " train_data_output_path,\n", - " max_seq_length,\n", - " batch_size,\n", - " is_training=True)()\n", - "\n", - "evaluation_dataset = bert.run_classifier.get_dataset_fn(\n", - " eval_data_output_path,\n", - " max_seq_length,\n", - " eval_batch_size,\n", - " is_training=False)()\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "stbaVouogvzS" - }, - "source": [ - "The resulting `tf.data.Datasets` return `(features, labels)` pairs, as expected by `keras.Model.fit`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gwhrlQl4gxVF" - }, - "outputs": [], - "source": [ - "training_dataset.element_spec" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dbJ76vSJj77j" - }, - "source": [ - "#### Create tf.data.Dataset for training and evaluation\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9J95LFRohiYw" - }, - "source": [ - "If you need to modify the data loading here is some code to get you started:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "gCvaLLAxPuMc" - }, - "outputs": [], - "source": [ - "def create_classifier_dataset(file_path, seq_length, batch_size, is_training):\n", - " \"\"\"Creates input dataset from (tf)records files for train/eval.\"\"\"\n", - " dataset = tf.data.TFRecordDataset(file_path)\n", - " if is_training:\n", - " dataset = dataset.shuffle(100)\n", - " dataset = dataset.repeat()\n", - "\n", - " def decode_record(record):\n", - " name_to_features = {\n", - " 'input_ids': tf.io.FixedLenFeature([seq_length], tf.int64),\n", - " 'input_mask': tf.io.FixedLenFeature([seq_length], tf.int64),\n", - " 'segment_ids': tf.io.FixedLenFeature([seq_length], tf.int64),\n", - " 'label_ids': tf.io.FixedLenFeature([], tf.int64),\n", - " }\n", - " return tf.io.parse_single_example(record, name_to_features)\n", - "\n", - " def _select_data_from_record(record):\n", - " x = {\n", - " 'input_word_ids': record['input_ids'],\n", - " 'input_mask': record['input_mask'],\n", - " 'input_type_ids': record['segment_ids']\n", - " }\n", - " y = record['label_ids']\n", - " return (x, y)\n", - "\n", - " dataset = dataset.map(decode_record,\n", - " num_parallel_calls=tf.data.AUTOTUNE)\n", - " dataset = dataset.map(\n", - " _select_data_from_record,\n", - " 
num_parallel_calls=tf.data.AUTOTUNE)\n", - " dataset = dataset.batch(batch_size, drop_remainder=is_training)\n", - " dataset = dataset.prefetch(tf.data.AUTOTUNE)\n", - " return dataset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rutkBadrhzdR" - }, - "outputs": [], - "source": [ - "# Set up batch sizes\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "# Return Tensorflow dataset\n", - "training_dataset = create_classifier_dataset(\n", - " train_data_output_path,\n", - " input_meta_data['max_seq_length'],\n", - " batch_size,\n", - " is_training=True)\n", - "\n", - "evaluation_dataset = create_classifier_dataset(\n", - " eval_data_output_path,\n", - " input_meta_data['max_seq_length'],\n", - " eval_batch_size,\n", - " is_training=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "59TVgt4Z7fuU" - }, - "outputs": [], - "source": [ - "training_dataset.element_spec" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QbklKt-w_CiI" - }, - "source": [ - "\u003ca id=\"hub_bert\"\u003e\u003c/a\u003e\n", - "\n", - "### TFModels BERT on TFHub\n", - "\n", - "You can get [the BERT model](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2) off the shelf from [TFHub](https://tensorflow.org/hub). It would not be hard to add a classification head on top of this `hub.KerasLayer`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GDWrHm0BGpbX" - }, - "outputs": [], - "source": [ - "# Note: 350MB download.\n", - "import tensorflow_hub as hub" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "Y29meH0qGq_5" - }, - "outputs": [], - "source": [ - "hub_model_name = \"bert_en_uncased_L-12_H-768_A-12\" #@param [\"bert_en_uncased_L-24_H-1024_A-16\", \"bert_en_wwm_cased_L-24_H-1024_A-16\", \"bert_en_uncased_L-12_H-768_A-12\", \"bert_en_wwm_uncased_L-24_H-1024_A-16\", \"bert_en_cased_L-24_H-1024_A-16\", \"bert_en_cased_L-12_H-768_A-12\", \"bert_zh_L-12_H-768_A-12\", \"bert_multi_cased_L-12_H-768_A-12\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lo6479At4sP1" - }, - "outputs": [], - "source": [ - "hub_encoder = hub.KerasLayer(f\"https://tfhub.dev/tensorflow/{hub_model_name}/3\",\n", - " trainable=True)\n", - "\n", - "print(f\"The Hub encoder has {len(hub_encoder.trainable_variables)} trainable variables\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "iTzF574wivQv" - }, - "source": [ - "Test run it on a batch of data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XEcYrCR45Uwo" - }, - "outputs": [], - "source": [ - "result = hub_encoder(\n", - " inputs=dict(\n", - " input_word_ids=glue_train['input_word_ids'][:10],\n", - " input_mask=glue_train['input_mask'][:10],\n", - " input_type_ids=glue_train['input_type_ids'][:10],),\n", - " training=False,\n", - ")\n", - "\n", - "print(\"Pooled output shape:\", result['pooled_output'].shape)\n", - "print(\"Sequence output shape:\", result['sequence_output'].shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cjojn8SmLSRI" - }, - "source": [ - "At this point it would be simple to add a classification head yourself.\n", - "\n", - "The `bert_models.classifier_model` function can also build a classifier onto the encoder from TensorFlow Hub:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": 
"9nTDaApyLR70" - }, - "outputs": [], - "source": [ - "hub_classifier = nlp.modeling.models.BertClassifier(\n", - " bert_encoder,\n", - " num_classes=2,\n", - " dropout_rate=0.1,\n", - " initializer=tf.keras.initializers.TruncatedNormal(\n", - " stddev=0.02))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xMJX3wV0_v7I" - }, - "source": [ - "The one downside to loading this model from TFHub is that the structure of internal keras layers is not restored. So it's more difficult to inspect or modify the model. The `BertEncoder` model is now a single layer:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pD71dnvhM2QS" - }, - "outputs": [], - "source": [ - "tf.keras.utils.plot_model(hub_classifier, show_shapes=True, dpi=64)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nLZD-isBzNKi" - }, - "outputs": [], - "source": [ - "try:\n", - " tf.keras.utils.plot_model(hub_encoder, show_shapes=True, dpi=64)\n", - " assert False\n", - "except Exception as e:\n", - " print(f\"{type(e).__name__}: {e}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZxSqH0dNAgXV" - }, - "source": [ - "\u003ca id=\"model_builder_functions\"\u003e\u003c/a\u003e\n", - "\n", - "### Low level model building\n", - "\n", - "If you need a more control over the construction of the model it's worth noting that the `classifier_model` function used earlier is really just a thin wrapper over the `nlp.modeling.networks.BertEncoder` and `nlp.modeling.models.BertClassifier` classes. Just remember that if you start modifying the architecture it may not be correct or possible to reload the pre-trained checkpoint so you'll need to retrain from scratch." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0cgABEwDj06P" - }, - "source": [ - "Build the encoder:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5r_yqhBFSVEM" - }, - "outputs": [], - "source": [ - "bert_encoder_config = config_dict.copy()\n", - "\n", - "# You need to rename a few fields to make this work:\n", - "bert_encoder_config['attention_dropout_rate'] = bert_encoder_config.pop('attention_probs_dropout_prob')\n", - "bert_encoder_config['activation'] = tf_utils.get_activation(bert_encoder_config.pop('hidden_act'))\n", - "bert_encoder_config['dropout_rate'] = bert_encoder_config.pop('hidden_dropout_prob')\n", - "bert_encoder_config['initializer'] = tf.keras.initializers.TruncatedNormal(\n", - " stddev=bert_encoder_config.pop('initializer_range'))\n", - "bert_encoder_config['max_sequence_length'] = bert_encoder_config.pop('max_position_embeddings')\n", - "bert_encoder_config['num_layers'] = bert_encoder_config.pop('num_hidden_layers')\n", - "\n", - "bert_encoder_config" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rIO8MI7LLijh" - }, - "outputs": [], - "source": [ - "manual_encoder = nlp.modeling.networks.BertEncoder(**bert_encoder_config)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4a4tFSg9krRi" - }, - "source": [ - "Restore the weights:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "X6N9NEqfXJCx" - }, - "outputs": [], - "source": [ - "checkpoint = tf.train.Checkpoint(encoder=manual_encoder)\n", - "checkpoint.read(\n", - " os.path.join(gs_folder_bert, 'bert_model.ckpt')).assert_consumed()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1BPiPO4ykuwM" - }, - "source": [ - "Test run it:" - ] 
- }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hlVdgJKmj389" - }, - "outputs": [], - "source": [ - "result = manual_encoder(my_examples, training=True)\n", - "\n", - "print(\"Sequence output shape:\", result[0].shape)\n", - "print(\"Pooled output shape:\", result[1].shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nJMXvVgJkyBv" - }, - "source": [ - "Wrap it in a classifier:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tQX57GJ6wkAb" - }, - "outputs": [], - "source": [ - "manual_classifier = nlp.modeling.models.BertClassifier(\n", - " bert_encoder,\n", - " num_classes=2,\n", - " dropout_rate=bert_encoder_config['dropout_rate'],\n", - " initializer=bert_encoder_config['initializer'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kB-nBWhQk0dS" - }, - "outputs": [], - "source": [ - "manual_classifier(my_examples, training=True).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E6AJlOSyIO1L" - }, - "source": [ - "\u003ca id=\"optimizer_schedule\"\u003e\u003c/a\u003e\n", - "\n", - "### Optimizers and schedules\n", - "\n", - "The optimizer used to train the model was created using the `nlp.optimization.create_optimizer` function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "28Dv3BPRlFTD" - }, - "outputs": [], - "source": [ - "optimizer = nlp.optimization.create_optimizer(\n", - " 2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LRjcHr0UlT8c" - }, - "source": [ - "That high level wrapper sets up the learning rate schedules and the optimizer.\n", - "\n", - "The base learning rate schedule used here is a linear decay to zero over the training run:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MHY8K6kDngQn" - }, - "outputs": [], - "source": [ - "epochs = 3\n", - "batch_size = 32\n", - "eval_batch_size = 32\n", - "\n", - "train_data_size = len(glue_train_labels)\n", - "steps_per_epoch = int(train_data_size / batch_size)\n", - "num_train_steps = steps_per_epoch * epochs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wKIcSprulu3P" - }, - "outputs": [], - "source": [ - "decay_schedule = tf.keras.optimizers.schedules.PolynomialDecay(\n", - " initial_learning_rate=2e-5,\n", - " decay_steps=num_train_steps,\n", - " end_learning_rate=0)\n", - "\n", - "plt.plot([decay_schedule(n) for n in range(num_train_steps)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IMTC_gfAl_PZ" - }, - "source": [ - "This, in turn is wrapped in a `WarmUp` schedule that linearly increases the learning rate to the target value over the first 10% of training:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YRt3VTmBmCBY" - }, - "outputs": [], - "source": [ - "warmup_steps = num_train_steps * 0.1\n", - "\n", - "warmup_schedule = nlp.optimization.WarmUp(\n", - " initial_learning_rate=2e-5,\n", - " decay_schedule_fn=decay_schedule,\n", - " warmup_steps=warmup_steps)\n", - "\n", - "# The warmup overshoots, because it warms up to the `initial_learning_rate`\n", - "# following the original implementation. 
You can set\n", - "# `initial_learning_rate=decay_schedule(warmup_steps)` if you don't like the\n", - "# overshoot.\n", - "plt.plot([warmup_schedule(n) for n in range(num_train_steps)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l8D9Lv3Bn740" - }, - "source": [ - "Then create the `nlp.optimization.AdamWeightDecay` using that schedule, configured for the BERT model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2Hf2rpRXk89N" - }, - "outputs": [], - "source": [ - "optimizer = nlp.optimization.AdamWeightDecay(\n", - " learning_rate=warmup_schedule,\n", - " weight_decay_rate=0.01,\n", - " epsilon=1e-6,\n", - " exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias'])" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "fine_tune_bert.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_1.jpg b/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_1.jpg deleted file mode 100644 index 33ec60b..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_1.jpg +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_2.jpg b/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_2.jpg deleted file mode 100644 index 5dcbea3..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_2.jpg +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_3.jpg b/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_3.jpg deleted file mode 100644 index 972a0a4..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_3.jpg +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_4.jpg b/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_4.jpg deleted file mode 100644 index c45b6a5..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/attention_equation_4.jpg +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/bidirectional.png b/third_party/tensorflow-text/src/docs/tutorials/images/bidirectional.png deleted file mode 100644 index 5e52a8a..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/bidirectional.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/layered_bidirectional.png b/third_party/tensorflow-text/src/docs/tutorials/images/layered_bidirectional.png deleted file mode 100644 index 22126ee..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/layered_bidirectional.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/text_generation_sampling.png b/third_party/tensorflow-text/src/docs/tutorials/images/text_generation_sampling.png deleted file mode 100644 index 817cb89..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/text_generation_sampling.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/images/text_generation_training.png b/third_party/tensorflow-text/src/docs/tutorials/images/text_generation_training.png deleted file mode 100644 index 575d3d4..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/images/text_generation_training.png +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/docs/tutorials/nmt_with_attention.ipynb b/third_party/tensorflow-text/src/docs/tutorials/nmt_with_attention.ipynb deleted file mode 100644 index 046b6fc2..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/nmt_with_attention.ipynb +++ /dev/null
@@ -1,2893 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "s_qNSzzyaCbD" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "jmjh290raIky" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J0Qjg6vuaHNt" - }, - "source": [ - "# Neural machine translation with attention" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AOpGoE2T-YXS" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/nmt_with_attention\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003e\n", - " View on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/nmt_with_attention.ipynb\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e\n", - " Run in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/nmt_with_attention.ipynb\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e\n", - " View source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/nmt_with_attention.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "CiwtNgENbx2g" - }, - "source": [ - "This notebook trains a sequence to sequence (seq2seq) model for Spanish to English translation based on [Effective Approaches to Attention-based Neural Machine Translation](https://arxiv.org/abs/1508.04025v5). 
This is an advanced example that assumes some knowledge of:\n", - "\n", - "* Sequence to sequence models\n", - "* TensorFlow fundamentals below the keras layer: \n", - "  * Working with tensors directly \n", - "  * Writing custom `keras.Model`s and `keras.layers`\n", - "\n", - "While this architecture is somewhat outdated, it is still a very useful project to work through to get a deeper understanding of attention mechanisms (before going on to [Transformers](transformer.ipynb)).\n", - "\n", - "After training the model in this notebook, you will be able to input a Spanish sentence, such as \"*¿todavia estan en casa?*\", and return the English translation: \"*are you still at home?*\"\n", - "\n", - "The resulting model is exportable as a `tf.saved_model`, so it can be used in other TensorFlow environments.\n", - "\n", - "The translation quality is reasonable for a toy example, but the generated attention plot is perhaps more interesting. This shows which parts of the input sentence have the model's attention while translating:\n", - "\n", - "\u003cimg src=\"https://tensorflow.org/images/spanish-english.png\" alt=\"spanish-english attention plot\"\u003e\n", - "\n", - "Note: This example takes approximately 10 minutes to run on a single P100 GPU." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yAmSR1FaqKrl" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DGFTkuRvzWqc" - }, - "outputs": [], - "source": [ - "!pip install tensorflow_text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tnxXKDjq3jEL" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "import typing\n", - "from typing import Any, Tuple\n", - "\n", - "import tensorflow as tf\n", - "from tensorflow.keras.layers.experimental import preprocessing\n", - "\n", - "import tensorflow_text as tf_text\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import matplotlib.ticker as ticker" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Vs8zge-RUdC2" - }, - "source": [ - "This tutorial builds a few layers from scratch; use this variable if you want to switch between the custom and builtin implementations." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KPJ9J7iPUchc" - }, - "outputs": [], - "source": [ - "use_builtins = True" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l_yq8kvIqoqQ" - }, - "source": [ - "This tutorial uses a lot of low-level APIs where it's easy to get shapes wrong. This class is used to check shapes throughout the tutorial. 
\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KqFqKi4fqN9X" - }, - "outputs": [], - "source": [ - "#@title Shape checker\n", - "class ShapeChecker():\n", - " def __init__(self):\n", - " # Keep a cache of every axis-name seen\n", - " self.shapes = {}\n", - "\n", - " def __call__(self, tensor, names, broadcast=False):\n", - " if not tf.executing_eagerly():\n", - " return\n", - "\n", - " if isinstance(names, str):\n", - " names = (names,)\n", - "\n", - " shape = tf.shape(tensor)\n", - " rank = tf.rank(tensor)\n", - "\n", - " if rank != len(names):\n", - " raise ValueError(f'Rank mismatch:\\n'\n", - " f' found {rank}: {shape.numpy()}\\n'\n", - " f' expected {len(names)}: {names}\\n')\n", - "\n", - " for i, name in enumerate(names):\n", - " if isinstance(name, int):\n", - " old_dim = name\n", - " else:\n", - " old_dim = self.shapes.get(name, None)\n", - " new_dim = shape[i]\n", - "\n", - " if (broadcast and new_dim == 1):\n", - " continue\n", - "\n", - " if old_dim is None:\n", - " # If the axis name is new, add its length to the cache.\n", - " self.shapes[name] = new_dim\n", - " continue\n", - "\n", - " if new_dim != old_dim:\n", - " raise ValueError(f\"Shape mismatch for dimension: '{name}'\\n\"\n", - " f\" found: {new_dim}\\n\"\n", - " f\" expected: {old_dim}\\n\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gjUROhJfH3ML" - }, - "source": [ - "## The data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "puE_K74DIE9W" - }, - "source": [ - "We'll use a language dataset provided by http://www.manythings.org/anki/. This dataset contains language translation pairs in the format:\n", - "\n", - "```\n", - "May I borrow this book?\t¿Puedo tomar prestado este libro?\n", - "```\n", - "\n", - "They have a variety of languages available, but we'll use the English-Spanish dataset." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wfodePkj3jEa" - }, - "source": [ - "### Download and prepare the dataset\n", - "\n", - "For convenience, we've hosted a copy of this dataset on Google Cloud, but you can also download your own copy. After downloading the dataset, here are the steps we'll take to prepare the data:\n", - "\n", - "1. Add a *start* and *end* token to each sentence.\n", - "2. Clean the sentences by removing special characters.\n", - "3. Create a word index and reverse word index (dictionaries mapping from word → id and id → word).\n", - "4. Pad each sentence to a maximum length." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kRVATYOgJs1b" - }, - "outputs": [], - "source": [ - "# Download the file\n", - "import pathlib\n", - "\n", - "path_to_zip = tf.keras.utils.get_file(\n", - " 'spa-eng.zip', origin='http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip',\n", - " extract=True)\n", - "\n", - "path_to_file = pathlib.Path(path_to_zip).parent/'spa-eng/spa.txt'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OHn4Dct23jEm" - }, - "outputs": [], - "source": [ - "def load_data(path):\n", - " text = path.read_text(encoding='utf-8')\n", - "\n", - " lines = text.splitlines()\n", - " pairs = [line.split('\\t') for line in lines]\n", - "\n", - " inp = [inp for targ, inp in pairs]\n", - " targ = [targ for targ, inp in pairs]\n", - "\n", - " return targ, inp" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cTbSbBz55QtF" - }, - "outputs": [], - "source": [ - "targ, inp = load_data(path_to_file)\n", - "print(inp[-1])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lH_dPY8TRp3c" - }, - "outputs": [], - "source": [ - "print(targ[-1])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rgCLkfv5uO3d" - }, - "source": [ - "### Create a tf.data dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "PfVWx3WaI5Df" - }, - "source": [ - "From these arrays of strings you can create a `tf.data.Dataset` of strings that shuffles and batches them efficiently:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "TqHsArVZ3jFS" - }, - "outputs": [], - "source": [ - "BUFFER_SIZE = len(inp)\n", - "BATCH_SIZE = 64\n", - "\n", - "dataset = tf.data.Dataset.from_tensor_slices((inp, targ)).shuffle(BUFFER_SIZE)\n", - "dataset = dataset.batch(BATCH_SIZE)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qc6-NK1GtWQt" - }, - "outputs": [], - "source": [ - "for example_input_batch, example_target_batch in dataset.take(1):\n", - " print(example_input_batch[:5])\n", - " print()\n", - " print(example_target_batch[:5])\n", - " break" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zCoxLcuN3bwv" - }, - "source": [ - "### Text preprocessing" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7kwdPcHvzz_a" - }, - "source": [ - "One of the goals of this tutorial is to build a model that can be exported as a `tf.saved_model`. To make that exported model useful it should take `tf.string` inputs, and retrun `tf.string` outputs: All the text processing happens inside the model. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EOQ5n55X4uDB" - }, - "source": [ - "#### Standardization" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "upKhKAMK4zzI" - }, - "source": [ - "The model is dealing with multilingual text with a limited vocabulary. 
So it will be important to standardize the input text.\n", - "\n", - "The first step is Unicode normalization to split accented characters and replace compatibility characters with their ASCII equivalents.\n", - "\n", - "The `tensorflow_text` package contains a unicode normalize operation:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mD0e-DWGQ2Vo" - }, - "outputs": [], - "source": [ - "example_text = tf.constant('¿Todavía está en casa?')\n", - "\n", - "print(example_text.numpy())\n", - "print(tf_text.normalize_utf8(example_text, 'NFKD').numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6hTllEjK6RSo" - }, - "source": [ - "Unicode normalization will be the first step in the text standardization function:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "chTF5N885F0P" - }, - "outputs": [], - "source": [ - "def tf_lower_and_split_punct(text):\n", - "  # Split accented characters.\n", - "  text = tf_text.normalize_utf8(text, 'NFKD')\n", - "  text = tf.strings.lower(text)\n", - "  # Keep space, a to z, and select punctuation.\n", - "  text = tf.strings.regex_replace(text, '[^ a-z.?!,¿]', '')\n", - "  # Add spaces around punctuation.\n", - "  text = tf.strings.regex_replace(text, '[.?!,¿]', r' \\0 ')\n", - "  # Strip whitespace.\n", - "  text = tf.strings.strip(text)\n", - "\n", - "  text = tf.strings.join(['[START]', text, '[END]'], separator=' ')\n", - "  return text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UREvDg3sEKYa" - }, - "outputs": [], - "source": [ - "print(example_text.numpy().decode())\n", - "print(tf_lower_and_split_punct(example_text).numpy().decode())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4q-sKsSI7xRZ" - }, - "source": [ - "#### Text Vectorization" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6aKn8qd37abi" - }, - "source": [ - "This standardization function will be wrapped up in a `preprocessing.TextVectorization` layer which will handle the vocabulary extraction and conversion of input text to sequences of tokens." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "eAY9k49G3jE_" - }, - "outputs": [], - "source": [ - "max_vocab_size = 5000\n", - "\n", - "input_text_processor = preprocessing.TextVectorization(\n", - "    standardize=tf_lower_and_split_punct,\n", - "    max_tokens=max_vocab_size)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7kbC6ODP8IK_" - }, - "source": [ - "The `TextVectorization` layer and many other `experimental.preprocessing` layers have an `adapt` method. This method reads one epoch of the training data, and works a lot like `Model.fit`. This `adapt` method initializes the layer based on the data. 
Here it determines the vocabulary: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bmsI1Yql8FYe" - }, - "outputs": [], - "source": [ - "input_text_processor.adapt(inp)\n", - "\n", - "# Here are the first 10 words from the vocabulary:\n", - "input_text_processor.get_vocabulary()[:10]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9kGjIFjX8_Wp" - }, - "source": [ - "That's the Spanish `TextVectorization` layer, now build and `.adapt()` the English one:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jlC4xuZnKLBS" - }, - "outputs": [], - "source": [ - "output_text_processor = preprocessing.TextVectorization(\n", - " standardize=tf_lower_and_split_punct,\n", - " max_tokens=max_vocab_size)\n", - "\n", - "output_text_processor.adapt(targ)\n", - "output_text_processor.get_vocabulary()[:10]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BWQqlP_s9eIv" - }, - "source": [ - "Now these layers can convert a batch of strings into a batch of token IDs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9KZxj8IrNZ9S" - }, - "outputs": [], - "source": [ - "example_tokens = input_text_processor(example_input_batch)\n", - "example_tokens[:3, :10]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AA9rUn9G9n78" - }, - "source": [ - "The `get_vocabulary` method can be used to convert token IDs back to text:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "98g9rcxGQY0I" - }, - "outputs": [], - "source": [ - "input_vocab = np.array(input_text_processor.get_vocabulary())\n", - "tokens = input_vocab[example_tokens[0].numpy()]\n", - "' '.join(tokens)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ot0aCL9t-Ghi" - }, - "source": [ - "The returned token IDs are zero-padded. This can easily be turned into a mask:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_jx4Or_eFRSz" - }, - "outputs": [], - "source": [ - "plt.subplot(1, 2, 1)\n", - "plt.pcolormesh(example_tokens)\n", - "plt.title('Token IDs')\n", - "\n", - "plt.subplot(1, 2, 2)\n", - "plt.pcolormesh(example_tokens != 0)\n", - "plt.title('Mask')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TNfHIF71ulLu" - }, - "source": [ - "## The encoder/decoder model\n", - "\n", - "The following diagram shows an overview of the model. At each time-step the decoder's output is combined with a weighted sum over the encoded input, to predict the next word. The diagram and formulas are from [Luong's paper](https://arxiv.org/abs/1508.04025v5).\n", - "\n", - "\u003cimg src=\"https://www.tensorflow.org/images/seq2seq/attention_mechanism.jpg\" width=\"500\" alt=\"attention mechanism\"\u003e\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gzQWx2saImMV" - }, - "source": [ - "Before getting into it define a few constants for the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_a9uNz3-IrF-" - }, - "outputs": [], - "source": [ - "embedding_dim = 256\n", - "units = 1024" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "blNgVbLSzpsr" - }, - "source": [ - "### The encoder\n", - "\n", - "Start by building the encoder, the blue part of the diagram above.\n", - "\n", - "The encoder:\n", - "\n", - "1. Takes a list of token IDs (from `input_text_processor`).\n", - "3. 
Looks up an embedding vector for each token (Using a `layers.Embedding`).\n", - "4. Processes the embeddings into a new sequence (Using a `layers.GRU`).\n", - "5. Returns:\n", - " * The processed sequence. This will be passed to the attention head.\n", - " * The internal state. This will be used to initialize the decoder\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "nZ2rI24i3jFg" - }, - "outputs": [], - "source": [ - "class Encoder(tf.keras.layers.Layer):\n", - " def __init__(self, input_vocab_size, embedding_dim, enc_units):\n", - " super(Encoder, self).__init__()\n", - " self.enc_units = enc_units\n", - " self.input_vocab_size = input_vocab_size\n", - "\n", - " # The embedding layer converts tokens to vectors\n", - " self.embedding = tf.keras.layers.Embedding(self.input_vocab_size,\n", - " embedding_dim)\n", - "\n", - " # The GRU RNN layer processes those vectors sequentially.\n", - " self.gru = tf.keras.layers.GRU(self.enc_units,\n", - " # Return the sequence and state\n", - " return_sequences=True,\n", - " return_state=True,\n", - " recurrent_initializer='glorot_uniform')\n", - "\n", - " def call(self, tokens, state=None):\n", - " shape_checker = ShapeChecker()\n", - " shape_checker(tokens, ('batch', 's'))\n", - "\n", - " # 2. The embedding layer looks up the embedding for each token.\n", - " vectors = self.embedding(tokens)\n", - " shape_checker(vectors, ('batch', 's', 'embed_dim'))\n", - "\n", - " # 3. The GRU processes the embedding sequence.\n", - " # output shape: (batch, s, enc_units)\n", - " # state shape: (batch, enc_units)\n", - " output, state = self.gru(vectors, initial_state=state)\n", - " shape_checker(output, ('batch', 's', 'enc_units'))\n", - " shape_checker(state, ('batch', 'enc_units'))\n", - "\n", - " # 4. Returns the new sequence and its state.\n", - " return output, state" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "D3SKkaQeGn-Q" - }, - "source": [ - "Here is how it fits together so far:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "60gSVh05Jl6l" - }, - "outputs": [], - "source": [ - "# Convert the input text to tokens.\n", - "example_tokens = input_text_processor(example_input_batch)\n", - "\n", - "# Encode the input sequence.\n", - "encoder = Encoder(input_text_processor.vocabulary_size(),\n", - " embedding_dim, units)\n", - "example_enc_output, example_enc_state = encoder(example_tokens)\n", - "\n", - "print(f'Input batch, shape (batch): {example_input_batch.shape}')\n", - "print(f'Input batch tokens, shape (batch, s): {example_tokens.shape}')\n", - "print(f'Encoder output, shape (batch, s, units): {example_enc_output.shape}')\n", - "print(f'Encoder state, shape (batch, units): {example_enc_state.shape}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2RIPHh4O9ixB" - }, - "source": [ - "The encoder returns its internal state so that its state can be used to initialize the decoder.\n", - "\n", - "It's also common for an RNN to return its state so that it can process a sequence over multiple calls. You'll see more of that building the decoder." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "45xM_Gl1MgXY" - }, - "source": [ - "### The attention head\n", - "\n", - "The decoder uses attention to selectively focus on parts of the input sequence.\n", - "The attention takes a sequence of vectors as input for each example and returns an \"attention\" vector for each example. 
This attention layer is similar to a `layers.GlobalAveragePoling1D` but the attention layer performs a _weighted_ average.\n", - "\n", - "Let's look at how this works:\n", - "\n", - "\u003cimg src=\"images/attention_equation_1.jpg\" alt=\"attention equation 1\" width=\"800\"\u003e\n", - "\n", - "\u003cimg src=\"images/attention_equation_2.jpg\" alt=\"attention equation 2\" width=\"800\"\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NX2JsKzzzgZ5" - }, - "source": [ - "Where:\n", - "\n", - "* $s$ is the encoder index.\n", - "* $t$ is the decoder index.\n", - "* $\\alpha_{ts}$ is the attention weights.\n", - "* $h_s$ is the sequence of encoder outputs being attended to (the attention \"key\" and \"value\" in transformer terminology).\n", - "* $h_t$ is the the decoder state attending to the sequence (the attention \"query\" in transformer terminology).\n", - "* $c_t$ is the resulting context vector.\n", - "* $a_t$ is the final output combining the \"context\" and \"query\".\n", - "\n", - "The equations:\n", - "\n", - "1. Calculates the attention weights, $\\alpha_{ts}$, as a softmax across the encoder's output sequence.\n", - "2. Calculates the context vector as the weighted sum of the encoder outputs.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fNA5GeHHPsGL" - }, - "source": [ - "Last is the $score$ function. Its job is to calculate a scalar logit-score for each key-query pair. There are two common approaches:\n", - "\n", - "\u003cimg src=\"images/attention_equation_4.jpg\" alt=\"attention equation 4\" width=\"800\"\u003e\n", - "\n", - "This tutorial uses [Bahdanau's additive attention](https://arxiv.org/pdf/1409.0473.pdf). TensorFlow includes implementations of both as `layers.Attention` and\n", - "`layers.AdditiveAttention`. The class below handles the weight matrices in a pair of `layers.Dense` layers, and calls the builtin implementation." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "momiE59lXo6U" - }, - "outputs": [], - "source": [ - "class BahdanauAttention(tf.keras.layers.Layer):\n", - " def __init__(self, units):\n", - " super().__init__()\n", - " # For Eqn. (4), the Bahdanau attention\n", - " self.W1 = tf.keras.layers.Dense(units, use_bias=False)\n", - " self.W2 = tf.keras.layers.Dense(units, use_bias=False)\n", - "\n", - " self.attention = tf.keras.layers.AdditiveAttention()\n", - "\n", - " def call(self, query, value, mask):\n", - " shape_checker = ShapeChecker()\n", - " shape_checker(query, ('batch', 't', 'query_units'))\n", - " shape_checker(value, ('batch', 's', 'value_units'))\n", - " shape_checker(mask, ('batch', 's'))\n", - "\n", - " # From Eqn. (4), `W1@ht`.\n", - " w1_query = self.W1(query)\n", - " shape_checker(w1_query, ('batch', 't', 'attn_units'))\n", - "\n", - " # From Eqn. 
(4), `W2@hs`.\n", - " w2_key = self.W2(value)\n", - " shape_checker(w2_key, ('batch', 's', 'attn_units'))\n", - "\n", - " query_mask = tf.ones(tf.shape(query)[:-1], dtype=bool)\n", - " value_mask = mask\n", - "\n", - " context_vector, attention_weights = self.attention(\n", - " inputs = [w1_query, value, w2_key],\n", - " mask=[query_mask, value_mask],\n", - " return_attention_scores = True,\n", - " )\n", - " shape_checker(context_vector, ('batch', 't', 'value_units'))\n", - " shape_checker(attention_weights, ('batch', 't', 's'))\n", - "\n", - " return context_vector, attention_weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Cf13LubPGjDO" - }, - "source": [ - "### Test the Attention layer\n", - "\n", - "Create a `BahdanauAttention` layer:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "t4QMlOp8Gidh" - }, - "outputs": [], - "source": [ - "attention_layer = BahdanauAttention(units)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "snA1uL9AI-JE" - }, - "source": [ - "This layer takes 3 inputs:\n", - "\n", - "* The `query`: This will be generated by the decoder, later.\n", - "* The `value`: This Will be the output of the encoder.\n", - "* The `mask`: To exclude the padding, `example_tokens != 0`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DYSHqmORgVFo" - }, - "outputs": [], - "source": [ - "(example_tokens != 0).shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "g2bmvT25pXnr" - }, - "source": [ - "The vectorized implementation of the attention layer lets you pass a batch of sequences of query vectors and a batch of sequence of value vectors. The result is:\n", - "\n", - "1. A batch of sequences of result vectors the size of the queries.\n", - "2. A batch attention maps, with size `(query_length, value_length)`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7y7hjPkNMmHh" - }, - "outputs": [], - "source": [ - "# Later, the decoder will generate this attention query\n", - "example_attention_query = tf.random.normal(shape=[len(example_tokens), 2, 10])\n", - "\n", - "# Attend to the encoded tokens\n", - "\n", - "context_vector, attention_weights = attention_layer(\n", - " query=example_attention_query,\n", - " value=example_enc_output,\n", - " mask=(example_tokens != 0))\n", - "\n", - "print(f'Attention result shape: (batch_size, query_seq_length, units): {context_vector.shape}')\n", - "print(f'Attention weights shape: (batch_size, query_seq_length, value_seq_length): {attention_weights.shape}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AagyXMH-Jhqt" - }, - "source": [ - "The attention weights should sum to `1.0` for each sequence.\n", - "\n", - "Here are the attention weights across the sequences at `t=0`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Rqr8XGsAJlf6" - }, - "outputs": [], - "source": [ - "plt.subplot(1, 2, 1)\n", - "plt.pcolormesh(attention_weights[:, 0, :])\n", - "plt.title('Attention weights')\n", - "\n", - "plt.subplot(1, 2, 2)\n", - "plt.pcolormesh(example_tokens != 0)\n", - "plt.title('Mask')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6Eil-C_NN1rp" - }, - "source": [ - "Because of the small-random initialization the attention weights are all close to `1/(sequence_length)`. 
If you zoom in on the weights for a single sequence, you can see that there is some _small_ variation that the model can learn to expand, and exploit." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZuzrCdmYlTcJ" - }, - "outputs": [], - "source": [ - "attention_weights.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qIMwC-f-ZC8N" - }, - "outputs": [], - "source": [ - "attention_slice = attention_weights[0, 0].numpy()\n", - "attention_slice = attention_slice[attention_slice != 0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ysWDPO6hOS8X" - }, - "outputs": [], - "source": [ - "#@title\n", - "plt.suptitle('Attention weights for one sequence')\n", - "\n", - "plt.figure(figsize=(12, 6))\n", - "a1 = plt.subplot(1, 2, 1)\n", - "plt.bar(range(len(attention_slice)), attention_slice)\n", - "# freeze the xlim\n", - "plt.xlim(plt.xlim())\n", - "plt.xlabel('Attention weights')\n", - "\n", - "a2 = plt.subplot(1, 2, 2)\n", - "plt.bar(range(len(attention_slice)), attention_slice)\n", - "plt.xlabel('Attention weights, zoomed')\n", - "\n", - "# zoom in\n", - "top = max(a1.get_ylim())\n", - "zoom = 0.85*top\n", - "a2.set_ylim([0.90*top, top])\n", - "a1.plot(a1.get_xlim(), [zoom, zoom], color='k')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aQ638eHN4iCK" - }, - "source": [ - "### The decoder\n", - "\n", - "The decoder's job is to generate predictions for the next output token.\n", - "\n", - "1. The decoder receives the complete encoder output.\n", - "2. It uses an RNN to keep track of what it has generated so far.\n", - "3. It uses its RNN output as the query to the attention over the encoder's output, producing the context vector.\n", - "4. It combines the RNN output and the context vector using Equation 3 (below) to generate the \"attention vector\".\n", - "5. It generates logit predictions for the next token based on the \"attention vector\".\n", - "\n", - "\u003cimg src=\"images/attention_equation_3.jpg\" alt=\"attention equation 3\" width=\"800\"\u003e\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pZsQJMqNmg_L" - }, - "source": [ - "Here is the `Decoder` class and its initializer. The initializer creates all the necessary layers." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "erYvHIgAl8kh" - }, - "outputs": [], - "source": [ - "class Decoder(tf.keras.layers.Layer):\n", - " def __init__(self, output_vocab_size, embedding_dim, dec_units):\n", - " super(Decoder, self).__init__()\n", - " self.dec_units = dec_units\n", - " self.output_vocab_size = output_vocab_size\n", - " self.embedding_dim = embedding_dim\n", - "\n", - " # For Step 1. The embedding layer convets token IDs to vectors\n", - " self.embedding = tf.keras.layers.Embedding(self.output_vocab_size,\n", - " embedding_dim)\n", - "\n", - " # For Step 2. The RNN keeps track of what's been generated so far.\n", - " self.gru = tf.keras.layers.GRU(self.dec_units,\n", - " return_sequences=True,\n", - " return_state=True,\n", - " recurrent_initializer='glorot_uniform')\n", - "\n", - " # For step 3. The RNN output will be the query for the attention layer.\n", - " self.attention = BahdanauAttention(self.dec_units)\n", - "\n", - " # For step 4. Eqn. (3): converting `ct` to `at`\n", - " self.Wc = tf.keras.layers.Dense(dec_units, activation=tf.math.tanh,\n", - " use_bias=False)\n", - "\n", - " # For step 5. 
This fully connected layer produces the logits for each\n", - " # output token.\n", - " self.fc = tf.keras.layers.Dense(self.output_vocab_size)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eUTfYHmfmwKH" - }, - "source": [ - "The `call` method for this layer takes and returns multiple tensors. Organize those into simple container classes:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7WfSIb2sArRT" - }, - "outputs": [], - "source": [ - "class DecoderInput(typing.NamedTuple):\n", - " new_tokens: Any\n", - " enc_output: Any\n", - " mask: Any\n", - "\n", - "class DecoderOutput(typing.NamedTuple):\n", - " logits: Any\n", - " attention_weights: Any" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NChkl2KrnV2y" - }, - "source": [ - "Here is the implementation of the `call` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PJOi5btHAPNK" - }, - "outputs": [], - "source": [ - "def call(self,\n", - " inputs: DecoderInput,\n", - " state=None) -\u003e Tuple[DecoderOutput, tf.Tensor]:\n", - " shape_checker = ShapeChecker()\n", - " shape_checker(inputs.new_tokens, ('batch', 't'))\n", - " shape_checker(inputs.enc_output, ('batch', 's', 'enc_units'))\n", - " shape_checker(inputs.mask, ('batch', 's'))\n", - "\n", - " if state is not None:\n", - " shape_checker(state, ('batch', 'dec_units'))\n", - "\n", - " # Step 1. Lookup the embeddings\n", - " vectors = self.embedding(inputs.new_tokens)\n", - " shape_checker(vectors, ('batch', 't', 'embedding_dim'))\n", - "\n", - " # Step 2. Process one step with the RNN\n", - " rnn_output, state = self.gru(vectors, initial_state=state)\n", - "\n", - " shape_checker(rnn_output, ('batch', 't', 'dec_units'))\n", - " shape_checker(state, ('batch', 'dec_units'))\n", - "\n", - " # Step 3. Use the RNN output as the query for the attention over the\n", - " # encoder output.\n", - " context_vector, attention_weights = self.attention(\n", - " query=rnn_output, value=inputs.enc_output, mask=inputs.mask)\n", - " shape_checker(context_vector, ('batch', 't', 'dec_units'))\n", - " shape_checker(attention_weights, ('batch', 't', 's'))\n", - "\n", - " # Step 4. Eqn. (3): Join the context_vector and rnn_output\n", - " # [ct; ht] shape: (batch t, value_units + query_units)\n", - " context_and_rnn_output = tf.concat([context_vector, rnn_output], axis=-1)\n", - "\n", - " # Step 4. Eqn. (3): `at = tanh(Wc@[ct; ht])`\n", - " attention_vector = self.Wc(context_and_rnn_output)\n", - " shape_checker(attention_vector, ('batch', 't', 'dec_units'))\n", - "\n", - " # Step 5. Generate logit predictions:\n", - " logits = self.fc(attention_vector)\n", - " shape_checker(logits, ('batch', 't', 'output_vocab_size'))\n", - "\n", - " return DecoderOutput(logits, attention_weights), state" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ay_mTMPfnb2a" - }, - "outputs": [], - "source": [ - "Decoder.call = call" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "arTOBklcFTiC" - }, - "source": [ - "The **encoder** processes its full input sequence with a single call to its RNN. This implementation of the **decoder** _can_ do that as well for efficient training. 
But this tutorial will run the decoder in a loop for a few reasons:\n", - "\n", - "* Flexibility: Writing the loop gives you direct control over the training procedure.\n", - "* Clarity: It's possible to do masking tricks and use `layers.RNN`, or `tfa.seq2seq` APIs to pack this all into a single call. But writing it out as a loop may be clearer. \n", - " * Loop-free training is demonstrated in the [Text generation](text_generation.ipynb) tutorial.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E1-mLAcUEXpK" - }, - "source": [ - "Now try using this decoder." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4ZUMbYXIEVeA" - }, - "outputs": [], - "source": [ - "decoder = Decoder(output_text_processor.vocabulary_size(),\n", - " embedding_dim, units)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UPnaw583CpnY" - }, - "source": [ - "The decoder takes 4 inputs.\n", - "\n", - "* `new_tokens` - The last token generated. Initialize the decoder with the `\"[START]\"` token.\n", - "* `enc_output` - Generated by the `Encoder`.\n", - "* `mask` - A boolean tensor indicating where `tokens != 0`.\n", - "* `state` - The previous `state` output from the decoder (the internal state\n", - " of the decoder's RNN). Pass `None` to zero-initialize it. The original\n", - " paper initializes it from the encoder's final RNN state. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4u6eJBU4GL40" - }, - "outputs": [], - "source": [ - "# Convert the target sequence, and collect the \"[START]\" tokens\n", - "example_output_tokens = output_text_processor(example_target_batch)\n", - "\n", - "start_index = output_text_processor.get_vocabulary().index('[START]')\n", - "first_token = tf.constant([[start_index]] * example_output_tokens.shape[0])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "E5hqvbR5FUCD" - }, - "outputs": [], - "source": [ - "# Run the decoder\n", - "dec_result, dec_state = decoder(\n", - " inputs = DecoderInput(new_tokens=first_token,\n", - " enc_output=example_enc_output,\n", - " mask=(example_tokens != 0)),\n", - " state = example_enc_state\n", - ")\n", - "\n", - "print(f'logits shape: (batch_size, t, output_vocab_size) {dec_result.logits.shape}')\n", - "print(f'state shape: (batch_size, dec_units) {dec_state.shape}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vEZvXZRVPHd6" - }, - "source": [ - "Sample a token according to the logits:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "P5UY8wko3jFp" - }, - "outputs": [], - "source": [ - "sampled_token = tf.random.categorical(dec_result.logits[:, 0, :], num_samples=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-xTpX44VkzrY" - }, - "source": [ - "Decode the token as the first word of the output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lKXTLYu4IV7I" - }, - "outputs": [], - "source": [ - "vocab = np.array(output_text_processor.get_vocabulary())\n", - "first_word = vocab[sampled_token.numpy()]\n", - "first_word[:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LUQV6AXoQR7z" - }, - "source": [ - "Now use the decoder to generate a second set of logits.\n", - "\n", - "- Pass the same `enc_output` and `mask`; these haven't changed.\n", - "- Pass the sampled token as `new_tokens`.\n", - "- Pass the `decoder_state` the decoder returned last time, so the RNN 
continues with a memory of where it left off last time.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pX1VF9XDJTOM" - }, - "outputs": [], - "source": [ - "dec_result, dec_state = decoder(\n", - " DecoderInput(sampled_token,\n", - " example_enc_output,\n", - " mask=(example_tokens != 0)),\n", - " state=dec_state)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "H1rs0XL7Y2aS" - }, - "outputs": [], - "source": [ - "sampled_token = tf.random.categorical(dec_result.logits[:, 0, :], num_samples=1)\n", - "first_word = vocab[sampled_token.numpy()]\n", - "first_word[:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "B6xyru86m914" - }, - "source": [ - "## Training\n", - "\n", - "Now that you have all the model components, it's time to start training the model. You'll need:\n", - "\n", - "- A loss function and optimizer to perform the optimization.\n", - "- A training step function defining how to update the model for each input/target batch.\n", - "- A training loop to drive the training and save checkpoints." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_ch_71VbIRfK" - }, - "source": [ - "### Define the loss function" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WmTHr5iV3jFr" - }, - "outputs": [], - "source": [ - "class MaskedLoss(tf.keras.losses.Loss):\n", - " def __init__(self):\n", - " self.name = 'masked_loss'\n", - " self.loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", - " from_logits=True, reduction='none')\n", - "\n", - " def __call__(self, y_true, y_pred):\n", - " shape_checker = ShapeChecker()\n", - " shape_checker(y_true, ('batch', 't'))\n", - " shape_checker(y_pred, ('batch', 't', 'logits'))\n", - "\n", - " # Calculate the loss for each item in the batch.\n", - " loss = self.loss(y_true, y_pred)\n", - " shape_checker(loss, ('batch', 't'))\n", - "\n", - " # Mask off the losses on padding.\n", - " mask = tf.cast(y_true != 0, tf.float32)\n", - " shape_checker(mask, ('batch', 't'))\n", - " loss *= mask\n", - "\n", - " # Return the total.\n", - " return tf.reduce_sum(loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M5AgEBh2S404" - }, - "source": [ - "### Implement the training step" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r_G20Te1XSmJ" - }, - "source": [ - "Start with a model class, the training process will be implemented as the `train_step` method on this model. See [Customizing fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) for details.\n", - "\n", - "Here the `train_step` method is a wrapper around the `_train_step` implementation which will come later. This wrapper includes a switch to turn on and off `tf.function` compilation, to make debugging easier." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WWIyuy71TkJT" - }, - "outputs": [], - "source": [ - "class TrainTranslator(tf.keras.Model):\n", - " def __init__(self, embedding_dim, units,\n", - " input_text_processor,\n", - " output_text_processor, \n", - " use_tf_function=True):\n", - " super().__init__()\n", - " # Build the encoder and decoder\n", - " encoder = Encoder(input_text_processor.vocabulary_size(),\n", - " embedding_dim, units)\n", - " decoder = Decoder(output_text_processor.vocabulary_size(),\n", - " embedding_dim, units)\n", - "\n", - " self.encoder = encoder\n", - " self.decoder = decoder\n", - " self.input_text_processor = input_text_processor\n", - " self.output_text_processor = output_text_processor\n", - " self.use_tf_function = use_tf_function\n", - " self.shape_checker = ShapeChecker()\n", - "\n", - " def train_step(self, inputs):\n", - " self.shape_checker = ShapeChecker()\n", - " if self.use_tf_function:\n", - " return self._tf_train_step(inputs)\n", - " else:\n", - " return self._train_step(inputs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-i0i1x6jwsLm" - }, - "source": [ - "Overall the implementation for the `Model.train_step` method is as follows:\n", - "\n", - "1. Receive a batch of `input_text, target_text` from the `tf.data.Dataset`.\n", - "2. Convert those raw text inputs to token-embeddings and masks. \n", - "3. Run the encoder on the `input_tokens` to get the `encoder_output` and `encoder_state`.\n", - "4. Initialize the decoder state and loss. \n", - "5. Loop over the `target_tokens`:\n", - " 1. Run the decoder one step at a time.\n", - " 2. Calculate the loss for each step.\n", - " 3. Accumulate the average loss.\n", - "6. Calculate the gradient of the loss and use the optimizer to apply updates to the model's `trainable_variables`." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ngBjFw4BU5G7" - }, - "source": [ - "The `_preprocess` method, added below, implements steps #1 and #2: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZlYE68wzXoA8" - }, - "outputs": [], - "source": [ - "def _preprocess(self, input_text, target_text):\n", - " self.shape_checker(input_text, ('batch',))\n", - " self.shape_checker(target_text, ('batch',))\n", - "\n", - " # Convert the text to token IDs\n", - " input_tokens = self.input_text_processor(input_text)\n", - " target_tokens = self.output_text_processor(target_text)\n", - " self.shape_checker(input_tokens, ('batch', 's'))\n", - " self.shape_checker(target_tokens, ('batch', 't'))\n", - "\n", - " # Convert IDs to masks.\n", - " input_mask = input_tokens != 0\n", - " self.shape_checker(input_mask, ('batch', 's'))\n", - "\n", - " target_mask = target_tokens != 0\n", - " self.shape_checker(target_mask, ('batch', 't'))\n", - "\n", - " return input_tokens, input_mask, target_tokens, target_mask" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lHy6hzStrgjQ" - }, - "outputs": [], - "source": [ - "TrainTranslator._preprocess = _preprocess" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d3kvbcArc2y-" - }, - "source": [ - "The `_train_step` method, added below, handles the remaining steps except for actually running the decoder: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Qs_gsISsYPpY" - }, - "outputs": [], - "source": [ - "def _train_step(self, inputs):\n", - " input_text, target_text = inputs \n", - "\n", - " (input_tokens, input_mask,\n", - " target_tokens, target_mask) = self._preprocess(input_text, target_text)\n", - "\n", - " max_target_length = tf.shape(target_tokens)[1]\n", - "\n", - " with tf.GradientTape() as tape:\n", - " # Encode the input\n", - " enc_output, enc_state = self.encoder(input_tokens)\n", - " self.shape_checker(enc_output, ('batch', 's', 'enc_units'))\n", - " self.shape_checker(enc_state, ('batch', 'enc_units'))\n", - "\n", - " # Initialize the decoder's state to the encoder's final state.\n", - " # This only works if the encoder and decoder have the same number of\n", - " # units.\n", - " dec_state = enc_state\n", - " loss = tf.constant(0.0)\n", - "\n", - " for t in tf.range(max_target_length-1):\n", - " # Pass in two tokens from the target sequence:\n", - " # 1. The current input to the decoder.\n", - " # 2. 
The target for the decoder's next prediction.\n", - " new_tokens = target_tokens[:, t:t+2]\n", - " step_loss, dec_state = self._loop_step(new_tokens, input_mask,\n", - " enc_output, dec_state)\n", - " loss = loss + step_loss\n", - "\n", - " # Average the loss over all non padding tokens.\n", - " average_loss = loss / tf.reduce_sum(tf.cast(target_mask, tf.float32))\n", - "\n", - " # Apply an optimization step\n", - " variables = self.trainable_variables \n", - " gradients = tape.gradient(average_loss, variables)\n", - " self.optimizer.apply_gradients(zip(gradients, variables))\n", - "\n", - " # Return a dict mapping metric names to current value\n", - " return {'batch_loss': average_loss}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KGwWHIxLrjGR" - }, - "outputs": [], - "source": [ - "TrainTranslator._train_step = _train_step" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "F7g40o-mXyt5" - }, - "source": [ - "The `_loop_step` method, added below, executes the decoder and calculates the incremental loss and new decoder state (`dec_state`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9VrzgwztXzYJ" - }, - "outputs": [], - "source": [ - "def _loop_step(self, new_tokens, input_mask, enc_output, dec_state):\n", - " input_token, target_token = new_tokens[:, 0:1], new_tokens[:, 1:2]\n", - "\n", - " # Run the decoder one step.\n", - " decoder_input = DecoderInput(new_tokens=input_token,\n", - " enc_output=enc_output,\n", - " mask=input_mask)\n", - "\n", - " dec_result, dec_state = self.decoder(decoder_input, state=dec_state)\n", - " self.shape_checker(dec_result.logits, ('batch', 't1', 'logits'))\n", - " self.shape_checker(dec_result.attention_weights, ('batch', 't1', 's'))\n", - " self.shape_checker(dec_state, ('batch', 'dec_units'))\n", - "\n", - " # `self.loss` returns the total for non-padded tokens\n", - " y = target_token\n", - " y_pred = dec_result.logits\n", - " step_loss = self.loss(y, y_pred)\n", - "\n", - " return step_loss, dec_state" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xj3I7VULrk1R" - }, - "outputs": [], - "source": [ - "TrainTranslator._loop_step = _loop_step" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WACCHvKWBQ9C" - }, - "source": [ - "### Test the training step\n", - "\n", - "Build a `TrainTranslator`, and configure it for training using the `Model.compile` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OA6bCske8TXm" - }, - "outputs": [], - "source": [ - "translator = TrainTranslator(\n", - " embedding_dim, units,\n", - " input_text_processor=input_text_processor,\n", - " output_text_processor=output_text_processor,\n", - " use_tf_function=False)\n", - "\n", - "# Configure the loss and optimizer\n", - "translator.compile(\n", - " optimizer=tf.optimizers.Adam(),\n", - " loss=MaskedLoss(),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6y5OnZDsB3sB" - }, - "source": [ - "Test out the `train_step`. 
For a text model like this, the loss should start near:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zHe-OudqCFGK" - }, - "outputs": [], - "source": [ - "np.log(output_text_processor.vocabulary_size())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VwMU9cFEfjha" - }, - "outputs": [], - "source": [ - "%%time\n", - "for n in range(10):\n", - " print(translator.train_step([example_input_batch, example_target_batch]))\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "A-xqtsMbCUp2" - }, - "source": [ - "While it's easier to debug without a `tf.function`, wrapping the step in one does give a performance boost. So now that the `_train_step` method is working, try the `tf.function`-wrapped `_tf_train_step` to maximize performance while training:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UFUsTKQx0jaH" - }, - "outputs": [], - "source": [ - "@tf.function(input_signature=[[tf.TensorSpec(dtype=tf.string, shape=[None]),\n", - " tf.TensorSpec(dtype=tf.string, shape=[None])]])\n", - "def _tf_train_step(self, inputs):\n", - " return self._train_step(inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2-bgU59jrztQ" - }, - "outputs": [], - "source": [ - "TrainTranslator._tf_train_step = _tf_train_step" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KC8bRv_Gr3H9" - }, - "outputs": [], - "source": [ - "translator.use_tf_function = True" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EKMYNF_sIFb9" - }, - "source": [ - "The first call will be slow, because it traces the function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pLQZsX2dp1QK" - }, - "outputs": [], - "source": [ - "translator.train_step([example_input_batch, example_target_batch])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W3t2Hg7UISYi" - }, - "source": [ - "But after that it's usually 2-3x faster than the eager `train_step` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UzXXMwjXCqqh" - }, - "outputs": [], - "source": [ - "%%time\n", - "for n in range(10):\n", - " print(translator.train_step([example_input_batch, example_target_batch]))\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OIvigTqaEcu1" - }, - "source": [ - "A good test of a new model is to see that it can overfit a single batch of input. 
Try it, the loss should quickly go to zero:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "U-dIWMIBqK7b" - }, - "outputs": [], - "source": [ - "losses = []\n", - "for n in range(100):\n", - " print('.', end='')\n", - " logs = translator.train_step([example_input_batch, example_target_batch])\n", - " losses.append(logs['batch_loss'].numpy())\n", - "\n", - "print()\n", - "plt.plot(losses)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aI02XFjoEt1k" - }, - "source": [ - "Now that you're confident that the training step is working, build a fresh copy of the model to train from scratch:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Emgfgh4tAmJt" - }, - "outputs": [], - "source": [ - "train_translator = TrainTranslator(\n", - " embedding_dim, units,\n", - " input_text_processor=input_text_processor,\n", - " output_text_processor=output_text_processor)\n", - "\n", - "# Configure the loss and optimizer\n", - "train_translator.compile(\n", - " optimizer=tf.optimizers.Adam(),\n", - " loss=MaskedLoss(),\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hpObfY22IddU" - }, - "source": [ - "### Train the model\n", - "\n", - "While there's nothing wrong with writing your own custom training loop, implementing the `Model.train_step` method, as in the previous section, allows you to run `Model.fit` and avoid rewriting all that boiler-plate code. \n", - "\n", - "This tutorial only trains for a couple of epochs, so use a `callbacks.Callback` to collect the history of batch losses, for plotting:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "J7m4mtnj80sq" - }, - "outputs": [], - "source": [ - "class BatchLogs(tf.keras.callbacks.Callback):\n", - " def __init__(self, key):\n", - " self.key = key\n", - " self.logs = []\n", - "\n", - " def on_train_batch_end(self, n, logs):\n", - " self.logs.append(logs[self.key])\n", - "\n", - "batch_loss = BatchLogs('batch_loss')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BQd_esVVoSf3" - }, - "outputs": [], - "source": [ - "train_translator.fit(dataset, epochs=3,\n", - " callbacks=[batch_loss])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "38rLdlmtQHCm" - }, - "outputs": [], - "source": [ - "plt.plot(batch_loss.logs)\n", - "plt.ylim([0, 3])\n", - "plt.xlabel('Batch #')\n", - "plt.ylabel('CE/token')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "w0S_O_RzHmfe" - }, - "source": [ - "The visible jumps in the plot are at the epoch boundaries." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mU3Ce8M6I3rz" - }, - "source": [ - "## Translate\n", - "\n", - "Now that the model is trained, implement a function to execute the full `text =\u003e text` translation.\n", - "\n", - "For this the model needs to invert the `text =\u003e token IDs` mapping provided by the `output_text_processor`. It also needs to know the IDs for special tokens. This is all implemented in the constructor for the new class. The implementation of the actual translate method will follow.\n", - "\n", - "Overall this is similar to the training loop, except that the input to the decoder at each time step is a sample from the decoder's last prediction." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PO-CLL1LVBbM" - }, - "outputs": [], - "source": [ - "class Translator(tf.Module):\n", - "\n", - " def __init__(self, encoder, decoder, input_text_processor,\n", - " output_text_processor):\n", - " self.encoder = encoder\n", - " self.decoder = decoder\n", - " self.input_text_processor = input_text_processor\n", - " self.output_text_processor = output_text_processor\n", - "\n", - " self.output_token_string_from_index = (\n", - " tf.keras.layers.experimental.preprocessing.StringLookup(\n", - " vocabulary=output_text_processor.get_vocabulary(),\n", - " mask_token='',\n", - " invert=True))\n", - "\n", - " # The output should never generate padding, unknown, or start.\n", - " index_from_string = tf.keras.layers.experimental.preprocessing.StringLookup(\n", - " vocabulary=output_text_processor.get_vocabulary(), mask_token='')\n", - " token_mask_ids = index_from_string(['', '[UNK]', '[START]']).numpy()\n", - "\n", - " token_mask = np.zeros([index_from_string.vocabulary_size()], dtype=np.bool)\n", - " token_mask[np.array(token_mask_ids)] = True\n", - " self.token_mask = token_mask\n", - "\n", - " self.start_token = index_from_string(tf.constant('[START]'))\n", - " self.end_token = index_from_string(tf.constant('[END]'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iBQzFZ9uWU79" - }, - "outputs": [], - "source": [ - "translator = Translator(\n", - " encoder=train_translator.encoder,\n", - " decoder=train_translator.decoder,\n", - " input_text_processor=input_text_processor,\n", - " output_text_processor=output_text_processor,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "b59PN-UxqYrU" - }, - "source": [ - "### Convert token IDs to text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-razg3Aso737" - }, - "source": [ - "The first method to implement is `tokens_to_text` which converts from token IDs to human readable text." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8IjwKTwtmdFf" - }, - "outputs": [], - "source": [ - "def tokens_to_text(self, result_tokens):\n", - " shape_checker = ShapeChecker()\n", - " shape_checker(result_tokens, ('batch', 't'))\n", - " result_text_tokens = self.output_token_string_from_index(result_tokens)\n", - " shape_checker(result_text_tokens, ('batch', 't'))\n", - "\n", - " result_text = tf.strings.reduce_join(result_text_tokens,\n", - " axis=1, separator=' ')\n", - " shape_checker(result_text, ('batch'))\n", - "\n", - " result_text = tf.strings.strip(result_text)\n", - " shape_checker(result_text, ('batch',))\n", - " return result_text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "912aV0K7r90w" - }, - "outputs": [], - "source": [ - "Translator.tokens_to_text = tokens_to_text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "krBuAapkqNs9" - }, - "source": [ - "Input some random token IDs and see what it generates:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cWCMHdoS32QN" - }, - "outputs": [], - "source": [ - "example_output_tokens = tf.random.uniform(\n", - " shape=[5, 2], minval=0, dtype=tf.int64,\n", - " maxval=output_text_processor.vocabulary_size())\n", - "translator.tokens_to_text(example_output_tokens).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AC9De_kAqtaE" - }, - "source": [ - "### Sample from the decoder's predictions" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "q5tno-2ksJv6" - }, - "source": [ - "This function takes the decoder's logit outputs and samples token IDs from that distribution:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8lfuj3GcdD6e" - }, - "outputs": [], - "source": [ - "def sample(self, logits, temperature):\n", - " shape_checker = ShapeChecker()\n", - " # 't' is usually 1 here.\n", - " shape_checker(logits, ('batch', 't', 'vocab'))\n", - " shape_checker(self.token_mask, ('vocab',))\n", - "\n", - " token_mask = self.token_mask[tf.newaxis, tf.newaxis, :]\n", - " shape_checker(token_mask, ('batch', 't', 'vocab'), broadcast=True)\n", - "\n", - " # Set the logits for all masked tokens to -inf, so they are never chosen.\n", - " logits = tf.where(self.token_mask, -np.inf, logits)\n", - "\n", - " if temperature == 0.0:\n", - " new_tokens = tf.argmax(logits, axis=-1)\n", - " else: \n", - " logits = tf.squeeze(logits, axis=1)\n", - " new_tokens = tf.random.categorical(logits/temperature,\n", - " num_samples=1)\n", - " \n", - " shape_checker(new_tokens, ('batch', 't'))\n", - "\n", - " return new_tokens" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4DpDnBdBdL9_" - }, - "outputs": [], - "source": [ - "Translator.sample = sample" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QwdHfGEfsmy5" - }, - "source": [ - "Test run this function on some random inputs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rwLT0nxXym80" - }, - "outputs": [], - "source": [ - "example_logits = tf.random.normal([5, 1, output_text_processor.vocabulary_size()])\n", - "example_output_tokens = translator.sample(example_logits, temperature=1.0)\n", - "example_output_tokens" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NEWIKFIJ2HWM" - }, - "source": [ - "### Implement the translation loop\n", - "\n", - "Here is a complete implementation of 
the text to text translation loop.\n", - "\n", - "This implementation collects the results into python lists, before using `tf.concat` to join them into tensors.\n", - "\n", - "This implementation statically unrolls the graph out to `max_length` iterations.\n", - "This is okay with eager execution in python." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZmOvVrZmwAxg" - }, - "outputs": [], - "source": [ - "def translate_unrolled(self,\n", - " input_text, *,\n", - " max_length=50,\n", - " return_attention=True,\n", - " temperature=1.0):\n", - " batch_size = tf.shape(input_text)[0]\n", - " input_tokens = self.input_text_processor(input_text)\n", - " enc_output, enc_state = self.encoder(input_tokens)\n", - "\n", - " dec_state = enc_state\n", - " new_tokens = tf.fill([batch_size, 1], self.start_token)\n", - "\n", - " result_tokens = []\n", - " attention = []\n", - " done = tf.zeros([batch_size, 1], dtype=tf.bool)\n", - "\n", - " for _ in range(max_length):\n", - " dec_input = DecoderInput(new_tokens=new_tokens,\n", - " enc_output=enc_output,\n", - " mask=(input_tokens!=0))\n", - " \n", - " dec_result, dec_state = self.decoder(dec_input, state=dec_state)\n", - "\n", - " attention.append(dec_result.attention_weights)\n", - "\n", - " new_tokens = self.sample(dec_result.logits, temperature)\n", - "\n", - " # If a sequence produces an `end_token`, set it `done`\n", - " done = done | (new_tokens == self.end_token)\n", - " # Once a sequence is done it only produces 0-padding.\n", - " new_tokens = tf.where(done, tf.constant(0, dtype=tf.int64), new_tokens)\n", - "\n", - " # Collect the generated tokens\n", - " result_tokens.append(new_tokens)\n", - "\n", - " if tf.executing_eagerly() and tf.reduce_all(done):\n", - " break\n", - "\n", - " # Convert the list of generates token ids to a list of strings.\n", - " result_tokens = tf.concat(result_tokens, axis=-1)\n", - " result_text = self.tokens_to_text(result_tokens)\n", - "\n", - " if return_attention:\n", - " attention_stack = tf.concat(attention, axis=1)\n", - " return {'text': result_text, 'attention': attention_stack}\n", - " else:\n", - " return {'text': result_text}\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JOmd8Y269MG3" - }, - "outputs": [], - "source": [ - "Translator.translate = translate_unrolled" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NxYXf3GNKKLS" - }, - "source": [ - "Run it on a simple input:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hd2rgyHwVVrv" - }, - "outputs": [], - "source": [ - "%%time\n", - "input_text = tf.constant([\n", - " 'hace mucho frio aqui.', # \"It's really cold here.\"\n", - " 'Esta es mi vida.', # \"This is my life.\"\"\n", - "])\n", - "\n", - "result = translator.translate(\n", - " input_text = input_text)\n", - "\n", - "print(result['text'][0].numpy().decode())\n", - "print(result['text'][1].numpy().decode())\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "S-6cFyqeUPQm" - }, - "source": [ - "If you want to export this model you'll need to wrap this method in a `tf.function`. This basic implementation has a few issues if you try to do that:\n", - "\n", - "1. The resulting graphs are very large and take a few seconds to build, save or load.\n", - "2. You can't break from a statically unrolled loop, so it will always run `max_length` iterations, even if all the outputs are done. 
But even then it's marginally faster than eager execution.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_JhTZ5hOptO-" - }, - "outputs": [], - "source": [ - "@tf.function(input_signature=[tf.TensorSpec(dtype=tf.string, shape=[None])])\n", - "def tf_translate(self, input_text):\n", - " return self.translate(input_text)\n", - "\n", - "Translator.tf_translate = tf_translate" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fkccvHDvXCa8" - }, - "source": [ - "Run the `tf.function` once to compile it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_NzrixLvVBjQ" - }, - "outputs": [], - "source": [ - "%%time\n", - "result = translator.tf_translate(\n", - " input_text = input_text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "USJdu00tVFbd" - }, - "outputs": [], - "source": [ - "%%time\n", - "result = translator.tf_translate(\n", - " input_text = input_text)\n", - "\n", - "print(result['text'][0].numpy().decode())\n", - "print(result['text'][1].numpy().decode())\n", - "print()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EbQpyYs13jF_" - }, - "outputs": [], - "source": [ - "#@title [Optional] Use a symbolic loop\n", - "def translate_symbolic(self,\n", - " input_text,\n", - " *,\n", - " max_length=50,\n", - " return_attention=True,\n", - " temperature=1.0):\n", - " shape_checker = ShapeChecker()\n", - " shape_checker(input_text, ('batch',))\n", - "\n", - " batch_size = tf.shape(input_text)[0]\n", - "\n", - " # Encode the input\n", - " input_tokens = self.input_text_processor(input_text)\n", - " shape_checker(input_tokens, ('batch', 's'))\n", - "\n", - " enc_output, enc_state = self.encoder(input_tokens)\n", - " shape_checker(enc_output, ('batch', 's', 'enc_units'))\n", - " shape_checker(enc_state, ('batch', 'enc_units'))\n", - "\n", - " # Initialize the decoder\n", - " dec_state = enc_state\n", - " new_tokens = tf.fill([batch_size, 1], self.start_token)\n", - " shape_checker(new_tokens, ('batch', 't1'))\n", - "\n", - " # Initialize the accumulators\n", - " result_tokens = tf.TensorArray(tf.int64, size=1, dynamic_size=True)\n", - " attention = tf.TensorArray(tf.float32, size=1, dynamic_size=True)\n", - " done = tf.zeros([batch_size, 1], dtype=tf.bool)\n", - " shape_checker(done, ('batch', 't1'))\n", - "\n", - " for t in tf.range(max_length):\n", - " dec_input = DecoderInput(\n", - " new_tokens=new_tokens, enc_output=enc_output, mask=(input_tokens != 0))\n", - "\n", - " dec_result, dec_state = self.decoder(dec_input, state=dec_state)\n", - "\n", - " shape_checker(dec_result.attention_weights, ('batch', 't1', 's'))\n", - " attention = attention.write(t, dec_result.attention_weights)\n", - "\n", - " new_tokens = self.sample(dec_result.logits, temperature)\n", - " shape_checker(dec_result.logits, ('batch', 't1', 'vocab'))\n", - " shape_checker(new_tokens, ('batch', 't1'))\n", - "\n", - " # If a sequence produces an `end_token`, set it `done`\n", - " done = done | (new_tokens == self.end_token)\n", - " # Once a sequence is done it only produces 0-padding.\n", - " new_tokens = tf.where(done, tf.constant(0, dtype=tf.int64), new_tokens)\n", - "\n", - " # Collect the generated tokens\n", - " result_tokens = result_tokens.write(t, new_tokens)\n", - "\n", - " if tf.reduce_all(done):\n", - " break\n", - "\n", - " # Convert the list of generated token ids to a list of strings.\n", - " result_tokens = result_tokens.stack()\n", 
- " shape_checker(result_tokens, ('t', 'batch', 't0'))\n", - " result_tokens = tf.squeeze(result_tokens, -1)\n", - " result_tokens = tf.transpose(result_tokens, [1, 0])\n", - " shape_checker(result_tokens, ('batch', 't'))\n", - "\n", - " result_text = self.tokens_to_text(result_tokens)\n", - " shape_checker(result_text, ('batch',))\n", - "\n", - " if return_attention:\n", - " attention_stack = attention.stack()\n", - " shape_checker(attention_stack, ('t', 'batch', 't1', 's'))\n", - "\n", - " attention_stack = tf.squeeze(attention_stack, 2)\n", - " shape_checker(attention_stack, ('t', 'batch', 's'))\n", - "\n", - " attention_stack = tf.transpose(attention_stack, [1, 0, 2])\n", - " shape_checker(attention_stack, ('batch', 't', 's'))\n", - "\n", - " return {'text': result_text, 'attention': attention_stack}\n", - " else:\n", - " return {'text': result_text}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ngywxv1WYO_O" - }, - "outputs": [], - "source": [ - "Translator.translate = translate_symbolic" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lItV7qjEGsYc" - }, - "source": [ - "The initial implementation used python lists to collect the outputs. This uses `tf.range` as the loop iterator, allowing `tf.autograph` to convert the loop. The biggest change in this implementation is the use of `tf.TensorArray` instead of python `list` to accumulate tensors. `tf.TensorArray` is required to collect a variable number of tensors in graph mode. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AJ_NznOgZTxC" - }, - "source": [ - "With eager execution this implementation performs on par with the original:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JRh66y-YYeBw" - }, - "outputs": [], - "source": [ - "%%time\n", - "result = translator.translate(\n", - " input_text = input_text)\n", - "\n", - "print(result['text'][0].numpy().decode())\n", - "print(result['text'][1].numpy().decode())\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l6B8W4_MZdX0" - }, - "source": [ - "But when you wrap it in a `tf.function` you'll notice two differences." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WX6EF8KtYh20" - }, - "outputs": [], - "source": [ - "@tf.function(input_signature=[tf.TensorSpec(dtype=tf.string, shape=[None])])\n", - "def tf_translate(self, input_text):\n", - " return self.translate(input_text)\n", - "\n", - "Translator.tf_translate = tf_translate" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9S0kQ-bBZswZ" - }, - "source": [ - "First: Graph creation is much faster (~10x), since it doesn't create `max_iterations` copies of the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Eq8d40RKYoJa" - }, - "outputs": [], - "source": [ - "%%time\n", - "result = translator.tf_translate(\n", - " input_text = input_text)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2ABEwtKIZ6eE" - }, - "source": [ - "Second: The compiled function is much faster on small inputs (5x on this example), because it can break out of the loop." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "d5VdCLxPYrpz" - }, - "outputs": [], - "source": [ - "%%time\n", - "result = translator.tf_translate(\n", - " input_text = input_text)\n", - "\n", - "print(result['text'][0].numpy().decode())\n", - "print(result['text'][1].numpy().decode())\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eo5sf4jZaO2l" - }, - "source": [ - "### Visualize the process" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FzZzC2cJacTv" - }, - "source": [ - "The attention weights returned by the `translate` method show where the model was \"looking\" when it generated each output token.\n", - "\n", - "So the sum of the attention over the input should return all ones:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UEd2GljgqQ-0" - }, - "outputs": [], - "source": [ - "a = result['attention'][0]\n", - "\n", - "print(np.sum(a, axis=-1))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "k_HWQHcI2_h5" - }, - "source": [ - "Here is the attention distribution for the first output step of the first example. Note how the attention is now much more focused than it was for the untrained model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "M8BHdqQujALu" - }, - "outputs": [], - "source": [ - "_ = plt.bar(range(len(a[0, :])), a[0, :])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qB13OG472Z3V" - }, - "source": [ - "Since there is some rough alignment between the input and output words, you expect the attention to be focused near the diagonal:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xyeXuEYHd0kQ" - }, - "outputs": [], - "source": [ - "plt.imshow(np.array(a), vmin=0.0)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mXECcNTn2mxN" - }, - "source": [ - "Here is some code to make a better attention plot:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "s5hQWlbN3jGF" - }, - "outputs": [], - "source": [ - "#@title Labeled attention plots\n", - "def plot_attention(attention, sentence, predicted_sentence):\n", - " sentence = tf_lower_and_split_punct(sentence).numpy().decode().split()\n", - " predicted_sentence = predicted_sentence.numpy().decode().split() + ['[END]']\n", - " fig = plt.figure(figsize=(10, 10))\n", - " ax = fig.add_subplot(1, 1, 1)\n", - "\n", - " attention = attention[:len(predicted_sentence), :len(sentence)]\n", - "\n", - " ax.matshow(attention, cmap='viridis', vmin=0.0)\n", - "\n", - " fontdict = {'fontsize': 14}\n", - "\n", - " ax.set_xticklabels([''] + sentence, fontdict=fontdict, rotation=90)\n", - " ax.set_yticklabels([''] + predicted_sentence, fontdict=fontdict)\n", - "\n", - " ax.xaxis.set_major_locator(ticker.MultipleLocator(1))\n", - " ax.yaxis.set_major_locator(ticker.MultipleLocator(1))\n", - "\n", - " ax.set_xlabel('Input text')\n", - " ax.set_ylabel('Output text')\n", - " plt.suptitle('Attention weights')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "rrGawQv2eiA4" - }, - "outputs": [], - "source": [ - "i=0\n", - "plot_attention(result['attention'][i], input_text[i], result['text'][i])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JHBdOf9duumm" - }, - "source": [ - "Translate a few more sentences and plot them:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - 
"id": "WrAM0FDomq3E" - }, - "outputs": [], - "source": [ - "%%time\n", - "three_input_text = tf.constant([\n", - " # This is my life.\n", - " 'Esta es mi vida.',\n", - " # Are they still home?\n", - " '¿Todavía están en casa?',\n", - " # Try to find out.'\n", - " 'Tratar de descubrir.',\n", - "])\n", - "\n", - "result = translator.tf_translate(three_input_text)\n", - "\n", - "for tr in result['text']:\n", - " print(tr.numpy().decode())\n", - "\n", - "print()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-LjFp0AljOaZ" - }, - "outputs": [], - "source": [ - "result['text']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "v7QwIMrG-id2" - }, - "outputs": [], - "source": [ - "i = 0\n", - "plot_attention(result['attention'][i], three_input_text[i], result['text'][i])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zYVoVf8P-lr-" - }, - "outputs": [], - "source": [ - "i = 1\n", - "plot_attention(result['attention'][i], three_input_text[i], result['text'][i])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9sFvlZBk-me4" - }, - "outputs": [], - "source": [ - "i = 2\n", - "plot_attention(result['attention'][i], three_input_text[i], result['text'][i])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rA3xI3NzrRJt" - }, - "source": [ - "The short sentences often work well, but if the input is too long the model literally loses focus and stops providing reasonable predictions. There are two main reasons for this:\n", - "\n", - "1. The model was trained with teacher-forcing feeding the correct token at each step, regardless of the model's predictions. The model could be made more robust if it were sometimes fed its own predictions.\n", - "2. The model only has access to its previous output through the RNN state. If the RNN state gets corrupted, there's no way for the model to recover. [Transformers](transformer.ipynb) solve this by using self-attention in the encoder and decoder." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-FUHFLEvSMbG" - }, - "outputs": [], - "source": [ - "long_input_text = tf.constant([inp[-1]])\n", - "\n", - "import textwrap\n", - "print('Expected output:\\n', '\\n'.join(textwrap.wrap(targ[-1])))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lDa_8NaN_RUy" - }, - "outputs": [], - "source": [ - "result = translator.tf_translate(long_input_text)\n", - "\n", - "i = 0\n", - "plot_attention(result['attention'][i], long_input_text[i], result['text'][i])\n", - "_ = plt.suptitle('This never works')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mMA9Pp71nzH9" - }, - "source": [ - "## Export" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5rLMNOmsKoXe" - }, - "source": [ - "Once you have a model you're satisfied with, you might want to export it as a `tf.saved_model` for use outside of the Python program that created it.\n", - "\n", - "Since the model is a subclass of `tf.Module` (through `keras.Model`), and all the functionality for export is compiled in a `tf.function`, the model should export cleanly with `tf.saved_model.save`: " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NP2dNtEXJPEL" - }, - "source": [ - "Now that the function has been traced, it can be exported using `saved_model.save`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OyvxT5V0_X5B" - }, - "outputs": [], - "source": [ - "tf.saved_model.save(translator, 'translator',\n", - " signatures={'serving_default': translator.tf_translate})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-I0j3i3ekOba" - }, - "outputs": [], - "source": [ - "reloaded = tf.saved_model.load('translator')\n", - "result = reloaded.tf_translate(three_input_text)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GXZF__FZXJCm" - }, - "outputs": [], - "source": [ - "%%time\n", - "result = reloaded.tf_translate(three_input_text)\n", - "\n", - "for tr in result['text']:\n", - " print(tr.numpy().decode())\n", - "\n", - "print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RTe5P5ioMJwN" - }, - "source": [ - "## Next steps\n", - "\n", - "* [Download a different dataset](http://www.manythings.org/anki/) to experiment with translations, for example, English to German, or English to French.\n", - "* Experiment with training on a larger dataset, or using more epochs.\n", - "* Try the [transformer tutorial](transformer.ipynb) which implements a similar translation task but uses transformer layers instead of RNNs. This version also uses a `text.BertTokenizer` to implement wordpiece tokenization.\n", - "* Have a look at the [tensorflow_addons.seq2seq](https://www.tensorflow.org/addons/tutorials/networks_seq2seq_nmt) tutorial for implementing this sort of sequence-to-sequence model. The `tfa.seq2seq` package includes higher-level functionality like `seq2seq.BeamSearchDecoder`. " - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "last_runtime": { - "build_target": "//learning/deepmind/public/tools/ml_python:ml_notebook", - "kind": "private" - }, - "name": "nmt_with_attention.ipynb", - "private_outputs": true, - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/tutorials/text_classification_rnn.ipynb b/third_party/tensorflow-text/src/docs/tutorials/text_classification_rnn.ipynb deleted file mode 100644 index 881ecb0..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/text_classification_rnn.ipynb +++ /dev/null
@@ -1,724 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "hX4n9TsbGw-f" - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "0nbI5DtDGw-i" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9TnJztDZGw-n" - }, - "source": [ - "# Text classification with an RNN" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AfN3bMR5Gw-o" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/text_classification_rnn\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/text_classification_rnn.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/text_classification_rnn.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/text_classification_rnn.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "lUWearf0Gw-p" - }, - "source": [ - "This text classification tutorial trains a [recurrent neural network](https://developers.google.com/machine-learning/glossary/#recurrent_neural_network) on the [IMDB large movie review dataset](http://ai.stanford.edu/~amaas/data/sentiment/) for sentiment analysis." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_2VQo4bajwUU" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "z682XYsrjkY9" - }, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "import tensorflow_datasets as tfds\n", - "import tensorflow as tf\n", - "\n", - "tfds.disable_progress_bar()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1rXHa-w9JZhb" - }, - "source": [ - "Import `matplotlib` and create a helper function to plot graphs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Mp1Z7P9pYRSK" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "\n", - "def plot_graphs(history, metric):\n", - " plt.plot(history.history[metric])\n", - " plt.plot(history.history['val_'+metric], '')\n", - " plt.xlabel(\"Epochs\")\n", - " plt.ylabel(metric)\n", - " plt.legend([metric, 'val_'+metric])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pRmMubr0jrE2" - }, - "source": [ - "## Setup input pipeline\n", - "\n", - "\n", - "The IMDB large movie review dataset is a *binary classification* dataset—all the reviews have either a *positive* or *negative* sentiment.\n", - "\n", - "Download the dataset using [TFDS](https://www.tensorflow.org/datasets). See the [loading text tutorial](https://www.tensorflow.org/tutorials/load_data/text) for details on how to load this sort of data manually.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "SHRwRoP2nVHX" - }, - "outputs": [], - "source": [ - "dataset, info = tfds.load('imdb_reviews', with_info=True,\n", - " as_supervised=True)\n", - "train_dataset, test_dataset = dataset['train'], dataset['test']\n", - "\n", - "train_dataset.element_spec" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nWA4c2ir7g6p" - }, - "source": [ - "Initially this returns a dataset of (text, label pairs):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vd4_BGKyurao" - }, - "outputs": [], - "source": [ - "for example, label in train_dataset.take(1):\n", - " print('text: ', example.numpy())\n", - " print('label: ', label.numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z2qVJzcEluH_" - }, - "source": [ - "Next shuffle the data for training and create batches of these `(text, label)` pairs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dDsCaZCDYZgm" - }, - "outputs": [], - "source": [ - "BUFFER_SIZE = 10000\n", - "BATCH_SIZE = 64" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "VznrltNOnUc5" - }, - "outputs": [], - "source": [ - "train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)\n", - "test_dataset = test_dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jqkvdcFv41wC" - }, - "outputs": [], - "source": [ - "for example, label in train_dataset.take(1):\n", - " print('texts: ', example.numpy()[:3])\n", - " print()\n", - " print('labels: ', label.numpy()[:3])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s5eWCo88voPY" - }, - "source": [ - "## Create the text encoder" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "TFevcItw15P_" - }, - "source": [ - "The raw text loaded by `tfds` needs to be processed before it 
can be used in a model. The simplest way to process text for training is using the `TextVectorization` layer. This layer has many capabilities, but this tutorial sticks to the default behavior.\n", - "\n", - "Create the layer, and pass the dataset's text to the layer's `.adapt` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uC25Lu1Yvuqy" - }, - "outputs": [], - "source": [ - "VOCAB_SIZE = 1000\n", - "encoder = tf.keras.layers.TextVectorization(\n", - " max_tokens=VOCAB_SIZE)\n", - "encoder.adapt(train_dataset.map(lambda text, label: text))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IuQzVBbe3Ldu" - }, - "source": [ - "The `.adapt` method sets the layer's vocabulary. Here are the first 20 tokens. After the padding and unknown tokens, they're sorted by frequency: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tBoyjjWg0Ac9" - }, - "outputs": [], - "source": [ - "vocab = np.array(encoder.get_vocabulary())\n", - "vocab[:20]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mjId5pua3jHQ" - }, - "source": [ - "Once the vocabulary is set, the layer can encode text into indices. The tensors of indices are 0-padded to the longest sequence in the batch (unless you set a fixed `output_sequence_length`):" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RGc7C9WiwRWs" - }, - "outputs": [], - "source": [ - "encoded_example = encoder(example)[:3].numpy()\n", - "encoded_example" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "F5cjz0bS39IN" - }, - "source": [ - "With the default settings, the process is not completely reversible. There are two main reasons for that:\n", - "\n", - "1. The default value for `preprocessing.TextVectorization`'s `standardize` argument is `\"lower_and_strip_punctuation\"`.\n", - "2. The limited vocabulary size and lack of character-based fallback result in some unknown tokens." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "N_tD0QY5wXaK" - }, - "outputs": [], - "source": [ - "for n in range(3):\n", - " print(\"Original: \", example[n].numpy())\n", - " print(\"Round-trip: \", \" \".join(vocab[encoded_example[n]]))\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bjUqGVBxGw-t" - }, - "source": [ - "## Create the model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W7zsmInBOCPO" - }, - "source": [ - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bgs6nnSTGw-t" - }, - "source": [ - "Above is a diagram of the model. \n", - "\n", - "1. This model can be built as a `tf.keras.Sequential`.\n", - "\n", - "2. The first layer is the `encoder`, which converts the text to a sequence of token indices.\n", - "\n", - "3. After the encoder is an embedding layer. An embedding layer stores one vector per word. When called, it converts the sequences of word indices to sequences of vectors. These vectors are trainable. After training (on enough data), words with similar meanings often have similar vectors.\n", - "\n", - " This index-lookup is much more efficient than the equivalent operation of passing a one-hot encoded vector through a `tf.keras.layers.Dense` layer.\n", - "\n", - "4. A recurrent neural network (RNN) processes sequence input by iterating through the elements. 
RNNs pass the outputs from one timestep to their input on the next timestep.\n", - "\n", - " The `tf.keras.layers.Bidirectional` wrapper can also be used with an RNN layer. This propagates the input forward and backward through the RNN layer and then concatenates the final output. \n", - "\n", - " * The main advantage of a bidirectional RNN is that the signal from the beginning of the input doesn't need to be processed all the way through every timestep to affect the output. \n", - "\n", - " * The main disadvantage of a bidirectional RNN is that you can't efficiently stream predictions as words are being added to the end.\n", - "\n", - "5. After the RNN has converted the sequence to a single vector, the two `layers.Dense` layers do some final processing and convert from this vector representation to a single logit as the classification output. \n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "V4fodCI7soQi" - }, - "source": [ - "The code to implement this is below:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LwfoBkmRYcP3" - }, - "outputs": [], - "source": [ - "model = tf.keras.Sequential([\n", - " encoder,\n", - " tf.keras.layers.Embedding(\n", - " input_dim=len(encoder.get_vocabulary()),\n", - " output_dim=64,\n", - " # Use masking to handle the variable sequence lengths\n", - " mask_zero=True),\n", - " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),\n", - " tf.keras.layers.Dense(64, activation='relu'),\n", - " tf.keras.layers.Dense(1)\n", - "])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QIGmIGkkouUb" - }, - "source": [ - "Please note that a Keras sequential model is used here since all the layers in the model only have a single input and produce a single output. If you want to use a stateful RNN layer, you might want to build your model with the Keras functional API or model subclassing so that you can retrieve and reuse the RNN layer states. Please check the [Keras RNN guide](https://www.tensorflow.org/guide/keras/rnn#rnn_state_reuse) for more details." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kF-PsCk1LwjY" - }, - "source": [ - "The embedding layer [uses masking](https://www.tensorflow.org/guide/keras/masking_and_padding) to handle the varying sequence lengths. All the layers after the `Embedding` support masking:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "87a8-CwfKebw" - }, - "outputs": [], - "source": [ - "print([layer.supports_masking for layer in model.layers])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZlS0iaUIWLpI" - }, - "source": [ - "To confirm that this works as expected, evaluate a sentence twice. First, alone so there's no padding to mask:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "O41gw3KfWHus" - }, - "outputs": [], - "source": [ - "# predict on a sample text without padding.\n", - "\n", - "sample_text = ('The movie was cool. The animation and the graphics '\n", - " 'were out of this world. I would recommend this movie.')\n", - "predictions = model.predict(np.array([sample_text]))\n", - "print(predictions[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "K0VQmGnEWcuz" - }, - "source": [ - "Now, evaluate it again in a batch with a longer sentence. 
The result should be identical:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UIgpuTeFNDzq" - }, - "outputs": [], - "source": [ - "# predict on a sample text with padding\n", - "\n", - "padding = \"the \" * 2000\n", - "predictions = model.predict(np.array([sample_text, padding]))\n", - "print(predictions[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "sRI776ZcH3Tf" - }, - "source": [ - "Compile the Keras model to configure the training process:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "kj2xei41YZjC" - }, - "outputs": [], - "source": [ - "model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n", - " optimizer=tf.keras.optimizers.Adam(1e-4),\n", - " metrics=['accuracy'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zIwH3nto596k" - }, - "source": [ - "## Train the model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hw86wWS4YgR2" - }, - "outputs": [], - "source": [ - "history = model.fit(train_dataset, epochs=10,\n", - " validation_data=test_dataset,\n", - " validation_steps=30)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BaNbXi43YgUT" - }, - "outputs": [], - "source": [ - "test_loss, test_acc = model.evaluate(test_dataset)\n", - "\n", - "print('Test Loss:', test_loss)\n", - "print('Test Accuracy:', test_acc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "OZmwt_mzaQJk" - }, - "outputs": [], - "source": [ - "plt.figure(figsize=(16, 8))\n", - "plt.subplot(1, 2, 1)\n", - "plot_graphs(history, 'accuracy')\n", - "plt.ylim(None, 1)\n", - "plt.subplot(1, 2, 2)\n", - "plot_graphs(history, 'loss')\n", - "plt.ylim(0, None)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DwSE_386uhxD" - }, - "source": [ - "Run a prediction on a new sentence:\n", - "\n", - "If the prediction is \u003e= 0.0, it is positive else it is negative." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZXgfQSgRW6zU" - }, - "outputs": [], - "source": [ - "sample_text = ('The movie was cool. The animation and the graphics '\n", - " 'were out of this world. I would recommend this movie.')\n", - "predictions = model.predict(np.array([sample_text]))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7g1evcaRpTKm" - }, - "source": [ - "## Stack two or more LSTM layers\n", - "\n", - "Keras recurrent layers have two available modes that are controlled by the `return_sequences` constructor argument:\n", - "\n", - "* If `False` it returns only the last output for each input sequence (a 2D tensor of shape (batch_size, output_features)). 
This is the default, used in the previous model.\n", - "\n", - "* If `True` the full sequences of successive outputs for each timestep is returned (a 3D tensor of shape `(batch_size, timesteps, output_features)`).\n", - "\n", - "Here is what the flow of information looks like with `return_sequences=True`:\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wbSClCrG1z8l" - }, - "source": [ - "The interesting thing about using an `RNN` with `return_sequences=True` is that the output still has 3-axes, like the input, so it can be passed to another RNN layer, like this:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jo1jjO3vn0jo" - }, - "outputs": [], - "source": [ - "model = tf.keras.Sequential([\n", - " encoder,\n", - " tf.keras.layers.Embedding(len(encoder.get_vocabulary()), 64, mask_zero=True),\n", - " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64, return_sequences=True)),\n", - " tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n", - " tf.keras.layers.Dense(64, activation='relu'),\n", - " tf.keras.layers.Dropout(0.5),\n", - " tf.keras.layers.Dense(1)\n", - "])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hEPV5jVGp-is" - }, - "outputs": [], - "source": [ - "model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n", - " optimizer=tf.keras.optimizers.Adam(1e-4),\n", - " metrics=['accuracy'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LeSE-YjdqAeN" - }, - "outputs": [], - "source": [ - "history = model.fit(train_dataset, epochs=10,\n", - " validation_data=test_dataset,\n", - " validation_steps=30)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_LdwilM1qPM3" - }, - "outputs": [], - "source": [ - "test_loss, test_acc = model.evaluate(test_dataset)\n", - "\n", - "print('Test Loss:', test_loss)\n", - "print('Test Accuracy:', test_acc)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ykUKnAoqbycW" - }, - "outputs": [], - "source": [ - "# predict on a sample text without padding.\n", - "\n", - "sample_text = ('The movie was not good. The animation and the graphics '\n", - " 'were terrible. I would not recommend this movie.')\n", - "predictions = model.predict(np.array([sample_text]))\n", - "print(predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_YYub0EDtwCu" - }, - "outputs": [], - "source": [ - "plt.figure(figsize=(16, 6))\n", - "plt.subplot(1, 2, 1)\n", - "plot_graphs(history, 'accuracy')\n", - "plt.subplot(1, 2, 2)\n", - "plot_graphs(history, 'loss')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9xvpE3BaGw_V" - }, - "source": [ - "Check out other existing recurrent layers such as [GRU layers](https://www.tensorflow.org/api_docs/python/tf/keras/layers/GRU).\n", - "\n", - "If you're interestied in building custom RNNs, see the [Keras RNN Guide](https://www.tensorflow.org/guide/keras/rnn).\n" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "text_classification_rnn.ipynb", - "provenance": [], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/tutorials/text_generation.ipynb b/third_party/tensorflow-text/src/docs/tutorials/text_generation.ipynb
deleted file mode 100644
index 57462a4b..0000000
--- a/third_party/tensorflow-text/src/docs/tutorials/text_generation.ipynb
+++ /dev/null
@@ -1,1388 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "t09eeeR5prIJ" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "GCCk8_dHpuNf" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ovpZyIhNIgoq" - }, - "source": [ - "# Text generation with an RNN" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hcD2nPQvPOFM" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/text_generation\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/text_generation.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/text_generation.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/text_generation.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BwpJ5IffzRG6" - }, - "source": [ - "This tutorial demonstrates how to generate text using a character-based RNN. You will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n", - "\n", - "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime \u003e Change runtime type \u003e Hardware accelerator \u003e GPU*.\n", - "\n", - "This tutorial includes runnable code implemented using [tf.keras](https://www.tensorflow.org/guide/keras/sequential_model) and [eager execution](https://www.tensorflow.org/guide/eager). 
The following is the sample output when the model in this tutorial trained for 30 epochs, and started with the prompt \"Q\":" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "HcygKkEVZBaa" - }, - "source": [ - "\u003cpre\u003e\n", - "QUEENE:\n", - "I had thought thou hadst a Roman; for the oracle,\n", - "Thus by All bids the man against the word,\n", - "Which are so weak of care, by old care done;\n", - "Your children were in your holy love,\n", - "And the precipitation through the bleeding throne.\n", - "\n", - "BISHOP OF ELY:\n", - "Marry, and will, my lord, to weep in such a one were prettiest;\n", - "Yet now I was adopted heir\n", - "Of the world's lamentable day,\n", - "To watch the next way with his father with his face?\n", - "\n", - "ESCALUS:\n", - "The cause why then we are all resolved more sons.\n", - "\n", - "VOLUMNIA:\n", - "O, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, it is no sin it should be dead,\n", - "And love and pale as any will to that word.\n", - "\n", - "QUEEN ELIZABETH:\n", - "But how long have I heard the soul for this world,\n", - "And show his hands of life be proved to stand.\n", - "\n", - "PETRUCHIO:\n", - "I say he look'd on, if I must be content\n", - "To stay him from the fatal of our country's bliss.\n", - "His lordship pluck'd from this sentence then for prey,\n", - "And then let us twain, being the moon,\n", - "were she such a case as fills m\n", - "\u003c/pre\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_bGsCP9DZFQ5" - }, - "source": [ - "While some of the sentences are grammatical, most do not make sense. The model has not learned the meaning of words, but consider:\n", - "\n", - "* The model is character-based. When training started, the model did not know how to spell an English word, or that words were even a unit of text.\n", - "\n", - "* The structure of the output resembles a play—blocks of text generally begin with a speaker name, in all capital letters similar to the dataset.\n", - "\n", - "* As demonstrated below, the model is trained on small batches of text (100 characters each), and is still able to generate a longer sequence of text with coherent structure." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "srXC6pLGLwS6" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WGyKZj3bzf9p" - }, - "source": [ - "### Import TensorFlow and other libraries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yG_n40gFzf9s" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "from tensorflow.keras.layers.experimental import preprocessing\n", - "\n", - "import numpy as np\n", - "import os\n", - "import time" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EHDoRoc5PKWz" - }, - "source": [ - "### Download the Shakespeare dataset\n", - "\n", - "Change the following line to run this code on your own data." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pD_55cOxLkAb" - }, - "outputs": [], - "source": [ - "path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UHjdCjDuSvX_" - }, - "source": [ - "### Read the data\n", - "\n", - "First, look in the text:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "aavnuByVymwK" - }, - "outputs": [], - "source": [ - "# Read, then decode for py2 compat.\n", - "text = open(path_to_file, 'rb').read().decode(encoding='utf-8')\n", - "# length of text is the number of characters in it\n", - "print(f'Length of text: {len(text)} characters')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Duhg9NrUymwO" - }, - "outputs": [], - "source": [ - "# Take a look at the first 250 characters in text\n", - "print(text[:250])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IlCgQBRVymwR" - }, - "outputs": [], - "source": [ - "# The unique characters in the file\n", - "vocab = sorted(set(text))\n", - "print(f'{len(vocab)} unique characters')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rNnrKn_lL-IJ" - }, - "source": [ - "## Process the text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LFjSVAlWzf-N" - }, - "source": [ - "### Vectorize the text\n", - "\n", - "Before training, you need to convert the strings to a numerical representation. \n", - "\n", - "The `preprocessing.StringLookup` layer can convert each character into a numeric ID. It just needs the text to be split into tokens first." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a86OoYtO01go" - }, - "outputs": [], - "source": [ - "example_texts = ['abcdefg', 'xyz']\n", - "\n", - "chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')\n", - "chars" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1s4f1q3iqY8f" - }, - "source": [ - "Now create the `preprocessing.StringLookup` layer:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6GMlCe3qzaL9" - }, - "outputs": [], - "source": [ - "ids_from_chars = preprocessing.StringLookup(\n", - " vocabulary=list(vocab), mask_token=None)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZmX_jbgQqfOi" - }, - "source": [ - "It converts form tokens to character IDs:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WLv5Q_2TC2pc" - }, - "outputs": [], - "source": [ - "ids = ids_from_chars(chars)\n", - "ids" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tZfqhkYCymwX" - }, - "source": [ - "Since the goal of this tutorial is to generate text, it will also be important to invert this representation and recover human-readable strings from it. For this you can use `preprocessing.StringLookup(..., invert=True)`. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uenivzwqsDhp" - }, - "source": [ - "Note: Here instead of passing the original vocabulary generated with `sorted(set(text))` use the `get_vocabulary()` method of the `preprocessing.StringLookup` layer so that the `[UNK]` tokens is set the same way." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Wd2m3mqkDjRj" - }, - "outputs": [], - "source": [ - "chars_from_ids = tf.keras.layers.experimental.preprocessing.StringLookup(\n", - " vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "pqTDDxS-s-H8" - }, - "source": [ - "This layer recovers the characters from the vectors of IDs, and returns them as a `tf.RaggedTensor` of characters:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "c2GCh0ySD44s" - }, - "outputs": [], - "source": [ - "chars = chars_from_ids(ids)\n", - "chars" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-FeW5gqutT3o" - }, - "source": [ - "You can `tf.strings.reduce_join` to join the characters back into strings. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zxYI-PeltqKP" - }, - "outputs": [], - "source": [ - "tf.strings.reduce_join(chars, axis=-1).numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "w5apvBDn9Ind" - }, - "outputs": [], - "source": [ - "def text_from_ids(ids):\n", - " return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "bbmsf23Bymwe" - }, - "source": [ - "### The prediction task" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wssHQ1oGymwe" - }, - "source": [ - "Given a character, or a sequence of characters, what is the most probable next character? This is the task you're training the model to perform. The input to the model will be a sequence of characters, and you train the model to predict the output—the following character at each time step.\n", - "\n", - "Since RNNs maintain an internal state that depends on the previously seen elements, given all the characters computed until this moment, what is the next character?\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hgsVvVxnymwf" - }, - "source": [ - "### Create training examples and targets\n", - "\n", - "Next divide the text into example sequences. Each input sequence will contain `seq_length` characters from the text.\n", - "\n", - "For each input sequence, the corresponding targets contain the same length of text, except shifted one character to the right.\n", - "\n", - "So break the text into chunks of `seq_length+1`. For example, say `seq_length` is 4 and our text is \"Hello\". The input sequence would be \"Hell\", and the target sequence \"ello\".\n", - "\n", - "To do this first use the `tf.data.Dataset.from_tensor_slices` function to convert the text vector into a stream of character indices." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UopbsKi88tm5" - }, - "outputs": [], - "source": [ - "all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))\n", - "all_ids" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "qmxrYDCTy-eL" - }, - "outputs": [], - "source": [ - "ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cjH5v45-yqqH" - }, - "outputs": [], - "source": [ - "for ids in ids_dataset.take(10):\n", - " print(chars_from_ids(ids).numpy().decode('utf-8'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "C-G2oaTxy6km" - }, - "outputs": [], - "source": [ - "seq_length = 100\n", - "examples_per_epoch = len(text)//(seq_length+1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-ZSYAcQV8OGP" - }, - "source": [ - "The `batch` method lets you easily convert these individual characters to sequences of the desired size." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BpdjRO2CzOfZ" - }, - "outputs": [], - "source": [ - "sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)\n", - "\n", - "for seq in sequences.take(1):\n", - " print(chars_from_ids(seq))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5PHW902-4oZt" - }, - "source": [ - "It's easier to see what this is doing if you join the tokens back into strings:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QO32cMWu4a06" - }, - "outputs": [], - "source": [ - "for seq in sequences.take(5):\n", - " print(text_from_ids(seq).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UbLcIPBj_mWZ" - }, - "source": [ - "For training you'll need a dataset of `(input, label)` pairs. Where `input` and \n", - "`label` are sequences. At each time step the input is the current character and the label is the next character. \n", - "\n", - "Here's a function that takes a sequence as input, duplicates, and shifts it to align the input and label for each timestep:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9NGu-FkO_kYU" - }, - "outputs": [], - "source": [ - "def split_input_target(sequence):\n", - " input_text = sequence[:-1]\n", - " target_text = sequence[1:]\n", - " return input_text, target_text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WxbDTJTw5u_P" - }, - "outputs": [], - "source": [ - "split_input_target(list(\"Tensorflow\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "B9iKPXkw5xwa" - }, - "outputs": [], - "source": [ - "dataset = sequences.map(split_input_target)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GNbw-iR0ymwj" - }, - "outputs": [], - "source": [ - "for input_example, target_example in dataset.take(1):\n", - " print(\"Input :\", text_from_ids(input_example).numpy())\n", - " print(\"Target:\", text_from_ids(target_example).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MJdfPmdqzf-R" - }, - "source": [ - "### Create training batches\n", - "\n", - "You used `tf.data` to split the text into manageable sequences. But before feeding this data into the model, you need to shuffle the data and pack it into batches." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "p2pGotuNzf-S" - }, - "outputs": [], - "source": [ - "# Batch size\n", - "BATCH_SIZE = 64\n", - "\n", - "# Buffer size to shuffle the dataset\n", - "# (TF data is designed to work with possibly infinite sequences,\n", - "# so it doesn't attempt to shuffle the entire sequence in memory. Instead,\n", - "# it maintains a buffer in which it shuffles elements).\n", - "BUFFER_SIZE = 10000\n", - "\n", - "dataset = (\n", - " dataset\n", - " .shuffle(BUFFER_SIZE)\n", - " .batch(BATCH_SIZE, drop_remainder=True)\n", - " .prefetch(tf.data.experimental.AUTOTUNE))\n", - "\n", - "dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "r6oUuElIMgVx" - }, - "source": [ - "## Build The Model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "m8gPwEjRzf-Z" - }, - "source": [ - "This section defines the model as a `keras.Model` subclass (For details see [Making new Layers and Models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models)). \n", - "\n", - "This model has three layers:\n", - "\n", - "* `tf.keras.layers.Embedding`: The input layer. A trainable lookup table that will map each character-ID to a vector with `embedding_dim` dimensions;\n", - "* `tf.keras.layers.GRU`: A type of RNN with size `units=rnn_units` (You can also use an LSTM layer here.)\n", - "* `tf.keras.layers.Dense`: The output layer, with `vocab_size` outputs. It outputs one logit for each character in the vocabulary. These are the log-likelihood of each character according to the model." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zHT8cLh7EAsg" - }, - "outputs": [], - "source": [ - "# Length of the vocabulary in chars\n", - "vocab_size = len(vocab)\n", - "\n", - "# The embedding dimension\n", - "embedding_dim = 256\n", - "\n", - "# Number of RNN units\n", - "rnn_units = 1024" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "wj8HQ2w8z4iO" - }, - "outputs": [], - "source": [ - "class MyModel(tf.keras.Model):\n", - " def __init__(self, vocab_size, embedding_dim, rnn_units):\n", - " super().__init__(self)\n", - " self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)\n", - " self.gru = tf.keras.layers.GRU(rnn_units,\n", - " return_sequences=True,\n", - " return_state=True)\n", - " self.dense = tf.keras.layers.Dense(vocab_size)\n", - "\n", - " def call(self, inputs, states=None, return_state=False, training=False):\n", - " x = inputs\n", - " x = self.embedding(x, training=training)\n", - " if states is None:\n", - " states = self.gru.get_initial_state(x)\n", - " x, states = self.gru(x, initial_state=states, training=training)\n", - " x = self.dense(x, training=training)\n", - "\n", - " if return_state:\n", - " return x, states\n", - " else:\n", - " return x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IX58Xj9z47Aw" - }, - "outputs": [], - "source": [ - "model = MyModel(\n", - " # Be sure the vocabulary size matches the `StringLookup` layers.\n", - " vocab_size=len(ids_from_chars.get_vocabulary()),\n", - " embedding_dim=embedding_dim,\n", - " rnn_units=rnn_units)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RkA5upJIJ7W7" - }, - "source": [ - "For each character the model looks up the embedding, runs the GRU one timestep with the embedding as input, and applies the dense layer to generate logits predicting the log-likelihood of the next 
character:\n", - "\n", - "" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gKbfm04amhXk" - }, - "source": [ - "Note: For training you could use a `keras.Sequential` model here. To generate text later you'll need to manage the RNN's internal state. It's simpler to include the state input and output options upfront, than it is to rearrange the model architecture later. For more details see the [Keras RNN guide](https://www.tensorflow.org/guide/keras/rnn#rnn_state_reuse)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-ubPo0_9Prjb" - }, - "source": [ - "## Try the model\n", - "\n", - "Now run the model to see that it behaves as expected.\n", - "\n", - "First check the shape of the output:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "C-_70kKAPrPU" - }, - "outputs": [], - "source": [ - "for input_example_batch, target_example_batch in dataset.take(1):\n", - " example_batch_predictions = model(input_example_batch)\n", - " print(example_batch_predictions.shape, \"# (batch_size, sequence_length, vocab_size)\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Q6NzLBi4VM4o" - }, - "source": [ - "In the above example the sequence length of the input is `100` but the model can be run on inputs of any length:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "vPGmAAXmVLGC" - }, - "outputs": [], - "source": [ - "model.summary()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uwv0gEkURfx1" - }, - "source": [ - "To get actual predictions from the model you need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.\n", - "\n", - "Note: It is important to _sample_ from this distribution as taking the _argmax_ of the distribution can easily get the model stuck in a loop.\n", - "\n", - "Try it for the first example in the batch:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4V4MfFg0RQJg" - }, - "outputs": [], - "source": [ - "sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)\n", - "sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QM1Vbxs_URw5" - }, - "source": [ - "This gives us, at each timestep, a prediction of the next character index:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YqFMUQc_UFgM" - }, - "outputs": [], - "source": [ - "sampled_indices" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LfLtsP3mUhCG" - }, - "source": [ - "Decode these to see the text predicted by this untrained model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xWcFwPwLSo05" - }, - "outputs": [], - "source": [ - "print(\"Input:\\n\", text_from_ids(input_example_batch[0]).numpy())\n", - "print()\n", - "print(\"Next Char Predictions:\\n\", text_from_ids(sampled_indices).numpy())" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LJL0Q0YPY6Ee" - }, - "source": [ - "## Train the model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YCbHQHiaa4Ic" - }, - "source": [ - "At this point the problem can be treated as a standard classification problem. Given the previous RNN state, and the input this time step, predict the class of the next character." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "trpqTWyvk0nr" - }, - "source": [ - "### Attach an optimizer, and a loss function" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UAjbjY03eiQ4" - }, - "source": [ - "The standard `tf.keras.losses.sparse_categorical_crossentropy` loss function works in this case because it is applied across the last dimension of the predictions.\n", - "\n", - "Because your model returns logits, you need to set the `from_logits` flag.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZOeWdgxNFDXq" - }, - "outputs": [], - "source": [ - "loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4HrXTACTdzY-" - }, - "outputs": [], - "source": [ - "example_batch_loss = loss(target_example_batch, example_batch_predictions)\n", - "mean_loss = example_batch_loss.numpy().mean()\n", - "print(\"Prediction shape: \", example_batch_predictions.shape, \" # (batch_size, sequence_length, vocab_size)\")\n", - "print(\"Mean loss: \", mean_loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vkvUIneTFiow" - }, - "source": [ - "A newly initialized model shouldn't be too sure of itself, the output logits should all have similar magnitudes. To confirm this you can check that the exponential of the mean loss is approximately equal to the vocabulary size. A much higher loss means the model is sure of its wrong answers, and is badly initialized:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MAJfS5YoFiHf" - }, - "outputs": [], - "source": [ - "tf.exp(mean_loss).numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "jeOXriLcymww" - }, - "source": [ - "Configure the training procedure using the `tf.keras.Model.compile` method. Use `tf.keras.optimizers.Adam` with default arguments and the loss function." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "DDl1_Een6rL0" - }, - "outputs": [], - "source": [ - "model.compile(optimizer='adam', loss=loss)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ieSJdchZggUj" - }, - "source": [ - "### Configure checkpoints" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "C6XBUUavgF56" - }, - "source": [ - "Use a `tf.keras.callbacks.ModelCheckpoint` to ensure that checkpoints are saved during training:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "W6fWTriUZP-n" - }, - "outputs": [], - "source": [ - "# Directory where the checkpoints will be saved\n", - "checkpoint_dir = './training_checkpoints'\n", - "# Name of the checkpoint files\n", - "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch}\")\n", - "\n", - "checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(\n", - " filepath=checkpoint_prefix,\n", - " save_weights_only=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "3Ky3F_BhgkTW" - }, - "source": [ - "### Execute the training" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "IxdOA-rgyGvs" - }, - "source": [ - "To keep training time reasonable, use 10 epochs to train the model. In Colab, set the runtime to GPU for faster training." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7yGBE2zxMMHs" - }, - "outputs": [], - "source": [ - "EPOCHS = 20" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UK-hmKjYVoll" - }, - "outputs": [], - "source": [ - "history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kKkD5M6eoSiN" - }, - "source": [ - "## Generate text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oIdQ8c8NvMzV" - }, - "source": [ - "The simplest way to generate text with this model is to run it in a loop, and keep track of the model's internal state as you execute it.\n", - "\n", - "\n", - "\n", - "Each time you call the model you pass in some text and an internal state. The model returns a prediction for the next character and its new state. Pass the prediction and state back in to continue generating text.\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DjGz1tDkzf-u" - }, - "source": [ - "The following makes a single step prediction:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iSBU1tHmlUSs" - }, - "outputs": [], - "source": [ - "class OneStep(tf.keras.Model):\n", - " def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):\n", - " super().__init__()\n", - " self.temperature = temperature\n", - " self.model = model\n", - " self.chars_from_ids = chars_from_ids\n", - " self.ids_from_chars = ids_from_chars\n", - "\n", - " # Create a mask to prevent \"[UNK]\" from being generated.\n", - " skip_ids = self.ids_from_chars(['[UNK]'])[:, None]\n", - " sparse_mask = tf.SparseTensor(\n", - " # Put a -inf at each bad index.\n", - " values=[-float('inf')]*len(skip_ids),\n", - " indices=skip_ids,\n", - " # Match the shape to the vocabulary\n", - " dense_shape=[len(ids_from_chars.get_vocabulary())])\n", - " self.prediction_mask = tf.sparse.to_dense(sparse_mask)\n", - "\n", - " @tf.function\n", - " def generate_one_step(self, inputs, states=None):\n", - " # Convert strings to token IDs.\n", - " input_chars = tf.strings.unicode_split(inputs, 'UTF-8')\n", - " input_ids = self.ids_from_chars(input_chars).to_tensor()\n", - "\n", - " # Run the model.\n", - " # predicted_logits.shape is [batch, char, next_char_logits]\n", - " predicted_logits, states = self.model(inputs=input_ids, states=states,\n", - " return_state=True)\n", - " # Only use the last prediction.\n", - " predicted_logits = predicted_logits[:, -1, :]\n", - " predicted_logits = predicted_logits/self.temperature\n", - " # Apply the prediction mask: prevent \"[UNK]\" from being generated.\n", - " predicted_logits = predicted_logits + self.prediction_mask\n", - "\n", - " # Sample the output logits to generate token IDs.\n", - " predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)\n", - " predicted_ids = tf.squeeze(predicted_ids, axis=-1)\n", - "\n", - " # Convert from token ids to characters\n", - " predicted_chars = self.chars_from_ids(predicted_ids)\n", - "\n", - " # Return the characters and model state.\n", - " return predicted_chars, states" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fqMOuDutnOxK" - }, - "outputs": [], - "source": [ - "one_step_model = OneStep(model, chars_from_ids, ids_from_chars)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "p9yDoa0G3IgQ" - }, - "source": [ - "Run it in a loop to generate some text. 
Looking at the generated text, you'll see the model knows when to capitalize, make paragraphs and imitates a Shakespeare-like writing vocabulary. With the small number of training epochs, it has not yet learned to form coherent sentences." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ST7PSyk9t1mT" - }, - "outputs": [], - "source": [ - "start = time.time()\n", - "states = None\n", - "next_char = tf.constant(['ROMEO:'])\n", - "result = [next_char]\n", - "\n", - "for n in range(1000):\n", - " next_char, states = one_step_model.generate_one_step(next_char, states=states)\n", - " result.append(next_char)\n", - "\n", - "result = tf.strings.join(result)\n", - "end = time.time()\n", - "print(result[0].numpy().decode('utf-8'), '\\n\\n' + '_'*80)\n", - "print('\\nRun time:', end - start)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AM2Uma_-yVIq" - }, - "source": [ - "The easiest thing you can do to improve the results is to train it for longer (try `EPOCHS = 30`).\n", - "\n", - "You can also experiment with a different start string, try adding another RNN layer to improve the model's accuracy, or adjust the temperature parameter to generate more or less random predictions." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_OfbI4aULmuj" - }, - "source": [ - "If you want the model to generate text *faster* the easiest thing you can do is batch the text generation. In the example below the model generates 5 outputs in about the same time it took to generate 1 above. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZkLu7Y8UCMT7" - }, - "outputs": [], - "source": [ - "start = time.time()\n", - "states = None\n", - "next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])\n", - "result = [next_char]\n", - "\n", - "for n in range(1000):\n", - " next_char, states = one_step_model.generate_one_step(next_char, states=states)\n", - " result.append(next_char)\n", - "\n", - "result = tf.strings.join(result)\n", - "end = time.time()\n", - "print(result, '\\n\\n' + '_'*80)\n", - "print('\\nRun time:', end - start)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UlUQzwu6EXam" - }, - "source": [ - "## Export the generator\n", - "\n", - "This single-step model can easily be [saved and restored](https://www.tensorflow.org/guide/saved_model), allowing you to use it anywhere a `tf.saved_model` is accepted." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3Grk32H_CzsC" - }, - "outputs": [], - "source": [ - "tf.saved_model.save(one_step_model, 'one_step')\n", - "one_step_reloaded = tf.saved_model.load('one_step')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_Z9bb_wX6Uuu" - }, - "outputs": [], - "source": [ - "states = None\n", - "next_char = tf.constant(['ROMEO:'])\n", - "result = [next_char]\n", - "\n", - "for n in range(100):\n", - " next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)\n", - " result.append(next_char)\n", - "\n", - "print(tf.strings.join(result)[0].numpy().decode(\"utf-8\"))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Y4QwTjAM6A2O" - }, - "source": [ - "## Advanced: Customized Training\n", - "\n", - "The above training procedure is simple, but does not give you much control.\n", - "It uses teacher-forcing which prevents bad predictions from being fed back to the model, so the model never learns to recover from mistakes.\n", - "\n", - "So now that you've seen how to run the model manually next you'll implement the training loop. This gives a starting point if, for example, you want to implement _curriculum learning_ to help stabilize the model's open-loop output.\n", - "\n", - "The most important part of a custom training loop is the train step function.\n", - "\n", - "Use `tf.GradientTape` to track the gradients. You can learn more about this approach by reading the [eager execution guide](https://www.tensorflow.org/guide/eager).\n", - "\n", - "The basic procedure is:\n", - "\n", - "1. Execute the model and calculate the loss under a `tf.GradientTape`.\n", - "2. Calculate the updates and apply them to the model using the optimizer." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "x0pZ101hjwW0" - }, - "outputs": [], - "source": [ - "class CustomTraining(MyModel):\n", - " @tf.function\n", - " def train_step(self, inputs):\n", - " inputs, labels = inputs\n", - " with tf.GradientTape() as tape:\n", - " predictions = self(inputs, training=True)\n", - " loss = self.loss(labels, predictions)\n", - " grads = tape.gradient(loss, model.trainable_variables)\n", - " self.optimizer.apply_gradients(zip(grads, model.trainable_variables))\n", - "\n", - " return {'loss': loss}" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4Oc-eJALcK8B" - }, - "source": [ - "The above implementation of the `train_step` method follows [Keras' `train_step` conventions](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit). This is optional, but it allows you to change the behavior of the train step and still use keras' `Model.compile` and `Model.fit` methods." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XKyWiZ_Lj7w5" - }, - "outputs": [], - "source": [ - "model = CustomTraining(\n", - " vocab_size=len(ids_from_chars.get_vocabulary()),\n", - " embedding_dim=embedding_dim,\n", - " rnn_units=rnn_units)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "U817KUm7knlm" - }, - "outputs": [], - "source": [ - "model.compile(optimizer = tf.keras.optimizers.Adam(),\n", - " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "o694aoBPnEi9" - }, - "outputs": [], - "source": [ - "model.fit(dataset, epochs=1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "W8nAtKHVoInR" - }, - "source": [ - "Or if you need more control, you can write your own complete custom training loop:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "d4tSNwymzf-q" - }, - "outputs": [], - "source": [ - "EPOCHS = 10\n", - "\n", - "mean = tf.metrics.Mean()\n", - "\n", - "for epoch in range(EPOCHS):\n", - " start = time.time()\n", - "\n", - " mean.reset_states()\n", - " for (batch_n, (inp, target)) in enumerate(dataset):\n", - " logs = model.train_step([inp, target])\n", - " mean.update_state(logs['loss'])\n", - "\n", - " if batch_n % 50 == 0:\n", - " template = f\"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}\"\n", - " print(template)\n", - "\n", - " # saving (checkpoint) the model every 5 epochs\n", - " if (epoch + 1) % 5 == 0:\n", - " model.save_weights(checkpoint_prefix.format(epoch=epoch))\n", - "\n", - " print()\n", - " print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')\n", - " print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')\n", - " print(\"_\"*80)\n", - "\n", - "model.save_weights(checkpoint_prefix.format(epoch=epoch))" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "text_generation.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/tutorials/text_similarity.ipynb b/third_party/tensorflow-text/src/docs/tutorials/text_similarity.ipynb
deleted file mode 100644
index 6f9d08a..0000000
--- a/third_party/tensorflow-text/src/docs/tutorials/text_similarity.ipynb
+++ /dev/null
@@ -1,221 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "Tce3stUlHN0L" - }, - "source": [ - "##### Copyright 2020 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "tuOe1ymfHZPu" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qFdPvlXBOdUN" - }, - "source": [ - "# TF.Text Metrics" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfBg1C5NB3X0" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/text_similarity\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/text_similarity.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/text_similarity.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/text_similarity.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xHxb-dlhMIzW" - }, - "source": [ - "## Overview\n", - "\n", - "TensorFlow Text provides a collection of text-metrics-related classes and ops ready to use with TensorFlow 2.0. The library contains implementations of text-similarity metrics such as ROUGE-L, required for automatic evaluation of text generation models.\n", - "\n", - "The benefit of using these ops in evaluating your models is that they are compatible with TPU evaluation and work nicely with TF streaming metric APIs." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MUXex9ctTuDB" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "K_8D_DtQJ0kC" - }, - "outputs": [], - "source": [ - "!pip install -q tensorflow-text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IqR2PQG4ZaZ0" - }, - "outputs": [], - "source": [ - "import tensorflow as tf\n", - "import tensorflow_text as text" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QKp40qS-DGEZ" - }, - "source": [ - "### ROUGE-L\n", - "\n", - "The Rouge-L metric is a score from 0 to 1 indicating how similar two sequences are, based on the length of the longest common subsequence (LCS). In particular, Rouge-L is the weighted harmonic mean (or f-measure) combining the LCS precision (the percentage of the hypothesis sequence covered by the LCS) and the LCS recall (the percentage of the reference sequence covered by the LCS).\n", - "\n", - "Source: https://www.microsoft.com/en-us/research/publication/rouge-a-package-for-automatic-evaluation-of-summaries/\n", - "\n", - "The TF.Text implementation returns the F-measure, Precision, and Recall for each (hypothesis, reference) pair.\n", - "\n", - "Consider the following hypothesis/reference pair:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WUgEkGHRKafG" - }, - "outputs": [], - "source": [ - "hypotheses = tf.ragged.constant([['captain', 'of', 'the', 'delta', 'flight'],\n", - " ['the', '1990', 'transcript']])\n", - "references = tf.ragged.constant([['delta', 'air', 'lines', 'flight'],\n", - " ['this', 'concludes', 'the', 'transcript']])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qeiXnY-_Khp1" - }, - "source": [ - "The hypotheses and references are expected to be tf.RaggedTensors of tokens. Tokens are required instead of raw sentences because no single tokenization strategy fits all tasks.\n", - "\n", - "Now we can call text.metrics.rouge_l and get our result back:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LS_NigzqKgtT" - }, - "outputs": [], - "source": [ - "result = text.metrics.rouge_l(hypotheses, references)\n", - "print('F-Measure: %s' % result.f_measure)\n", - "print('P-Measure: %s' % result.p_measure)\n", - "print('R-Measure: %s' % result.r_measure)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FQoprhImKoD0" - }, - "source": [ - "ROUGE-L has an additional hyperparameter, alpha, which determines the weight of the harmonic mean used for computing the F-Measure. Values closer to 0 treat Recall as more important and values closer to 1 treat Precision as more important. alpha defaults to .5, which corresponds to equal weight for Precision and Recall." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Q2ZnjOIgKnnS" - }, - "outputs": [], - "source": [ - "# Compute ROUGE-L with alpha=0\n", - "result = text.metrics.rouge_l(hypotheses, references, alpha=0)\n", - "print('F-Measure (alpha=0): %s' % result.f_measure)\n", - "print('P-Measure (alpha=0): %s' % result.p_measure)\n", - "print('R-Measure (alpha=0): %s' % result.r_measure)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iYUYiLJhKseb" - }, - "outputs": [], - "source": [ - "# Compute ROUGE-L with alpha=1\n", - "result = text.metrics.rouge_l(hypotheses, references, alpha=1)\n", - "print('F-Measure (alpha=1): %s' % result.f_measure)\n", - "print('P-Measure (alpha=1): %s' % result.p_measure)\n", - "print('R-Measure (alpha=1): %s' % result.r_measure)" - ] - } - ], - "metadata": { - "colab": { - "collapsed_sections": [ - "Tce3stUlHN0L" - ], - "name": "text_similarity.ipynb", - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/tutorials/transformer.ipynb b/third_party/tensorflow-text/src/docs/tutorials/transformer.ipynb
deleted file mode 100644
index 784b4d5e..0000000
--- a/third_party/tensorflow-text/src/docs/tutorials/transformer.ipynb
+++ /dev/null
@@ -1,2282 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "s_qNSzzyaCbD" - }, - "source": [ - "##### Copyright 2019 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "jmjh290raIky" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "J0Qjg6vuaHNt" - }, - "source": [ - "# Transformer model for language understanding" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AOpGoE2T-YXS" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/transformer\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003e\n", - " View on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/transformer.ipynb\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003e\n", - " Run in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/transformer.ipynb\"\u003e\n", - " \u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003e\n", - " View source on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/transformer.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "M-f8TnGpE_ex" - }, - "source": [ - "This tutorial trains a \u003ca href=\"https://arxiv.org/abs/1706.03762\" class=\"external\"\u003eTransformer model\u003c/a\u003e to translate a [Portuguese to English dataset](https://www.tensorflow.org/datasets/catalog/ted_hrlr_translate#ted_hrlr_translatept_to_en). This is an advanced example that assumes knowledge of [text generation](https://www.tensorflow.org/text/tutorials/text_generation) and [attention](https://www.tensorflow.org/text/tutorials/nmt_with_attention).\n", - "\n", - "The core idea behind the Transformer model is *self-attention*—the ability to attend to different positions of the input sequence to compute a representation of that sequence. 
Transformer creates stacks of self-attention layers and is explained below in the sections *Scaled dot product attention* and *Multi-head attention*.\n", - "\n", - "A transformer model handles variable-sized input using stacks of self-attention layers instead of [RNNs](https://www.tensorflow.org/text/tutorials/text_classification_rnn) or [CNNs](https://www.tensorflow.org/tutorials/images/cnn). This general architecture has a number of advantages:\n", - "\n", - "* It makes no assumptions about the temporal/spatial relationships across the data. This is ideal for processing a set of objects (for example, [StarCraft units](https://deepmind.com/blog/alphastar-mastering-real-time-strategy-game-starcraft-ii/#block-8)).\n", - "* Layer outputs can be calculated in parallel, instead of a series like an RNN.\n", - "* Distant items can affect each other's output without passing through many RNN-steps, or convolution layers (see [Scene Memory Transformer](https://arxiv.org/pdf/1903.03878.pdf) for example).\n", - "* It can learn long-range dependencies. This is a challenge in many sequence tasks.\n", - "\n", - "The downsides of this architecture are:\n", - "\n", - "* For a time-series, the output for a time-step is calculated from the *entire history* instead of only the inputs and current hidden-state. This _may_ be less efficient. \n", - "* If the input *does* have a temporal/spatial relationship, like text, some positional encoding must be added or the model will effectively see a bag of words. \n", - "\n", - "After training the model in this notebook, you will be able to input a Portuguese sentence and return the English translation.\n", - "\n", - "\u003cimg src=\"https://www.tensorflow.org/images/tutorials/transformer/attention_map_portuguese.png\" width=\"800\" alt=\"Attention heatmap\"\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "swymtxpl7W7w" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XFG0NDRu5mYQ" - }, - "outputs": [], - "source": [ - "!pip install tensorflow_datasets\n", - "!pip install -U tensorflow-text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JjJJyJTZYebt" - }, - "outputs": [], - "source": [ - "import collections\n", - "import logging\n", - "import os\n", - "import pathlib\n", - "import re\n", - "import string\n", - "import sys\n", - "import time\n", - "\n", - "import numpy as np\n", - "import matplotlib.pyplot as plt\n", - "\n", - "import tensorflow_datasets as tfds\n", - "import tensorflow_text as text\n", - "import tensorflow as tf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pXzVhU34zWEU" - }, - "outputs": [], - "source": [ - "logging.getLogger('tensorflow').setLevel(logging.ERROR) # suppress warnings" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-cCvXbPkccV1" - }, - "source": [ - "## Download the Dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t4_Qt8W1hJE_" - }, - "source": [ - "Use [TensorFlow datasets](https://www.tensorflow.org/datasets) to load the [Portuguese-English translation dataset](https://github.com/neulab/word-embeddings-for-nmt) from the [TED Talks Open Translation Project](https://www.ted.com/participate/translate).\n", - "\n", - "This dataset contains approximately 50000 training examples, 1100 validation examples, and 2000 test examples." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8q9t4FmN96eN" - }, - "outputs": [], - "source": [ - "examples, metadata = tfds.load('ted_hrlr_translate/pt_to_en', with_info=True,\n", - " as_supervised=True)\n", - "train_examples, val_examples = examples['train'], examples['validation']" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1OnwQZ2IpTbl" - }, - "source": [ - "The `tf.data.Dataset` object returned by TensorFlow datasets yields pairs of text examples:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AtuMLTQGlHZ1" - }, - "outputs": [], - "source": [ - "for pt_examples, en_examples in train_examples.batch(3).take(1):\n", - " for pt in pt_examples.numpy():\n", - " print(pt.decode('utf-8'))\n", - "\n", - " print()\n", - "\n", - " for en in en_examples.numpy():\n", - " print(en.decode('utf-8'))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "eJxTd6aVnZyh" - }, - "source": [ - "## Text tokenization \u0026 detokenization" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "WlHAFNaopq6U" - }, - "source": [ - "You can't train a model directly on text. The text needs to be converted to some numeric representation first. Typically, you convert the text to sequences of token IDs, which are used as indices into an embedding." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "2sPvbCEePzOC" - }, - "source": [ - "The [Subword tokenizer tutorial](https://www.tensorflow.org/text/guide/subwords_tokenizer) demonstrates one popular implementation: it builds subword tokenizers (`text.BertTokenizer`) optimized for this dataset and exports them in a [saved_model](https://www.tensorflow.org/guide/saved_model). \n", - "\n", - "Download, unzip, and import the `saved_model`:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QToMl0NanZPr" - }, - "outputs": [], - "source": [ - "model_name = \"ted_hrlr_translate_pt_en_converter\"\n", - "tf.keras.utils.get_file(\n", - " f\"{model_name}.zip\",\n", - " f\"https://storage.googleapis.com/download.tensorflow.org/models/{model_name}.zip\",\n", - " cache_dir='.', cache_subdir='', extract=True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "h5dbGnPXnuI1" - }, - "outputs": [], - "source": [ - "tokenizers = tf.saved_model.load(model_name)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KQYc0CJTpnvg" - }, - "source": [ - "The `tf.saved_model` contains two text tokenizers, one for English and one for Portuguese. Both have the same methods:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "s-PCJijfcZ9_" - }, - "outputs": [], - "source": [ - "[item for item in dir(tokenizers.en) if not item.startswith('_')]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-pv_nc5lrAiD" - }, - "source": [ - "The `tokenize` method converts a batch of strings to a padded-batch of token IDs. This method splits punctuation, lowercases and unicode-normalizes the input before tokenizing. That standardization is not visible here because the input data is already standardized."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0GV-smAAQJtM" - }, - "outputs": [], - "source": [ - "for en in en_examples.numpy():\n", - " print(en.decode('utf-8'))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "2fQJq1xB-tOn" - }, - "outputs": [], - "source": [ - "encoded = tokenizers.en.tokenize(en_examples)\n", - "\n", - "for row in encoded.to_list():\n", - " print(row)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "dBRlikwDR2Lu" - }, - "source": [ - "The `detokenize` method attempts to convert these token IDs back to human readable text: " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Tpqx1aug3W31" - }, - "outputs": [], - "source": [ - "round_trip = tokenizers.en.detokenize(encoded)\n", - "for line in round_trip.numpy():\n", - " print(line.decode('utf-8'))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "e5mGvytArL9g" - }, - "source": [ - "The lower level `lookup` method converts from token-IDs to token text:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RpzRLzvIuN3R" - }, - "outputs": [], - "source": [ - "tokens = tokenizers.en.lookup(encoded)\n", - "tokens" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "5V8Ix_PNSfhV" - }, - "source": [ - "Here you can see the \"subword\" aspect of the tokenizers. The word \"searchability\" is decomposed into \"search ##ability\" and the word \"serendipity\" into \"s ##ere ##nd ##ip ##ity\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fd1NWMxjfsDd" - }, - "source": [ - "## Setup input pipeline" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E7JHK7N7tNZy" - }, - "source": [ - "To build an input pipeline suitable for training you'll apply some transformations to the dataset.\n", - "\n", - "This function will be used to encode the batches of raw text:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6shgzEck3FiV" - }, - "outputs": [], - "source": [ - "def tokenize_pairs(pt, en):\n", - " pt = tokenizers.pt.tokenize(pt)\n", - " # Convert from ragged to dense, padding with zeros.\n", - " pt = pt.to_tensor()\n", - "\n", - " en = tokenizers.en.tokenize(en)\n", - " # Convert from ragged to dense, padding with zeros.\n", - " en = en.to_tensor()\n", - " return pt, en" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cf2HwC_0wG0v" - }, - "source": [ - "Here's a simple input pipeline that processes, shuffles and batches the data:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bcRp7VcQ5m6g" - }, - "outputs": [], - "source": [ - "BUFFER_SIZE = 20000\n", - "BATCH_SIZE = 64" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BUN_jLBTwNxk" - }, - "outputs": [], - "source": [ - "def make_batches(ds):\n", - " return (\n", - " ds\n", - " .cache()\n", - " .shuffle(BUFFER_SIZE)\n", - " .batch(BATCH_SIZE)\n", - " .map(tokenize_pairs, num_parallel_calls=tf.data.AUTOTUNE)\n", - " .prefetch(tf.data.AUTOTUNE))\n", - "\n", - "\n", - "train_batches = make_batches(train_examples)\n", - "val_batches = make_batches(val_examples)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "nBQuibYA4n0n" - }, - "source": [ - "## Positional encoding\n", - "\n", - "Attention layers see their input as a set of vectors, with no sequential order. 
This model also doesn't contain any recurrent or convolutional layers. Because of this a \"positional encoding\" is added to give the model some information about the relative position of the tokens in the sentence. \n", - "\n", - "The positional encoding vector is added to the embedding vector. Embeddings represent a token in a d-dimensional space where tokens with similar meaning will be closer to each other. But the embeddings do not encode the relative position of tokens in a sentence. So after adding the positional encoding, tokens will be closer to each other based on the *similarity of their meaning and their position in the sentence*, in the d-dimensional space.\n", - "\n", - "The formula for calculating the positional encoding is as follows:\n", - "\n", - "$$\\Large{PE_{(pos, 2i)} = \\sin(pos / 10000^{2i / d_{model}})} $$\n", - "$$\\Large{PE_{(pos, 2i+1)} = \\cos(pos / 10000^{2i / d_{model}})} $$" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "WhIOZjMNKujn" - }, - "outputs": [], - "source": [ - "def get_angles(pos, i, d_model):\n", - " angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))\n", - " return pos * angle_rates" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1Rz82wEs5biZ" - }, - "outputs": [], - "source": [ - "def positional_encoding(position, d_model):\n", - " angle_rads = get_angles(np.arange(position)[:, np.newaxis],\n", - " np.arange(d_model)[np.newaxis, :],\n", - " d_model)\n", - "\n", - " # apply sin to even indices in the array; 2i\n", - " angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])\n", - "\n", - " # apply cos to odd indices in the array; 2i+1\n", - " angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])\n", - "\n", - " pos_encoding = angle_rads[np.newaxis, ...]\n", - "\n", - " return tf.cast(pos_encoding, dtype=tf.float32)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1kLCla68EloE" - }, - "outputs": [], - "source": [ - "n, d = 2048, 512\n", - "pos_encoding = positional_encoding(n, d)\n", - "print(pos_encoding.shape)\n", - "pos_encoding = pos_encoding[0]\n", - "\n", - "# Juggle the dimensions for the plot\n", - "pos_encoding = tf.reshape(pos_encoding, (n, d//2, 2))\n", - "pos_encoding = tf.transpose(pos_encoding, (2, 1, 0))\n", - "pos_encoding = tf.reshape(pos_encoding, (d, n))\n", - "\n", - "plt.pcolormesh(pos_encoding, cmap='RdBu')\n", - "plt.ylabel('Depth')\n", - "plt.xlabel('Position')\n", - "plt.colorbar()\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a_b4ou4TYqUN" - }, - "source": [ - "## Masking" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "s42Uydjkv0hF" - }, - "source": [ - "Mask all the pad tokens in the batch of sequence. It ensures that the model does not treat padding as the input. The mask indicates where pad value `0` is present: it outputs a `1` at those locations, and a `0` otherwise." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "U2i8-e1s8ti9" - }, - "outputs": [], - "source": [ - "def create_padding_mask(seq):\n", - " seq = tf.cast(tf.math.equal(seq, 0), tf.float32)\n", - "\n", - " # add extra dimensions to add the padding\n", - " # to the attention logits.\n", - " return seq[:, tf.newaxis, tf.newaxis, :] # (batch_size, 1, 1, seq_len)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "A7BYeBCNvi7n" - }, - "outputs": [], - "source": [ - "x = tf.constant([[7, 6, 0, 0, 1], [1, 2, 3, 0, 0], [0, 0, 0, 4, 5]])\n", - "create_padding_mask(x)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Z0hzukDBgVom" - }, - "source": [ - "The look-ahead mask is used to mask the future tokens in a sequence. In other words, the mask indicates which entries should not be used.\n", - "\n", - "This means that to predict the third token, only the first and second tokens will be used. Similarly, to predict the fourth token, only the first, second and third tokens will be used, and so on." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "dVxS8OPI9uI0" - }, - "outputs": [], - "source": [ - "def create_look_ahead_mask(size):\n", - " mask = 1 - tf.linalg.band_part(tf.ones((size, size)), -1, 0)\n", - " return mask # (seq_len, seq_len)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yxKGuXxaBeeE" - }, - "outputs": [], - "source": [ - "x = tf.random.uniform((1, 3))\n", - "temp = create_look_ahead_mask(x.shape[1])\n", - "temp" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xluDl5cXYy4y" - }, - "source": [ - "## Scaled dot product attention" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "vsxEE_-Wa1gF" - }, - "source": [ - "\u003cimg src=\"https://www.tensorflow.org/images/tutorials/transformer/scaled_attention.png\" width=\"500\" alt=\"scaled_dot_product_attention\"\u003e\n", - "\n", - "The attention function used by the transformer takes three inputs: Q (query), K (key), V (value). The equation used to calculate the attention weights is:\n", - "\n", - "$$\\Large{Attention(Q, K, V) = softmax_k\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right) V} $$\n", - "\n", - "The dot-product attention is scaled by a factor of the square root of the depth. This is done because for large values of depth, the dot product grows large in magnitude, pushing the softmax function into regions where it has small gradients and produces a very hard (nearly one-hot) softmax. \n", - "\n", - "For example, consider that `Q` and `K` have a mean of 0 and variance of 1. Their matrix multiplication will have a mean of 0 and variance of `dk`. So the *square root of `dk`* is used for scaling, giving a consistent variance regardless of the value of `dk`. If the variance is too low the output may be too flat to optimize effectively. If the variance is too high the softmax may saturate at initialization, making it difficult to learn.\n", - "\n", - "The mask is multiplied by -1e9 (close to negative infinity). This is done because the mask is summed with the scaled matrix multiplication of Q and K and is applied immediately before a softmax. The goal is to zero out these cells, and large negative inputs to softmax are near zero in the output."
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LazzUq3bJ5SH" - }, - "outputs": [], - "source": [ - "def scaled_dot_product_attention(q, k, v, mask):\n", - " \"\"\"Calculate the attention weights.\n", - " q, k, v must have matching leading dimensions.\n", - " k, v must have matching penultimate dimension, i.e.: seq_len_k = seq_len_v.\n", - " The mask has different shapes depending on its type(padding or look ahead)\n", - " but it must be broadcastable for addition.\n", - "\n", - " Args:\n", - " q: query shape == (..., seq_len_q, depth)\n", - " k: key shape == (..., seq_len_k, depth)\n", - " v: value shape == (..., seq_len_v, depth_v)\n", - " mask: Float tensor with shape broadcastable\n", - " to (..., seq_len_q, seq_len_k). Defaults to None.\n", - "\n", - " Returns:\n", - " output, attention_weights\n", - " \"\"\"\n", - "\n", - " matmul_qk = tf.matmul(q, k, transpose_b=True) # (..., seq_len_q, seq_len_k)\n", - "\n", - " # scale matmul_qk\n", - " dk = tf.cast(tf.shape(k)[-1], tf.float32)\n", - " scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)\n", - "\n", - " # add the mask to the scaled tensor.\n", - " if mask is not None:\n", - " scaled_attention_logits += (mask * -1e9)\n", - "\n", - " # softmax is normalized on the last axis (seq_len_k) so that the scores\n", - " # add up to 1.\n", - " attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1) # (..., seq_len_q, seq_len_k)\n", - "\n", - " output = tf.matmul(attention_weights, v) # (..., seq_len_q, depth_v)\n", - "\n", - " return output, attention_weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "FiqETnhCkoXh" - }, - "source": [ - "As the softmax normalization is done on K, its values decide the amount of importance given to Q.\n", - "\n", - "The output represents the multiplication of the attention weights and the V (value) vector. This ensures that the tokens you want to focus on are kept as-is and the irrelevant tokens are flushed out." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "n90YjClyInFy" - }, - "outputs": [], - "source": [ - "def print_out(q, k, v):\n", - " temp_out, temp_attn = scaled_dot_product_attention(\n", - " q, k, v, None)\n", - " print('Attention weights are:')\n", - " print(temp_attn)\n", - " print('Output is:')\n", - " print(temp_out)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yAzUAf2DPlNt" - }, - "outputs": [], - "source": [ - "np.set_printoptions(suppress=True)\n", - "\n", - "temp_k = tf.constant([[10, 0, 0],\n", - " [0, 10, 0],\n", - " [0, 0, 10],\n", - " [0, 0, 10]], dtype=tf.float32) # (4, 3)\n", - "\n", - "temp_v = tf.constant([[1, 0],\n", - " [10, 0],\n", - " [100, 5],\n", - " [1000, 6]], dtype=tf.float32) # (4, 2)\n", - "\n", - "# This `query` aligns with the second `key`,\n", - "# so the second `value` is returned.\n", - "temp_q = tf.constant([[0, 10, 0]], dtype=tf.float32) # (1, 3)\n", - "print_out(temp_q, temp_k, temp_v)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zg6k-fGhgXra" - }, - "outputs": [], - "source": [ - "# This query aligns with a repeated key (third and fourth),\n", - "# so all associated values get averaged.\n", - "temp_q = tf.constant([[0, 0, 10]], dtype=tf.float32) # (1, 3)\n", - "print_out(temp_q, temp_k, temp_v)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UAq3YOzUgXhb" - }, - "outputs": [], - "source": [ - "# This query aligns equally with the first and second key,\n", - "# so their values get averaged.\n", - "temp_q = tf.constant([[10, 10, 0]], dtype=tf.float32) # (1, 3)\n", - "print_out(temp_q, temp_k, temp_v)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aOz-4_XIhaTP" - }, - "source": [ - "Pass all the queries together." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6dlU8Tm-hYrF" - }, - "outputs": [], - "source": [ - "temp_q = tf.constant([[0, 0, 10],\n", - " [0, 10, 0],\n", - " [10, 10, 0]], dtype=tf.float32) # (3, 3)\n", - "print_out(temp_q, temp_k, temp_v)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "kmzGPEy64qmA" - }, - "source": [ - "## Multi-head attention" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fz5BMC8Kaoqo" - }, - "source": [ - "\u003cimg src=\"https://www.tensorflow.org/images/tutorials/transformer/multi_head_attention.png\" width=\"500\" alt=\"multi-head attention\"\u003e\n", - "\n", - "\n", - "Multi-head attention consists of four parts:\n", - "* Linear layers and split into heads.\n", - "* Scaled dot-product attention.\n", - "* Concatenation of heads.\n", - "* Final linear layer." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "JPmbr6F1C-v_" - }, - "source": [ - "Each multi-head attention block gets three inputs: Q (query), K (key), V (value). These are put through linear (Dense) layers before the multi-head attention function.\n", - "\n", - "In the diagram above `(K,Q,V)` are passed through separate linear (`Dense`) layers for each attention head. For simplicity/efficiency the code below implements this using a single dense layer with `num_heads` times as many outputs. The output is rearranged to a shape of `(batch, num_heads, ...)` before applying the attention function.\n", - "\n", - "The `scaled_dot_product_attention` function defined above is applied in a single call, broadcasted for efficiency. An appropriate mask must be used in the attention step. 
The attention output for each head is then concatenated (using `tf.transpose`, and `tf.reshape`) and put through a final `Dense` layer.\n", - "\n", - "Instead of one single attention head, Q, K, and V are split into multiple heads because it allows the model to jointly attend to information from different representation subspaces at different positions. After the split each head has a reduced dimensionality, so the total computation cost is the same as a single head attention with full dimensionality." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "BSV3PPKsYecw" - }, - "outputs": [], - "source": [ - "class MultiHeadAttention(tf.keras.layers.Layer):\n", - " def __init__(self, d_model, num_heads):\n", - " super(MultiHeadAttention, self).__init__()\n", - " self.num_heads = num_heads\n", - " self.d_model = d_model\n", - "\n", - " assert d_model % self.num_heads == 0\n", - "\n", - " self.depth = d_model // self.num_heads\n", - "\n", - " self.wq = tf.keras.layers.Dense(d_model)\n", - " self.wk = tf.keras.layers.Dense(d_model)\n", - " self.wv = tf.keras.layers.Dense(d_model)\n", - "\n", - " self.dense = tf.keras.layers.Dense(d_model)\n", - "\n", - " def split_heads(self, x, batch_size):\n", - " \"\"\"Split the last dimension into (num_heads, depth).\n", - " Transpose the result such that the shape is (batch_size, num_heads, seq_len, depth)\n", - " \"\"\"\n", - " x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))\n", - " return tf.transpose(x, perm=[0, 2, 1, 3])\n", - "\n", - " def call(self, v, k, q, mask):\n", - " batch_size = tf.shape(q)[0]\n", - "\n", - " q = self.wq(q) # (batch_size, seq_len, d_model)\n", - " k = self.wk(k) # (batch_size, seq_len, d_model)\n", - " v = self.wv(v) # (batch_size, seq_len, d_model)\n", - "\n", - " q = self.split_heads(q, batch_size) # (batch_size, num_heads, seq_len_q, depth)\n", - " k = self.split_heads(k, batch_size) # (batch_size, num_heads, seq_len_k, depth)\n", - " v = self.split_heads(v, batch_size) # (batch_size, num_heads, seq_len_v, depth)\n", - "\n", - " # scaled_attention.shape == (batch_size, num_heads, seq_len_q, depth)\n", - " # attention_weights.shape == (batch_size, num_heads, seq_len_q, seq_len_k)\n", - " scaled_attention, attention_weights = scaled_dot_product_attention(\n", - " q, k, v, mask)\n", - "\n", - " scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3]) # (batch_size, seq_len_q, num_heads, depth)\n", - "\n", - " concat_attention = tf.reshape(scaled_attention,\n", - " (batch_size, -1, self.d_model)) # (batch_size, seq_len_q, d_model)\n", - "\n", - " output = self.dense(concat_attention) # (batch_size, seq_len_q, d_model)\n", - "\n", - " return output, attention_weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0D8FJue5lDyZ" - }, - "source": [ - "Create a `MultiHeadAttention` layer to try out. At each location in the sequence, `y`, the `MultiHeadAttention` runs all 8 attention heads across all other locations in the sequence, returning a new vector of the same length at each location." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Hu94p-_-2_BX" - }, - "outputs": [], - "source": [ - "temp_mha = MultiHeadAttention(d_model=512, num_heads=8)\n", - "y = tf.random.uniform((1, 60, 512)) # (batch_size, encoder_sequence, d_model)\n", - "out, attn = temp_mha(y, k=y, q=y, mask=None)\n", - "out.shape, attn.shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RdDqGayx67vv" - }, - "source": [ - "## Point wise feed forward network" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "gBqzJXGfHK3X" - }, - "source": [ - "Point wise feed forward network consists of two fully-connected layers with a ReLU activation in between." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ET7xLt0yCT6Z" - }, - "outputs": [], - "source": [ - "def point_wise_feed_forward_network(d_model, dff):\n", - " return tf.keras.Sequential([\n", - " tf.keras.layers.Dense(dff, activation='relu'), # (batch_size, seq_len, dff)\n", - " tf.keras.layers.Dense(d_model) # (batch_size, seq_len, d_model)\n", - " ])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mytb1lPyOHLB" - }, - "outputs": [], - "source": [ - "sample_ffn = point_wise_feed_forward_network(512, 2048)\n", - "sample_ffn(tf.random.uniform((64, 50, 512))).shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "7e7hKcxn6-zd" - }, - "source": [ - "## Encoder and decoder" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "yScbC0MUH8dS" - }, - "source": [ - "\u003cimg src=\"https://www.tensorflow.org/images/tutorials/transformer/transformer.png\" width=\"600\" alt=\"transformer\"\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MfYJG-Kvgwy2" - }, - "source": [ - "The transformer model follows the same general pattern as a standard [sequence to sequence with attention model](https://www.tensorflow.org/text/tutorials/nmt_with_attention.ipynb). \n", - "\n", - "* The input sentence is passed through `N` encoder layers that generates an output for each token in the sequence.\n", - "* The decoder attends to the encoder's output and its own input (self-attention) to predict the next word. " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QFv-FNYUmvpn" - }, - "source": [ - "### Encoder layer\n", - "\n", - "Each encoder layer consists of sublayers:\n", - "\n", - "1. Multi-head attention (with padding mask) \n", - "2. Point wise feed forward networks. \n", - "\n", - "Each of these sublayers has a residual connection around it followed by a layer normalization. Residual connections help in avoiding the vanishing gradient problem in deep networks.\n", - "\n", - "The output of each sublayer is `LayerNorm(x + Sublayer(x))`. The normalization is done on the `d_model` (last) axis. There are N encoder layers in the transformer." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ncyS-Ms3i2x_" - }, - "outputs": [], - "source": [ - "class EncoderLayer(tf.keras.layers.Layer):\n", - " def __init__(self, d_model, num_heads, dff, rate=0.1):\n", - " super(EncoderLayer, self).__init__()\n", - "\n", - " self.mha = MultiHeadAttention(d_model, num_heads)\n", - " self.ffn = point_wise_feed_forward_network(d_model, dff)\n", - "\n", - " self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n", - " self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n", - "\n", - " self.dropout1 = tf.keras.layers.Dropout(rate)\n", - " self.dropout2 = tf.keras.layers.Dropout(rate)\n", - "\n", - " def call(self, x, training, mask):\n", - "\n", - " attn_output, _ = self.mha(x, x, x, mask) # (batch_size, input_seq_len, d_model)\n", - " attn_output = self.dropout1(attn_output, training=training)\n", - " out1 = self.layernorm1(x + attn_output) # (batch_size, input_seq_len, d_model)\n", - "\n", - " ffn_output = self.ffn(out1) # (batch_size, input_seq_len, d_model)\n", - " ffn_output = self.dropout2(ffn_output, training=training)\n", - " out2 = self.layernorm2(out1 + ffn_output) # (batch_size, input_seq_len, d_model)\n", - "\n", - " return out2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AzZRXdO0mI48" - }, - "outputs": [], - "source": [ - "sample_encoder_layer = EncoderLayer(512, 8, 2048)\n", - "\n", - "sample_encoder_layer_output = sample_encoder_layer(\n", - " tf.random.uniform((64, 43, 512)), False, None)\n", - "\n", - "sample_encoder_layer_output.shape # (batch_size, input_seq_len, d_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6LO_48Owmx_o" - }, - "source": [ - "### Decoder layer\n", - "\n", - "Each decoder layer consists of sublayers:\n", - "\n", - "1. Masked multi-head attention (with look ahead mask and padding mask)\n", - "2. Multi-head attention (with padding mask). V (value) and K (key) receive the *encoder output* as inputs. Q (query) receives the *output from the masked multi-head attention sublayer.*\n", - "3. Point wise feed forward networks\n", - "\n", - "Each of these sublayers has a residual connection around it followed by a layer normalization. The output of each sublayer is `LayerNorm(x + Sublayer(x))`. The normalization is done on the `d_model` (last) axis.\n", - "\n", - "There are N decoder layers in the transformer.\n", - "\n", - "As Q receives the output from decoder's first attention block, and K receives the encoder output, the attention weights represent the importance given to the decoder's input based on the encoder's output. In other words, the decoder predicts the next token by looking at the encoder output and self-attending to its own output. See the demonstration above in the scaled dot product attention section." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9SoX0-vd1hue" - }, - "outputs": [], - "source": [ - "class DecoderLayer(tf.keras.layers.Layer):\n", - " def __init__(self, d_model, num_heads, dff, rate=0.1):\n", - " super(DecoderLayer, self).__init__()\n", - "\n", - " self.mha1 = MultiHeadAttention(d_model, num_heads)\n", - " self.mha2 = MultiHeadAttention(d_model, num_heads)\n", - "\n", - " self.ffn = point_wise_feed_forward_network(d_model, dff)\n", - "\n", - " self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n", - " self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n", - " self.layernorm3 = tf.keras.layers.LayerNormalization(epsilon=1e-6)\n", - "\n", - " self.dropout1 = tf.keras.layers.Dropout(rate)\n", - " self.dropout2 = tf.keras.layers.Dropout(rate)\n", - " self.dropout3 = tf.keras.layers.Dropout(rate)\n", - "\n", - " def call(self, x, enc_output, training,\n", - " look_ahead_mask, padding_mask):\n", - " # enc_output.shape == (batch_size, input_seq_len, d_model)\n", - "\n", - " attn1, attn_weights_block1 = self.mha1(x, x, x, look_ahead_mask) # (batch_size, target_seq_len, d_model)\n", - " attn1 = self.dropout1(attn1, training=training)\n", - " out1 = self.layernorm1(attn1 + x)\n", - "\n", - " attn2, attn_weights_block2 = self.mha2(\n", - " enc_output, enc_output, out1, padding_mask) # (batch_size, target_seq_len, d_model)\n", - " attn2 = self.dropout2(attn2, training=training)\n", - " out2 = self.layernorm2(attn2 + out1) # (batch_size, target_seq_len, d_model)\n", - "\n", - " ffn_output = self.ffn(out2) # (batch_size, target_seq_len, d_model)\n", - " ffn_output = self.dropout3(ffn_output, training=training)\n", - " out3 = self.layernorm3(ffn_output + out2) # (batch_size, target_seq_len, d_model)\n", - "\n", - " return out3, attn_weights_block1, attn_weights_block2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ne2Bqx8k71l0" - }, - "outputs": [], - "source": [ - "sample_decoder_layer = DecoderLayer(512, 8, 2048)\n", - "\n", - "sample_decoder_layer_output, _, _ = sample_decoder_layer(\n", - " tf.random.uniform((64, 50, 512)), sample_encoder_layer_output,\n", - " False, None, None)\n", - "\n", - "sample_decoder_layer_output.shape # (batch_size, target_seq_len, d_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SE1H51Ajm0q1" - }, - "source": [ - "### Encoder\n", - "\n", - "The `Encoder` consists of:\n", - "1. Input Embedding\n", - "2. Positional Encoding\n", - "3. N encoder layers\n", - "\n", - "The input is put through an embedding which is summed with the positional encoding. The output of this summation is the input to the encoder layers. The output of the encoder is the input to the decoder." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "jpEox7gJ8FCI" - }, - "outputs": [], - "source": [ - "class Encoder(tf.keras.layers.Layer):\n", - " def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,\n", - " maximum_position_encoding, rate=0.1):\n", - " super(Encoder, self).__init__()\n", - "\n", - " self.d_model = d_model\n", - " self.num_layers = num_layers\n", - "\n", - " self.embedding = tf.keras.layers.Embedding(input_vocab_size, d_model)\n", - " self.pos_encoding = positional_encoding(maximum_position_encoding,\n", - " self.d_model)\n", - "\n", - " self.enc_layers = [EncoderLayer(d_model, num_heads, dff, rate)\n", - " for _ in range(num_layers)]\n", - "\n", - " self.dropout = tf.keras.layers.Dropout(rate)\n", - "\n", - " def call(self, x, training, mask):\n", - "\n", - " seq_len = tf.shape(x)[1]\n", - "\n", - " # adding embedding and position encoding.\n", - " x = self.embedding(x) # (batch_size, input_seq_len, d_model)\n", - " x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))\n", - " x += self.pos_encoding[:, :seq_len, :]\n", - "\n", - " x = self.dropout(x, training=training)\n", - "\n", - " for i in range(self.num_layers):\n", - " x = self.enc_layers[i](x, training, mask)\n", - "\n", - " return x # (batch_size, input_seq_len, d_model)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8QG9nueFQKXx" - }, - "outputs": [], - "source": [ - "sample_encoder = Encoder(num_layers=2, d_model=512, num_heads=8,\n", - " dff=2048, input_vocab_size=8500,\n", - " maximum_position_encoding=10000)\n", - "temp_input = tf.random.uniform((64, 62), dtype=tf.int64, minval=0, maxval=200)\n", - "\n", - "sample_encoder_output = sample_encoder(temp_input, training=False, mask=None)\n", - "\n", - "print(sample_encoder_output.shape) # (batch_size, input_seq_len, d_model)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "p-uO6ls8m2O5" - }, - "source": [ - "### Decoder" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZtT7PKzrXkNr" - }, - "source": [ - " The `Decoder` consists of:\n", - "1. Output Embedding\n", - "2. Positional Encoding\n", - "3. N decoder layers\n", - "\n", - "The target is put through an embedding which is summed with the positional encoding. The output of this summation is the input to the decoder layers. The output of the decoder is the input to the final linear layer." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "d5_d5-PLQXwY" - }, - "outputs": [], - "source": [ - "class Decoder(tf.keras.layers.Layer):\n", - " def __init__(self, num_layers, d_model, num_heads, dff, target_vocab_size,\n", - " maximum_position_encoding, rate=0.1):\n", - " super(Decoder, self).__init__()\n", - "\n", - " self.d_model = d_model\n", - " self.num_layers = num_layers\n", - "\n", - " self.embedding = tf.keras.layers.Embedding(target_vocab_size, d_model)\n", - " self.pos_encoding = positional_encoding(maximum_position_encoding, d_model)\n", - "\n", - " self.dec_layers = [DecoderLayer(d_model, num_heads, dff, rate)\n", - " for _ in range(num_layers)]\n", - " self.dropout = tf.keras.layers.Dropout(rate)\n", - "\n", - " def call(self, x, enc_output, training,\n", - " look_ahead_mask, padding_mask):\n", - "\n", - " seq_len = tf.shape(x)[1]\n", - " attention_weights = {}\n", - "\n", - " x = self.embedding(x) # (batch_size, target_seq_len, d_model)\n", - " x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))\n", - " x += self.pos_encoding[:, :seq_len, :]\n", - "\n", - " x = self.dropout(x, training=training)\n", - "\n", - " for i in range(self.num_layers):\n", - " x, block1, block2 = self.dec_layers[i](x, enc_output, training,\n", - " look_ahead_mask, padding_mask)\n", - "\n", - " attention_weights[f'decoder_layer{i+1}_block1'] = block1\n", - " attention_weights[f'decoder_layer{i+1}_block2'] = block2\n", - "\n", - " # x.shape == (batch_size, target_seq_len, d_model)\n", - " return x, attention_weights" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a1jXoAMRZyvu" - }, - "outputs": [], - "source": [ - "sample_decoder = Decoder(num_layers=2, d_model=512, num_heads=8,\n", - " dff=2048, target_vocab_size=8000,\n", - " maximum_position_encoding=5000)\n", - "temp_input = tf.random.uniform((64, 26), dtype=tf.int64, minval=0, maxval=200)\n", - "\n", - "output, attn = sample_decoder(temp_input,\n", - " enc_output=sample_encoder_output,\n", - " training=False,\n", - " look_ahead_mask=None,\n", - " padding_mask=None)\n", - "\n", - "output.shape, attn['decoder_layer2_block2'].shape" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y54xnJnuYgJ7" - }, - "source": [ - "## Create the Transformer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "uERO1y54cOKq" - }, - "source": [ - "Transformer consists of the encoder, decoder and a final linear layer. The output of the decoder is the input to the linear layer and its output is returned." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "PED3bIpOYkBu" - }, - "outputs": [], - "source": [ - "class Transformer(tf.keras.Model):\n", - " def __init__(self, num_layers, d_model, num_heads, dff, input_vocab_size,\n", - " target_vocab_size, pe_input, pe_target, rate=0.1):\n", - " super().__init__()\n", - " self.encoder = Encoder(num_layers, d_model, num_heads, dff,\n", - " input_vocab_size, pe_input, rate)\n", - "\n", - " self.decoder = Decoder(num_layers, d_model, num_heads, dff,\n", - " target_vocab_size, pe_target, rate)\n", - "\n", - " self.final_layer = tf.keras.layers.Dense(target_vocab_size)\n", - "\n", - " def call(self, inputs, training):\n", - " # Keras models prefer if you pass all your inputs in the first argument\n", - " inp, tar = inputs\n", - "\n", - " enc_padding_mask, look_ahead_mask, dec_padding_mask = self.create_masks(inp, tar)\n", - "\n", - " enc_output = self.encoder(inp, training, enc_padding_mask) # (batch_size, inp_seq_len, d_model)\n", - "\n", - " # dec_output.shape == (batch_size, tar_seq_len, d_model)\n", - " dec_output, attention_weights = self.decoder(\n", - " tar, enc_output, training, look_ahead_mask, dec_padding_mask)\n", - "\n", - " final_output = self.final_layer(dec_output) # (batch_size, tar_seq_len, target_vocab_size)\n", - "\n", - " return final_output, attention_weights\n", - "\n", - " def create_masks(self, inp, tar):\n", - " # Encoder padding mask\n", - " enc_padding_mask = create_padding_mask(inp)\n", - "\n", - " # Used in the 2nd attention block in the decoder.\n", - " # This padding mask is used to mask the encoder outputs.\n", - " dec_padding_mask = create_padding_mask(inp)\n", - "\n", - " # Used in the 1st attention block in the decoder.\n", - " # It is used to pad and mask future tokens in the input received by\n", - " # the decoder.\n", - " look_ahead_mask = create_look_ahead_mask(tf.shape(tar)[1])\n", - " dec_target_padding_mask = create_padding_mask(tar)\n", - " look_ahead_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)\n", - "\n", - " return enc_padding_mask, look_ahead_mask, dec_padding_mask" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "tJ4fbQcIkHW1" - }, - "outputs": [], - "source": [ - "sample_transformer = Transformer(\n", - " num_layers=2, d_model=512, num_heads=8, dff=2048,\n", - " input_vocab_size=8500, target_vocab_size=8000,\n", - " pe_input=10000, pe_target=6000)\n", - "\n", - "temp_input = tf.random.uniform((64, 38), dtype=tf.int64, minval=0, maxval=200)\n", - "temp_target = tf.random.uniform((64, 36), dtype=tf.int64, minval=0, maxval=200)\n", - "\n", - "fn_out, _ = sample_transformer([temp_input, temp_target], training=False)\n", - "\n", - "fn_out.shape # (batch_size, tar_seq_len, target_vocab_size)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wsINyf1VEQLC" - }, - "source": [ - "## Set hyperparameters" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "zVjWCxFNcgbt" - }, - "source": [ - "To keep this example small and relatively fast, the values for `num_layers, d_model, dff` have been reduced. \n", - "\n", - "The base model described in the [paper](https://arxiv.org/abs/1706.03762) used: `num_layers=6, d_model=512, dff=2048`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lnJn5SLA2ahP" - }, - "outputs": [], - "source": [ - "num_layers = 4\n", - "d_model = 128\n", - "dff = 512\n", - "num_heads = 8\n", - "dropout_rate = 0.1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xYEGhEOtzn5W" - }, - "source": [ - "## Optimizer" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "GOmWW--yP3zx" - }, - "source": [ - "Use the Adam optimizer with a custom learning rate scheduler according to the formula in the [paper](https://arxiv.org/abs/1706.03762).\n", - "\n", - "$$\\Large{lrate = d_{model}^{-0.5} * \\min(step{\\_}num^{-0.5}, step{\\_}num \\cdot warmup{\\_}steps^{-1.5})}$$\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iYQdOO1axwEI" - }, - "outputs": [], - "source": [ - "class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):\n", - " def __init__(self, d_model, warmup_steps=4000):\n", - " super(CustomSchedule, self).__init__()\n", - "\n", - " self.d_model = d_model\n", - " self.d_model = tf.cast(self.d_model, tf.float32)\n", - "\n", - " self.warmup_steps = warmup_steps\n", - "\n", - " def __call__(self, step):\n", - " arg1 = tf.math.rsqrt(step)\n", - " arg2 = step * (self.warmup_steps ** -1.5)\n", - "\n", - " return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7r4scdulztRx" - }, - "outputs": [], - "source": [ - "learning_rate = CustomSchedule(d_model)\n", - "\n", - "optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98,\n", - " epsilon=1e-9)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "f33ZCgvHpPdG" - }, - "outputs": [], - "source": [ - "temp_learning_rate_schedule = CustomSchedule(d_model)\n", - "\n", - "plt.plot(temp_learning_rate_schedule(tf.range(40000, dtype=tf.float32)))\n", - "plt.ylabel(\"Learning Rate\")\n", - "plt.xlabel(\"Train Step\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YgkDE7hzo8r5" - }, - "source": [ - "## Loss and metrics" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oxGJtoDuYIHL" - }, - "source": [ - "Since the target sequences are padded, it is important to apply a padding mask when calculating the loss." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MlhsJMm0TW_B" - }, - "outputs": [], - "source": [ - "loss_object = tf.keras.losses.SparseCategoricalCrossentropy(\n", - " from_logits=True, reduction='none')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "67oqVHiT0Eiu" - }, - "outputs": [], - "source": [ - "def loss_function(real, pred):\n", - " mask = tf.math.logical_not(tf.math.equal(real, 0))\n", - " loss_ = loss_object(real, pred)\n", - "\n", - " mask = tf.cast(mask, dtype=loss_.dtype)\n", - " loss_ *= mask\n", - "\n", - " return tf.reduce_sum(loss_)/tf.reduce_sum(mask)\n", - "\n", - "\n", - "def accuracy_function(real, pred):\n", - " accuracies = tf.equal(real, tf.argmax(pred, axis=2))\n", - "\n", - " mask = tf.math.logical_not(tf.math.equal(real, 0))\n", - " accuracies = tf.math.logical_and(mask, accuracies)\n", - "\n", - " accuracies = tf.cast(accuracies, dtype=tf.float32)\n", - " mask = tf.cast(mask, dtype=tf.float32)\n", - " return tf.reduce_sum(accuracies)/tf.reduce_sum(mask)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "phlyxMnm-Tpx" - }, - "outputs": [], - "source": [ - "train_loss = tf.keras.metrics.Mean(name='train_loss')\n", - "train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aeHumfr7zmMa" - }, - "source": [ - "## Training and checkpointing" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UiysUa--4tOU" - }, - "outputs": [], - "source": [ - "transformer = Transformer(\n", - " num_layers=num_layers,\n", - " d_model=d_model,\n", - " num_heads=num_heads,\n", - " dff=dff,\n", - " input_vocab_size=tokenizers.pt.get_vocab_size().numpy(),\n", - " target_vocab_size=tokenizers.en.get_vocab_size().numpy(),\n", - " pe_input=1000,\n", - " pe_target=1000,\n", - " rate=dropout_rate)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Fzuf06YZp66w" - }, - "source": [ - "Create the checkpoint path and the checkpoint manager. This will be used to save checkpoints every `n` epochs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hNhuYfllndLZ" - }, - "outputs": [], - "source": [ - "checkpoint_path = \"./checkpoints/train\"\n", - "\n", - "ckpt = tf.train.Checkpoint(transformer=transformer,\n", - " optimizer=optimizer)\n", - "\n", - "ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)\n", - "\n", - "# if a checkpoint exists, restore the latest checkpoint.\n", - "if ckpt_manager.latest_checkpoint:\n", - " ckpt.restore(ckpt_manager.latest_checkpoint)\n", - " print('Latest checkpoint restored!!')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0Di_Yaa1gf9r" - }, - "source": [ - "The target is divided into tar_inp and tar_real. tar_inp is passed as an input to the decoder. `tar_real` is that same input shifted by 1: At each location in `tar_input`, `tar_real` contains the next token that should be predicted.\n", - "\n", - "For example, `sentence` = \"SOS A lion in the jungle is sleeping EOS\"\n", - "\n", - "`tar_inp` = \"SOS A lion in the jungle is sleeping\"\n", - "\n", - "`tar_real` = \"A lion in the jungle is sleeping EOS\"\n", - "\n", - "The transformer is an auto-regressive model: it makes predictions one part at a time, and uses its output so far to decide what to do next. 
\n", - "\n", - "During training this example uses teacher-forcing (like in the [text generation tutorial](https://www.tensorflow.org/text/tutorials/text_generation)). Teacher forcing is passing the true output to the next time step regardless of what the model predicts at the current time step.\n", - "\n", - "As the transformer predicts each token, *self-attention* allows it to look at the previous tokens in the input sequence to better predict the next token.\n", - "\n", - "To prevent the model from peeking at the expected output the model uses a look-ahead mask." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "LKpoA6q1sJFj" - }, - "outputs": [], - "source": [ - "EPOCHS = 20" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "iJwmp9OE29oj" - }, - "outputs": [], - "source": [ - "# The @tf.function trace-compiles train_step into a TF graph for faster\n", - "# execution. The function specializes to the precise shape of the argument\n", - "# tensors. To avoid re-tracing due to the variable sequence lengths or variable\n", - "# batch sizes (the last batch is smaller), use input_signature to specify\n", - "# more generic shapes.\n", - "\n", - "train_step_signature = [\n", - " tf.TensorSpec(shape=(None, None), dtype=tf.int64),\n", - " tf.TensorSpec(shape=(None, None), dtype=tf.int64),\n", - "]\n", - "\n", - "\n", - "@tf.function(input_signature=train_step_signature)\n", - "def train_step(inp, tar):\n", - " tar_inp = tar[:, :-1]\n", - " tar_real = tar[:, 1:]\n", - "\n", - " with tf.GradientTape() as tape:\n", - " predictions, _ = transformer([inp, tar_inp],\n", - " training = True)\n", - " loss = loss_function(tar_real, predictions)\n", - "\n", - " gradients = tape.gradient(loss, transformer.trainable_variables)\n", - " optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))\n", - "\n", - " train_loss(loss)\n", - " train_accuracy(accuracy_function(tar_real, predictions))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qM2PDWGDJ_8V" - }, - "source": [ - "Portuguese is used as the input language and English is the target language." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bbvmaKNiznHZ" - }, - "outputs": [], - "source": [ - "for epoch in range(EPOCHS):\n", - " start = time.time()\n", - "\n", - " train_loss.reset_states()\n", - " train_accuracy.reset_states()\n", - "\n", - " # inp -\u003e portuguese, tar -\u003e english\n", - " for (batch, (inp, tar)) in enumerate(train_batches):\n", - " train_step(inp, tar)\n", - "\n", - " if batch % 50 == 0:\n", - " print(f'Epoch {epoch + 1} Batch {batch} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')\n", - "\n", - " if (epoch + 1) % 5 == 0:\n", - " ckpt_save_path = ckpt_manager.save()\n", - " print(f'Saving checkpoint for epoch {epoch+1} at {ckpt_save_path}')\n", - "\n", - " print(f'Epoch {epoch + 1} Loss {train_loss.result():.4f} Accuracy {train_accuracy.result():.4f}')\n", - "\n", - " print(f'Time taken for 1 epoch: {time.time() - start:.2f} secs\\n')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "QfcsSWswSdGV" - }, - "source": [ - "### Run inference" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "y6APsFrgImLW" - }, - "source": [ - "The following steps are used for inference:\n", - "\n", - "* Encode the input sentence using the Portuguese tokenizer (`tokenizers.pt`). 
This is the encoder input.\n", - "* The decoder input is initialized to the `[START]` token.\n", - "* Calculate the padding masks and the look ahead masks.\n", - "* The `decoder` then outputs the predictions by looking at the `encoder output` and its own output (self-attention).\n", - "* Concatenate the predicted token to the decoder input and pass it to the decoder.\n", - "* In this approach, the decoder predicts the next token based on the previous tokens it predicted." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-FQmQwtv9-kk" - }, - "source": [ - "Note: The model is optimized for _efficient training_ and makes a next-token prediction for each token in the output simultaneously. This is redundant during inference, and only the last prediction is used. This model can be made more efficient for inference if you only calculate the last prediction when running in inference mode (`training=False`)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "5buvMlnvyrFm" - }, - "outputs": [], - "source": [ - "class Translator(tf.Module):\n", - " def __init__(self, tokenizers, transformer):\n", - " self.tokenizers = tokenizers\n", - " self.transformer = transformer\n", - "\n", - " def __call__(self, sentence, max_length=20):\n", - " # input sentence is portuguese, hence adding the start and end token\n", - " assert isinstance(sentence, tf.Tensor)\n", - " if len(sentence.shape) == 0:\n", - " sentence = sentence[tf.newaxis]\n", - "\n", - " sentence = self.tokenizers.pt.tokenize(sentence).to_tensor()\n", - " \n", - " encoder_input = sentence\n", - "\n", - " # as the target is english, the first token to the transformer should be the\n", - " # english start token.\n", - " start_end = self.tokenizers.en.tokenize([''])[0]\n", - " start = start_end[0][tf.newaxis]\n", - " end = start_end[1][tf.newaxis]\n", - "\n", - " # `tf.TensorArray` is required here (instead of a python list) so that the\n", - " # dynamic-loop can be traced by `tf.function`.\n", - " output_array = tf.TensorArray(dtype=tf.int64, size=0, dynamic_size=True)\n", - " output_array = output_array.write(0, start)\n", - " \n", - " for i in tf.range(max_length):\n", - " output = tf.transpose(output_array.stack())\n", - " predictions, _ = self.transformer([encoder_input, output], training=False)\n", - " \n", - " # select the last token from the seq_len dimension\n", - " predictions = predictions[:, -1:, :] # (batch_size, 1, vocab_size)\n", - "\n", - " predicted_id = tf.argmax(predictions, axis=-1)\n", - "\n", - " # concatentate the predicted_id to the output which is given to the decoder\n", - " # as its input.\n", - " output_array = output_array.write(i+1, predicted_id[0])\n", - "\n", - " if predicted_id == end:\n", - " break\n", - "\n", - " output = tf.transpose(output_array.stack())\n", - " # output.shape (1, tokens)\n", - " text = tokenizers.en.detokenize(output)[0] # shape: ()\n", - "\n", - " tokens = tokenizers.en.lookup(output)[0]\n", - "\n", - " # `tf.function` prevents us from using the attention_weights that were\n", - " # calculated on the last iteration of the loop. 
So recalculate them outside\n", - " # the loop.\n", - " _, attention_weights = self.transformer([encoder_input, output[:,:-1]], training=False)\n", - "\n", - " return text, tokens, attention_weights" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ofUWszmY3szZ" - }, - "source": [ - "Create an instance of this `Translator` class, and try it out a few times:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "4OR2D4EXeIRY" - }, - "outputs": [], - "source": [ - "translator = Translator(tokenizers, transformer)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lU2_yG_vBGza" - }, - "outputs": [], - "source": [ - "def print_translation(sentence, tokens, ground_truth):\n", - " print(f'{\"Input:\":15s}: {sentence}')\n", - " print(f'{\"Prediction\":15s}: {tokens.numpy().decode(\"utf-8\")}')\n", - " print(f'{\"Ground truth\":15s}: {ground_truth}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YsxrAlvFG8SZ" - }, - "outputs": [], - "source": [ - "sentence = \"este é um problema que temos que resolver.\"\n", - "ground_truth = \"this is a problem we have to solve .\"\n", - "\n", - "translated_text, translated_tokens, attention_weights = translator(\n", - " tf.constant(sentence))\n", - "print_translation(sentence, translated_text, ground_truth)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "7EH5y_aqI4t1" - }, - "outputs": [], - "source": [ - "sentence = \"os meus vizinhos ouviram sobre esta ideia.\"\n", - "ground_truth = \"and my neighboring homes heard about this idea .\"\n", - "\n", - "translated_text, translated_tokens, attention_weights = translator(\n", - " tf.constant(sentence))\n", - "print_translation(sentence, translated_text, ground_truth)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "J-hVCTSUMlkb" - }, - "outputs": [], - "source": [ - "sentence = \"vou então muito rapidamente partilhar convosco algumas histórias de algumas coisas mágicas que aconteceram.\"\n", - "ground_truth = \"so i \\'ll just share with you some stories very quickly of some magical things that have happened .\"\n", - "\n", - "translated_text, translated_tokens, attention_weights = translator(\n", - " tf.constant(sentence))\n", - "print_translation(sentence, translated_text, ground_truth)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "S3EQiFUC--Ds" - }, - "source": [ - "## Attention plots" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "hHV2pdXHGz-0" - }, - "source": [ - "The `Translator` class returns a dictionary of attention maps you can use to visualize the internal working of the model:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "t-kFyiOLH0xg" - }, - "outputs": [], - "source": [ - "sentence = \"este é o primeiro livro que eu fiz.\"\n", - "ground_truth = \"this is the first book i've ever done.\"\n", - "\n", - "translated_text, translated_tokens, attention_weights = translator(\n", - " tf.constant(sentence))\n", - "print_translation(sentence, translated_text, ground_truth)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "CcI4DxAK5EHY" - }, - "outputs": [], - "source": [ - "def plot_attention_head(in_tokens, translated_tokens, attention):\n", - " # The plot is of the attention when a token was generated.\n", - " # The model didn't generate `\u003cSTART\u003e` in the output. 
Skip it.\n", - " translated_tokens = translated_tokens[1:]\n", - "\n", - " ax = plt.gca()\n", - " ax.matshow(attention)\n", - " ax.set_xticks(range(len(in_tokens)))\n", - " ax.set_yticks(range(len(translated_tokens)))\n", - "\n", - " labels = [label.decode('utf-8') for label in in_tokens.numpy()]\n", - " ax.set_xticklabels(\n", - " labels, rotation=90)\n", - "\n", - " labels = [label.decode('utf-8') for label in translated_tokens.numpy()]\n", - " ax.set_yticklabels(labels)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_KY4c2cryuxY" - }, - "outputs": [], - "source": [ - "head = 0\n", - "# shape: (batch=1, num_heads, seq_len_q, seq_len_k)\n", - "attention_heads = tf.squeeze(\n", - " attention_weights['decoder_layer4_block2'], 0)\n", - "attention = attention_heads[head]\n", - "attention.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XdxmakWE6Om3" - }, - "outputs": [], - "source": [ - "in_tokens = tf.convert_to_tensor([sentence])\n", - "in_tokens = tokenizers.pt.tokenize(in_tokens).to_tensor()\n", - "in_tokens = tokenizers.pt.lookup(in_tokens)[0]\n", - "in_tokens" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hVdPSfecmrpj" - }, - "outputs": [], - "source": [ - "translated_tokens" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "XtzyKCFamm4N" - }, - "outputs": [], - "source": [ - "plot_attention_head(in_tokens, translated_tokens, attention)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MBliB-PCzNK3" - }, - "outputs": [], - "source": [ - "def plot_attention_weights(sentence, translated_tokens, attention_heads):\n", - " in_tokens = tf.convert_to_tensor([sentence])\n", - " in_tokens = tokenizers.pt.tokenize(in_tokens).to_tensor()\n", - " in_tokens = tokenizers.pt.lookup(in_tokens)[0]\n", - " in_tokens\n", - "\n", - " fig = plt.figure(figsize=(16, 8))\n", - "\n", - " for h, head in enumerate(attention_heads):\n", - " ax = fig.add_subplot(2, 4, h+1)\n", - "\n", - " plot_attention_head(in_tokens, translated_tokens, head)\n", - "\n", - " ax.set_xlabel(f'Head {h+1}')\n", - "\n", - " plt.tight_layout()\n", - " plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "pyRQi7944wru" - }, - "outputs": [], - "source": [ - "plot_attention_weights(sentence, translated_tokens,\n", - " attention_weights['decoder_layer4_block2'][0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MZJirKUtikTt" - }, - "source": [ - "The model does okay on unfamiliar words. 
Neither \"triceratops\" or \"encyclopedia\" are in the input dataset and the model almost learns to transliterate them, even without a shared vocabulary:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9cxysY7uh3jg" - }, - "outputs": [], - "source": [ - "sentence = \"Eu li sobre triceratops na enciclopédia.\"\n", - "ground_truth = \"I read about triceratops in the encyclopedia.\"\n", - "\n", - "translated_text, translated_tokens, attention_weights = translator(\n", - " tf.constant(sentence))\n", - "print_translation(sentence, translated_text, ground_truth)\n", - "\n", - "plot_attention_weights(sentence, translated_tokens,\n", - " attention_weights['decoder_layer4_block2'][0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "mOyiOetL2l60" - }, - "source": [ - "## Export" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YTK3g2UL2oMc" - }, - "source": [ - "That inference model is working, so next you'll export it as a `tf.saved_model`.\n", - "\n", - "To do that, wrap it in yet another `tf.Module` sub-class, this time with a `tf.function` on the `__call__` method:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "GRmzkibLusQi" - }, - "outputs": [], - "source": [ - "class ExportTranslator(tf.Module):\n", - " def __init__(self, translator):\n", - " self.translator = translator\n", - "\n", - " @tf.function(input_signature=[tf.TensorSpec(shape=[], dtype=tf.string)])\n", - " def __call__(self, sentence):\n", - " (result, \n", - " tokens,\n", - " attention_weights) = self.translator(sentence, max_length=100)\n", - " \n", - " return result" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "O9f_pmEA4kql" - }, - "source": [ - "In the above `tf.function` only the output sentence is returned. Thanks to the [non-strict execution](https://tensorflow.org/guide/intro_to_graphs) in `tf.function` any unnecessary values are never computed." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "EfomoJDP2n5n" - }, - "outputs": [], - "source": [ - "translator = ExportTranslator(translator)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SUfoCWPS9LuB" - }, - "source": [ - "Since the model is decoding the predictions using `tf.argmax` the predictions are deterministic. 
The original model and one reloaded from its `SavedModel` should give identical predictions:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hAlqyycz3IYL" - }, - "outputs": [], - "source": [ - "translator(\"este é o primeiro livro que eu fiz.\").numpy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ar3LO-Vuvlnv" - }, - "outputs": [], - "source": [ - "tf.saved_model.save(translator, export_dir='translator')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "8WUflwyT1SEF" - }, - "outputs": [], - "source": [ - "reloaded = tf.saved_model.load('translator')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "-sBTBWwR1XMr" - }, - "outputs": [], - "source": [ - "reloaded(\"este é o primeiro livro que eu fiz.\").numpy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "RqQ1fIsLwkGE" - }, - "source": [ - "## Summary\n", - "\n", - "In this tutorial, you learned about positional encoding, multi-head attention, the importance of masking and how to create a transformer.\n", - "\n", - "Try using a different dataset to train the transformer. You can also create the base transformer or transformer XL by changing the hyperparameters above. You can also use the layers defined here to create [BERT](https://arxiv.org/abs/1810.04805) and train state of the art models. Furthermore, you can implement beam search to get better predictions." - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "last_runtime": { - "build_target": "//learning/deepmind/public/tools/ml_python:ml_notebook", - "kind": "private" - }, - "name": "transformer.ipynb", - "provenance": [ - { - "file_id": "1fpiHY_g7b1-bs_sSRWcbiw9qv4eDU4QZ", - "timestamp": 1628275335747 - }, - { - "file_id": "https://github.com/tensorflow/text/blob/master/docs/tutorials/transformer.ipynb", - "timestamp": 1628273726995 - } - ], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.11" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/docs/tutorials/uncertainty_quantification_with_sngp_bert.ipynb b/third_party/tensorflow-text/src/docs/tutorials/uncertainty_quantification_with_sngp_bert.ipynb deleted file mode 100644 index 2d35ea0..0000000 --- a/third_party/tensorflow-text/src/docs/tutorials/uncertainty_quantification_with_sngp_bert.ipynb +++ /dev/null
@@ -1,732 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "vs3a5tGVAWGI" - }, - "source": [ - "##### Copyright 2021 The TensorFlow Authors." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "HYfsarcYBJQp" - }, - "outputs": [], - "source": [ - "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", - "# you may not use this file except in compliance with the License.\n", - "# You may obtain a copy of the License at\n", - "#\n", - "# https://www.apache.org/licenses/LICENSE-2.0\n", - "#\n", - "# Unless required by applicable law or agreed to in writing, software\n", - "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", - "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", - "# See the License for the specific language governing permissions and\n", - "# limitations under the License." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "aOpqCFEyBQDd" - }, - "source": [ - "# Uncertainty-aware Deep Language Learning with BERT-SNGP" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "6MlSYP6cBT61" - }, - "source": [ - "\u003ctable class=\"tfo-notebook-buttons\" align=\"left\"\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://www.tensorflow.org/text/tutorials/uncertainty_quantification_with_sngp_bert\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/tf_logo_32px.png\" /\u003eView on TensorFlow.org\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/uncertainty_quantification_with_sngp_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/colab_logo_32px.png\" /\u003eRun in Google Colab\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca target=\"_blank\" href=\"https://github.com/tensorflow/text/blob/master/docs/tutorials/uncertainty_quantification_with_sngp_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/GitHub-Mark-32px.png\" /\u003eView on GitHub\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://storage.googleapis.com/tensorflow_docs/text/docs/tutorials/uncertainty_quantification_with_sngp_bert.ipynb\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/download_logo_32px.png\" /\u003eDownload notebook\u003c/a\u003e\n", - " \u003c/td\u003e\n", - " \u003ctd\u003e\n", - " \u003ca href=\"https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3\"\u003e\u003cimg src=\"https://www.tensorflow.org/images/hub_logo_32px.png\" /\u003eSee TF Hub model\u003c/a\u003e\n", - " \u003c/td\u003e\n", - "\u003c/table\u003e" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-IM5IzM26GBh" - }, - "source": [ - "In the [SNGP tutorial](https://www.tensorflow.org/tutorials/understanding/sngp), you learned how to build SNGP model on top of a deep residual network to improve its ability to quantify its uncertainty. In this tutorial, you will apply SNGP to a natural language understanding (NLU) task by building it on top of a deep BERT encoder to improve deep NLU model's ability in detecting out-of-scope queries. 
\n", - "\n", - "Specifically, you will:\n", - "* Build BERT-SNGP, a SNGP-augmented [BERT](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2) model.\n", - "* Load the [CLINC Out-of-scope (OOS)](https://www.tensorflow.org/datasets/catalog/clinc_oos) intent detection dataset.\n", - "* Train the BERT-SNGP model.\n", - "* Evaluate the BERT-SNGP model's performance in uncertainty calibration and out-of-domain detection.\n", - "\n", - "Beyond CLINC OOS, the SNGP model has been applied to large-scale datasets such as [Jigsaw toxicity detection](https://www.tensorflow.org/datasets/catalog/wikipedia_toxicity_subtypes), and to the image datasets such as [CIFAR-100](https://www.tensorflow.org/datasets/catalog/cifar100) and [ImageNet](https://www.tensorflow.org/datasets/catalog/imagenet2012). \n", - "For benchmark results of SNGP and other uncertainty methods, as well as high-quality implementation with end-to-end training / evaluation scripts, you can check out the [Uncertainty Baselines](https://github.com/google/uncertainty-baselines) benchmark." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-bsids4eAYYI" - }, - "source": [ - "## Setup" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "l2dCK-rbYXsb" - }, - "outputs": [], - "source": [ - "!pip uninstall -y tensorflow tf-text" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "MmlftNekWmKR" - }, - "outputs": [], - "source": [ - "!pip install -U tensorflow-text-nightly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hU_lfdL0BY8L" - }, - "outputs": [], - "source": [ - "!pip install -U tf-nightly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "3sgnLBKk7iuR" - }, - "outputs": [], - "source": [ - "!pip install -U tf-models-nightly" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "M42dnVSk7dVy" - }, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "import sklearn.metrics\n", - "import sklearn.calibration\n", - "\n", - "import tensorflow_hub as hub\n", - "import tensorflow_datasets as tfds\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "import official.nlp.modeling.layers as layers\n", - "import official.nlp.optimization as optimization" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4TiolAXow5Rs" - }, - "source": [ - "This tutorial needs the GPU to run efficiently. Check if the GPU is available. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "18dxUFtEBeIR" - }, - "outputs": [], - "source": [ - "tf.__version__" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "9enQL-rZxGkP" - }, - "outputs": [], - "source": [ - "gpus = tf.config.list_physical_devices('GPU')\n", - "gpus" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZY_xLQnS-6ar" - }, - "outputs": [], - "source": [ - "assert gpus, \"\"\"\n", - " No GPU(s) found! 
This tutorial will take many hours to run without a GPU.\n", - "\n", - " You may hit this error if the installed tensorflow package is not\n", - " compatible with the CUDA and CUDNN versions.\"\"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cnRQfguq6GZj" - }, - "source": [ - "First implement a standard BERT classifier following the [classify text with BERT](https://www.tensorflow.org/tutorials/text/classify_text_with_bert) tutorial. We will use the [BERT-base](https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3) encoder, and the built-in [`ClassificationHead`](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/cls_head.py) as the classifier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "bNBEGs7s6NHB" - }, - "outputs": [], - "source": [ - "#@title Standard BERT model\n", - "\n", - "PREPROCESS_HANDLE = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'\n", - "MODEL_HANDLE = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3'\n", - "\n", - "class BertClassifier(tf.keras.Model):\n", - " def __init__(self, \n", - " num_classes=150, inner_dim=768, dropout_rate=0.1,\n", - " **classifier_kwargs):\n", - " \n", - " super().__init__()\n", - " self.classifier_kwargs = classifier_kwargs\n", - "\n", - " # Initiate the BERT encoder components.\n", - " self.bert_preprocessor = hub.KerasLayer(PREPROCESS_HANDLE, name='preprocessing')\n", - " self.bert_hidden_layer = hub.KerasLayer(MODEL_HANDLE, trainable=True, name='bert_encoder')\n", - "\n", - " # Defines the encoder and classification layers.\n", - " self.bert_encoder = self.make_bert_encoder()\n", - " self.classifier = self.make_classification_head(num_classes, inner_dim, dropout_rate)\n", - "\n", - " def make_bert_encoder(self):\n", - " text_inputs = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')\n", - " encoder_inputs = self.bert_preprocessor(text_inputs)\n", - " encoder_outputs = self.bert_hidden_layer(encoder_inputs)\n", - " return tf.keras.Model(text_inputs, encoder_outputs)\n", - "\n", - " def make_classification_head(self, num_classes, inner_dim, dropout_rate):\n", - " return layers.ClassificationHead(\n", - " num_classes=num_classes, \n", - " inner_dim=inner_dim,\n", - " dropout_rate=dropout_rate,\n", - " **self.classifier_kwargs)\n", - "\n", - " def call(self, inputs, **kwargs):\n", - " encoder_outputs = self.bert_encoder(inputs)\n", - " classifier_inputs = encoder_outputs['sequence_output']\n", - " return self.classifier(classifier_inputs, **kwargs)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "SbhbNbKk6WNR" - }, - "source": [ - "### Build SNGP model" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "p7YakN0V6Oif" - }, - "source": [ - "To implement a BERT-SNGP model, you only need to replace the `ClassificationHead` with the built-in [`GaussianProcessClassificationHead`](https://github.com/tensorflow/models/blob/master/official/nlp/modeling/layers/cls_head.py). Spectral normalization is already pre-packaged into this classification head. Like in the [SNGP tutorial](https://www.tensorflow.org/tutorials/uncertainty/sngp), add a covariance reset callback to the model, so the model automatically reset the covariance estimator at the begining of a new epoch to avoid counting the same data twice." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QCaJy85y8WeE" - }, - "outputs": [], - "source": [ - "class ResetCovarianceCallback(tf.keras.callbacks.Callback):\n", - "\n", - " def on_epoch_begin(self, epoch, logs=None):\n", - " \"\"\"Resets covariance matrix at the begining of the epoch.\"\"\"\n", - " if epoch \u003e 0:\n", - " self.model.classifier.reset_covariance_matrix()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "YoHgOuiZ6Q4y" - }, - "outputs": [], - "source": [ - "class SNGPBertClassifier(BertClassifier):\n", - "\n", - " def make_classification_head(self, num_classes, inner_dim, dropout_rate):\n", - " return layers.GaussianProcessClassificationHead(\n", - " num_classes=num_classes, \n", - " inner_dim=inner_dim,\n", - " dropout_rate=dropout_rate,\n", - " gp_cov_momentum=-1,\n", - " temperature=30.,\n", - " **self.classifier_kwargs)\n", - "\n", - " def fit(self, *args, **kwargs):\n", - " \"\"\"Adds ResetCovarianceCallback to model callbacks.\"\"\"\n", - " kwargs['callbacks'] = list(kwargs.get('callbacks', []))\n", - " kwargs['callbacks'].append(ResetCovarianceCallback())\n", - "\n", - " return super().fit(*args, **kwargs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UOj5YWTt6dCe" - }, - "source": [ - "Note: The `GaussianProcessClassificationHead` takes a new argument `temperature`. It corresponds to the $\\lambda$ parameter in the __mean-field approximation__ introduced in the [SNGP tutorial](https://www.tensorflow.org/tutorials/uncertainty/sngp). In practice, this value is usually treated as a hyperparamter, and is finetuned to optimize the model's calibration performance." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qdU90uDT6hFq" - }, - "source": [ - "### Load CLINC OOS dataset" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AnuNeyHw6kH7" - }, - "source": [ - "Now load the [CLINC OOS](https://www.tensorflow.org/datasets/catalog/clinc_oos) intent detection dataset. This dataset contains 15000 user's spoken queries collected over 150 intent classes, it also contains 1000 out-of-domain (OOD) sentences that are not covered by any of the known classes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mkMZN2iA6hhg" - }, - "outputs": [], - "source": [ - "(clinc_train, clinc_test, clinc_test_oos), ds_info = tfds.load(\n", - " 'clinc_oos', split=['train', 'test', 'test_oos'], with_info=True, batch_size=-1)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UJSL2nm8Bo02" - }, - "source": [ - "Make the train and test data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cgkOOZOq6fQL" - }, - "outputs": [], - "source": [ - "train_examples = clinc_train['text']\n", - "train_labels = clinc_train['intent']\n", - "\n", - "# Makes the in-domain (IND) evaluation data.\n", - "ind_eval_data = (clinc_test['text'], clinc_test['intent'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Kw76f6caBq_E" - }, - "source": [ - "Create a OOD evaluation dataset. For this, combine the in-domain test data `clinc_test` and the out-of-domain data `clinc_test_oos`. We will also assign label 0 to the in-domain examples, and label 1 to the out-of-domain examples. 
" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "uVFuzecR64FJ" - }, - "outputs": [], - "source": [ - "test_data_size = ds_info.splits['test'].num_examples\n", - "oos_data_size = ds_info.splits['test_oos'].num_examples\n", - "\n", - "# Combines the in-domain and out-of-domain test examples.\n", - "oos_texts = tf.concat([clinc_test['text'], clinc_test_oos['text']], axis=0)\n", - "oos_labels = tf.constant([0] * test_data_size + [1] * oos_data_size)\n", - "\n", - "# Converts into a TF dataset.\n", - "ood_eval_dataset = tf.data.Dataset.from_tensor_slices(\n", - " {\"text\": oos_texts, \"label\": oos_labels})" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ZcHwfwfU6qCE" - }, - "source": [ - "### Train and evaluate" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "_VTY6KYc6sBB" - }, - "source": [ - "First set up the basic training configurations." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_-uUkUtk6qWC" - }, - "outputs": [], - "source": [ - "TRAIN_EPOCHS = 3\n", - "TRAIN_BATCH_SIZE = 32\n", - "EVAL_BATCH_SIZE = 256" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "tiEjMdFV6wXQ" - }, - "outputs": [], - "source": [ - "#@title\n", - "\n", - "def bert_optimizer(learning_rate, \n", - " batch_size=TRAIN_BATCH_SIZE, epochs=TRAIN_EPOCHS, \n", - " warmup_rate=0.1):\n", - " \"\"\"Creates an AdamWeightDecay optimizer with learning rate schedule.\"\"\"\n", - " train_data_size = ds_info.splits['train'].num_examples\n", - " \n", - " steps_per_epoch = int(train_data_size / batch_size)\n", - " num_train_steps = steps_per_epoch * epochs\n", - " num_warmup_steps = int(warmup_rate * num_train_steps) \n", - "\n", - " # Creates learning schedule.\n", - " lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(\n", - " initial_learning_rate=learning_rate,\n", - " decay_steps=num_train_steps,\n", - " end_learning_rate=0.0) \n", - " \n", - " return optimization.AdamWeightDecay(\n", - " learning_rate=lr_schedule,\n", - " weight_decay_rate=0.01,\n", - " epsilon=1e-6,\n", - " exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "KX_Hzl3l6w-H" - }, - "outputs": [], - "source": [ - "optimizer = bert_optimizer(learning_rate=1e-4)\n", - "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "metrics = tf.metrics.SparseCategoricalAccuracy()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ptn9Cupe6z7o" - }, - "outputs": [], - "source": [ - "fit_configs = dict(batch_size=TRAIN_BATCH_SIZE,\n", - " epochs=TRAIN_EPOCHS,\n", - " validation_batch_size=EVAL_BATCH_SIZE, \n", - " validation_data=ind_eval_data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0ZK5PBwW61jd" - }, - "outputs": [], - "source": [ - "sngp_model = SNGPBertClassifier()\n", - "sngp_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)\n", - "sngp_model.fit(train_examples, train_labels, **fit_configs)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cpDsgTYx63tO" - }, - "source": [ - "### Evaluate OOD performance" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "d5NGVe7L67bB" - }, - "source": [ - "Evaluate how well the model can detect the unfamiliar out-of-domain queries. 
For rigorous evaluation, use the OOD evaluation dataset `ood_eval_dataset` built earlier." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "cellView": "form", - "id": "yyLgt_lL7APo" - }, - "outputs": [], - "source": [ - "#@title\n", - "\n", - "def oos_predict(model, ood_eval_dataset, **model_kwargs):\n", - " oos_labels = []\n", - " oos_probs = []\n", - "\n", - " ood_eval_dataset = ood_eval_dataset.batch(EVAL_BATCH_SIZE)\n", - " for oos_batch in ood_eval_dataset:\n", - " oos_text_batch = oos_batch[\"text\"]\n", - " oos_label_batch = oos_batch[\"label\"] \n", - "\n", - " pred_logits = model(oos_text_batch, **model_kwargs)\n", - " pred_probs_all = tf.nn.softmax(pred_logits, axis=-1)\n", - " pred_probs = tf.reduce_max(pred_probs_all, axis=-1)\n", - "\n", - " oos_labels.append(oos_label_batch)\n", - " oos_probs.append(pred_probs)\n", - "\n", - " oos_probs = tf.concat(oos_probs, axis=0)\n", - " oos_labels = tf.concat(oos_labels, axis=0) \n", - "\n", - " return oos_probs, oos_labels" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Dmc2tVXs6_uo" - }, - "source": [ - "Computes the OOD probabilities as $1 - p(x)$, where $p(x)=softmax(logit(x))$ is the predictive probability." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_9aFVVDO7C7o" - }, - "outputs": [], - "source": [ - "sngp_probs, ood_labels = oos_predict(sngp_model, ood_eval_dataset)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "_PC0wwZp7GJD" - }, - "outputs": [], - "source": [ - "ood_probs = 1 - sngp_probs" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "AsandMTX7HjX" - }, - "source": [ - "Now evaluate how well the model's uncertainty score `ood_probs` predicts the out-of-domain label. First compute the Area under precision-recall curve (AUPRC) for OOD probability v.s. OOD detection accuracy." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "0u5Wx8AP7Mdx" - }, - "outputs": [], - "source": [ - "precision, recall, _ = sklearn.metrics.precision_recall_curve(ood_labels, ood_probs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "axcctOsh7N5A" - }, - "outputs": [], - "source": [ - "auprc = sklearn.metrics.auc(recall, precision)\n", - "print(f'SNGP AUPRC: {auprc:.4f}')" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "U_GEqxq-7Q1Y" - }, - "source": [ - "This matches the SNGP performance reported at the CLINC OOS benchmark under the [Uncertainty Baselines](https://github.com/google/uncertainty-baselines)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "8H4vYcyd7Ux2" - }, - "source": [ - "Next, examine the model's quality in [uncertainty calibration](https://scikit-learn.org/stable/modules/calibration.html), i.e., whether the model's predictive probability corresponds to its predictive accuracy. A well-calibrated model is considered trust-worthy, since, for example, its predictive probability $p(x)=0.8$ means that the model is correct 80% of the time." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "x5GxrSWJ7SYn" - }, - "outputs": [], - "source": [ - "prob_true, prob_pred = sklearn.calibration.calibration_curve(\n", - " ood_labels, ood_probs, n_bins=10, strategy='quantile')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ozzJM-D-7XVq" - }, - "outputs": [], - "source": [ - "plt.plot(prob_pred, prob_true)\n", - "\n", - "plt.plot([0., 1.], [0., 1.], c='k', linestyle=\"--\")\n", - "plt.xlabel('Predictive Probability')\n", - "plt.ylabel('Predictive Accuracy')\n", - "plt.title('Calibration Plots, SNGP')\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "36M6HeHx7ZI4" - }, - "source": [ - "## Resources and further reading" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "xdFTpyaP0A-N" - }, - "source": [ - "* See the [SNGP tutorial](https://www.tensorflow.org/tutorials/understanding/sngp) for an detailed walkthrough of implementing SNGP from scratch. \n", - "* See [Uncertainty Baselines](https://github.com/google/uncertainty-baselines) for the implementation of SNGP model (and many other uncertainty methods) on a wide variety of benchmark datasets (e.g., [CIFAR](https://www.tensorflow.org/datasets/catalog/cifar100), [ImageNet](https://www.tensorflow.org/datasets/catalog/imagenet2012), [Jigsaw toxicity detection](https://www.tensorflow.org/datasets/catalog/wikipedia_toxicity_subtypes), etc).\n", - "* For a deeper understanding of the SNGP method, check out the paper [Simple and Principled Uncertainty Estimation with Deterministic Deep Learning via Distance Awareness](https://arxiv.org/abs/2006.10108).\n" - ] - } - ], - "metadata": { - "accelerator": "GPU", - "colab": { - "collapsed_sections": [], - "name": "uncertainty_quantification_with_sngp_bert.ipynb", - "private_outputs": true, - "provenance": [ - { - "file_id": "1rpzuIuHNW4nnnj5mi1NhV9gjmiRy_QWB", - "timestamp": 1622128463249 - }, - { - "file_id": "/piper/depot/google3/third_party/tensorflow_text/g3doc/tutorials/uncertainty_quantification_with_sngp_bert.ipynb?workspaceId=markdaoust:no-nightly::citc", - "timestamp": 1622127860630 - } - ], - "toc_visible": true - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -}
diff --git a/third_party/tensorflow-text/src/examples/keras_example_174.ipynb b/third_party/tensorflow-text/src/examples/keras_example_174.ipynb deleted file mode 100644 index 628de4b1..0000000 --- a/third_party/tensorflow-text/src/examples/keras_example_174.ipynb +++ /dev/null
@@ -1,163 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "TF Text / Keras example (#174)", - "provenance": [] - }, - "source": [ - "##### Copyright 2018 The TensorFlow Authors.\n", - "\n", - "Licensed under the Apache License, Version 2.0 (the \"License\");" - ], - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - } - }, - "cells": [ - { - "cell_type": "code", - "metadata": { - "id": "0aJ6YQE6oB9x", - "colab_type": "code", - "outputId": "ca711144-aed0-4b5b-ac66-9353508fceca", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 632 - } - }, - "source": [ - "!pip install tensorflow_text==2.0.1" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "bMEL3ylpoIEP", - "colab_type": "code", - "colab": {} - }, - "source": [ - "import tensorflow as tf\n", - "import tensorflow_text as text" - ], - "execution_count": 0, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "J4pMEBBBondS", - "colab_type": "code", - "outputId": "09fd1152-9ea0-4d97-fca7-0f68e6bef011", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "ragged_input = tf.ragged.constant([[1, 2, 3, 4, 5], [5, 6]])\n", - "input_data = tf.data.Dataset.from_tensor_slices(ragged_input).batch(2)\n", - "\n", - "model = tf.keras.Sequential([\n", - " tf.keras.layers.InputLayer(input_shape=(None,), dtype='int32', ragged=True),\n", - " text.keras.layers.ToDense(pad_value=0, mask=True),\n", - " tf.keras.layers.Embedding(100, 16),\n", - " tf.keras.layers.LSTM(32),\n", - " tf.keras.layers.Dense(32, activation='relu'),\n", - " tf.keras.layers.Dense(1, activation='sigmoid')\n", - "])\n", - "\n", - "model.compile(\n", - " optimizer=\"rmsprop\",\n", - " loss=\"binary_crossentropy\",\n", - " metrics=[\"accuracy\"])\n", - "\n", - "output = model.predict(input_data)\n", - "print(output)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "[[0.49998033]\n", - " [0.5012409 ]]\n" - ], - "name": "stdout" - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nRJJaWFfsgA3", - "colab_type": "code", - "outputId": "d7d0434f-4de5-4e0a-b57a-06dd03c47d9f", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 51 - } - }, - "source": [ - "def _CreateTable(vocab, num_oov=1):\n", - " init = tf.lookup.KeyValueTensorInitializer(\n", - " vocab,\n", - " tf.range(tf.size(vocab, out_type=tf.int64), dtype=tf.int64),\n", - " key_dtype=tf.string,\n", - " value_dtype=tf.int64)\n", - " return tf.lookup.StaticVocabularyTable(\n", - " init, num_oov, lookup_key_dtype=tf.string)\n", - "\n", - "reviews_data_array = ['I really liked this movie', 'not my favorite']\n", - "reviews_labels_array = [1,0]\n", - "train_x = tf.constant(reviews_data_array)\n", - "train_y = tf.constant(reviews_labels_array)\n", - "\n", - "a = _CreateTable(['I', 'really', 'liked', 'this', 'movie', 'not', 'my', 'favorite'])\n", - "\n", - "def preprocess(data, labels):\n", - " t = text.WhitespaceTokenizer()\n", - " data = t.tokenize(data)\n", - " # data = data.merge_dims(-2,-1)\n", - " ids = tf.ragged.map_flat_values(a.lookup, data)\n", - " return (ids, labels)\n", - "\n", - "train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(2)\n", - "train_dataset = train_dataset.map(preprocess)\n", - "\n", - "model = tf.keras.Sequential([\n", - " tf.keras.layers.InputLayer(input_shape=(None,), dtype='int64', ragged=True),\n", - " 
text.keras.layers.ToDense(pad_value=0, mask=True),\n", - " tf.keras.layers.Embedding(100, 16),\n", - " tf.keras.layers.LSTM(32),\n", - " tf.keras.layers.Dense(32, activation='relu'),\n", - " tf.keras.layers.Dense(1, activation='sigmoid')\n", - "])\n", - "\n", - "model.compile(\n", - " optimizer=\"rmsprop\",\n", - " loss=\"binary_crossentropy\",\n", - " metrics=[\"accuracy\"])\n", - "\n", - "output = model.fit(train_dataset, epochs=1, verbose=1)\n", - "print(output)" - ], - "execution_count": 0, - "outputs": [ - { - "output_type": "stream", - "text": [ - "1/1 [==============================] - 2s 2s/step - loss: 0.6915 - accuracy: 1.0000\n", - "<tensorflow.python.keras.callbacks.History object at 0x7f7d64b5e5f8>\n" - ], - "name": "stdout" - } - ] - } - ] -} \ No newline at end of file
diff --git a/third_party/tensorflow-text/src/oss_scripts/build_docs.py b/third_party/tensorflow-text/src/oss_scripts/build_docs.py deleted file mode 100644 index 170403d3..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/build_docs.py +++ /dev/null
@@ -1,99 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Tool to generate external api_docs. - -python build_docs.py --output_dir=/tmp/text_api -""" -import os - -from absl import app -from absl import flags - -import tensorflow as tf - -from tensorflow_docs.api_generator import doc_controls -from tensorflow_docs.api_generator import generate_lib -from tensorflow_docs.api_generator import public_api - -import tensorflow_text as text - -PROJECT_SHORT_NAME = "text" -PROJECT_FULL_NAME = "TensorFlow Text" - -FLAGS = flags.FLAGS - -flags.DEFINE_string( - "output_dir", - default="/tmp/text_api", - help="Where to write the resulting docs to.") -flags.DEFINE_string( - "code_url_prefix", - "http://github.com/tensorflow/text/blob/master/tensorflow_text", - "The url prefix for links to code.") - -flags.DEFINE_bool("search_hints", True, - "Include metadata search hints in the generated files") - -flags.DEFINE_string("site_path", "/text/api_docs/python", - "Path prefix in the _toc.yaml") - - -def _hide_layer_and_module_methods(): - """Hide methods and properties defined in the base classes of keras layers.""" - # __dict__ only sees attributes defined in *this* class, not on parent classes - # Needed to ignore redudant subclass documentation - module_contents = list(tf.Module.__dict__.items()) - layer_contents = list(tf.keras.layers.Layer.__dict__.items()) - - for name, obj in module_contents + layer_contents: - if name == "__init__": - continue - - if isinstance(obj, property): - obj = obj.fget - - if isinstance(obj, (staticmethod, classmethod)): - obj = obj.__func__ - - try: - doc_controls.do_not_doc_in_subclasses(obj) - except AttributeError: - pass - - -def build_docs(): - """Build api docs for tensorflow_text.""" - _hide_layer_and_module_methods() - del text.keras # keras is empty. - - doc_generator = generate_lib.DocGenerator( - root_title="TensorFlow Text", - py_modules=[("text", text)], - base_dir=os.path.dirname(text.__file__), - search_hints=True, - code_url_prefix=FLAGS.code_url_prefix, - site_path="text/api_docs/python", - callbacks=[public_api.explicit_package_contents_filter]) - doc_generator.build(FLAGS.output_dir) - - -def main(_): - # Build API docs - build_docs() - - -if __name__ == "__main__": - app.run(main)
diff --git a/third_party/tensorflow-text/src/oss_scripts/configure.sh b/third_party/tensorflow-text/src/oss_scripts/configure.sh deleted file mode 100755 index e450c3401..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/configure.sh +++ /dev/null
@@ -1,106 +0,0 @@ -#!/bin/bash -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -function write_to_bazelrc() { - echo "$1" >> .bazelrc -} - -function write_action_env_to_bazelrc() { - write_to_bazelrc "build --action_env $1=\"$2\"" -} - -osname="$(uname -s | tr 'A-Z' 'a-z')" -echo $osname - -function is_windows() { - # On windows, the shell script is actually running in msys - [[ "${osname}" =~ msys_nt*|mingw*|cygwin*|uwin* ]] -} - -function is_macos() { - [[ "${osname}" == "darwin" ]] -} - -# Remove .bazelrc if it already exist -[ -e .bazelrc ] && rm .bazelrc - -if [[ $(pip show tensorflow) == *tensorflow* ]] || [[ $(pip show tf-nightly) == *tf-nightly* ]] ; then - echo 'Using installed tensorflow.' -else - echo 'Installing tensorflow.' - pip install tensorflow==2.7.0 -fi - -if is_windows; then - # ICU must be built as a static library, so the external data must be built in - sed -i -e 's/":headers",$/":headers", ":windows_static_link_data",/' third_party/icu/BUILD.bzl -fi - -write_to_bazelrc "build:manylinux2010 --crosstool_top=@ubuntu18.04-gcc7_manylinux2010-cuda11.2-cudnn8.1-tensorrt7.2_config_cuda//crosstool:toolchain" -write_to_bazelrc "build --spawn_strategy=standalone" -write_to_bazelrc "build --strategy=Genrule=standalone" -write_to_bazelrc "build -c opt" -write_to_bazelrc "build --define=framework_shared_object=true" -write_to_bazelrc "build --experimental_repo_remote_exec" -# By default, build in C++ 14 mode. -write_to_bazelrc "build --cxxopt=-std=c++14" -write_to_bazelrc "build --host_cxxopt=-std=c++14" - -# Config for Android build. 
-write_to_bazelrc "build:android --crosstool_top=//external:android/crosstool" -write_to_bazelrc "build:android --host_crosstool_top=@bazel_tools//tools/cpp:toolchain" -write_to_bazelrc "build:android --action_env TF_HEADER_DIR=\"\"" -write_to_bazelrc "build:android --action_env TF_SHARED_LIBRARY_DIR=\"\"" -write_to_bazelrc "build:android --action_env TF_SHARED_LIBRARY_NAME=\"\"" -write_to_bazelrc "build:android_arm --config=android" -write_to_bazelrc "build:android_arm --cpu=armeabi-v7a" -write_to_bazelrc "build:android_arm --fat_apk_cpu=armeabi-v7a" -write_to_bazelrc "build:android_arm64 --config=android" -write_to_bazelrc "build:android_arm64 --cpu=arm64-v8a" -write_to_bazelrc "build:android_arm64 --fat_apk_cpu=arm64-v8a" -write_to_bazelrc "build:android_x86 --config=android" -write_to_bazelrc "build:android_x86 --cpu=x86" -write_to_bazelrc "build:android_x86 --fat_apk_cpu=x86" -write_to_bazelrc "build:android_x86_64 --config=android" -write_to_bazelrc "build:android_x86_64 --cpu=x86_64" -write_to_bazelrc "build:android_x86_64 --fat_apk_cpu=x86_64" - -if is_windows; then - write_to_bazelrc "build --copt=/experimental:preprocessor" - write_to_bazelrc "build --host_copt=/experimental:preprocessor" -fi - -TF_CFLAGS=( $(python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()))" | awk '{print $1}') ) -TF_LFLAGS=( $(python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" | awk '{print $1}') ) -TF_LFLAGS_2=( $(python -c "import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()))" | awk '{print $2}') ) -TF_ABIFLAG=$(python -c "import tensorflow as tf; print(tf.sysconfig.CXX11_ABI_FLAG)") - -HEADER_DIR=${TF_CFLAGS:2} -SHARED_LIBRARY_DIR=${TF_LFLAGS:2} -SHARED_LIBRARY_NAME=$(echo $TF_LFLAGS_2 | rev | cut -d":" -f1 | rev) -if is_macos; then - SHARED_LIBRARY_NAME="libtensorflow_framework.dylib" -fi -if is_windows; then - HEADER_DIR=$(echo "$HEADER_DIR" | tr '\\' '/') - SHARED_LIBRARY_DIR="${HEADER_DIR:0:-7}python" - SHARED_LIBRARY_NAME="_pywrap_tensorflow_internal.lib" -fi -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$SHARED_LIBRARY_DIR -write_action_env_to_bazelrc "TF_HEADER_DIR" ${HEADER_DIR} -write_action_env_to_bazelrc "TF_SHARED_LIBRARY_DIR" ${SHARED_LIBRARY_DIR} -write_action_env_to_bazelrc "TF_SHARED_LIBRARY_NAME" ${SHARED_LIBRARY_NAME} -write_action_env_to_bazelrc "TF_CXX11_ABI_FLAG" ${TF_ABIFLAG}
diff --git a/third_party/tensorflow-text/src/oss_scripts/pip_package/BUILD b/third_party/tensorflow-text/src/oss_scripts/pip_package/BUILD deleted file mode 100644 index b4cbe10..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/pip_package/BUILD +++ /dev/null
@@ -1,17 +0,0 @@ -# Tools for building the TF.Text pip package. - -package(default_visibility = ["//visibility:private"]) - -licenses(["notice"]) # Apache 2.0 - -sh_binary( - name = "build_pip_package", - srcs = ["build_pip_package.sh"], - data = [ - "LICENSE", - "MANIFEST.in", - "setup.nightly.py", - "setup.py", - "//tensorflow_text", - ], -)
diff --git a/third_party/tensorflow-text/src/oss_scripts/pip_package/LICENSE b/third_party/tensorflow-text/src/oss_scripts/pip_package/LICENSE deleted file mode 100644 index 4862420..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/pip_package/LICENSE +++ /dev/null
@@ -1,203 +0,0 @@ -Copyright 2018 The TensorFlow Authors. All rights reserved. - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2017, The TensorFlow Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License.
diff --git a/third_party/tensorflow-text/src/oss_scripts/pip_package/MANIFEST.in b/third_party/tensorflow-text/src/oss_scripts/pip_package/MANIFEST.in deleted file mode 100644 index 9969409..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/pip_package/MANIFEST.in +++ /dev/null
@@ -1 +0,0 @@ -recursive-include tensorflow_text *.dylib *.pyd *.so *.so
diff --git a/third_party/tensorflow-text/src/oss_scripts/pip_package/build_pip_package.sh b/third_party/tensorflow-text/src/oss_scripts/pip_package/build_pip_package.sh deleted file mode 100755 index 1e5470c..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/pip_package/build_pip_package.sh +++ /dev/null
@@ -1,94 +0,0 @@ -#!/usr/bin/env bash -# Tool to build the TensorFlow Text pip package. -# -# Usage: -# bazel build oss_scripts/pip_package:build_pip_package -# bazel-bin/oss_scripts/build_pip_package -# -# Arguments: -# output_dir: An output directory. Defaults to `/tmp/tensorflow_text_pkg`. - -set -e # fail and exit on any command erroring - -die() { - echo >&2 "$@" - exit 1 -} - -osname="$(uname -s | tr 'A-Z' 'a-z')" -echo $osname - -function is_windows() { - # On windows, the shell script is actually running in msys - [[ "${osname}" =~ msys_nt*|mingw*|cygwin*|uwin* ]] -} - -function is_macos() { - [[ "${osname}" == "darwin" ]] -} - -function is_nightly() { - [[ "$IS_NIGHTLY" == "nightly" ]] -} - -function abspath() { - cd "$(dirname $1)" - echo "$PWD/$(basename $1)" - cd "$OLDPWD" -} - -plat_name="" -if is_macos; then - plat_name="--plat-name macosx-10.9-x86_64" -fi - -main() { - local output_dir="$1" - - if [[ -z "${output_dir}" ]]; then - output_dir="/tmp/tensorflow_text_pkg" - fi - mkdir -p ${output_dir} - output_dir=$(abspath "${output_dir}") - echo "=== Destination directory: ${output_dir}" - - if [[ ! -d "bazel-bin/tensorflow_text" ]]; then - die "Could not find bazel-bin. Did you run from the root of the build tree?" - fi - - local temp_dir="$(mktemp -d)" - trap "rm -rf ${temp_dir}" EXIT - echo "=== Using tmpdir ${temp_dir}" - - if is_windows; then - runfiles="bazel-bin/oss_scripts/pip_package/build_pip_package.exe.runfiles" - else - runfiles="bazel-bin/oss_scripts/pip_package/build_pip_package.runfiles" - fi - cp -LR \ - "${runfiles}/org_tensorflow_text/tensorflow_text" \ - "${temp_dir}" - if is_nightly; then - cp "${runfiles}/org_tensorflow_text/oss_scripts/pip_package/setup.nightly.py" \ - "${temp_dir}" - else - cp "${runfiles}/org_tensorflow_text/oss_scripts/pip_package/setup.py" \ - "${temp_dir}" - fi - cp "${runfiles}/org_tensorflow_text/oss_scripts/pip_package/MANIFEST.in" \ - "${temp_dir}" - cp "${runfiles}/org_tensorflow_text/oss_scripts/pip_package/LICENSE" \ - "${temp_dir}" - - pushd "${temp_dir}" > /dev/null - - # Build pip package - if is_nightly; then - python setup.nightly.py bdist_wheel --universal $plat_name - else - python setup.py bdist_wheel --universal $plat_name - fi - cp dist/*.whl "${output_dir}" -} - -main "$@"
diff --git a/third_party/tensorflow-text/src/oss_scripts/pip_package/setup.nightly.py b/third_party/tensorflow-text/src/oss_scripts/pip_package/setup.nightly.py deleted file mode 100644 index 53ffc89..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/pip_package/setup.nightly.py +++ /dev/null
@@ -1,103 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""TF.Text is a TensorFlow library of text related ops, modules, and subgraphs. - -TF.Text is a TensorFlow library of text related ops, modules, and subgraphs. The -library can perform the preprocessing regularly required by text-based models, -and includes other features useful for sequence modeling not provided by core -TensorFlow. - -See the README on GitHub for further documentation. -http://github.com/tensorflow/text -""" - -import os - -from setuptools import find_packages -from setuptools import setup -from setuptools.command.install import install -from setuptools.dist import Distribution - -project_name = 'tensorflow-text-nightly' -project_version = 'REPLACE_ME' - - -class BinaryDistribution(Distribution): - """This class is needed in order to create OS specific wheels.""" - - def is_pure(self): - return False - - def has_ext_modules(self): - return True - - -class InstallPlatlib(install): - """This is needed to set the library to platlib compliant.""" - - def finalize_options(self): - """For more info; see http://github.com/google/or-tools/issues/616 .""" - install.finalize_options(self) - self.install_lib = self.install_platlib - self.install_libbase = self.install_lib - self.install_lib = os.path.join(self.install_lib, self.extra_dirs) - - -DOCLINES = __doc__.split('\n') - -setup( - name=project_name, - version=project_version.replace('-', ''), - description=DOCLINES[0], - long_description='\n'.join(DOCLINES[2:]), - author='Google Inc.', - author_email='packages@tensorflow.org', - url='http://github.com/tensorflow/text', - license='Apache 2.0', - packages=find_packages(), - include_package_data=True, - zip_safe=False, - cmdclass={'install': InstallPlatlib}, - distclass=BinaryDistribution, - install_requires=[ - 'tensorflow_hub>=0.8.0', - ], - extras_require={ - 'tests': [ - 'absl-py', - 'pytest', - 'tensorflow-datasets>=3.2.0', - ], - }, - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3 :: Only', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], - keywords='tensorflow text machine learning', -)
diff --git a/third_party/tensorflow-text/src/oss_scripts/pip_package/setup.py b/third_party/tensorflow-text/src/oss_scripts/pip_package/setup.py deleted file mode 100644 index 4a67b90..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/pip_package/setup.py +++ /dev/null
@@ -1,105 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""TF.Text is a TensorFlow library of text related ops, modules, and subgraphs. - -TF.Text is a TensorFlow library of text related ops, modules, and subgraphs. The -library can perform the preprocessing regularly required by text-based models, -and includes other features useful for sequence modeling not provided by core -TensorFlow. - -See the README on GitHub for further documentation. -http://github.com/tensorflow/text -""" - -import os - -from setuptools import find_packages -from setuptools import setup -from setuptools.command.install import install -from setuptools.dist import Distribution - -project_name = 'tensorflow-text' -project_version = '2.7.3' - - -class BinaryDistribution(Distribution): - """This class is needed in order to create OS specific wheels.""" - - def is_pure(self): - return False - - def has_ext_modules(self): - return True - - -class InstallPlatlib(install): - """This is needed to set the library to platlib compliant.""" - - def finalize_options(self): - """For more info; see http://github.com/google/or-tools/issues/616 .""" - install.finalize_options(self) - self.install_lib = self.install_platlib - self.install_libbase = self.install_lib - self.install_lib = os.path.join(self.install_lib, self.extra_dirs) - - -DOCLINES = __doc__.split('\n') - -setup( - name=project_name, - version=project_version.replace('-', ''), - description=DOCLINES[0], - long_description='\n'.join(DOCLINES[2:]), - author='Google Inc.', - author_email='packages@tensorflow.org', - url='http://github.com/tensorflow/text', - license='Apache 2.0', - packages=find_packages(), - include_package_data=True, - zip_safe=False, - cmdclass={'install': InstallPlatlib}, - distclass=BinaryDistribution, - install_requires=[ - 'tensorflow>=2.7.0, <2.8', - 'tensorflow_hub>=0.8.0', - ], - extras_require={ - 'tensorflow_cpu': ['tensorflow-cpu>=2.4.0rc0, <2.5',], - 'tests': [ - 'absl-py', - 'pytest', - 'tensorflow-datasets>=3.2.0', - ], - }, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3 :: Only', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Software Development', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules', - ], - keywords='tensorflow text machine learning', -)
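Both deleted setup scripts rely on the same setuptools trick to force an OS- and architecture-specific wheel: a Distribution subclass that claims extension modules, plus an install command that redirects into the platform library directory (platlib). The condensed sketch below is not part of the deleted files; the package name is hypothetical, and the originals additionally re-append install.extra_dirs onto install_lib.

from setuptools import setup
from setuptools.command.install import install
from setuptools.dist import Distribution


class BinaryDistribution(Distribution):
    """Report extension modules so setuptools/pip produce a platform-specific wheel."""

    def is_pure(self):
        return False

    def has_ext_modules(self):
        return True


class InstallPlatlib(install):
    """Install into platlib (platform site-packages) instead of purelib."""

    def finalize_options(self):
        install.finalize_options(self)
        if self.distribution.has_ext_modules():
            self.install_lib = self.install_platlib


setup(
    name='example-binary-package',  # hypothetical; the deleted scripts use tensorflow-text / tensorflow-text-nightly
    version='0.0.1',
    packages=[],
    distclass=BinaryDistribution,
    cmdclass={'install': InstallPlatlib},
)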
diff --git a/third_party/tensorflow-text/src/oss_scripts/run_build.sh b/third_party/tensorflow-text/src/oss_scripts/run_build.sh deleted file mode 100755 index 949efa7..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/run_build.sh +++ /dev/null
@@ -1,18 +0,0 @@ -#!/bin/bash -set -e # fail and exit on any command erroring -set -x # print evaluated commands - -osname="$(uname -s)" -if [[ $osname == "Darwin" ]]; then - # Update to macos extensions - sed -i '' 's/".so"/".dylib"/' tensorflow_text/tftext.bzl - perl -pi -e "s/(load_library.load_op_library.*)\\.so'/\$1.dylib'/" $(find tensorflow_text/python -type f) - export CC_OPT_FLAGS='-mavx' -fi - -# Run configure. -./oss_scripts/configure.sh - -# Build the pip package. -bazel build --enable_runfiles oss_scripts/pip_package:build_pip_package -./bazel-bin/oss_scripts/pip_package/build_pip_package .
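The Darwin branch of the deleted run_build.sh is a terse pair of sed/perl one-liners. The Python sketch below restates what that branch does, purely for illustration: it is not part of the original sources, assumes it is run from the repository root, and only visits *.py files, whereas the original perl command touched every file under tensorflow_text/python.

import pathlib
import platform
import re

if platform.system() == 'Darwin':
    # Switch the shared-object suffix used by the Bazel macros to macOS's .dylib.
    bzl = pathlib.Path('tensorflow_text/tftext.bzl')
    bzl.write_text(bzl.read_text().replace('".so"', '".dylib"'))

    # Point load_library.load_op_library(...) calls at the .dylib instead of the .so.
    pattern = re.compile(r"(load_library\.load_op_library.*)\.so'")
    for path in pathlib.Path('tensorflow_text/python').rglob('*.py'):
        text = path.read_text()
        patched = pattern.sub(r"\1.dylib'", text)
        if patched != text:
            path.write_text(patched)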
diff --git a/third_party/tensorflow-text/src/oss_scripts/run_tests.sh b/third_party/tensorflow-text/src/oss_scripts/run_tests.sh deleted file mode 100755 index 3196cbd..0000000 --- a/third_party/tensorflow-text/src/oss_scripts/run_tests.sh +++ /dev/null
@@ -1,7 +0,0 @@ -#!/bin/bash - -set -x # print commands as they are executed -set -e # fail and exit on any command erroring - -./oss_scripts/configure.sh -bazel test --test_output=errors --keep_going --jobs=1 tensorflow_text:all
diff --git a/third_party/tensorflow-text/src/tensorflow_text/BUILD b/third_party/tensorflow-text/src/tensorflow_text/BUILD deleted file mode 100644 index e8a447e7..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/BUILD +++ /dev/null
@@ -1,1393 +0,0 @@ -load("//tensorflow_text:tftext.bzl", "py_tf_text_library") - -# [internal] load build_test.bzl -load("@org_tensorflow//tensorflow/lite:build_def.bzl", "tflite_cc_shared_object") - -# Visibility rules -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) - -exports_files(["LICENSE"]) - -#################################### -# Public TF.Text library to import # -#################################### - -# Most TF users will want to add this as their dependency. -alias( - name = "tensorflow_text", - actual = ":ops", - visibility = ["//visibility:public"], -) - -cc_library( - name = "ops_lib", - visibility = ["//visibility:public"], - deps = [ - ":constrained_sequence_op_cc", - ":fast_wordpiece_tokenizer_cc", - ":mst_ops_cc", - ":normalize_ops_cc", - ":regex_split_ops_cc", - ":sentence_breaking_ops_cc", - ":sentencepiece_tokenizer_cc", - ":split_merge_from_logits_tokenizer_cc", - ":split_merge_tokenizer_cc", - ":state_based_sentence_breaker_op_cc", - ":text_similarity_metric_ops_cc", - ":unicode_script_tokenizer_cc", - ":whitespace_tokenizer_cc", - ":whitespace_tokenizer_v2_cc", - ":wordpiece_tokenizer_cc", - ], -) - -tflite_cc_shared_object( - name = "libtftextops_for_testing.so", - deps = [":ops_lib"], -) - -py_library( - name = "ops", - srcs = [ - "__init__.py", - "python/__init__.py", - "python/keras/__init__.py", - "python/keras/layers/__init__.py", - "python/metrics/__init__.py", - "python/numpy/__init__.py", - "python/ops/__init__.py", - "tools/__init__.py", - ], - srcs_version = "PY3", - deps = [ - ":bert_tokenizer", - ":create_feature_bitmask_op", - ":fast_wordpiece_tokenizer", - ":greedy_constrained_sequence_op", - ":hub_module_splitter", - ":hub_module_tokenizer", - ":item_selector_ops", - ":masking_ops", - ":mst_ops", - ":ngrams_op", - ":normalize_ops", - ":pad_along_dimension_op", - ":pad_model_inputs_ops", - ":pointer_ops", - ":regex_split_ops", - ":segment_combiner_ops", - ":sentence_breaking_ops", - ":sentencepiece_tokenizer", - ":sliding_window_op", - ":split_merge_from_logits_tokenizer", - ":split_merge_tokenizer", - ":splitter", - ":state_based_sentence_breaker_op", - ":string_ops", - ":text_similarity_metric_ops", - ":todense_layer", - ":tokenization", - ":tokenization_layers", - ":trimmer_ops", - ":unicode_char_tokenizer", - ":unicode_script_tokenizer", - ":viterbi_constrained_sequence_op", - ":viterbi_decode", - ":whitespace_tokenizer", - ":whitespace_tokenizer_v2", - ":wordpiece_tokenizer", - ":wordshape_ops", - # python:util tensorflow dep, - "//tensorflow_text/core/pybinds:tflite_registrar", - "//tensorflow_text/tools/wordpiece_vocab", - ], -) - -# build_test - -# public_names_test - -# This is required for other external users to build tests using these models. -exports_files( - glob(["python/ops/test_data/**"]), -) - -########################## -# Individual tf.text ops # -########################## -# The py libraries are ordered alphabetically and are grouped with their corresponding tests. 
- -py_library( - name = "bert_tokenizer", - srcs = ["python/ops/bert_tokenizer.py"], - tags = ["ignore_srcs"], - deps = [ - ":normalize_ops", - ":regex_split_ops", - ":tokenization", - ":wordpiece_tokenizer", - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:string_ops tensorflow dep, - # python/eager:monitoring tensorflow dep, - ], -) - -py_test( - name = "bert_tokenizer_test", - size = "small", - timeout = "moderate", - srcs = ["python/ops/bert_tokenizer_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":bert_tokenizer", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:lookup_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_tf_text_library( - name = "constrained_sequence_op", - srcs = [], - cc_op_defs = ["core/ops/constrained_sequence_op.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:constrained_sequence_kernel", - ], - deps = [ - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python/ops/ragged tensorflow dep, - ], -) - -py_library( - name = "create_feature_bitmask_op", - srcs = ["python/ops/create_feature_bitmask_op.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:check_ops tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - ], -) - -py_test( - name = "create_feature_bitmask_op_test", - size = "small", - srcs = ["python/ops/create_feature_bitmask_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":create_feature_bitmask_op", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - ], -) - -py_tf_text_library( - name = "fast_wordpiece_tokenizer", - srcs = ["python/ops/fast_wordpiece_tokenizer.py"], - cc_op_defs = ["core/ops/fast_wordpiece_tokenizer_op.cc"], - cc_op_kernels = [ - # lite/kernels/shim:tf_op_shim tensorflow dep, - "//tensorflow_text/core/kernels:fast_wordpiece_tokenizer_kernel", - ], - deps = [ - ":tokenization", - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - "//tensorflow_text/core/pybinds:pywrap_fast_wordpiece_tokenizer_model_builder", - ], -) - -py_test( - name = "fast_wordpiece_tokenizer_test", - size = "small", - srcs = ["python/ops/fast_wordpiece_tokenizer_test.py"], - data = [ - ":python/ops/test_data/fast_wordpiece_tokenizer_model.fb", - ], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":tensorflow_text", - "@absl_py//absl/testing:parameterized", - # tensorflow package dep, - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/compat tensorflow dep, - # python/data/kernel_tests:test_base tensorflow dep, - # python/ops/ragged:ragged_factory_ops 
tensorflow dep, - ], -) - -py_tf_text_library( - name = "greedy_constrained_sequence_op", - srcs = ["python/ops/greedy_constrained_sequence_op.py"], - cc_op_kernels = [ - ":constrained_sequence_op", - ], - deps = [ - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "greedy_constrained_sequence_op_test", - size = "small", - srcs = ["python/ops/greedy_constrained_sequence_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":greedy_constrained_sequence_op", - # numpy dep, - # python:client_testlib tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "hub_module_splitter", - srcs = ["python/ops/hub_module_splitter.py"], - deps = [ - ":splitter", - "@org_tensorflow_hub//tensorflow_hub", - # python:array_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "hub_module_splitter_test", - size = "large", - srcs = ["python/ops/hub_module_splitter_test.py"], - data = [ - ":test_data_segmenter_hub_module", - ], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":ops", - # python:client_testlib tensorflow dep, - # python:framework_ops tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:lookup_ops tensorflow dep, - # python:variables tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "hub_module_tokenizer", - srcs = ["python/ops/hub_module_tokenizer.py"], - deps = [ - ":hub_module_splitter", - ":tokenization", - ], -) - -py_test( - name = "hub_module_tokenizer_test", - size = "large", - srcs = ["python/ops/hub_module_tokenizer_test.py"], - data = [ - ":test_data_segmenter_hub_module", - ], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":ops", - # python:client_testlib tensorflow dep, - # python:framework_ops tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:lookup_ops tensorflow dep, - # python:variables tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "item_selector_ops", - srcs = ["python/ops/item_selector_ops.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:control_flow_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:lookup_ops tensorflow dep, - # python:map_fn tensorflow dep, - # python:math_ops tensorflow dep, - # python:random_ops tensorflow dep, - # python:sort_ops tensorflow dep, - # python/ops/ragged:ragged_array_ops tensorflow dep, - # python/ops/ragged:ragged_batch_gather_ops tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - # python/ops/ragged:ragged_functional_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_math_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - # python/ops/ragged:ragged_tensor_shape tensorflow dep, - # python/ops/ragged:ragged_where_op tensorflow dep, - ], -) - -py_test( - name = "item_selector_ops_test", - size = "medium", - srcs = ["python/ops/item_selector_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":item_selector_ops", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:dtypes tensorflow 
dep, - # python:framework_test_lib tensorflow dep, - # python:math_ops tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "masking_ops", - srcs = ["python/ops/masking_ops.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:map_fn tensorflow dep, - # python:math_ops tensorflow dep, - # python:random_ops tensorflow dep, - # python:sort_ops tensorflow dep, - # python/ops/ragged:ragged_batch_gather_ops tensorflow dep, - # python/ops/ragged:ragged_functional_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_math_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - # python/ops/ragged:ragged_where_op tensorflow dep, - ], -) - -py_test( - name = "masking_ops_test", - size = "medium", - srcs = ["python/ops/masking_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":item_selector_ops", - ":masking_ops", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "mst_ops", - srcs = ["python/ops/mst_ops.py"], - cc_op_defs = ["core/ops/mst_ops.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:mst_op_kernels", - ], - deps = [ - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:standard_ops tensorflow dep, - ], -) - -py_test( - name = "mst_ops_test", - srcs = ["python/ops/mst_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":mst_ops", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:math_ops tensorflow dep, - ], -) - -py_library( - name = "ngrams_op", - srcs = ["python/ops/ngrams_op.py"], - deps = [ - ":sliding_window_op", - # python:errors tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_functional_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "ngrams_op_test", - size = "small", - srcs = ["python/ops/ngrams_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":ngrams_op", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "normalize_ops", - srcs = ["python/ops/normalize_ops.py"], - cc_op_defs = ["core/ops/normalize_ops.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:normalize_kernels", - ], - deps = [ - # python:dtypes tensorflow dep, - # python/ops/ragged:ragged_conversion_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "normalize_ops_test", - size = "small", - srcs = ["python/ops/normalize_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":normalize_ops", - # python:client_testlib tensorflow dep, - # python:errors tensorflow dep, - # 
python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "pad_along_dimension_op", - srcs = ["python/ops/pad_along_dimension_op.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "pad_along_dimension_op_test", - size = "medium", - srcs = ["python/ops/pad_along_dimension_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":pad_along_dimension_op", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/eager:context tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_library( - name = "pad_model_inputs_ops", - srcs = ["python/ops/pad_model_inputs_ops.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:constant_op tensorflow dep, - # python:control_flow_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:math_ops tensorflow dep, - # python:tensor_array_ops tensorflow dep, - # python/ops/ragged:ragged_array_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "pad_model_inputs_ops_test", - srcs = ["python/ops/pad_model_inputs_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":pad_model_inputs_ops", - "@absl_py//absl/testing:parameterized", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "pointer_ops", - srcs = ["python/ops/pointer_ops.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:check_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python/ops/ragged:ragged_functional_ops tensorflow dep, - # python/ops/ragged:ragged_gather_ops tensorflow dep, - # python/ops/ragged:ragged_math_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - # python/ops/ragged:ragged_where_op tensorflow dep, - # python/ops/ragged:segment_id_ops tensorflow dep, - ], -) - -py_test( - name = "gather_with_default_op_test", - srcs = ["python/ops/gather_with_default_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":pointer_ops", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - ], -) - -py_test( - name = "span_alignment_op_test", - srcs = ["python/ops/span_alignment_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":pointer_ops", - "@absl_py//absl/testing:parameterized", - # python:client_testlib tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_test( - name = "span_overlaps_op_test", - srcs = 
["python/ops/span_overlaps_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":pointer_ops", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/eager:context tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "ragged_tensor_to_tensor_test", - srcs = ["python/ragged/ragged_tensor_to_tensor_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":tensorflow_text", - # tensorflow package dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "regex_split_ops", - srcs = ["python/ops/regex_split_ops.py"], - cc_op_defs = ["core/ops/regex_split_ops.cc"], - cc_op_kernels = ["//tensorflow_text/core/kernels:regex_split_kernels"], - deps = [ - ":splitter", - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "regex_split_ops_test", - size = "medium", - srcs = ["python/ops/regex_split_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":regex_split_ops", - # python:client_testlib tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:platform tensorflow dep, - ], -) - -py_library( - name = "segment_combiner_ops", - srcs = ["python/ops/segment_combiner_ops.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - ], -) - -py_test( - name = "segment_combiner_ops_test", - srcs = ["python/ops/segment_combiner_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":segment_combiner_ops", - "@absl_py//absl/testing:parameterized", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "sentence_breaking_ops", - srcs = ["python/ops/sentence_breaking_ops.py"], - cc_op_defs = [ - "core/ops/sentence_breaking_ops.cc", - ], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:sentence_breaking_kernels", - ], - deps = [ - ":regex_split_ops", - # tf:lib tensorflow dep, - # python:dtypes tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "sentence_breaking_ops_test", - size = "small", - srcs = ["python/ops/sentence_breaking_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":sentence_breaking_ops", - "@absl_py//absl/testing:parameterized", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_tf_text_library( - name = "sentencepiece_tokenizer", - srcs = ["python/ops/sentencepiece_tokenizer.py"], - 
cc_op_defs = ["core/ops/sentencepiece_ops.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:sentencepiece_kernels", - ], - deps = [ - ":tokenization", - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_conversion_ops tensorflow dep, - # python/ops/ragged:ragged_string_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "sentencepiece_tokenizer_test", - size = "large", - srcs = ["python/ops/sentencepiece_tokenizer_test.py"], - data = [ - ":python/ops/test_data/test_oss_model.model", - ], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":sentencepiece_tokenizer", - "@absl_py//absl/testing:parameterized", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - # python/ops/ragged:ragged_gather_ops tensorflow dep, - # python/saved_model tensorflow dep, - ], -) - -py_library( - name = "sliding_window_op", - srcs = ["python/ops/sliding_window_op.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:errors tensorflow dep, - # python:framework_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "sliding_window_op_test", - size = "small", - srcs = ["python/ops/sliding_window_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":sliding_window_op", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "split_merge_from_logits_tokenizer", - srcs = ["python/ops/split_merge_from_logits_tokenizer.py"], - cc_op_defs = ["core/ops/tokenizer_from_logits_op.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:tokenizer_from_logits_kernel", - ], - deps = [ - ":tokenization", - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python/ops/ragged tensorflow dep, - ], -) - -py_test( - name = "split_merge_from_logits_tokenizer_test", - size = "small", - srcs = ["python/ops/split_merge_from_logits_tokenizer_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":split_merge_from_logits_tokenizer", - # python:client_testlib tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "split_merge_tokenizer", - srcs = ["python/ops/split_merge_tokenizer.py"], - cc_op_defs = ["core/ops/split_merge_tokenize_op.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:split_merge_tokenize_kernel", - ], - deps = [ - ":tokenization", - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python/ops/ragged tensorflow dep, - ], -) - -py_test( - name = "split_merge_tokenizer_test", - size = "small", - srcs = ["python/ops/split_merge_tokenizer_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":split_merge_tokenizer", - # python:client_testlib tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - 
-py_library( - name = "splitter", - srcs = ["python/ops/splitter.py"], - deps = [ - # python/module tensorflow dep, - ], -) - -py_tf_text_library( - name = "state_based_sentence_breaker_op", - srcs = ["python/ops/state_based_sentence_breaker_op.py"], - cc_op_defs = [ - "core/ops/sentence_breaking_ops_v2.cc", - ], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:sentence_breaking_kernels_v2", - ], - deps = [ - ":sentence_breaking_ops", - # tf:lib tensorflow dep, - # python:dtypes tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "state_based_sentence_breaker_op_test", - size = "medium", - srcs = ["python/ops/state_based_sentence_breaker_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":state_based_sentence_breaker_op", - "@absl_py//absl/testing:parameterized", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:dtypes tensorflow dep, - # python:errors tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_library( - name = "string_ops", - srcs = ["python/ops/string_ops.py"], - deps = [ - # python:string_ops tensorflow dep, - ], -) - -py_test( - name = "coerce_to_valid_utf8_op_test", - size = "small", - srcs = ["python/ops/coerce_to_valid_utf8_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":string_ops", - # python:client_testlib tensorflow dep, - # python:framework_test_lib tensorflow dep, - ], -) - -py_tf_text_library( - name = "text_similarity_metric_ops", - srcs = ["python/metrics/text_similarity_metric_ops.py"], - cc_op_defs = ["core/ops/rouge_l_op.cc"], - cc_op_kernels = ["//tensorflow_text/core/kernels:rouge_l_kernel"], - deps = [ - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:lookup_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "text_similarity_metric_ops_test", - size = "small", - srcs = ["python/metrics/text_similarity_metric_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":text_similarity_metric_ops", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:lookup_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "todense_layer", - srcs = ["python/keras/layers/todense.py"], - deps = [ - # tensorflow package dep, - ], -) - -py_test( - name = "todense_test", - size = "large", - srcs = ["python/keras/layers/todense_test.py"], - python_version = "PY3", - shard_count = 2, - srcs_version = "PY3", - deps = [ - ":todense_layer", - "@absl_py//absl/testing:parameterized", - # numpy dep, - # tensorflow package dep, - # python:framework_test_lib tensorflow dep, - # python/keras tensorflow dep, - # python/keras:testing_utils tensorflow dep, - ], -) - -py_library( - name = "tokenization", - srcs = ["python/ops/tokenization.py"], - deps = [ - ":splitter", - # 
python/module tensorflow dep, - ], -) - -py_library( - name = "tokenization_layers", - srcs = ["python/keras/layers/tokenization_layers.py"], - srcs_version = "PY2AND3", - deps = [ - ":unicode_script_tokenizer", - ":whitespace_tokenizer_v2", - ":wordpiece_tokenizer", - # tensorflow package dep, - # python:lookup_ops tensorflow dep, - # python/ops/ragged:ragged_conversion_ops tensorflow dep, - ], -) - -py_test( - name = "tokenization_layers_test", - size = "large", - srcs = ["python/keras/layers/tokenization_layers_test.py"], - python_version = "PY3", - shard_count = 20, - deps = [ - ":tokenization_layers", - "@absl_py//absl/testing:parameterized", - # numpy dep, - # tensorflow package dep, - # python/keras tensorflow dep, - # python/keras:testing_utils tensorflow dep, - ], -) - -py_library( - name = "trimmer_ops", - srcs = ["python/ops/trimmer_ops.py"], - deps = [ - ":item_selector_ops", - # python:array_ops tensorflow dep, - # python:constant_op tensorflow dep, - # python:control_flow_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:functional_ops tensorflow dep, - # python/ops/ragged:ragged_map_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "trimmer_ops_test", - srcs = ["python/ops/trimmer_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":trimmer_ops", - "@absl_py//absl/testing:parameterized", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -filegroup( - name = "test_data_segmenter_hub_module", - srcs = glob([ - "python/ops/test_data/segmenter_hub_module/**", - ]), -) - -py_library( - name = "unicode_char_tokenizer", - srcs = ["python/ops/unicode_char_tokenizer.py"], - deps = [ - ":tokenization", - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_string_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "unicode_char_tokenizer_test", - size = "large", - srcs = ["python/ops/unicode_char_tokenizer_test.py"], - python_version = "PY3", - shard_count = 5, - srcs_version = "PY3", - deps = [ - ":unicode_char_tokenizer", - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "unicode_script_tokenizer", - srcs = ["python/ops/unicode_script_tokenizer.py"], - cc_op_defs = ["core/ops/unicode_script_tokenize_op.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:unicode_script_tokenize_kernel", - ], - deps = [ - ":tokenization", - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_conversion_ops tensorflow dep, - # python/ops/ragged:ragged_string_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "unicode_script_tokenizer_test", - size = "large", - srcs = ["python/ops/unicode_script_tokenizer_test.py"], - python_version = "PY3", - shard_count = 5, - srcs_version = "PY3", - deps = [ - ":unicode_script_tokenizer", - # python:client_testlib tensorflow dep, - # 
python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "viterbi_constrained_sequence_op", - srcs = ["python/ops/viterbi_constrained_sequence_op.py"], - cc_op_kernels = [ - ":constrained_sequence_op", - ], - deps = [ - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "viterbi_constrained_sequence_op_test", - size = "small", - srcs = ["python/ops/viterbi_constrained_sequence_op_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":viterbi_constrained_sequence_op", - ":viterbi_decode", - # numpy dep, - # python:client_testlib tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "viterbi_decode", - srcs = ["python/numpy/viterbi_decode.py"], - deps = [ - # numpy dep, - ], -) - -py_test( - name = "viterbi_decode_test", - size = "small", - srcs = ["python/numpy/viterbi_decode_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":viterbi_decode", - "@absl_py//absl/testing:absltest", - # numpy dep, - ], -) - -py_tf_text_library( - name = "whitespace_tokenizer", - srcs = [], - cc_op_defs = ["core/ops/whitespace_tokenize_op.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:whitespace_tokenize_kernel", - ], - deps = [ - ":tokenization", - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_conversion_ops tensorflow dep, - # python/ops/ragged:ragged_string_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_tf_text_library( - name = "whitespace_tokenizer_v2", - srcs = ["python/ops/whitespace_tokenizer.py"], - cc_op_defs = ["core/ops/whitespace_tokenizer_op.cc"], - cc_op_kernels = [ - # lite/kernels/shim:tf_op_shim tensorflow dep, - "//tensorflow_text/core/kernels:whitespace_tokenizer_kernel", - ], - deps = [ - ":tokenization", - ":whitespace_tokenizer", - # python:array_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/ops/ragged:ragged_conversion_ops tensorflow dep, - # python/ops/ragged:ragged_string_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - "//tensorflow_text/core/pybinds:pywrap_whitespace_tokenizer_config_builder", - ], -) - -py_test( - name = "whitespace_tokenizer_test", - size = "large", - srcs = ["python/ops/whitespace_tokenizer_test.py"], - python_version = "PY3", - shard_count = 4, - srcs_version = "PY3", - deps = [ - ":tensorflow_text", - # tensorflow package dep, - # python:client_testlib tensorflow dep, - # python:constant_op tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_tf_text_library( - name = "wordpiece_tokenizer", - srcs = ["python/ops/wordpiece_tokenizer.py"], - cc_op_defs = ["core/ops/wordpiece_op.cc"], - cc_op_kernels = [ - "//tensorflow_text/core/kernels:wordpiece_kernel", - ], - deps = [ - ":tokenization", - # python:array_ops tensorflow dep, - # python:check_ops tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_ops 
tensorflow dep, - # python:lookup_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python:sort_ops tensorflow dep, - # python:string_ops tensorflow dep, - # python/compat tensorflow dep, - # python/eager:monitoring tensorflow dep, - # python/ops/ragged:ragged_functional_ops tensorflow dep, - # python/ops/ragged:ragged_string_ops tensorflow dep, - # python/ops/ragged:ragged_tensor tensorflow dep, - ], -) - -py_test( - name = "wordpiece_tokenizer_test", - size = "small", - srcs = ["python/ops/wordpiece_tokenizer_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":wordpiece_tokenizer", - "@absl_py//absl/testing:parameterized", - # python:array_ops tensorflow dep, - # python:client_testlib tensorflow dep, - # python:dtypes tensorflow dep, - # python:framework_test_lib tensorflow dep, - # python:lookup_ops tensorflow dep, - # python:math_ops tensorflow dep, - # python/compat tensorflow dep, - # python/ops/ragged:ragged_factory_ops tensorflow dep, - ], -) - -py_library( - name = "wordshape_ops", - srcs = ["python/ops/wordshape_ops.py"], - deps = [ - # python:array_ops tensorflow dep, - # python:framework_ops tensorflow dep, - # python:string_ops tensorflow dep, - ], -) - -py_test( - name = "wordshape_ops_test", - size = "small", - srcs = ["python/ops/wordshape_ops_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":wordshape_ops", - # python:client_testlib tensorflow dep, - # python:framework_test_lib tensorflow dep, - ], -) - -############## -# Benchmarks # -############## - -# To run the benchmarks: -# bazel run -c opt benchmark_test_name -- --benchmark_filter=. - -# The --benchmark_filter flag specifies the list of benchmarks to run using a -# regular expression that is matched against the benchmark methods' name. -# e.g. --benchmark_filter="wordpiece" will run the Wordpiece Tokenizer benchmarks. - -py_library(name = "profiler_lib") - -py_library( - name = "benchmark_utils", - srcs = ["python/benchmarks/benchmark_utils.py"], - deps = [ - # numpy dep, - # tensorflow datasets dep, - # python:dtypes tensorflow dep, - # python:platform tensorflow dep, - # python:session tensorflow dep, - # python/data/ops:dataset_ops tensorflow dep, - # python/eager:context tensorflow dep, - # python/eager:def_function tensorflow dep, - ], -) - -py_test( - name = "tokenizers_benchmarks", - size = "medium", - srcs = ["python/benchmarks/tokenizers_benchmarks.py"], - data = [ - ":python/benchmarks/test_data/uncased_L-12_H-768_A-12/vocab.txt", - ":python/ops/test_data/test_oss_model.model", - ":test_data_segmenter_hub_module", - ], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":benchmark_utils", - ":ops", - "@absl_py//absl:app", - "@absl_py//absl/testing:parameterized", - # python:dtypes tensorflow dep, - # python/user_ops:ops tensorflow dep, - ], -) - -py_test( - name = "ops_benchmarks", - size = "medium", - srcs = ["python/benchmarks/ops_benchmarks.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":benchmark_utils", - ":ops", - "@absl_py//absl:app", - "@absl_py//absl/testing:parameterized", - # python/user_ops:ops tensorflow dep, - ], -)
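The benchmark comment in the deleted BUILD file describes --benchmark_filter as a regular expression matched against benchmark method names. A tiny illustration of that selection logic follows; the method names are hypothetical (the real ones live in tokenizers_benchmarks.py and ops_benchmarks.py).

import re

benchmark_filter = 'wordpiece'   # value that would be passed via --benchmark_filter
method_names = [                 # hypothetical benchmark method names
    'benchmark_wordpiece_tokenizer',
    'benchmark_whitespace_tokenizer',
    'benchmark_sliding_window',
]
selected = [name for name in method_names if re.search(benchmark_filter, name)]
print(selected)  # ['benchmark_wordpiece_tokenizer']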
diff --git a/third_party/tensorflow-text/src/tensorflow_text/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/__init__.py deleted file mode 100644 index ee18108..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/__init__.py +++ /dev/null
@@ -1,91 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Various tensorflow ops related to text-processing.""" -from tensorflow.python.util.all_util import remove_undocumented - -# pylint: disable=wildcard-import -from tensorflow_text.core.pybinds import tflite_registrar -from tensorflow_text.python import keras -from tensorflow_text.python import metrics -from tensorflow_text.python.ops import * - - -# Public symbols in the "tensorflow_text" package. Symbols are sorted in -# increasing order of their lowercase version. -_allowed_symbols = [ - "BertTokenizer", - "Detokenizer", - "FastWordpieceTokenizer", - "FirstNItemSelector", - "HubModuleSplitter", - "HubModuleTokenizer", - "MaskValuesChooser", - "RandomItemSelector", - "Reduction", - "RegexSplitter", - "RoundRobinTrimmer", - "SentencepieceTokenizer", - "SplitMergeFromLogitsTokenizer", - "SplitMergeTokenizer", - "Splitter", - "SplitterWithOffsets", - "StateBasedSentenceBreaker", - "Tokenizer", - "TokenizerWithOffsets", - "UnicodeCharTokenizer", - "UnicodeScriptTokenizer", - "WaterfallTrimmer", - "WhitespaceTokenizer", - "WordShape", - "WordpieceTokenizer", - "build_fast_wordpiece_model", - "case_fold_utf8", - "coerce_to_structurally_valid_utf8", - "combine_segments", - "find_source_offsets", - "gather_with_default", - "greedy_constrained_sequence", - "keras", - "mask_language_model", - "max_spanning_tree", - "max_spanning_tree_gradient", - "metrics", - "ngrams", - "normalize_utf8", - "normalize_utf8_with_offsets_map", - "pad_along_dimension", - "pad_model_inputs", - "regex_split", - "regex_split_with_offsets", - "sentence_fragments", - "sliding_window", - "span_alignment", - "span_overlaps", - "tflite_registrar", - "viterbi_constrained_sequence", - "wordshape", -] - -tflite_registrar.SELECT_TFTEXT_OPS = [ - tflite_registrar.AddFastWordpieceTokenize, - tflite_registrar.AddFastWordpieceDetokenize, - tflite_registrar.AddNgramsStringJoin, - tflite_registrar.AddRaggedTensorToTensor, - tflite_registrar.AddWhitespaceTokenize -] - -remove_undocumented(__name__, _allowed_symbols) -__version__ = "2.7.3"
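For context on the public surface this deleted __init__.py exported, here is a minimal usage sketch. It assumes a tensorflow-text 2.7.x wheel built from these now-removed sources (or any published release of that line); WhitespaceTokenizer is one of the names listed in _allowed_symbols above.

import tensorflow_text as tf_text

tokenizer = tf_text.WhitespaceTokenizer()
tokens = tokenizer.tokenize(["What you know you can't explain."])
print(tokens.to_list())  # RaggedTensor rows of UTF-8 byte-string tokens, one row per input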
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/BUILD b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/BUILD deleted file mode 100644 index d93485c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/BUILD +++ /dev/null
@@ -1,912 +0,0 @@ -# Kernels for tf.text ops. -# [internal] load cc_proto_library.bzl -load("@flatbuffers//:build_defs.bzl", "flatbuffer_cc_library") -load("//tensorflow_text:tftext.bzl", "tf_cc_library") - -licenses(["notice"]) - -# Visibility rules -package(default_visibility = ["//visibility:public"]) - -exports_files(["LICENSE"]) - -tf_cc_library( - name = "constrained_sequence", - srcs = ["constrained_sequence.cc"], - hdrs = ["constrained_sequence.h"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - ], -) - -tf_cc_library( - name = "constrained_sequence_kernel", - srcs = ["constrained_sequence_kernel.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - ], - deps = [ - ":constrained_sequence", - "@com_google_absl//absl/base:core_headers", - ], -) - -cc_test( - name = "constrained_sequence_kernel_input_validation_test", - srcs = ["constrained_sequence_kernel_input_validation_test.cc"], - deps = [ - ":text_kernels_test_util", - "@com_google_googletest//:gtest_main", - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - # tf:test tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:constrained_sequence_op_cc", - ], -) - -cc_test( - name = "exp_greedy_constrained_sequence_kernel_test", - srcs = ["exp_greedy_constrained_sequence_kernel_test.cc"], - deps = [ - ":text_kernels_test_util", - "@com_google_googletest//:gtest_main", - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - # tf:test tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:constrained_sequence_op_cc", - ], -) - -cc_test( - name = "exp_viterbi_constrained_sequence_kernel_test", - srcs = ["exp_viterbi_constrained_sequence_kernel_test.cc"], - deps = [ - ":text_kernels_test_util", - "@com_google_googletest//:gtest_main", - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - # tf:test tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:constrained_sequence_op_cc", - ], -) - -cc_test( - name = "log_greedy_constrained_sequence_kernel_test", - srcs = ["log_greedy_constrained_sequence_kernel_test.cc"], - deps = [ - ":text_kernels_test_util", - "@com_google_googletest//:gtest_main", - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - # tf:test tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:constrained_sequence_op_cc", - ], -) - -cc_test( - name = "log_viterbi_constrained_sequence_kernel_test", - srcs = ["log_viterbi_constrained_sequence_kernel_test.cc"], - deps = [ - ":text_kernels_test_util", - "@com_google_googletest//:gtest_main", - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - # tf:test tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:constrained_sequence_op_cc", - ], -) - -cc_library( - name = "darts_clone_trie_builder", - srcs = [ - "darts_clone_trie_builder.cc", - ], - hdrs = [ - "darts_clone_trie_builder.h", - ], - deps = [ - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/status:statusor", - "@darts_clone", - ], -) - -cc_library( - name = 
"darts_clone_trie_wrapper", - hdrs = [ - "darts_clone_trie_wrapper.h", - ], - deps = [ - "@com_google_absl//absl/status:statusor", - ], -) - -cc_test( - name = "darts_clone_trie_test", - size = "small", - srcs = ["darts_clone_trie_test.cc"], - deps = [ - ":darts_clone_trie_builder", - ":darts_clone_trie_wrapper", - "@com_google_absl//absl/status", - "@com_google_googletest//:gtest_main", - ], -) - -tf_cc_library( - name = "disjoint_set_forest", - hdrs = ["disjoint_set_forest.h"], - tf_deps = [ - # tf:lib tensorflow dep, - ], -) - -cc_test( - name = "disjoint_set_forest_test", - size = "small", - srcs = ["disjoint_set_forest_test.cc"], - deps = [ - ":disjoint_set_forest", - "@com_google_googletest//:gtest_main", - ], -) - -tf_cc_library( - name = "fast_wordpiece_tokenizer", - srcs = ["fast_wordpiece_tokenizer.cc"], - hdrs = [ - "fast_wordpiece_tokenizer.h", - ], - deps = [ - ":darts_clone_trie_wrapper", - ":fast_wordpiece_tokenizer_model", - ":fast_wordpiece_tokenizer_utils", - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", - "@icu//:nfkc", - # lite/kernels/shim:status_macros tensorflow dep, - ], -) - -cc_test( - name = "fast_wordpiece_tokenizer_test", - srcs = ["fast_wordpiece_tokenizer_test.cc"], - data = [ - "//tensorflow_text:python/ops/test_data/fast_wordpiece_tokenizer_model.fb", - ], - deps = [ - ":fast_wordpiece_tokenizer", - ":fast_wordpiece_tokenizer_model_builder", - "@com_google_googletest//:gtest_main", - "@com_google_absl//absl/flags:flag", - # tf:lib tensorflow dep, - ], -) - -flatbuffer_cc_library( - name = "fast_wordpiece_tokenizer_model", - srcs = [ - "fast_wordpiece_tokenizer_model.fbs", - ], -) - -tf_cc_library( - name = "fast_wordpiece_tokenizer_model_builder", - srcs = ["fast_wordpiece_tokenizer_model_builder.cc"], - hdrs = [ - "fast_wordpiece_tokenizer_model_builder.h", - ], - deps = [ - ":darts_clone_trie_builder", - ":darts_clone_trie_wrapper", - ":fast_wordpiece_tokenizer_model", - ":fast_wordpiece_tokenizer_utils", - ":sentence_fragmenter_v2", - ":wordpiece_tokenizer", - "@com_google_absl//absl/container:flat_hash_set", - "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", - "@icu//:nfkc", - # lite/kernels/shim:status_macros tensorflow dep, - ], -) - -tf_cc_library( - name = "fast_wordpiece_tokenizer_kernel", - srcs = ["fast_wordpiece_tokenizer_kernel.cc"], - hdrs = ["fast_wordpiece_tokenizer_kernel.h"], - tf_deps = [ - # tf:framework tensorflow dep, - ], - deps = [ - ":fast_wordpiece_tokenizer_kernel_template", - # lite/kernels/shim:tf_op_shim tensorflow dep, - ], -) - -cc_library( - name = "fast_wordpiece_tokenizer_kernel_template", - hdrs = ["fast_wordpiece_tokenizer_kernel_template.h"], - deps = [ - ":fast_wordpiece_tokenizer", - "@com_google_absl//absl/status", - "@com_google_absl//absl/strings", - # lite/kernels/shim:op_kernel tensorflow dep, - # lite/kernels/shim:status_macros tensorflow dep, - ], -) - -cc_library( - name = "fast_wordpiece_tokenizer_tflite", - srcs = ["fast_wordpiece_tokenizer_tflite.cc"], - hdrs = ["fast_wordpiece_tokenizer_tflite.h"], - deps = [ - ":fast_wordpiece_tokenizer_kernel_template", - "@org_tensorflow//tensorflow/core/util:ragged_to_dense_util_common", - "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:lib", - "@org_tensorflow//tensorflow/lite/c:common", - "@org_tensorflow//tensorflow/lite/kernels/shim:tflite_op_shim", - 
"@org_tensorflow//tensorflow/lite:mutable_op_resolver", - # lite:mutable_op_resolver tensorflow dep, - # lite/c:common tensorflow dep, - # lite/kernels/shim:tflite_op_shim tensorflow dep, - ], -) - -cc_library( - name = "fast_wordpiece_tokenizer_utils", - hdrs = [ - "fast_wordpiece_tokenizer_utils.h", - ], - deps = [ - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/strings", - "@icu//:nfkc", - ], -) - -cc_test( - name = "fast_wordpiece_tokenizer_utils_test", - srcs = ["fast_wordpiece_tokenizer_utils_test.cc"], - deps = [ - ":fast_wordpiece_tokenizer_utils", - "@com_google_googletest//:gtest_main", - ], -) - -tf_cc_library( - name = "mst_op_kernels", - srcs = ["mst_op_kernels.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - ":mst_solver", - ], -) - -tf_cc_library( - name = "mst_solver", - hdrs = ["mst_solver.h"], - tf_deps = [ - # tf:lib tensorflow dep, - ], - deps = [ - ":disjoint_set_forest", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", - ], -) - -cc_test( - name = "mst_solver_test", - size = "small", - srcs = ["mst_solver_test.cc"], - deps = [ - ":mst_solver", - "@com_google_googletest//:gtest", - "@com_google_googletest//:gtest_main", - # tf:test tensorflow dep, - ], -) - -cc_test( - name = "mst_solver_random_comparison_test", - size = "small", - timeout = "long", - srcs = ["mst_solver_random_comparison_test.cc"], - tags = [ - "nofastbuild", # exclude from non-opt TAP projects - "optonly", # exclude from non-opt TAP projects - ], - deps = [ - ":mst_solver", - ":spanning_tree_iterator", - "@com_google_googletest//:gtest", # google-only - "@com_google_googletest//:gtest_main", - "@com_google_absl//absl/flags:flag", - # tf:lib tensorflow dep, - ], -) - -proto_library( - name = "edit_changes_proto", - srcs = ["edit_changes.proto"], -) - -cc_proto_library( - name = "edit_changes_cc_proto", - deps = [":edit_changes_proto"], -) - -tf_cc_library( - name = "normalize_kernels", - srcs = ["normalize_kernels.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - ], - deps = [ - ":edit_changes_cc_proto", - "//third_party/icu/data:icu_normalization_data", - "@com_google_absl//absl/strings", - "@icu//:nfkc", - "@icu//:nfkc_cf", - ], -) - -tf_cc_library( - name = "ngrams_kernel_template", - hdrs = ["ngrams_kernel_template.h"], - tf_deps = [ - # tf/platform:tstring tensorflow dep, - ], - deps = [ - "@com_google_absl//absl/status", - "@com_google_absl//absl/strings", - # lite/kernels/shim:op_kernel tensorflow dep, - # lite/kernels/shim:status_macros tensorflow dep, - # lite/kernels/shim:tensor_view tensorflow dep, - ], -) - -tf_cc_library( - name = "ngrams_kernel", - srcs = ["ngrams_kernel.cc"], - hdrs = ["ngrams_kernel.h"], - tf_deps = [ - # tf:framework tensorflow dep, - ], - deps = [ - ":ngrams_kernel_template", - # lite/kernels/shim:tf_op_shim tensorflow dep, - ], -) - -tf_cc_library( - name = "ngrams_tflite", - srcs = ["ngrams_tflite.cc"], - hdrs = ["ngrams_tflite.h"], - deps = [ - ":ngrams_kernel_template", - "@org_tensorflow//tensorflow/core/util:ragged_to_dense_util_common", - "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:lib", - "@org_tensorflow//tensorflow/lite/c:common", - "@org_tensorflow//tensorflow/lite/kernels/shim:tflite_op_shim", - "@org_tensorflow//tensorflow/lite:mutable_op_resolver", - # lite:context tensorflow dep, - # lite:mutable_op_resolver tensorflow dep, - # lite/c:common tensorflow dep, - # lite/kernels/shim:tflite_op_shim tensorflow dep, - ], 
-) - -cc_test( - name = "ngrams_tflite_test", - srcs = ["ngrams_tflite_test.cc"], - deps = [ - ":ngrams_tflite", - "@com_google_googletest//:gtest_main", - "@flatbuffers", - # lite:string_util tensorflow dep, - # lite/c:common tensorflow dep, - # lite/kernels:test_util tensorflow dep, - # lite/schema:schema_fbs tensorflow dep, - ], -) - -cc_library( - name = "ragged_tensor_to_tensor_tflite", - srcs = ["ragged_tensor_to_tensor_tflite.cc"], - hdrs = ["ragged_tensor_to_tensor_tflite.h"], - deps = [ - "@flatbuffers", - "@org_tensorflow//tensorflow/core/util:ragged_to_dense_util_common", - "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:lib", - "@org_tensorflow//tensorflow/lite/c:common", - "@org_tensorflow//tensorflow/lite/kernels/shim:tflite_op_shim", - "@org_tensorflow//tensorflow/lite:mutable_op_resolver", - # tf/util:ragged_to_dense_util_common tensorflow dep, - # lite:framework tensorflow dep, - # lite/c:common tensorflow dep, - # lite/kernels:kernel_util tensorflow dep, - # lite/kernels/internal:tensor tensorflow dep, - # lite/kernels/internal:types tensorflow dep, - ], -) - -cc_test( - name = "ragged_tensor_to_tensor_tflite_test", - srcs = ["ragged_tensor_to_tensor_tflite_test.cc"], - deps = [ - ":ragged_tensor_to_tensor_tflite", - "@com_google_googletest//:gtest_main", - "@flatbuffers", - # lite:framework tensorflow dep, - # lite/c:common tensorflow dep, - # lite/kernels:test_util tensorflow dep, - # lite/kernels/internal:tensor tensorflow dep, - # lite/schema:schema_fbs tensorflow dep, - ], -) - -tf_cc_library( - name = "regex_split", - srcs = ["regex_split.cc"], - hdrs = ["regex_split.h"], - deps = [ - "@com_google_absl//absl/strings", - "@com_googlesource_code_re2//:re2", - ], -) - -tf_cc_library( - name = "regex_split_kernels", - srcs = ["regex_split_kernels.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - ":regex_split", - "@com_google_absl//absl/memory", - ], -) - -cc_test( - name = "regex_split_test", - srcs = ["regex_split_test.cc"], - deps = [ - ":regex_split", - "@com_google_googletest//:gtest_main", - "@com_google_absl//absl/strings", - "@com_googlesource_code_re2//:re2", - # tf:lib tensorflow dep, - ], -) - -tf_cc_library( - name = "rouge_l_kernel", - srcs = ["rouge_l_kernel.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - "@com_google_absl//absl/strings", - ], -) - -cc_test( - name = "rouge_l_kernel_test", - size = "small", - srcs = ["rouge_l_kernel_test.cc"], - deps = [ - ":rouge_l_kernel", - # tf:framework tensorflow dep, - # tf:test tensorflow dep, - # tf:test_main tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:text_similarity_metric_ops_cc", - ], -) - -tf_cc_library( - name = "sentence_breaking_kernels", - srcs = ["sentence_breaking_kernels.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - ], - deps = [ - ":sentence_breaking_utils", - ":sentence_fragmenter", - "@com_google_absl//absl/strings", - "@icu//:common", - ], -) - -tf_cc_library( - name = "sentence_breaking_utils", - srcs = ["sentence_breaking_utils.cc"], - hdrs = ["sentence_breaking_utils.h"], - tf_deps = [ - # tf:lib tensorflow dep, - ], - deps = [ - "@com_google_absl//absl/strings", - "@icu//:common", - ], -) - -cc_test( - name = "sentence_breaking_utils_test", - size = "small", - srcs = ["sentence_breaking_utils_test.cc"], - deps = [ - ":sentence_breaking_utils", - "@com_google_googletest//:gtest", - 
"@com_google_googletest//:gtest_main", - "@icu//:common", - ], -) - -tf_cc_library( - name = "sentence_fragmenter", - srcs = ["sentence_fragmenter.cc"], - hdrs = ["sentence_fragmenter.h"], - tf_deps = [ - # tf:lib tensorflow dep, - ], - deps = [ - ":sentence_breaking_utils", - ], -) - -tf_cc_library( - name = "sentence_breaking_kernels_v2", - srcs = ["sentence_breaking_kernels_v2.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - ], - deps = [ - ":sentence_fragmenter_v2", - "@com_google_absl//absl/strings", - ], -) - -tf_cc_library( - name = "sentence_fragmenter_v2", - srcs = ["sentence_fragmenter_v2.cc"], - hdrs = ["sentence_fragmenter_v2.h"], - tf_deps = [ - # tf:lib tensorflow dep, - ], - deps = [ - "@com_google_absl//absl/strings", - "@icu//:common", - ], -) - -cc_test( - name = "sentence_fragmenter_v2_test", - srcs = ["sentence_fragmenter_v2_test.cc"], - deps = [ - ":sentence_fragmenter_v2", - "@com_google_absl//absl/strings", - "@com_google_googletest//:gtest_main", - "@icu//:common", - ], -) - -tf_cc_library( - name = "sentencepiece_kernels", - srcs = ["sentencepiece_kernels.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:framework_headers_lib tensorflow dep, - # tf:lib tensorflow dep, - # tf:protos_all_cc tensorflow dep, - ], - deps = [ - "@com_google_sentencepiece//src:sentencepiece_cc_proto", # Old target - "@com_google_sentencepiece//src:sentencepiece_model_cc_proto", # Old target - "@com_google_sentencepiece//src:sentencepiece_processor", # Old target - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/meta:type_traits", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/types:span", - ], -) - -tf_cc_library( - name = "spanning_tree_iterator", - testonly = 1, - srcs = ["spanning_tree_iterator.cc"], - hdrs = ["spanning_tree_iterator.h"], - tf_deps = [ - # tf:lib tensorflow dep, - ], -) - -cc_test( - name = "spanning_tree_iterator_test", - size = "small", - srcs = ["spanning_tree_iterator_test.cc"], - deps = [ - ":spanning_tree_iterator", - "@com_google_googletest//:gtest_main", - # tf:lib tensorflow dep, - ], -) - -tf_cc_library( - name = "split_merge_tokenize_kernel", - srcs = ["split_merge_tokenize_kernel.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - "@icu//:common", - ], -) - -cc_library( - name = "text_kernels_test_util", - testonly = 1, - srcs = ["text_kernels_test_util.cc"], - hdrs = ["text_kernels_test_util.h"], - deps = [ - "@com_google_googletest//:gtest", - # tf:framework tensorflow dep, - # tf:testlib tensorflow dep, - ], -) - -tf_cc_library( - name = "tflite_ops", - hdrs = [ - "fast_wordpiece_tokenizer_tflite.h", - "ngrams_tflite.h", - "ragged_tensor_to_tensor_tflite.h", - "whitespace_tokenizer_tflite.h", - ], - deps = [ - ":fast_wordpiece_tokenizer_tflite", - ":ngrams_tflite", - ":ragged_tensor_to_tensor_tflite", - ":whitespace_tokenizer_tflite", - ], -) - -tf_cc_library( - name = "tokenizer_from_logits_kernel", - srcs = ["tokenizer_from_logits_kernel.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - "@com_google_absl//absl/base:core_headers", - "@com_google_absl//absl/strings", - "@icu//:common", - ], -) - -tf_cc_library( - name = "unicode_script_tokenize_kernel", - srcs = ["unicode_script_tokenize_kernel.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - 
"@icu//:common", - ], -) - -cc_test( - name = "unicode_script_tokenize_kernel_test", - srcs = ["unicode_script_tokenize_kernel_test.cc"], - deps = [ - ":text_kernels_test_util", - "@com_google_googletest//:gtest_main", - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:test tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:unicode_script_tokenizer_cc", - ], -) - -tf_cc_library( - name = "whitespace_tokenize_kernel", - srcs = ["whitespace_tokenize_kernel.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - "@icu//:common", - ], -) - -cc_test( - name = "whitespace_tokenize_kernel_test", - srcs = ["whitespace_tokenize_kernel_test.cc"], - deps = [ - ":text_kernels_test_util", - "@com_google_googletest//:gtest_main", - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - # tf:test tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:whitespace_tokenizer_cc", - ], -) - -cc_library( - name = "whitespace_tokenizer", - srcs = ["whitespace_tokenizer.cc"], - hdrs = ["whitespace_tokenizer.h"], - deps = [ - "@com_google_absl//absl/strings", - "@icu//:common", - ], -) - -cc_test( - name = "whitespace_tokenizer_test", - size = "small", - srcs = ["whitespace_tokenizer_test.cc"], - deps = [ - ":whitespace_tokenizer", - ":whitespace_tokenizer_config_builder", - "@com_google_googletest//:gtest_main", - "@com_google_absl//absl/flags:flag", - "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", - # tf:lib tensorflow dep, - # tf:test_main tensorflow dep, - ], -) - -tf_cc_library( - name = "whitespace_tokenizer_kernel", - srcs = ["whitespace_tokenizer_kernel.cc"], - hdrs = ["whitespace_tokenizer_kernel.h"], - tf_deps = [ - # tf:framework tensorflow dep, - ], - deps = [ - ":whitespace_tokenizer_kernel_template", - # lite/kernels/shim:op_kernel tensorflow dep, - # lite/kernels/shim:tf_op_shim tensorflow dep, - ], -) - -tf_cc_library( - name = "whitespace_tokenizer_kernel_template", - hdrs = ["whitespace_tokenizer_kernel_template.h"], - tf_deps = [ - # tf:framework tensorflow dep, - ], - deps = [ - ":whitespace_tokenizer", - "@com_google_absl//absl/status", - "@com_google_absl//absl/status:statusor", - # lite/kernels/shim:op_kernel tensorflow dep, - # lite/kernels/shim:shape tensorflow dep, - # lite/kernels/shim:tensor_view tensorflow dep, - ], -) - -tf_cc_library( - name = "whitespace_tokenizer_tflite", - srcs = ["whitespace_tokenizer_tflite.cc"], - hdrs = ["whitespace_tokenizer_tflite.h"], - deps = [ - ":whitespace_tokenizer_kernel_template", - "@org_tensorflow//tensorflow/core/util:ragged_to_dense_util_common", - "@org_tensorflow//tensorflow/core:framework", - "@org_tensorflow//tensorflow/core:lib", - "@org_tensorflow//tensorflow/lite/c:common", - "@org_tensorflow//tensorflow/lite/kernels/shim:tflite_op_shim", - "@org_tensorflow//tensorflow/lite:mutable_op_resolver", - # lite:mutable_op_resolver tensorflow dep, - # lite/c:common tensorflow dep, - # lite/kernels/shim:tflite_op_shim tensorflow dep, - ], -) - -cc_library( - name = "whitespace_tokenizer_config_builder", - srcs = ["whitespace_tokenizer_config_builder.cc"], - hdrs = ["whitespace_tokenizer_config_builder.h"], - deps = [ - "@icu//:common", - ], -) - -cc_test( - name = "whitespace_tokenizer_config_builder_test", - size = "small", - srcs = ["whitespace_tokenizer_config_builder_test.cc"], - deps = [ - ":whitespace_tokenizer", - 
":whitespace_tokenizer_config_builder", - "@com_google_googletest//:gtest_main", - "@icu//:common", - # tf:lib tensorflow dep, - # tf:test_main tensorflow dep, - ], -) - -tf_cc_library( - name = "wordpiece_kernel", - srcs = ["wordpiece_kernel.cc"], - tf_deps = [ - # tf:framework tensorflow dep, - # tf:lib tensorflow dep, - ], - deps = [ - ":wordpiece_tokenizer", - "@com_google_absl//absl/base:core_headers", - ], -) - -tf_cc_library( - name = "wordpiece_tokenizer", - srcs = ["wordpiece_tokenizer.cc"], - hdrs = ["wordpiece_tokenizer.h"], - deps = [ - "@com_google_absl//absl/strings", - "@icu//:common", - ], -) - -cc_test( - name = "wordpiece_kernel_test", - size = "small", - srcs = ["wordpiece_kernel_test.cc"], - deps = [ - ":wordpiece_kernel", - # tf:framework tensorflow dep, - # tf:test tensorflow dep, - # tf:test_main tensorflow dep, - # tf:testlib tensorflow dep, - # tf/kernels:ops_testutil tensorflow dep, - "//tensorflow_text:wordpiece_tokenizer_cc", - ], -)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/LICENSE b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/LICENSE deleted file mode 100644 index ccd61dc..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/LICENSE +++ /dev/null
@@ -1,203 +0,0 @@ - - Apache License - Version 2.0, January 2004 - https://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - https://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.cc deleted file mode 100644 index 07c1e68..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.cc +++ /dev/null
@@ -1,451 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/constrained_sequence.h" - -#include <algorithm> -#include <iterator> -#include <limits> -#include <vector> - -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/types.h" - -namespace tensorflow { -namespace text { - -// State index to use if the sequence in question requires an impossible -// transition. -constexpr int kErrorState = -1; - -ScoreAccessor::ScoreAccessor(const Tensor& score_tensor, - const Tensor& lengths_tensor) { - data_ = score_tensor.flat<float>().data(); - if (lengths_tensor.dtype() == DT_INT64) { - use_long_lengths_ = true; - long_lengths_ = lengths_tensor.flat<int64>().data(); - } else { - use_long_lengths_ = false; - lengths_ = lengths_tensor.flat<int>().data(); - } - has_explicit_batch_ = (score_tensor.shape().dims() == 3); - if (has_explicit_batch_) { - batch_size_ = score_tensor.shape().dim_size(0); - num_steps_ = score_tensor.shape().dim_size(1); - num_scores_ = score_tensor.shape().dim_size(2); - } else { - batch_size_ = 1; - num_steps_ = score_tensor.shape().dim_size(0); - num_scores_ = score_tensor.shape().dim_size(1); - } - batch_offset_ = num_scores_ * num_steps_; - step_offset_ = num_scores_; -} - -// Get a score out of the data tensor. -float ScoreAccessor::GetScore(int batch_idx, - int step_idx, - int score_idx) const { - DCHECK_LE(batch_idx, batch_size_); - DCHECK_LE(step_idx, num_steps_); - DCHECK_LE(score_idx, num_scores_); - return data_[batch_offset_ * batch_idx + step_offset_ * step_idx + score_idx]; -} - -int64 ScoreAccessor::GetLength(int batch_idx) const { - DCHECK_LE(batch_idx, batch_size_); - if (use_long_lengths_) { - return long_lengths_[batch_idx]; - } else { - return lengths_[batch_idx]; - } -} - -int ScoreAccessor::batch_size() const { - return batch_size_; -} -int ScoreAccessor::num_steps() const { - return num_steps_; -} -int ScoreAccessor::num_scores() const { - return num_scores_; -} -bool ScoreAccessor::has_explicit_batch() const { - return has_explicit_batch_; -} - -// Perform Viterbi analysis on a single batch item. -void ViterbiAnalysis( - const ScoreAccessor& scores, - const tensorflow::TTypes<const float>::Matrix& transition_weights, - const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, - const int batch, - bool use_log_space, - bool use_start_end_states, - int32* output_data) { - VLOG(2) << "Analyzing batch " << batch; - const bool has_transition_weights = transition_weights.size() != 0; - const bool has_allowed_transitions = allowed_transitions.size() != 0; - const int num_states = scores.num_scores(); - const int out_of_bounds_index = num_states; - - int64 num_steps = scores.GetLength(batch); - - // Create two vectors to hold scores. 
These will be bound to referents later - // so the names here are somewhat irrelevant. - std::vector<double> scores_a(num_states, - std::numeric_limits<float>::lowest()); - std::vector<double> scores_b(num_states, - std::numeric_limits<float>::lowest()); - - // Create a chart of backpointers. Include rows for [start] and [end] - // transitions. By initializing this to kErrorState, we ensure unreachable - // transitions get marked as errors. - std::vector<std::vector<int>> backpointers( - num_steps, std::vector<int>(num_states, kErrorState)); - - // Set current and previous references for step 0 - std::vector<double>* previous_scores = &scores_a; - std::vector<double>* current_scores = &scores_b; - - const bool vlog3 = VLOG_IS_ON(3); - for (int curr_state = 0; curr_state < num_states; ++curr_state) { - std::vector<int>& current_bps = backpointers[0]; - if (use_start_end_states) { - // Initialize the zeroth step BPs to kOutOfBoundsIndex for all states - // where the OOB->state transition is valid, and set scores as needed. - if (has_allowed_transitions && - !allowed_transitions(out_of_bounds_index, curr_state)) { - if (vlog3) { - LOG(INFO) << "(" << batch << ", 0, [START]->" << curr_state - << "): disallowed."; - } - continue; - } - - // Because the backpointer vectors are initialized to kErrorState, we - // need only to set the valid transition paths to have come from the - // padding state. - current_bps[curr_state] = out_of_bounds_index; - - // For valid transitions, get the score (and adjust as appropriate). - const int step = 0; - float current_score = scores.GetScore(batch, step, curr_state); - if (has_transition_weights) { - if (use_log_space) { - current_score += transition_weights(out_of_bounds_index, curr_state); - } else { - current_score *= transition_weights(out_of_bounds_index, curr_state); - } - } - - if (vlog3) { - if (has_transition_weights) { - LOG(INFO) << "(" << batch << ", " << step << ", [START]->" - << curr_state << "): Total score: " << current_score - << " (raw: " << scores.GetScore(batch, step, curr_state) - << ", tw: " - << transition_weights(out_of_bounds_index, curr_state) - << ")"; - } else { - LOG(INFO) << "(" << batch << ", " << step << ", [START]->" - << curr_state << "): Total score: " << current_score - << " (raw: " << scores.GetScore(batch, step, curr_state) - << ")"; - } - } - - current_scores->at(curr_state) = current_score; - } else { - // If we don't have specific start and end states, all bp's are valid - // and all starting scores are the unadjusted step 0 scores. - current_bps[curr_state] = out_of_bounds_index; - const int step = 0; - current_scores->at(curr_state) = scores.GetScore(batch, step, curr_state); - } - } - - // Update the current scores (and normalize if we're not in log space). - if (!use_log_space) { - const double max_score = - *std::max_element(current_scores->begin(), current_scores->end()); - if (max_score > 0) { - for (double& score : *current_scores) - score /= max_score; - } - } - - // Swap current and previous score arrays, as we are advancing a step. - std::vector<double>* tmp = previous_scores; - previous_scores = current_scores; - current_scores = tmp; - - // Handle all steps save for the first and last in this loop. 
- for (int step = 1; step < num_steps; ++step) { - const std::vector<int>& previous_bps = backpointers[step - 1]; - std::vector<int>& current_bps = backpointers[step]; - - for (int curr_state = 0; curr_state < num_states; ++curr_state) { - int best_source_state = kErrorState; - float best_score = std::numeric_limits<float>::lowest(); - for (int prev_state = 0; prev_state < num_states; ++prev_state) { - // If the previous state was an error state, pass to the next state. - if (previous_bps[prev_state] == kErrorState) { - if (vlog3) { - LOG(INFO) << "(" << batch << ", " << step << ", " << prev_state - << "->" << curr_state << "): prev state error."; - } - continue; - } - - // If this is not a permitted transition, continue. - if (has_allowed_transitions && - !allowed_transitions(prev_state, curr_state)) { - if (vlog3) { - LOG(INFO) << "(" << batch << ", " << step << ", " << prev_state - << "->" << curr_state << "): disallowed."; - } - continue; - } - - float current_score = scores.GetScore(batch, step, curr_state); - if (use_log_space) { - current_score += previous_scores->at(prev_state); - } else { - current_score *= previous_scores->at(prev_state); - } - if (has_transition_weights) { - if (use_log_space) { - current_score += transition_weights(prev_state, curr_state); - } else { - current_score *= transition_weights(prev_state, curr_state); - } - } - - if (vlog3) { - if (has_transition_weights) { - LOG(INFO) << "(" << batch << ", " << step << ", " << prev_state - << "->" << curr_state - << "): Total score: " << current_score - << " (prev: " << previous_scores->at(prev_state) - << ", raw: " << scores.GetScore(batch, step, curr_state) - << ", tw: " << transition_weights(prev_state, curr_state) - << ")"; - } else { - LOG(INFO) << "(" << batch << ", " << step << ", " << prev_state - << "->" << curr_state - << "): Total score: " << current_score - << " (prev: " << previous_scores->at(prev_state) - << ", raw: " << scores.GetScore(batch, step, curr_state) - << ")"; - } - } - - if (current_score >= best_score) { - best_source_state = prev_state; - best_score = current_score; - } - } - current_bps[curr_state] = best_source_state; - current_scores->at(curr_state) = best_score; - } - - // Normalize if we're not in log space. - if (!use_log_space) { - const double max_score = - *std::max_element(current_scores->begin(), current_scores->end()); - if (max_score > 0) { - for (double& score : *current_scores) - score /= max_score; - } - } - - // After each step, switch the current scores to the previous scores and - // use the previous previous scores as the current scores. - std::vector<double>* tmp = previous_scores; - previous_scores = current_scores; - current_scores = tmp; - } - - // Handle the final transition out of the sequence. - int final_state = out_of_bounds_index; - const std::vector<int>& previous_bps = backpointers[num_steps - 1]; - int best_source_state = kErrorState; - float final_score = std::numeric_limits<float>::lowest(); - - for (int prev_state = 0; prev_state < num_states; ++prev_state) { - // If the previous state was an error state, pass to the next state. - if (previous_bps[prev_state] == kErrorState) { - current_scores->at(prev_state) = std::numeric_limits<float>::lowest(); - if (vlog3) { - LOG(INFO) << "(" << batch << ", " << num_steps << ", " << prev_state - << "->[END]): prev state error."; - } - continue; - } - - // If this is not a permitted transition, continue. 
- if (has_allowed_transitions && use_start_end_states && - !allowed_transitions(prev_state, final_state)) { - current_scores->at(prev_state) = std::numeric_limits<float>::lowest(); - if (vlog3) { - LOG(INFO) << "(" << batch << ", " << num_steps << ", " << prev_state - << "->[END]): disallowed."; - } - continue; - } - - // Weight the final transition score by the probability of exiting the - // sequence as well. - float current_score = previous_scores->at(prev_state); - if (use_start_end_states) { - if (has_transition_weights) { - if (use_log_space) { - current_score += transition_weights(prev_state, final_state); - } else { - current_score *= transition_weights(prev_state, final_state); - } - } - - if (vlog3) { - if (has_transition_weights) { - LOG(INFO) << "(" << batch << ", " << num_steps << ", " << prev_state - << "->[END]): Total score: " << current_score - << " (prev: " << previous_scores->at(prev_state) - << ", tw: " << transition_weights(prev_state, final_state) - << ")"; - } else { - LOG(INFO) << "(" << batch << ", " << num_steps << ", " << prev_state - << "->[END]): Total score: " << current_score - << " (prev: " << previous_scores->at(prev_state) << ")"; - } - } - } - - current_scores->at(prev_state) = current_score; - if (current_score >= final_score) { - best_source_state = prev_state; - final_score = current_score; - } - } - - if (vlog3) { - LOG(INFO) << "Final score: " << final_score; - } - - // Calculate the path. - if (best_source_state == kErrorState) { - // If the best source is an error state, the path is unknowable. Report - // error states for the whole sequence. - for (int64 i = 0; i < scores.GetLength(batch); ++i) { - output_data[i] = kErrorState; - } - } else { - // If the best source is a 'real' state, report the state path. - int steps_to_report = scores.GetLength(batch); - int previous_state = best_source_state; - for (int64 i = steps_to_report - 1; i >= 0; --i) { - output_data[i] = previous_state; - previous_state = backpointers[i][previous_state]; - } - } -} - -void GreedyAnalysis( - const ScoreAccessor& scores, - const tensorflow::TTypes<const float>::Matrix& transition_weights, - const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, - int batch, - bool use_log_space, - bool use_start_end_states, - int32* output_data) { - const bool has_transition_weights = transition_weights.size() != 0; - const bool has_allowed_transitions = allowed_transitions.size() != 0; - const int num_states = scores.num_scores(); - const int out_of_bounds_index = num_states; - int64 num_steps = scores.GetLength(batch); - - for (int step = 0; step < num_steps; ++step) { - // Do final step calculations if this is the final step in the sequence - // and we are calculating based on implicit start and end states. - bool do_final_step = - (step == scores.GetLength(batch) - 1) && use_start_end_states; - VLOG(2) << "is last step: " << do_final_step; - - const int previous_state = - (step == 0) ? (out_of_bounds_index) : (output_data[step - 1]); - - if (previous_state == kErrorState) { - // If the previous state is the error state, the current state must - // also be the error state. - output_data[step] = kErrorState; - continue; - } - - // If no transition is possible, this will stay the error state. 
- int best_new_state = kErrorState; - float best_new_score = std::numeric_limits<float>::lowest(); - - for (int state = 0; state < num_states; ++state) { - float current_score = scores.GetScore(batch, step, state); - - // If we are not using start/end states AND step is 0, then - // current_score will not be altered. - if (use_start_end_states || step > 0) { - if (has_allowed_transitions) { - // If either the transition from the previous state to this state - // is disallowed, or we need to analyze the final step and the - // transition from this state to the final step is not allowed, - // disallow this transition. - if (!allowed_transitions(previous_state, state) || - (do_final_step && - !allowed_transitions(state, out_of_bounds_index))) { - continue; - } - } - - if (has_transition_weights) { - if (use_log_space) { - current_score += transition_weights(previous_state, state); - } else { - current_score *= transition_weights(previous_state, state); - } - // On the last step, also analyze by the weight value of - // transitioning from this state to the out-of-bounds state. - if (do_final_step) { - if (use_log_space) { - current_score += transition_weights(state, out_of_bounds_index); - } else { - current_score *= transition_weights(state, out_of_bounds_index); - } - } - } - } - if (current_score >= best_new_score) { - best_new_state = state; - best_new_score = current_score; - } - } - output_data[step] = best_new_state; - VLOG(2) << "Best state for step " << step << " is " << output_data[step] - << " with score " << best_new_score; - } -} - -} // namespace text -} // namespace tensorflow
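The deleted constrained_sequence.cc above implements constrained Viterbi and greedy decoding over a [batch, step, state] score tensor. As a rough orientation for reviewers, here is a minimal, self-contained C++ sketch of the log-space Viterbi recurrence only; the function name, signature, and dense weight matrix are illustrative assumptions, and it omits the kernel's kErrorState handling, exp-space normalization, and implicit start/end-state transitions.

#include <limits>
#include <vector>

// Returns the highest-scoring state sequence, taking only transitions marked
// allowed[from][to] and adding log-space transition weights[from][to].
std::vector<int> ViterbiDecodeSketch(
    const std::vector<std::vector<float>>& scores,   // [step][state] emission scores
    const std::vector<std::vector<bool>>& allowed,   // [from][to] permitted transitions
    const std::vector<std::vector<float>>& weights)  // [from][to] log-space weights
{
  const int num_steps = static_cast<int>(scores.size());
  const int num_states = static_cast<int>(scores[0].size());
  std::vector<std::vector<int>> backpointers(
      num_steps, std::vector<int>(num_states, -1));
  std::vector<float> prev = scores[0];  // Step 0: raw scores, no predecessor.
  for (int step = 1; step < num_steps; ++step) {
    std::vector<float> curr(num_states, std::numeric_limits<float>::lowest());
    for (int to = 0; to < num_states; ++to) {
      for (int from = 0; from < num_states; ++from) {
        if (!allowed[from][to]) continue;
        const float score = prev[from] + weights[from][to] + scores[step][to];
        if (score > curr[to]) {  // Keep the best-scoring predecessor.
          curr[to] = score;
          backpointers[step][to] = from;
        }
      }
    }
    prev = curr;
  }
  int best = 0;  // Best final state.
  for (int s = 1; s < num_states; ++s) {
    if (prev[s] > prev[best]) best = s;
  }
  std::vector<int> path(num_steps);  // Walk backpointers to recover the path.
  for (int step = num_steps - 1; step >= 0; --step) {
    path[step] = best;
    best = backpointers[step][best];
  }
  return path;
}

The deleted kernel additionally marks sequences that require an impossible transition with kErrorState and, when use_start_and_end_states is set, scores the extra out-of-bounds transitions into and out of the sequence; the sketch above deliberately leaves those parts out.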
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.h deleted file mode 100644 index 6e473af..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence.h +++ /dev/null
@@ -1,96 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_CONSTRAINED_SEQUENCE_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_CONSTRAINED_SEQUENCE_H_ - -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/platform/types.h" - -namespace tensorflow { -namespace text { - -class ScoreAccessor { - public: - explicit ScoreAccessor(const Tensor& score_tensor, - const Tensor& lengths_tensor); - - // Get a score out of the data tensor. - float GetScore(int batch_idx, int step_idx, int score_idx) const; - - int64 GetLength(int batch_idx) const; - - int batch_size() const; - int num_steps() const; - int num_scores() const; - bool has_explicit_batch() const; - - private: - // A pointer into the underlying data of the score tensor. Not owned. - const float* data_; - - // A pointer into the underlying data of the lengths tensor. Not owned. - const int* lengths_; - const int64* long_lengths_; - - // Whether the passed lengths tensor is int32 or int64. - bool use_long_lengths_; - - // The batch size associated with the data tensor. - int batch_size_; - - // The number of steps in the data tensor. - int num_steps_; - - // The number of scores in the data tensor. - int num_scores_; - - // The amount to increase the offset within the flat data array if the batch - // index increases by 1. - int batch_offset_; - - // The amount to increase the offset within the flat data array if the step - // index increases by 1. - int step_offset_; - - // True if the original tensor had an explicit batch dimension (that is, - // it was of rank 3). - bool has_explicit_batch_; -}; - -// Perform Viterbi analysis on a single batch item. -void ViterbiAnalysis( - const ScoreAccessor& scores, - const tensorflow::TTypes<const float>::Matrix& transition_weights, - const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, - const int batch, - bool use_log_space, - bool use_start_end_states, - int32* output_data); - -// Perform a greedy analysis on a single batch item. -void GreedyAnalysis( - const ScoreAccessor& scores, - const tensorflow::TTypes<const float>::Matrix& transition_weights, - const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, - int batch, - bool use_log_space, - bool use_start_end_states, - int32* output_data); - -} // namespace text -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_CONSTRAINED_SEQUENCE_H_
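The deleted ScoreAccessor declared above reads scores out of a single flat float buffer using batch_offset_ = num_steps * num_scores and step_offset_ = num_scores. A tiny sketch of that indexing arithmetic follows; the concrete sizes are made up for illustration.

#include <cassert>

int main() {
  const int num_steps = 4;
  const int num_scores = 3;
  const int step_offset = num_scores;               // Advancing one step.
  const int batch_offset = num_steps * num_scores;  // Advancing one batch element.
  // Flat index of element (batch = 2, step = 1, score = 0), as in GetScore():
  const int flat_index = batch_offset * 2 + step_offset * 1 + 0;
  assert(flat_index == 27);  // 2 * 12 + 1 * 3 + 0
  return 0;
}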
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel.cc deleted file mode 100644 index 869a5c3e5..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel.cc +++ /dev/null
@@ -1,263 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <limits> -#include <memory> -#include <string> -#include <vector> - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow_text/core/kernels/constrained_sequence.h" - -namespace tensorflow { - -using ::tensorflow::DataType; -using ::tensorflow::DEVICE_CPU; -using ::tensorflow::DT_BOOL; -using ::tensorflow::DT_FLOAT; -using ::tensorflow::OpKernel; -using ::tensorflow::OpKernelConstruction; -using ::tensorflow::OpKernelContext; -using ::tensorflow::Status; -using ::tensorflow::Tensor; -using ::tensorflow::TensorShape; -using ::tensorflow::errors::InvalidArgument; -using ::tensorflow::text::GreedyAnalysis; -using ::tensorflow::text::ScoreAccessor; -using ::tensorflow::text::ViterbiAnalysis; - -// State index to use if the sequence in question requires an impossible -// transition. -constexpr int kErrorState = -1; - -// State index to use when outputting a padded tensor and the sequence in -// question does not have a token for a given step. -constexpr int kPaddingState = -2; - -namespace { - -// Validate that a given constraint tensor is the proper shape (dimension -// 2, with shape [num_states + 1, num_states + 1]. -tensorflow::Status ValidateConstraintTensor(const Tensor& tensor, - const int num_states, - const bool use_start_end_states, - const string& name) { - if (tensor.shape().dims() != 2) { - return InvalidArgument( - tensorflow::strings::StrCat(name, " must be of rank 2")); - } - int expected_size = use_start_end_states ? 
num_states + 1 : num_states; - if (tensor.shape().dim_size(0) != expected_size) { - return InvalidArgument(tensorflow::strings::StrCat( - name, " must have a zeroth dimension of size ", expected_size, - " when num_states is ", num_states, " and use_start_and_end_states is ", - use_start_end_states)); - } - if (tensor.shape().dim_size(1) != expected_size) { - return InvalidArgument(tensorflow::strings::StrCat( - name, " must have a first dimension of size ", expected_size, - " when num_states is ", num_states, " and use_start_and_end_states is ", - use_start_end_states)); - } - return tensorflow::Status::OK(); -} - -} // namespace - -template <typename Tin, typename Tsplits> -class ConstrainedSequence : public OpKernel { - public: - explicit ConstrainedSequence(OpKernelConstruction* context) - : OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("use_viterbi", &use_viterbi_)); - OP_REQUIRES_OK(context, context->GetAttr("use_log_space", &use_log_space_)); - OP_REQUIRES_OK(context, context->GetAttr("use_start_and_end_states", - &use_start_end_states_)); - } - - void Compute(OpKernelContext* context) override { - const auto& score_tensor = context->input(0); - OP_REQUIRES(context, - (score_tensor.shape().dims() == 2) || - (score_tensor.shape().dims() == 3), - InvalidArgument("The score tensor must be of rank 2 or 3.")); - const auto& lengths_tensor = context->input(1); - - ScoreAccessor scores(score_tensor, lengths_tensor); - - // The scores tensor should be [batch, step, scores]. - const int batch_size = scores.batch_size(); - const int num_steps = scores.num_steps(); - const int num_scores = scores.num_scores(); - - OP_REQUIRES(context, lengths_tensor.NumElements() == batch_size, - InvalidArgument(tensorflow::strings::StrCat( - "There should be exactly one length for every batch " - "element. Found ", - lengths_tensor.NumElements(), - " length elements for a batch size of ", batch_size))); - - VLOG(2) << "batch: " << batch_size; - VLOG(2) << "steps: " << num_steps; - VLOG(2) << "score: " << num_scores; - - // Make sure there's enough data to advance every sequence. - int max_length = 0; - int total_length = 0; - for (int i = 0; i < batch_size; ++i) { - int64 length = scores.GetLength(i); - total_length += length; - if (length > max_length) { - max_length = length; - } - } - - OP_REQUIRES( - context, num_steps >= max_length, - InvalidArgument( - "The scores tensor is too short for the longest sequence length.")); - - // Validate the constraint tensors. - const auto& allowed_transitions_tensor = context->input(2); - bool has_allowed_transitions = - allowed_transitions_tensor.NumElements() != 0; - VLOG(4) << allowed_transitions_tensor.NumElements(); - if (has_allowed_transitions) { - OP_REQUIRES_OK(context, - ValidateConstraintTensor(allowed_transitions_tensor, - num_scores, use_start_end_states_, - "allowed_transitions")); - } - - const auto& transition_weights_tensor = context->input(3); - - VLOG(4) << transition_weights_tensor.NumElements(); - bool has_transition_weights = transition_weights_tensor.NumElements() != 0; - if (has_transition_weights) { - OP_REQUIRES_OK(context, ValidateConstraintTensor( - transition_weights_tensor, num_scores, - use_start_end_states_, "transition_weights")); - - // If we have transition weights in exp-space, all values must be non- - // negative. 
- if (!use_log_space_) { - for (int i = 0; i < transition_weights_tensor.NumElements(); ++i) { - OP_REQUIRES(context, transition_weights_tensor.flat<float>()(i) >= 0, - InvalidArgument("The transition weights tensor must not " - "contain negative values.")); - } - } - } - - const tensorflow::Tensor empty_float(DT_FLOAT, TensorShape({0, 0})); - const tensorflow::Tensor empty_bool(DT_BOOL, TensorShape({0, 0})); - - const auto& transition_weights = - has_transition_weights ? transition_weights_tensor.matrix<float>() - : empty_float.matrix<float>(); - - const auto& allowed_transitions = - has_allowed_transitions ? allowed_transitions_tensor.matrix<bool>() - : empty_bool.matrix<bool>(); - - Tensor* output; - OP_REQUIRES_OK(context, context->allocate_output( - 0, TensorShape({total_length}), &output)); - int32* output_data = output->flat<int32>().data(); - - Tensor* offsets; - OP_REQUIRES_OK(context, context->allocate_output( - 1, TensorShape({batch_size + 1}), &offsets)); - Tsplits* offset_data = offsets->flat<Tsplits>().data(); - offset_data[0] = 0; - - for (int batch = 0; batch < batch_size; ++batch) { - int step_offset = offset_data[batch]; - int64 num_steps = scores.GetLength(batch); - offset_data[batch + 1] = step_offset + num_steps; - if (use_viterbi_) { - DoViterbiAnalysis(transition_weights, allowed_transitions, batch, - scores, &output_data[step_offset]); - } else { - DoGreedyAnalysis(transition_weights, allowed_transitions, batch, scores, - &output_data[step_offset]); - } - } - } - - private: - // Perform Viterbi analysis on a single batch item. - void DoViterbiAnalysis( - const tensorflow::TTypes<const float>::Matrix& transition_weights, - const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, - const int batch, - const ScoreAccessor& scores, - int32* output_data) { - ViterbiAnalysis(scores, transition_weights, allowed_transitions, batch, - use_log_space_, use_start_end_states_, output_data); - } - - // Perform a greedy analysis on a single batch item. - void DoGreedyAnalysis( - const tensorflow::TTypes<const float>::Matrix& transition_weights, - const tensorflow::TTypes<const bool>::Matrix& allowed_transitions, - int batch, - const ScoreAccessor& scores, - int32* output_data) { - GreedyAnalysis(scores, transition_weights, allowed_transitions, batch, - use_log_space_, use_start_end_states_, output_data); - } - - // True if this op should perform calculations in log-space (using addition). - // If false, will perform calculations in normalized exp-space (using - // multiplication). - bool use_log_space_; - - // True if this op should calculate scores using the Viterbi algorithm. If - // false, will use a greedy algorithm. - bool use_viterbi_; - - // True if this op should calculate sequences based on an implicit start - // and end state. - bool use_start_end_states_; - - TF_DISALLOW_COPY_AND_ASSIGN(ConstrainedSequence); -}; - -#define REGISTER_KERNELS(Tin) \ - REGISTER_KERNEL_BUILDER(Name("ConstrainedSequence") \ - .Device(DEVICE_CPU) \ - .TypeConstraint<Tin>("Tin") \ - .TypeConstraint<int32>("Tsplits"), \ - ConstrainedSequence<Tin, int32>); \ - REGISTER_KERNEL_BUILDER(Name("ConstrainedSequence") \ - .Device(DEVICE_CPU) \ - .TypeConstraint<Tin>("Tin") \ - .TypeConstraint<int64>("Tsplits"), \ - ConstrainedSequence<Tin, int64>) - -REGISTER_KERNELS(int32); -REGISTER_KERNELS(int64); - -#undef REGISTER_KERNELS - -} // namespace tensorflow
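The deleted kernel above emits its decoded states as a ragged pair: a flat values tensor of total_length entries plus an offsets tensor of batch_size + 1 row splits built from the per-sequence lengths. A minimal sketch of that offsets computation (helper name assumed for illustration):

#include <cstdint>
#include <vector>

// offsets[b] .. offsets[b + 1] delimits sequence b inside the flat output,
// mirroring the running total the kernel keeps in offset_data.
std::vector<int64_t> RowSplitsFromLengths(const std::vector<int64_t>& lengths) {
  std::vector<int64_t> splits(lengths.size() + 1, 0);
  for (size_t b = 0; b < lengths.size(); ++b) {
    splits[b + 1] = splits[b] + lengths[b];
  }
  return splits;
}

For example, lengths {1, 1, 1} yield splits {0, 1, 2, 3}, which matches the expected_offsets value checked by the input-validation test deleted below.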
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel_input_validation_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel_input_validation_test.cc deleted file mode 100644 index 13dc8c9..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/constrained_sequence_kernel_input_validation_test.cc +++ /dev/null
@@ -1,496 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -namespace tensorflow { - -using tensorflow::DT_INT32; -using tensorflow::FakeInput; -using tensorflow::NodeDefBuilder; -using tensorflow::Status; -using tensorflow::TensorShape; -using tensorflow::text_kernels_test_util::MatrixEq; -using tensorflow::text_kernels_test_util::VectorEq; - -class ConstrainedSequenceInputValidationTest : public tensorflow::OpsTestBase { - public: - void SetUpOpWithDefaults(bool use_start_end, - tensorflow::DataType input_datatype) { - // Prepare graph. - TF_ASSERT_OK(NodeDefBuilder("tested_op", "ConstrainedSequence") - .Attr("Tin", input_datatype) - .Attr("use_viterbi", true) - .Attr("use_log_space", true) - .Attr("use_start_and_end_states", use_start_end) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } - - void SetUpOpWithStartEnd() { SetUpOpWithDefaults(true, DT_INT32); } - - void SetUpOpWithNoStartEnd() { SetUpOpWithDefaults(false, DT_INT32); } -}; -// TODO(b/122968457): There are a bunch of tests that only validate !ok instead -// of looking for specific error messages; fix that. - -// This test examines evaluations with only a permissions matrix. -TEST_F(ConstrainedSequenceInputValidationTest, WorksWithInt64InputLengths) { - // Prepare graph. - SetUpOpWithDefaults(true, DT_INT64); - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - std::vector<int64> input_lengths({1, 1, 1}); - AddInputFromArray<int64>(TensorShape({3}), input_lengths); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. 
- - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnOuterWrongSizePermissionMatrix) { - // Prepare graph. - SetUpOpWithStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({4, 5}), - { - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnInnerWrongSizePermissionMatrix) { - // Prepare graph. - SetUpOpWithStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 4}), - { - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnWrongRankPermissionMatrix) { - // Prepare graph. - SetUpOpWithStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({25}), - { - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnOuterWrongSizeWeightMatrix) { - // Prepare graph. - SetUpOpWithStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. 
- AddInputFromArray<float>(TensorShape({4, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnInnerWrongSizeWeightMatrix) { - // Prepare graph. - SetUpOpWithStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 4}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, FailsOnWrongRankWeightMatrix) { - // Prepare graph. - SetUpOpWithStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({25}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -TEST_F(ConstrainedSequenceInputValidationTest, - PassesWithCorrectSizedWeightAndPermissionsMatrix) { - // Prepare graph. - SetUpOpWithNoStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({4, 4}), { - true, true, true, true, // - true, true, true, true, // - true, true, true, true, // - true, true, true, true, // - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({4, 4}), {0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 1.0, 1.0}); - auto result = RunOpKernel(); - EXPECT_TRUE(result.ok()); -} - -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnOuterWrongSizePermissionMatrixWithNoStartEnd) { - // Prepare graph. - SetUpOpWithNoStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({4, 5}), - { - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - }); - - // Add the transition_weights input. 
- AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnInnerWrongSizePermissionMatrixWithNoStartEnd) { - // Prepare graph. - SetUpOpWithNoStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 4}), - { - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - true, true, true, true, true, // - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnWrongRankPermissionMatrixWithNoStartEnd) { - // Prepare graph. - SetUpOpWithNoStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({16}), { - true, true, true, true, // - true, true, true, true, // - true, true, true, true, // - true, true, true, true, // - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnOuterWrongSizeWeightMatrixWithNoStartEnd) { - // Prepare graph. - SetUpOpWithNoStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({4, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnInnerWrongSizeWeightMatrixWithNoStartEnd) { - // Prepare graph. - SetUpOpWithNoStartEnd(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 4}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} -TEST_F(ConstrainedSequenceInputValidationTest, - FailsOnWrongRankWeightMatrixWithNoStartEnd) { - // Prepare graph. - SetUpOpWithNoStartEnd(); - // Add the scores input. 
- AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 3.0, 4.0, // - 1.0, 12.0, 3.0, 4.0, // - 1.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({16}), {0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 1.0}); - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -} // namespace tensorflow
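Note for readers of this roll: the expectation comments in the deleted test above ("the highest score is 2, but OUT->2 is not ok, so it's 1") describe an argmax restricted by the permission row for the current state. A minimal, self-contained sketch of that selection rule follows; the function and variable names are illustrative only and this is not the kernel's actual implementation.

// Sketch of "argmax subject to an allowed-transition row"; names are hypothetical.
#include <cstddef>
#include <vector>

int PickAllowedArgmax(const std::vector<float>& scores,
                      const std::vector<bool>& allowed_from_current) {
  int best = -1;
  for (std::size_t i = 0; i < scores.size(); ++i) {
    if (!allowed_from_current[i]) continue;  // transition into state i forbidden
    if (best < 0 || scores[i] > scores[best]) best = static_cast<int>(i);
  }
  return best;  // -1 if no state is reachable from the current state
}

// Mirrors the first test case above: scores {10, 12, 13, 4} with OUT->2
// forbidden selects state 1, not the unconstrained argmax 2:
//   PickAllowedArgmax({10, 12, 13, 4}, {true, true, false, true}) == 1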
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc deleted file mode 100644 index 73ac94f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.cc +++ /dev/null
@@ -1,100 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/darts_clone_trie_builder.h" - -#include <algorithm> -#include <numeric> - -#include "absl/container/flat_hash_set.h" -// #include "include/darts.h" - -namespace tensorflow { -namespace text { -namespace trie_utils { - -absl::StatusOr<std::vector<uint32_t>> BuildDartsCloneTrie( - const std::vector<std::string>& keys) { - std::vector<int> values(keys.size()); - std::iota(values.begin(), values.end(), 0); - return BuildDartsCloneTrie(keys, values); -} - -absl::StatusOr<std::vector<uint32_t>> BuildDartsCloneTrie( - const std::vector<std::string>& keys, - const std::vector<int>& values) { - if (keys.size() != values.size()) { - return absl::InvalidArgumentError(absl::StrCat( - "The sizes of 'keys' and 'values' must be equal! Keys size: ", - keys.size(), " . Values size: ", values.size())); - } - - { - // Make sure there are no duplicated elements or empty strings in 'keys'. - absl::flat_hash_set<absl::string_view> unique_keys; - for (const auto& key : keys) { - if (key.empty()) { - return absl::InvalidArgumentError( - "The empty string \"\" is found in 'keys', which is not " - "supported."); - } - if (!unique_keys.insert(key).second) { - return absl::InvalidArgumentError( - absl::StrCat("Duplicated key: ", key, ".")); - } - } - } - - // Make sure all values are non-negative. - for (int i = 0; i < keys.size(); ++i) { - if (values[i] < 0) { - return absl::InvalidArgumentError(absl::StrCat( - "All values must be non-negative! Found value: ", values[i], - " for key: ", keys[i], ", at index: ", i)); - } - } - - // Create a vector to hold the indexes. - std::vector<int> vocab_index_sorted(keys.size()); - std::iota(vocab_index_sorted.begin(), vocab_index_sorted.end(), 0); - - // Sort the index by keys. - std::sort( - vocab_index_sorted.begin(), vocab_index_sorted.end(), - [&keys](const int x, const int y) { return keys.at(x) < keys.at(y); }); - - // Create vectors to build the trie. - std::vector<const char*> trie_keys; - std::vector<int> trie_values; - trie_keys.reserve(keys.size()); - trie_values.reserve(keys.size()); - for (const auto index : vocab_index_sorted) { - trie_keys.push_back(keys.at(index).c_str()); - trie_values.push_back(values[index]); - } - - // Build the trie. - auto trie = absl::make_unique<Darts::DoubleArray>(); - trie->build(trie_keys.size(), const_cast<char**>(&trie_keys[0]), nullptr, - const_cast<int*>(&trie_values[0])); - - // Return the data of darts_clone (an array of 32-bit unsigned int). See: - // http://google3/third_party/darts_clone/include/darts.h?l=53-55&rcl=245017625 - const uint32_t* trie_array = static_cast<const uint32_t*>(trie->array()); - return std::vector<uint32_t>(trie_array, trie_array + trie->size()); -} - -} // namespace trie_utils -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.h deleted file mode 100644 index 9b47debd..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_builder.h +++ /dev/null
@@ -1,54 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Builder utils for Darts-clone tries. -// -// Darts-clone is a compact and efficient implementation of Darts (Double-ARray -// Trie System). For more info, see https://github.com/s-yata/darts-clone. -// -// This header file contains utils that build a darts-clone trie. To access such -// a darts-clone trie, use the utils from the companion header file -// darts_clone_trie_wrapper.h. -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_BUILDER_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_BUILDER_H_ - -#include <stdint.h> -#include <string> -#include <vector> - -#include "absl/status/statusor.h" - -namespace tensorflow { -namespace text { -namespace trie_utils { - -// Builds the trie given keys and values, and returns the darts_clone trie -// array data. `keys` and `values` should have the same size; `values[i]` is the -// value for `keys[i]`. `keys` should not contain duplicated elements. In -// addition, the empty string "" should not be in `keys`, because darts_clone -// does not support that. Furthermore, all `values` should be non-negative. -absl::StatusOr<std::vector<uint32_t>> BuildDartsCloneTrie( - const std::vector<std::string>& keys, - const std::vector<int>& values); - -// A variant where the values are indexes in the keys: i.e., the value for -// `keys[i]` is the index `i`. -absl::StatusOr<std::vector<uint32_t>> BuildDartsCloneTrie( - const std::vector<std::string>& keys); - -} // namespace trie_utils -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_BUILDER_H_
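The deleted header above documents the builder contract: unique, non-empty keys, non-negative values, and an absl::StatusOr result holding the raw darts_clone array. A minimal usage sketch under those assumptions; the vocabulary and error handling are illustrative and not taken from any caller in the tree.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

#include "tensorflow_text/core/kernels/darts_clone_trie_builder.h"

int main() {
  // Keys must be unique and non-empty; values must be non-negative.
  const std::vector<std::string> keys = {"def", "Abc"};
  const std::vector<int> values = {7, 3};

  absl::StatusOr<std::vector<uint32_t>> trie =
      tensorflow::text::trie_utils::BuildDartsCloneTrie(keys, values);
  if (!trie.ok()) {
    std::cerr << trie.status() << std::endl;
    return 1;
  }
  // The returned vector is the raw darts_clone unit array; it can be stored
  // (e.g., in a tensor) and later handed to DartsCloneTrieWrapper::Create().
  std::cout << "trie units: " << trie->size() << std::endl;
  return 0;
}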
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_test.cc deleted file mode 100644 index 5eb6039..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_test.cc +++ /dev/null
@@ -1,188 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow_text/core/kernels/darts_clone_trie_builder.h" -#include "tensorflow_text/core/kernels/darts_clone_trie_wrapper.h" - -namespace tensorflow { -namespace text { -namespace trie_utils { - -using ::testing::status::StatusIs; - -TEST(DartsCloneTrieTest, CreateCursorPointToRootAndTryTraverseOneStep) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc"}; - - // Create the trie instance. - ASSERT_OK_AND_ASSIGN(std::vector<uint32_t> trie_array, - BuildDartsCloneTrie(vocab_tokens)); - ASSERT_OK_AND_ASSIGN(DartsCloneTrieWrapper trie, - DartsCloneTrieWrapper::Create(trie_array.data())); - - DartsCloneTrieWrapper::TraversalCursor cursor; - int data; - - cursor = trie.CreateTraversalCursorPointToRoot(); // Create a cursor to point - // to the root. - EXPECT_TRUE(trie.TryTraverseOneStep(cursor, 'A')); - EXPECT_FALSE(trie.TryGetData(cursor, data)); - EXPECT_TRUE(trie.TryTraverseOneStep(cursor, 'b')); - EXPECT_FALSE(trie.TryGetData(cursor, data)); - EXPECT_TRUE(trie.TryTraverseOneStep(cursor, 'c')); - EXPECT_TRUE(trie.TryGetData(cursor, data)); - EXPECT_THAT(data, 2); - EXPECT_FALSE(trie.TryTraverseOneStep(cursor, 'c')); -} - -TEST(DartsCloneTrieTest, CreateCursorAndTryTraverseSeveralSteps) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc"}; - - // Create the trie instance. - ASSERT_OK_AND_ASSIGN(std::vector<uint32_t> trie_array, - BuildDartsCloneTrie(vocab_tokens)); - ASSERT_OK_AND_ASSIGN(DartsCloneTrieWrapper trie, - DartsCloneTrieWrapper::Create(trie_array.data())); - - DartsCloneTrieWrapper::TraversalCursor cursor; - int data; - - cursor = trie.CreateTraversalCursor(trie.kRootNodeId); // Create a cursor to - // point to the root. - EXPECT_TRUE(trie.TryTraverseSeveralSteps(cursor, "def")); - EXPECT_TRUE(trie.TryGetData(cursor, data)); - EXPECT_THAT(data, 0); -} - -TEST(DartsCloneTrieTest, TraversePathNotExisted) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc"}; - - // Create the trie instance. - ASSERT_OK_AND_ASSIGN(std::vector<uint32_t> trie_array, - BuildDartsCloneTrie(vocab_tokens)); - ASSERT_OK_AND_ASSIGN(DartsCloneTrieWrapper trie, - DartsCloneTrieWrapper::Create(trie_array.data())); - - DartsCloneTrieWrapper::TraversalCursor cursor; - - trie.SetTraversalCursor( - cursor, - trie.kRootNodeId); // Use SetTraversalCursor() to point to the root. - EXPECT_FALSE(trie.TryTraverseSeveralSteps(cursor, "dez")); -} - -TEST(DartsCloneTrieTest, TraverseOnUtf8Path) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc"}; - - // Create the trie instance. 
- ASSERT_OK_AND_ASSIGN(std::vector<uint32_t> trie_array, - BuildDartsCloneTrie(vocab_tokens)); - ASSERT_OK_AND_ASSIGN(DartsCloneTrieWrapper trie, - DartsCloneTrieWrapper::Create(trie_array.data())); - - DartsCloneTrieWrapper::TraversalCursor cursor; - int data; - - trie.SetTraversalCursor( - cursor, - trie.kRootNodeId); // Use SetTraversalCursor() to point to the root. - EXPECT_TRUE(trie.TryTraverseSeveralSteps(cursor, "\xe1\xb8\x8aZZ")); - EXPECT_TRUE(trie.TryGetData(cursor, data)); - EXPECT_THAT(data, 1); -} - -TEST(DartsCloneTrieTest, TraverseOnPartialUtf8Path) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc"}; - - // Create the trie instance. - ASSERT_OK_AND_ASSIGN(std::vector<uint32_t> trie_array, - BuildDartsCloneTrie(vocab_tokens)); - ASSERT_OK_AND_ASSIGN(DartsCloneTrieWrapper trie, - DartsCloneTrieWrapper::Create(trie_array.data())); - - DartsCloneTrieWrapper::TraversalCursor cursor; - int data; - - trie.SetTraversalCursor( - cursor, - trie.kRootNodeId); // Use SetTraversalCursor() to point to the root. - EXPECT_TRUE(trie.TryTraverseSeveralSteps(cursor, "\xe1\xb8")); - EXPECT_FALSE(trie.TryGetData(cursor, data)); -} - -TEST(DartsCloneTrieTest, TraverseOnUtf8PathNotExisted) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc"}; - - // Create the trie instance. - ASSERT_OK_AND_ASSIGN(std::vector<uint32_t> trie_array, - BuildDartsCloneTrie(vocab_tokens)); - ASSERT_OK_AND_ASSIGN(DartsCloneTrieWrapper trie, - DartsCloneTrieWrapper::Create(trie_array.data())); - - DartsCloneTrieWrapper::TraversalCursor cursor; - - trie.SetTraversalCursor( - cursor, - trie.kRootNodeId); // Use SetTraversalCursor() to point to the root. - EXPECT_FALSE(trie.TryTraverseSeveralSteps(cursor, "\xe1\xb8\x84")); -} - -TEST(DartsCloneTrieBuildError, KeysValuesSizeDifferent) { - // The test vocabulary. - std::vector<std::string> keys{"def", "\xe1\xb8\x8aZZ", "Abc"}; - std::vector<int> values{1, 2, 3, 4}; - - // Create the trie instance. - ASSERT_THAT(BuildDartsCloneTrie(keys, values), - StatusIs(util::error::INVALID_ARGUMENT)); -} - -TEST(DartsCloneTrieBuildError, DuplicatedKeys) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc", "def"}; - - // Create the trie instance. - ASSERT_THAT(BuildDartsCloneTrie(vocab_tokens), - StatusIs(util::error::INVALID_ARGUMENT)); -} - -TEST(DartsCloneTrieBuildError, EmptyStringsInKeys) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc", ""}; - - // Create the trie instance. - ASSERT_THAT(BuildDartsCloneTrie(vocab_tokens), - StatusIs(util::error::INVALID_ARGUMENT)); -} - -TEST(DartsCloneTrieBuildError, NegativeValues) { - // The test vocabulary. - std::vector<std::string> vocab_tokens{"def", "\xe1\xb8\x8aZZ", "Abc"}; - std::vector<int> vocab_values{0, -1, 1}; - - // Create the trie instance. - ASSERT_THAT(BuildDartsCloneTrie(vocab_tokens, vocab_values), - StatusIs(util::error::INVALID_ARGUMENT)); -} - -} // namespace trie_utils -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h deleted file mode 100644 index fce2633..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/darts_clone_trie_wrapper.h +++ /dev/null
@@ -1,169 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Access utils for Darts-clone tries. -// -// Darts-clone is a compact and efficient implementation of Darts (Double-ARray -// Trie System). For more info, see https://github.com/s-yata/darts-clone. -// -// This header file contains utils that access a darts-clone trie. To build such -// a darts-clone trie, use the utils from the companion header file -// darts_clone_trie_builder.h. -// -// Note that although there is a 'traverse()' function in the original source -// (see https://github.com/s-yata/darts-clone/blob/master/include/darts.h), the -// utils in this header file are more efficient and the APIs are more flexible. -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_WRAPPER_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_WRAPPER_H_ - -#include <stdint.h> -#include <string.h> - -#include "absl/status/statusor.h" - -namespace tensorflow { -namespace text { -namespace trie_utils { - -// A wrapper class of darts_clone trie for traversing and getting data on the -// trie. It does not own the actual 'trie_array'. -class DartsCloneTrieWrapper { - public: - // Represents the root node id. - static constexpr uint32_t kRootNodeId = 0; - - // A struct serving as the trie traversal cursor. It holds 'node_id' and - // 'unit' (which is 'trie_array_[node_id]'). The reason is to save and reuse - // the 'trie_array_[node_id]'. - struct TraversalCursor { - uint32_t node_id = 0; - uint32_t unit = 0; - }; - - // Constructs an instance by passing in the pointer to the trie array data. - // The caller needs to make sure that 'trie_array' points to a valid structure - // returned by darts_clone trie builder. The caller also needs to maintain the - // availability of 'trie_array' throughout the lifetime of this instance. - static absl::StatusOr<DartsCloneTrieWrapper> Create( - const uint32_t* trie_array) { - if (trie_array == nullptr) { - return absl::InvalidArgumentError("trie_array is nullptr."); - } - return DartsCloneTrieWrapper(trie_array); - } - - // Creates a cursor pointing to the root. - TraversalCursor CreateTraversalCursorPointToRoot() { - return {kRootNodeId, trie_array_[kRootNodeId]}; - } - - // Creates a cursor pointing to the 'node_id'. - TraversalCursor CreateTraversalCursor(uint32_t node_id) { - return {node_id, trie_array_[node_id]}; - } - - // Sets the cursor to point to 'node_id'. - void SetTraversalCursor(TraversalCursor& cursor, uint32_t node_id) { - cursor.node_id = node_id; - cursor.unit = trie_array_[node_id]; - } - - // Traverses one step from 'cursor' following 'ch'. If successful (i.e., there - // exists such an edge), moves 'cursor' to the new node and returns true. - // Otherwise, does nothing (i.e., 'cursor' is not changed) and returns false. 
- bool TryTraverseOneStep(TraversalCursor& cursor, unsigned char ch) const { - const uint32_t next_node_id = cursor.node_id ^ offset(cursor.unit) ^ ch; - const uint32_t next_node_unit = trie_array_[next_node_id]; - if (label(next_node_unit) != ch) { - return false; - } - cursor.node_id = next_node_id; - cursor.unit = next_node_unit; - return true; - } - - // Traverses several steps from 'cursor' following the characters on 'path'. - // If *all* steps are successful, moves 'cursor' to the new node and returns - // true. Otherwise, does nothing (i.e., 'cursor' is not changed) and returns - // false. - bool TryTraverseSeveralSteps(TraversalCursor& cursor, - absl::string_view path) const { - return TryTraverseSeveralSteps(cursor, path.data(), path.size()); - } - - // If the node pointed by 'cursor' has data, read into 'out_data' and returns - // true; otherwise, does nothing and returns false. - bool TryGetData(const TraversalCursor& cursor, int& out_data) const { - if (!has_leaf(cursor.unit)) { - return false; - } - const uint32_t value_unit = - trie_array_[cursor.node_id ^ offset(cursor.unit)]; - out_data = value(value_unit); - return true; - } - - private: - // Use Create() instead of the constructor. - explicit DartsCloneTrieWrapper(const uint32_t* trie_array) - : trie_array_(trie_array) {} - - // The actual implementation of TryTraverseSeveralSteps. - bool TryTraverseSeveralSteps(TraversalCursor& cursor, - const char* ptr, - int size) const { - uint32_t cur_id = cursor.node_id; - uint32_t cur_unit = cursor.unit; - for (; size > 0; --size, ++ptr) { - const unsigned char ch = static_cast<const unsigned char>(*ptr); - cur_id ^= offset(cur_unit) ^ ch; - cur_unit = trie_array_[cur_id]; - if (label(cur_unit) != ch) { - return false; - } - } - cursor.node_id = cur_id; - cursor.unit = cur_unit; - return true; - } - - // The helper functions below are based on - // https://github.com/tensorflow/tflite-support/blob/master/tensorflow_lite_support/custom_ops/kernel/sentencepiece/double_array_trie.h - - // Returns offset to children. - static uint32_t offset(uint32_t unit) { - return (unit >> 10) << ((unit & 0x200) >> 6); - } - - // Returns a label associated with a node. - // A leaf node will have the MSB set and thus return an invalid label. - static uint32_t label(uint32_t unit) { return unit & 0x800000ff; } - - // Returns whether a node has a leaf as a child. - static bool has_leaf(uint32_t unit) { return unit & 0x100; } - - // Returns a value associated with a node. Available when a node is a leaf. - static int value(uint32_t unit) { - return static_cast<int>(unit & 0x7fffffff); - } - - // The pointer to the darts trie array. - const uint32_t* trie_array_; -}; - -} // namespace trie_utils -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_DARTS_CLONE_TRIE_WRAPPER_H_
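The wrapper above is meant to be paired with the builder earlier in this patch: the builder produces the uint32_t array, and the wrapper traverses it without taking ownership, so the array must outlive the wrapper. A hedged sketch of that pairing for a simple vocabulary lookup; the helper name and token list are illustrative.

#include <cstdint>
#include <string>
#include <vector>

#include "tensorflow_text/core/kernels/darts_clone_trie_builder.h"
#include "tensorflow_text/core/kernels/darts_clone_trie_wrapper.h"

namespace trie_utils = tensorflow::text::trie_utils;

// Returns the value stored for `token`, or -1 if the token is not in the trie.
int LookupOrMinusOne(trie_utils::DartsCloneTrieWrapper& trie,
                     const std::string& token) {
  auto cursor = trie.CreateTraversalCursorPointToRoot();
  int value = -1;
  if (trie.TryTraverseSeveralSteps(cursor, token) &&
      trie.TryGetData(cursor, value)) {
    return value;
  }
  return -1;
}

int main() {
  const std::vector<std::string> vocab = {"def", "Abc"};
  // The single-argument overload assigns each key its index as the value.
  auto trie_array = trie_utils::BuildDartsCloneTrie(vocab);
  if (!trie_array.ok()) return 1;

  // The wrapper does not own trie_array; keep it alive while the wrapper is used.
  auto trie = trie_utils::DartsCloneTrieWrapper::Create(trie_array->data());
  if (!trie.ok()) return 1;

  return LookupOrMinusOne(*trie, "Abc") == 1 ? 0 : 1;  // "Abc" was key index 1
}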
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest.h deleted file mode 100644 index deff86c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest.h +++ /dev/null
@@ -1,187 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_DISJOINT_SET_FOREST_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_DISJOINT_SET_FOREST_H_ - -#include <stddef.h> - -#include <type_traits> -#include <vector> - -#include "tensorflow/core/platform/logging.h" - -namespace tensorflow { -namespace text { - -// An implementation of the disjoint-set forest data structure. The universe of -// elements is the dense range of indices [0,n). Thread-compatible. -// -// By default, this uses the path compression and union by rank optimizations, -// achieving near-constant runtime on all operations. However, the user may -// disable the union by rank optimization, which allows the user to control how -// roots are selected when a union occurs. When union by rank is disabled, the -// runtime of all operations increases to O(log n) amortized. -// -// Template args: -// Index: An unsigned integral type wide enough to hold n. -// kUseUnionByRank: Whether to use the union by rank optimization. -template <class Index, bool kUseUnionByRank = true> -class DisjointSetForest { - public: - static_assert(std::is_integral<Index>::value, "Index must be integral"); - static_assert(!std::is_signed<Index>::value, "Index must be unsigned"); - using IndexType = Index; - - // Creates an empty forest. - DisjointSetForest() = default; - - // Initializes this to hold the elements [0,|size|), each initially in its own - // singleton set. Replaces existing state, if any. - void Init(Index size); - - // Returns the root of the set containing |element|, which uniquely identifies - // the set. Note that the root of a set may change as the set is merged with - // other sets; do not cache the return value of FindRoot(e) across calls to - // Union() or UnionOfRoots() that could merge the set containing e. - Index FindRoot(Index element); - - // For convenience, returns true if |element1| and |element2| are in the same - // set. When performing a large batch of queries it may be more efficient to - // cache the value of FindRoot(), modulo caveats regarding caching above. - bool SameSet(Index element1, Index element2); - - // Merges the sets rooted at |root1| and |root2|, which must be the roots of - // their respective sets. Either |root1| or |root2| will be the root of the - // merged set. If |kUseUnionByRank| is true, then it is unspecified whether - // |root1| or |root2| will be the root; otherwise, |root2| will be the root. - void UnionOfRoots(Index root1, Index root2); - - // As above, but for convenience finds the root of |element1| and |element2|. - void Union(Index element1, Index element2); - - // The number of elements in this. - Index size() const { return size_; } - - private: - // The number of elements in the universe underlying the sets. - Index size_ = 0; - - // The parent of each element, where self-loops are roots. - std::vector<Index> parents_; - - // The rank of each element, for the union by rank optimization. 
Only used if - // |kUseUnionByRank| is true. - std::vector<Index> ranks_; -}; - -// Implementation details below. - -template <class Index, bool kUseUnionByRank> -void DisjointSetForest<Index, kUseUnionByRank>::Init(Index size) { - size_ = size; - parents_.resize(size_); - if (kUseUnionByRank) - ranks_.resize(size_); - - // Create singleton sets. - for (Index i = 0; i < size_; ++i) { - parents_[i] = i; - if (kUseUnionByRank) - ranks_[i] = 0; - } -} - -template <class Index, bool kUseUnionByRank> -Index DisjointSetForest<Index, kUseUnionByRank>::FindRoot(Index element) { - DCHECK_LT(element, size()); - Index* const __restrict parents = parents_.data(); - - // Walk up to the root of the |element|. Unroll the first two comparisons - // because path compression ensures most FindRoot() calls end there. In - // addition, if a root is found within the first two comparisons, then the - // path compression updates can be skipped. - Index current = element; - Index parent = parents[current]; - if (current == parent) - return current; // |element| is a root - current = parent; - parent = parents[current]; - if (current == parent) - return current; // |element| is the child of a root - do { // otherwise, continue upwards until root - current = parent; - parent = parents[current]; - } while (current != parent); - const Index root = current; - - // Apply path compression on the traversed nodes. - current = element; - parent = parents[current]; // not root, thanks to unrolling above - do { - parents[current] = root; - current = parent; - parent = parents[current]; - } while (parent != root); - - return root; -} - -template <class Index, bool kUseUnionByRank> -bool DisjointSetForest<Index, kUseUnionByRank>::SameSet(Index element1, - Index element2) { - return FindRoot(element1) == FindRoot(element2); -} - -template <class Index, bool kUseUnionByRank> -void DisjointSetForest<Index, kUseUnionByRank>::UnionOfRoots(Index root1, - Index root2) { - DCHECK_LT(root1, size()); - DCHECK_LT(root2, size()); - DCHECK_EQ(root1, parents_[root1]); - DCHECK_EQ(root2, parents_[root2]); - if (root1 == root2) - return; // already merged - Index* const __restrict parents = parents_.data(); - - if (kUseUnionByRank) { - // Attach the lesser-rank root to the higher-rank root. - Index* const __restrict ranks = ranks_.data(); - const Index rank1 = ranks[root1]; - const Index rank2 = ranks[root2]; - if (rank2 < rank1) { - parents[root2] = root1; - } else if (rank1 < rank2) { - parents[root1] = root2; - } else { - // Equal ranks; choose one arbitrarily and promote its rank. - parents[root1] = root2; - ranks[root2] = rank2 + 1; - } - } else { - // Always make |root2| the root of the merged set. - parents[root1] = root2; - } -} - -template <class Index, bool kUseUnionByRank> -void DisjointSetForest<Index, kUseUnionByRank>::Union(Index element1, - Index element2) { - UnionOfRoots(FindRoot(element1), FindRoot(element2)); -} - -} // namespace text -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_DISJOINT_SET_FOREST_H_
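The header above documents near-constant-time FindRoot/Union via path compression and (optional) union by rank. A short illustrative sketch of the public API; the element grouping is invented for the example and is not taken from any caller.

#include <cstdint>
#include <iostream>

#include "tensorflow_text/core/kernels/disjoint_set_forest.h"

int main() {
  tensorflow::text::DisjointSetForest<uint32_t> forest;
  forest.Init(6);      // elements 0..5, each initially a singleton set

  forest.Union(0, 1);  // {0, 1}
  forest.Union(1, 2);  // {0, 1, 2}
  forest.Union(4, 5);  // {4, 5}; element 3 stays alone

  std::cout << forest.SameSet(0, 2) << "\n";  // 1 (true)
  std::cout << forest.SameSet(2, 3) << "\n";  // 0 (false)
  // Per the header comment, FindRoot() results are only stable until the next
  // Union()/UnionOfRoots() that merges the set containing the element.
  std::cout << forest.FindRoot(4) << "\n";
  return 0;
}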
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest_test.cc deleted file mode 100644 index 2d3e09b..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/disjoint_set_forest_test.cc +++ /dev/null
@@ -1,150 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/disjoint_set_forest.h" - -#include <stddef.h> - -#include <set> -#include <utility> -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> - -namespace tensorflow { -namespace text { - -// Testing rig. -// -// Template args: -// Forest: An instantiation of the DisjointSetForest<> template. -template <class Forest> -class DisjointSetForestTest : public ::testing::Test { - protected: - using Index = typename Forest::IndexType; - - // Expects that the |expected_sets| and |forest| match. - void ExpectSets(const std::set<std::set<Index>>& expected_sets, - Forest* forest) { - std::set<std::pair<Index, Index>> expected_pairs; - for (const auto& expected_set : expected_sets) { - for (auto it = expected_set.begin(); it != expected_set.end(); ++it) { - for (auto jt = expected_set.begin(); jt != expected_set.end(); ++jt) { - expected_pairs.emplace(*it, *jt); - } - } - } - - for (Index lhs = 0; lhs < forest->size(); ++lhs) { - for (Index rhs = 0; rhs < forest->size(); ++rhs) { - if (expected_pairs.find({lhs, rhs}) != expected_pairs.end()) { - EXPECT_EQ(forest->FindRoot(lhs), forest->FindRoot(rhs)); - EXPECT_TRUE(forest->SameSet(lhs, rhs)); - } else { - EXPECT_NE(forest->FindRoot(lhs), forest->FindRoot(rhs)); - EXPECT_FALSE(forest->SameSet(lhs, rhs)); - } - } - } - } -}; - -using Forests = ::testing::Types<DisjointSetForest<uint8, false>, - DisjointSetForest<uint8, true>, - DisjointSetForest<uint16, false>, - DisjointSetForest<uint16, true>, - DisjointSetForest<uint32, false>, - DisjointSetForest<uint32, true>, - DisjointSetForest<uint64, false>, - DisjointSetForest<uint64, true>>; -TYPED_TEST_SUITE(DisjointSetForestTest, Forests); - -TYPED_TEST(DisjointSetForestTest, DefaultEmpty) { - TypeParam forest; - EXPECT_EQ(0, forest.size()); -} - -TYPED_TEST(DisjointSetForestTest, InitEmpty) { - TypeParam forest; - forest.Init(0); - EXPECT_EQ(0, forest.size()); -} - -TYPED_TEST(DisjointSetForestTest, Populated) { - TypeParam forest; - forest.Init(5); - EXPECT_EQ(5, forest.size()); - this->ExpectSets({{0}, {1}, {2}, {3}, {4}}, &forest); - - forest.UnionOfRoots(1, 2); - this->ExpectSets({{0}, {1, 2}, {3}, {4}}, &forest); - - forest.Union(1, 2); - this->ExpectSets({{0}, {1, 2}, {3}, {4}}, &forest); - - forest.UnionOfRoots(0, 4); - this->ExpectSets({{0, 4}, {1, 2}, {3}}, &forest); - - forest.Union(3, 4); - this->ExpectSets({{0, 3, 4}, {1, 2}}, &forest); - - forest.Union(0, 3); - this->ExpectSets({{0, 3, 4}, {1, 2}}, &forest); - - forest.Union(2, 0); - this->ExpectSets({{0, 1, 2, 3, 4}}, &forest); - - forest.Union(1, 3); - this->ExpectSets({{0, 1, 2, 3, 4}}, &forest); -} - -// Testing rig for checking that when union by rank is disabled, the root of a -// merged set can be controlled. 
-class DisjointSetForestNoUnionByRankTest : public ::testing::Test { - protected: - using Forest = DisjointSetForest<uint32, false>; - - // Expects that the roots of the |forest| match |expected_roots|. - void ExpectRoots(const std::vector<uint32>& expected_roots, Forest* forest) { - ASSERT_EQ(expected_roots.size(), forest->size()); - for (uint32 i = 0; i < forest->size(); ++i) { - EXPECT_EQ(expected_roots[i], forest->FindRoot(i)); - } - } -}; - -TEST_F(DisjointSetForestNoUnionByRankTest, ManuallySpecifyRoot) { - Forest forest; - forest.Init(5); - ExpectRoots({0, 1, 2, 3, 4}, &forest); - - forest.UnionOfRoots(0, 1); // 1 is the root - ExpectRoots({1, 1, 2, 3, 4}, &forest); - - forest.Union(4, 3); // 3 is the root - ExpectRoots({1, 1, 2, 3, 3}, &forest); - - forest.Union(0, 2); // 2 is the root - ExpectRoots({2, 2, 2, 3, 3}, &forest); - - forest.Union(3, 3); // no effect - ExpectRoots({2, 2, 2, 3, 3}, &forest); - - forest.Union(4, 0); // 2 is the root - ExpectRoots({2, 2, 2, 2, 2}, &forest); -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/edit_changes.proto b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/edit_changes.proto deleted file mode 100644 index 08f62778..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/edit_changes.proto +++ /dev/null
@@ -1,14 +0,0 @@ -syntax = "proto2"; - -package tensorflow.text; - -// Protocol buffer for serializing a single icu::Edits object -// represented by a sequence of edit changes pairs: (old_length, new_length) -message EditChanges { - message Change { - optional int32 old_length = 1; - optional int32 new_length = 2; - } - - repeated Change change = 1; -}
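The message above records an icu::Edits object as a sequence of (old_length, new_length) pairs. A hedged sketch of filling it in, assuming the standard protoc-generated C++ API for this message; the generated header path and helper name are illustrative.

#include <string>
#include <utility>
#include <vector>

#include "tensorflow_text/core/kernels/edit_changes.pb.h"

// Serializes a list of (old_length, new_length) edit pairs.
std::string SerializeEditPairs(const std::vector<std::pair<int, int>>& pairs) {
  tensorflow::text::EditChanges changes;
  for (const auto& [old_len, new_len] : pairs) {
    auto* change = changes.add_change();
    change->set_old_length(old_len);
    change->set_new_length(new_len);
  }
  return changes.SerializeAsString();
}

// Example: a normalization that replaced a 3-byte span with 1 byte and kept the
// next 4 bytes unchanged would be recorded as {{3, 1}, {4, 4}}.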
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/exp_greedy_constrained_sequence_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/exp_greedy_constrained_sequence_kernel_test.cc deleted file mode 100644 index 458fcb67..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/exp_greedy_constrained_sequence_kernel_test.cc +++ /dev/null
@@ -1,854 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -namespace tensorflow { - -using tensorflow::DT_INT32; -using tensorflow::FakeInput; -using tensorflow::NodeDefBuilder; -using tensorflow::Status; -using tensorflow::TensorShape; -using tensorflow::text_kernels_test_util::MatrixEq; -using tensorflow::text_kernels_test_util::VectorEq; - -class ExpGreedyConstrainedSequenceTest : public tensorflow::OpsTestBase { - public: - void SetUpOpWithDefaults() { - // Prepare graph. - TF_ASSERT_OK(NodeDefBuilder("tested_op", "ConstrainedSequence") - .Attr("Tin", DT_INT32) - .Attr("use_viterbi", false) - .Attr("use_log_space", false) - .Attr("use_start_and_end_states", true) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } -}; - -// TODO(b/122968457): There are a bunch of tests that only validate !ok instead -// of looking for specific error messages; fix that. - -// This test examines evaluations with only a permissions matrix. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty weights matrix not of rank 2. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with a 2D score matrix (implicit batch 1). -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithSingleBatchItem) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({1, 4}), // - { - 10.0, 12.0, 13.0, 4.0, // - }); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({1}), {1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // Validate the output. - std::vector<int32> expected_transitions({1}); - std::vector<int64> expected_offsets({0, 1}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines int64 input type and int32 output type. -TEST_F(ExpGreedyConstrainedSequenceTest, int64inint32out) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. 
- AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - // Validate the output. - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op can take a sequence length of type {{X},{Y},{Z}} -// (with an outer batch dimension). -TEST_F(ExpGreedyConstrainedSequenceTest, TwoDimensionalSequenceLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3, 1}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions that are forbidden by the permission -// matrix (final->null) are not taken. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeightsConstrainedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. 
- AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok; the next - // highest is 1, but 1->OUT is not OK; the next highest is 0, which is OK. - // The second sequence's highest score is 3, OUT->3 is OK and 3->OUT is OK. - // The third sequence's highest score is 0, OUT->0 is OK and 0->OUT is OK. - // Validate the output. - std::vector<int32> expected_transitions({0, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with only a weight matrix. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {1.0, 1.0, 3.5, 4.0} (max is 3) - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2) - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({3, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty not rank 2 permissions matrix. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {1.0, 1.0, 3.5, 4.0} (max is 3) - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2) - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({3, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions are scored with the probability -// of ending the sequence on the transition (x->final->null). 
-TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissionsWeightedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 0.1, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row and the last column in the - // score tensor, so the real scores are: - // 1: {1.0, 1.0, 3.5, 0.4} (max is 2) - // 2: {0.1, 4.5, 5.5, 0.5} (max is 2) - // 3: {10.0, 12.0, 1.5, 0.4} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({2, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions are not scored with the probability -// of ending the sequence on the transition (x->final->null) if -// use_start_and_end_states is False. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissionsNotWeightedByEnd) { - // Prepare graph. - TF_ASSERT_OK(NodeDefBuilder("tested_op", "ConstrainedSequence") - .Attr("Tin", DT_INT32) - .Attr("use_viterbi", false) - .Attr("use_log_space", false) - .Attr("use_start_and_end_states", false) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({4, 4}), {0.5, 0.5, 0.5, 0.5, // - 0.5, 0.5, 0.5, 0.5, // - 0.5, 0.5, 0.5, 0.5, // - 0.5, 0.5, 0.5, 0.5}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row and the last column in the - // score tensor, so the real scores are: - // 1: {5.0, 1.0, 3.5, 4.0} (max is 0) - // 2: {.5, 4.5, 5.5, 2.5} (max is 2) - // 3: {50.0, 12.0, 1.5,2.0} (max is 0) - // Validate the output. - std::vector<int32> expected_transitions({0, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with both weight and permission matrices. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithWeightsAndPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. 
- AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 'OUTSIDE' - true, false, true, true, false, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {1.0, 1.0, 3.5, 4.0} (max is 3). OUT->3 is OK. - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2). OUT->2 is OK. - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1). OUT->1 is not OK, so go with 0. - // Note that X->OUT is set to always be OK here. - std::vector<int32> expected_transitions({3, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesMultipleTransitionsWithWeightsAndPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // Batch 0, step 0 - 10.0, 10.0, 10.0, 10.0, // Batch 0, step 1 - 1.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - 100.0, 24.0, 3.0, 4.0, // Batch 2, step 0 - 1.0, 11.0, 1.0, 10.0, // Batch 2, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, false, true, false, true, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - true, false, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // 0 - 0.5, 0.5, 0.5, 0.5, 1.0, // 1 - 0.5, 0.5, 0.5, 0.5, 1.0, // 2 - 0.5, 0.5, 1.0, 0.5, 1.0, // 3 - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {0.1 6.0 3.5 4.0} (max is 3). OUT->3 is OK. - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2). OUT->2 is OK. - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1). OUT->1 is not OK, so go with 0. - // STEP 2: - // 1: In state '3', so use row 3 in the weight tensor. - // Weights are {5, 5, 10, 5}; 3->2 is OK and 2->OUT is OK; use 2. - // 2: In state '2', so use row 2 in the weight tensor. - // Weights are {5, 7.5, .5, 6.0}; 2->3 is not OK and 2->1 is not OK, so 0. - // 3: In state 0, so use row 0 in the weight tensor. - // Weights are {0.5, 5.5, 0.5, 5}; 0->1 is OK but 1->OUT is not, so 3. - - std::vector<int32> expected_transitions({3, 2, 2, 0, 0, 3}); - std::vector<int64> expected_offsets({0, 2, 4, 6}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesMultipleTransitionsWithVaryingLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // Batch 0, step 0 - 10.0, 10.0, 10.0, 10.0, // Batch 0, step 1 - 1.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - 100.0, 24.0, 3.0, 4.0, // Batch 2, step 0 - 1.0, 11.0, 1.0, 10.0, // Batch 2, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 1, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, false, true, false, true, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - true, false, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // 0 - 0.5, 0.5, 0.5, 0.5, 1.0, // 1 - 0.5, 0.5, 0.5, 0.5, 1.0, // 2 - 0.5, 0.5, 1.0, 0.5, 1.0, // 3 - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {0.1 6.0 3.5 4.0} (max is 3). OUT->3 is OK. - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2). OUT->2 and 2->OUT are OK. - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1). OUT->1 is not OK, so go with 0. - // STEP 2: - // 1: In state '3', so use row 3 in the weight tensor. - // Weights are {5, 5, 10, 5}; 3->2 is OK and 2->OUT is OK; use 2. - // 2: End of sequence; no change. - // 3: In state 0, so use row 0 in the weight tensor. - // Weights are {0.5, 5.5, 0.5, 5}; 0->1 is OK but 1->OUT is not, so 3. - - std::vector<int32> expected_transitions({3, 2, 2, 0, 3}); - std::vector<int64> expected_offsets({0, 2, 3, 5}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with a fully negative input set. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNegativeInputs) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - -10.0, -12.0, -13.0, -4.0, // - -1.0, -12.0, -13.0, -14.0, // - -15.0, -2.0, -3.0, -14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, true, true, true, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - std::vector<int32> expected_transitions({3, 0, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an all-zero weight matrix. -TEST_F(ExpGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithZeroedWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), { - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, - }); - - TF_ASSERT_OK(RunOpKernel()); - - // In the case of a tie between weights, the higher state number wins; - // if all weights are zero, the states should all be 3. - - std::vector<int32> expected_transitions({3, 3, 3}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -TEST_F(ExpGreedyConstrainedSequenceTest, - ImpossibleSequencesResultInNegativeOnesIfAttrIsSet) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - false, false, false, false, false, // FROM 0 - false, false, false, false, false, // FROM 1 - false, false, false, false, false, // FROM 2 - false, false, false, false, false, // FROM 3 - false, false, false, false, false, // FROM 'OUT' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // Validate the output. - - std::vector<int32> expected_transitions({-1, -1, -1, -1, -1, -1}); - std::vector<int64> expected_offsets({0, 2, 4, 6}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op will throw an error if there are too few scores to -// finalize all the sequences. -TEST_F(ExpGreedyConstrainedSequenceTest, ErrorsIfGivenInsufficientScores) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 2, 1}); - - // Add the allowed_transitions input. 
- AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -} // namespace tensorflow
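The greedy-kernel tests deleted above reason about each decoding step the same way: multiply the candidate scores by the transition weights out of the previous state, drop candidates whose incoming transition is forbidden, and take the argmax, with ties going to the higher state index. The standalone C++ sketch below reproduces only that single-step arithmetic so the hand-computed expectations in those test comments are easy to check. It is not the ConstrainedSequence kernel itself, it ignores the separate end-of-sequence weighting, and the names (GreedyStep, prev_state) are illustrative only.

#include <iostream>
#include <vector>

// Returns the greedily chosen state for one step, or -1 if every candidate
// transition out of `prev_state` is forbidden.
int GreedyStep(const std::vector<float>& scores,
               const std::vector<std::vector<bool>>& allowed,
               const std::vector<std::vector<float>>& weights,
               int prev_state) {
  int best = -1;
  float best_score = 0.0f;
  for (int s = 0; s < static_cast<int>(scores.size()); ++s) {
    if (!allowed.empty() && !allowed[prev_state][s]) continue;  // forbidden
    float weighted =
        weights.empty() ? scores[s] : scores[s] * weights[prev_state][s];
    if (best == -1 || weighted >= best_score) {  // >=: ties go to higher index
      best_score = weighted;
      best = s;
    }
  }
  return best;
}

int main() {
  // Mirrors the first row of "ComputesSingleTransitionWithNoPermissions":
  // the OUT row of the weights is {0.1, 0.5, 0.5, 1.0}, so the raw scores
  // {10, 2, 7, 4} become {1.0, 1.0, 3.5, 4.0} and state 3 wins.
  std::vector<std::vector<float>> weights = {{0.5, 0.5, 0.5, 0.5},
                                             {0.5, 0.5, 0.5, 0.5},
                                             {0.5, 0.5, 0.5, 0.5},
                                             {0.5, 0.5, 0.5, 0.5},
                                             {0.1, 0.5, 0.5, 1.0}};
  std::vector<std::vector<bool>> allowed;  // empty => everything permitted
  std::cout << GreedyStep({10.0f, 2.0f, 7.0f, 4.0f}, allowed, weights,
                          /*prev_state=*/4)
            << "\n";  // prints 3
  return 0;
}

The >= tie-break is what makes the all-zero-weights case in the deleted tests resolve to state 3 rather than state 0.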
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/exp_viterbi_constrained_sequence_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/exp_viterbi_constrained_sequence_kernel_test.cc deleted file mode 100644 index 9bc899a8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/exp_viterbi_constrained_sequence_kernel_test.cc +++ /dev/null
@@ -1,910 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -namespace tensorflow { - -using tensorflow::DT_INT32; -using tensorflow::FakeInput; -using tensorflow::NodeDefBuilder; -using tensorflow::Status; -using tensorflow::TensorShape; -using tensorflow::text_kernels_test_util::MatrixEq; -using tensorflow::text_kernels_test_util::VectorEq; - -class ExpViterbiConstrainedSequenceTest : public tensorflow::OpsTestBase { - public: - void SetUpOpWithDefaults() { - // Prepare graph. - TF_ASSERT_OK(NodeDefBuilder("tested_op", "ConstrainedSequence") - .Attr("Tin", DT_INT32) - .Attr("use_viterbi", true) - .Attr("use_log_space", false) - .Attr("use_start_and_end_states", true) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } -}; - -// TODO(b/122968457): There are a bunch of tests that only validate !ok instead -// of looking for specific error messages; fix that. - -// This test examines evaluations with only a permissions matrix. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty weights matrix not of rank 2. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with a 2D score matrix (implicit batch 1). -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithSingleBatchItem) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({1, 4}), // - { - 10.0, 12.0, 13.0, 4.0, // - }); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({1}), {1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // Validate the output. - std::vector<int32> expected_transitions({1}); - std::vector<int64> expected_offsets({0, 1}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines int64 input type and int32 output type. -TEST_F(ExpViterbiConstrainedSequenceTest, int64inint32out) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. 
- AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - // Validate the output. - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op can take a sequence length of type {{X},{Y},{Z}} -// (with an outer batch dimension). -TEST_F(ExpViterbiConstrainedSequenceTest, TwoDimensionalSequenceLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3, 1}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions that are forbidden by the permission -// matrix (final->null) are not taken. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeightsConstrainedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. 
- AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok; the next - // highest is 1, but 1->OUT is not OK; the next highest is 0, which is OK. - // The second sequence's highest score is 3, OUT->3 is OK and 3->OUT is OK. - // The third sequence's highest score is 0, OUT->0 is OK and 0->OUT is OK. - // Validate the output. - std::vector<int32> expected_transitions({0, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with only a weight matrix. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {1.0, 1.0, 3.5, 4.0} (max is 3) - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2) - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({3, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty not rank 2 permissions matrix. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {1.0, 1.0, 3.5, 4.0} (max is 3) - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2) - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({3, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions are scored with the probability -// of ending the sequence on the transition (x->final->null). 
-TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissionsWeightedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 0.1, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row and the last column in the - // score tensor, so the real scores are: - // 1: {1.0, 1.0, 3.5, 0.4} (max is 2) - // 2: {0.1, 4.5, 5.5, 0.5} (max is 2) - // 3: {10.0, 12.0, 1.5, 0.4} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({2, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions are not scored with the probability -// of ending the sequence on the transition (x->final->null) if -// use_start_and_end_states is False. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissionsNotWeightedByEnd) { - // Prepare graph. - TF_ASSERT_OK(NodeDefBuilder("tested_op", "ConstrainedSequence") - .Attr("Tin", DT_INT32) - .Attr("use_viterbi", true) - .Attr("use_log_space", false) - .Attr("use_start_and_end_states", false) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({4, 4}), {0.5, 0.5, 0.5, 0.5, // - 0.5, 0.5, 0.5, 0.5, // - 0.5, 0.5, 0.5, 0.5, // - 0.5, 0.5, 0.5, 0.5}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row and the last column in the - // score tensor, so the real scores are: - // 1: {5.0, 1.0, 3.5, 4.0} (max is 0) - // 2: {.5, 4.5, 5.5, 2.5} (max is 2) - // 3: {50.0, 12.0, 1.5,2.0} (max is 0) - // Validate the output. - std::vector<int32> expected_transitions({0, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with both weight and permission matrices. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithWeightsAndPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. 
- AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 'OUTSIDE' - true, false, true, true, false, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // 1: {1.0, 1.0, 3.5, 4.0} (max is 3). OUT->3 is OK. - // 2: {0.1, 4.5, 5.5, 5.0} (max is 2). OUT->2 is OK. - // 3: {10.0, 12.0, 1.5, 4.0} (max is 1). OUT->1 is not OK, so go with 0. - // Note that X->OUT is set to always be OK here. - std::vector<int32> expected_transitions({3, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesMultipleTransitionsWithWeightsAndPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // Batch 0, step 0 - 10.0, 10.0, 10.0, 10.0, // Batch 0, step 1 - 1.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - 100.0, 24.0, 3.0, 4.0, // Batch 2, step 0 - 1.0, 11.0, 1.0, 10.0, // Batch 2, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, false, true, false, true, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - true, false, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // 0 - 0.5, 0.5, 0.5, 0.5, 1.0, // 1 - 0.5, 0.5, 0.5, 0.5, 1.0, // 2 - 0.5, 0.5, 1.0, 0.5, 1.0, // 3 - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // B0: { 1.0, [NOTOK], 3.5, 4.0} - // B1: { 0.1, [NOTOK], 5.5, 5.0} - // B2: {10.0, [NOTOK], 1.5, 4.0} - // - // STEP 2: - // (Forbidden transitions are marked with '*') - // - // BATCH 0: - // Raw scores are: {10.0, 10.0, 10.0, 10.0} - // from 0: New scores are {5.0, 5.0, 5.0, 5.0}, totals: {5, 0, 17.5, 20} - // from 1: New scores are {5.0, 5.0, 0*, 5.0}, totals: {5, 0, 0, 20} - // from 2: New scores are {5.0, 5.0, 5.0, 10.0}, totals: {5, 0, 17.5, 40} - // from 3: New scores are {5.0, 5.0, 0*, 5.0}, totals: {5, 0, 0, 20} - // Top scores are 20, 20, 40, 20 from [3, 3, 3, 3]. - // 1->OUT is not valid. - // Final scores are [20, 0, 40, 20] for a - // final state of [2] with a sequence of [3->2]. 
- // - // BATCH 1: - // Raw scores are {10, 15, 1, 12} - // from 0: Weighted score is {5, 5, 5, 5}, totals: {0.5, 0, 27.5, 25} - // from 1: Weighted score is {7.5, 7.5, 0*, 7.5}, t: {0.75, 0, 0, 37.5} - // from 2: Weighted score is {0.5, 0.5, 0.5, 1.0}, t: {0.05, 0, 2.75, 5} - // from 3: Weighted score is {6, 6, 0*, 6}, totals: {0.6, 0, 0, 30} - // Top scores are {27.5, 37.5, 5, 30} from [2, 3, 3, 3] - // 1->OUT is not valid, so final scores are [27.5, 0, 5, 30] for a final - // state of [3] and a sequence of [3, 3] - // - // BATCH 2: - // Raw scores are {1.0, 11.0, 1.0, 10.0} - // 2/0: Weighted score is {.5, .5, .5, .5}. t: {5, 0, 0.75, 2} - // 2/1: Weighted score is {5.5, 5.5, 0*, 5.5}. t: {55, 0, 0, 22} - // 2/2: Weighted score is {.5, .5, .5, 1.0}. t: {5, 0, 0.75, 4} - // 2/3: Weighted score is {5, 5, 0*, 5}. t: {50, 0, 0, 20} - // Top scores are {5, 55, 5, 50} from [0, 0, 0, 0] - // 1->OUT is not valid, so final scores are [5, 0, 5, 50] for a final - // state of 3 and a sequence of [0, 3]. - - std::vector<int32> expected_transitions({3, 2, 3, 3, 0, 3}); - std::vector<int64> expected_offsets({0, 2, 4, 6}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesMultipleTransitionsWithVaryingLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // Batch 0, step 0 - 10.0, 10.0, 10.0, 10.0, // Batch 0, step 1 - 1.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - 100.0, 24.0, 3.0, 4.0, // Batch 2, step 0 - 1.0, 11.0, 1.0, 10.0, // Batch 2, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 1, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, false, true, false, true, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - true, false, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // 0 - 0.5, 0.5, 0.5, 0.5, 1.0, // 1 - 0.5, 0.5, 0.5, 0.5, 1.0, // 2 - 0.5, 0.5, 1.0, 0.5, 1.0, // 3 - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be multiplied by the last row in the weight tensor, so - // the 'real' scores are: - // B0: { 1.0, [NOTOK], 3.5, 4.0} - // B1: { 0.1, [NOTOK], 5.5, 5.0} - // B2: {10.0, [NOTOK], 1.5, 4.0} - // - // STEP 2: - // (Forbidden transitions are marked with '*') - // - // BATCH 0: - // Raw scores are: {10.0, 10.0, 10.0, 10.0} - // from 0: New scores are {5.0, 5.0, 5.0, 5.0}, totals: {5, 0, 17.5, 20} - // from 1: New scores are {5.0, 5.0, 0*, 5.0}, totals: {5, 0, 0, 20} - // from 2: New scores are {5.0, 5.0, 5.0, 10.0}, totals: {5, 0, 17.5, 40} - // from 3: New scores are {5.0, 5.0, 0*, 5.0}, totals: {5, 0, 0, 20} - // Top scores are 20, 20, 40, 20 from [3, 3, 3, 3]. - // 1->OUT is not valid. - // Final scores are [20, 0, 40, 20] for a - // final state of [2] with a sequence of [3->2]. - // - // BATCH 1: - // End of sequence; no further action. 
- // - // BATCH 2: - // Raw scores are {1.0, 11.0, 1.0, 10.0} - // 2/0: Weighted score is {.5, .5, .5, .5}. t: {5, 0, 0.75, 2} - // 2/1: Weighted score is {5.5, 5.5, 0*, 5.5}. t: {55, 0, 0, 22} - // 2/2: Weighted score is {.5, .5, .5, 1.0}. t: {5, 0, 0.75, 4} - // 2/3: Weighted score is {5, 5, 0*, 5}. t: {50, 0, 0, 20} - // Top scores are {5, 55, 5, 50} from [0, 0, 0, 0] - // 1->OUT is not valid, so final scores are [5, 0, 5, 50] for a final - // state of 3 and a sequence of [0, 3]. - - std::vector<int32> expected_transitions({3, 2, 2, 0, 3}); - std::vector<int64> expected_offsets({0, 2, 3, 5}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an all-zero weight matrix. -TEST_F(ExpViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithZeroedWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), { - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, - }); - - TF_ASSERT_OK(RunOpKernel()); - - // In the case of a tie between weights, the higher state number wins; - // if all weights are zero, the states should all be 3. - - std::vector<int32> expected_transitions({3, 3, 3}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -TEST_F(ExpViterbiConstrainedSequenceTest, - ImpossibleSequencesResultInNegativeOnesIfAttrIsSet) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - false, false, false, false, false, // FROM 0 - false, false, false, false, false, // FROM 1 - false, false, false, false, false, // FROM 2 - false, false, false, false, false, // FROM 3 - false, false, false, false, false, // FROM 'OUT' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // Validate the output. - - std::vector<int32> expected_transitions({-1, -1, -1, -1, -1, -1}); - std::vector<int64> expected_offsets({0, 2, 4, 6}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op will throw an error if there are too few scores to -// finalize all the sequences. -TEST_F(ExpViterbiConstrainedSequenceTest, ErrorsIfGivenInsufficientScores) { - // Prepare graph. 
- SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 2, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -// This test ensures that the op correctly outputs a ragged tensor with type -// int32 -TEST_F(ExpViterbiConstrainedSequenceTest, OutputsInt32RaggedTensor) { - // Prepare graph. - SetUpOpWithDefaults(); - - AddInputFromArray<float>( - TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // Tr. to 3 - 10.0, 10.0, 10.0, 10.0, // Tr. 3 to 2 on wt. - 1.0, 9.0, 11.0, 5.0, // Tr. to 2 - 10.0, 15.0, 1.0, 12.0, // Irrelevant (past end of sequence) - 100.0, 24.0, 3.0, 4.0, // Tr. to 0 - 1.0, 10.0, 1.0, 10.0, // Tr. 0 to 3 (1 cannot tr. to NULL) - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 1, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, false, true, false, true, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - true, false, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // 0 - 0.5, 0.5, 0.5, 0.5, 1.0, // 1 - 0.5, 0.5, 0.5, 0.5, 1.0, // 2 - 0.5, 0.5, 1.0, 0.5, 1.0, // 3 - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - std::vector<int32> expected_transitions({3, 2, 2, 0, 3}); - std::vector<int64> expected_offsets({0, 2, 3, 5}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -} // namespace tensorflow
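The Viterbi tests deleted above verify long hand-computed walk-throughs of the multiplicative (non-log-space) recurrence: keep the best running product into each state, treat forbidden transitions as contributing zero, apply the state->OUT weight and permission at the end, and trace the argmax pointers back. The sketch below is a minimal standalone rendering of that recurrence, useful for re-deriving those walk-throughs; it is not the ConstrainedSequence kernel, and the names, the vector-of-vector layout, and the zero-means-invalid convention are assumptions made for illustration.

#include <iostream>
#include <vector>

// Runs the multiplicative Viterbi recurrence over one batch item and returns
// the decoded state per step, or all -1 if no permitted sequence exists.
std::vector<int> Viterbi(const std::vector<std::vector<float>>& step_scores,
                         const std::vector<std::vector<float>>& weight,
                         const std::vector<std::vector<bool>>& allowed) {
  const int num_states = static_cast<int>(step_scores[0].size());
  const int kOut = num_states;  // the extra OUT row/column
  std::vector<float> total(num_states, 0.0f);
  std::vector<std::vector<int>> backptr;
  // Step 0: transition out of OUT into each permitted state.
  for (int s = 0; s < num_states; ++s) {
    if (allowed[kOut][s]) total[s] = step_scores[0][s] * weight[kOut][s];
  }
  // Later steps: keep the best running product into each state.
  for (size_t t = 1; t < step_scores.size(); ++t) {
    std::vector<float> next(num_states, 0.0f);
    std::vector<int> ptr(num_states, -1);
    for (int to = 0; to < num_states; ++to) {
      for (int from = 0; from < num_states; ++from) {
        if (!allowed[from][to]) continue;
        float cand = total[from] * weight[from][to] * step_scores[t][to];
        if (ptr[to] == -1 || cand >= next[to]) {
          next[to] = cand;
          ptr[to] = from;
        }
      }
    }
    total = next;
    backptr.push_back(ptr);
  }
  // Final transition into OUT, then trace the argmax pointers backwards.
  int best = -1;
  float best_score = 0.0f;
  for (int s = 0; s < num_states; ++s) {
    if (!allowed[s][kOut] || total[s] <= 0.0f) continue;
    float cand = total[s] * weight[s][kOut];
    if (best == -1 || cand >= best_score) {
      best_score = cand;
      best = s;
    }
  }
  std::vector<int> path(step_scores.size(), -1);
  for (int t = static_cast<int>(step_scores.size()) - 1; t >= 0 && best != -1;
       --t) {
    path[t] = best;
    if (t > 0) best = backptr[t - 1][best];
  }
  return path;
}

int main() {
  std::vector<std::vector<bool>> allowed = {{true, true, true, true, true},
                                            {true, true, true, true, false},
                                            {true, false, true, false, true},
                                            {true, true, true, true, true},
                                            {true, false, true, true, true}};
  std::vector<std::vector<float>> weight = {{0.5, 0.5, 0.5, 0.5, 1.0},
                                            {0.5, 0.5, 0.5, 0.5, 1.0},
                                            {0.5, 0.5, 0.5, 0.5, 1.0},
                                            {0.5, 0.5, 1.0, 0.5, 1.0},
                                            {0.1, 0.5, 0.5, 1.0, 1.0}};
  // Batch 0 of "ComputesMultipleTransitionsWithWeightsAndPermissions":
  // the hand-computed expectation in the deleted test is the path {3, 2}.
  for (int s : Viterbi({{10, 12, 7, 4}, {10, 10, 10, 10}}, weight, allowed)) {
    std::cout << s << " ";
  }
  std::cout << "\n";  // prints: 3 2
  return 0;
}

In log space the same recurrence would use sums and a -infinity sentinel instead of products and zero, which is why the deleted tests set use_log_space explicitly.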
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.cc deleted file mode 100644 index 9dede81a..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.cc +++ /dev/null
@@ -1,739 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h" - -#include "absl/base/attributes.h" -#include "absl/status/status.h" -#include "absl/status/statusor.h" -#include "absl/strings/match.h" -#include "absl/strings/str_join.h" -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "tensorflow/lite/kernels/shim/status_macros.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h" - -namespace tensorflow { -namespace text { -namespace { - -template <bool kGetPieces> -int GetCurrentOutputSize(std::vector<std::string>* output_pieces, - std::vector<int>* output_ids) { - if constexpr (kGetPieces) { - return output_pieces->size(); - } else { - return output_ids->size(); - } -} - -} // namespace - -/*static*/ absl::StatusOr<FastWordpieceTokenizer> -FastWordpieceTokenizer::Create(const void* config_flatbuffer) { - FastWordpieceTokenizer tokenizer; - // `GetFastWordpieceTokenizerConfig()` is autogenerated by flatbuffer. - tokenizer.config_ = GetFastWordpieceTokenizerConfig(config_flatbuffer); - auto trie_or = trie_utils::DartsCloneTrieWrapper::Create( - tokenizer.config_->trie_array()->data()); - if (!trie_or.ok()) { - return absl::InvalidArgumentError( - "Failed to create DartsCloneTrieWrapper from " - "FastWordpieceTokenizerConfig.trie_array."); - } - tokenizer.trie_ = - absl::make_unique<trie_utils::DartsCloneTrieWrapper>(*std::move(trie_or)); - return std::move(tokenizer); -} - -void FastWordpieceTokenizer::Tokenize(absl::string_view input, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets, - int input_word_offset_in_text) const { - if (config_->end_to_end()) { - TokenizeTextImpl</*kGetPieces=*/true, /*kGetIds=*/true, - /*kGetOffsets=*/true>(input, output_pieces, output_ids, - output_start_offsets, - output_end_offsets); - } else { - TokenizeSingleWordImpl</*kGetPieces=*/true, /*kGetIds=*/true, - /*kGetOffsets=*/true>( - input, input_word_offset_in_text, output_pieces, output_ids, - output_start_offsets, output_end_offsets); - } -} - -void FastWordpieceTokenizer::Tokenize(absl::string_view input, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets, - int input_word_offset_in_text) const { - if (config_->end_to_end()) { - TokenizeTextImpl</*kGetPieces=*/false, /*kGetIds=*/true, - /*kGetOffsets=*/true>(input, /*output_pieces=*/nullptr, - output_ids, output_start_offsets, - output_end_offsets); - } else { - TokenizeSingleWordImpl</*kGetPieces=*/false, /*kGetIds=*/true, - /*kGetOffsets=*/true>( - input, input_word_offset_in_text, /*output_pieces=*/nullptr, output_ids, - output_start_offsets, output_end_offsets); - } -} - -void FastWordpieceTokenizer::Tokenize(absl::string_view input, - std::vector<int>* 
output_ids, - int input_word_offset_in_text) const { - if (config_->end_to_end()) { - TokenizeTextImpl</*kGetPieces=*/false, /*kGetIds=*/true, - /*kGetOffsets=*/false>(input, /*output_pieces=*/nullptr, - output_ids, - /*output_start_offsets=*/nullptr, - /*output_end_offsets=*/nullptr); - } else { - TokenizeSingleWordImpl</*kGetPieces=*/false, /*kGetIds=*/true, - /*kGetOffsets=*/false>( - input, input_word_offset_in_text, /*output_pieces=*/nullptr, output_ids, - /*output_start_offsets=*/nullptr, - /*output_end_offsets=*/nullptr); - } -} - -absl::StatusOr<std::vector<std::string>> -FastWordpieceTokenizer::DetokenizeToTokens( - const absl::Span<const int> input) const { - std::vector<std::string> subwords; - std::vector<std::string> output_tokens; - if (!config_->support_detokenization()) { - return absl::FailedPreconditionError( - "Detokenize function is only enabled when support_detokenization is " - "true in the config flatbuffer. Please rebuild the model flatbuffer " - "by setting support_detokenization=true."); - } - for (int id : input) { - auto vocab = config_->vocab_array()->Get(id); - auto is_suffix = config_->vocab_is_suffix_array()->Get(id); - if (!subwords.empty() && !is_suffix) { - // When current subword is not a suffix token, it marks the start of a new - // word. We concatenate the subwords that compose the previous word and - // add it to the return list. - output_tokens.emplace_back(absl::StrJoin(subwords, "")); - subwords.clear(); - } - // Special case: when a suffix token e.g. "##a" appears at the start of the - // input ids, we preserve the suffix_indicator. - if (subwords.empty() && is_suffix) { - subwords.emplace_back(config_->suffix_indicator()->string_view()); - } - subwords.emplace_back(vocab->string_view()); - } - if (!subwords.empty()) { - output_tokens.emplace_back(absl::StrJoin(subwords, "")); - } - return output_tokens; -} - -absl::StatusOr<std::string> FastWordpieceTokenizer::Detokenize( - const absl::Span<const int> input) const { - SH_ASSIGN_OR_RETURN(std::vector<std::string> output_tokens, - DetokenizeToTokens(input)); - return absl::StrJoin(output_tokens, " "); -} - -int FastWordpieceTokenizer::SkipTheRemainingOfWordAndTrailingWhiteSpaces( - absl::string_view input, - int& cur_pos) const { - const int input_size = input.size(); - UChar32 cur_unicode_char; - int next_pos; - int end_of_word = cur_pos; - while (cur_pos < input_size) { - next_pos = cur_pos; - U8_NEXT(input, next_pos, input_size, cur_unicode_char); - if (u_isUWhiteSpace(cur_unicode_char)) { - cur_pos = next_pos; // Skip the whitespace as well. - // Break and return since we've met a word boundary. - break; - } - if (fast_wordpiece_tokenizer_utils::IsPunctuationOrChineseChar( - cur_unicode_char)) { - // Break and return since we've met a word boundary. We do not skip the - // punctuation character: that character may be a token by itself. - break; - } - end_of_word = next_pos; // Mark the exclusive end. - cur_pos = next_pos; // Skip the character. 
- } - return end_of_word; -} - -template <bool kGetPieces, bool kGetIds, bool kGetOffsets> -void FastWordpieceTokenizer::TokenizeTextImpl( - absl::string_view input_text, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - static_assert(kGetPieces || kGetIds, - "At least one of `kGetPieces` and `kGetIds` should be true."); - if (input_text.empty()) { - return; - } - const int input_size = input_text.size(); - int next_pos = 0; - int cur_pos = 0; - int original_num_tokens = - GetCurrentOutputSize<kGetPieces>(output_pieces, output_ids); - UChar32 prev_unicode_char; - UChar32 cur_unicode_char; - while (cur_pos < input_size) { - int cur_offset_in_input_word = 0; - // Tokenize the word starting at the current position. - auto cur_node = trie_->CreateTraversalCursorPointToRoot(); - int word_byte_length_so_far = 0; - int input_word_offset_in_text = cur_pos; - absl::string_view input_substr = input_text.substr(cur_pos); - // The trie matching loop below tokenizes and recognizes word pieces until - // 1. it steps over the input boundary, or - // 2. the length of the current word reaches 'max_bytes_per_token', or - // 3. it sees a whitespace / punctuation / unknown character. - while (cur_pos < input_size) { - prev_unicode_char = cur_unicode_char; - next_pos = cur_pos; - U8_NEXT(input_text, next_pos, input_text.length(), cur_unicode_char); - - if (word_byte_length_so_far + next_pos - cur_pos > - config_->max_bytes_per_token()) - break; - // Try matching one Unicode character from here. - while (!trie_->TryTraverseSeveralSteps( - cur_node, input_text.substr(cur_pos, next_pos - cur_pos))) { - // Trie cannot consume the whole Unicode character. We need to pop one - // or more longest-matching tokens off the beginning of the string - // represented by the current node. We then transit to the node pointed - // by the failure link, which represents the remaining suffix string - // after popping those matching prefix tokens. - // - // For example, if the current node is "abcdef", and we need to pop - // "ab", and "##cd" off the beginning, the failure link points to the - // node that represents "##ef". - if (!TryFollowFailureLinkAndCollectTokens<kGetPieces, kGetIds, - kGetOffsets>( - input_substr, input_word_offset_in_text, - cur_offset_in_input_word, cur_node, output_pieces, output_ids, - output_start_offsets, output_end_offsets)) { - goto outside_trie_match_loop; - } - } - // Trie consumed the whole Unicode char and was able to traverse to a - // new node. We move forward the cursor to match the next character. - word_byte_length_so_far += next_pos - cur_pos; - cur_pos = next_pos; - } - outside_trie_match_loop: - if (cur_pos >= input_size) { - // Collect the remaining tokens stored on a path on the trie. - HandleTheRemainingStringOnTriePath<kGetPieces, kGetIds, kGetOffsets>( - input_substr, input_word_offset_in_text, cur_node, - original_num_tokens, cur_offset_in_input_word, output_pieces, - output_ids, output_start_offsets, output_end_offsets); - // Break as we've finished all characters. - break; - } - bool is_white_space = u_isUWhiteSpace(cur_unicode_char); - if (is_white_space || - fast_wordpiece_tokenizer_utils::IsPunctuationOrChineseChar( - cur_unicode_char) || - (cur_pos && fast_wordpiece_tokenizer_utils::IsPunctuationOrChineseChar( - prev_unicode_char))) { - // If the current Unicode character is a valid word boundary, collect the - // remaining tokens stored on a path on the trie. 
- HandleTheRemainingStringOnTriePath<kGetPieces, kGetIds, kGetOffsets>( - absl::string_view(input_substr.data(), - cur_pos - input_word_offset_in_text), - input_word_offset_in_text, cur_node, original_num_tokens, - cur_offset_in_input_word, output_pieces, output_ids, - output_start_offsets, output_end_offsets); - // Skip the whitespace. - if (is_white_space) - cur_pos = next_pos; - // Continue in the outer while loop to process the remaining input. - continue; - } - - // Note that even with the following line removed, the code is still correct - // (i.e., Mutants is right). We keep this line for efficiency reasons: We - // have tested the current char, and it is not a whitespace or punctuation - // char. Hence it's safe to skip the current char; we don't want to test it - // again in the subsequent function. - cur_pos = next_pos; - int end_of_word = - SkipTheRemainingOfWordAndTrailingWhiteSpaces(input_text, cur_pos); - - // The current character is not a word boundary. The case is simple: We are - // at the start or middle of some word with unknown characters or exceeding - // the length limit. We map the entire word unk_token, skip the remaining - // portion, and continue. - ResetOutputAppendUnknownToken<kGetPieces, kGetIds, kGetOffsets>( - input_word_offset_in_text, (end_of_word - input_word_offset_in_text), - original_num_tokens, output_pieces, output_ids, output_start_offsets, - output_end_offsets); - } -} -// This function implements the new linear WordPiece algorithm. The overall -// design is illustrated as follows: -// -// * WordPiece tokenization works in a left-to-right longest-matching-first -// greedy manner, known as maximum matching. -// -// * We use a trie containing all pieces from the vocabulary. -// -// * We iterate the input text left-to-right, following the trie in search of -// longer and longer matches. -// -// * Challenge: When we fall off the trie matching, the best match is usually -// several characters back. -// -// * For example, assume the vocabulary is {a, ab, ##cd, ##efz, abcdefg}. -// If the input is "abcdefz", the trie matching stops at the position of -// "z". However, the longest match is "ab", which is 5 characters back. -// -// * Straightforward solution: Remember the last match while iterating on the -// trie. That gives us the longest match. Then we roll our string iterator -// backwards and reprocess the characters that weren't part of the match. It -// can be proved that the time complexity is quadratic. -// -// * For the example above, it will backtrack to the 3rd position and -// restart matching from "c", resulting in repetitive, wasteful iterations. -// -// * Optimized solution (the novel linear algorithm): Instead of having to -// reprocess the letters that didn't match, we can have the trie record -// (1) the longest-matching tokens that we would have identified (called -// "failure pops") and (2) a link pointing to a node (called "failure link") -// representing the state from where we can continue to match the next -// character. When trie matching cannot consume an input character, we perform -// a "failure transition" by (a) appending the failure pops to the tokenization -// result and (b) transiting through the failure link to a new state to -// continue the process. Our string iterator never backtracks, and it can be -// proved that we make at most `n` failure transitions in total in processing a -// string of length `n`. Therefore, the time complexity is linear. 
-// - // * For the same example above, when the trie matching fails at the - // character "z", the optimized solution is smart enough to know that the - // longest-matching tokens we can collect are ["ab", "##cd"]. It is also - // smart enough to set itself into such a state as if it has only seen and - // matched "##ef" so far. Now given the next character being "z", it - // immediately identifies the next matching token as "##efz". -template <bool kGetPieces, bool kGetIds, bool kGetOffsets> -void FastWordpieceTokenizer::TokenizeSingleWordImpl( - absl::string_view input_word, - int input_word_offset_in_text, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - static_assert(kGetPieces || kGetIds, - "At least one of `kGetPieces` and `kGetIds` should be true."); - if (input_word.empty()) { - return; - } - const int input_size = input_word.size(); - - // `original_num_tokens` stores the number of tokens in the output before - // tokenizing this `input_word`. This is needed because we attempt to tokenize - // `input_word` into word piece tokens and append the recognized tokens to the - // outputs on the fly. If we later find out that `input_word` cannot be - // tokenized into sub-tokens with the current vocabulary, we roll back the - // output vectors (by removing those tentative tokens) based on - // `original_num_tokens` and append the "unk_token". - int original_num_tokens = - GetCurrentOutputSize<kGetPieces>(output_pieces, output_ids); - - if (input_word.size() > config_->max_bytes_per_token()) { - ResetOutputAppendUnknownToken<kGetPieces, kGetIds, kGetOffsets>( - input_word_offset_in_text, input_size, original_num_tokens, - output_pieces, output_ids, output_start_offsets, output_end_offsets); - return; - } - - // `cur_offset_in_input_word` tracks the offset of the remaining portion of - // `input_word`, for which the tokens are yet to be recognized and outputted. - // Initially it just points to the start of the input, and it gets moved - // when more tokens are outputted. - // - // For example, suppose the vocab is {a,abcd,##b,##bc,##z}, and the input is - // "abcz". First `cur_offset_in_input_word` points to position 0, since we - // haven't outputted any tokens. After the first token "a" is recognized and - // outputted, it moves past the substring "a" to position 1. Then after the - // second token "##bc" is recognized and put into the outputs, it moves past - // the substring "bc" to position 3. - // - // This variable is used to calculate the offsets of each word piece token. - // And since we know their offsets in the input word, we're also able to get - // the token string without looking it up in the vocabulary table. This saves - // an extra look-up in the hash table (saving time), and we don't even need to - // save the vocabulary table anymore (saving memory). - int cur_offset_in_input_word = 0; - - // Here is an example to illustrate the inference process. - // - // Suppose the vocabulary is {a,abcd,##b,##bc,##z}, and the suffix indicator - // is ##.
Below is the trie built from that vocabulary: - // - // (a) (b) (c) (d) - // 0 ----- 3 ----- 4 ----- 5 ----- 6 - // (#)| - // 1 - // (#)| (b) (c) - // 2 ----- 7 ----- 8 - // | (z) - // + ----- 9 - // - // The algorithm constructs auxiliary structures on top of the trie to enable - // linear inference, which consist of two parts (let v denote a node): - // * failure links f(v), pointing to another node, - // * failure pops F(v), a list of tokens stored on node v. - // - // The table of str(v) (which is the string along the trie path from the root - // to node v), f(v), and F(v) for the above trie is as follows: - // - // v | 0 1 2 3 4 5 6 7 8 9 - // str(v)| "" # ## a ab abc abcd ##b ##bc ##z - // F(v)| [] [] [] [a] [a] [a] [abcd] [##b] [##bc] [##z] - // f(v)| null null null 2 7 8 2 2 2 null - // - // Please refer to `FastWordpieceTokenizerBuilder.h|cc` for detailed - // information on how failure links and failure pops are constructed. - // - // Let the input word be "abcz". Below is the inference process that is - // carried out by this method. - // - // Step | Char | Node transition | Output - // 0 | | 0 | [] - // 1 | a | goto(0,a) -> 3 | [] - // 2 | b | goto(3,b) -> 4 | [] - // 3 | c | goto(4,c) -> 5 | [] - // 4 | z | f(5) -> 8 | [a] - // | z | f(8) -> 2 | [a, ##bc] - // | z | goto(2,z) -> 9 | [a, ##bc] - // final | f(9) -> 2 | [a, ##bc, ##z] - // - // Notes: - // * In each step we match and process one input character. - // * goto(u,c) -> v: following the trie link with label c to transit from node - // u to node v. - // * f(u) -> v: following the failure link to transit from node u to node v. - // * The "final" step means that after processing all input characters, we - // keep transiting through the failure links until arriving at the node 2 - // that represents the suffix indicator "##". - // - // Please refer to the below code and comments. - - // Start from the root of the trie. - auto cur_node = trie_->CreateTraversalCursorPointToRoot(); - - for (auto ch : input_word) { - // Although the matching is on Unicode codepoints, it is equivalent to - // directly work with the utf-8 encoding bytes. - while (!trie_->TryTraverseOneStep(cur_node, ch)) { - // Trie cannot consume `ch`. As explained earlier (see "Optimized - // solution" above) we need to (1) pop one or more longest-matching tokens - // (i.e., failure pops) off the start of the string represented by the - // current node, and (2) transit through the failure link to a node that - // represents the remaining suffix string after popping those - // longest-matching prefix tokens. - if (!TryFollowFailureLinkAndCollectTokens<kGetPieces, kGetIds, - kGetOffsets>( - input_word, input_word_offset_in_text, cur_offset_in_input_word, - cur_node, output_pieces, output_ids, output_start_offsets, - output_end_offsets)) { - // If unable to follow the failure link, it means that the current trie - // node doesn't have any matching prefix vocab tokens to pop. Since the - // next character is not associated with a valid trie edge, the entire - // word cannot be tokenized. - ResetOutputAppendUnknownToken<kGetPieces, kGetIds, kGetOffsets>( - input_word_offset_in_text, input_size, original_num_tokens, - output_pieces, output_ids, output_start_offsets, - output_end_offsets); - return; - } - } - // Trie consumed `ch` and was able to traverse to a new node. Continue and - // process the next character. - } - // Segment the remaining string on the trie into tokens and collect them, or - // determine that the word cannot be tokenized. 
- HandleTheRemainingStringOnTriePath<kGetPieces, kGetIds, kGetOffsets>( - input_word, input_word_offset_in_text, cur_node, original_num_tokens, - cur_offset_in_input_word, output_pieces, output_ids, output_start_offsets, - output_end_offsets); -} - -template <bool kGetPieces, bool kGetIds, bool kGetOffsets> -ABSL_ATTRIBUTE_ALWAYS_INLINE bool -FastWordpieceTokenizer::TryFollowFailureLinkAndCollectTokens( - absl::string_view input_word, - int input_word_offset_in_text, - int& cur_offset_in_input_word, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& node, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - int cur_node_data; - if (trie_->TryGetData(node, cur_node_data)) { - // A shortcut to get f(cur_node) (i.e., the failure link) and F(cur_node) - // (i.e., failure pops) when `cur_node` has data. This results in ~10% - // speedup (statistically significant). - AppendTokenToOutput<kGetPieces, kGetIds, kGetOffsets>( - input_word, input_word_offset_in_text, cur_offset_in_input_word, - cur_node_data, output_pieces, output_ids, output_start_offsets, - output_end_offsets); - // Transit through the failure link. - trie_->SetTraversalCursor( - node, - config_->failure_struct_array()->Get(node.node_id)->failure_link()); - return true; - } - - const auto& node_aux = config_->failure_struct_array()->Get(node.node_id); - - if (node_aux->failure_link() == fast_wordpiece_tokenizer_utils::kNullNode) { - // No failure_link can be followed. - return false; - } - - // Collect the tokens (i.e., failure pops), represented by (offset, length) in - // a failure_pops pool (held by the config flatbuffer). - int failure_pops_offset, failure_pops_length; - fast_wordpiece_tokenizer_utils::GetFailurePopsOffsetAndLength( - node_aux->failure_pops_offset_length(), failure_pops_offset, - failure_pops_length); - const int failure_pops_end_offset = failure_pops_offset + failure_pops_length; - for (int offset_in_pool = failure_pops_offset; - offset_in_pool < failure_pops_end_offset; ++offset_in_pool) { - AppendTokenToOutput<kGetPieces, kGetIds, kGetOffsets>( - input_word, input_word_offset_in_text, cur_offset_in_input_word, - config_->failure_pops_pool()->Get(offset_in_pool), output_pieces, - output_ids, output_start_offsets, output_end_offsets); - } - - // Transit through the failure link. - trie_->SetTraversalCursor(node, node_aux->failure_link()); - return true; -} - -template <bool kGetPieces, bool kGetIds, bool kGetOffsets> -void FastWordpieceTokenizer::AppendTokenToOutput( - absl::string_view input_word, - int input_word_offset_in_text, - int& cur_offset_in_input_word, - int encoded_token_value, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - auto token_id = - fast_wordpiece_tokenizer_utils::GetTokenId(encoded_token_value); - if constexpr (kGetIds) { - output_ids->push_back(token_id); - } - if constexpr (kGetPieces || kGetOffsets) { - // For suffix tokens, the length below is without the suffix indicator. 
- int token_substr_length = - fast_wordpiece_tokenizer_utils::GetTokenLength(encoded_token_value); - if (!cur_offset_in_input_word && - fast_wordpiece_tokenizer_utils::IsSuffixToken(encoded_token_value)) { - // This is a special case where `input_word` happens to start with the - // suffix indicator (e.g., "##") and a suffix token is recognized at the - // start (since `cur_offset_input_word == 0`). In this case, we need - // to adjust and add the length of the suffix indicator string. - token_substr_length += config_->suffix_indicator()->size(); - } - if constexpr (kGetPieces) { - // If token id is unk_token_id, it means that it is a dummy node for - // punctuations that are not contained in the vocabulary, we append - // the unk_token in this case. Otherwise, we - // get the subword string from `input_word` by the offset and length. - auto unk_token = config_->unk_token()->string_view(); - auto subword_str = - (token_id == config_->unk_token_id()) - ? absl::string_view(unk_token.data(), unk_token.size()) - : absl::string_view(input_word.data() + cur_offset_in_input_word, - token_substr_length); - output_pieces->emplace_back( - cur_offset_in_input_word - ? absl::StrCat(config_->suffix_indicator()->str(), subword_str) - : subword_str); - } - if constexpr (kGetOffsets) { - // Record the offsets relative to the start of the whole text. - output_start_offsets->push_back(input_word_offset_in_text + - cur_offset_in_input_word); - output_end_offsets->push_back(input_word_offset_in_text + - cur_offset_in_input_word + - token_substr_length); - } - cur_offset_in_input_word += token_substr_length; - } -} - -template <bool kGetPieces, bool kGetIds, bool kGetOffsets> -ABSL_ATTRIBUTE_ALWAYS_INLINE void -FastWordpieceTokenizer::HandleTheRemainingStringOnTriePath( - absl::string_view input_word, - int input_word_offset_in_text, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, - int& original_num_tokens, - int& cur_offset_in_input_word, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - if (cur_node.node_id == trie_utils::DartsCloneTrieWrapper::kRootNodeId) { - // We've seen an empty input word. Just return. - return; - } - // Try handling the special case where the entire input word happens to be the - // suffix indicator (e.g., "##") itself. - if (TryHandleTheInputWordBeingSuffixIndicatorItself<kGetPieces, kGetIds, - kGetOffsets>( - input_word, input_word_offset_in_text, cur_node, - cur_offset_in_input_word, original_num_tokens, output_pieces, - output_ids, output_start_offsets, output_end_offsets)) { - original_num_tokens = - GetCurrentOutputSize<kGetPieces>(output_pieces, output_ids); - return; - } - - // Handle the normal case because we need to collect the remaining tokens from - // the string represented by `cur_node` (i.e., on the trie path from the trie - // root to `cur_node`), or find out the word cannot be tokenized. - // - // See the example in the comments of this function in the header file. - // - // The tokenization is successful if and only if the entire string represented - // by `cur_node` can be segmented into consecutive matching tokens, resulting - // in the empty suffix string (e.g., "##"), which is represented by - // `trie_suffix_root_`. So we keep following the failure links and collecting - // failure pops tokens until we arrive at `trie_suffix_root_` or encounter a - // null failure link in the middle. 
- while (cur_node.node_id != config_->trie_suffix_root() && - cur_node.node_id != config_->trie_punct_failure_link_node()) { - if (!TryFollowFailureLinkAndCollectTokens<kGetPieces, kGetIds, kGetOffsets>( - input_word, input_word_offset_in_text, cur_offset_in_input_word, - cur_node, output_pieces, output_ids, output_start_offsets, - output_end_offsets)) { - // The remaining string cannot be tokenized, neither can the input word. - ResetOutputAppendUnknownToken<kGetPieces, kGetIds, kGetOffsets>( - input_word_offset_in_text, input_word.size(), original_num_tokens, - output_pieces, output_ids, output_start_offsets, output_end_offsets); - return; - } - } - // Arrive at `trie_suffix_root_`. - - // Update the `original_num_tokens`. - original_num_tokens = - GetCurrentOutputSize<kGetPieces>(output_pieces, output_ids); - - // Succeed and exit. -} - -template <bool kGetPieces, bool kGetIds, bool kGetOffsets> -void FastWordpieceTokenizer::ResetOutputAppendUnknownToken( - int input_word_offset_in_text, - int input_size, - int& original_num_tokens, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - if constexpr (kGetPieces) { - output_pieces->resize(original_num_tokens + 1); - output_pieces->back() = config_->unk_token()->str(); - } - if constexpr (kGetIds) { - output_ids->resize(original_num_tokens + 1); - output_ids->back() = config_->unk_token_id(); - } - if constexpr (kGetOffsets) { - output_start_offsets->resize(original_num_tokens + 1); - output_start_offsets->back() = input_word_offset_in_text; - - output_end_offsets->resize(original_num_tokens + 1); - output_end_offsets->back() = input_word_offset_in_text + input_size; - } - - // Update `original_num_tokens` (since we have appended the "unk_token"). - ++original_num_tokens; -} - -template <bool kGetPieces, bool kGetIds, bool kGetOffsets> -ABSL_ATTRIBUTE_ALWAYS_INLINE bool -FastWordpieceTokenizer::TryHandleTheInputWordBeingSuffixIndicatorItself( - absl::string_view input_word, - int input_word_offset_in_text, - const trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, - int& cur_offset_in_input_word, - int original_num_tokens, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const { - // Handle the special case where the input word is the suffix indicator (e.g., - // "##") itself. This is because that, after all the characters of an input - // word were successfully processed, if we ended by standing at - // `trie_suffix_root_` but did not recognize any new tokens, it can only be - // the case that the word is the suffix indicator string (e.g., "##") itself. - // For this case we output the pre-computed result. - if (cur_node.node_id != config_->trie_suffix_root()) { - // The input word is not the suffix indicator itself. - return false; - } - int cur_num_tokens = - GetCurrentOutputSize<kGetPieces>(output_pieces, output_ids); - if (cur_num_tokens != original_num_tokens) { - // The input word is not the suffix indicator itself. - return false; - } - - // The input word is the suffix indicator itself. Next we handle two cases. 
- if (config_->precomputed_result_for_suffix_indicator()->size() == 1 && - fast_wordpiece_tokenizer_utils::GetTokenId( - config_->precomputed_result_for_suffix_indicator()->Get(0)) == - config_->unk_token_id()) { - // Case 1: The suffix indicator string cannot be tokenized but has to be - // mapped to unk_token. - ResetOutputAppendUnknownToken<kGetPieces, kGetIds, kGetOffsets>( - input_word_offset_in_text, input_word.size(), original_num_tokens, - output_pieces, output_ids, output_start_offsets, output_end_offsets); - return true; - } - - // Case 2: The suffix indicator can be tokenized normally. - for (int encoded_token_value : - *config_->precomputed_result_for_suffix_indicator()) { - AppendTokenToOutput<kGetPieces, kGetIds, kGetOffsets>( - input_word, input_word_offset_in_text, cur_offset_in_input_word, - encoded_token_value, output_pieces, output_ids, output_start_offsets, - output_end_offsets); - } - return true; -} -} // namespace text -} // namespace tensorflow
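For readers following the failure-pops / failure-links walkthrough in the deleted comments above, here is a small standalone C++ sketch (an editorial illustration, not part of the removed sources) that hard-codes the toy trie from that table -- vocabulary {a, abcd, ##b, ##bc, ##z}, suffix indicator "##" -- and replays the transitions for the input "abcz"; f(9) is taken as node 2, matching the "final | f(9) -> 2" step of the walkthrough.

```cpp
// Editorial sketch: replay the linear WordPiece walkthrough from the comments
// above on the toy vocabulary {a, abcd, ##b, ##bc, ##z} and input "abcz".
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // goto(u, c) -> v edges of the trie diagram above (0 is the root, 2 is the
  // suffix root representing "##").
  std::map<std::pair<int, char>, int> goto_edge = {
      {{0, 'a'}, 3}, {{3, 'b'}, 4}, {{4, 'c'}, 5}, {{5, 'd'}, 6},
      {{0, '#'}, 1}, {{1, '#'}, 2}, {{2, 'b'}, 7}, {{7, 'c'}, 8},
      {{2, 'z'}, 9}};
  // Failure links f(v) and failure pops F(v) from the table above; f(9) is
  // taken as node 2, per the final step of the walkthrough.
  std::map<int, int> f = {{3, 2}, {4, 7}, {5, 8}, {6, 2},
                          {7, 2}, {8, 2}, {9, 2}};
  std::map<int, std::vector<std::string>> F = {
      {3, {"a"}},   {4, {"a"}},    {5, {"a"}},  {6, {"abcd"}},
      {7, {"##b"}}, {8, {"##bc"}}, {9, {"##z"}}};

  const std::string input = "abcz";
  const int kSuffixRoot = 2;
  int node = 0;  // start at the trie root
  std::vector<std::string> output;
  for (char ch : input) {
    // While the trie cannot consume `ch`, append F(node) to the output and
    // follow the failure link; the string iterator never moves backwards.
    while (goto_edge.count({node, ch}) == 0) {
      for (const auto& tok : F[node]) output.push_back(tok);
      node = f[node];
    }
    node = goto_edge[{node, ch}];
  }
  // End of input: keep following failure links until the suffix root is
  // reached, collecting the remaining failure pops along the way.
  while (node != kSuffixRoot) {
    for (const auto& tok : F[node]) output.push_back(tok);
    node = f[node];
  }
  for (const auto& tok : output) std::cout << tok << ' ';  // prints: a ##bc ##z
  std::cout << '\n';
}
```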
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h
deleted file mode 100644
index 4ab48f55..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h
+++ /dev/null
@@ -1,270 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_H_ - -#include <string> -#include <vector> - -#include "absl/status/statusor.h" -#include "absl/strings/str_cat.h" -#include "tensorflow_text/core/kernels/darts_clone_trie_wrapper.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_generated.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h" - -namespace tensorflow { -namespace text { - -// Applies WordPiece tokenization with an existing WordPiece vocabulary. -// -// Example: -// input = unaffable -// output = un ##aff ##able -// -// One important edge case is that if the input word contains a Unicode -// character that is not seen in the vocabulary, the entire word is mapped -// to the unknown token, which is "<unk>" by default. Otherwise, in the "worst" -// case, the word is split into characters. -// -// This is based on the WordPiece/Subword tokenizer from tensor2tensor. -// https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/text_encoder.py -class FastWordpieceTokenizer { - public: - // Creates an instance. - // - // Args: - // * config_flatbuffer: the pointer to the FastWordpieceTokenizerConfig - // flatbuffer, which is not owned by this instance and should be kept alive - // through the lifetime of the instance. - static absl::StatusOr<FastWordpieceTokenizer> Create( - const void* config_flatbuffer); - - // Tokenizes `input` into its word pieces (i.e., subword tokens) and - // appends the new tokens to the end of the outputs. - // When `config_->end_to_end() is `false`, `input` should be a single - // word (after pre-tokenization by whitespaces and/or punctuations). - // Otherwise, `input` should be general text consisting of potentially many - // words. - // - // The input should be UTF-8 but the tokenization is performed on Unicode - // codepoints. - // - // - // Args: - // * input: The UTF-8 string of an input. - // * output_pieces: The output tokens. - // * output_ids: The output token ids. - // * output_start_offsets: The start offsets of output tokens in the input - // text, in utf-8 bytes. - // * output_end_offsets: The end offsets of output tokens in the input - // text, in utf-8 bytes. - // * input_word_offset_in_text: The relative offset of the input word in - // the whole text. Only used when not using end-to-end tokenizer. - // Note: the start offsets are inclusive and the end offsets are exclusive. - void Tokenize(absl::string_view input, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets, - int input_word_offset_in_text = 0) const; - - // An override not returning `output_pieces`. 
- void Tokenize(absl::string_view input, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets, - int input_word_offset_in_text = 0) const; - - // An override only returning `output_ids`. - void Tokenize(absl::string_view input, - std::vector<int>* output_ids, - int input_word_offset_in_text = 0) const; - - // Detokenizes wordpiece ids into a vector of tokens. - absl::StatusOr<std::vector<std::string>> DetokenizeToTokens( - const absl::Span<const int> input) const; - - // Detokenizes wordpiece ids to a text. If the input string to the tokenizer - // is normalized and the tokenized wordpieces don't contain `<unk>`, the - // detokenized result of the tokenized wordpieces is the same as the original - // input text. - absl::StatusOr<std::string> Detokenize( - const absl::Span<const int> input) const; - - private: - // The actual implementation of `Tokenize` when configured for single words. - // - // The template parameters `kGetPieces`, `kGetIds', and `kGetOffsets` control - // which parts of the output we generate. At least one of `kGetPieces` and - // `kGetIds` should be true. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void TokenizeSingleWordImpl(absl::string_view input_word, - int input_word_offset_in_text, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - - // The actual implementation of `Tokenize` when configured for general texts. - // - // The work of this method is equivalent to first splitting `input_text` into - // words (by splitting on punctuation and whitespaces, and next running - // `TokenizeSingleWordImpl` on each word. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void TokenizeTextImpl(absl::string_view input_text, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - - // Try following the failure link to make the transition when trie matching - // fails. - // - // If f(node) (i.e., failure link) is not null, it does the following: - // (1) collects tokens F(node) (i.e., failure pops) and appends to the end of - // `output_ids`, `output_pieces`, and/or `output_start_offsets` and - // `output_end_offsets`, - // (2) moves `cur_offset_in_input_word` accordingly to pass the collected - // tokens when `kGetPieces=true` or `kGetOffsets=true`, in order to - // calculate the start/end offsets of tokens and to get the token - // strings. Otherwise, `cur_offset_in_input_word` is ignored. - // (3) transits `node` to f(node) following the failure link, - // (4) returns true. - // - // If f(node) is null, it does not change anything and returns false. - // - // Args: - // * cur_offset_in_input_word: The current offset in `input_word` that - // corresponds to the start offset of the tokens that are going to be - // collected in this function. This value is used if 'kGetPieces=true' or - // 'kGetOffsets=true', and when so, this value will be updated accordingly - // after the new word piece tokens have been appended to the output. 
- template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - bool TryFollowFailureLinkAndCollectTokens( - absl::string_view input_word, - int input_word_offset_in_text, - int& cur_offset_in_input_word, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& node, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - - // Appends a word piece token (represented by `encoded_token_value`) to the - // output. - // - // Args: - // * cur_offset_in_input_word: The current offset in `input_word` that - // corresponds to the start offset of the wordpiece token. This value - // is used if `kGetPieces=true` or `kGetOffsets=true`, and when so, this - // value will be updated accordingly after the wordpiece token has been - // appended to the output. - // * encoded_token_value: the encoded value of the word piece token to be - // appended. See EncodeToken() in fast_wordpiece_tokenizer_utils.h. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void AppendTokenToOutput(absl::string_view input_word, - int input_word_offset_in_text, - int& cur_offset_in_input_word, - int encoded_token_value, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - - // This method is called when the trie matching loop encounters a word - // boundary (e.g., the end-of-input). This method segments the remaining - // string on the trie path into pieces and appends them to the outputs. If - // that is not possible with the current vocabulary, this method resets the - // outputs and appends unk_token. - // - // Example 1: suppose the vocabulary is {ab, abcd}. If the input word is "ab", - // after matching "ab", we processed all input characters and now meets the - // end-of-input. Note that the string "ab" is stored on the trie path that we - // just traversed along. This function recognizes it as the token "ab" and - // puts the token into the output as expected. - // - // Example 2: for the same vocabulary {ab, abcd}, suppose the input word is - // "abc". After the trie matching loop, we matched "abc" and encountered the - // end-of-input. Now the string "abc" is stored on the trie path, which we - // haven't segmented into tokens yet. So this function closes it by trying to - // segment "abc" into tokens. It fails since the remaining string "abc" cannot - // be tokenized into tokens given the vocabulary. In this case, it resets the - // outputs and appends unk_token at the end as expected. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void HandleTheRemainingStringOnTriePath( - absl::string_view input_word, - int input_word_offset_in_text, - trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, - int& original_num_tokens, - int& cur_offset_in_input_word, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - - // Resets the output and appends unk_token. - // - // We call this method when we find that the input word cannot be tokenized. - // We clear all new tokens recognized so far and replace them with a single - // unk_token. - // - // Args: - // * input_word_offset_in_text: The offset of the current word in the - // input text. - // * input_size: The length of the current input word, in utf-8 bytes. 
- // * original_num_tokens: The original number of tokens in the output before - // we started the tokenization of the current input word. It is updated - // after this method. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - void ResetOutputAppendUnknownToken( - int input_word_offset_in_text, - int input_size, - int& original_num_tokens, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - - // Try handling the special case when the input word is the suffix indicator - // itself. If so, appends the precomputed result to output_pieces and - // output_ids, and returns true. Otherwise, it does nothing and returns false. - template <bool kGetPieces, bool kGetIds, bool kGetOffsets> - bool TryHandleTheInputWordBeingSuffixIndicatorItself( - absl::string_view input_word, - int input_word_offset_in_text, - const trie_utils::DartsCloneTrieWrapper::TraversalCursor& cur_node, - int& cur_offset_in_input_word, - int original_num_tokens, - std::vector<std::string>* output_pieces, - std::vector<int>* output_ids, - std::vector<int>* output_start_offsets, - std::vector<int>* output_end_offsets) const; - - // Returns the position (in bytes) immediately after the end of the word. - int SkipTheRemainingOfWordAndTrailingWhiteSpaces(absl::string_view input, - int& cur_pos) const; - - // Points to the FastWordpieceTokenizer config flatbuffer (not owned). - const FastWordpieceTokenizerConfig* config_ = nullptr; - - // A wrapper to access the trie encoded inside the flatbuffer that `config_` - // points to. - std::unique_ptr<trie_utils::DartsCloneTrieWrapper> trie_ = nullptr; -}; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_H_
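The public surface deleted in the header above is small: Create() wraps a config flatbuffer, and the Tokenize()/Detokenize() overloads do the work. Below is a minimal usage sketch (editorial, not part of the removed sources), assuming `config_buffer` holds a serialized FastWordpieceTokenizerConfig built elsewhere; error handling is abbreviated.

```cpp
// Illustrative usage of the deleted FastWordpieceTokenizer interface.
#include <iostream>
#include <string>
#include <vector>

#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h"

void TokenizeExample(const std::string& config_buffer) {
  auto tokenizer = tensorflow::text::FastWordpieceTokenizer::Create(
      config_buffer.data());
  if (!tokenizer.ok()) return;

  std::vector<std::string> pieces;
  std::vector<int> ids, starts, ends;
  // With an end-to-end config this may be general text; otherwise it should
  // be a single pre-tokenized word, per the comments above.
  tokenizer->Tokenize("unaffable", &pieces, &ids, &starts, &ends);
  for (size_t i = 0; i < pieces.size(); ++i) {
    // Expected pieces: un ##aff ##able. Start offsets are inclusive, end
    // offsets exclusive, both in UTF-8 bytes relative to the input text.
    std::cout << pieces[i] << " [" << starts[i] << ", " << ends[i] << ")\n";
  }

  // Round-trip back to text; requires a config built with
  // support_detokenization enabled.
  auto text = tokenizer->Detokenize(ids);
  if (text.ok()) std::cout << *text << "\n";
}
```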
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.cc
deleted file mode 100644
index 505f135..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.cc
+++ /dev/null
@@ -1,31 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.h" - -#include "tensorflow/core/framework/op_kernel.h" - -namespace tensorflow { -namespace text { - -REGISTER_KERNEL_BUILDER(Name(FastWordpieceTokenizeWithOffsetsOpKernel::OpName()) - .Device(tensorflow::DEVICE_CPU), - FastWordpieceTokenizeWithOffsetsOpKernel); - -REGISTER_KERNEL_BUILDER(Name(FastWordpieceDetokenizeOpKernel::OpName()) - .Device(tensorflow::DEVICE_CPU), - FastWordpieceDetokenizeOpKernel); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.h
deleted file mode 100644
index 4f33d786..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.h
+++ /dev/null
@@ -1,39 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_KERNEL_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_KERNEL_H_ - -#include "tensorflow/lite/kernels/shim/tf_op_shim.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h" - -namespace tensorflow { -namespace text { - -class FastWordpieceTokenizeWithOffsetsOpKernel - : public tflite::shim::TfOpKernel<FastWordpieceTokenizeWithOffsetsOp> { - public: - using TfOpKernel::TfOpKernel; -}; - -class FastWordpieceDetokenizeOpKernel - : public tflite::shim::TfOpKernel<FastWordpieceDetokenizeOp> { - public: - using TfOpKernel::TfOpKernel; -}; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_KERNEL_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h
deleted file mode 100644
index d71b9bc..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h
+++ /dev/null
@@ -1,418 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_KERNEL_TEMPLATE_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_KERNEL_TEMPLATE_H_ - -#include "absl/status/status.h" -#include "absl/strings/str_cat.h" -#include "tensorflow/lite/kernels/shim/op_kernel.h" -#include "tensorflow/lite/kernels/shim/status_macros.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h" - -namespace tensorflow { -namespace text { - -// See `kDoc` data member for the documentation on this op kernel. -// -// This template class can be instantiated into a kernel for either TF or -// TFLite. See -// https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/kernels/shim -// for more info on how this works. -template <tflite::shim::Runtime Rt> -class FastWordpieceTokenizeWithOffsetsOp - : public tflite::shim::OpKernelShim<FastWordpieceTokenizeWithOffsetsOp, - Rt> { - private: - enum Inputs { kInputValues = 0, kWpModel }; - enum Outputs { - kOutputSubwords = 0, - kOutputIds, - kOutputRowSplits, - kStartValues, - kEndValues - }; - - using Shape = tflite::shim::Shape; - using typename tflite::shim::OpKernelShim<FastWordpieceTokenizeWithOffsetsOp, - Rt>::InitContext; - using typename tflite::shim::OpKernelShim<FastWordpieceTokenizeWithOffsetsOp, - Rt>::InvokeContext; - using typename tflite::shim::OpKernelShim<FastWordpieceTokenizeWithOffsetsOp, - Rt>::ShapeInferenceContext; - - public: - FastWordpieceTokenizeWithOffsetsOp() = default; - static const char kOpName[]; - static const char kDoc[]; - - // Attributes declaration (syntax: https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Attrs() { return {}; } - - // Input tensors declaration (syntax: - // https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Inputs(); - - // Output tensors declaration (syntax: - // https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Outputs(); - - // Initializes the op - absl::Status Init(InitContext* context) { return absl::OkStatus(); } - - // Runs the operation - absl::Status Invoke(InvokeContext* context); - - // Shape inference - static absl::Status ShapeInference(ShapeInferenceContext* c); -}; - -////////////////////////// Implementation - -template <tflite::shim::Runtime Rt> -std::vector<std::string> FastWordpieceTokenizeWithOffsetsOp<Rt>::Inputs() { - return {"input_values: string", "wp_model: uint8"}; -} - -template <tflite::shim::Runtime Rt> -std::vector<std::string> FastWordpieceTokenizeWithOffsetsOp<Rt>::Outputs() { - return {"output_subwords: string", "output_ids: int64", - "output_row_splits: int64", "start_values: int64", - "end_values: int64"}; -} - -template <tflite::shim::Runtime Rt> -absl::Status FastWordpieceTokenizeWithOffsetsOp<Rt>::Invoke( - InvokeContext* context) { - SH_ASSIGN_OR_RETURN(const auto input_values, context->GetInput(kInputValues)); - const 
auto& values_vec = input_values->template As<tstring, 1>(); - - SH_ASSIGN_OR_RETURN(const auto wp_model, context->GetInput(kWpModel)); - // OK to create on every call because FastWordpieceTokenizer is a - // lightweight, memory-mapped wrapper on `wp_model` tensor, and thus - // Create() is very cheap. - auto fast_wordpiece_tokenizer = - ::tensorflow::text::FastWordpieceTokenizer::Create( - wp_model->template Data<uint8>().data()); - SH_RETURN_IF_ERROR(fast_wordpiece_tokenizer.status()); - - // TODO(xysong): Optimize based on which information below is requested. - std::vector<std::string> subwords; - std::vector<int> subword_ids; - std::vector<int> begin_offset; - std::vector<int> end_offset; - std::vector<int> row_splits; - - row_splits.push_back(0); - - // Iterate through all the values and wordpiece tokenize them. - for (int i = 0; i < values_vec.Dim(0); ++i) { - // Tokenize into subwords and record the offset locations. - const int original_num_wordpieces = subwords.size(); - fast_wordpiece_tokenizer->Tokenize(values_vec(i), &subwords, &subword_ids, - &begin_offset, &end_offset); - const int delta_num_wordpieces = subwords.size() - original_num_wordpieces; - - // Record the row splits. - row_splits.push_back(delta_num_wordpieces + row_splits.back()); - } - - const int subwords_size = subwords.size(); - SH_ASSIGN_OR_RETURN( - auto output_subwords, - context->GetOutput(kOutputSubwords, Shape({subwords_size}))); - auto output_subwords_vec = - output_subwords->template As<tensorflow::tstring, 1>(); - - SH_ASSIGN_OR_RETURN( - auto output_ids, - context->GetOutput( - kOutputIds, - Shape({static_cast<int>( - subword_ids.size())}))); /* same shape as `output_subwords` */ - auto output_ids_vec = output_ids->template As<int64, 1>(); - - SH_ASSIGN_OR_RETURN( - auto output_row_splits, - context->GetOutput(kOutputRowSplits, - Shape({static_cast<int>(row_splits.size())}))); - auto output_row_splits_vec = output_row_splits->template As<int64, 1>(); - - SH_ASSIGN_OR_RETURN(auto start_values, - context->GetOutput(kStartValues, Shape({subwords_size}))); - auto start_values_vec = start_values->template As<int64, 1>(); - - SH_ASSIGN_OR_RETURN(auto end_values, - context->GetOutput(kEndValues, Shape({subwords_size}))); - auto end_values_vec = end_values->template As<int64, 1>(); - - for (int i = 0; i < subwords.size(); ++i) { - output_subwords_vec(i) = subwords[i]; - } - - for (int i = 0; i < subword_ids.size(); ++i) { - output_ids_vec(i) = subword_ids[i]; - } - - for (int i = 0; i < row_splits.size(); ++i) { - output_row_splits_vec(i) = row_splits[i]; - } - - for (int i = 0; i < begin_offset.size(); ++i) { - start_values_vec(i) = begin_offset[i]; - } - - for (int i = 0; i < end_offset.size(); ++i) { - end_values_vec(i) = end_offset[i]; - } - - return absl::OkStatus(); -} - -template <tflite::shim::Runtime Rt> -absl::Status FastWordpieceTokenizeWithOffsetsOp<Rt>::ShapeInference( - ShapeInferenceContext* c) { - using tflite::shim::Shape; - SH_ASSIGN_OR_RETURN(const Shape input_values_shape, - c->GetInputShape(kInputValues)); - SH_ASSIGN_OR_RETURN(const auto wp_model_shape, c->GetInputShape(kWpModel)); - const auto rank_1_shape = Shape({Shape::kUnknownDim}); - // TODO(b/204148042): Compatible & ToString are not exported by TF - /*if (!input_values_shape.Compatible(rank_1_shape)) { - return absl::FailedPreconditionError( - absl::StrCat("Shape must be rank 1: ", input_values_shape.ToString())); - } - if (!wp_model_shape.Compatible(rank_1_shape)) { - return absl::FailedPreconditionError( - absl::StrCat("Shape 
must be rank 1: ", wp_model_shape.ToString())); - }*/ - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputSubwords, rank_1_shape)); - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputIds, rank_1_shape)); - // row splits size - const int num_splits = Shape::AddDims(1, input_values_shape.Dim(0)); - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputRowSplits, Shape({num_splits}))); - SH_RETURN_IF_ERROR(c->SetOutputShape(kStartValues, rank_1_shape)); - SH_RETURN_IF_ERROR(c->SetOutputShape(kEndValues, rank_1_shape)); - - return absl::OkStatus(); -} - -template <tflite::shim::Runtime Rt> -const char FastWordpieceTokenizeWithOffsetsOp<Rt>::kOpName[] = - "FastWordpieceTokenizeWithOffsets"; - -template <tflite::shim::Runtime Rt> -const char FastWordpieceTokenizeWithOffsetsOp<Rt>::kDoc[] = R"doc( - Tokenizes tokens into sub-word pieces based off of a vocabulary using the fast - linear WordPiece algorithm. - - `wordpiece_tokenize_with_offsets` returns the relative offsets. - - ### Example: - - ```python - >>> tokens = ['don', '\'t', 'treadness'] - >>> wordpiece, ids, row_splits, start, end = ( - ... fast_wordpiece_tokenize_with_offsets(tokens, model_buffer)) - >>> RaggedTensor.from_row_splits(wordpiece, row_splits) - [['don', '\'', 't'], ['tread', '##ness']] - >>> RaggedTensor.from_row_splits(ids, row_splits) - [[0, 1, 2], [3, 4]] # Dummy ids. - >>> RaggedTensor.from_row_splits(start, row_splits) - start = [[[0, 3, 4], [0, 5]]] - >>> RaggedTensor.from_row_splits(end, row_splits) - end = [[[3, 4, 5], [5, 10]]] - ``` - - Args: - input_values: 1D Tensor of strings to tokenize with. - wp_model: Buffer tensor for the FastWordpieceTokenizerConfig flatbuffer. - - Returns: - * output_values: 1D tensor containing the wordpieces for all input strings. - A 2D RaggedTensor can be constructed from this and output_row_splits. - * output_ids: 1D tensor containing the wordpiece ids for all input strings. - A 2D RaggedTensor can be constructed from this and output_row_splits. - * output_row_splits: 1D int tensor with the row splits that allow us to - build RaggedTensors from output_values, output_ids, start_values, and - end_values. - * start_values: 1D tensor containing the inclusive start byte offset for - each wordpiece in all input strings. Corresponds 1:1 with output_values. - A 2D RaggedTensor can be constructed from this and output_row_splits. - * end_values: 1D tensor containing the exclusive end byte offset for - each wordpiece in all input strings. Corresponds 1:1 with output_values. - A 2D RaggedTensor can be constructed from this and output_row_splits. -)doc"; - -// See `kDoc` data member for the documentation on this op kernel. -// -// This template class can be instantiated into a kernel for either TF or -// TFLite. See -// https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/kernels/shim -// for more info on how this works. 
-template <tflite::shim::Runtime Rt> -class FastWordpieceDetokenizeOp - : public tflite::shim::OpKernelShim<FastWordpieceDetokenizeOp, Rt> { - private: - enum Inputs { kInputValues = 0, kInputRowSplits, kWpModel }; - enum Outputs { kOutputWords = 0 }; - - using Shape = tflite::shim::Shape; - using typename tflite::shim::OpKernelShim<FastWordpieceDetokenizeOp, - Rt>::InitContext; - using typename tflite::shim::OpKernelShim<FastWordpieceDetokenizeOp, - Rt>::InvokeContext; - using typename tflite::shim::OpKernelShim<FastWordpieceDetokenizeOp, - Rt>::ShapeInferenceContext; - - public: - FastWordpieceDetokenizeOp() = default; - static const char kOpName[]; - static const char kDoc[]; - - // Attributes declaration (syntax: https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Attrs() { return {}; } - - // Input tensors declaration (syntax: - // https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Inputs(); - - // Output tensors declaration (syntax: - // https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Outputs(); - - // Initializes the op - absl::Status Init(InitContext* context) { return absl::OkStatus(); } - - // Runs the operation - absl::Status Invoke(InvokeContext* context); - - // Shape inference - static absl::Status ShapeInference(ShapeInferenceContext* c); -}; - -////////////////////////// Implementation - -template <tflite::shim::Runtime Rt> -std::vector<std::string> FastWordpieceDetokenizeOp<Rt>::Inputs() { - return {"input_values: int32", "input_row_splits: int64", "wp_model: uint8"}; -} - -template <tflite::shim::Runtime Rt> -std::vector<std::string> FastWordpieceDetokenizeOp<Rt>::Outputs() { - return {"output_words: string"}; -} - -template <tflite::shim::Runtime Rt> -absl::Status FastWordpieceDetokenizeOp<Rt>::Invoke(InvokeContext* context) { - SH_ASSIGN_OR_RETURN(const auto input_values, context->GetInput(kInputValues)); - const auto& values_vec = input_values->template As<int, 1>(); - - SH_ASSIGN_OR_RETURN(const auto input_row_splits, - context->GetInput(kInputRowSplits)); - const auto& row_splits_vec = input_row_splits->template As<int64, 1>(); - - SH_ASSIGN_OR_RETURN(const auto wp_model, context->GetInput(kWpModel)); - // OK to create on every call because FastWordpieceTokenizer is a - // lightweight, memory-mapped wrapper on `wp_model` tensor, and thus - // Create() is very cheap. - auto fast_wordpiece_tokenizer = - ::tensorflow::text::FastWordpieceTokenizer::Create( - wp_model->template Data<uint8>().data()); - SH_RETURN_IF_ERROR(fast_wordpiece_tokenizer.status()); - - std::vector<std::string> sentences; - - // Iterate through row_splits to split input_values. 
- for (int i = 0; i < row_splits_vec.Dim(0) - 1; ++i) { - auto single_input = - absl::Span<const int>(values_vec.Ptr() + row_splits_vec(i), - row_splits_vec(i + 1) - row_splits_vec(i)); - SH_ASSIGN_OR_RETURN(auto sentence, - fast_wordpiece_tokenizer->Detokenize(single_input)); - sentences.push_back(sentence); - } - - const int words_size = sentences.size(); - SH_ASSIGN_OR_RETURN(auto output_words, - context->GetOutput(kOutputWords, Shape({words_size}))); - auto output_words_vec = output_words->template As<tensorflow::tstring, 1>(); - - for (int i = 0; i < words_size; ++i) { - output_words_vec(i) = sentences[i]; - } - - return absl::OkStatus(); -} - -template <tflite::shim::Runtime Rt> -absl::Status FastWordpieceDetokenizeOp<Rt>::ShapeInference( - ShapeInferenceContext* c) { - using tflite::shim::Shape; - SH_ASSIGN_OR_RETURN(const Shape input_values_shape, - c->GetInputShape(kInputValues)); - SH_ASSIGN_OR_RETURN(const Shape input_row_splits_shape, - c->GetInputShape(kInputRowSplits)); - SH_ASSIGN_OR_RETURN(const auto wp_model_shape, c->GetInputShape(kWpModel)); - const auto rank_1_shape = Shape({Shape::kUnknownDim}); - // TODO(b/204148042): Compatible & ToString are not exported by TF - /*if (!input_values_shape.Compatible(rank_1_shape)) { - return absl::FailedPreconditionError( - absl::StrCat("Shape must be rank 1: ", input_values_shape.ToString())); - } - if (!input_row_splits_shape.Compatible(rank_1_shape)) { - return absl::FailedPreconditionError(absl::StrCat( - "Shape must be rank 1: ", input_row_splits_shape.ToString())); - } - if (!wp_model_shape.Compatible(rank_1_shape)) { - return absl::FailedPreconditionError( - absl::StrCat("Shape must be rank 1: ", wp_model_shape.ToString())); - }*/ - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputWords, rank_1_shape)); - return absl::OkStatus(); -} - -template <tflite::shim::Runtime Rt> -const char FastWordpieceDetokenizeOp<Rt>::kOpName[] = - "TFText>FastWordpieceDetokenize"; - -template <tflite::shim::Runtime Rt> -const char FastWordpieceDetokenizeOp<Rt>::kDoc[] = R"doc( - Detokenizes sub-word ids into sentences. - - ### Example: - - ```python - >>> # Vocab of the model_buffer: ['a', 'ab', '##c', 'abc', '##d']. - >>> wordpiece_ids = [0, 1, 2, 3, 4] - >>> row_splits = [0, 3, 5] - >>> tokens = fast_wordpiece_tokenizer_detokenize(tokens, row_splits, model_buffer) - >>> tokens - ['a abc', 'abcd'] - ``` - - Args: - input_values: 1D Tensor of sub-word ids. - input_row_splits: 1D Tensor of row splits that denotes the boundary of each - sentence in the `input_values`. - wp_model: Buffer tensor for the FastWordpieceTokenizerConfig flatbuffer. - - Returns: - * output_values: 1D tensor containing all the sentences. -)doc"; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_KERNEL_TEMPLATE_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model.fbs b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model.fbs
deleted file mode 100644
index 3f508f6..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model.fbs
+++ /dev/null
@@ -1,68 +0,0 @@ -namespace tensorflow.text; - -struct FailureStruct { - // The failure link of node v, denoted as f(v). - failure_link: uint32; - - // The failure pops of node v, denoted as F(v). It is an encoded value of - // (offset, length) that represents a consecutive subarray in - // 'failure_pops_pool' (see FastWordpieceTokenizerConfig). - failure_pops_offset_length: uint32; -} - -table FastWordpieceTokenizerConfig { - // The trie data, in the format of darts_clone trie, as accepted by - // DartsCloneTrieWrapper::Create(). - trie_array: [uint32]; - - // The array of the failure structures. - failure_struct_array: [FailureStruct]; - - // The array holding the failure pops. - failure_pops_pool: [int]; - - // The trie suffix root node id. - trie_suffix_root: uint32; - - // Max size of the input token. If the input length is longer than this, it - // will be mapped to unk_token. - max_bytes_per_token: int; - - // Characters prepended to a wordpiece to indicate that it is a suffix to - // another subword, such as "##". - suffix_indicator: string; - - // The unknown token string. - unk_token: string; - - // The unkown token id. - unk_token_id: int; - - // The precomputed result for the input being the suffix indicator itself. - precomputed_result_for_suffix_indicator: [int]; - - // The node id of every punctuation's failure link. It is only used when - // end_to_end=true. - trie_punct_failure_link_node: uint32; - - // Whether to build end-to-end tokenizer for tokenizing general texts (as - // opposed to splitted single words). When it is true, the input text is first - // split into words on "punctuation"/whitespaces, and each word is further - // tokenized into subwords. - // Note that our definition of "punctuation" includes some special Chinese - // characters for compatibility with Bert. More details are available in - // `fast_wordpiece_tokenizer_utils::IsPunctuationOrChineseChar`. - end_to_end: bool; - - // Whether the tokenizer supports detokenization function. - support_detokenization: bool; - - // WordPiece Vocabulary. Note that we remove suffix indicator from suffix - // tokens for saving space. - vocab_array: [string]; - - // Whether the corresponding token in the vocab_array is a suffix token. - vocab_is_suffix_array: [bool]; -} - -root_type FastWordpieceTokenizerConfig;
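The schema deleted above is consumed through the accessors generated into fast_wordpiece_tokenizer_model_generated.h (also part of this CL). Below is a small editorial sketch of reading a few config fields, assuming the standard Get<root_type>() accessor and field getters that flatbuffers generates for this schema.

```cpp
// Illustrative read of a few fields from the FastWordpieceTokenizerConfig.
#include <iostream>

#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_generated.h"

void InspectConfig(const void* buffer) {
  const auto* config =
      tensorflow::text::GetFastWordpieceTokenizerConfig(buffer);
  std::cout << "unk_token: " << config->unk_token()->str()
            << " (id " << config->unk_token_id() << ")\n"
            << "suffix_indicator: " << config->suffix_indicator()->str() << "\n"
            << "max_bytes_per_token: " << config->max_bytes_per_token() << "\n"
            << "end_to_end: " << config->end_to_end() << "\n"
            << "trie_array size: " << config->trie_array()->size() << "\n";
}
```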
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.cc
deleted file mode 100644
index 0f4d2135..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.cc
+++ /dev/null
@@ -1,974 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h" - -#include <stdint.h> - -#include <memory> -#include <queue> -#include <stack> - -#include "absl/container/flat_hash_map.h" -#include "absl/container/flat_hash_set.h" -#include "absl/status/status.h" -#include "absl/strings/match.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_split.h" -#include "absl/strings/string_view.h" -#include "absl/strings/strip.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "tensorflow/lite/kernels/shim/status_macros.h" -#include "tensorflow_text/core/kernels/darts_clone_trie_builder.h" -#include "tensorflow_text/core/kernels/darts_clone_trie_wrapper.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_generated.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h" -#include "tensorflow_text/core/kernels/sentence_fragmenter_v2.h" -#include "tensorflow_text/core/kernels/wordpiece_tokenizer.h" - -namespace tensorflow { -namespace text { -namespace { - -// A Unicode control char that never appears in the input as it is filtered -// during text normalization. It is used to build dummy nodes in the trie. -static constexpr char kInvalidControlChar = 0x11; - -// A wrapper of vocab tokens that will be used to build the trie. -class TrieVocabToken { - public: - TrieVocabToken(absl::string_view token, - int token_id, - absl::string_view suffix_indicator) - : token_(std::string(token)), token_id_(token_id) { - if (!suffix_indicator.empty() && token_ != suffix_indicator && - absl::StartsWith(token_, suffix_indicator)) { - is_suffix_token_ = true; - actual_token_start_offset_ = suffix_indicator.size(); - } - // Iterate over the Unicode chars from the token, to initialize - // contains_punctuation_ and actual_token_unicode_len_. - int token_len = token.size(); - int cur_pos = actual_token_start_offset_; - UChar32 c; - while (cur_pos < token_len) { - U8_NEXT(token, cur_pos, token_len, c); - if (!contains_punctuation_ && - fast_wordpiece_tokenizer_utils::IsPunctuationOrChineseChar(c)) { - contains_punctuation_ = true; - } - ++actual_token_unicode_len_; - } - } - - absl::string_view Token() const { return token_; } - - int TokenId() const { return token_id_; } - - bool IsSuffixToken() const { return is_suffix_token_; } - - bool ContainsPunctuation() const { return contains_punctuation_; } - - int TokenUnicodeLengthWithoutSuffixIndicator() const { - return actual_token_unicode_len_; - } - - int TokenLengthWithoutSuffixIndicator() const { - return token_.size() - actual_token_start_offset_; - } - - private: - std::string token_; - - int token_id_ = -1; - - // By design, `is_suffix_token_`=false for the suffix indicator (e.g., "##") - // itself. 
- bool is_suffix_token_ = false; - - // The starting offset of the token string in `token_` without the suffix - // indicator. By design, `actual_token_start_offset_`=0 for the suffix - // indicator (e.g., "##") itself. - int actual_token_start_offset_ = 0; - - // Length of the actual token string in Unicode character. - int actual_token_unicode_len_ = 0; - - // True when the actual token string contains punctuation, e.g. "test.x", - // "##.", ".test", "...", "!", etc. - bool contains_punctuation_ = false; -}; - -// The failure struct to store failure links and failure pops. -struct FailureStruct { - // The failure link, denoted as f(v), of each node v. - // - // Null node is represented by fast_wordpiece_tokenizer_utils::kNullNode. - uint32_t failure_link = fast_wordpiece_tokenizer_utils::kNullNode; - - // The failure pop list, denoted as F(v), of a node v. - // - // It is stored as a pair of offset and length that represents a continuous - // vector in `failure_pops_pool_`. This pair is encoded using - // EncodeFailurePopList() in fast_wordpiece_tokenizer_utils.h. - uint32_t failure_pops_offset_length = - fast_wordpiece_tokenizer_utils::kNullFailurePopsList; -}; - -// An implementation of WordpieceVocab, used (1) to store the input vocabulary -// and (2) to call the original implementation of WordPiece tokenization to -// pre-compute the result for the suffix indicator string. -class StringVocab : public WordpieceVocab { - public: - explicit StringVocab(const std::vector<std::string>& vocab) : vocab_(vocab) { - for (int i = 0; i < vocab.size(); ++i) { - index_map_[vocab_[i]] = i; - } - } - - LookupStatus Contains(absl::string_view key, bool* value) const override { - *value = index_map_.contains(key); - return LookupStatus(); - } - - absl::optional<int> LookupId(absl::string_view key) const { - auto it = index_map_.find(key); - if (it == index_map_.end()) { - return absl::nullopt; - } else { - return it->second; - } - } - - // Returns the key of `vocab_id` or empty if `vocab_id` is not valid. - absl::optional<absl::string_view> LookupWord(int vocab_id) const { - if (vocab_id >= vocab_.size() || vocab_id < 0) { - return absl::nullopt; - } - return vocab_[vocab_id]; - } - - int Size() const { return index_map_.size(); } - - private: - std::vector<std::string> vocab_; - absl::flat_hash_map<absl::string_view, int> index_map_; -}; - -// Builds the FastWordpieceTokenizer model. -class FastWordpieceBuilder { - public: - // When no_pretokenization is false, we split the input string by punctuation - // chars (in addition to whitespaces) and then tokenize it to wordpieces. - absl::Status BuildModel(const std::vector<std::string>& vocab, - int max_bytes_per_token, - absl::string_view suffix_indicator, - absl::string_view unk_token, - bool no_pretokenization, - bool support_detokenization); - - absl::StatusOr<std::string> ExportToFlatBuffer() const; - - private: - absl::StatusOr<std::vector<TrieVocabToken>> PrepareVocabTokensToBuildTrie(); - - absl::Status ConstructTrie( - const std::vector<TrieVocabToken>& tokens_to_build_trie); - - absl::Status BuildFailureStructure( - const std::vector<TrieVocabToken>& tokens_to_build_trie); - - // Builds the set of outgoing edge labels for each trie node and returns a - // mapping (node_id -> set<char>). Used in BuildFailureStructure(). 
- absl::StatusOr<std::vector<absl::flat_hash_set<char>>> - BuildOutgoingEdgeLabelsForTrie( - const std::vector<TrieVocabToken>& tokens_to_build_trie); - - // Builds the set of outgoing edge labels for nodes along the trie path of - // `vocab_token`. Used in BuildOutgoingEdgeLabelsForTrie(). - absl::Status BuildOutgoingEdgeLabelsAlongVocabToken( - const TrieVocabToken& vocab_token, - std::vector<absl::flat_hash_set<char>>& node_outgoing_edge_labels); - - // Assigns failure link f(cur_node) to `failure_link` and populates failure - // pops F(cur_node) (based on `one_step_pops` and - // `parent_failure_pops_offset_length`). - absl::Status AssignFailureLinkAndPops(uint32_t cur_node, - uint32_t failure_link, - const std::vector<int>& one_step_pops, - int parent_failure_pops_offset_length); - - // If `failure_pops_offset_length` encodes a valid failure pop list, appends - // the failure pop list to the end of `out_failure_pops`. Otherwise, does - // nothing. - void GetFailurePopsAndAppendToOut(uint32_t failure_pops_offset_length, - std::vector<int>& out_failure_pops); - - absl::Status PrecomputeResultForSuffixIndicator(); - - inline void BreakTrieLinkFromParentToChild(uint32_t child_node_id) { - // In trie, the least significant 8 bits encode the label of the trie link - // from the parent to the node itself. - // - // Reference: - // https://github.com/s-yata/darts-clone/blob/e40ce4627526985a7767444b6ed6893ab6ff8983/include/darts.h#L65-L70. - // - // For example, if there is a trie link `u` -> `v` with label (say) 'a' - // (ASCII 97 or 0x61), then the least significant 8 bits of node `v` will be - // 0x61. By erasing its least significant 8 bits to 0, it effectively - // prevents the node from being reachable from its parent, i.e. breaking the - // trie link from the parent to the node itself. - trie_array_[child_node_id] &= 0xFFFFFF00; - } - - inline void EraseValueOfNode(uint32_t node_id) { - // In trie, the 9th least significant bit of a node's value marks whether - // the node has a leaf node (i.e., having a value stored on the node). - // - // Reference: - // https://github.com/s-yata/darts-clone/blob/e40ce4627526985a7767444b6ed6893ab6ff8983/include/darts.h#L54-L58 - // - // By setting the 9th least significant bit to 0, it effectively erases any - // value (i.e., token id in our case) associated with the node. - trie_array_[node_id] &= 0xFFFFFEFF; - } - - absl::optional<StringVocab> vocab_; - - int max_bytes_per_token_ = -1; - - std::string suffix_indicator_; - - std::string unk_token_; - - int unk_token_id_ = -1; - - // A wrapper to access the trie encoded by `trie_array_`. - absl::optional<trie_utils::DartsCloneTrieWrapper> trie_; - - // The actual data of the trie. - std::vector<uint32_t> trie_array_; - - // The "suffix_root" node on the trie whose trie path (from the root to the - // node) is the suffix indicator string. - uint32_t trie_suffix_root_ = fast_wordpiece_tokenizer_utils::kNullNode; - - // The dummy node to serve as the failure link of punctuation nodes. - uint32_t trie_punct_failure_link_node_ = - fast_wordpiece_tokenizer_utils::kNullNode; - - // Whether to build the end-to-end tokenizer that tokenizes general texts. - // When set to false, it splits the input on punctuation/whitespace and treat - // each punctuation as an independent word. - bool no_pretokenization_; - - // Whether the tokenizer supports the detokenization function. 
- bool support_detokenization_; - - std::vector<FailureStruct> failure_struct_array_; - - // Each element in the failure pops pool is an encoded vocab token. - // See EncodeToken() in fast_wordpiece_tokenizer_utils.h. - std::vector<int> failure_pops_pool_; - - // The precomputed result for the suffix indicator. Each element in the - // failure pops pool is an encoded vocab token. See EncodeToken() in - // fast_wordpiece_tokenizer_utils.h. - std::vector<int> precomputed_result_for_suffix_indicator_; - - // The mapping from node id to whether the corresponding token is a - // punctuation char. - absl::flat_hash_map<uint32_t, bool> node_id_is_punc_map_; -}; - -absl::Status FastWordpieceBuilder::BuildModel( - const std::vector<std::string>& vocab, - int max_bytes_per_token, - absl::string_view suffix_indicator, - absl::string_view unk_token, - bool no_pretokenization, - bool support_detokenization) { - unk_token_ = std::string(unk_token); - suffix_indicator_ = std::string(suffix_indicator); - max_bytes_per_token_ = max_bytes_per_token; - no_pretokenization_ = no_pretokenization; - support_detokenization_ = support_detokenization; - - vocab_.emplace(vocab); - if (vocab_->Size() != vocab.size()) { - return absl::FailedPreconditionError( - "Tokens in the vocabulary must be unique."); - } - - // Determine `unk_token_id_`. - const absl::optional<int> unk_token_id = vocab_->LookupId(unk_token_); - if (!unk_token_id.has_value()) { - return absl::FailedPreconditionError("Cannot find unk_token in the vocab!"); - } - unk_token_id_ = *unk_token_id; - - // Construct the trie and the failure structure. - SH_ASSIGN_OR_RETURN(auto tokens_to_build_trie, - PrepareVocabTokensToBuildTrie()); - SH_RETURN_IF_ERROR(ConstructTrie(tokens_to_build_trie)); - SH_RETURN_IF_ERROR(BuildFailureStructure(tokens_to_build_trie)); - - // Precompute the result when the input is the suffix indicator string itself. - SH_RETURN_IF_ERROR(PrecomputeResultForSuffixIndicator()); - - return absl::OkStatus(); -} - -absl::StatusOr<std::vector<TrieVocabToken>> -FastWordpieceBuilder::PrepareVocabTokensToBuildTrie() { - // To simplify the inference (fewer corner cases), - // * We ensure that `trie_suffix_root_` is always available on the trie. - // * We ensure that `trie_suffix_root_` does not have data (i.e., the suffix - // indicator string is not in the set of the keys of the trie). - // * We don't actually add the end-of-input symbol "$" but use an alternative - // logic. See FastWordpieceTokenizer::HandleTheRemainingStringOnTriePath(). - - if (vocab_->Size() > fast_wordpiece_tokenizer_utils::kMaxSupportedVocabSize) { - return absl::FailedPreconditionError( - absl::StrCat("Vocab size exceeds the max supported (", - fast_wordpiece_tokenizer_utils::kMaxSupportedVocabSize, - "). Found vocab size: ", vocab_->Size(), ".")); - } - - // Collect a subset of tokens (and variations) to build the trie. - std::vector<TrieVocabToken> tokens_to_build_trie; - tokens_to_build_trie.reserve(vocab_->Size()); - for (int token_id = 0; token_id < vocab_->Size(); ++token_id) { - const absl::optional<absl::string_view> word = vocab_->LookupWord(token_id); - if (!word.has_value()) { - return absl::FailedPreconditionError( - "Impossible. `token_id` is definitely within the range of vocab " - "token ids; hence LookupWord() should always succeed."); - } - if (word->empty()) { - // It does not make sense to add the empty string "" to the vocabulary. In - // addition, darts_clone does not allow an empty Trie key. 
- // - // We allow this only for compatibility with the original Wordpiece - // algorithm. - LOG(WARNING) - << "The empty string is found in the vocabulary, which takes place " - "in the token id space but will never be used in the result. " - "Consider cleaning it from the vocabulary."; - continue; - } - if (*word == suffix_indicator_) { - // In real-life cases, no need to add the suffix indicator string (e.g., - // "##") to the vocabulary. - // - // We allow this only for compatibility with the original Wordpiece - // algorithm. - LOG(WARNING) - << "The empty suffix token is found in the vocabulary, which takes " - "place in token id space but will (almost) never be used in the " - "result. Consider cleaning it from the vocabulary."; - - // The token id of the suffix indicator is used only when the input is - // the suffix indicator itself. That case is handled elsewhere, in - // PrecomputeResultForSuffixIndicator(). - // - // Therefore, we don't insert the suffix indicator string as a key into - // the trie. As a result, `trie_suffix_root_` node will never have data. - - continue; - } - TrieVocabToken vocab_token(*word, token_id, suffix_indicator_); - if (vocab_token.TokenLengthWithoutSuffixIndicator() > - fast_wordpiece_tokenizer_utils::kMaxVocabTokenLengthInUTF8Bytes) { - return absl::FailedPreconditionError(absl::StrCat( - "Vocab token utf8 length (excluding suffix indicator) exceeds the " - "max supported (", - fast_wordpiece_tokenizer_utils::kMaxVocabTokenLengthInUTF8Bytes, - "). The vocab token is: ", *word, - " with utf8 length (excluding suffix indicator): ", - vocab_token.TokenLengthWithoutSuffixIndicator(), ".")); - } - // Skip word that contains punctuation but is not a punctuation itself. - // <unk>, <pad>, ##. are skipped in this step. - if (!no_pretokenization_ && vocab_token.ContainsPunctuation() && - (vocab_token.TokenUnicodeLengthWithoutSuffixIndicator() > 1 || - vocab_token.IsSuffixToken())) { - continue; - } - tokens_to_build_trie.emplace_back(vocab_token); - } - - if (tokens_to_build_trie.empty()) { - return absl::FailedPreconditionError( - "No valid vocab tokens were found to build the trie."); - } - if (!suffix_indicator_.empty()) { - const bool suffix_token_exists = std::any_of( - tokens_to_build_trie.begin(), tokens_to_build_trie.end(), - [](const TrieVocabToken& token) { return token.IsSuffixToken(); }); - if (!suffix_token_exists) { - // No suffix tokens in the vocab. That would lead to no trie node for - // the suffix indicator, which creates corner cases in the inference. - // To prevent that, we add a dummy suffix token, e.g., "##" + - // kInvalidControlChar (if the suffix indicator is "##"), which is never - // matched during inference. - tokens_to_build_trie.emplace_back(TrieVocabToken( - absl::StrCat(suffix_indicator_, std::string(1, kInvalidControlChar)), - unk_token_id_, suffix_indicator_)); - } - } - - if (!no_pretokenization_) { - // Special treatment for all Unicode punctuation chars that are not already - // in the trie. - // The maximum codepoint in Unicode is 0x0010FFFF. - for (UChar32 cp = 1; cp <= 0x0010FFFF; ++cp) { - if (!U_IS_UNICODE_CHAR(cp) || - !fast_wordpiece_tokenizer_utils::IsPunctuationOrChineseChar(cp)) { - continue; - } - // Get the UTF8 encoding of the codepoint cp. - char buf[4]; - int len = 0; - U8_APPEND_UNSAFE(buf, len, cp); - absl::string_view buf_view(buf, len); - // Set the token id of punctuation chars that don't exist in the vocab as - // unk_token_id_. 
- if (!vocab_->LookupId(buf_view)) { - TrieVocabToken vocab_token(buf_view, unk_token_id_, suffix_indicator_); - tokens_to_build_trie.emplace_back(vocab_token); - } - } - // Insert a dummy node to serve as the failure link targets for punctuation - // nodes. - tokens_to_build_trie.emplace_back(TrieVocabToken( - std::string(1, kInvalidControlChar), unk_token_id_, suffix_indicator_)); - } - return tokens_to_build_trie; -} - -absl::Status FastWordpieceBuilder::ConstructTrie( - const std::vector<TrieVocabToken>& tokens_to_build_trie) { - std::vector<std::string> keys; - std::vector<int> values; - for (const TrieVocabToken& vocab_token : tokens_to_build_trie) { - keys.emplace_back(vocab_token.Token()); - SH_ASSIGN_OR_RETURN(int encoded_value, - fast_wordpiece_tokenizer_utils::EncodeToken( - vocab_token.TokenId(), - vocab_token.TokenLengthWithoutSuffixIndicator(), - vocab_token.IsSuffixToken())); - values.push_back(encoded_value); - } - SH_ASSIGN_OR_RETURN(trie_array_, - trie_utils::BuildDartsCloneTrie(keys, values)); - SH_ASSIGN_OR_RETURN( - trie_utils::DartsCloneTrieWrapper trie, - trie_utils::DartsCloneTrieWrapper::Create(trie_array_.data())); - trie_.emplace(std::move(trie)); - - if (trie_array_.size() > - fast_wordpiece_tokenizer_utils::kMaxSupportedTrieSize) { - return absl::FailedPreconditionError(absl::StrCat( - "Not supported since the constructed Darts trie size (", - trie_array_.size(), ") is greater than the maximum supported size (", - fast_wordpiece_tokenizer_utils::kMaxSupportedTrieSize, ").")); - } - - // Locate the trie suffix root. - auto node = trie_->CreateTraversalCursorPointToRoot(); - if (!trie_->TryTraverseSeveralSteps(node, suffix_indicator_)) { - return absl::FailedPreconditionError( - "Cannot locate trie_suffix_root_. This should never happen."); - } - trie_suffix_root_ = node.node_id; - - if (!no_pretokenization_) { - // Locate the dummy node for the failure link for punctuation nodes. - node = trie_->CreateTraversalCursorPointToRoot(); - if (!trie_->TryTraverseSeveralSteps(node, - std::string(1, kInvalidControlChar))) { - return absl::FailedPreconditionError( - "Cannot locate the dummy node for the failure link for punctuation " - "nodes. This should never happen."); - } - trie_punct_failure_link_node_ = node.node_id; - - // We make `trie_punct_failure_link_node_` a standalone dummy node. - EraseValueOfNode(trie_punct_failure_link_node_); - BreakTrieLinkFromParentToChild(trie_punct_failure_link_node_); - } - return absl::OkStatus(); -} - -absl::Status FastWordpieceBuilder::BuildOutgoingEdgeLabelsAlongVocabToken( - const TrieVocabToken& vocab_token, - std::vector<absl::flat_hash_set<char>>& node_outgoing_edge_labels) { - const absl::string_view token = vocab_token.Token(); - trie_utils::DartsCloneTrieWrapper::TraversalCursor cur_node; - int char_pos = 0; - trie_->SetTraversalCursor(cur_node, trie_->kRootNodeId); - while (char_pos < token.size()) { - const char edge_label = token[char_pos]; - node_outgoing_edge_labels[cur_node.node_id].insert(edge_label); - if (!trie_->TryTraverseOneStep(cur_node, edge_label)) { - // Should never happen, since we built trie using all of `vocab_token`. - return absl::FailedPreconditionError(absl::StrCat( - "Error in traversing to child following edge ", - absl::string_view(&edge_label, 1), " from the prefix ", - token.substr(0, char_pos), " at parent id ", cur_node.node_id, - ". The token is ", token, ". 
The char position is ", char_pos, ".")); - } - ++char_pos; - } - // Record whether the current node represents a punctuation char in the map. - node_id_is_punc_map_[cur_node.node_id] = - !vocab_token.IsSuffixToken() && vocab_token.ContainsPunctuation() && - vocab_token.TokenUnicodeLengthWithoutSuffixIndicator() == 1; - return absl::OkStatus(); -} - -absl::StatusOr<std::vector<absl::flat_hash_set<char>>> -FastWordpieceBuilder::BuildOutgoingEdgeLabelsForTrie( - const std::vector<TrieVocabToken>& tokens_to_build_trie) { - std::vector<absl::flat_hash_set<char>> node_outgoing_edge_labels( - trie_array_.size()); - const std::string dummy_token_for_trie_punct_failure_link_node = - std::string(1, kInvalidControlChar); - for (const TrieVocabToken& vocab_token : tokens_to_build_trie) { - if (vocab_token.Token() == dummy_token_for_trie_punct_failure_link_node) - continue; - SH_RETURN_IF_ERROR(BuildOutgoingEdgeLabelsAlongVocabToken( - vocab_token, node_outgoing_edge_labels)); - } - return node_outgoing_edge_labels; -} - -// Computes failure links and failure pops using BFS traversal. -absl::Status FastWordpieceBuilder::BuildFailureStructure( - const std::vector<TrieVocabToken>& tokens_to_build_trie) { - // Build the set of outgoing edge labels for each trie node (node_id -> - // set<char>). This is needed by BFS because darts-clone does not provide an - // API to enumerate the outgoing links for a node. - SH_ASSIGN_OR_RETURN( - std::vector<absl::flat_hash_set<char>> node_outgoing_edge_labels, - BuildOutgoingEdgeLabelsForTrie(tokens_to_build_trie)); - - failure_struct_array_.resize(trie_array_.size()); - // Initialize the BFS queue. - std::queue<uint32_t> bfs_queue({trie_->kRootNodeId}); - if (trie_suffix_root_ != trie_->kRootNodeId) { - // When `suffix_indicator_` is empty, `trie_suffix_root_` will collapse - // with root. In this case, we don't visit it twice. - // - // In addition, we have ensured that `trie_suffix_root_` will never be null. - // See PrepareVocabTokensToBuildTrie(). - bfs_queue.push(trie_suffix_root_); - } - - // The BFS loop. - while (!bfs_queue.empty()) { - uint32_t parent_id = bfs_queue.front(); - bfs_queue.pop(); - - // Explore the children of the parent node. - // - // Fix the iteration order of the outgoing edges to ensure that the model is - // always built in the same way (i.e., visiting nodes in the same order). - std::vector<char> outgoing_labels_sorted( - node_outgoing_edge_labels[parent_id].begin(), - node_outgoing_edge_labels[parent_id].end()); - std::sort(outgoing_labels_sorted.begin(), outgoing_labels_sorted.end()); - for (const char edge_label : outgoing_labels_sorted) { - auto child_node = trie_->CreateTraversalCursor(parent_id); - if (!trie_->TryTraverseOneStep(child_node, edge_label)) { - // Should never happen, due to how we built `node_outgoing_edge_labels`; - // see BuildOutgoingEdgeLabelsAlongVocabToken(). - return absl::FailedPreconditionError(absl::StrCat( - "Failed to traverse to child following edge ", - absl::string_view(&edge_label, 1), " at parent ", parent_id, ".")); - } - if (child_node.node_id == trie_suffix_root_) { - // Avoid visiting `trie_suffix_root_` twice. - continue; - } - - // For the child node v, compute failure link f(v) and failure pops F(v). - // - // In the comments below, str(v) is the string on the path from the trie - // root to the node v, and V is the vocabulary used to build the trie. 
- - int child_data_value = -1; - if (trie_->TryGetData(child_node, child_data_value)) { - uint32_t failure_link = trie_suffix_root_; - // Check whether the current node represents a punctuation char. - // Since the current node has data and thus corresponds to some token, - // it must be in the map `node_id_is_punc_map_` - if (!node_id_is_punc_map_.contains(child_node.node_id)) { - return absl::FailedPreconditionError( - "Failed to find if an end node in the trie is a punctuation char " - "in node_id_is_punc_map_. It should never happen."); - } - if (!no_pretokenization_ && - node_id_is_punc_map_.at(child_node.node_id)) { - // For end-to-end tokenizer, we set the failure link node of every - // punctuation char as a special node trie_punct_failure_link_node_ - // which is a dummy node (no parent, no descendants, failure link is - // null). Hence, by detecting the landing node, we know we just - // matched a punctuation char. We then split it as a single word. - failure_link = trie_punct_failure_link_node_; - } - // Case 1 (easy): str(v) is in V. Assume that during tokenization of a - // word, we reached node v, but can't continue further, because the - // current char from the input word does not match any of the edges - // outgoing from v. In that case, str(v) is already the max match, so - // it's the only wordpiece we add to the list of wordpieces we committed - // to. Hence, F(v) = [str(v)]. The next wordpiece from the current word - // is a suffix, so we move to node f(v) = trie_suffix_root_, which - // represents the suffix indicator (e.g., "##"), from where we continue - // the match process. In summary, we have: - // * f(v) = trie_suffix_root_. - // * F(v) = [str(v)]. - SH_RETURN_IF_ERROR(AssignFailureLinkAndPops( - /*cur_node=*/child_node.node_id, /*failure_link=*/failure_link, - /*one_step_pops=*/{child_data_value}, - /*parent_failure_pops_offset_length=*/ - fast_wordpiece_tokenizer_utils::kNullFailurePopsList)); - bfs_queue.push(child_node.node_id); - continue; - } - - // Case 2 (complex): str(v) is not in V. - // - // Consider the same scenario as in Case 1, where we can't continue - // further from v, but now, str(v) is not a valid wordpiece. Instead, - // we need to consider the wordpieces that the MaxMatch algorithm would - // generate for the beginning of str(v) (these wordpieces are stored in - // F(v)). f(v) (the state we transit to) should correspond to the trie - // node for the remaining suffix of str(v). - // - // We could compute F(v) and f(v) by running the original WordPiece - // algorithm. Instead, we do it even faster, by using F(u) and f(u) (the - // similar info for the parent node u). Intuitively F(v) consists of (1) - // the tokens from F(u) and (2) the possible tokens that the MaxMatch - // algorithm would generate for str(f(u)).c, where str(f(u)) is the suffix - // of str(u) not covered by the concatenation of the tokens from F(u), "." - // means concatenation, and c is the edge label character from u to v. - // - // - // Let u be the parent node, and c be the edge label from u to v. To - // compute f(v) and F(v), the loop below uses a node variable z (called - // `itr_node`) and a list G (called `one_steps_pops`). Initially, z is set - // to be f(u), and G is empty. - // 1. If z is null, f(v) will be null, too (see Note 2 below for what - // this means). We're done. - // 2. Check if there is a trie edge out of node z, for label c, leading - // to node goto(z, c). If so, set f(v) = goto(z,c) and F(v) = F(u) + G. - // We're done and break. - // 3. 
Otherwise, collect the pop tokens (by G = G + F(z)) and - // follows the failure link (by z = f(z)). - // 4. Goes to Step 1 and continue the loop. - // - // Note 1: processing node v depends on the info for nodes z that are - // closer to the root than v. Due to our use of the BFS traversal, that - // info is guaranteed to exist when we examine node v. - // - // Note 2: f(v) is null means that during the tokenization process of some - // input word, if the trie matching cannot continue at node v, there are - // no failure links that we can follow, and (it can be proved that in such - // a case) the input word can't be tokenized with the current vocab. - // - // For formal discussions and proofs, please refer to the academic paper - // https://arxiv.org/abs/2012.15524 - const FailureStruct& parent_fs = failure_struct_array_[parent_id]; - if (parent_fs.failure_link != fast_wordpiece_tokenizer_utils::kNullNode) { - std::vector<int> one_step_pops; - auto itr_node = trie_->CreateTraversalCursor(parent_fs.failure_link); - while (true) { - if (trie_->TryTraverseOneStep(itr_node, edge_label)) { - // Set the failure link and failure pops for `child_node`. - SH_RETURN_IF_ERROR(AssignFailureLinkAndPops( - /*cur_node=*/child_node.node_id, - /*failure_link=*/itr_node.node_id, one_step_pops, - parent_fs.failure_pops_offset_length)); - break; - } - const FailureStruct& itr_node_fs = - failure_struct_array_[itr_node.node_id]; - if (itr_node_fs.failure_link == - fast_wordpiece_tokenizer_utils::kNullNode) { - // Cannot follow anymore: failure link of `child_node` will be null. - break; - } - // Append the failure pops of `itr_node` to `one_step_pops`. - GetFailurePopsAndAppendToOut(itr_node_fs.failure_pops_offset_length, - one_step_pops); - // Follow the failure link. - trie_->SetTraversalCursor(itr_node, itr_node_fs.failure_link); - } - } - - bfs_queue.push(child_node.node_id); - } - } - - if (!no_pretokenization_ && !suffix_indicator_.empty()) { - // Rewire trie links along suffix_indicator_. - // If the suffix indicator contains a punctuation char, let `u`--(`c`)-->`v` - // be the first trie edge along the suffix indicator such that the edge - // label (i.e. `c`) is a punctuation char. Note that `u`, `v` are trie - // nodes. `c` is the edge label. We make the following change: - // - // Case 1: if `u` is the root, we remove the trie edge from `v` to its child - // along the suffix indicator. - // Case 2: if `u` is not the root, we remove the trie edge from `u` to `v`. - // - // Example 1: if suffix_indicator_ is "##" (as in BERT), we remove the trie - // link from "#" to "##". The goal here is to make sure we match the - // punctuation character "#" as a token by itself, without matching "##" - // (as we split by punctuation, "##" is not a valid token). - // Example 2: if suffix_indicator is "foo#", we remove the trie link from - // "foo" to "foo#". - int cur_pos = 0; - int next_pos = 0; - bool prev_node_id_is_root = false; - auto node = trie_->CreateTraversalCursorPointToRoot(); - UChar32 c; - int suffix_indicator_length = suffix_indicator_.size(); - while (cur_pos < suffix_indicator_length) { - next_pos = cur_pos; - U8_NEXT(suffix_indicator_, next_pos, suffix_indicator_length, c); - prev_node_id_is_root = (node.node_id == trie_->kRootNodeId); - absl::string_view cur_unicode_char(suffix_indicator_.data() + cur_pos, - next_pos - cur_pos); - if (!trie_->TryTraverseSeveralSteps(node, cur_unicode_char)) { - return absl::FailedPreconditionError( - "Cannot locate a character in suffix_indicator_. 
It should never " - "happen."); - } - if (fast_wordpiece_tokenizer_utils::IsPunctuationOrChineseChar(c)) { - // If the previous node is a root node, read the next char to break the - // link from the current punctuation char to its next child node. - if (prev_node_id_is_root) { - cur_pos = next_pos; - U8_FWD_1(suffix_indicator_, next_pos, suffix_indicator_length); - const absl::string_view next_unicode_char( - suffix_indicator_.data() + cur_pos, next_pos - cur_pos); - auto child_node = node; - if (!trie_->TryTraverseSeveralSteps(child_node, next_unicode_char)) { - return absl::FailedPreconditionError( - "Cannot locate a character in suffix_indicator_. It should " - "never happen."); - } - BreakTrieLinkFromParentToChild(child_node.node_id); - } else { - BreakTrieLinkFromParentToChild(node.node_id); - } - break; - } - cur_pos = next_pos; - } - } - return absl::OkStatus(); -} - -absl::Status FastWordpieceBuilder::AssignFailureLinkAndPops( - uint32_t cur_node, - uint32_t failure_link, - const std::vector<int>& one_step_pops, - int parent_failure_pops_offset_length) { - if (failure_link == fast_wordpiece_tokenizer_utils::kNullNode) { - return absl::OkStatus(); - } - FailureStruct& cur_node_fs = failure_struct_array_[cur_node]; - cur_node_fs.failure_link = failure_link; - - // Let v be `cur_node` and u be the parent node. - if (one_step_pops.empty()) { - // Case 1: F(v) = F(u). So we just share the same vector. - cur_node_fs.failure_pops_offset_length = parent_failure_pops_offset_length; - } else { - // Case 2: F(v) = F(u) + `one_step_pops`. We need to create a new vector and - // append to `failure_pops_pool_`. - const int failure_pops_offset = failure_pops_pool_.size(); - if (failure_pops_offset > - fast_wordpiece_tokenizer_utils::kMaxSupportedFailurePoolOffset) { - return absl::FailedPreconditionError(absl::StrCat( - "Failure pops list offset is ", failure_pops_offset, - ", which exceeds maximum supported offset ", - fast_wordpiece_tokenizer_utils::kMaxSupportedFailurePoolOffset, - ". The vocabulary seems to be too large to be supported.")); - } - // First copy F(u). - GetFailurePopsAndAppendToOut(parent_failure_pops_offset_length, - failure_pops_pool_); - // Then append `one_step_pops`. - failure_pops_pool_.insert(failure_pops_pool_.end(), one_step_pops.begin(), - one_step_pops.end()); - const int failure_pops_length = - failure_pops_pool_.size() - failure_pops_offset; - if (failure_pops_length > - fast_wordpiece_tokenizer_utils::kMaxFailurePopsListSize) { - // This should not happen, because `kBitsToEncodeFailurePopsListSize` is - // set to be less than or equal to `kBitsToEncodeVocabTokenLength` (see - // fast_wordpiece_tokenizer_utils.h). 
- return absl::FailedPreconditionError(absl::StrCat( - "Failure pops list size is ", failure_pops_length, - ", which exceeds maximum supported size ", - fast_wordpiece_tokenizer_utils::kMaxFailurePopsListSize, ".")); - } - - cur_node_fs.failure_pops_offset_length = - fast_wordpiece_tokenizer_utils::EncodeFailurePopList( - failure_pops_offset, failure_pops_length); - } - return absl::OkStatus(); -} - -void FastWordpieceBuilder::GetFailurePopsAndAppendToOut( - uint32_t failure_pops_offset_length, - std::vector<int>& out_failure_pops) { - if (failure_pops_offset_length == - fast_wordpiece_tokenizer_utils::kNullFailurePopsList) { - return; - } - int failure_pops_offset, failure_pops_length; - fast_wordpiece_tokenizer_utils::GetFailurePopsOffsetAndLength( - failure_pops_offset_length, failure_pops_offset, failure_pops_length); - out_failure_pops.insert( - out_failure_pops.end(), failure_pops_pool_.begin() + failure_pops_offset, - failure_pops_pool_.begin() + failure_pops_offset + failure_pops_length); -} - -absl::Status FastWordpieceBuilder::PrecomputeResultForSuffixIndicator() { - std::vector<std::string> subwords; - std::vector<int> begin_offset; - std::vector<int> end_offset; - int num_word_pieces; - // Use the original WordPiece implementation. - LookupStatus status = WordpieceTokenize( - suffix_indicator_, max_bytes_per_token_, /*max_chars_per_subtoken=*/-1, - suffix_indicator_, /*use_unknown_token=*/true, unk_token_, - /*split_unknown_characters=*/false, &vocab_.value(), &subwords, - &begin_offset, &end_offset, &num_word_pieces); - precomputed_result_for_suffix_indicator_.reserve(subwords.size()); - if (!status.success) { - return absl::FailedPreconditionError(status.error_msg); - } - for (int i = 0; i < subwords.size(); ++i) { - const absl::optional<int> subword_id = vocab_->LookupId(subwords[i]); - if (!subword_id.has_value()) { - return absl::FailedPreconditionError( - "Impossible because `subwords[i]` must be in the vocabulary!"); - } - TrieVocabToken token(subwords[i], *subword_id, suffix_indicator_); - SH_ASSIGN_OR_RETURN( - int encoded_value, - fast_wordpiece_tokenizer_utils::EncodeToken( - token.TokenId(), token.TokenLengthWithoutSuffixIndicator(), - token.IsSuffixToken())); - precomputed_result_for_suffix_indicator_.push_back(encoded_value); - } - return absl::OkStatus(); -} - -absl::StatusOr<std::string> FastWordpieceBuilder::ExportToFlatBuffer() const { - flatbuffers::FlatBufferBuilder builder; - - const auto trie_array = builder.CreateVector(trie_array_); - std::vector<tensorflow::text::FailureStruct> failure_struct_fbs_vector; - failure_struct_fbs_vector.reserve(failure_struct_array_.size()); - for (const auto& item : failure_struct_array_) { - failure_struct_fbs_vector.emplace_back(item.failure_link, - item.failure_pops_offset_length); - } - const auto failure_structure_array = - builder.CreateVectorOfStructs(failure_struct_fbs_vector); - const auto failure_pops_pool = builder.CreateVector(failure_pops_pool_); - const auto precomputed_result_for_suffix_indicator = - builder.CreateVector(precomputed_result_for_suffix_indicator_); - const auto suffix_indicator = builder.CreateString(suffix_indicator_); - const auto unk_token = builder.CreateString(unk_token_); - - std::vector<flatbuffers::Offset<flatbuffers::String>> vocab_fbs_vector; - std::vector<bool> vocab_is_suffix_fbs_vector; - - if (support_detokenization_) { - vocab_fbs_vector.reserve(vocab_->Size()); - for (int i = 0; i < vocab_->Size(); ++i) { - const absl::optional<absl::string_view> word = vocab_->LookupWord(i); - 
if (!word.has_value()) { - return absl::FailedPreconditionError( - "Impossible. `token_id` is definitely within the range of vocab " - "token ids; hence LookupWord() should always succeed."); - } - absl::string_view token = word.value(); - bool is_suffix_token = false; - if (!suffix_indicator_.empty() && token != suffix_indicator_ && - absl::StartsWith(token, suffix_indicator_)) { - is_suffix_token = true; - // For suffix tokens, we remove the suffix indicator to save spac and - // for ease of use in detokenization (where the suffix indicator will be - // stripped anyway). - token = token.substr(suffix_indicator_.size()); - } - vocab_fbs_vector.emplace_back(builder.CreateString(token)); - vocab_is_suffix_fbs_vector.emplace_back(is_suffix_token); - } - } - - auto vocab_array = builder.CreateVector(vocab_fbs_vector); - auto vocab_is_suffix_array = builder.CreateVector(vocab_is_suffix_fbs_vector); - - FastWordpieceTokenizerConfigBuilder wtcb(builder); - wtcb.add_trie_array(trie_array); - wtcb.add_failure_struct_array(failure_structure_array); - wtcb.add_failure_pops_pool(failure_pops_pool); - wtcb.add_trie_suffix_root(trie_suffix_root_); - wtcb.add_trie_punct_failure_link_node(trie_punct_failure_link_node_); - - wtcb.add_max_bytes_per_token(max_bytes_per_token_); - wtcb.add_suffix_indicator(suffix_indicator); - wtcb.add_unk_token(unk_token); - wtcb.add_unk_token_id(unk_token_id_); - wtcb.add_precomputed_result_for_suffix_indicator( - precomputed_result_for_suffix_indicator); - wtcb.add_end_to_end(!no_pretokenization_); - wtcb.add_support_detokenization(support_detokenization_); - wtcb.add_vocab_array(vocab_array); - wtcb.add_vocab_is_suffix_array(vocab_is_suffix_array); - FinishFastWordpieceTokenizerConfigBuffer(builder, wtcb.Finish()); - return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()), - builder.GetSize()); -} -} // namespace - -absl::StatusOr<std::string> BuildModelAndExportToFlatBuffer( - const std::vector<std::string>& vocab, - int max_bytes_per_token, - absl::string_view suffix_indicator, - absl::string_view unk_token, - bool no_pretokenization, - bool support_detokenization) { - FastWordpieceBuilder builder; - SH_RETURN_IF_ERROR(builder.BuildModel( - vocab, max_bytes_per_token, suffix_indicator, unk_token, - no_pretokenization, support_detokenization)); - SH_ASSIGN_OR_RETURN(std::string flatbuffer, builder.ExportToFlatBuffer()); - return flatbuffer; -} - -} // namespace text -} // namespace tensorflow
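Note for reviewers of this deleted builder: the code above repeatedly reasons about what the original greedy longest-match-first (MaxMatch) WordPiece algorithm would produce (Case 1/Case 2 in BuildFailureStructure(), and PrecomputeResultForSuffixIndicator(), which calls the original WordpieceTokenize()). A minimal byte-level sketch of that reference algorithm is below. It assumes a plain std::unordered_set vocabulary and ignores UTF-8 boundaries, max_bytes_per_token, and offset reporting, so it is illustrative only and is not the TF.Text implementation.

// Minimal greedy longest-match-first (MaxMatch) WordPiece sketch.
// Assumptions (not from this CL): byte-level matching, unordered_set vocab,
// no max_bytes_per_token handling, no offset reporting.
#include <string>
#include <unordered_set>
#include <vector>

std::vector<std::string> MaxMatchWordpiece(
    const std::string& word, const std::unordered_set<std::string>& vocab,
    const std::string& suffix_indicator, const std::string& unk_token) {
  std::vector<std::string> pieces;
  size_t start = 0;
  while (start < word.size()) {
    size_t end = word.size();
    std::string match;
    // Shrink the candidate from the right until it is found in the vocab.
    while (end > start) {
      std::string candidate = word.substr(start, end - start);
      if (start > 0) candidate = suffix_indicator + candidate;
      if (vocab.count(candidate)) {
        match = std::move(candidate);
        break;
      }
      --end;
    }
    if (match.empty()) return {unk_token};  // Whole word maps to the unk token.
    pieces.push_back(std::move(match));
    start = end;
  }
  return pieces;
}

// Example: with vocab {"abc", "##de", "##f", "##ghz"} and suffix "##",
// MaxMatchWordpiece("abcdefghz", ...) yields {"abc", "##de", "##f", "##ghz"},
// which matches Test 1 in the deleted fast_wordpiece_tokenizer_test.cc below.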
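BreakTrieLinkFromParentToChild() and EraseValueOfNode() above depend on two facts about the darts-clone unit layout (per the darts.h references in their comments): the low 8 bits of a unit hold the label of the incoming edge, and bit 8 marks whether the node carries a stored value. The self-contained toy below only demonstrates those two masks on a fabricated unit value; it is not the darts-clone API.

// Toy demonstration of the two masks used above, applied to a made-up unit.
#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical unit: incoming edge label 'a' (0x61) in bits 0-7 and the
  // "has value" flag in bit 8; the remaining bits are irrelevant here.
  uint32_t unit = 0x00000161u;
  uint32_t unlinked = unit & 0xFFFFFF00u;  // label cleared: parent can no longer reach this node
  uint32_t no_value = unit & 0xFFFFFEFFu;  // bit 8 cleared: node no longer carries a value
  std::printf("unit=%08X unlinked=%08X no_value=%08X\n", unit, unlinked, no_value);
  return 0;
}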
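FailureStruct and AssignFailureLinkAndPops() above store each failure pop list as an (offset, length) pair into failure_pops_pool_, packed into a single uint32_t by EncodeFailurePopList(). The real bit split is defined by kBitsToEncodeFailurePopsListSize in fast_wordpiece_tokenizer_utils.h, which is not part of this hunk; the sketch below assumes an 8-bit length field purely for illustration.

// Illustrative (offset, length) packing; the 8-bit length field is an
// assumption for this sketch, not the real constant.
#include <cstdint>
#include <cstdio>

constexpr int kLengthBits = 8;  // assumed width, not the TF.Text value
constexpr uint32_t kLengthMask = (1u << kLengthBits) - 1;

uint32_t EncodePopList(uint32_t offset, uint32_t length) {
  return (offset << kLengthBits) | (length & kLengthMask);
}

void DecodePopList(uint32_t packed, uint32_t& offset, uint32_t& length) {
  offset = packed >> kLengthBits;
  length = packed & kLengthMask;
}

int main() {
  uint32_t packed = EncodePopList(/*offset=*/1234, /*length=*/3);
  uint32_t offset = 0, length = 0;
  DecodePopList(packed, offset, length);
  std::printf("offset=%u length=%u\n", offset, length);  // offset=1234 length=3
  return 0;
}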
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h deleted file mode 100644 index 769e66c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h +++ /dev/null
@@ -1,56 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_MODEL_BUILDER_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_MODEL_BUILDER_H_ - -#include <string> -#include <vector> - -#include "absl/status/statusor.h" - -namespace tensorflow { -namespace text { - -// Builds a FastWordpieceTokenizer model in flatbuffer format. -// -// Args: -// * vocab: The WordPiece vocabulary. -// * max_bytes_per_token: The max size of the input token. If the input -// length is longer than this, it will be mapped to unk_token. -// * suffix_indicator: Characters prepended to a wordpiece to indicate that -// it is a suffix to another subword, such as "##". -// * unk_token: The unknown token string. -// * no_pretokenization: Whether to pretokenize on punctuation & whitespace. -// Set to `false` when the model is used for general text end-to-end -// tokenization, which combines pre-tokenization (splitting text into words -// on punctuation/whitespaces) and WordPiece (breaking words into subwords) -// into one pass. -//. * support_detokenization: Whether to enable the detokenization function. -// Setting it to true expands the size of the flatbuffer. As a reference, -// When using 120k multilingual BERT WordPiece vocab, the flatbuffer's size -// increases from ~5MB to ~6MB. -// Returns: -// The bytes of the flatbuffer that stores the model. -absl::StatusOr<std::string> BuildModelAndExportToFlatBuffer( - const std::vector<std::string>& vocab, - int max_bytes_per_token, - absl::string_view suffix_indicator, - absl::string_view unk_token, - bool no_pretokenization = false, - bool support_detokenization = false); -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FASt_WORDPIECE_TOKENIZER_MODEL_BUILDER_H_
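For context on the API being removed in this header, a hypothetical caller is sketched below. It relies only on the BuildModelAndExportToFlatBuffer() declaration above (using its default arguments for the last two parameters); the vocabulary and output file name are invented for illustration and do not come from this CL.

// Hypothetical usage of the deleted builder API; vocab and output path are
// made up for illustration.
#include <fstream>
#include <string>
#include <vector>

#include "absl/status/statusor.h"
#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h"

int main() {
  const std::vector<std::string> vocab = {"a", "abc", "##de", "##f", "##ghz",
                                          "<unk>"};
  absl::StatusOr<std::string> flatbuffer =
      tensorflow::text::BuildModelAndExportToFlatBuffer(
          vocab, /*max_bytes_per_token=*/100, /*suffix_indicator=*/"##",
          /*unk_token=*/"<unk>");
  if (!flatbuffer.ok()) return 1;
  std::ofstream out("fast_wordpiece_tokenizer_model.fb", std::ios::binary);
  out.write(flatbuffer->data(), flatbuffer->size());
  return 0;
}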
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc deleted file mode 100644 index 8aa05e86..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_test.cc +++ /dev/null
@@ -1,2489 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h" - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "absl/flags/flag.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h" - -namespace tensorflow { -namespace text { -namespace { - -using ::testing::ElementsAre; - -constexpr char kTestConfigPath[] = - "third_party/tensorflow_text/python/ops/test_data/" - "fast_wordpiece_tokenizer_model.fb"; - -TEST(FastWordpieceTokenizerTest, LoadAndTokenize) { - std::string config_flatbuffer; - auto status = tensorflow::ReadFileToString( - tensorflow::Env::Default(), kTestConfigPath, &config_flatbuffer); - ASSERT_TRUE(status.ok()); - - // The config_flatbuffer used here is built from the following config: - // * vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - // "##ghz", "<unk>"} - // * unk_token = "<unk>" - // * suffix_indicator = "##" - // * max_bytes_per_token = 100 - ASSERT_OK_AND_ASSIGN( - auto tokenizer, FastWordpieceTokenizer::Create(config_flatbuffer.data())); - - std::string input = "abcdefghz"; - std::vector<std::string> output_tokens; - std::vector<int> output_ids; - std::vector<int> output_start_offsets; - std::vector<int> output_end_offsets; - tokenizer.Tokenize(input, &output_tokens, &output_ids, &output_start_offsets, - &output_end_offsets); - EXPECT_THAT(output_tokens, ElementsAre("abc", "##de", "##f", "##ghz")); - EXPECT_THAT(output_ids, ElementsAre(1, 3, 6, 7)); - EXPECT_THAT(output_start_offsets, ElementsAre(0, 3, 5, 6)); - EXPECT_THAT(output_end_offsets, ElementsAre(3, 5, 6, 9)); -} - -template <typename T> -std::string ListToString(const std::vector<T>& list) { - return absl::StrCat("[", absl::StrJoin(list, ", "), "]"); -} - -// Testing spec struct for parameterized tests. -struct Spec { - friend std::ostream& operator<<(std::ostream& os, const Spec& s) { - return os << "vocab: " << ListToString(s.vocab) << ", " - << "unk_token:" << s.unk_token << ", " - << "suffix_indicator:" << s.suffix_indicator << ", " - << "max_bytes_per_token:" << s.max_bytes_per_token << ", " - << "input:" << s.input << ", " - << "expected_tokens:" << ListToString(s.expected_tokens) << ", " - << "expected_token_ids:" << ListToString(s.expected_token_ids) - << ", " - << "expected_token_start_offsets:" - << ListToString(s.expected_token_start_offsets) << ", " - << "expected_token_end_offsets:" - << ListToString(s.expected_token_end_offsets) << std::endl; - } - - std::vector<std::string> vocab; - std::string unk_token; - std::string suffix_indicator; - int max_bytes_per_token; - std::string input; - std::vector<std::string> expected_tokens; - std::vector<int> expected_token_ids; - std::vector<int> expected_token_start_offsets = {}; - std::vector<int> expected_token_end_offsets = {}; - // Only used when detokenizing the tokenized ids back to text. 
- std::string expected_detokenized_text; -}; - -// Parameterized tests specs for Tokenize() when input is a single word. -const std::vector<Spec>& GetTestSpecsForTokenizeSingleWord() { - static const std::vector<Spec>& v = *new std::vector<Spec>{ - // Test suite 1, normal vocabulary. - // Test 0: Empty input. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "", - .expected_tokens = {}, - .expected_token_ids = {}, - .expected_token_start_offsets = {}, - .expected_token_end_offsets = {}, - }, - // Test 1: Basic. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdefghz", - .expected_tokens = {"abc", "##de", "##f", "##ghz"}, - .expected_token_ids = {1, 3, 6, 7}, - .expected_token_start_offsets = {0, 3, 5, 6}, - .expected_token_end_offsets = {3, 5, 6, 9}, - }, - // Test 2: Collect more tokens at the end. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdef", - .expected_tokens = {"abc", "##de", "##f"}, - .expected_token_ids = {1, 3, 6}, - .expected_token_start_offsets = {0, 3, 5}, - .expected_token_end_offsets = {3, 5, 6}, - }, - // Test 3: Unseen character alone. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "X", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 4: Unseen character at the beginning. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "Xde", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 5: Unseen character in the middle. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcXde", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {6}, - }, - // Test 6: Unseen character at the end. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcX", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {4}, - }, - // Test 7: Input has leading suffix indicator. Result is normal. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##deh", - .expected_tokens = {"##deh"}, - .expected_token_ids = {5}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - // Test 8: Input has the leading suffix indicator. 
Vocab has "#" and - // "###". Result is normal. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##deh", - .expected_tokens = {"##deh"}, - .expected_token_ids = {5}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - // Test 9: Input is the suffix indicator itself. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 10: [PAD] is in the vocabulary. Input is [PAD]. - { - .vocab = {"[pad]", "a", "abc", "abcdefghi", "##de", "##defgxy", - "##deh", "##f", "##ghz", "#", "###", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "[pad]", - .expected_tokens = {"[pad]"}, - .expected_token_ids = {0}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - // Test 11: [PAD] is not in the vocabulary. Input is [PAD]. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "[pad]", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {10}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - - // Test suite 2, input contains #. - // Test 12: Input is #. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "#", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 13: Input is #. Result is not <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "#", - .expected_tokens = {"#"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 14: Input is #. The suffix indicator is in the vocab. Result is - // not <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "#", - .expected_tokens = {"#"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 15: Input is the suffix indicator itself. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {9}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 16: Input is the suffix indicator itself. Result is not <unk>. 
- { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"#", "###"}, - .expected_token_ids = {8, 9}, - .expected_token_start_offsets = {0, 1}, - .expected_token_end_offsets = {1, 2}, - }, - // Test 17: Input is the suffix indicator itself. The suffix indicator is - // in the vocab. Result is not <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"##"}, - .expected_token_ids = {10}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 18: Input is ###. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {9}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 19: Input is ###. Result is not <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"###"}, - .expected_token_ids = {9}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 20: Input is ###. The suffix indicator is in the vocab. Result is - // not <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"###"}, - .expected_token_ids = {9}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 21: Input is ####. Result is not <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "####", - .expected_tokens = {"###", "###"}, - .expected_token_ids = {9, 9}, - .expected_token_start_offsets = {0, 3}, - .expected_token_end_offsets = {3, 4}, - }, - // Test 22: Input is ####. The suffix indicator is in the vocab. Result - // is not <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "#", "###", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "####", - .expected_tokens = {"###", "###"}, - .expected_token_ids = {9, 9}, - .expected_token_start_offsets = {0, 3}, - .expected_token_end_offsets = {3, 4}, - }, - - // Test suite 3, the vocabulary contains empty tokens ("", "##"). - // Test 23: The empty prefix token ("") and the empty suffix token ("##") - // are in the vocabulary. 
- { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdefghz", - .expected_tokens = {"abc", "##de", "##f", "##ghz"}, - .expected_token_ids = {1, 3, 6, 7}, - .expected_token_start_offsets = {0, 3, 5, 6}, - .expected_token_end_offsets = {3, 5, 6, 9}, - }, - // Test 24: The empty prefix token ("") and the empty suffix ("##") token - // are in the vocabulary. Input is empty. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "", - .expected_tokens = {}, - .expected_token_ids = {}, - .expected_token_start_offsets = {}, - .expected_token_end_offsets = {}, - }, - // Test 25: The empty prefix token ("") and the empty suffix token ("##") - // are in the vocabulary. Input is the suffix indicator. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"##"}, - .expected_token_ids = {9}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 26: The empty prefix token ("") and the empty suffix token ("##") - // are in the vocabulary. There are vocab tokens after the empty vocab - // tokens in the vocab. Result is one vocab token. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "", "##", "xyz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "xyz", - .expected_tokens = {"xyz"}, - .expected_token_ids = {10}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 27: The empty prefix token ("") and the empty suffix ("##") token - // are in the vocabulary. There are vocab tokens after the empty vocab - // tokens in the vocab. Result has multiple tokens. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "", "##", "xy", "##z", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "xyz", - .expected_tokens = {"xy", "##z"}, - .expected_token_ids = {10, 11}, - .expected_token_start_offsets = {0, 2}, - .expected_token_end_offsets = {2, 3}, - }, - // Test 28: The empty prefix token ("") and the empty suffix token ("##") - // are in the vocabulary. Input has the leading suffix indicator. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "", "##", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##deh", - .expected_tokens = {"##deh"}, - .expected_token_ids = {5}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - - // Test suite 4, No suffix tokens in the vocabulary. - // Test 29: No suffix tokens in the vocabulary. Result is normal. - { - .vocab = {"a", "abc", "abcdefghi", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abc", - .expected_tokens = {"abc"}, - .expected_token_ids = {1}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 30: No suffix tokens in the vocabulary. Result is <unk>. 
- { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcde", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - // Test 31: No suffix tokens in the vocabulary. A different input. Result - // is <unk>. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdz", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - // Test 32: No suffix tokens in the vocabulary. Input is #. Result is - // <unk> - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "#", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 33: No suffix tokens in the vocabulary. Input is #. Result is not - // <unk>. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "#"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "#", - .expected_tokens = {"#"}, - .expected_token_ids = {5}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 34: No suffix tokens in the vocabulary. Vocab has the suffix - // indicator. Input is #. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "##"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "#", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 35: No suffix tokens in the vocabulary. Input is ##. Result is - // <unk>. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 36: No suffix tokens in the vocabulary. Vocab has the suffix - // indicator. Input is #. Result is <unk>. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "##"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "#", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {1}, - }, - // Test 37: No suffix tokens in the vocabulary. Vocab has the suffix - // indicator. Input is ##. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "##"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"##"}, - .expected_token_ids = {5}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 38: No suffix tokens in the vocabulary. Vocab has '#'. Input is - // ##. Result is <unk>. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "#"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 39: No suffix tokens in the vocabulary. 
Vocab has the suffix - // indicator and "#". Input is ##. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "##", "#"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"##"}, - .expected_token_ids = {5}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 40: No suffix tokens in the vocabulary. Input is ###. Result is - // <unk>. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 41: No suffix tokens in the vocabulary. Vocab has '#'. Input is - // ###. Result is <unk>. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "#"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 42: No suffix tokens in the vocabulary. Vocab has the suffix - // indicator. Input is ###. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "##"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {4}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 43: There is only one suffix tokens "###" in the vocabulary. - // Input is ###. - { - .vocab = {"a", "abc", "de", "abcdefghi", "<unk>", "###"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"###"}, - .expected_token_ids = {5}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - - // Test suite 5, No prefix tokens in the vocabulary. - // Test 44: No prefix tokens in the vocabulary. Input is a prefix token. - { - .vocab = {"##a", "##abc", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abc", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {2}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 45: No prefix tokens in the vocabulary. Input is a suffix token. - { - .vocab = {"##a", "##abc", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##abc", - .expected_tokens = {"##abc"}, - .expected_token_ids = {1}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - - // Test suite 6, more tests. - // Test 46: Input is empty. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "", - .expected_tokens = {}, - .expected_token_ids = {}, - .expected_token_start_offsets = {}, - .expected_token_end_offsets = {}, - }, - // Test 47: Normal input. 
- { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "unwanted", - .expected_tokens = {"un", "##want", "##ed"}, - .expected_token_ids = {7, 4, 5}, - .expected_token_start_offsets = {0, 2, 6}, - .expected_token_end_offsets = {2, 6, 8}, - }, - // Test 48: Unseen character. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "unwantedX", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {1}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {9}, - }, - - // Test suite 7. Testing on long inputs (kMaxInputCharPerWord = 100). The - // word length below means the number of utf-8 bytes. - // Test 49: Word length = 99 (i.e., kMaxInputCharPerWord-1). - { - .vocab = {"<unk>", "0123456789", "##0123456789", "##012345678"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "01234567890123456789012345678901234567890123456789012345678" - "9012345678901234567890123456789012345678", - .expected_tokens = {"0123456789", "##0123456789", "##0123456789", - "##0123456789", "##0123456789", "##0123456789", - "##0123456789", "##0123456789", "##0123456789", - "##012345678"}, - .expected_token_ids = {1, 2, 2, 2, 2, 2, 2, 2, 2, 3}, - .expected_token_start_offsets = {0, 10, 20, 30, 40, 50, 60, 70, 80, - 90}, - .expected_token_end_offsets = {10, 20, 30, 40, 50, 60, 70, 80, 90, - 99}, - }, - // Test 50: Word length = 100 (i.e., kMaxInputCharPerWord). Contains a - // multi-bytes Unicode char. - { - .vocab = {"<unk>", "0123456789", "##0123456789", "##01234567", - /*U+05C3*/ "##\xD7\x83", "##a"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "01234567890123456789012345678901234567890123456789012345678" - "901234567890123456789012345678901234567\xD7\x83", - .expected_tokens = {"0123456789", "##0123456789", "##0123456789", - "##0123456789", "##0123456789", "##0123456789", - "##0123456789", "##0123456789", "##0123456789", - "##01234567", "##\xD7\x83"}, - .expected_token_ids = {1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4}, - .expected_token_start_offsets = {0, 10, 20, 30, 40, 50, 60, 70, 80, - 90, 98}, - .expected_token_end_offsets = {10, 20, 30, 40, 50, 60, 70, 80, 90, 98, - 100}, - }, - // Test 51: Word length = 101 (i.e., kMaxInputCharPerWord+1). Contains a - // multi-bytes Unicode char. - { - .vocab = {"<unk>", "0123456789", "##0123456789", "##012345678", - /*U+05C3*/ "##\xD7\x83", "##a"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "01234567890123456789012345678901234567890123456789012345678" - "9012345678901234567890123456789012345678\xD7\x83", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {0}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {101}, - }, - // Test 52: Word length = 101 (i.e., kMaxInputCharPerWord+1). 
- { - .vocab = {"<unk>", "0123456789", "##0123456789", "##012345678", - "##a"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "01234567890123456789012345678901234567890123456789012345678" - "90123456789012345678901234567890123456789a", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {0}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {101}, - }, - // Test 53: Word length = 99 (i.e., kMaxInputCharPerWord-1). The word is - // not tokenizable. - { - .vocab = {"<unk>", "0123456789", "##0123456789", - "##012345678\xe2\x80\x8B", "##\xe2\x80\x8B"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "01234567890123456789012345678901234567890123456789012345678" - "9012345678901234567890123456789012345678", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {0}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {99}, - }, - - // Test suite 8. Normal vocab and inputs. - // Test 54. - { - .vocab = {"<unk>", "play", "see", "##ing", "##ed", "##es", "##ly", - "##on", "##s", "##able"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "play", - .expected_tokens = {"play"}, - .expected_token_ids = {1}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {4}, - }, - // Test 55. - { - .vocab = {"<unk>", "play", "see", "##ing", "##ed", "##es", "##ly", - "##on", "##s", "##able"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "playing", - .expected_tokens = {"play", "##ing"}, - .expected_token_ids = {1, 3}, - .expected_token_start_offsets = {0, 4}, - .expected_token_end_offsets = {4, 7}, - }, - // Test 56. - { - .vocab = {"<unk>", "play", "see", "##ing", "##ed", "##es", "##ly", - "##on", "##s", "##able"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "sees", - .expected_tokens = {"see", "##s"}, - .expected_token_ids = {2, 8}, - .expected_token_start_offsets = {0, 3}, - .expected_token_end_offsets = {3, 4}, - }, - // Test 57. - { - .vocab = {"<unk>", "play", "see", "##ing", "##ed", "##es", "##ly", - "##on", "##s", "##able", "u", "un", "##de", "##deni"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "undeniable", - .expected_tokens = {"un", "##deni", "##able"}, - .expected_token_ids = {11, 13, 9}, - .expected_token_start_offsets = {0, 2, 6}, - .expected_token_end_offsets = {2, 6, 10}, - }, - // Test 58. - { - .vocab = {"<unk>", "play", "see", "##ing", "##ed", "##es", "##ly", - "##on", "##s", "##able", "u", "un", "##de", "##deni", - "undeniable"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "undeniable", - .expected_tokens = {"undeniable"}, - .expected_token_ids = {14}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {10}, - }, - // Test 59. 
- { - .vocab = {"<unk>", "s", "su", "super", "##per", "##ca", - "##cali", "##f", "##fra", "##g", "##gil", "##i", - "##is", "##istic", "##e", "##ex", "##pi", "##pia", - "##li", "##lido", "##ci", "##cious", "##ous"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "supercalifragilisticexpialidocious", - .expected_tokens = {"super", "##cali", "##fra", "##gil", "##istic", - "##ex", "##pia", "##lido", "##cious"}, - .expected_token_ids = {3, 6, 8, 10, 13, 15, 17, 19, 21}, - .expected_token_start_offsets = {0, 5, 9, 12, 15, 20, 22, 25, 29}, - .expected_token_end_offsets = {5, 9, 12, 15, 20, 22, 25, 29, 34}, - }, - - // Test suite 9. Different unk_tokens. - // Test 60: Basic with a different unk_token. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdefghz", - .expected_tokens = {"abc", "##de", "##f", "##ghz"}, - .expected_token_ids = {1, 3, 6, 7}, - .expected_token_start_offsets = {0, 3, 5, 6}, - .expected_token_end_offsets = {3, 5, 6, 9}, - }, - // Test 61: Untokenizable with a different unk_token. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdefghzX", - .expected_tokens = {"[unk]"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {10}, - }, - - // Test suite 10. Input is the unk_token. - // Test 62: Input is the unk_token. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "[unk]", - .expected_tokens = {"[unk]"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {5}, - }, - - // Test suite 11. Input is the suffix indicator itself. - // Test 63: Suffix indicator is "##" and is tokenizable. - { - .vocab = {"#", "###", "a", "abc", "abcdefghi", "##de", "##defgxy", - "##deh", "##f", "##ghz", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"#", "###"}, - .expected_token_ids = {0, 1}, - .expected_token_start_offsets = {0, 1}, - .expected_token_end_offsets = {1, 2}, - }, - // Test 64: Suffix indicator is "##" but not tokenizable. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"[unk]"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 65: Suffix indicator is "##" and "##" is in the vocabulary. - { - .vocab = {"#", "###", "##", "a", "abc", "abcdefghi", "##de", - "##defgxy", "##deh", "##f", "##ghz", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"##"}, - .expected_token_ids = {2}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {2}, - }, - // Test 66: Suffix indicator is "###" and is tokenizable. 
- { - .vocab = {"#", "####", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "###", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"#", "####", "####"}, - .expected_token_ids = {0, 1, 1}, - .expected_token_start_offsets = {0, 1, 2}, - .expected_token_end_offsets = {1, 2, 3}, - }, - // Test 67: Suffix indicator is "###" and is tokenizable. A different - // vocab. - { - .vocab = {"#", "####", "##", "[unk]"}, - .unk_token = "[unk]", - .suffix_indicator = "###", - .max_bytes_per_token = 100, - .input = "###", - .expected_tokens = {"##", "####"}, - .expected_token_ids = {2, 1}, - .expected_token_start_offsets = {0, 2}, - .expected_token_end_offsets = {2, 3}, - }, - - // Test suite 12, different suffix indicators. - // Test 68: A different suffix indicator. - { - .vocab = {"a", "abc", "abcdefghi", "<suffix>de", "<suffix>defgxy", - "<suffix>deh", "<suffix>f", "<suffix>ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "<suffix>", - .max_bytes_per_token = 100, - .input = "abcdefghz", - .expected_tokens = {"abc", "<suffix>de", "<suffix>f", "<suffix>ghz"}, - .expected_token_ids = {1, 3, 6, 7}, - .expected_token_start_offsets = {0, 3, 5, 6}, - .expected_token_end_offsets = {3, 5, 6, 9}, - }, - // Test 69: The suffix indicator is empty. - { - .vocab = {"a", "abc", "abcdefghi", "de", "defgxy", "deh", "f", "ghz", - "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "", - .max_bytes_per_token = 100, - .input = "abcdefghz", - .expected_tokens = {"abc", "de", "f", "ghz"}, - .expected_token_ids = {1, 3, 6, 7}, - .expected_token_start_offsets = {0, 3, 5, 6}, - .expected_token_end_offsets = {3, 5, 6, 9}, - }, - // Test 70: The suffix indicator is empty. Input is empty. - { - .vocab = {"a", "abc", "abcdefghi", "de", "defgxy", "deh", "f", "ghz", - "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "", - .max_bytes_per_token = 100, - .input = "", - .expected_tokens = {}, - .expected_token_ids = {}, - .expected_token_start_offsets = {}, - .expected_token_end_offsets = {}, - }, - - // Test suite 13, multi-bytes chars in vocab and input. - // The following codepoints and their utf-8 encodings are used here: - // * U+03B1 (Greek Small Letter Alpha): "\xCE\xB1" - // * U+03B2 (Greek Small Letter Beta): "\xCE\xB2" - // * U+2EDA (Cjk Radical C-Simplified Leaf): b'\xE2\xBB\x9A' - // * U+2EDB (Cjk Radical C-Simplified Wind): b'\xE2\xBB\x9B' - // Test 71: multi-bytes chars in the vocab. - { - .vocab = {"<unk>", "abc", "a", "##bc", "a\xCE\xB1\xCE\xB2", - "\xCE\xB1", "##\xCE\xB1", "##\xCE\xB2", "\xE2\xBB\x9A"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abc", - .expected_tokens = {"abc"}, - .expected_token_ids = {1}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - // Test 72: input contains 2-bytes chars. - { - .vocab = {"<unk>", "abc", "a", "##bc", "a\xCE\xB1\xCE\xB2", - "\xCE\xB1", "##\xCE\xB1", "##\xCE\xB2", "\xE2\xBB\x9A"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "a\xCE\xB1\xCE\xB2\xCE\xB1\xCE\xB2", - .expected_tokens = {"a\xCE\xB1\xCE\xB2", "##\xCE\xB1", "##\xCE\xB2"}, - .expected_token_ids = {4, 6, 7}, - .expected_token_start_offsets = {0, 5, 7}, - .expected_token_end_offsets = {5, 7, 9}, - }, - // Test 73: input contains 3-bytes chars. 
- { - .vocab = {"<unk>", "abc", "a", "##bc", "a\xCE\xB1\xCE\xB2", - "\xCE\xB1", "##\xCE\xB1", "##\xCE\xB2", "\xE2\xBB\x9A"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "\xE2\xBB\x9A" - "bc\xCE\xB1", - .expected_tokens = {"\xE2\xBB\x9A", "##bc", "##\xCE\xB1"}, - .expected_token_ids = {8, 3, 6}, - .expected_token_start_offsets = {0, 3, 5}, - .expected_token_end_offsets = {3, 5, 7}, - }, - // Test 74: input contains unseen multi-bytes chars. - { - .vocab = {"<unk>", "abc", "a", "##bc", "a\xCE\xB1\xCE\xB2", - "\xCE\xB1", "##\xCE\xB1", "##\xCE\xB2", "\xE2\xBB\x9A"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "\xE2\xBB\x9B", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {0}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {3}, - }, - }; - return v; -} - -using TestTokenizeSingleWord = testing::TestWithParam<Spec>; - -TEST_P(TestTokenizeSingleWord, Test) { - const Spec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token, - /*no_pretokenization=*/true)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - FastWordpieceTokenizer::Create(flatbuffer.data())); - - std::vector<std::string> output_tokens; - std::vector<int> output_ids; - std::vector<int> output_begin_offsets; - std::vector<int> output_end_offsets; - tokenizer.Tokenize(spec.input, &output_tokens, &output_ids, - &output_begin_offsets, &output_end_offsets); - EXPECT_THAT(output_tokens, spec.expected_tokens); - EXPECT_THAT(output_ids, spec.expected_token_ids); - EXPECT_THAT(output_begin_offsets, spec.expected_token_start_offsets); - EXPECT_THAT(output_end_offsets, spec.expected_token_end_offsets); -} - -TEST_P(TestTokenizeSingleWord, TestNoOutputPieces) { - const Spec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token, - true /* no_pretokenization */)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - FastWordpieceTokenizer::Create(flatbuffer.data())); - - std::vector<int> output_ids; - std::vector<int> output_begin_offsets; - std::vector<int> output_end_offsets; - tokenizer.Tokenize(spec.input, &output_ids, &output_begin_offsets, - &output_end_offsets); - EXPECT_THAT(output_ids, spec.expected_token_ids); - EXPECT_THAT(output_begin_offsets, spec.expected_token_start_offsets); - EXPECT_THAT(output_end_offsets, spec.expected_token_end_offsets); -} - -TEST_P(TestTokenizeSingleWord, TestNoOutputPiecesOnlyOutputIds) { - const Spec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token, - true /* no_pretokenization */)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - FastWordpieceTokenizer::Create(flatbuffer.data())); - - std::vector<int> output_ids; - tokenizer.Tokenize(spec.input, &output_ids); - EXPECT_THAT(output_ids, spec.expected_token_ids); -} - -TEST_P(TestTokenizeSingleWord, TestNoOutputPiecesWithPositiveSentenceOffsets) { - const Spec& spec = GetParam(); - const int offset_in_sentence = 123; - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token, - true /* no_pretokenization */)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - 
FastWordpieceTokenizer::Create(flatbuffer.data())); - - std::vector<int> output_ids; - std::vector<int> output_begin_offsets; - std::vector<int> output_end_offsets; - std::vector<int> expected_token_start_offsets( - spec.expected_token_start_offsets); - std::vector<int> expected_token_end_offsets(spec.expected_token_end_offsets); - - for (int& offset : expected_token_start_offsets) { - offset += offset_in_sentence; - } - for (int& offset : expected_token_end_offsets) { - offset += offset_in_sentence; - } - - tokenizer.Tokenize(spec.input, &output_ids, &output_begin_offsets, - &output_end_offsets, - /*input_word_offset_in_text=*/offset_in_sentence); - EXPECT_THAT(output_begin_offsets, expected_token_start_offsets); - EXPECT_THAT(output_end_offsets, expected_token_end_offsets); -} - -INSTANTIATE_TEST_SUITE_P( - FastWordpieceTokenizerParameterizedTest, - TestTokenizeSingleWord, - testing::ValuesIn(GetTestSpecsForTokenizeSingleWord())); - -// Test End-to-end FastWordPieceTokenization for tokenizing general texts. -const std::vector<Spec>& GetTestSpecsForTokenizeText() { - static const std::vector<Spec>& v = *new std::vector<Spec>{ - // Test suite 1. End-to-end test including whitespace tokenization. - // Test 0: Input is empty. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "", - .expected_tokens = {}, - .expected_token_ids = {}, - .expected_token_start_offsets = {}, - .expected_token_end_offsets = {}, - }, - // Test 1: Input has only spaces. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " \t ", - .expected_tokens = {}, - .expected_token_ids = {}, - .expected_token_start_offsets = {}, - .expected_token_end_offsets = {}, - }, - // Test 2: Input is a single word. Result is OK. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdef", - .expected_tokens = {"abc", "##de", "##f"}, - .expected_token_ids = {1, 3, 6}, - .expected_token_start_offsets = {0, 3, 5}, - .expected_token_end_offsets = {3, 5, 6}, - }, - // Test 3: Input is a single word. Result is <unk>. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcd", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {4}, - }, - // Test 4: Input contains multiple words, with several whitespaces in the - // middle. Result is OK. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdef \t\t \tabcf", - .expected_tokens = {"abc", "##de", "##f", "abc", "##f"}, - .expected_token_ids = {1, 3, 6, 1, 6}, - .expected_token_start_offsets = {0, 3, 5, 11, 14}, - .expected_token_end_offsets = {3, 5, 6, 14, 15}, - }, - // Test 5: Input has multiple words, with leading and trailing spaces. 
- { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "\tabcdef abcf ", - .expected_tokens = {"abc", "##de", "##f", "abc", "##f"}, - .expected_token_ids = {1, 3, 6, 1, 6}, - .expected_token_start_offsets = {1, 4, 6, 9, 12}, - .expected_token_end_offsets = {4, 6, 7, 12, 13}, - }, - // Test 6: Input contains suffix indicator as words. Suffix indicator is - // in vocab. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>", "##"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "## abcde ## ##a", - .expected_tokens = {"<unk>", "<unk>", "abc", "##de", "<unk>", "<unk>", - "<unk>", "<unk>", "a"}, - .expected_token_ids = {8, 8, 1, 3, 8, 8, 8, 8, 0}, - .expected_token_start_offsets = {0, 1, 3, 6, 9, 10, 13, 14, 15}, - .expected_token_end_offsets = {1, 2, 6, 8, 10, 11, 14, 15, 16}, - }, - // Test 7: Input contains suffix indicator as words. Suffix indicator is - // in vocab. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>", "##"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "## abcde ## ##a ##f", - .expected_tokens = {"<unk>", "<unk>", "abc", "##de", "<unk>", "<unk>", - "<unk>", "<unk>", "a", "<unk>", "<unk>", "<unk>"}, - .expected_token_ids = {8, 8, 1, 3, 8, 8, 8, 8, 0, 8, 8, 8}, - .expected_token_start_offsets = {0, 1, 3, 6, 9, 10, 13, 14, 15, 17, - 18, 19}, - .expected_token_end_offsets = {1, 2, 6, 8, 10, 11, 14, 15, 16, 18, 19, - 20}, - }, - // Test 8: Input contains suffix indicator as words. Suffix indicator is - // not in vocab. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"<unk>", "<unk>"}, - .expected_token_ids = {8, 8}, - .expected_token_start_offsets = {0, 1}, - .expected_token_end_offsets = {1, 2}, - }, - // Test 9: Input contains unseen character words. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " a \tabcdeX \rabcdefghz abcdeXfghz Xabc abcd", - .expected_tokens = {"a", "<unk>", "abc", "##de", "##f", "##ghz", - "<unk>", "<unk>", "<unk>"}, - .expected_token_ids = {0, 8, 1, 3, 6, 7, 8, 8, 8}, - .expected_token_start_offsets = {1, 4, 12, 15, 17, 18, 22, 33, 38}, - .expected_token_end_offsets = {2, 10, 15, 17, 18, 21, 32, 37, 42}, - }, - // Test 10: Input contains untokenizable words. No spaces before or after. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdefgx", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {8}, - }, - // Test 11: Input contains untokenizable words. One space before. 
- { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " abcdefgx", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {1}, - .expected_token_end_offsets = {9}, - }, - // Test 12: Input contains untokenizable words. One space after. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdefgx ", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {0}, - .expected_token_end_offsets = {8}, - }, - // Test 13: Input has untokenizable words. One space before and after. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " abcdefgx ", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {8}, - .expected_token_start_offsets = {1}, - .expected_token_end_offsets = {9}, - }, - // Test 14: Input contains mix words with unseen characters. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " a \tabcdeX \rabcdefghz abcdeXfghz Xabc", - .expected_tokens = {"a", "<unk>", "abc", "##de", "##f", "##ghz", - "<unk>", "<unk>"}, - .expected_token_ids = {0, 8, 1, 3, 6, 7, 8, 8}, - .expected_token_start_offsets = {1, 4, 12, 15, 17, 18, 22, 33}, - .expected_token_end_offsets = {2, 10, 15, 17, 18, 21, 32, 37}, - }, - // Test 15: Another basic test. - { - .vocab = {"<unk>", "<s>", "</s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "unwanted running", - .expected_tokens = {"un", "##want", "##ed", "runn", "##ing"}, - .expected_token_ids = {7, 4, 5, 8, 9}, - .expected_token_start_offsets = {0, 2, 6, 9, 13}, - .expected_token_end_offsets = {2, 6, 8, 13, 16}, - }, - // Test 16: Input has unseen characters. - { - .vocab = {"<unk>", "<s>", "</s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "unwantedX running", - .expected_tokens = {"<unk>", "runn", "##ing"}, - .expected_token_ids = {0, 8, 9}, - .expected_token_start_offsets = {0, 10, 14}, - .expected_token_end_offsets = {9, 14, 17}, - }, - // Test 17: Input contains mix words with untokenizable words. - { - .vocab = {"a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", - "##ghz", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " a \tabcdeX \rabcdefghz abcdeXfghz ab", - .expected_tokens = {"a", "<unk>", "abc", "##de", "##f", "##ghz", - "<unk>", "<unk>"}, - .expected_token_ids = {0, 8, 1, 3, 6, 7, 8, 8}, - .expected_token_start_offsets = {1, 4, 12, 15, 17, 18, 22, 33}, - .expected_token_end_offsets = {2, 10, 15, 17, 18, 21, 32, 35}, - }, - // Test 18: Input and vocab contains Unicode tokens. The Trie matching - // loop would stop at matching a partial word. 
- { - .vocab = {"\xE2\x82\xAC", "a", "abc", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " \xE2\x82\xAD abc", - .expected_tokens = {"<unk>", "abc"}, - .expected_token_ids = {3, 2}, - .expected_token_start_offsets = {1, 5}, - .expected_token_end_offsets = {4, 8}, - }, - // Test 19: Contains suffix indicator as a word. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "...", "#", "###"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##", - .expected_tokens = {"#", "#"}, - .expected_token_ids = {13, 13}, - .expected_token_start_offsets = {0, 1}, - .expected_token_end_offsets = {1, 2}, - }, - // Test 20: unknown words. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " X wantXwanted. \t ", - .expected_tokens = {"<unk>", "<unk>", "."}, - .expected_token_ids = {1, 1, 10}, - .expected_token_start_offsets = {1, 3, 14}, - .expected_token_end_offsets = {2, 14, 15}, - }, - // Test 21: After the loop, the next character is whitespace. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted. \t wa..", - .expected_tokens = {"want", "##ed", ".", "wa", ".", "."}, - .expected_token_ids = {3, 5, 10, 6, 10, 10}, - .expected_token_start_offsets = {2, 6, 8, 13, 15, 16}, - .expected_token_end_offsets = {6, 8, 9, 15, 16, 17}, - }, - // Test 22: After the loop, the next character is not a whitespace. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted.x \t wa..", - .expected_tokens = {"want", "##ed", ".", "<unk>", "wa", ".", "."}, - .expected_token_ids = {3, 5, 10, 1, 6, 10, 10}, - .expected_token_start_offsets = {2, 6, 8, 9, 14, 16, 17}, - .expected_token_end_offsets = {6, 8, 9, 10, 16, 17, 18}, - }, - // Test 23: After the loop, the next character is not a whitespace. And a - // trailing space. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted.x \t wa.. \n", - .expected_tokens = {"want", "##ed", ".", "<unk>", "wa", ".", "."}, - .expected_token_ids = {3, 5, 10, 1, 6, 10, 10}, - .expected_token_start_offsets = {2, 6, 8, 9, 14, 16, 17}, - .expected_token_end_offsets = {6, 8, 9, 10, 16, 17, 18}, - }, - // Test 24: After the loop, it's in the middle of a whitespace. The - // previous is tokenizable. - { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", ".", "##.", "...", - "##\xc2\xa1"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted\xc2\xa0\t wa", - .expected_tokens = {"want", "##ed", "wa"}, - .expected_token_ids = {1, 3, 4}, - .expected_token_start_offsets = {2, 6, 12}, - .expected_token_end_offsets = {6, 8, 14}, - }, - // Test 25: After the loop, it's in the middle of a whitespace. The - // previous is tokenizable (a punctuation). 
- { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", ".", "##.", "...", - "\xc2\xa1", "##\xc2\xa1"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted.\xc2\xa0\t wa", - .expected_tokens = {"want", "##ed", ".", "wa"}, - .expected_token_ids = {1, 3, 5, 4}, - .expected_token_start_offsets = {2, 6, 8, 13}, - .expected_token_end_offsets = {6, 8, 9, 15}, - }, - // Test 26: After the loop, it's in the middle of a whitespace. The - // previous is untokenizable. - { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", ".", "##.", "...", - "##e\xC2\xA1", "##\xC2\xA1"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wante\xc2\xa0\t wa", - .expected_tokens = {"<unk>", "wa"}, - .expected_token_ids = {0, 4}, - .expected_token_start_offsets = {2, 11}, - .expected_token_end_offsets = {7, 13}, - }, - - // Test suite 2. End-to-end test including whitespace tokenization and - // split on punctuation. - // Test 27. Basic case 1. - { - .vocab = - { - "<unk>", "don", "##'", "##t", "tread", "##ness", - "hel", "##lo", "there", "my", "na", "##me", - "is", "ter", "##ry", "what", "##cha", "##ma", - "##call", "##it?", "you", "said", - }, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "hello there my name is terry", - .expected_tokens = {"hel", "##lo", "there", "my", "na", "##me", "is", - "ter", "##ry"}, - .expected_token_ids = {6, 7, 8, 9, 10, 11, 12, 13, 14}, - .expected_token_start_offsets = {0, 3, 6, 12, 15, 17, 20, 23, 26}, - .expected_token_end_offsets = {3, 5, 11, 14, 17, 19, 22, 26, 28}, - }, - // Test 28. Basic case 2. - { - .vocab = - { - "<unk>", "don", "##'", "##t", "tread", "##ness", - "hel", "##lo", "there", "my", "na", "##me", - "is", "ter", "##ry", "what", "##cha", "##ma", - "##call", "##it?", "you", "said", - }, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "whatchamacallit? you said", - .expected_tokens = {"<unk>", "<unk>", "you", "said"}, - .expected_token_ids = {0, 0, 20, 21}, - .expected_token_start_offsets = {0, 15, 17, 21}, - .expected_token_end_offsets = {15, 16, 20, 25}, - }, - // Test 29. Basic case 3. Punctuation is an independant word in the vocab. - { - .vocab = - { - "<unk>", "don", "##'", "##t", "tread", "##ness", - "hel", "##lo", "there", "my", "na", "##me", - "is", "ter", "##ry", "what", "##cha", "##ma", - "##call", "##it?", "you", "said", "##it", "?", - }, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "whatchamacallit? you said", - .expected_tokens = {"what", "##cha", "##ma", "##call", "##it", "?", - "you", "said"}, - .expected_token_ids = {15, 16, 17, 18, 22, 23, 20, 21}, - .expected_token_start_offsets = {0, 4, 7, 9, 13, 15, 17, 21}, - .expected_token_end_offsets = {4, 7, 9, 13, 15, 16, 20, 25}, - }, - // Test 30. Basic case 4 with untokenizable words. - { - .vocab = - { - "<unk>", "don", "'", "t", "tread", "##ness", - "hel", "##lo", "there", "my", "na", "##me", - "is", "ter", "##ry", "what", "##cha", "##ma", - "##call", "##it?", "you", "said", - }, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "don't tread cantfindme treadcantfindme", - .expected_tokens = {"don", "'", "t", "tread", "<unk>", "<unk>"}, - .expected_token_ids = {1, 2, 3, 4, 0, 0}, - .expected_token_start_offsets = {0, 3, 4, 6, 12, 23}, - .expected_token_end_offsets = {3, 4, 5, 11, 22, 38}, - }, - // Test 31: Basic case 5. 
- { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "unwanted.", - .expected_tokens = {"un", "##want", "##ed", "."}, - .expected_token_ids = {7, 4, 5, 10}, - .expected_token_start_offsets = {0, 2, 6, 8}, - .expected_token_end_offsets = {2, 6, 8, 9}, - }, - // Test 32: Basic case 6. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " want.wanted. \t ", - .expected_tokens = {"want", ".", "want", "##ed", "."}, - .expected_token_ids = {3, 10, 3, 5, 10}, - .expected_token_start_offsets = {2, 6, 7, 11, 13}, - .expected_token_end_offsets = {6, 7, 11, 13, 14}, - }, - // Test 33: Basic with unseen characters (as a single word). - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " X want.wanted. \t ", - .expected_tokens = {"<unk>", "want", ".", "want", "##ed", "."}, - .expected_token_ids = {1, 3, 10, 3, 5, 10}, - .expected_token_start_offsets = {1, 3, 7, 8, 12, 14}, - .expected_token_end_offsets = {2, 7, 8, 12, 14, 15}, - }, - // Test 34: Basic with unseen characters (in a word before a punctuation). - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " X wantX.wanted. \t ", - .expected_tokens = {"<unk>", "<unk>", ".", "want", "##ed", "."}, - .expected_token_ids = {1, 1, 10, 3, 5, 10}, - .expected_token_start_offsets = {1, 3, 8, 9, 13, 15}, - .expected_token_end_offsets = {2, 8, 9, 13, 15, 16}, - }, - // Test 35: Basic with unseen characters (in the middle of a word). - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " X wantXwanted. \t ", - .expected_tokens = {"<unk>", "<unk>", "."}, - .expected_token_ids = {1, 1, 10}, - .expected_token_start_offsets = {1, 3, 14}, - .expected_token_end_offsets = {2, 14, 15}, - }, - // Test 36: Basic with unseen characters and a leading period. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " X .wantXwanted. \t ", - .expected_tokens = {"<unk>", ".", "<unk>", "."}, - .expected_token_ids = {1, 10, 1, 10}, - .expected_token_start_offsets = {1, 3, 4, 15}, - .expected_token_end_offsets = {2, 4, 15, 16}, - }, - // Test 37: Contains ellipsis (as "....."). - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted. 
\t wa.....", - .expected_tokens = {"want", "##ed", ".", "wa", ".", ".", ".", ".", - "."}, - .expected_token_ids = {3, 5, 10, 6, 10, 10, 10, 10, 10}, - .expected_token_start_offsets = {2, 6, 8, 13, 15, 16, 17, 18, 19}, - .expected_token_end_offsets = {6, 8, 9, 15, 16, 17, 18, 19, 20}, - }, - // Test 38: After the loop, the next character is an unknown punctuation; - // the previous can be tokenized. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted, \t wa", - .expected_tokens = {"want", "##ed", "<unk>", "wa"}, - .expected_token_ids = {3, 5, 1, 6}, - .expected_token_start_offsets = {2, 6, 8, 13}, - .expected_token_end_offsets = {6, 8, 9, 15}, - }, - // Test 39: After the loop, the next character is an unknown punctuation; - // the previous can be tokenized. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted., \t wa", - .expected_tokens = {"want", "##ed", ".", "<unk>", "wa"}, - .expected_token_ids = {3, 5, 10, 1, 6}, - .expected_token_start_offsets = {2, 6, 8, 9, 14}, - .expected_token_end_offsets = {6, 8, 9, 10, 16}, - }, - // Test 40: After the loop, the next character is an unknown punctuation; - // the previous is empty. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " , wanted, \t wa", - .expected_tokens = {"<unk>", "want", "##ed", "<unk>", "wa"}, - .expected_token_ids = {1, 3, 5, 1, 6}, - .expected_token_start_offsets = {1, 3, 7, 9, 14}, - .expected_token_end_offsets = {2, 7, 9, 10, 16}, - }, - // Test 41: After the loop, the next character is an unknown punctuation; - // the previous can not be tokenized. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wante, \t wa", - .expected_tokens = {"<unk>", "<unk>", "wa"}, - .expected_token_ids = {1, 1, 6}, - .expected_token_start_offsets = {2, 7, 12}, - .expected_token_end_offsets = {7, 8, 14}, - }, - // Test 42: After the loop, in the middle of an unknown punctuation. - // Previous is tokenizable. - { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", ".", "##.", "...", - /*U+05C3*/ "\xD7\x83"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted\xd7\x86xyz \t wa", - .expected_tokens = {"want", "##ed", "<unk>", "<unk>", "wa"}, - .expected_token_ids = {1, 3, 0, 0, 4}, - .expected_token_start_offsets = {2, 6, 8, 10, 17}, - .expected_token_end_offsets = {6, 8, 10, 13, 19}, - }, - // Test 43: After the loop, in the middle of an unknown punctuation. - // Previous is tokenizable. 
- { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", ".", "##.", "...", - /*U+05C3*/ "\xD7\x83"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted.\xd7\x86xyz \t wa", - .expected_tokens = {"want", "##ed", ".", "<unk>", "<unk>", "wa"}, - .expected_token_ids = {1, 3, 5, 0, 0, 4}, - .expected_token_start_offsets = {2, 6, 8, 9, 11, 18}, - .expected_token_end_offsets = {6, 8, 9, 11, 14, 20}, - }, - // Test 44: After the loop, in the middle of an unknown punctuation. - // Previous is not tokenizable. - { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", ".", "##.", "...", - /*U+05C3*/ "##e\xD7\x83", - /*U+05C3*/ "\xD7\x83"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wante\xd7\x86xyz \t wa", - .expected_tokens = {"<unk>", "<unk>", "<unk>", "wa"}, - .expected_token_ids = {0, 0, 0, 4}, - .expected_token_start_offsets = {2, 7, 9, 16}, - .expected_token_end_offsets = {7, 9, 12, 18}, - }, - // Test 45: Fails to match the first character in the beginning. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "xyz \t wa", - .expected_tokens = {"<unk>", "wa"}, - .expected_token_ids = {1, 6}, - .expected_token_start_offsets = {0, 7}, - .expected_token_end_offsets = {3, 9}, - }, - // Test 46: After the loop, the next character is not a whitespace nor - // punctuation. Trie fails to recognize the first character. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wanted.xyz \t wa", - .expected_tokens = {"want", "##ed", ".", "<unk>", "wa"}, - .expected_token_ids = {3, 5, 10, 1, 6}, - .expected_token_start_offsets = {2, 6, 8, 9, 16}, - .expected_token_end_offsets = {6, 8, 9, 12, 18}, - }, - // Test 47: After the loop, the next character is not a whitespace nor - // punctuation. Previous is not tokenizable. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wantedxyz \t wa", - .expected_tokens = {"<unk>", "wa"}, - .expected_token_ids = {1, 6}, - .expected_token_start_offsets = {2, 15}, - .expected_token_end_offsets = {11, 17}, - }, - // Test 48: After the loop, the next character is not a whitespace nor - // punctuation. Previous is not tokenizable. - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = " wantexyz \t wa", - .expected_tokens = {"<unk>", "wa"}, - .expected_token_ids = {1, 6}, - .expected_token_start_offsets = {2, 14}, - .expected_token_end_offsets = {10, 16}, - }, - // Test 49: Unknown punctuation followed by unseen character. 
- { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", ".", "##.", "...", - /*U+05C3*/ "##e\xD7\x83", - /*U+05C3*/ "\xD7\x83"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "wanted\xd7\x86xyz", - .expected_tokens = {"want", "##ed", "<unk>", "<unk>"}, - .expected_token_ids = {1, 3, 0, 0}, - .expected_token_start_offsets = {0, 4, 6, 8}, - .expected_token_end_offsets = {4, 6, 8, 11}, - }, - // Test 50: Ellipsis is mapped to "<unk>"s when "." is not in vocab. - { - .vocab = {"<unk>", "want", "##want", "##ed", "wa", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "wanted...", - .expected_tokens = {"want", "##ed", "<unk>", "<unk>", "<unk>"}, - .expected_token_ids = {1, 3, 0, 0, 0}, - .expected_token_start_offsets = {0, 4, 6, 7, 8}, - .expected_token_end_offsets = {4, 6, 7, 8, 9}, - }, - - // Test suite 3. End-to-end test including whitespace and punctuation - // tokenization on max_bytes_per_token = 10. - // Test 51: Word length = 9 (i.e., max_bytes_per_token-1). - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "##\xD7\x83"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " 012345678 ", - .expected_tokens = {"01234", "##5678"}, - .expected_token_ids = {1, 2}, - .expected_token_start_offsets = {2, 7}, - .expected_token_end_offsets = {7, 11}, - }, - // Test 52: Word length = 10 (i.e., max_bytes_per_token). - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "##\xD7\x83"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " 0123456789 ", - .expected_tokens = {"01234", "##56789"}, - .expected_token_ids = {1, 3}, - .expected_token_start_offsets = {2, 7}, - .expected_token_end_offsets = {7, 12}, - }, - // Test 53: Word length = 9, followed by a multi-bytes Unicode punctuation - // char, which is a hebrew punctuation "sof pasquq". - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "##\xD7\x83"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " 012345678\xD7\x83 ", - .expected_tokens = {"01234", "##5678", "<unk>"}, - .expected_token_ids = {1, 2, 0}, - .expected_token_start_offsets = {2, 7, 11}, - .expected_token_end_offsets = {7, 11, 13}, - }, - // Test 54: Word length = 11 (i.e., max_bytes_per_token+1). The 10th - // char is on Unicode boundary. - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "##\xD7\x83", "##a"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " 0123456789a ", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {0}, - .expected_token_start_offsets = {2}, - .expected_token_end_offsets = {13}, - }, - // Test 55: Word length = 10 (i.e., max_bytes_per_token). The next char - // (\xe2\x80\x80) is a whitespace. - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "##\xD7\x83", "##a"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " 0123456789\xe2\x80\x80 ", - .expected_tokens = {"01234", "##56789"}, - .expected_token_ids = {1, 3}, - .expected_token_start_offsets = {2, 7}, - .expected_token_end_offsets = {7, 12}, - }, - // Test 56: Word length = 9 (i.e., max_bytes_per_token-1). The next is - // a multi-byte whitespace. The 10th char is in the middle of the - // whitespace. 
- { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "##\xD7\x83", "##a", "##\xe2\x80\x8B"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " 012345678\xe2\x80\x80 ", - .expected_tokens = {"01234", "##5678"}, - .expected_token_ids = {1, 2}, - .expected_token_start_offsets = {2, 7}, - .expected_token_end_offsets = {7, 11}, - }, - // Test 57: Word length = 9 (i.e., max_bytes_per_token-1). The next is a - // multi-byte whitespace. The 10th char is in the middle of the - // whitespace. The word is not tokenizable. - { - .vocab = {"<unk>", "01234", "##56789", "##5678\xe2\x80\x8B", - /*U+05C3*/ "##\xD7\x83", "##a", "##\xe2\x80\x8B"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " 012345678\xe2\x80\x80 ", - .expected_tokens = {"<unk>"}, - .expected_token_ids = {0}, - .expected_token_start_offsets = {2}, - .expected_token_end_offsets = {11}, - }, - // Test 58: Word length = 9 (i.e., max_bytes_per_token-1) plus a - // trailing punctuation. - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "##\xD7\x83", "##a", "."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " .012345678. ", - .expected_tokens = {".", "01234", "##5678", "."}, - .expected_token_ids = {6, 1, 2, 6}, - .expected_token_start_offsets = {2, 3, 8, 12}, - .expected_token_end_offsets = {3, 8, 12, 13}, - }, - // Test 59: Word length = 9 (i.e., max_bytes_per_token-1) plus a - // trailing punctuation, followed by more words. - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "\xD7\x83", "##a", ".", "...", "a"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " .012345678.a ", - .expected_tokens = {".", "01234", "##5678", ".", "a"}, - .expected_token_ids = {6, 1, 2, 6, 8}, - .expected_token_start_offsets = {2, 3, 8, 12, 13}, - .expected_token_end_offsets = {3, 8, 12, 13, 14}, - }, - // Test 60: Word length = 10 (i.e., max_bytes_per_token) plus a - // trailing punctuation, and the word is tokenizable. - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "\xD7\x83", "##a", ".", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " .0123456789. ", - .expected_tokens = {".", "01234", "##56789", "."}, - .expected_token_ids = {6, 1, 3, 6}, - .expected_token_start_offsets = {2, 3, 8, 13}, - .expected_token_end_offsets = {3, 8, 13, 14}, - }, - // Test 61: Word length = 10 (i.e., max_bytes_per_token) plus a - // trailing unknown punctuation, and the word is tokenizable. - { - .vocab = {"<unk>", "01234", "##5678", "##56789", "##a", ".", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " .0123456789\xD7\x83 ", - .expected_tokens = {".", "01234", "##56789", "<unk>"}, - .expected_token_ids = {5, 1, 3, 0}, - .expected_token_start_offsets = {2, 3, 8, 13}, - .expected_token_end_offsets = {3, 8, 13, 15}, - }, - // Test 62: Word length = 11 (i.e., max_bytes_per_token+1). - { - .vocab = {"<unk>", "01234", "##5678", "##56789", - /*U+05C3*/ "\xD7\x83", "##a", ".", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " .0123456789Z ", - .expected_tokens = {".", "<unk>"}, - .expected_token_ids = {6, 0}, - .expected_token_start_offsets = {2, 3}, - .expected_token_end_offsets = {3, 14}, - }, - // Test 63: Word length = 11 (i.e., max_bytes_per_token+1). 
- // The input would be tokenizable if `max_byte_per_token` is set to be - // greater or equal to `word_length`. - { - .vocab = {"<unk>", "0123456789", "##0123456789", "##012345678abc", - /*U+05C3*/ "\xD7\x83", "##a", ".", "..."}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = " .012345678a. ", - .expected_tokens = {".", "<unk>", "."}, - .expected_token_ids = {6, 0, 6}, - .expected_token_start_offsets = {2, 3, 13}, - .expected_token_end_offsets = {3, 13, 14}, - }, - // Test 64: Input is "<unk>". - { - .vocab = {"<unk>", "0123456789", "##0123456789", "##012345678abc", - /*U+05C3*/ "\xD7\x83", "##a", ".", "...", ">"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "<unk>.", - .expected_tokens = {"<unk>", "<unk>", ">", "."}, - .expected_token_ids = {0, 0, 8, 6}, - .expected_token_start_offsets = {0, 1, 4, 5}, - .expected_token_end_offsets = {1, 4, 5, 6}, - }, - - // Test suite 4: Test different suffix indicators. - // Test 65: Suffix indicator is "##". Input contains "##". - { - .vocab = {"<pad>", "<unk>", "<s>", "want", "##want", "##ed", "wa", - "un", "runn", "##ing", ".", "##.", "...", "#", "##", "###"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "## running", - .expected_tokens = {"#", "#", "runn", "##ing"}, - .expected_token_ids = {13, 13, 8, 9}, - .expected_token_start_offsets = {0, 1, 3, 7}, - .expected_token_end_offsets = {1, 2, 7, 10}, - }, - // Test 66: Test suffix indicator "<suffix>". - { - .vocab = {"<unk>", "want", "<suffix>want", "<suffix>ed", "wa", "un", - "runn", "<suffix>ing", "#", "."}, - .unk_token = "<unk>", - .suffix_indicator = "<suffix>", - .max_bytes_per_token = 100, - .input = "## running. <", - .expected_tokens = {"#", "#", "runn", "<suffix>ing", ".", "<unk>"}, - .expected_token_ids = {8, 8, 6, 7, 9, 0}, - .expected_token_start_offsets = {0, 1, 3, 7, 10, 12}, - .expected_token_end_offsets = {1, 2, 7, 10, 11, 13}, - }, - // Test 67: Test suffix indicator "suffix>". Suffix indicator appears in - // the input as a single word after a punctuation. - { - .vocab = {"<unk>", "want", "suffix>want", "suffix>ed", "wa", "un", - "runn", "suffix>ing", "#", "su", "suffix>ffix", "suffix"}, - .unk_token = "<unk>", - .suffix_indicator = "suffix>", - .max_bytes_per_token = 100, - .input = "#suffix> running", - .expected_tokens = {"#", "suffix", "<unk>", "runn", "suffix>ing"}, - .expected_token_ids = {8, 11, 0, 6, 7}, - .expected_token_start_offsets = {0, 1, 7, 9, 13}, - .expected_token_end_offsets = {1, 7, 8, 13, 16}, - }, - // Test 68: Test suffix indicator "suffix>". Suffix indicator appears in - // the input as a single word after a punctuation. - { - .vocab = {"<unk>", "want", "suffix>want", "suffix>ed", "wa", "un", - "runn", "suffix>ing", "#", "su", "suffix>ffix"}, - .unk_token = "<unk>", - .suffix_indicator = "suffix>", - .max_bytes_per_token = 100, - .input = "#suffix> running", - .expected_tokens = {"#", "su", "suffix>ffix", "<unk>", "runn", - "suffix>ing"}, - .expected_token_ids = {8, 9, 10, 0, 6, 7}, - .expected_token_start_offsets = {0, 1, 3, 7, 9, 13}, - .expected_token_end_offsets = {1, 3, 7, 8, 13, 16}, - }, - // Test 69: Test suffix indicator "<suffix". Suffix indicator appears in - // the input as a single word after a punctuation. 
- { - .vocab = {"<unk>", "runn", "<suffixing", "#", "su", "<suffixffix"}, - .unk_token = "<unk>", - .suffix_indicator = "<suffix", - .max_bytes_per_token = 100, - .input = "#<suffix running", - .expected_tokens = {"#", "<unk>", "su", "<suffixffix", "runn", - "<suffixing"}, - .expected_token_ids = {3, 0, 4, 5, 1, 2}, - .expected_token_start_offsets = {0, 1, 2, 4, 9, 13}, - .expected_token_end_offsets = {1, 2, 4, 8, 13, 16}, - }, - // Test 70: Test suffix indicator "<suffix". Input "<suffixing" appears in - // the vocab as a leading prefix of a word. - { - .vocab = {"<unk>", "runn", "<suffixing", "<", "su", "<suffixffix"}, - .unk_token = "<unk>", - .suffix_indicator = "<suffix", - .max_bytes_per_token = 100, - .input = "<suffixing running", - .expected_tokens = {"<", "su", "<suffixffix", "<suffixing", "runn", - "<suffixing"}, - .expected_token_ids = {3, 4, 5, 2, 1, 2}, - .expected_token_start_offsets = {0, 1, 3, 7, 11, 15}, - .expected_token_end_offsets = {1, 3, 7, 10, 15, 18}, - }, - // Test 71: Test suffix indicator ">>>". Suffix indicator appears in the - // input. - { - .vocab = {"<unk>", "want", ">>>want", ">>>ed", "wa", "un", "runn", - ">>>ing", "#", "su", ">>>ffix"}, - .unk_token = "<unk>", - .suffix_indicator = ">>>", - .max_bytes_per_token = 100, - .input = "#suffix>>> running", - .expected_tokens = {"#", "su", ">>>ffix", "<unk>", "<unk>", "<unk>", - "runn", ">>>ing"}, - .expected_token_ids = {8, 9, 10, 0, 0, 0, 6, 7}, - .expected_token_start_offsets = {0, 1, 3, 7, 8, 9, 11, 15}, - .expected_token_end_offsets = {1, 3, 7, 8, 9, 10, 15, 18}, - }, - // Test 72: Test suffix indicator "<<suffix". Suffix indicator appears in - // the input and the vocab. - { - .vocab = {"<unk>", "runn", "<<suffixing", "<", "su", "<<suffixffix"}, - .unk_token = "<unk>", - .suffix_indicator = "<<suffix", - .max_bytes_per_token = 100, - .input = "<<suffix running", - .expected_tokens = {"<", "<", "su", "<<suffixffix", "runn", - "<<suffixing"}, - .expected_token_ids = {3, 3, 4, 5, 1, 2}, - .expected_token_start_offsets = {0, 1, 2, 4, 9, 13}, - .expected_token_end_offsets = {1, 2, 4, 8, 13, 16}, - }, - // Test 73: Test suffix indicator "XYZ". Input contains "XYZ". - { - .vocab = {"<unk>", "runn", "XYZing", "<", "X", "XYZYZ"}, - .unk_token = "<unk>", - .suffix_indicator = "XYZ", - .max_bytes_per_token = 100, - .input = "XYZ running", - .expected_tokens = {"X", "XYZYZ", "runn", "XYZing"}, - .expected_token_ids = {4, 5, 1, 2}, - .expected_token_start_offsets = {0, 1, 4, 8}, - .expected_token_end_offsets = {1, 3, 8, 11}, - }, - // Test 74: Test suffix indicator "XYZ", which appears in the - // vocab and input sentence as a single word. - { - .vocab = {"<unk>", "runn", "XYZing", "<", "X", "XYZYZ", "XYZ"}, - .unk_token = "<unk>", - .suffix_indicator = "XYZ", - .max_bytes_per_token = 100, - .input = "XYZ running", - .expected_tokens = {"XYZ", "runn", "XYZing"}, - .expected_token_ids = {6, 1, 2}, - .expected_token_start_offsets = {0, 4, 8}, - .expected_token_end_offsets = {3, 8, 11}, - }, - // Test suite 5: Test multi-byte punctuation and Chinese characters. - // Test 75: Contains a multi-bytes Unicode punctuation char "\xEF\xBC\x8C" - // followed by a tokenizable word. 
- { - .vocab = {"<unk>", "want", "##ed", "ABC", "\xEF\xBC\x8C", "##ABC"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = "wanted\xEF\xBC\x8C" - "ABC", - .expected_tokens = {"want", "##ed", "\xEF\xBC\x8C", "ABC"}, - .expected_token_ids = {1, 2, 4, 3}, - .expected_token_start_offsets = {0, 4, 6, 9}, - .expected_token_end_offsets = {4, 6, 9, 12}, - }, - // Test 76: Contains a multi-bytes Unicode punctuation char "\xEF\xBC\x8C" - // (absent in the vocab) followed by a tokenizable word. - { - .vocab = {"<unk>", "want", "##ed", "ABC", "\xEF\xBC\x8C", "##ABC"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = "wanted\xD7\x83" - "ABC", - .expected_tokens = {"want", "##ed", "<unk>", "ABC"}, - .expected_token_ids = {1, 2, 0, 3}, - .expected_token_start_offsets = {0, 4, 6, 8}, - .expected_token_end_offsets = {4, 6, 8, 11}, - }, - // Test 77: Contains a multi-bytes Unicode chinese character \xe4\xb8\x81, - // which is considered as a single word in Bert, so it's treated in the - // same way as punctuation characters by the tokenizer. - { - .vocab = {"<unk>", "want", "##ed", "ABC", "\xe4\xb8\x81", "##ABC"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = "wanted\xe4\xb8\x81" - "ABC", - .expected_tokens = {"want", "##ed", "\xe4\xb8\x81", "ABC"}, - .expected_token_ids = {1, 2, 4, 3}, - .expected_token_start_offsets = {0, 4, 6, 9}, - .expected_token_end_offsets = {4, 6, 9, 12}, - }, - // Test 78: Contains a multi-bytes Unicode chinese character \xe4\xb8\x81. - { - .vocab = {"<unk>", "want", "##ed", "ABC", "##ABC", - "wanted\xe4\xb8\x81"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = "wanted\xe4\xb8\x81" - "ABC", - .expected_tokens = {"want", "##ed", "<unk>", "ABC"}, - .expected_token_ids = {1, 2, 0, 3}, - .expected_token_start_offsets = {0, 4, 6, 9}, - .expected_token_end_offsets = {4, 6, 9, 12}, - }, - // Test 79: Contains a multi-bytes Unicode chinese character \xe4\xb8\x81, - // which is included in the vocab as the suffix of a word. 
- { - .vocab = {"<unk>", "want", "##ed", "ABC", "##ABC", - "wanted\xe4\xb8\x81"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 10, - .input = "wanted\xe4\xb8\x81" - "ABC", - .expected_tokens = {"want", "##ed", "<unk>", "ABC"}, - .expected_token_ids = {1, 2, 0, 3}, - .expected_token_start_offsets = {0, 4, 6, 9}, - .expected_token_end_offsets = {4, 6, 9, 12}, - }}; - return v; -} - -using TestTokenizeText = testing::TestWithParam<Spec>; - -TEST_P(TestTokenizeText, Test) { - const Spec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - FastWordpieceTokenizer::Create(flatbuffer.data())); - - std::vector<std::string> output_tokens; - std::vector<int> output_ids; - std::vector<int> output_begin_offsets; - std::vector<int> output_end_offsets; - tokenizer.Tokenize(spec.input, &output_tokens, &output_ids, - &output_begin_offsets, &output_end_offsets); - EXPECT_THAT(output_tokens, spec.expected_tokens); - EXPECT_THAT(output_ids, spec.expected_token_ids); - EXPECT_THAT(output_begin_offsets, spec.expected_token_start_offsets); - EXPECT_THAT(output_end_offsets, spec.expected_token_end_offsets); -} - -TEST_P(TestTokenizeText, TestNoOutputPieces) { - const Spec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - FastWordpieceTokenizer::Create(flatbuffer.data())); - - std::vector<int> output_ids; - std::vector<int> output_begin_offsets; - std::vector<int> output_end_offsets; - tokenizer.Tokenize(spec.input, &output_ids, &output_begin_offsets, - &output_end_offsets); - EXPECT_THAT(output_ids, spec.expected_token_ids); - EXPECT_THAT(output_begin_offsets, spec.expected_token_start_offsets); - EXPECT_THAT(output_end_offsets, spec.expected_token_end_offsets); -} - -TEST_P(TestTokenizeText, TestNoOutputPiecesOnlyOutputIds) { - const Spec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - FastWordpieceTokenizer::Create(flatbuffer.data())); - - std::vector<int> output_ids; - tokenizer.Tokenize(spec.input, &output_ids); - EXPECT_THAT(output_ids, spec.expected_token_ids); -} - -INSTANTIATE_TEST_SUITE_P(EndToEndFastWordpieceTokenizerParameterizedTest, - TestTokenizeText, - testing::ValuesIn(GetTestSpecsForTokenizeText())); - -// Test the detokenization function of FastWordPieceTokenizer. -const std::vector<Spec>& GetTestSpecsForTokenizeDetokenize() { - static const std::vector<Spec>& v = *new std::vector<Spec>{ - // Test 0: Input is a single word. - { - .vocab = {"a", "abc", "##de", "##defgxy", "##deh", "##f", "##ghz", - "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "abcdefghz", - .expected_token_ids = {1, 2, 5, 6}, - .expected_detokenized_text = "abcdefghz", - }, - // Test 1: Input is a sentence. - { - .vocab = {"a", "abc", "##de", "##c", "##f", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "a abc abcde ab", - .expected_token_ids = {0, 1, 1, 2, 5}, - .expected_detokenized_text = "a abc abcde <unk>", - }, - // Test 2: Input has the leading suffix indicator. 
- { - .vocab = {"a", "abc", "##de", "##deh", "##f", "<unk>"}, - .unk_token = "<unk>", - .suffix_indicator = "##", - .max_bytes_per_token = 100, - .input = "##deh abcde", - .expected_token_ids = {3, 1, 2}, - .expected_detokenized_text = "##deh abcde", - }, - }; - return v; -} -using TestTokenizeDetokenize = testing::TestWithParam<Spec>; - -TEST_P(TestTokenizeDetokenize, Test) { - const Spec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - std::string flatbuffer, - BuildModelAndExportToFlatBuffer(spec.vocab, spec.max_bytes_per_token, - spec.suffix_indicator, spec.unk_token, - /*no_pretokenization=*/true, - /*support_detokenization=*/true)); - ASSERT_OK_AND_ASSIGN(auto tokenizer, - FastWordpieceTokenizer::Create(flatbuffer.data())); - - // Test detokenization. - ASSERT_OK_AND_ASSIGN(auto output_text, - tokenizer.Detokenize(spec.expected_token_ids)); - EXPECT_THAT(output_text, spec.expected_detokenized_text); -} - -INSTANTIATE_TEST_SUITE_P( - FastWordpieceTokenizerDetokenizeParameterizedTest, - TestTokenizeDetokenize, - testing::ValuesIn(GetTestSpecsForTokenizeDetokenize())); - -} // namespace -} // namespace text -} // namespace tensorflow
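For context on what the deleted parameterized tests covered, here is a minimal sketch of the tokenize/detokenize round trip they exercise. It is assembled from the calls the tests make (BuildModelAndExportToFlatBuffer, FastWordpieceTokenizer::Create, Tokenize, Detokenize); the header paths, the model-builder namespace, and the sample vocabulary and input are assumptions, not taken from the removed file.

// Minimal round-trip sketch, assuming the builder lives in
// fast_wordpiece_tokenizer_model_builder.h as the tests' usage suggests.
// Vocab and input are illustrative only.
#include <string>
#include <vector>

#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer.h"
#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h"

void RoundTripExample() {
  const std::vector<std::string> vocab = {"a", "abc", "##de", "##f", "<unk>"};

  // Build the flatbuffer model, mirroring the arguments the deleted tests pass.
  auto flatbuffer = tensorflow::text::BuildModelAndExportToFlatBuffer(
      vocab, /*max_bytes_per_token=*/100, /*suffix_indicator=*/"##",
      /*unk_token=*/"<unk>", /*no_pretokenization=*/true,
      /*support_detokenization=*/true);
  if (!flatbuffer.ok()) return;

  auto tokenizer =
      tensorflow::text::FastWordpieceTokenizer::Create(flatbuffer->data());
  if (!tokenizer.ok()) return;

  // Tokenize into pieces, ids, and byte offsets, as in TestTokenizeText.
  std::vector<std::string> tokens;
  std::vector<int> ids, starts, ends;
  tokenizer->Tokenize("abcdef", &tokens, &ids, &starts, &ends);

  // Map the ids back to text, as in TestTokenizeDetokenize.
  auto text = tokenizer->Detokenize(ids);
  (void)text;
}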
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.cc deleted file mode 100644 index 6c353d8d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.cc +++ /dev/null
@@ -1,43 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h" - -#include "tensorflow/lite/kernels/shim/tflite_op_shim.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel_template.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { - -using TokenizeOpKernel = tflite::shim::TfLiteOpKernel< - tensorflow::text::FastWordpieceTokenizeWithOffsetsOp>; - -using DetokenizeOpKernel = - tflite::shim::TfLiteOpKernel<tensorflow::text::FastWordpieceDetokenizeOp>; - -extern "C" void AddFastWordpieceTokenize(tflite::MutableOpResolver* resolver) { - TokenizeOpKernel::Add(resolver); -} - -extern "C" void AddFastWordpieceDetokenize( - tflite::MutableOpResolver* resolver) { - DetokenizeOpKernel::Add(resolver); -} - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h deleted file mode 100644 index 8424501..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h +++ /dev/null
@@ -1,35 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_GOOGLE_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_TFLITE_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_GOOGLE_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_TFLITE_H_ - -#include "tensorflow/lite/mutable_op_resolver.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { - -extern "C" void AddFastWordpieceTokenize(::tflite::MutableOpResolver* resolver); - -extern "C" void AddFastWordpieceDetokenize( - ::tflite::MutableOpResolver* resolver); - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_GOOGLE_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_TFLITE_H_
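The two files deleted above declared and defined the C-linkage helpers that register the custom TFLite tokenize/detokenize ops. For reference, a client carrying this code would have wired the ops into an interpreter roughly as sketched below; the registration calls come from the removed header, while the surrounding model and interpreter boilerplate is an assumption for illustration.

// Sketch of how the removed AddFastWordpieceTokenize / AddFastWordpieceDetokenize
// helpers were used: they append the custom ops to a MutableOpResolver before
// the interpreter is built. Model path and builder setup are illustrative.
#include <memory>

#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h"

std::unique_ptr<tflite::Interpreter> BuildInterpreter(const char* model_path) {
  auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
  if (model == nullptr) return nullptr;

  // Start from the builtin ops, then add the (now removed) custom text ops.
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::ops::custom::text::AddFastWordpieceTokenize(&resolver);
  tflite::ops::custom::text::AddFastWordpieceDetokenize(&resolver);

  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  return interpreter;
}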
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h deleted file mode 100644 index ba2cf6ab..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h +++ /dev/null
@@ -1,274 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// To optimize speed/memory usage, we assume: -// * The WordPiece vocabulary has at most 2^22 = 4M tokens. -// * No token from the vocabulary has more than 256 bytes. -// -// The assumptions are adjustable by setting the constants defined in this file. -// -// Note: by recompiling the underlying trie library and the helper functions in -// this file to use 64-bit (or even larger) integers, we can support even a -// larger vocab size and longer vocab tokens. Still, we believe the current -// implementation covers all real cases. -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_UTILS_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_UTILS_H_ - -#include <stdint.h> - -#include <limits> - -#include "absl/status/statusor.h" -#include "absl/strings/str_cat.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/umachine.h" - -namespace tensorflow { -namespace text { -namespace fast_wordpiece_tokenizer_utils { - -// This header assumes that <int> is 32-bit integer types. -static_assert(sizeof(int) == 4, "FastWordpieceTokenizer requires 4-byte int."); - -//////////////////////////////////////////////////////////////////////////////// -// Constants for token encoding. -// -// The constants below define a 32-bit compact token representation that encodes -// (1) the token id, (2) the token length (minus 1, and without the suffix -// indicator, in utf-8 bytes), and (3) is_suffix_token (i.e., the token starts -// with the suffix indicator (say) "##"). -// -// The encoded value is stored on the darts_clone trie as well as in the -// `failure_pops_pool` (see FastWordpieceTokenizerConfig in -// fast_wordpiece_tokenizer_model.fbs). As required by darts_clone_trie, the -// type of the encoded value should be 32-bit signed int, and the top bit is -// reserved to be always 0. -// -// Examples (given the existing constants; bits are numbered 0 to 31 from -// right/lower to left/upper; the top bit is reserved by darts_clone trie and is -// always 0): -// * Token "a", token id 0 -> The encoded value is 0x0: -// * bit 31: 0. -// * bit 30: 0, since token "a" is not a suffix token. -// * bits 29-8: 0, since the token id is 0. -// * bits 7-0: 0, since the encoded token length is 0 (see below comments). -// * Token "b", token id 1 -> The encoded value is 0x100: -// * bit 31: 0. -// * bit 30: 0, since token "b" is not a suffix token. -// * bits 29-8: 1, since the token id is 1. -// * bits 7-0: 0, since the encoded token length is 0 (see below comments). -// * Token "##b", token id 2 -> The encoded value is 0x40000200: -// * bit 31: 0. -// * bit 30: 1, since token "##b" is a suffix token. -// * bits 29-8: 2, since the token id is 2. -// * bits 7-0: 0, since the encoded token length is 0 (see below comments). -// * Token "bc", token id 3 -> The encoded value is 0x301: -// * bit 31: 0. 
-// * bit 30: 0, since token "bc" is not a suffix token. -// * bits 29-8: 3, since the token id is 3. -// * bits 7-0: 1, since the encoded token length is 1 (see below comments). -// * Token "##bcd", token id 5 -> The encoded value is 0x40000502: -// * bit 31: 0. -// * bit 30: 1, since token "##bcd" is a suffix token. -// * bits 29-8: 5, since the token id is 5. -// * bits 7-0: 2, since the encoded token length is 2 (see below comments). -// -// One special case is that when the suffix indicator is the empty string "". In -// this case, `is_suffix_token` is false for all tokens. -// -// Another special case is that when the suffix indicator string happens to be a -// token in the vocabulary. When encoding such a token like "##", by design, -// `is_suffix_token` is false, and the encoded token length is the full length -// of the suffix indicator string. -// -//////////////////////////////////////////////////////////////////////////////// - -// The (right-to-left 0-based) bit to encode whether the token is a suffix -// token. -static constexpr uint32_t kBitToIndicateSuffixToken = 30; - -// The number of low bits to encode the vocab token length into a compact -// representation. Technically, we encode the length of the token without the -// suffix indicator (if any) minus 1. Examples: -// * Token "a" -> we encode 1-1 = 0. -// * Token "abc" -> we encode 3-1 = 0. -// * Token "##abc" -> we encode 2, as before (we ignore the suffix indicator). -static constexpr uint32_t kBitsToEncodeVocabTokenLength = 8; - -// The bit mask to get the vocab token length from the compact representation. -static constexpr uint32_t kMaskToEncodeVocabTokenLength = - (1 << kBitsToEncodeVocabTokenLength) - 1; - -// Max vocab token length supported (given `kBitsToEncodeVocabTokenLength`). -static constexpr uint32_t kMaxVocabTokenLengthInUTF8Bytes = - (1 << kBitsToEncodeVocabTokenLength); - -// The maximum vocab size supported by our 32-bit encoding. Using right-to-left -// 0-based numbering, Bit 31 is reserved by darts_clone trie. Bit 30 indicates -// whether the token is a suffix token. The low `kBitsToEncodeVocabTokenLength` -// bits encode the token length. Given `kBitsToEncodeVocabTokenLength=8`, this -// leaves 32-1-1-8=22 bits for token ids, i.e., a max vocab size of 2^22 = 4M. -static constexpr uint32_t kMaxSupportedVocabSize = - (1 << (32 - 1 - 1 - kBitsToEncodeVocabTokenLength)); - -// The bit mask to get the vocab token id from the compact representation. -static constexpr uint32_t kMaskToEncodeVocabTokenId = - ((1 << kBitToIndicateSuffixToken) - 1) ^ kMaskToEncodeVocabTokenLength; - -//////////////////////////////////////////////////////////////////////////////// -// Helpers for encoding / decoding tokens. -//////////////////////////////////////////////////////////////////////////////// - -// Encodes a token into the encoded value. `token_length` is without the suffix -// indicator. The result is always a non-negative integer. Only used in building -// the model (in flatbuffer), not in doing WordPiece tokenization. -inline absl::StatusOr<int> EncodeToken(int token_id, - int token_length, - bool is_suffix_token) { - const int encoded_value = (is_suffix_token << kBitToIndicateSuffixToken) | - (token_id << kBitsToEncodeVocabTokenLength) | - (token_length - 1); - if (encoded_value < 0) { - return absl::FailedPreconditionError(absl::StrCat( - "EncodeToken() must return a non-negative value! 
Found encoded value: ", - encoded_value, " for input token id: ", token_id, ", token_length: ", - token_length, ", is_suffix_token: ", is_suffix_token)); - } - return encoded_value; -} - -// Gets whether it is a suffix token from the encoded value. -inline bool IsSuffixToken(int token_encoded_value) { - return static_cast<bool>(token_encoded_value >> kBitToIndicateSuffixToken); -} - -// Gets the token id from the encoded value. -inline int GetTokenId(int token_encoded_value) { - return (token_encoded_value & kMaskToEncodeVocabTokenId) >> - kBitsToEncodeVocabTokenLength; -} - -// Gets the token length (without the suffix indicator) from the encoded value. -inline int GetTokenLength(int token_encoded_value) { - return (token_encoded_value & kMaskToEncodeVocabTokenLength) + 1; -} - -//////////////////////////////////////////////////////////////////////////////// -// Constants for encoding failure pop lists. -// -// We put all failure pop lists into a common pool. The constants below define -// the compact representation that encodes (1) the offset, and (2) the length -// (minus 1) for a failure pop list in the common pool. -// -// Examples (given the existing constants; bits are numbered 0 to 31 from -// right/lower to left/upper): -// * failure pop list A, whose offset is 0 and length is 1 -> The encoded value -// is 0x0: -// * bits 31-8: 0, since the offset is 0. -// * bits 7-0: 0, since the encoded length is 0 (=1-1). -// * failure pop list B, whose offset is 0 and length is 3 -> The encoded value -// is 0x2: -// * bits 31-8: 0, since the offset is 0. -// * bits 7-0: 2, since the encoded length is 2 (=3-1). -// * failure pop list C, whose offset is 11 and the length is 10 -> The encoded -// value is 0xB09: -// * bits 31-8: 0xB, since the offset is 11. -// * bits 7-0: 9, since the encoded length is 9 (=10-1). -//////////////////////////////////////////////////////////////////////////////// - -// The number of low bits used to encode the length of failure pops minus 1 in -// the compact representation. This value should be less than or equal to -// `kBitsToEncodeVocabTokenLength`, since the size of failure pops is bounded by -// the maximum token length in the vocabulary. -static constexpr uint32_t kBitsToEncodeFailurePopsListSize = - kBitsToEncodeVocabTokenLength; - -// The bit mask to get the length of the failure pop list (without any suffix -// indicator, and minus 1) from the compact representation. -static constexpr uint32_t kMaskToEncodeFailurePopsListSize = - (1 << kBitsToEncodeFailurePopsListSize) - 1; - -// Max length of the failure pop list supported (given -// `kBitsToEncodeFailurePopsListSize`). -static constexpr uint32_t kMaxFailurePopsListSize = - (1 << kBitsToEncodeFailurePopsListSize); - -// The maximum valid offset in the failure pool, excluding the largest one -// (i.e., 0xFF...F), which is reserved to denote a null failure pop list (see -// `kNullFailurePopsList`). -static constexpr uint32_t kMaxSupportedFailurePoolOffset = - (1 << (32 - kBitsToEncodeFailurePopsListSize)) - 1 - 1; - -// Represents the null failure pops list, because 0xFF...F is not a valid of -// offset (see `kMaxSupportedFailurePoolOffset`). 
-static constexpr uint32_t kNullFailurePopsList = - std::numeric_limits<uint32_t>::max(); - -//////////////////////////////////////////////////////////////////////////////// -// Helpers for encoding / decoding failure pop lists -//////////////////////////////////////////////////////////////////////////////// - -// Encodes the offset (in the failure pop pool) and the length of a failure pop -// list into an integer for a compact representation. -inline uint32_t EncodeFailurePopList(int offset, int length) { - return (offset << kBitsToEncodeFailurePopsListSize) | (length - 1); -} - -// Decodes the offset (in the failure pop pool) and the length of a failure pop -// list from the compact representation (an integer). -inline void GetFailurePopsOffsetAndLength(uint32_t offset_and_length, - int& out_offset, - int& out_length) { - out_offset = offset_and_length >> kBitsToEncodeFailurePopsListSize; - out_length = (offset_and_length & kMaskToEncodeFailurePopsListSize) + 1; -} - -//////////////////////////////////////////////////////////////////////////////// -// Constants related to the Trie structure. -//////////////////////////////////////////////////////////////////////////////// - -// Represents the null node id. Different from any normal node. -static constexpr uint32_t kNullNode = std::numeric_limits<uint32_t>::max(); - -// The maximum trie size supported. Because std::numeric_limits<uint32_t>::max() -// (i.e., 0xFFFFFFFF) is reserved to represent the null node, the total trie -// size needs to be smaller or equal to 0xFFFFFFFF. -static constexpr uint32_t kMaxSupportedTrieSize = - std::numeric_limits<uint32_t>::max(); - -//////////////////////////////////////////////////////////////////////////////// -// Helpers for analyzing Unicode characters. -//////////////////////////////////////////////////////////////////////////////// -inline bool IsPunctuationOrChineseChar(UChar32 char_value) { - uint32_t cp = static_cast<uint32_t>(char_value); - // Chinese characters that are treated as punctuation in Bert. - if ((cp >= 0x4E00 && cp <= 0x9FFF) || (cp >= 0x3400 && cp <= 0x4DBF) || - (cp >= 0x20000 && cp <= 0x2A6DF) || (cp >= 0x2A700 && cp <= 0x2B73F) || - (cp >= 0x2B740 && cp <= 0x2B81F) || (cp >= 0x2B820 && cp <= 0x2CEAF) || - (cp >= 0xF900 && cp <= 0xFAFF) || (cp >= 0x2F800 && cp <= 0x2FA1F)) { - return true; - } - // Some special chars e.g. ">", "$" that are not covered by the u_ispunct are - // considered as punctuation chars. - if ((cp >= 33 && cp <= 47) || (cp >= 58 && cp <= 64) || - (cp >= 91 && cp <= 96) || (cp >= 123 && cp <= 126)) { - return true; - } - return u_ispunct(char_value); -} -} // namespace fast_wordpiece_tokenizer_utils -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_FAST_WORDPIECE_TOKENIZER_UTILS_H_
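The deleted utils header documents a 32-bit packed token encoding: bit 30 marks a suffix token, bits 29-8 hold the token id, and bits 7-0 hold the token length in UTF-8 bytes (without the suffix indicator) minus one, so "abc" stores 3-1 = 2. The standalone sketch below restates that bit arithmetic and checks it against the worked values from the removed comments (for example 0x40000502 for suffix token "##bcd" with id 5); it is an illustration, not the removed EncodeToken()/GetTokenId() helpers.

// Re-statement of the packed-token layout from the deleted header.
#include <cassert>
#include <cstdint>

constexpr uint32_t kBitToIndicateSuffixToken = 30;
constexpr uint32_t kBitsToEncodeVocabTokenLength = 8;

constexpr uint32_t Pack(uint32_t token_id, uint32_t token_length,
                        bool is_suffix_token) {
  // token_length is in bytes, without the suffix indicator.
  return (static_cast<uint32_t>(is_suffix_token) << kBitToIndicateSuffixToken) |
         (token_id << kBitsToEncodeVocabTokenLength) | (token_length - 1);
}

int main() {
  // Example values taken from the comments in the deleted header.
  assert(Pack(/*token_id=*/0, /*token_length=*/1, /*is_suffix_token=*/false) == 0x0);
  assert(Pack(/*token_id=*/2, /*token_length=*/1, /*is_suffix_token=*/true) == 0x40000200);
  assert(Pack(/*token_id=*/3, /*token_length=*/2, /*is_suffix_token=*/false) == 0x301);
  assert(Pack(/*token_id=*/5, /*token_length=*/3, /*is_suffix_token=*/true) == 0x40000502);
  return 0;
}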
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils_test.cc deleted file mode 100644 index 931438e..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils_test.cc +++ /dev/null
@@ -1,155 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_utils.h" - -#include <gmock/gmock.h> -#include <gtest/gtest.h> - -namespace tensorflow { -namespace text { -namespace fast_wordpiece_tokenizer_utils { -namespace { - -// Testing spec struct for token encoding / decoding. -struct TokenSpec { - friend std::ostream& operator<<(std::ostream& os, const TokenSpec& s) { - return os << "token_id:" << s.token_id << ", " - << "token_length:" << s.token_length << ", " - << "is_suffix_token:" << s.is_suffix_token << std::endl; - } - - int token_id; - int token_length; - bool is_suffix_token; -}; - -// Parameterized tests specs for token encoding / decoding. -const std::vector<TokenSpec>& GetTokenSpecs() { - static const std::vector<TokenSpec>& kSpecs = *new std::vector<TokenSpec>{ - // Test 0. - { - .token_id = 0, - .token_length = 1, - .is_suffix_token = false, - }, - // Test 1. - { - .token_id = 1, - .token_length = 1, - .is_suffix_token = false, - }, - // Test 2. - { - .token_id = 2, - .token_length = 1, - .is_suffix_token = true, - }, - // Test 3. - { - .token_id = 3, - .token_length = 10, - .is_suffix_token = false, - }, - // Test 4. - { - .token_id = 4, - .token_length = 10, - .is_suffix_token = true, - }, - // Test 5. - { - .token_id = kMaxSupportedVocabSize - 1, - .token_length = kMaxVocabTokenLengthInUTF8Bytes, - .is_suffix_token = true, - }, - }; - return kSpecs; -} - -using TokenEncodingDecodingTest = testing::TestWithParam<TokenSpec>; - -TEST_P(TokenEncodingDecodingTest, GeneralTest) { - const TokenSpec& spec = GetParam(); - ASSERT_OK_AND_ASSIGN( - auto encoded_value, - EncodeToken(spec.token_id, spec.token_length, spec.is_suffix_token)); - EXPECT_THAT(GetTokenId(encoded_value), spec.token_id); - EXPECT_THAT(GetTokenLength(encoded_value), spec.token_length); - EXPECT_THAT(IsSuffixToken(encoded_value), spec.is_suffix_token); -} - -INSTANTIATE_TEST_SUITE_P(TestTokenEncodingDecoding, - TokenEncodingDecodingTest, - testing::ValuesIn(GetTokenSpecs())); - -struct FailurePopListSpec { - friend std::ostream& operator<<(std::ostream& os, - const FailurePopListSpec& s) { - return os << "offset:" << s.offset << ", " - << "length:" << s.length << std::endl; - } - - int offset; - int length; -}; - -// Parameterized tests specs for failure pop list encoding and decoding. -const std::vector<FailurePopListSpec>& GetFailurePopListSpecs() { - static const std::vector<FailurePopListSpec>& kSpecs = - *new std::vector<FailurePopListSpec>{ - // Test 0. - { - .offset = 0, - .length = 1, - }, - // Test 1. - { - .offset = 0, - .length = 3, - }, - // Test 2. - { - .offset = 11, - .length = 10, - }, - // Test 3. 
- { - .offset = kMaxSupportedFailurePoolOffset, - .length = kMaxFailurePopsListSize, - }, - }; - return kSpecs; -} - -using FailurePopListEncodingDecodingTest = - testing::TestWithParam<FailurePopListSpec>; - -TEST_P(FailurePopListEncodingDecodingTest, GeneralTest) { - const FailurePopListSpec& spec = GetParam(); - auto offset_and_length = EncodeFailurePopList(spec.offset, spec.length); - int offset, length; - GetFailurePopsOffsetAndLength(offset_and_length, offset, length); - EXPECT_THAT(offset, spec.offset); - EXPECT_THAT(length, spec.length); -} - -INSTANTIATE_TEST_SUITE_P(TestFailurePopListEncodingDecoding, - FailurePopListEncodingDecodingTest, - testing::ValuesIn(GetFailurePopListSpecs())); - -} // namespace -} // namespace fast_wordpiece_tokenizer_utils -} // namespace text -} // namespace tensorflow
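The deleted utils test above round-trips the other packing defined in the removed header, where a failure-pop list is stored as (offset << 8) | (length - 1). A small standalone sketch of that round trip, using the worked example from the removed comments (offset 11, length 10 packs to 0xB09), follows; it mirrors the deleted EncodeFailurePopList()/GetFailurePopsOffsetAndLength() helpers without reproducing them.

// Sketch of the failure-pop-list packing round trip covered by the deleted test.
#include <cassert>
#include <cstdint>

constexpr uint32_t kBitsToEncodeFailurePopsListSize = 8;
constexpr uint32_t kMaskToEncodeFailurePopsListSize =
    (1u << kBitsToEncodeFailurePopsListSize) - 1;

constexpr uint32_t EncodeFailurePops(uint32_t offset, uint32_t length) {
  return (offset << kBitsToEncodeFailurePopsListSize) | (length - 1);
}

int main() {
  // Worked example from the deleted comments: offset 11, length 10 -> 0xB09.
  const uint32_t packed = EncodeFailurePops(11, 10);
  assert(packed == 0xB09);

  // Unpack the offset and length again.
  const uint32_t offset = packed >> kBitsToEncodeFailurePopsListSize;
  const uint32_t length = (packed & kMaskToEncodeFailurePopsListSize) + 1;
  assert(offset == 11 && length == 10);
  return 0;
}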
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_greedy_constrained_sequence_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_greedy_constrained_sequence_kernel_test.cc deleted file mode 100644 index dc8e6fb..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_greedy_constrained_sequence_kernel_test.cc +++ /dev/null
@@ -1,799 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -namespace tensorflow { - -using tensorflow::DT_INT32; -using tensorflow::FakeInput; -using tensorflow::NodeDefBuilder; -using tensorflow::Status; -using tensorflow::TensorShape; -using tensorflow::text_kernels_test_util::MatrixEq; -using tensorflow::text_kernels_test_util::VectorEq; - -class LogGreedyConstrainedSequenceTest : public tensorflow::OpsTestBase { - public: - void SetUpOpWithDefaults() { - // Prepare graph. - TF_ASSERT_OK(NodeDefBuilder("tested_op", "ConstrainedSequence") - .Attr("Tin", DT_INT32) - .Attr("use_viterbi", false) - .Attr("use_log_space", true) - .Attr("use_start_and_end_states", true) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } -}; - -// TODO(b/122968457): There are a bunch of tests that only validate !ok instead -// of looking for specific error messages; fix that. - -// This test examines evaluations with only a permissions matrix. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty weights matrix not of rank 2. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with a 2D score matrix (implicit batch 1). -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithSingleBatchItem) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({1, 4}), // - { - 10.0, 12.0, 13.0, 4.0, // - }); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({1}), {1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // Validate the output. - std::vector<int32> expected_transitions({1}); - std::vector<int64> expected_offsets({0, 1}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines int64 input type and int32 output type. -TEST_F(LogGreedyConstrainedSequenceTest, int64inint32out) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. 
- AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - // Validate the output. - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op can take a sequence length of type {{X},{Y},{Z}} -// (with an outer batch dimension). -TEST_F(LogGreedyConstrainedSequenceTest, TwoDimensionalSequenceLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3, 1}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions that are forbidden by the permission -// matrix (final->null) are not taken. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeightsConstrainedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. 
- AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok; the next - // highest is 1, but 1->OUT is not OK; the next highest is 0, which is OK. - // The second sequence's highest score is 3, OUT->3 is OK and 3->OUT is OK. - // The third sequence's highest score is 0, OUT->0 is OK and 0->OUT is OK. - // Validate the output. - std::vector<int32> expected_transitions({0, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with only a weight matrix. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {10.1, 2.5, 7.5, 5.0} (max is 0) - // 2: {1.1, 9.5, 11.5, 6.0} (max is 2) - // 3: {100.1, 24.5, 3.5, 5.0} (max is 0) - // Validate the output. - std::vector<int32> expected_transitions({0, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty not rank 2 permissions matrix. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {10.1, 2.5, 7.5, 5.0} (max is 0) - // 2: {1.1, 9.5, 11.5, 6.0} (max is 2) - // 3: {100.1, 24.5, 3.5, 5.0} (max is 0) - // Validate the output. - std::vector<int32> expected_transitions({0, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions are scored with the probability -// of ending the sequence on the transition (x->final->null). 
-TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissionsWeightedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 0.1, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row and the last column in the - // score tensor, so the real scores are: - // 1: {10.1, 2.5, 7.5, 4.1} (max is 0) - // 2: {1.1, 9.5, 11.5, 6.0} (max is 2) - // 3: {100.1, 24.5, 3.5, 5.0} (max is 0) - // Validate the output. - std::vector<int32> expected_transitions({0, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with both weight and permission matrices. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithWeightsAndPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 7.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, false, // FROM 2 - true, true, true, true, true, // FROM 3 - false, true, true, true, false, // FROM 'OUT' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 1.0, // - 0.5, 0.5, 0.5, 0.5, 0.1, // - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row and the last column in the - // score tensor, so the real scores are: - // 1: {7.1, 2.5, 7.5, 4.1} (max is 3, but 2->NUL/NUL->0 is not OK, so 3.) - // 2: {1.1, 9.5, 11.5, 6.0} (max is 2, but 2->NUL is not OK, so 1.) - // 3: {100.1, 24.5, 3.5, 5.0} (max is 0, but NUL->0 is not OK, so 1.) - // Validate the output. - std::vector<int32> expected_transitions({3, 1, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesMultipleTransitionsWithWeightsAndPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. 
- AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // Batch 0, step 0 - 10.0, 10.0, 10.0, 10.0, // Batch 0, step 1 - 1.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - 100.0, 24.0, 3.0, 4.0, // Batch 2, step 0 - 1.0, 11.0, 1.0, 10.0, // Batch 2, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, false, true, false, true, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - false, true, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // 0 - 0.5, 0.5, 0.5, 0.5, 1.0, // 1 - 0.5, 0.5, 1.0, 0.5, 1.0, // 2 - 0.5, 0.5, 0.5, 0.5, 1.0, // 3 - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {10.1, 2.5, 7.5, 5.0} (max is 2). OUT->2 is OK. - // 2: {1.1, 9.5, 11.5, 6.0} (max is 2). OUT->2 is OK. - // 3: {100.1, 11.5, 1.5, 11.0} (max is 0). OUT->0 is not OK, so go with 1. - // STEP 2: - // 1: In state '2', so use row 2 in the weight tensor. - // Weights are {11.5, 11.5, 12.0, 11.5}; 2->2 is OK and 2->OUT is OK; use 2. - // 2: In state '2', so use row 2 in the weight tensor. - // Weights are {10.5, 15.5, 2.0, 13.0}; 2->3 is not OK and 2->1 is not OK, so - // 0. 3: In state 0, so use row 0 in the weight tensor. Weights are - // {1.5, 11.5, 1.5, 11}; 0->1 is OK but 1->OUT is not, so 3. - - std::vector<int32> expected_transitions({2, 2, 2, 0, 1, 3}); - std::vector<int64> expected_offsets({0, 2, 4, 6}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesMultipleTransitionsWithVaryingLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // Batch 0, step 0 - 10.0, 10.0, 10.0, 10.0, // Batch 0, step 1 - 1.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - 100.0, 24.0, 3.0, 4.0, // Batch 2, step 0 - 1.0, 11.0, 1.0, 10.0, // Batch 2, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 1, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, false, true, false, true, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - false, true, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.5, 0.5, 0.5, 0.5, 1.0, // 0 - 0.5, 0.5, 0.5, 0.5, 1.0, // 1 - 0.5, 0.5, 1.0, 0.5, 1.0, // 2 - 0.5, 0.5, 0.5, 0.5, 1.0, // 3 - 0.1, 0.5, 0.5, 1.0, 1.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {10.1, 2.5, 7.5, 5.0} (max is 2). OUT->2 is OK. 
- // 2: {1.1, 9.5, 11.5, 6.0} (max is 2). OUT->2 and 2->OUT are OK. - // 3: {100.1, 11.5, 1.5, 11.0} (max is 0). OUT->0 is not OK, so go with 1. - // STEP 2: - // 1: In state '2', so use row 2 in the weight tensor. - // Weights are {11.5, 11.5, 12.0, 11.5}; 2->2 is OK and 2->OUT is OK; use 2. - // 2: End of sequence. - // 3: In state 0, so use row 0 in the weight tensor. - // Weights are {1.5, 11.5, 1.5, 11}; 0->1 is OK but 1->OUT is not, so 3. - - std::vector<int32> expected_transitions({2, 2, 2, 1, 3}); - std::vector<int64> expected_offsets({0, 2, 3, 5}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with a fully negative input set. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithNegativeInputs) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - -10.0, -12.0, -13.0, -4.0, // - -1.0, -12.0, -13.0, -14.0, // - -15.0, -2.0, -3.0, -14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, true, true, true, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - std::vector<int32> expected_transitions({3, 0, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an all-zero weight matrix. -TEST_F(LogGreedyConstrainedSequenceTest, - ComputesSingleTransitionWithZeroedWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - 100.0, 24.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), { - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, - }); - - TF_ASSERT_OK(RunOpKernel()); - - // Because all weights are zero, the max values should be the max of the - // scores. - std::vector<int32> expected_transitions({0, 2, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -TEST_F(LogGreedyConstrainedSequenceTest, - ImpossibleSequencesResultInNegativeOnesIfAttrIsSet) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. 
- AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - false, false, false, false, false, // FROM 0 - false, false, false, false, false, // FROM 1 - false, false, false, false, false, // FROM 2 - false, false, false, false, false, // FROM 3 - false, false, false, false, false, // FROM 'OUT' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // Validate the output. - - std::vector<int32> expected_transitions({-1, -1, -1, -1, -1, -1}); - std::vector<int64> expected_offsets({0, 2, 4, 6}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op will throw an error if there are too few scores to -// finalize all the sequences. -TEST_F(LogGreedyConstrainedSequenceTest, ErrorsIfGivenInsufficientScores) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 2, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -} // namespace tensorflow
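The comments in the deleted greedy tests above describe the selection rule: the row of transition weights for the previous state (the implicit start state on the first step) is added to the scores in log space, and the highest-scoring state that the permission matrix allows both from the previous state and, on the final step, into the end state is chosen. The sketch below restates that single-step choice with plain containers; it is an illustration of the rule as the test comments describe it, not the ConstrainedSequence kernel.

// Single-step greedy selection in log space, as described by the deleted tests.
#include <vector>

int PickGreedyState(const std::vector<float>& scores,
                    const std::vector<float>& weights_from_prev,
                    const std::vector<bool>& allowed_from_prev,
                    const std::vector<bool>& allowed_to_end,
                    bool is_final_step) {
  int best = -1;  // -1 roughly mirrors the impossible-sequence output above.
  float best_score = 0.0f;
  for (int s = 0; s < static_cast<int>(scores.size()); ++s) {
    if (!allowed_from_prev[s]) continue;
    if (is_final_step && !allowed_to_end[s]) continue;
    const float score = scores[s] + weights_from_prev[s];  // Log space: add.
    if (best == -1 || score > best_score) {
      best = s;
      best_score = score;
    }
  }
  return best;
}

// With the first deleted test's data (scores {10, 12, 13, 4}, no weights,
// OUT->2 forbidden, all states allowed to end), this picks state 1, matching
// the expected transitions in that test.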
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_viterbi_constrained_sequence_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_viterbi_constrained_sequence_kernel_test.cc deleted file mode 100644 index b7db6069..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/log_viterbi_constrained_sequence_kernel_test.cc +++ /dev/null
@@ -1,814 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/status.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -namespace tensorflow { - -using tensorflow::DT_INT32; -using tensorflow::FakeInput; -using tensorflow::NodeDefBuilder; -using tensorflow::Status; -using tensorflow::TensorShape; -using tensorflow::text_kernels_test_util::MatrixEq; -using tensorflow::text_kernels_test_util::VectorEq; - -// TODO(b/122968457): There are a bunch of tests that only validate !ok instead -// of looking for specific error messages; fix that. - -class LogViterbiConstrainedSequenceTest : public tensorflow::OpsTestBase { - public: - void SetUpOpWithDefaults() { - // Prepare graph. - TF_ASSERT_OK(NodeDefBuilder("tested_op", "ConstrainedSequence") - .Attr("Tin", DT_INT32) - .Attr("use_viterbi", true) - .Attr("use_log_space", true) - .Attr("use_start_and_end_states", true) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } -}; - -// This test examines evaluations with only a permissions matrix. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeights) { - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty weights matrix not of rank 2. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyWeights) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with a 2D score matrix (implicit batch 1). -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithSingleBatchItem) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({1, 4}), // - { - 10.0, 12.0, 13.0, 4.0, // - }); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({1}), {1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // Validate the output. - std::vector<int32> expected_transitions({1}); - std::vector<int64> expected_offsets({0, 1}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines int64 input type and int32 output type. -TEST_F(LogViterbiConstrainedSequenceTest, int64inint32out) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. 
- AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - // Validate the output. - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op can take a sequence length of type {{X},{Y},{Z}} -// (with an outer batch dimension). -TEST_F(LogViterbiConstrainedSequenceTest, TwoDimensionalSequenceLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3, 1}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok, so it's 1. - // The second sequence's highest score is 3, which is ok. - // The third sequence's highest score is 0, which is ok. - - // Validate the output. - std::vector<int32> expected_transitions({1, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions that are forbidden by the permission -// matrix (final->null) are not taken. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoWeightsConstrainedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, false, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. 
- AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // The first sequence's highest score is 2, but OUT->2 is not ok; the next - // highest is 1, but 1->OUT is not OK; the next highest is 0, which is OK. - // The second sequence's highest score is 3, OUT->3 is OK and 3->OUT is OK. - // The third sequence's highest score is 0, OUT->0 is OK and 0->OUT is OK. - // Validate the output. - std::vector<int32> expected_transitions({0, 3, 0}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with only a weight matrix. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - -12.0, 3.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 10.0, 5.0, 3.0, 1.0, 0.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {20.0, 7.0, 10.0, 5.0} (max is 0) - // 2: {11.0, 14.0, 14.0, 6.0} (max is 2, due to tiebreaker.) - // 3: {-2.0, 8.0, 6.0, 5.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({0, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with an empty not rank 2 permissions matrix. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNonMatrixEmptyPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - -12.0, 3.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 10.0, 5.0, 3.0, 1.0, 0.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {20.0, 7.0, 10.0, 5.0} (max is 0) - // 2: {11.0, 14.0, 14.0, 6.0} (max is 2, due to tiebreaker.) - // 3: {-2.0, 8.0, 6.0, 5.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({0, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures that final transitions are scored with the probability -// of ending the sequence on the transition (x->final->null). -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNoPermissionsWeightedByEnd) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - -12.0, 3.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({0, 0}), {}); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.0, 0.0, 0.0, 0.0, -15.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 10.0, 5.0, 3.0, 1.0, 0.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {5.0, 7.0, 10.0, 5.0} (max is 2 - state 0->null adds -15.) - // 2: {11.0, 14.0, 14.0, 6.0} (max is 2, due to tiebreaker.) - // 3: {-2.0, 8.0, 6.0, 5.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({2, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with both weight and permission matrices. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithWeightsAndPermissions) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 2.0, 7.0, 4.0, // - 1.0, 9.0, 11.0, 5.0, // - -12.0, 3.0, 3.0, 4.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - false, true, true, true, false, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), {0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 0.0, 0.0, 0.0, 0.0, 0.0, // - 10.0, 5.0, 3.0, 1.0, 0.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // All scores should be summed with the last row in the weight tensor, so - // the 'real' scores are: - // 1: {20.0, 7.0, 10.0, 5.0} (max is 0, but NUL->0 is forbidden, so 2.) - // 2: {11.0, 14.0, 14.0, 6.0} (max is 2, due to tiebreaker.) - // 3: {-2.0, 8.0, 6.0, 5.0} (max is 1) - // Validate the output. - std::vector<int32> expected_transitions({2, 2, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesMultipleTransitionsWithWeightsAndPermissions) { - // Prepare graph.
- SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({2, 2, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // Batch 0, step 0 - 13.0, 12.0, 11.0, 10.0, // Batch 0, step 1 - 7.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({2}), {2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, false, true, false, false, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - true, false, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), - {-1.0, 1.0, -2.0, 2.0, 0.0, // 0 - 3.0, -3.0, 4.0, -4.0, 0.0, // 1 - 5.0, -5.0, 6.0, -6.0, 0.0, // 2 - -7.0, 7.0, -8.0, 8.0, 0.0, // 3 - 0.0, 1.0, 2.0, 3.0, 0.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be summed with the last row in the weight tensor, so the - // 'real' scores are: - // B0: { 10.0, [NOTOK], 9.0, 7.0} - // B1: { 7.0, [NOTOK], 13.0, 8.0} - // - // STEP 2: - // (Forbidden transitions are marked with '*' and X stands for the lowest - // possible score.) - // - // BATCH 0: - // Raw scores are: {13.0, 12.0, 11.0, 10.0} - // - // Final state 0: (13.0) Weighted scores are {12.0, 16.0, 18.0, 6.0} - // New totals are {22, X, 27, 18} [max 27 from 2] - // - // Final state 1: (12.0) Weighted scores are {13.0, 9.0, X, 19.0}, - // New totals are {23, X, X, 26} [max 26 from 3] - // - // Final state 2: (11.0) Weighted scores are {9, 15, 21, 3}, - // New totals are {19, X, 30, 10} [max 30 from 2] - // - // Final state 3: (10.0) Weighted scores are {12, 6, X, 18}, - // New totals are {19, X, X, 25} [max 25 from 3] - // - // Top scores are [27, 26, 30, 25] from [2, 3, 2, 3]. - // 2->OUT is X, so final scores are [27, 26, X, 25] for a - // final state of [0] with a sequence of [2->0]. - // - // - // BATCH 1: - // Previous scores are {7, X, 13, 8} - // Raw scores are {10, 15, 1, 12} - // - // Final state 0: Weighted score is {9, 18, 15, 3} - // New totals are {16, X, 28, 11} [max 28 from 2] - // - // Final state 1: Weighted score is {16, 12, 10, 22} - // New totals are {23, X, X*, 30} [max 30 from 3] - // - // Final state 2: Weighted score is {-1, 5, 7, -7} - // New totals are {6, X, 20, 1} [max 20 from 2] - // - // Final state 3: Weighted score is {14, 8, 6, 20} - // New totals are {21, X, X*, 28} [max 28 from 3] - // - // Top scores are [28, 30, 20, 28] from [2, 3, 2, 3]. - // 2->OUT is not valid, so final scores are [28, 30, X*, 28] for a - // final state of [1] with a sequence of [3->1]. - // - - std::vector<int32> expected_transitions({2, 0, 3, 1}); - std::vector<int64> expected_offsets({0, 2, 4}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines multiple evaluations with both weight and permission -// matrices. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesMultipleTransitionsWithVaryingLengths) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. 
- AddInputFromArray<float>(TensorShape({2, 2, 4}), // - {{ - 10.0, 12.0, 7.0, 4.0, // Batch 0, step 0 - 0.0, 0.0, 0.0, 0.0, // PAD - 7.0, 9.0, 11.0, 5.0, // Batch 1, step 0 - 10.0, 15.0, 1.0, 12.0, // Batch 1, step 1 - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({2}), {1, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO NUL - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, false, true, false, false, // FROM 2 - true, true, true, true, true, // FROM 3 (OUT) - true, false, true, true, true, // FROM 'NULL' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({5, 5}), - {-1.0, 1.0, -2.0, 2.0, 0.0, // 0 - 3.0, -3.0, 4.0, -4.0, 0.0, // 1 - 5.0, -5.0, 6.0, -6.0, 0.0, // 2 - -7.0, 7.0, -8.0, 8.0, 0.0, // 3 - 0.0, 1.0, 2.0, 3.0, 0.0}); - - TF_ASSERT_OK(RunOpKernel()); - - // STEP 1: - // All scores should be summed with the last row in the weight tensor, so the - // 'real' scores are: - // B0: { 10.0, [NOTOK], 9.0, 7.0} - // B1: { 7.0, [NOTOK], 13.0, 8.0} - // - // STEP 2: - // (Forbidden transitions are marked with '*' and X stands for the lowest - // possible score.) - // - // BATCH 0: - // Batch 0 is complete. - // - // BATCH 1: - // Previous scores are {7, X, 13, 8} - // Raw scores are {10, 15, 1, 12} - // - // Final state 0: Weighted score is {9, 18, 15, 3} - // New totals are {16, X, 28, 11} [max 28 from 2] - // - // Final state 1: Weighted score is {16, 12, 10, 22} - // New totals are {23, X, X*, 30} [max 30 from 3] - // - // Final state 2: Weighted score is {-1, 5, 7, -7} - // New totals are {6, X, 20, 1} [max 20 from 2] - // - // Final state 3: Weighted score is {14, 8, 6, 20} - // New totals are {21, X, X*, 28} [max 28 from 3] - // - // Top scores are [28, 30, 20, 28] from [2, 3, 2, 3]. - // 2->OUT is not valid, so final scores are [28, 30, X*, 28] for a - // final state of [1] with a sequence of [3->1]. - // - - std::vector<int32> expected_transitions({0, 3, 1}); - std::vector<int64> expected_offsets({0, 1, 3}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test examines evaluations with a fully negative input set. -TEST_F(LogViterbiConstrainedSequenceTest, - ComputesSingleTransitionWithNegativeInputs) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - -10.0, -12.0, -13.0, -4.0, // - -1.0, -12.0, -13.0, -14.0, // - -15.0, -2.0, -3.0, -14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 1, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, true, true, true, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - std::vector<int32> expected_transitions({3, 0, 1}); - std::vector<int64> expected_offsets({0, 1, 2, 3}); - - // Validate the output. 
- EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -TEST_F(LogViterbiConstrainedSequenceTest, - ImpossibleSequencesResultInNegativeOnesIfAttrIsSet) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 2, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {2, 2, 2}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - false, false, false, false, false, // FROM 0 - false, false, false, false, false, // FROM 1 - false, false, false, false, false, // FROM 2 - false, false, false, false, false, // FROM 3 - false, false, false, false, false, // FROM 'OUT' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - TF_ASSERT_OK(RunOpKernel()); - - // Validate the output. - - std::vector<int32> expected_transitions({-1, -1, -1, -1, -1, -1}); - std::vector<int64> expected_offsets({0, 2, 4, 6}); - - // Validate the output. - EXPECT_THAT(*GetOutput(0), VectorEq(expected_transitions)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_offsets)); -} - -// This test ensures the op will throw an error if there are too few scores to -// finalize all the sequences. -TEST_F(LogViterbiConstrainedSequenceTest, ErrorsIfGivenInsufficientScores) { - // Prepare graph. - SetUpOpWithDefaults(); - - // Add the scores input. - AddInputFromArray<float>(TensorShape({3, 1, 4}), // - {{ - 10.0, 12.0, 13.0, 4.0, // - 1.0, 12.0, 13.0, 14.0, // - 15.0, 2.0, 3.0, 14.0, // - }}); - - // Add the sequence_lengths input. - AddInputFromArray<int>(TensorShape({3}), {1, 2, 1}); - - // Add the allowed_transitions input. - AddInputFromArray<bool>(TensorShape({5, 5}), - { - // TO 0 TO 1 TO 2 TO 3 TO OUT - true, true, true, true, true, // FROM 0 - true, true, true, true, true, // FROM 1 - true, true, true, true, true, // FROM 2 - true, true, true, true, true, // FROM 3 - true, true, false, true, false, // FROM 'OUTSIDE' - }); - - // Add the transition_weights input. - AddInputFromArray<float>(TensorShape({0, 0}), {}); - - auto result = RunOpKernel(); - EXPECT_FALSE(result.ok()); -} - -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_op_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_op_kernels.cc deleted file mode 100644 index d6b3a91d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_op_kernels.cc +++ /dev/null
@@ -1,193 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <cmath> -#include <limits> -#include <type_traits> -#include <vector> - -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/thread_annotations.h" -#include "tensorflow/core/util/work_sharder.h" -#include "tensorflow_text/core/kernels/mst_solver.h" - -namespace tensorflow { -namespace text { - -// Op kernel implementation that wraps the |MstSolver|. -template <class Index, class Score> -class MaxSpanningTreeOpKernel : public tensorflow::OpKernel { - public: - explicit MaxSpanningTreeOpKernel(tensorflow::OpKernelConstruction* context) - : tensorflow::OpKernel(context) { - OP_REQUIRES_OK(context, context->GetAttr("forest", &forest_)); - } - - void Compute(tensorflow::OpKernelContext* context) override { - const tensorflow::Tensor& num_nodes_tensor = context->input(0); - const tensorflow::Tensor& scores_tensor = context->input(1); - - // Check ranks. - OP_REQUIRES(context, num_nodes_tensor.dims() == 1, - tensorflow::errors::InvalidArgument( - "num_nodes must be a vector, got shape ", - num_nodes_tensor.shape().DebugString())); - OP_REQUIRES(context, scores_tensor.dims() == 3, - tensorflow::errors::InvalidArgument( - "scores must be rank 3, got shape ", - scores_tensor.shape().DebugString())); - - // Batch size and input dimension (B and M in the op docstring). - const int64 batch_size = scores_tensor.shape().dim_size(0); - const int64 input_dim = scores_tensor.shape().dim_size(1); - - // Check shapes. - const tensorflow::TensorShape shape_b({batch_size}); - const tensorflow::TensorShape shape_bxm({batch_size, input_dim}); - const tensorflow::TensorShape shape_bxmxm( - {batch_size, input_dim, input_dim}); - OP_REQUIRES( - context, num_nodes_tensor.shape() == shape_b, - tensorflow::errors::InvalidArgument( - "num_nodes misshapen: got ", num_nodes_tensor.shape().DebugString(), - " but expected ", shape_b.DebugString())); - OP_REQUIRES( - context, scores_tensor.shape() == shape_bxmxm, - tensorflow::errors::InvalidArgument( - "scores misshapen: got ", scores_tensor.shape().DebugString(), - " but expected ", shape_bxmxm.DebugString())); - - // Create outputs. - tensorflow::Tensor* max_scores_tensor = nullptr; - tensorflow::Tensor* argmax_sources_tensor = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(0, shape_b, &max_scores_tensor)); - OP_REQUIRES_OK(context, context->allocate_output(1, shape_bxm, - &argmax_sources_tensor)); - - // Acquire shaped and typed references. 
- const BatchedSizes num_nodes_b = num_nodes_tensor.vec<int32>(); - const BatchedScores scores_bxmxm = scores_tensor.tensor<Score, 3>(); - BatchedMaxima max_scores_b = max_scores_tensor->vec<Score>(); - BatchedSources argmax_sources_bxm = argmax_sources_tensor->matrix<int32>(); - - // Solve the batch of MST problems in parallel. Set a high cycles per unit - // to encourage finer sharding. - constexpr int64 kCyclesPerUnit = 1000 * 1000 * 1000; - std::vector<tensorflow::Status> statuses(batch_size); - context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor( - batch_size, kCyclesPerUnit, [&](int64 begin, int64 end) { - for (int64 problem = begin; problem < end; ++problem) { - statuses[problem] = RunSolver(problem, num_nodes_b, scores_bxmxm, - max_scores_b, argmax_sources_bxm); - } - }); - for (const tensorflow::Status& status : statuses) { - OP_REQUIRES_OK(context, status); - } - } - - private: - using BatchedSizes = typename tensorflow::TTypes<int32>::ConstVec; - using BatchedScores = typename tensorflow::TTypes<Score, 3>::ConstTensor; - using BatchedMaxima = typename tensorflow::TTypes<Score>::Vec; - using BatchedSources = typename tensorflow::TTypes<int32>::Matrix; - - // Solves for the maximum spanning tree of the digraph defined by the values - // at index |problem| in |num_nodes_b| and |scores_bxmxm|. On success, sets - // the values at index |problem| in |max_scores_b| and |argmax_sources_bxm|. - // On error, returns non-OK. - tensorflow::Status RunSolver(int problem, - BatchedSizes num_nodes_b, - BatchedScores scores_bxmxm, - BatchedMaxima max_scores_b, - BatchedSources argmax_sources_bxm) const { - // Check digraph size overflow. - const int32 num_nodes = num_nodes_b(problem); - const int32 input_dim = argmax_sources_bxm.dimension(1); - if (num_nodes > input_dim) { - return tensorflow::errors::InvalidArgument( - "number of nodes in digraph ", problem, - " overflows input dimension: got ", num_nodes, - " but expected <= ", input_dim); - } - if (num_nodes >= std::numeric_limits<Index>::max()) { - return tensorflow::errors::InvalidArgument( - "number of nodes in digraph ", problem, " overflows index type: got ", - num_nodes, " but expected < ", std::numeric_limits<Index>::max()); - } - const Index num_nodes_index = static_cast<Index>(num_nodes); - - MstSolver<Index, Score> solver; - TF_RETURN_IF_ERROR(solver.Init(forest_, num_nodes_index)); - - // Populate the solver with arcs and root selections. Note that non-finite - // scores are treated as nonexistent arcs or roots. - for (Index target = 0; target < num_nodes_index; ++target) { - for (Index source = 0; source < num_nodes_index; ++source) { - const Score score = scores_bxmxm(problem, target, source); - if (!std::isfinite(static_cast<double>(score))) - continue; - if (source == target) { // root - solver.AddRoot(target, score); - } else { // arc - solver.AddArc(source, target, score); - } - } - } - - std::vector<Index> argmax(num_nodes); - TF_RETURN_IF_ERROR(solver.Solve(&argmax)); - - // Output the tree and accumulate its score. - Score max_score = 0; - for (Index target = 0; target < num_nodes_index; ++target) { - const Index source = argmax[target]; - argmax_sources_bxm(problem, target) = source; - max_score += scores_bxmxm(problem, target, source); - } - max_scores_b(problem) = max_score; - - // Pad the source list with -1. 
- for (int32 i = num_nodes; i < input_dim; ++i) { - argmax_sources_bxm(problem, i) = -1; - } - - return tensorflow::Status::OK(); - } - - private: - bool forest_ = false; -}; - -// Use Index=uint16, which allows digraphs containing up to 32,767 nodes. -REGISTER_KERNEL_BUILDER(Name("MaxSpanningTree") - .Device(tensorflow::DEVICE_CPU) - .TypeConstraint<int32>("T"), - MaxSpanningTreeOpKernel<uint16, int32>); -REGISTER_KERNEL_BUILDER(Name("MaxSpanningTree") - .Device(tensorflow::DEVICE_CPU) - .TypeConstraint<float>("T"), - MaxSpanningTreeOpKernel<uint16, float>); -REGISTER_KERNEL_BUILDER(Name("MaxSpanningTree") - .Device(tensorflow::DEVICE_CPU) - .TypeConstraint<double>("T"), - MaxSpanningTreeOpKernel<uint16, double>); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver.h deleted file mode 100644 index 7d73435f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver.h +++ /dev/null
@@ -1,618 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_MST_SOLVER_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_MST_SOLVER_H_ - -#include <stddef.h> - -#include <algorithm> -#include <cmath> -#include <limits> -#include <type_traits> -#include <utility> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "absl/types/span.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow_text/core/kernels/disjoint_set_forest.h" - -namespace tensorflow { -namespace text { - -// Maximum spanning tree solver for directed graphs. Thread-compatible. -// -// The solver operates on a digraph of n nodes and m arcs and outputs a maximum -// spanning tree rooted at any node. Scores can be associated with arcs and -// root selections, and the score of a tree is the sum of the relevant arc and -// root-selection scores. -// -// The implementation is based on: -// -// go/tarjan-1977 google-only -// R.E. Tarjan. 1977. Finding Optimum Branchings. Networks 7(1), pp. 25-35. -// [In particular, see Section 4 "a modification for dense graphs"] -// -// which itself is an improvement of the Chu-Liu-Edmonds algorithm. Note also -// the correction in: -// -// go/camerini-1979 google-only -// P.M. Camerini, L. Fratta, F. Maffioli. 1979. A Note on Finding Optimum -// Branchings. Networks 9(4), pp. 309-312. -// -// The solver runs in O(n^2) time, which is optimal for dense digraphs but slow -// for sparse digraphs where O(m + n log n) can be achieved. The solver uses -// O(n^2) space to store the digraph, which is also optimal for dense digraphs. -// -// Although this algorithm has an inferior asymptotic runtime on sparse graphs, -// it avoids high-constant-overhead data structures like Fibonacci heaps, which -// are required in the asymptotically faster algorithms. Therefore, this solver -// may still be competitive on small sparse graphs. -// -// TODO(terrykoo): If we start running on large sparse graphs, implement the -// following, which runs in O(m + n log n): -// -// go/tarjan-1986 google-only -// H.N. Gabow, Z. Galil, T. Spencer, and R.E. Tarjan. 1986. Efficient -// algorithms for finding minimum spanning trees in undirected and directed -// graphs. Combinatorica, 6(2), pp. 109-122. -// -// Template args: -// Index: An unsigned integral type wide enough to hold 2n. -// Score: A signed arithmetic (integral or floating-point) type. -template <class Index, class Score> -class MstSolver { - public: - static_assert(std::is_integral<Index>::value, "Index must be integral"); - static_assert(!std::is_signed<Index>::value, "Index must be unsigned"); - static_assert(std::is_arithmetic<Score>::value, "Score must be arithmetic"); - static_assert(std::is_signed<Score>::value, "Score must be signed"); - using IndexType = Index; - using ScoreType = Score; - - // Creates an empty solver. Call Init() before use. 
- MstSolver() = default; - - // Initializes this for a digraph with |num_nodes| nodes, or returns non-OK on - // error. Discards existing state; call AddArc() and AddRoot() to add arcs - // and root selections. If |forest| is true, then this solves for a maximum - // spanning forest (i.e., a set of disjoint trees that span the digraph). - tensorflow::Status Init(bool forest, Index num_nodes); - - // Adds an arc from the |source| node to the |target| node with the |score|. - // The |source| and |target| must be distinct node indices in [0,n), and the - // |score| must be finite. Calling this multiple times on the same |source| - // and |target| overwrites the score instead of adding parallel arcs. - void AddArc(Index source, Index target, Score score); - - // As above, but adds a root selection for the |root| node with the |score|. - void AddRoot(Index root, Score score); - - // Returns the score of the arc from |source| to |target|, which must have - // been added by a previous call to AddArc(). - Score ArcScore(Index source, Index target) const; - - // Returns the score of selecting the |root|, which must have been added by a - // previous call to AddRoot(). - Score RootScore(Index root) const; - - // Populates |argmax| with the maximum directed spanning tree of the current - // digraph, or returns non-OK on error. The |argmax| array must contain at - // least n elements. On success, argmax[t] is the source of the arc directed - // into t, or t itself if t is a root. - // - // NB: If multiple spanning trees achieve the maximum score, |argmax| will be - // set to one of the maximal trees, but it is unspecified which one. - tensorflow::Status Solve(absl::Span<Index> argmax); - - // Convenience method - tensorflow::Status Solve(std::vector<Index>* argmax) { - return Solve(absl::MakeSpan(argmax->data(), argmax->size())); - } - - private: - // Implementation notes: - // - // The solver does not operate on the "original" digraph as specified by the - // user, but a "transformed" digraph that differs as follows: - // - // * The transformed digraph adds an "artificial root" node at index 0 and - // offsets all original node indices by +1 to make room. For each root - // selection, the artificial root has one outbound arc directed into the - // candidate root that carries the root-selection score. The artificial - // root has no inbound arcs. - // - // * When solving for a spanning tree (i.e., when |forest_| is false), the - // outbound arcs of the artificial root are penalized to ensure that the - // artificial root has exactly one child. - // - // In the remainder of this file, all mentions of nodes, arcs, etc., refer to - // the transformed digraph unless otherwise specified. - // - // The algorithm is divided into two phases, the "contraction phase" and the - // "expansion phase". The contraction phase finds the arcs that make up the - // maximum spanning tree by applying a series of "contractions" which further - // modify the digraph. The expansion phase "expands" these modifications and - // recovers the maximum spanning tree in the original digraph. - // - // During the contraction phase, the algorithm selects the best inbound arc - // for each node. These arcs can form cycles, which are "contracted" by - // removing the cycle nodes and replacing them with a new contracted node. - // Since each contraction removes 2 or more cycle nodes and adds 1 contracted - // node, at most n-1 contractions will occur.
(The digraph initially contains - // n+1 nodes, but one is the artificial root, which cannot form a cycle). - // - // When contracting a cycle, nodes are not explicitly removed and replaced. - // Instead, a contracted node is appended to the digraph and the cycle nodes - // are remapped to the contracted node, which implicitly removes and replaces - // the cycle. As a result, each contraction actually increases the size of - // the digraph, up to a maximum of 2n nodes. One advantage of adding and - // remapping nodes is that it is convenient to recover the argmax spanning - // tree during the expansion phase. - // - // Note that contractions can be nested, because the best inbound arc for a - // contracted node may itself form a cycle. During the expansion phase, the - // algorithm picks a root of the hierarchy of contracted nodes, breaks the - // cycle it represents, and repeats until all cycles are broken. - - // Constants, as enums to avoid the need for static variable definitions. - enum Constants : Index { - // An index reserved for "null" values. - kNullIndex = std::numeric_limits<Index>::max(), - }; - - // A possibly-nonexistent arc in the digraph. - struct Arc { - // Creates a nonexistent arc. - Arc() = default; - - // Returns true if this arc exists. - bool Exists() const { return target != 0; } - - // Returns true if this is a root-selection arc. - bool IsRoot() const { return source == 0; } - - // Returns a string representation of this arc. - std::string DebugString() const { - if (!Exists()) - return "[null]"; - if (IsRoot()) { - return absl::StrCat("[*->", target, "=", score, "]"); - } - return absl::StrCat("[", source, "->", target, "=", score, "]"); - } - - // Score of this arc. - Score score; - - // Source of this arc in the initial digraph. - Index source; - - // Target of this arc in the initial digraph, or 0 if this is nonexistent. - Index target = 0; - }; - - // Returns the index, in |arcs_|, of the arc from |source| to |target|. The - // |source| must be one of the initial n+1 nodes. - size_t ArcIndex(size_t source, size_t target) const; - - // Penalizes the root arc scores to ensure that this finds a tree, or does - // nothing if |forest_| is true. Must be called before ContractionPhase(). - void MaybePenalizeRootScoresForTree(); - - // Returns the maximum inbound arc of the |node|, or null if there is none. - const Arc* MaximumInboundArc(Index node) const; - - // Merges the inbound arcs of the |cycle_node| into the inbound arcs of the - // |contracted_node|. Arcs are merged as follows: - // * If the source and target of the arc belong to the same strongly-connected - // component, it is ignored. - // * If exactly one of the nodes had an arc from some source, then on exit the - // |contracted_node| has that arc. - // * If both of the nodes had an arc from the same source, then on exit the - // |contracted_node| has the better-scoring arc. - // The |score_offset| is added to the arc scores of the |cycle_node| before - // they are merged into the |contracted_node|. - void MergeInboundArcs(Index cycle_node, - Score score_offset, - Index contracted_node); - - // Contracts the cycle in |argmax_arcs_| that contains the |node|. - void ContractCycle(Index node); - - // Runs the contraction phase of the solver, or returns non-OK on error. This - // phase finds the best inbound arc for each node, contracting cycles as they - // are formed. Stops when every node has selected an inbound arc and there - // are no cycles.
- tensorflow::Status ContractionPhase(); - - // Runs the expansion phase of the solver, or returns non-OK on error. This - // phase expands each contracted node, breaks cycles, and populates |argmax| - // with the maximum spanning tree. - tensorflow::Status ExpansionPhase(absl::Span<Index> argmax); - - // If true, solve for a spanning forest instead of a spanning tree. - bool forest_ = false; - - // The number of nodes in the original digraph; i.e., n. - Index num_original_nodes_ = 0; - - // The number of nodes in the initial digraph; i.e., n+1. - Index num_initial_nodes_ = 0; - - // The maximum number of possible nodes in the digraph; i.e., 2n. - Index num_possible_nodes_ = 0; - - // The number of nodes in the current digraph, which grows from n+1 to 2n. - Index num_current_nodes_ = 0; - - // Column-major |num_initial_nodes_| x |num_current_nodes_| matrix of arcs, - // where rows and columns correspond to source and target nodes. Columns are - // added as cycles are contracted into new nodes. - // - // TODO(terrykoo): It is possible to squeeze the nonexistent arcs out of each - // column and run the algorithm with each column being a sorted list (sorted - // by source node). This is in fact the suggested representation in Tarjan - // (1977). This won't improve the asymptotic runtime but still might improve - // speed in practice. I haven't done this because it adds complexity versus - // checking Arc::Exists() in a few loops. Try this out when we can benchmark - // this on real data. - std::vector<Arc> arcs_; - - // Disjoint-set forests tracking the weakly-connected and strongly-connected - // components of the initial digraph, based on the arcs in |argmax_arcs_|. - // Weakly-connected components are used to detect cycles; strongly-connected - // components are used to detect self-loops. - DisjointSetForest<Index> weak_components_; - DisjointSetForest<Index> strong_components_; - - // A disjoint-set forest that maps each node to the top-most contracted node - // that contains it. Nodes that have not been contracted map to themselves. - // NB: This disjoint-set forest does not use union by rank so we can control - // the outcome of a set union. There will only be O(n) operations on this - // instance, so the increased O(log n) cost of each operation is acceptable. - DisjointSetForest<Index, false> contracted_nodes_; - - // An array that represents the history of cycle contractions, as follows: - // * If contracted_into_[t] is |kNullIndex|, then t is deleted. - // * If contracted_into_[t] is 0, then t is a "root" contracted node; i.e., t - // has not been contracted into another node. - // * Otherwise, contracted_into_[t] is the node into which t was contracted. - std::vector<Index> contracted_into_; - - // The maximum inbound arc for each node. The first element is null because - // the artificial root has no inbound arcs. - std::vector<const Arc*> argmax_arcs_; - - // Workspace for ContractCycle(), which records the nodes and arcs in the - // cycle being contracted. - std::vector<std::pair<Index, const Arc*>> cycle_; -}; - -// Implementation details below. - -template <class Index, class Score> -tensorflow::Status MstSolver<Index, Score>::Init(bool forest, Index num_nodes) { - if (num_nodes <= 0) { - return tensorflow::errors::InvalidArgument("Non-positive number of nodes: ", - num_nodes); - } - - // Upcast to size_t to avoid overflow. 
- if (2 * static_cast<size_t>(num_nodes) >= static_cast<size_t>(kNullIndex)) { - return tensorflow::errors::InvalidArgument("Too many nodes: ", num_nodes); - } - - forest_ = forest; - num_original_nodes_ = num_nodes; - num_initial_nodes_ = num_original_nodes_ + 1; - num_possible_nodes_ = 2 * num_original_nodes_; - num_current_nodes_ = num_initial_nodes_; - - // Allocate the full n+1 x 2n matrix, but start with a n+1 x n+1 prefix. - const size_t num_initial_arcs = static_cast<size_t>(num_initial_nodes_) * - static_cast<size_t>(num_initial_nodes_); - const size_t num_possible_arcs = static_cast<size_t>(num_initial_nodes_) * - static_cast<size_t>(num_possible_nodes_); - arcs_.reserve(num_possible_arcs); - arcs_.assign(num_initial_arcs, {}); - - weak_components_.Init(num_initial_nodes_); - strong_components_.Init(num_initial_nodes_); - contracted_nodes_.Init(num_possible_nodes_); - contracted_into_.assign(num_possible_nodes_, 0); - argmax_arcs_.assign(num_possible_nodes_, nullptr); - - // This doesn't need to be cleared now; it will be cleared before use. - cycle_.reserve(num_original_nodes_); - - return tensorflow::Status::OK(); -} - -template <class Index, class Score> -void MstSolver<Index, Score>::AddArc(Index source, Index target, Score score) { - DCHECK_NE(source, target); - DCHECK(std::isfinite(score)); - Arc& arc = arcs_[ArcIndex(source + 1, target + 1)]; - arc.score = score; - arc.source = source + 1; - arc.target = target + 1; -} - -template <class Index, class Score> -void MstSolver<Index, Score>::AddRoot(Index root, Score score) { - DCHECK(std::isfinite(score)); - Arc& arc = arcs_[ArcIndex(0, root + 1)]; - arc.score = score; - arc.source = 0; - arc.target = root + 1; -} - -template <class Index, class Score> -Score MstSolver<Index, Score>::ArcScore(Index source, Index target) const { - const Arc& arc = arcs_[ArcIndex(source + 1, target + 1)]; - DCHECK(arc.Exists()); - return arc.score; -} - -template <class Index, class Score> -Score MstSolver<Index, Score>::RootScore(Index root) const { - const Arc& arc = arcs_[ArcIndex(0, root + 1)]; - DCHECK(arc.Exists()); - return arc.score; -} - -template <class Index, class Score> -tensorflow::Status MstSolver<Index, Score>::Solve(absl::Span<Index> argmax) { - MaybePenalizeRootScoresForTree(); - TF_RETURN_IF_ERROR(ContractionPhase()); - TF_RETURN_IF_ERROR(ExpansionPhase(argmax)); - return tensorflow::Status::OK(); -} - -template <class Index, class Score> -inline size_t MstSolver<Index, Score>::ArcIndex(size_t source, - size_t target) const { - DCHECK_LT(source, num_initial_nodes_); - DCHECK_LT(target, num_current_nodes_); - return source + target * static_cast<size_t>(num_initial_nodes_); -} - -template <class Index, class Score> -void MstSolver<Index, Score>::MaybePenalizeRootScoresForTree() { - if (forest_) - return; - DCHECK_EQ(num_current_nodes_, num_initial_nodes_) - << "Root penalties must be applied before starting the algorithm."; - - // Find the minimum and maximum arc scores. These allow us to bound the range - // of possible tree scores. - Score max_score = std::numeric_limits<Score>::lowest(); - Score min_score = std::numeric_limits<Score>::max(); - for (const Arc& arc : arcs_) { - if (!arc.Exists()) - continue; - max_score = std::max(max_score, arc.score); - min_score = std::min(min_score, arc.score); - } - - // Nothing to do, no existing arcs. - if (max_score < min_score) - return; - - // A spanning tree or forest contains n arcs. 
The penalty below ensures that - // every structure with one root has a higher score than every structure with - // two roots, and so on. - const Score root_penalty = 1 + num_initial_nodes_ * (max_score - min_score); - for (Index root = 1; root < num_initial_nodes_; ++root) { - Arc& arc = arcs_[ArcIndex(0, root)]; - if (!arc.Exists()) - continue; - arc.score -= root_penalty; - } -} - -template <class Index, class Score> -const typename MstSolver<Index, Score>::Arc* -MstSolver<Index, Score>::MaximumInboundArc(Index node) const { - const Arc* __restrict arc = &arcs_[ArcIndex(0, node)]; - const Arc* arc_end = arc + num_initial_nodes_; - - Score max_score = std::numeric_limits<Score>::lowest(); - const Arc* argmax_arc = nullptr; - for (; arc < arc_end; ++arc) { - if (!arc->Exists()) - continue; - const Score score = arc->score; - if (max_score <= score) { - max_score = score; - argmax_arc = arc; - } - } - return argmax_arc; -} - -template <class Index, class Score> -void MstSolver<Index, Score>::MergeInboundArcs(Index cycle_node, - Score score_offset, - Index contracted_node) { - const Arc* __restrict cycle_arc = &arcs_[ArcIndex(0, cycle_node)]; - const Arc* cycle_arc_end = cycle_arc + num_initial_nodes_; - Arc* __restrict contracted_arc = &arcs_[ArcIndex(0, contracted_node)]; - - for (; cycle_arc < cycle_arc_end; ++cycle_arc, ++contracted_arc) { - if (!cycle_arc->Exists()) - continue; // nothing to merge - - // Skip self-loops; they are useless because they cannot be used to break - // the cycle represented by the |contracted_node|. - if (strong_components_.SameSet(cycle_arc->source, cycle_arc->target)) { - continue; - } - - // Merge the |cycle_arc| into the |contracted_arc|. - const Score cycle_score = cycle_arc->score + score_offset; - if (!contracted_arc->Exists() || contracted_arc->score < cycle_score) { - contracted_arc->score = cycle_score; - contracted_arc->source = cycle_arc->source; - contracted_arc->target = cycle_arc->target; - } - } -} - -template <class Index, class Score> -void MstSolver<Index, Score>::ContractCycle(Index node) { - // Append a new node for the contracted cycle. - const Index contracted_node = num_current_nodes_++; - DCHECK_LE(num_current_nodes_, num_possible_nodes_); - arcs_.resize(arcs_.size() + num_initial_nodes_); - - // We make two passes through the cycle. The first pass updates everything - // except the |arcs_|, and the second pass updates the |arcs_|. The |arcs_| - // must be updated in a second pass because MergeInboundArcs() requires that - // the |strong_components_| are updated with the newly-contracted cycle. - cycle_.clear(); - Index cycle_node = node; - do { - // Gather the nodes and arcs in |cycle_| for the second pass. - const Arc* cycle_arc = argmax_arcs_[cycle_node]; - DCHECK(!cycle_arc->IsRoot()) << cycle_arc->DebugString(); - cycle_.emplace_back(cycle_node, cycle_arc); - - // Mark the cycle nodes as members of a strongly-connected component. - strong_components_.Union(cycle_arc->source, cycle_arc->target); - - // Mark the cycle nodes as members of the new contracted node. Juggling is - // required because |contracted_nodes_| also determines the next cycle node. 
- const Index next_node = contracted_nodes_.FindRoot(cycle_arc->source); - contracted_nodes_.UnionOfRoots(cycle_node, contracted_node); - contracted_into_[cycle_node] = contracted_node; - cycle_node = next_node; - - // When the cycle repeats, |cycle_node| will be equal to |contracted_node|, - // not |node|, because the first iteration of this loop mapped |node| to - // |contracted_node| in |contracted_nodes_|. - } while (cycle_node != contracted_node); - - // Merge the inbound arcs of each cycle node into the |contracted_node|. - for (const auto& node_and_arc : cycle_) { - // Set the |score_offset| to the cost of breaking the cycle by replacing the - // arc currently directed into the |cycle_node|. - const Index cycle_node = node_and_arc.first; - const Score score_offset = -node_and_arc.second->score; - MergeInboundArcs(cycle_node, score_offset, contracted_node); - } -} - -template <class Index, class Score> -tensorflow::Status MstSolver<Index, Score>::ContractionPhase() { - // Skip the artificial root since it has no inbound arcs. - for (Index target = 1; target < num_current_nodes_; ++target) { - // Find the maximum inbound arc for the current |target|, if any. - const Arc* arc = MaximumInboundArc(target); - if (arc == nullptr) { - return tensorflow::errors::FailedPrecondition("Infeasible digraph"); - } - argmax_arcs_[target] = arc; - - // The artificial root cannot be part of a cycle, so we do not need to check - // for cycles or even update its membership in the connected components. - if (arc->IsRoot()) - continue; - - // Since every node has at most one selected inbound arc, cycles can be - // detected using weakly-connected components. - const Index source_component = weak_components_.FindRoot(arc->source); - const Index target_component = weak_components_.FindRoot(arc->target); - if (source_component == target_component) { - // Cycle detected; contract it into a new node. - ContractCycle(target); - } else { - // No cycles, just update the weakly-connected components. - weak_components_.UnionOfRoots(source_component, target_component); - } - } - - return tensorflow::Status::OK(); -} - -template <class Index, class Score> -tensorflow::Status MstSolver<Index, Score>::ExpansionPhase( - absl::Span<Index> argmax) { - if (argmax.size() < num_original_nodes_) { - return tensorflow::errors::InvalidArgument( - "Argmax array too small: ", num_original_nodes_, - " elements required, but got ", argmax.size()); - } - - // Select and expand a root contracted node until no contracted nodes remain. - // Thanks to the (topological) order in which contracted nodes are appended, - // root contracted nodes are easily enumerated using a backward scan. After - // this loop, entries [1,n] of |argmax_arcs_| provide the arcs of the maximum - // spanning tree. - for (Index i = num_current_nodes_ - 1; i >= num_initial_nodes_; --i) { - if (contracted_into_[i] == kNullIndex) - continue; // already deleted - const Index root = i; // if not deleted, must be a root due to toposorting - - // Copy the cycle-breaking arc to its specified target. - const Arc* arc = argmax_arcs_[root]; - argmax_arcs_[arc->target] = arc; - - // The |arc| not only breaks the cycle associated with the |root|, but also - // breaks every nested cycle between the |root| and the target of the |arc|. - // Delete the contracted nodes corresponding to all broken cycles.
- Index node = contracted_into_[arc->target]; - while (node != kNullIndex && node != root) { - const Index parent = contracted_into_[node]; - contracted_into_[node] = kNullIndex; - node = parent; - } - } - - // Copy the spanning tree from |argmax_arcs_| to |argmax|. Also count roots - // for validation below. - Index num_roots = 0; - for (Index target = 0; target < num_original_nodes_; ++target) { - const Arc& arc = *argmax_arcs_[target + 1]; - DCHECK_EQ(arc.target, target + 1) << arc.DebugString(); - if (arc.IsRoot()) { - ++num_roots; - argmax[target] = target; - } else { - argmax[target] = arc.source - 1; - } - } - DCHECK_GE(num_roots, 1); - - // Even when |forest_| is false, |num_roots| can still be more than 1. While - // the root score penalty discourages structures with multiple root arcs, it - // is not a hard constraint. For example, if the original digraph contained - // one root selection per node and no other arcs, the solver would incorrectly - // produce an all-root structure in spite of the root score penalty. As this - // example illustrates, however, |num_roots| will be more than 1 if and only - // if the original digraph is infeasible for trees. - if (!forest_ && num_roots != 1) { - return tensorflow::errors::FailedPrecondition("Infeasible digraph"); - } - - return tensorflow::Status::OK(); -} - -} // namespace text -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_MST_SOLVER_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_random_comparison_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_random_comparison_test.cc deleted file mode 100644 index 69e48c2..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_random_comparison_test.cc +++ /dev/null
@@ -1,187 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <time.h> - -#include <random> -#include <set> -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "absl/flags/flag.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow_text/core/kernels/mst_solver.h" -#include "tensorflow_text/core/kernels/spanning_tree_iterator.h" - -ABSL_FLAG(int64, - seed, - 0, - "Seed for random comparison tests, or 0 for a weak random seed."); -ABSL_FLAG(int, num_trials, 3, "Number of trials for random comparison tests."); - -namespace tensorflow { -namespace text { - -using ::testing::Contains; - -// Returns the random seed, or 0 for a weak random seed. -int64 GetSeed() { - return absl::GetFlag(FLAGS_seed); -} - -// Returns the number of trials to run for each random comparison. -int64 GetNumTrials() { - return absl::GetFlag(FLAGS_num_trials); -} - -// Testing rig. Runs a comparison between a brute-force MST solver and the -// MstSolver<> on random digraphs. When the first test parameter is true, -// solves for forests instead of trees. The second test parameter defines the -// size of the test digraph. -class MstSolverRandomComparisonTest - : public ::testing::TestWithParam<::testing::tuple<bool, uint32>> { - protected: - // Use integer scores so score comparisons are exact. - using Solver = MstSolver<uint32, int32>; - - // An array providing a source node for each node. Roots are self-loops. - using SourceList = SpanningTreeIterator::SourceList; - - // A row-major n x n matrix whose i,j entry gives the score of the arc from i - // to j, and whose i,i entry gives the score of selecting i as a root. - using ScoreMatrix = std::vector<int32>; - - // Returns true if this should be a forest. - bool forest() const { return ::testing::get<0>(GetParam()); } - - // Returns the number of nodes for digraphs. - uint32 num_nodes() const { return ::testing::get<1>(GetParam()); } - - // Returns the score of the arcs in |sources| based on the |scores|. - int32 ScoreArcs(const ScoreMatrix& scores, const SourceList& sources) const { - CHECK_EQ(num_nodes() * num_nodes(), scores.size()); - int32 score = 0; - for (uint32 target = 0; target < num_nodes(); ++target) { - const uint32 source = sources[target]; - score += scores[target + source * num_nodes()]; - } - return score; - } - - // Returns the score of the maximum spanning tree (or forest, if the first - // test parameter is true) of the dense digraph defined by the |scores|, and - // sets |argmax_trees| to contain all maximal trees. 
- int32 RunBruteForceMstSolver(const ScoreMatrix& scores, - std::set<SourceList>* argmax_trees) { - CHECK_EQ(num_nodes() * num_nodes(), scores.size()); - int32 max_score; - argmax_trees->clear(); - - iterator_.ForEachTree(num_nodes(), [&](const SourceList& sources) { - const int32 score = ScoreArcs(scores, sources); - if (argmax_trees->empty() || max_score < score) { - max_score = score; - argmax_trees->clear(); - argmax_trees->insert(sources); - } else if (max_score == score) { - argmax_trees->insert(sources); - } - }); - - return max_score; - } - - // As above, but uses the |solver_| and extracts only one |argmax_tree|. - int32 RunMstSolver(const ScoreMatrix& scores, SourceList* argmax_tree) { - CHECK_EQ(num_nodes() * num_nodes(), scores.size()); - TF_CHECK_OK(solver_.Init(forest(), num_nodes())); - - // Add all roots and arcs. - for (uint32 source = 0; source < num_nodes(); ++source) { - for (uint32 target = 0; target < num_nodes(); ++target) { - const int32 score = scores[target + source * num_nodes()]; - if (source == target) { - solver_.AddRoot(target, score); - } else { - solver_.AddArc(source, target, score); - } - } - } - - // Solve for the max spanning tree. - argmax_tree->resize(num_nodes()); - TF_CHECK_OK(solver_.Solve(argmax_tree)); - return ScoreArcs(scores, *argmax_tree); - } - - // Returns a random ScoreMatrix spanning num_nodes() nodes. - ScoreMatrix RandomScores() { - ScoreMatrix scores(num_nodes() * num_nodes()); - for (int32& value : scores) - value = static_cast<int32>(prng_() % 201) - 100; - return scores; - } - - // Runs a comparison between MstSolver and BruteForceMst on random digraphs of - // num_nodes() nodes, for the specified number of trials. - void RunComparison() { - // Seed the PRNG, possibly non-deterministically. Log the seed value so the - // test results can be reproduced, even when the seed is non-deterministic. - uint32 seed = GetSeed(); - if (seed == 0) - seed = time(nullptr); - prng_.seed(seed); - LOG(INFO) << "seed = " << seed; - - const int num_trials = GetNumTrials(); - for (int trial = 0; trial < num_trials; ++trial) { - const ScoreMatrix scores = RandomScores(); - - std::set<SourceList> expected_argmax_trees; - const int32 expected_max_score = - RunBruteForceMstSolver(scores, &expected_argmax_trees); - - SourceList actual_argmax_tree; - const int32 actual_max_score = RunMstSolver(scores, &actual_argmax_tree); - - // In case of ties, MstSolver will find a maximal spanning tree, but we - // don't know which one. - EXPECT_EQ(expected_max_score, actual_max_score); - ASSERT_THAT(expected_argmax_trees, Contains(actual_argmax_tree)); - } - } - - // Tree iterator for brute-force solver. - SpanningTreeIterator iterator_{forest()}; - - // MstSolver<> instance used by the test. Reused across all MST invocations - // to exercise reuse. - Solver solver_; - - // Pseudo-random number generator. - std::mt19937 prng_; -}; - -INSTANTIATE_TEST_SUITE_P(AllowForest, - MstSolverRandomComparisonTest, - ::testing::Combine(::testing::Bool(), - ::testing::Range<uint32>(1, 9))); - -TEST_P(MstSolverRandomComparisonTest, Comparison) { - RunComparison(); -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_test.cc deleted file mode 100644 index ef5327f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/mst_solver_test.cc +++ /dev/null
@@ -1,276 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/mst_solver.h" - -#include <limits> -#include <utility> -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/lib/core/status_test_util.h" - -namespace tensorflow { -namespace text { - -// Testing rig. -// -// Template args: -// Solver: An instantiation of the MstSolver<> template. -template <class Solver> -class MstSolverTest : public ::testing::Test { - protected: - using Index = typename Solver::IndexType; - using Score = typename Solver::ScoreType; - - // Adds directed arcs for all |num_nodes| nodes to the |solver_| with the - // |score|. - void AddAllArcs(Index num_nodes, Score score) { - for (Index source = 0; source < num_nodes; ++source) { - for (Index target = 0; target < num_nodes; ++target) { - if (source == target) - continue; - solver_.AddArc(source, target, score); - } - } - } - - // Adds root selections for all |num_nodes| nodes to the |solver_| with the - // |score|. - void AddAllRoots(Index num_nodes, Score score) { - for (Index root = 0; root < num_nodes; ++root) { - solver_.AddRoot(root, score); - } - } - - // Runs the |solver_| using an argmax array of size |argmax_array_size| and - // expects it to fail with an error message that matches |error_substr|. - void SolveAndExpectError(int argmax_array_size, - const std::string& error_message_substr) { - std::vector<Index> argmax(argmax_array_size); - EXPECT_TRUE(absl::StrContains(solver_.Solve(&argmax).error_message(), - error_message_substr)); - } - - // As above, but expects success. Does not assert anything about the solution - // produced by the solver. - void SolveAndExpectOk(int argmax_array_size) { - std::vector<Index> argmax(argmax_array_size); - TF_EXPECT_OK(solver_.Solve(&argmax)); - } - - // As above, but expects the solution to be |expected_argmax| and infers the - // argmax array size. - void SolveAndExpectArgmax(const std::vector<Index>& expected_argmax) { - std::vector<Index> actual_argmax(expected_argmax.size()); - TF_ASSERT_OK(solver_.Solve(&actual_argmax)); - EXPECT_EQ(expected_argmax, actual_argmax); - } - - // MstSolver<> instance used by the test. Reused across all MST problems in - // each test to exercise reuse. - Solver solver_; -}; - -using Solvers = ::testing::Types<MstSolver<uint8, int16>, - MstSolver<uint16, int32>, - MstSolver<uint32, int64>, - MstSolver<uint16, float>, - MstSolver<uint32, double>>; -TYPED_TEST_SUITE(MstSolverTest, Solvers); - -TYPED_TEST(MstSolverTest, FailIfNoNodes) { - for (const bool forest : {false, true}) { - EXPECT_TRUE(absl::StrContains(this->solver_.Init(forest, 0).error_message(), - "Non-positive number of nodes")); - } -} - -TYPED_TEST(MstSolverTest, FailIfTooManyNodes) { - // Set to a value that would overflow when doubled. 
- const auto kNumNodes = - (std::numeric_limits<typename TypeParam::IndexType>::max() / 2) + 10; - for (const bool forest : {false, true}) { - EXPECT_TRUE( - absl::StrContains(this->solver_.Init(forest, kNumNodes).error_message(), - "Too many nodes")); - } -} - -TYPED_TEST(MstSolverTest, InfeasibleIfNoRootsNoArcs) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->SolveAndExpectError(kNumNodes, "Infeasible digraph"); - } -} - -TYPED_TEST(MstSolverTest, InfeasibleIfNoRootsAllArcs) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllArcs(kNumNodes, 0); - this->SolveAndExpectError(kNumNodes, "Infeasible digraph"); - } -} - -TYPED_TEST(MstSolverTest, FeasibleForForestOnlyIfAllRootsNoArcs) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - if (forest) { - this->SolveAndExpectOk(kNumNodes); // all roots is a valid forest - } else { - this->SolveAndExpectError(kNumNodes, "Infeasible digraph"); - } - } -} - -TYPED_TEST(MstSolverTest, FeasibleIfAllRootsAllArcs) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - this->SolveAndExpectOk(kNumNodes); - } -} - -TYPED_TEST(MstSolverTest, FailIfArgmaxArrayTooSmall) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - this->SolveAndExpectError(kNumNodes - 1, // too small - "Argmax array too small"); - } -} - -TYPED_TEST(MstSolverTest, OkIfArgmaxArrayTooLarge) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - this->SolveAndExpectOk(kNumNodes + 1); // too large - } -} - -TYPED_TEST(MstSolverTest, SolveForAllRootsForestOnly) { - const int kNumNodes = 10; - const bool forest = true; - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 1); // favor all root selections - this->AddAllArcs(kNumNodes, 0); - this->SolveAndExpectArgmax({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); -} - -TYPED_TEST(MstSolverTest, SolveForLeftToRightChain) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - for (int target = 1; target < kNumNodes; ++target) { - this->solver_.AddArc(target - 1, target, 1); // favor left-to-right chain - } - this->SolveAndExpectArgmax({0, 0, 1, 2, 3, 4, 5, 6, 7, 8}); - } -} - -TYPED_TEST(MstSolverTest, SolveForRightToLeftChain) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - for (int source = 1; source < kNumNodes; ++source) { - this->solver_.AddArc(source, source - 1, 1); // favor right-to-left chain - } - this->SolveAndExpectArgmax({1, 2, 3, 4, 5, 6, 7, 8, 9, 9}); - } -} - -TYPED_TEST(MstSolverTest, SolveForAllFromFirstTree) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - 
TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - for (int target = 1; target < kNumNodes; ++target) { - this->solver_.AddArc(0, target, 1); // favor first -> target - } - this->SolveAndExpectArgmax({0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); - } -} - -TYPED_TEST(MstSolverTest, SolveForAllFromLastTree) { - const int kNumNodes = 10; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - for (int target = 0; target + 1 < kNumNodes; ++target) { - this->solver_.AddArc(9, target, 1); // favor last -> target - } - this->SolveAndExpectArgmax({9, 9, 9, 9, 9, 9, 9, 9, 9, 9}); - } -} - -TYPED_TEST(MstSolverTest, SolveForBinaryTree) { - const int kNumNodes = 15; - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, kNumNodes)); - this->AddAllRoots(kNumNodes, 0); - this->AddAllArcs(kNumNodes, 0); - for (int target = 1; target < kNumNodes; ++target) { - this->solver_.AddArc((target - 1) / 2, target, 1); // like a binary heap - } - // clang-format off - this->SolveAndExpectArgmax({0, - 0, 0, - 1, 1, 2, 2, - 3, 3, 4, 4, 5, 5, 6, 6}); - // clang-format on - } -} - -TYPED_TEST(MstSolverTest, ScoreAccessors) { - for (const bool forest : {false, true}) { - TF_ASSERT_OK(this->solver_.Init(forest, 10)); - this->solver_.AddArc(0, 1, 0); - this->solver_.AddArc(1, 4, 1); - this->solver_.AddArc(7, 6, 2); - this->solver_.AddArc(9, 2, 3); - - this->solver_.AddRoot(0, 10); - this->solver_.AddRoot(2, 20); - this->solver_.AddRoot(8, 30); - - EXPECT_EQ(this->solver_.ArcScore(0, 1), 0); - EXPECT_EQ(this->solver_.ArcScore(1, 4), 1); - EXPECT_EQ(this->solver_.ArcScore(7, 6), 2); - EXPECT_EQ(this->solver_.ArcScore(9, 2), 3); - - EXPECT_EQ(this->solver_.RootScore(0), 10); - EXPECT_EQ(this->solver_.RootScore(2), 20); - EXPECT_EQ(this->solver_.RootScore(8), 30); - } -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel.cc
deleted file mode 100644
index a63edeb..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel.cc
+++ /dev/null
@@ -1,42 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow_text/core/kernels/ngrams_kernel.h" - -#include "tensorflow/core/framework/op_kernel.h" - -namespace tensorflow { -namespace text { - -REGISTER_KERNEL_BUILDER( - Name(NGramsStrJoinKernel::OpName()).Device(tensorflow::DEVICE_CPU), - NGramsStrJoinKernel); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel.h
deleted file mode 100644
index 5cb7904b..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel.h
+++ /dev/null
@@ -1,47 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_KERNEL_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_KERNEL_H_ - -#include "tensorflow/lite/kernels/shim/tf_op_shim.h" -#include "tensorflow_text/core/kernels/ngrams_kernel_template.h" - -namespace tensorflow { -namespace text { - -class NGramsStrJoinKernel : public tflite::shim::TfOpKernel<NGramsStrJoin> { - public: - using TfOpKernel::TfOpKernel; -}; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_KERNEL_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel_template.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel_template.h
deleted file mode 100644
index c6f4598..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_kernel_template.h
+++ /dev/null
@@ -1,276 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_H_ - -#include "absl/status/status.h" -#include "absl/strings/str_cat.h" -#include "absl/strings/str_join.h" -#include "absl/strings/string_view.h" -#include "tensorflow/core/platform/tstring.h" -#include "tensorflow/lite/kernels/shim/op_kernel.h" -#include "tensorflow/lite/kernels/shim/status_macros.h" -#include "tensorflow/lite/kernels/shim/tensor_view.h" - -namespace tensorflow { -namespace text { - -// text.ngrams op kernel. See `kDoc` for more info. 
-template <tflite::shim::Runtime Rt> -class NGramsStrJoin : public tflite::shim::OpKernelShim<NGramsStrJoin, Rt> { - protected: - using Shape = tflite::shim::Shape; - - public: - using typename tflite::shim::OpKernelShim<NGramsStrJoin, Rt>::InitContext; - using typename tflite::shim::OpKernelShim<NGramsStrJoin, Rt>::InvokeContext; - using typename tflite::shim::OpKernelShim<NGramsStrJoin, - Rt>::ShapeInferenceContext; - - NGramsStrJoin() = default; - static const char kOpName[]; - static const char kDoc[]; - - // Attributes declaration - static std::vector<std::string> Attrs() { - return {"width: int32", "axis: int", - "reduction_type: string", "string_separator: string", - "RAGGED_RANK: int >= 0", "Tsplits: {int64} = DT_INT64"}; - } - // Input tensors declaration - static std::vector<std::string> Inputs() { - return {"values: string", "row_splits: RAGGED_RANK * Tsplits"}; - } - // Output tensors declaration - static std::vector<std::string> Outputs() { - return {"values: string", "row_splits: RAGGED_RANK * Tsplits"}; - } - - // Initializes the op - absl::Status Init(InitContext* ctx) { - absl::string_view reduction_type_val; - SH_RETURN_IF_ERROR(ctx->GetAttr("reduction_type", &reduction_type_val)); - if (reduction_type_val != kStringJoin) { - return absl::InternalError( - absl::StrCat("Unsupported reduction_type: ", reduction_type_val)); - } - int64_t axis; - SH_RETURN_IF_ERROR(ctx->GetAttr("axis", &axis)); - if (axis != -1) { - return absl::InternalError(absl::StrCat("axis != -1: ", axis)); - } - SH_RETURN_IF_ERROR(ctx->GetAttr("width", &width_)); - absl::string_view string_separator; - SH_RETURN_IF_ERROR(ctx->GetAttr("string_separator", &string_separator)); - string_separator_ = std::string(string_separator); - return absl::OkStatus(); - } - - // Shape inference - static absl::Status ShapeInference(ShapeInferenceContext* ctx) { - if (ctx->NumOutputs() == 1) { - // Tensor Output - SH_ASSIGN_OR_RETURN(const auto input_shape, ctx->GetInputShape(kValues)); - int64_t width; - SH_RETURN_IF_ERROR(ctx->GetAttr("width", &width)); - SH_RETURN_IF_ERROR(ctx->SetOutputShape( - kValues, OutputValuesTensorShape(input_shape, width))); - } else { - // RaggedTensor Output - SH_RETURN_IF_ERROR(ctx->SetOutputShape(kValues, Shape())); - - // The row_splits tensors maintain their shape, because only the - // innermost dimension will change. - for (int i = kRowSplitsStart; i < ctx->NumOutputs(); ++i) { - SH_ASSIGN_OR_RETURN(const Shape input_row_splits_shape, - ctx->GetInputShape(i)); - if (input_row_splits_shape.Rank() != 1) { - return absl::InvalidArgumentError( - absl::StrCat("expected rank == 1 for input index: ", i)); - } - SH_RETURN_IF_ERROR(ctx->SetOutputShape(i, input_row_splits_shape)); - } - } - return absl::OkStatus(); - } - - // Runs the operation - absl::Status Invoke(InvokeContext* ctx) { - using Tsplits = int64_t; - // Storage for the dummy input and output row_splits used in the tensor - // case. - std::vector<Tsplits> tensor_input_row_splits; - std::vector<Tsplits> tensor_output_row_splits; - - const Tsplits* input_row_splits; - Tsplits* output_row_splits; - int n_row_splits = 0; - - SH_ASSIGN_OR_RETURN(const auto input_values, ctx->GetInput(kValues)); - const Shape input_values_shape(input_values->Shape()); - - // Tensor output - if (ctx->NumOutputs() == 1) { - // Generate mock input and output innermost row_splits. 
- int64_t total_tokens = - input_values->template Data<tensorflow::tstring>().size(); - int64_t tokens_per_element = - input_values_shape->at(input_values_shape->size() - 1); - tensor_output_row_splits.resize(total_tokens / tokens_per_element + 1); - for (int64_t i = 0; i <= total_tokens; i += tokens_per_element) { - tensor_input_row_splits.push_back(i); - } - input_row_splits = tensor_input_row_splits.data(); - output_row_splits = tensor_output_row_splits.data(); - n_row_splits = tensor_input_row_splits.size(); - } else { - // RaggedTensor output - int index = 0; - const int num_row_splits = ctx->NumInputs() - kRowSplitsStart; - while (index < num_row_splits - 1) { - SH_ASSIGN_OR_RETURN(const auto input_tensor_row_splits, - ctx->GetInput(kRowSplitsStart + index)); - SH_ASSIGN_OR_RETURN( - const auto output_tensor_row_splits, - ctx->GetOutput(kRowSplitsStart + index, - Shape(input_tensor_row_splits->Shape()))); - const auto input_buffer = - input_tensor_row_splits->template Data<Tsplits>(); - const auto output_buffer = - output_tensor_row_splits->template Data<Tsplits>(); - std::memcpy(output_buffer.data(), input_buffer.data(), - input_buffer.size() * sizeof(Tsplits)); - ++index; - } - - SH_ASSIGN_OR_RETURN(const auto input_tensor_row_splits, - ctx->GetInput(kRowSplitsStart + index)); - SH_ASSIGN_OR_RETURN( - const auto output_tensor_row_splits, - ctx->GetOutput(kRowSplitsStart + index, - Shape(input_tensor_row_splits->Shape()))); - input_row_splits = - input_tensor_row_splits->template Data<Tsplits>().data(); - output_row_splits = - output_tensor_row_splits->template Data<Tsplits>().data(); - n_row_splits = input_tensor_row_splits->Shape().at(0); - } - - const auto input_values_data = - input_values->template Data<tensorflow::tstring>(); - - std::vector<std::string> buffer; - for (int i = 0; i < n_row_splits - 1; ++i) { - output_row_splits[i] = buffer.size(); - std::vector<tensorflow::tstring> tokens; - for (int j = input_row_splits[i]; j < input_row_splits[i + 1]; ++j) { - tokens.emplace_back(input_values_data.at(j)); - if (tokens.size() < width_) - continue; - tokens.erase(tokens.begin(), tokens.begin() + tokens.size() - width_); - buffer.push_back(absl::StrJoin(tokens, string_separator_)); - } - } - output_row_splits[n_row_splits - 1] = buffer.size(); - - tflite::shim::TensorViewOr output_values_or; - if (ctx->NumOutputs() == 1) { - output_values_or = ctx->GetOutput( - kValues, OutputValuesTensorShape(input_values_shape, width_)); - } else { - output_values_or = - ctx->GetOutput(kValues, Shape({static_cast<int>(buffer.size())})); - } - if (!output_values_or.ok()) - return output_values_or.status(); - auto& output_buffer = - output_values_or.value()->template Data<tensorflow::tstring>(); - int i = 0; - for (const auto& v : buffer) - output_buffer[i++] = v; - return absl::OkStatus(); - } - - protected: - inline static Shape OutputValuesTensorShape(const Shape& input_values_shape, - const int64_t width) { - Shape output_shape(input_values_shape); - const int last_dim = output_shape->size() - 1; - (*output_shape)[last_dim] = - std::max(0, output_shape->at(last_dim) - static_cast<int>(width) + 1); - return output_shape; - } - - static const char kStringJoin[]; - // Both the input and output tensors use the same indices. - static constexpr int kValues = 0; - static constexpr int kRowSplitsStart = 1; - - int64_t width_; - std::string string_separator_; -}; - -// Static member definitions. 
-// These can be inlined once the toolchain is bumped up to C++17 - -template <tflite::shim::Runtime Rt> -const char NGramsStrJoin<Rt>::kOpName[] = "TFText>Ngrams"; - -template <tflite::shim::Runtime Rt> -const char NGramsStrJoin<Rt>::kDoc[] = R"doc( -Description: - This TFLite op implements the text.ngrams when reduction_type = STRING_JOIN. - -Input: -* data: A string tensor, or a ragged string tensor (a 1D string value tensor - and one or more 1D int64 row_split tensors). - -Attributes: -* width: scalar integer - The width of the ngram window. -* axis: scalar integer - The axis to create ngrams along. For STRING_JOIN, this must be -1. -* reduction_type: scalar string - A string corresponding to the name of an enum value of text.Reduction - Currently, only STRING_JOIN is supported. -* string_separator: scalar string - The separator string used to join tokens together. - -Output: -* output: A string tensor that matches the rank of 'data'. Will be a ragged - tensor if 'data' is a ragged tensor. -)doc"; - -template <tflite::shim::Runtime Rt> -const char NGramsStrJoin<Rt>::kStringJoin[] = "STRING_JOIN"; - -} // namespace text -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite.cc
deleted file mode 100644
index 1b00aeba..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite.cc
+++ /dev/null
@@ -1,54 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow_text/core/kernels/ngrams_tflite.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/shim/tflite_op_shim.h" -#include "tensorflow_text/core/kernels/ngrams_kernel_template.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { - -using OpKernel = tflite::shim::TfLiteOpKernel<tensorflow::text::NGramsStrJoin>; - -extern "C" void AddNgramsStringJoin(tflite::MutableOpResolver* resolver) { - OpKernel::Add(resolver); -} - -TfLiteRegistration* Register_tftext_Ngrams() { - return OpKernel::GetTfLiteRegistration(); -} - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite.h
deleted file mode 100644
index 0714c355..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite.h
+++ /dev/null
@@ -1,53 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_TFLITE_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_TFLITE_H_ - -/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/mutable_op_resolver.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { - -// Adds the Ngrams custom op to an op resolver. -// This function can be loaded using dlopen. Since C++ function names get -// mangled, declare this function as extern C, so its name is unchanged. -extern "C" void AddNgramsStringJoin(MutableOpResolver* resolver); - -TfLiteRegistration* Register_tftext_Ngrams(); - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_NGRAMS_TFLITE_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc
deleted file mode 100644
index af43e36e..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ngrams_tflite_test.cc
+++ /dev/null
@@ -1,305 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow_text/core/kernels/ngrams_tflite.h" - -#include <string> -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -// #include "flatbuffers/flexbuffers.h" -#include "tensorflow/lite/kernels/test_util.h" -#include "tensorflow/lite/schema/schema_generated.h" -#include "tensorflow/lite/string_util.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { -namespace { - -using ::testing::ElementsAre; -using ::testing::ElementsAreArray; - -class NgramsModel : public SingleOpModel { - public: - // Constructor for testing the op with a tf.Tensor - NgramsModel(int width, - const std::string& string_separator, - const std::vector<std::string>& input_values, - const std::vector<int>& input_shape) { - input_values_ = AddInput(TensorType_STRING); - output_values_ = AddOutput(TensorType_STRING); - - BuildCustomOp(width, string_separator); - - BuildInterpreter({input_shape}); - PopulateStringTensor(input_values_, input_values); - Invoke(); - } - - // Constructor for the op with a tf.RaggedTensor - // Note: This interface uses row_lengths, as they're closer to the - // dimensions in a TensorShape, but internally everything is row_splits. 
- NgramsModel(int width, - const std::string& string_separator, - const std::vector<std::string>& input_values, - const std::vector<std::vector<int64_t>> nested_row_lengths) { - std::vector<std::vector<int>> input_shapes; - input_shapes.reserve(nested_row_lengths.size() + 1); - - input_values_ = AddInput(TensorType_STRING); - input_shapes.push_back({static_cast<int>(input_values.size())}); - output_values_ = AddOutput(TensorType_STRING); - - input_row_splits_.reserve(nested_row_lengths.size()); - output_row_splits_.reserve(nested_row_lengths.size()); - for (int i = 0; i < nested_row_lengths.size(); ++i) { - input_row_splits_.push_back(AddInput(TensorType_INT64)); - input_shapes.push_back( - {static_cast<int>(nested_row_lengths[i].size() + 1)}); - output_row_splits_.push_back(AddOutput(TensorType_INT64)); - } - - BuildCustomOp(width, string_separator); - - BuildInterpreter(input_shapes); - PopulateStringTensor(input_values_, input_values); - for (int i = 0; i < nested_row_lengths.size(); ++i) { - std::vector<int64_t> row_splits; - row_splits.reserve(nested_row_lengths[i].size() + 1); - int64_t index = 0; - row_splits.push_back(index); - for (int64_t row_length : nested_row_lengths[i]) { - index += row_length; - row_splits.push_back(index); - } - PopulateTensor(input_row_splits_[i], row_splits); - } - Invoke(); - } - - std::vector<int> GetValuesTensorShape() { - return GetTensorShape(output_values_); - } - - std::vector<std::string> ExtractValuesTensorVector() { - std::vector<std::string> r; - TfLiteTensor* tensor = interpreter_->tensor(output_values_); - int n = GetStringCount(tensor); - for (int i = 0; i < n; ++i) { - StringRef ref = GetString(tensor, i); - r.emplace_back(ref.str, ref.len); - } - return r; - } - - int GetNumNestedRowLengths() { return output_row_splits_.size(); } - - std::vector<int> GetRowLengthsTensorShape(int i) { - std::vector<int> shape = GetTensorShape(output_row_splits_[i]); - --shape[0]; - return shape; - } - - std::vector<int64_t> ExtractRowLengthsTensorVector(int i) { - std::vector<int64_t> row_splits = - ExtractVector<int64_t>(output_row_splits_[i]); - std::vector<int64_t> row_lengths; - row_lengths.reserve(row_splits.size() - 1); - int64_t head = row_splits[0]; - for (int i = 1; i < row_splits.size(); ++i) { - int64_t tail = row_splits[i]; - row_lengths.push_back(tail - head); - head = tail; - } - return row_lengths; - } - - private: - void BuildCustomOp(int width, const std::string& string_separator) { - flexbuffers::Builder fbb; - size_t start_map = fbb.StartMap(); - fbb.Int("width", width); - fbb.String("string_separator", string_separator); - fbb.Int("axis", -1); - fbb.String("reduction_type", "STRING_JOIN"); - fbb.EndMap(start_map); - fbb.Finish(); - - SetCustomOp("tftext:Ngrams", fbb.GetBuffer(), Register_tftext_Ngrams); - } - - int input_values_; - std::vector<int> input_row_splits_; - int output_values_; - std::vector<int> output_row_splits_; -}; - -TEST(NgramsTest, TensorSingleSequenceWidthTwo) { - NgramsModel m(2, " ", {"this", "is", "a", "test"}, std::vector<int>{4}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(3)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this is", "is a", "a test")); -} - -TEST(NgramsTest, TensorSingleSequenceWidthThree) { - NgramsModel m(3, " ", {"this", "is", "a", "test"}, std::vector<int>{4}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(2)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this is a", "is a test")); -} - -TEST(NgramsTest, TensorSingleSequenceLongerSeparator) { - 
NgramsModel m(2, "...", {"this", "is", "a", "test"}, std::vector<int>{4}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(3)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this...is", "is...a", "a...test")); -} - -TEST(NgramsTest, TensorSingleSequenceWidthTooLong) { - NgramsModel m(5, " ", {"this", "is", "a", "test"}, std::vector<int>{4}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(0)); - EXPECT_THAT(m.ExtractValuesTensorVector(), ElementsAre()); -} - -TEST(NgramsTest, TensorMultidimensionalInputWidthTwo) { - NgramsModel m(2, " ", - { - "0,0,0", "0,0,1", "0,0,2", "0,0,3", // - "0,1,0", "0,1,1", "0,1,2", "0,1,3", // - "0,2,0", "0,2,1", "0,2,2", "0,2,3", // - "1,0,0", "1,0,1", "1,0,2", "1,0,3", // - "1,1,0", "1,1,1", "1,1,2", "1,1,3", // - "1,2,0", "1,2,1", "1,2,2", "1,2,3", // - }, - std::vector<int>{2, 3, 4}); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(2, 3, 3)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAreArray({ - "0,0,0 0,0,1", "0,0,1 0,0,2", "0,0,2 0,0,3", // - "0,1,0 0,1,1", "0,1,1 0,1,2", "0,1,2 0,1,3", // - "0,2,0 0,2,1", "0,2,1 0,2,2", "0,2,2 0,2,3", // - "1,0,0 1,0,1", "1,0,1 1,0,2", "1,0,2 1,0,3", // - "1,1,0 1,1,1", "1,1,1 1,1,2", "1,1,2 1,1,3", // - "1,2,0 1,2,1", "1,2,1 1,2,2", "1,2,2 1,2,3", // - })); -} - -TEST(NgramsTest, RaggedTensorSingleSequenceWidthTwo) { - std::vector<std::vector<int64_t>> nested_row_lengths; - nested_row_lengths.push_back({4}); - NgramsModel m(2, " ", {"this", "is", "a", "test"}, nested_row_lengths); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(3)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this is", "is a", "a test")); - ASSERT_THAT(m.GetNumNestedRowLengths(), 1); - EXPECT_THAT(m.GetRowLengthsTensorShape(0), ElementsAre(1)); - EXPECT_THAT(m.ExtractRowLengthsTensorVector(0), ElementsAre(3)); -} - -TEST(NgramsTest, RaggedTensorSingleSequenceWidthThree) { - std::vector<std::vector<int64_t>> nested_row_lengths; - nested_row_lengths.push_back({4}); - NgramsModel m(3, " ", {"this", "is", "a", "test"}, nested_row_lengths); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(2)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this is a", "is a test")); - ASSERT_THAT(m.GetNumNestedRowLengths(), 1); - EXPECT_THAT(m.GetRowLengthsTensorShape(0), ElementsAre(1)); - EXPECT_THAT(m.ExtractRowLengthsTensorVector(0), ElementsAre(2)); -} - -TEST(NgramsTest, RaggedTensorSingleSequenceLongerSeparator) { - std::vector<std::vector<int64_t>> nested_row_lengths; - nested_row_lengths.push_back({4}); - NgramsModel m(2, "<>", {"this", "is", "a", "test"}, nested_row_lengths); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(3)); - EXPECT_THAT(m.ExtractValuesTensorVector(), - ElementsAre("this<>is", "is<>a", "a<>test")); - ASSERT_THAT(m.GetNumNestedRowLengths(), 1); - EXPECT_THAT(m.GetRowLengthsTensorShape(0), ElementsAre(1)); - EXPECT_THAT(m.ExtractRowLengthsTensorVector(0), ElementsAre(3)); -} - -TEST(NgramsTest, RaggedTensorSingleSequenceWidthTooLong) { - std::vector<std::vector<int64_t>> nested_row_lengths; - nested_row_lengths.push_back({4}); - NgramsModel m(5, " ", {"this", "is", "a", "test"}, nested_row_lengths); - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(0)); - EXPECT_THAT(m.ExtractValuesTensorVector(), ElementsAre()); - ASSERT_THAT(m.GetNumNestedRowLengths(), 1); - EXPECT_THAT(m.GetRowLengthsTensorShape(0), ElementsAre(1)); - EXPECT_THAT(m.ExtractRowLengthsTensorVector(0), ElementsAre(0)); -} - -TEST(NgramsTest, RaggedTensorMultidimensionalInputWidthTwo) { - 
std::vector<std::vector<int64_t>> nested_row_lengths; - nested_row_lengths.push_back({4, 2, 1}); - nested_row_lengths.push_back({5, 4, 3, 2, 2, 3, 4, 6}); - NgramsModel m(2, " ", - { - "0,0,0", "0,0,1", "0,0,2", "0,0,3", "0,0,4", // - "0,1,0", "0,1,1", "0,1,2", "0,1,3", // - "0,2,0", "0,2,1", "0,2,2", // - "0,3,0", "0,3,1", // - "1,0,0", "1,0,1", // - "1,1,0", "1,1,1", "1,1,2", // - "1,2,0", "1,2,1", "1,2,2", "1,2,3", // - "2,0,0", "2,0,1", "2,0,2", "2,0,3", "2,0,4", "2,0,5", // - }, - nested_row_lengths); - - std::vector<std::string> expected_values = { - "0,0,0 0,0,1", "0,0,1 0,0,2", "0,0,2 0,0,3", "0,0,3 0,0,4", // - "0,1,0 0,1,1", "0,1,1 0,1,2", "0,1,2 0,1,3", // - "0,2,0 0,2,1", "0,2,1 0,2,2", // - "0,3,0 0,3,1", // - "1,0,0 1,0,1", // - "1,1,0 1,1,1", "1,1,1 1,1,2", // - "1,2,0 1,2,1", "1,2,1 1,2,2", "1,2,2 1,2,3", // - "2,0,0 2,0,1", "2,0,1 2,0,2", "2,0,2 2,0,3", "2,0,3 2,0,4", - "2,0,4 2,0,5", // - }; - EXPECT_THAT(m.GetValuesTensorShape(), ElementsAre(expected_values.size())); - EXPECT_THAT(m.ExtractValuesTensorVector(), ElementsAreArray(expected_values)); - ASSERT_THAT(m.GetNumNestedRowLengths(), 2); - EXPECT_THAT(m.GetRowLengthsTensorShape(0), ElementsAre(3)); - EXPECT_THAT(m.ExtractRowLengthsTensorVector(0), ElementsAre(4, 2, 1)); - EXPECT_THAT(m.GetRowLengthsTensorShape(1), ElementsAre(8)); - EXPECT_THAT(m.ExtractRowLengthsTensorVector(1), - ElementsAre(4, 3, 2, 1, 1, 2, 3, 5)); -} - -} // namespace -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/normalize_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/normalize_kernels.cc
deleted file mode 100644
index ce321f0..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/normalize_kernels.cc
+++ /dev/null
@@ -1,370 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <cstdint> -#include <locale> -#include <string> -#include <tuple> -#include <vector> - -#include "absl/strings/ascii.h" -#include "absl/strings/str_cat.h" -#include "icu4c/source/common/unicode/edits.h" -#include "icu4c/source/common/unicode/errorcode.h" -#include "icu4c/source/common/unicode/normalizer2.h" -#include "icu4c/source/common/unicode/utypes.h" -#include "tensorflow/core/framework/node_def_util.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/variant.h" -#include "tensorflow/core/framework/variant_encode_decode.h" -#include "tensorflow_text/core/kernels/edit_changes.pb.h" - -namespace tensorflow { -namespace text { - -class CaseFoldUTF8Op : public tensorflow::OpKernel { - public: - explicit CaseFoldUTF8Op(tensorflow::OpKernelConstruction* context) - : tensorflow::OpKernel(context) {} - - void Compute(tensorflow::OpKernelContext* context) override { - const tensorflow::Tensor* input_tensor; - OP_REQUIRES_OK(context, context->input("input", &input_tensor)); - const auto& input_vec = input_tensor->flat<tstring>(); - - // TODO(gregbillock): support forwarding - tensorflow::Tensor* output_tensor; - OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor->shape(), - &output_tensor)); - auto output_vec = output_tensor->flat<tstring>(); - - icu::ErrorCode icu_error; - const icu::Normalizer2* nfkc_cf = - icu::Normalizer2::getNFKCCasefoldInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal(absl::StrCat( - icu_error.errorName(), - ": Could not retrieve ICU NFKC_CaseFold normalizer"))); - - for (int64 i = 0; i < input_vec.size(); ++i) { - string output_text; - icu::StringByteSink<string> byte_sink(&output_text); - const auto& input = input_vec(i); - nfkc_cf->normalizeUTF8(0, icu::StringPiece(input.data(), input.size()), - byte_sink, nullptr, icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal("Could not normalize input string: " + - input_vec(i))); - output_vec(i) = output_text; - } - } -}; - -REGISTER_KERNEL_BUILDER(Name("CaseFoldUTF8").Device(tensorflow::DEVICE_CPU), - CaseFoldUTF8Op); - -namespace { - -string GetNormalizationForm(OpKernelConstruction* context) { - string normalization_form; - ([=](string* c) -> void { - OP_REQUIRES_OK(context, context->GetAttr("normalization_form", c)); - })(&normalization_form); - return absl::AsciiStrToUpper(normalization_form); -} - -} // namespace - -class NormalizeUTF8Op : public tensorflow::OpKernel { - public: - explicit NormalizeUTF8Op(tensorflow::OpKernelConstruction* context) - : tensorflow::OpKernel(context), - normalization_form_(GetNormalizationForm(context)) {} - - void Compute(tensorflow::OpKernelContext* context) override { - const tensorflow::Tensor* input_tensor; - OP_REQUIRES_OK(context, context->input("input", 
&input_tensor)); - const auto& input_vec = input_tensor->flat<tstring>(); - - tensorflow::Tensor* output_tensor; - OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor->shape(), - &output_tensor)); - auto output_vec = output_tensor->flat<tstring>(); - - icu::ErrorCode icu_error; - const icu::Normalizer2* normalizer = nullptr; - if (normalization_form_ == "NFKC") { - normalizer = icu::Normalizer2::getNFKCInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal(absl::StrCat( - icu_error.errorName(), - ": Could not retrieve ICU NFKC normalizer"))); - } else if (normalization_form_ == "NFC") { - normalizer = icu::Normalizer2::getNFCInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal( - absl::StrCat(icu_error.errorName(), - ": Could not retrieve ICU NFC normalizer"))); - } else if (normalization_form_ == "NFD") { - normalizer = icu::Normalizer2::getNFDInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal( - absl::StrCat(icu_error.errorName(), - ": Could not retrieve ICU NFD normalizer"))); - } else if (normalization_form_ == "NFKD") { - normalizer = icu::Normalizer2::getNFKDInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal(absl::StrCat( - icu_error.errorName(), - ": Could not retrieve ICU NFKd normalizer"))); - } else { - OP_REQUIRES( - context, false, - errors::InvalidArgument(absl::StrCat( - "Unknown normalization form requrested: ", normalization_form_))); - } - - for (int64 i = 0; i < input_vec.size(); ++i) { - string output_text; - icu::StringByteSink<string> byte_sink(&output_text); - const auto& input = input_vec(i); - normalizer->normalizeUTF8(0, icu::StringPiece(input.data(), input.size()), - byte_sink, nullptr, icu_error); - OP_REQUIRES( - context, icu_error.isSuccess(), - errors::Internal(absl::StrCat(icu_error.errorName(), - ": Could not normalize input string: ", - absl::string_view(input_vec(i))))); - output_vec(i) = output_text; - } - } - - private: - string normalization_form_; -}; - -REGISTER_KERNEL_BUILDER(Name("NormalizeUTF8").Device(tensorflow::DEVICE_CPU), - NormalizeUTF8Op); - -namespace { - -// OffsetMapVariant is a tf.Variant object that stores a single icu::Edits -// object and providing encode/decode methods. -// The encode method is called to serialize the stored icu::Edits object when -// the variant is assigned to graph output. The decode method is called to -// reconstruct the icu::Edits object from the serialized `changes` string when -// the variant is at the graph input. 
-struct OffsetMapVariant { - string changes; - icu::Edits edits_; - - std::string TypeName() const { return "(anonymous)::OffsetMapVariant"; } - void Encode(tensorflow::VariantTensorData* data) const; - bool Decode(const tensorflow::VariantTensorData& data); -}; - -void OffsetMapVariant::Encode(tensorflow::VariantTensorData* data) const { - EditChanges changes; - icu::Edits::Iterator it = edits_.getFineIterator(); - icu::ErrorCode icu_error; - while (it.next(icu_error)) { - auto* change = changes.add_change(); - change->set_old_length(it.oldLength()); - change->set_new_length(it.newLength()); - } - string changes_str = changes.SerializeAsString(); - data->set_metadata(changes_str); -} - -bool OffsetMapVariant::Decode(const tensorflow::VariantTensorData& data) { - string serialized; - data.get_metadata(&serialized); - EditChanges changes; - changes.ParseFromString(serialized); - icu::Edits edit; - icu::ErrorCode icu_error; - for (int64 j = 0; j < changes.change_size(); ++j) { - auto* change = changes.mutable_change(j); - int old_length = change->old_length(); - int new_length = change->new_length(); - if (old_length == new_length) { - edit.addUnchanged(static_cast<int32_t>(old_length)); - } else { - edit.addReplace(static_cast<int32_t>(old_length), - static_cast<int32_t>(new_length)); - } - } - edits_ = edit; - return true; -} -} // namespace - -class NormalizeUTF8WithOffsetsMapOp : public tensorflow::OpKernel { - public: - explicit NormalizeUTF8WithOffsetsMapOp( - tensorflow::OpKernelConstruction* context) - : tensorflow::OpKernel(context), - normalization_form_(GetNormalizationForm(context)) {} - - void Compute(tensorflow::OpKernelContext* context) override { - const tensorflow::Tensor* input_tensor; - OP_REQUIRES_OK(context, context->input("input", &input_tensor)); - const auto& input_vec = input_tensor->flat<tstring>(); - - tensorflow::Tensor* output_tensor; - OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor->shape(), - &output_tensor)); - tensorflow::Tensor* output_offsets_map_tensor; - OP_REQUIRES_OK(context, - context->allocate_output(1, input_tensor->shape(), - &output_offsets_map_tensor)); - - auto output_vec = output_tensor->flat<tstring>(); - auto output_offsets_map_vec = output_offsets_map_tensor->flat<Variant>(); - - icu::ErrorCode icu_error; - const icu::Normalizer2* normalizer = nullptr; - if (normalization_form_ == "NFKC") { - normalizer = icu::Normalizer2::getNFKCInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal(absl::StrCat( - icu_error.errorName(), - ": Could not retrieve ICU NFKC normalizer"))); - } else if (normalization_form_ == "NFC") { - normalizer = icu::Normalizer2::getNFCInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal( - absl::StrCat(icu_error.errorName(), - ": Could not retrieve ICU NFC normalizer"))); - } else if (normalization_form_ == "NFD") { - normalizer = icu::Normalizer2::getNFDInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal( - absl::StrCat(icu_error.errorName(), - ": Could not retrieve ICU NFD normalizer"))); - } else if (normalization_form_ == "NFKD") { - normalizer = icu::Normalizer2::getNFKDInstance(icu_error); - OP_REQUIRES(context, icu_error.isSuccess(), - errors::Internal(absl::StrCat( - icu_error.errorName(), - ": Could not retrieve ICU NFKD normalizer"))); - } else { - OP_REQUIRES(context, false, - errors::InvalidArgument(absl::StrCat( - "Offset not supported for this normalization form: ", - normalization_form_))); - } - 
- for (int64 i = 0; i < input_vec.size(); ++i) { - OffsetMapVariant variant; - string output_text; - icu::Edits edits; - icu::StringByteSink<string> byte_sink(&output_text); - const auto& input = input_vec(i); - normalizer->normalizeUTF8(0, icu::StringPiece(input.data(), input.size()), - byte_sink, &edits, icu_error); - OP_REQUIRES( - context, icu_error.isSuccess(), - errors::Internal(absl::StrCat(icu_error.errorName(), - ": Could not normalize input string: ", - absl::string_view(input_vec(i))))); - - output_vec(i) = output_text; - variant.edits_ = std::move(edits); - output_offsets_map_vec(i) = variant; - } - } - - private: - string normalization_form_; -}; - -REGISTER_KERNEL_BUILDER( - Name("NormalizeUTF8WithOffsetsMap").Device(tensorflow::DEVICE_CPU), - NormalizeUTF8WithOffsetsMapOp); - -template <typename SPLITS_TYPE> -class FindSourceOffsetsOp : public tensorflow::OpKernel { - public: - explicit FindSourceOffsetsOp(tensorflow::OpKernelConstruction* context) - : tensorflow::OpKernel(context) {} - - void Compute(tensorflow::OpKernelContext* context) override { - const tensorflow::Tensor& edits_values = context->input(0); - const tensorflow::Tensor& input_offsets_values = context->input(1); - const tensorflow::Tensor& input_offsets_splits = context->input(2); - - const auto& input_offsets_values_vec = input_offsets_values.flat<int64>(); - const auto& input_offsets_splits_vec = - input_offsets_splits.flat<SPLITS_TYPE>(); - const auto& edits_vec = edits_values.flat<Variant>(); - - icu::ErrorCode icu_error; - int64 cur_split_index_begin = 0; - int64 cur_split_index_end = 0; - std::vector<int64> output_offsets_values(input_offsets_values_vec.size()); - int64 idx_edits = 0; - int64 idx_output = 0; - for (int64 i = 0; i < input_offsets_splits_vec.size() - 1; ++i) { - cur_split_index_begin = input_offsets_splits_vec(i); - cur_split_index_end = input_offsets_splits_vec(i + 1); - if (cur_split_index_begin == cur_split_index_end) { - continue; - } - OP_REQUIRES(context, idx_edits < edits_vec.size(), - tensorflow::errors::InvalidArgument( - "Input offset tensor dimension did not match the offset " - "map dimension.")); - auto iter = edits_vec(idx_edits++) - .get<OffsetMapVariant>() - ->edits_.getFineChangesIterator(); - for (int64 j = cur_split_index_begin; j < cur_split_index_end; ++j) { - output_offsets_values[idx_output++] = - iter.sourceIndexFromDestinationIndex(input_offsets_values_vec(j), - icu_error); - } - } - OP_REQUIRES(context, idx_edits == edits_vec.size(), - tensorflow::errors::InvalidArgument( - "Input offset tensor dimension did not match the offset " - "map dimension.")); - - int64 output_offsets_values_size = output_offsets_values.size(); - Tensor* output_offsets_values_tensor = nullptr; - OP_REQUIRES_OK(context, context->allocate_output( - "output_offsets_values", - TensorShape({output_offsets_values_size}), - &output_offsets_values_tensor)); - auto output_offsets_values_data = - output_offsets_values_tensor->flat<int64>().data(); - memcpy(output_offsets_values_data, output_offsets_values.data(), - output_offsets_values_size * sizeof(int64)); - } - - private: - TF_DISALLOW_COPY_AND_ASSIGN(FindSourceOffsetsOp); -}; - -REGISTER_KERNEL_BUILDER(Name("FindSourceOffsets") - .Device(tensorflow::DEVICE_CPU) - .TypeConstraint<int64>("Tsplits"), - FindSourceOffsetsOp<int64>); -REGISTER_KERNEL_BUILDER(Name("FindSourceOffsets") - .Device(tensorflow::DEVICE_CPU) - .TypeConstraint<int32>("Tsplits"), - FindSourceOffsetsOp<int32>); -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/normalize_kernels_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/normalize_kernels_test.cc
deleted file mode 100644
index 5974638..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/normalize_kernels_test.cc
+++ /dev/null
@@ -1,27 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Copyright 2020 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc
deleted file mode 100644
index 141147a..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.cc
+++ /dev/null
@@ -1,749 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include <cstdint> -#include <memory> - -// #include "flatbuffers/flexbuffers.h" -#include "tensorflow/core/util/ragged_to_dense_util_common.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/internal/types.h" -#include "tensorflow/lite/kernels/kernel_util.h" -#include "tensorflow/lite/mutable_op_resolver.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { -namespace ragged_tensor_to_tensor { -namespace { - -constexpr int kShapeInput = 0; -constexpr int kValuesInput = 1; -constexpr int kDefaultValueInput = 2; -constexpr int kFirstPartitionInputIndex = 3; - -constexpr int kOutputTensor = 0; - -constexpr char kRowPartitionTypesAttr[] = "row_partition_types"; - -// The following three functions are copied from -// .../tensorflow/lite/kernels/internal/tensor_ctypes.h -// This header is not available in tensorflow package when building. -template <typename T> -inline T* GetTensorData(TfLiteTensor* tensor) { - return tensor != nullptr ? reinterpret_cast<T*>(tensor->data.raw) : nullptr; -} - -template <typename T> -inline const T* GetTensorData(const TfLiteTensor* tensor) { - return tensor != nullptr ? 
reinterpret_cast<const T*>(tensor->data.raw) - : nullptr; -} - -inline RuntimeShape GetTensorShape(const TfLiteTensor* tensor) { - if (tensor == nullptr) { - return RuntimeShape(); - } - - TfLiteIntArray* dims = tensor->dims; - const int dims_size = dims->size; - const int32_t* dims_data = reinterpret_cast<const int32_t*>(dims->data); - return RuntimeShape(dims_size, dims_data); -} - -struct ConversionAttributes { - std::vector<tensorflow::RowPartitionType> partition_types; - int ragged_rank = 0; - - tensorflow::RowPartitionType GetRowPartitionTypeByDimension( - int dimension) const { - if (partition_types.front() == - tensorflow::RowPartitionType::FIRST_DIM_SIZE) { - return partition_types[dimension + 1]; - } else { - return partition_types[dimension]; - } - } -}; -template <typename INDEX_TYPE> -int GetFirstDimensionSizeT(TfLiteContext* context, - const TfLiteTensor& first_partition_input, - const ConversionAttributes* attributes) { - const tensorflow::RowPartitionType first_partition_type = - attributes->partition_types.front(); - switch (first_partition_type) { - case tensorflow::RowPartitionType::FIRST_DIM_SIZE: - return *GetTensorData<INDEX_TYPE>(&first_partition_input); - case tensorflow::RowPartitionType::VALUE_ROWIDS: - context->ReportError(context, - "Cannot handle VALUE_ROWIDS in first dimension."); - return -1; - case tensorflow::RowPartitionType::ROW_SPLITS: { - const auto shape = GetTensorShape(&first_partition_input); - return shape.Dims(0) - 1; - } - - default: - context->ReportError( - context, "Cannot handle type ", - RowPartitionTypeToString(first_partition_type).c_str()); - return -1; - } -} - -int GetFirstDimensionSize(TfLiteContext* context, - const TfLiteTensor& first_partition_input, - const ConversionAttributes* attributes) { - switch (first_partition_input.type) { - case kTfLiteInt32: - return GetFirstDimensionSizeT<int32_t>(context, first_partition_input, - attributes); - case kTfLiteInt64: - return GetFirstDimensionSizeT<int64_t>(context, first_partition_input, - attributes); - default: - context->ReportError(context, - "Not supported row partitioning tensor type"); - return -1; - } -} - -bool ValidateDefaultValueShape(TfLiteContext* context, - const RuntimeShape& default_value_shape, - const RuntimeShape& /*value_shape*/) { - // TF implementation also checks that shapes are not defined, not needed in - // TFLite. - // TODO(mgubin): Only scalar default value sizes are supported. - if (default_value_shape.FlatSize() != 1) { - context->ReportError(context, "Only scalar default value is supported"); - return false; - } - return true; -} - -RuntimeShape TensorShapeFromTensor(const TfLiteTensor& tensor) { - // TODO(mgubin): No checks, see - // third_party/tensorflow/core/kernels/list_kernels.cc - const RuntimeShape tensor_shape(tensor.dims->size, tensor.dims->data); - if (0 == tensor.dims->size) { - // If the input tensor is scalar then the shape is empty (also scalar). - return RuntimeShape{}; - } - RuntimeShape result(tensor_shape.FlatSize()); - switch (tensor.type) { - case kTfLiteInt32: { - for (int i = 0; i < tensor_shape.FlatSize(); ++i) { - result.SetDim(i, GetTensorData<int32_t>(&tensor)[i]); - } - } break; - case kTfLiteInt64: { - for (int i = 0; i < tensor_shape.FlatSize(); ++i) { - result.SetDim(i, GetTensorData<int64_t>(&tensor)[i]); - } - } break; - default: { - // Checked in Prepare. 
- } - } - return result; -} - -const TfLiteTensor* GetRowPartitionTensor( - const ConversionAttributes& conversion_attributes, - TfLiteContext* context, - TfLiteNode* node, - int dimension) { - if (conversion_attributes.partition_types.front() == - tensorflow::RowPartitionType::FIRST_DIM_SIZE) { - return &context->tensors[node->inputs->data[kFirstPartitionInputIndex + 1 + - dimension]]; - } else { - return &context->tensors[node->inputs - ->data[kFirstPartitionInputIndex + dimension]]; - } -} - -int GetMaxWidthValueRowID(const TfLiteTensor* tensor) { - const RuntimeShape tensor_shape(tensor->dims->size, tensor->dims->data); - const int index_length = tensor_shape.FlatSize(); - if (index_length == 0) { - return 0; - } - auto value_rowids = [tensor](int index) { - switch (tensor->type) { - case kTfLiteInt32: - return static_cast<int>(tensor->data.i32[index]); - case kTfLiteInt64: - return static_cast<int>(tensor->data.i64[index]); - default: - // TODO(mgubin): Add error checks. - return 0; - } - }; - int first_equal_index = 0; - int first_equal_index_value = value_rowids(0); - int max_width = 0; - for (int i = 0; i < index_length; ++i) { - const int value = value_rowids(i); - if (value != first_equal_index_value) { - first_equal_index_value = value; - max_width = std::max(i - first_equal_index, max_width); - first_equal_index = i; - } - } - return std::max(index_length - first_equal_index, max_width); -} - -int GetMaxWidthRowSplit(const TfLiteTensor* tensor) { - const RuntimeShape tensor_shape(tensor->dims->size, tensor->dims->data); - const int tensor_length = tensor_shape.FlatSize(); - if (tensor_length == 0 || tensor_length == 1) { - return 0; - } - auto value_rowsplit = [tensor](int index) { - switch (tensor->type) { - case kTfLiteInt32: - return static_cast<int>(tensor->data.i32[index]); - case kTfLiteInt64: - return static_cast<int>(tensor->data.i64[index]); - default: - // TODO(mgubin): Add error checks. 
- return 0; - } - }; - int max_width = 1; - int prev_split = value_rowsplit(0); - for (int i = 1; i < tensor_length; ++i) { - const int split = value_rowsplit(i); - max_width = std::max(max_width, split - prev_split); - prev_split = split; - } - return max_width; -} - -int GetMaxWidth(const ConversionAttributes& conversion_attributes, - TfLiteContext* context, - TfLiteNode* node, - int dimension) { - const TfLiteTensor* tensor = GetRowPartitionTensor( - conversion_attributes, context, node, dimension - 1); - switch (conversion_attributes.GetRowPartitionTypeByDimension(dimension - 1)) { - case tensorflow::RowPartitionType::VALUE_ROWIDS: - return GetMaxWidthValueRowID(tensor); - case tensorflow::RowPartitionType::ROW_SPLITS: - return GetMaxWidthRowSplit(tensor); - default: - context->ReportError(context, "Cannot handle partition type"); - return -1; - } -} - -RuntimeShape CombineRaggedTensorToTensorShapes( - int ragged_rank, - const RuntimeShape& output_shape, - const RuntimeShape& value_shape) { - // TODO(mgubin): No checks, see - // third_party/tensorflow/core/ops/ragged_to_dense_util.cc - RuntimeShape result(output_shape); - if (output_shape.DimensionsCount() == 0) { - const int output_shape_rank = ragged_rank + value_shape.DimensionsCount(); - result.Resize(output_shape_rank); - for (int i = 0; i < output_shape_rank; ++i) { - result.SetDim(i, -1); - } - } - const int need_to_set = - output_shape.DimensionsCount() - value_shape.DimensionsCount(); - for (int i = 1; i < value_shape.DimensionsCount(); ++i) { - result.SetDim(need_to_set + i, value_shape.Dims(i)); - } - return result; -} - -RuntimeShape CalculateOutputSize( - const ConversionAttributes& conversion_attributes, - TfLiteContext* context, - TfLiteNode* node, - int first_dimension, - int ragged_rank, - const TfLiteTensor& values, - const TfLiteTensor& default_value, - const TfLiteTensor& output_shape) { - RuntimeShape values_shape(values.dims->size, values.dims->data); - RuntimeShape default_value_shape(default_value.dims->size, - default_value.dims->data); - - if (!ValidateDefaultValueShape(context, default_value_shape, values_shape)) { - return {}; - } - RuntimeShape output_shape_shape = TensorShapeFromTensor(output_shape); - - RuntimeShape result_shape = CombineRaggedTensorToTensorShapes( - ragged_rank, output_shape_shape, values_shape); - if (result_shape.Dims(0) < 0) { - result_shape.SetDim(0, first_dimension); - } - for (int i = 1; i <= ragged_rank; ++i) { - if (result_shape.Dims(i) < 0) { - result_shape.SetDim(i, - GetMaxWidth(conversion_attributes, context, node, i)); - } - } - return result_shape; -} - -TfLiteIntArray* IntArrayFromShape(const RuntimeShape& shape) { - TfLiteIntArray* result = TfLiteIntArrayCreate(shape.DimensionsCount()); - for (int i = 0; i < shape.DimensionsCount(); ++i) { - result->data[i] = shape.Dims(i); - } - return result; -} - -/** - * The output_index represents the index in the output tensor - * where the first element of a particular dimension would be written. - * If it is -1, it indicates that the index is out of scope. 
- * Example, given first_dimension = 10, first_dimension_output = 6, - * and output_index_multiplier = 100: - * result = [0 100 200 300 400 500 -1 -1 -1 -1] - * If first_dimension_output = 11 instead, then: - * result = [0 100 200 300 400 500 600 700 800 900] - */ -void CalculateFirstParentOutputIndex(int first_dimension, - int output_index_multiplier, - int first_dimension_output, - std::vector<int>* result) { - const int min_dimension = std::min(first_dimension, first_dimension_output); - result->reserve(first_dimension); - int current_output_index = 0; - for (int i = 0; i < min_dimension; - ++i, current_output_index += output_index_multiplier) { - result->push_back(current_output_index); - } - for (int i = min_dimension; i < first_dimension; ++i) { - result->push_back(-1); - } -} -// Calculate the output index of the first element of a list. -// The parent_output_index is the same computation for the previous list. -// -1 indicates an element or list that is out of range. -// The output_index_multiplier is the number of output indices one moves -// forward for each column. -// E.g., given: -// value_rowids:[0 1 2 2 2 3 5 5 6] -// parent_output_index:[1000 1100 2000 2100 -1 3000 4000] -// output_index_multiplier: 10 -// output_size: 2 -// You get: -// result = [1000 1100 2000 2010 -1 2100 -1 -1 3000] -// result[0] = parent_output_index[value_rowids[0]] -// result[1] = parent_output_index[value_rowids[1]] -// result[2] = parent_output_index[value_rowids[2]] -// result[3] = parent_output_index[value_rowids[2] + 10] -// result[4] = -1 because it is the third element the size is 2. -// result[5] = parent_output_index[value_rowids[3]] -// result[6] = -1 because parent_output_index[value_rowids[6]] == -1 -// result[7] = -1 because parent_output_index[value_rowids[6]] == -1 -// result[8] = parent_output_index[value_rowids[7]] -void CalculateOutputIndexValueRowID(const TfLiteTensor& value_rowids, - const std::vector<int>& parent_output_index, - int output_index_multiplier, - int output_size, - std::vector<int>* result) { - const RuntimeShape tensor_shape(value_rowids.dims->size, - value_rowids.dims->data); - const int index_size = tensor_shape.FlatSize(); - result->reserve(index_size); - if (index_size == 0) { - return; - } - - auto value_rowids_val = [value_rowids](int index) { - switch (value_rowids.type) { - case kTfLiteInt32: - return static_cast<int>(value_rowids.data.i32[index]); - case kTfLiteInt64: - return static_cast<int>(value_rowids.data.i64[index]); - default: - // TODO(mgubin): Add error checks. 
- return 0; - } - }; - int current_output_column = 0; - int current_value_rowid = value_rowids_val(0); - // DCHECK_LT(current_value_rowid, parent_output_index.size()); - int current_output_index = parent_output_index[current_value_rowid]; - result->push_back(current_output_index); - for (int i = 1; i < index_size; ++i) { - int next_value_rowid = value_rowids_val(i); - if (next_value_rowid == current_value_rowid) { - if (current_output_index >= 0) { - ++current_output_column; - if (current_output_column < output_size) { - current_output_index += output_index_multiplier; - } else { - current_output_index = -1; - } - } - } else { - current_output_column = 0; - current_value_rowid = next_value_rowid; - // DCHECK_LT(next_value_rowid, parent_output_index.size()); - current_output_index = parent_output_index[next_value_rowid]; - } - result->push_back(current_output_index); - } - // DCHECK_EQ(result->size(), value_rowids.size()); -} - -void CalculateOutputIndexRowSplit(const TfLiteTensor& row_split, - const std::vector<int>& parent_output_index, - int output_index_multiplier, - int output_size, - std::vector<int>* result) { - const RuntimeShape row_split_shape(row_split.dims->size, - row_split.dims->data); - const int row_split_size = row_split_shape.FlatSize(); - auto row_split_val = [row_split](int index) { - switch (row_split.type) { - case kTfLiteInt32: - return static_cast<int>(row_split.data.i32[index]); - case kTfLiteInt64: - return static_cast<int>(row_split.data.i64[index]); - default: - // TODO(mgubin): Add error checks. - return 0; - } - }; - if (row_split_size > 0) { - result->reserve(row_split_val(row_split_size - 1)); - } - for (int i = 0; i < row_split_size - 1; ++i) { - const int row_length = row_split_val(i + 1) - row_split_val(i); - int real_length = std::min(output_size, row_length); - int parent_output_index_current = parent_output_index[i]; - - if (parent_output_index_current == -1) { - real_length = 0; - } - for (int j = 0; j < real_length; ++j) { - result->push_back(parent_output_index_current); - parent_output_index_current += output_index_multiplier; - } - for (int j = 0; j < row_length - real_length; ++j) { - result->push_back(-1); - } - } - // if (row_split_size > 0) { - // DCHECK_EQ(result->size(), row_split(row_split_size - 1)); - //} -} - -TfLiteStatus CalculateOutputIndex( - const ConversionAttributes& conversion_attributes, - TfLiteContext* context, - TfLiteNode* node, - int dimension, - const std::vector<int>& parent_output_index, - int output_index_multiplier, - int output_size, - std::vector<int>* result) { - const TfLiteTensor* row_partition_tensor = - GetRowPartitionTensor(conversion_attributes, context, node, dimension); - auto partition_type = - conversion_attributes.GetRowPartitionTypeByDimension(dimension); - switch (partition_type) { - case tensorflow::RowPartitionType::VALUE_ROWIDS: - CalculateOutputIndexValueRowID(*row_partition_tensor, parent_output_index, - output_index_multiplier, output_size, - result); - return kTfLiteOk; - case tensorflow::RowPartitionType::ROW_SPLITS: - CalculateOutputIndexRowSplit(*row_partition_tensor, parent_output_index, - output_index_multiplier, output_size, - result); - return kTfLiteOk; - default: - context->ReportError(context, "Unsupported partition type"); - return kTfLiteError; - } -} - -template <typename VALUE_TYPE> -void SetOutputT(TfLiteContext* context, - int ragged_rank, - const std::vector<int>& output_index, - const TfLiteTensor& values_tensor, - const TfLiteTensor& default_value_tensor, - TfLiteTensor* 
output_tensor) { - const VALUE_TYPE* values_base = GetTensorData<VALUE_TYPE>(&values_tensor); - VALUE_TYPE* output_base = GetTensorData<VALUE_TYPE>(output_tensor); - const VALUE_TYPE* default_value = - GetTensorData<VALUE_TYPE>(&default_value_tensor); - - RuntimeShape output_shape = GetTensorShape(output_tensor); - RuntimeShape element_shape = - RuntimeShape(output_shape.DimensionsCount() - ragged_rank - 1, - output_shape.DimsData() + ragged_rank + 1); - - // element_shape.RemoveDimRange(0, ragged_rank + 1); - const int value_element_size = element_shape.FlatSize(); - size_t output_index_size = output_index.size(); - - // Loop through the output_index vector, finding contiguous regions that - // should be copied. Once we find the end of a contiguous region, copy it - // and add any necessary padding (with default_value). - int src_start = 0; // Start of contiguous region (in values) - int dst_start = 0; // Destination for contiguous region (in output) - int dst_end = 0; // Destination for contiguous region (in output) - for (int src_i = 0; src_i <= output_index_size; ++src_i) { - // dst_i is the destination where the value at src_i should be copied. - int dst_i = src_i < output_index_size ? output_index[src_i] : -1; - - // If we're still in a contiguous region, then update dst_end go to the - // next src_i. - if (dst_i == dst_end) { - ++dst_end; - continue; - } - - // We found the end of contiguous region. This can be because we found - // a gap (dst_i > dst_end), or a source value that shouldn't be copied - // because it's out-of-bounds (dst_i == -1), or the end of the tensor - // (dst_i = -1). - if (dst_start < dst_end) { - // Copy the contiguous region. - const VALUE_TYPE* src = values_base + src_start * value_element_size; - VALUE_TYPE* dst = output_base + dst_start * value_element_size; - int nvals = (dst_end - dst_start) * value_element_size; - std::copy(src, src + nvals, dst); - // copy_array<VALUE_TYPE, int>(dst, src, nvals); - } - - // Add any necessary padding (w/ default_value). - if (src_i >= output_index_size) { - // We reached the end of values: pad to the end of output. - const int output_size = output_shape.FlatSize(); - dst_i = output_size / value_element_size; - } - if (dst_i > dst_end) { - std::fill(output_base + dst_end * value_element_size, - output_base + dst_i * value_element_size, *default_value); - dst_end = dst_i; - } - - // Update indices. - if (dst_i < 0) { - // src_i should be skipped -- leave it out of the contiguous region. - src_start = src_i + 1; - dst_start = dst_end; - } else { - // src_i should be copied -- include it in the contiguous region. 
- src_start = src_i; - dst_start = dst_end; - dst_end = dst_start + 1; - } - } -} - -void SetOutput(TfLiteContext* context, - int ragged_rank, - const std::vector<int>& output_index, - const TfLiteTensor& values_tensor, - const TfLiteTensor& default_value_tensor, - TfLiteTensor* output_tensor) { - switch (output_tensor->type) { - case kTfLiteInt32: - SetOutputT<int32_t>(context, ragged_rank, output_index, values_tensor, - default_value_tensor, output_tensor); - break; - case kTfLiteInt64: - SetOutputT<int64_t>(context, ragged_rank, output_index, values_tensor, - default_value_tensor, output_tensor); - break; - case kTfLiteFloat32: - SetOutputT<float>(context, ragged_rank, output_index, values_tensor, - default_value_tensor, output_tensor); - break; - default: - context->ReportError(context, "Not supported values type"); - } -} - -} // namespace - -void* Initialize(TfLiteContext* context, const char* buffer, size_t length) { - auto attributes = std::make_unique<ConversionAttributes>(); - - const uint8_t* buffer_t = reinterpret_cast<const uint8_t*>(buffer); - - const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); - // TODO (mgubin): Converting flat buffer to a vector of strings looks not very - // effective but simple. A cleaner way is needed. - const flexbuffers::TypedVector row_partition_types_attr = - m[kRowPartitionTypesAttr].AsTypedVector(); - std::vector<std::string> row_partition_types_attr_strings; - row_partition_types_attr_strings.reserve(row_partition_types_attr.size()); - for (int i = 0; i < row_partition_types_attr.size(); ++i) { - row_partition_types_attr_strings.emplace_back( - row_partition_types_attr[i].AsString().str()); - } - attributes->partition_types = - tensorflow::GetRowPartitionTypesHelper(row_partition_types_attr_strings); - if (attributes->partition_types.size() != - row_partition_types_attr_strings.size()) { - context->ReportError(context, "Can't parse partition type attribute"); - return nullptr; - } - attributes->ragged_rank = - tensorflow::GetRaggedRank(attributes->partition_types); - return attributes.release(); -} -void Free(TfLiteContext* /*context*/, void* buffer) { - ConversionAttributes* attributes = - reinterpret_cast<ConversionAttributes*>(buffer); - delete attributes; -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - const ConversionAttributes* attributes = - reinterpret_cast<ConversionAttributes*>(node->user_data); - if (attributes == nullptr) { - // Parsing attributes failed, can't prepare. - context->ReportError(context, "Attributes are not initialized"); - return kTfLiteError; - } - // The output tensor need to be set to dynamic because it can have different - // size. 
- TfLiteTensor& output_tensor = - context->tensors[node->outputs->data[kOutputTensor]]; - SetTensorToDynamic(&output_tensor); - - // Check that input shape tensor is int32 or int64 - TfLiteTensor& input_shape = context->tensors[node->inputs->data[kShapeInput]]; - if (input_shape.type != kTfLiteInt32 && input_shape.type != kTfLiteInt64) { - context->ReportError(context, - "Input form tensor could be only int32 or int64"); - return kTfLiteError; - } - return kTfLiteOk; -} - -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - const ConversionAttributes* attributes = - reinterpret_cast<ConversionAttributes*>(node->user_data); - TfLiteTensor& input_shape = context->tensors[node->inputs->data[kShapeInput]]; - TfLiteTensor& input_values = - context->tensors[node->inputs->data[kValuesInput]]; - TfLiteTensor& default_value = - context->tensors[node->inputs->data[kDefaultValueInput]]; - // TODO (mgubin): Only scallar default value is supported. - if (RuntimeShape(default_value.dims->size, default_value.dims->data) - .FlatSize() != 1) { - context->ReportError(context, "Only scallar default value is supported"); - return kTfLiteError; - } - TfLiteTensor& first_partition_input = - context->tensors[node->inputs->data[kFirstPartitionInputIndex]]; - - // Calculate dimensions. - const int first_dimension = - GetFirstDimensionSize(context, first_partition_input, attributes); - if (first_dimension < 0) { - return kTfLiteError; - } - RuntimeShape output_shape = CalculateOutputSize( - *attributes, context, node, first_dimension, attributes->ragged_rank, - input_values, default_value, input_shape); - if (output_shape.DimensionsCount() == 0) { - return kTfLiteError; - } - - std::vector<int> multiplier; - multiplier.resize(attributes->ragged_rank + 1); - multiplier.back() = 1; - for (int i = multiplier.size() - 2; i >= 0; --i) { - multiplier[i] = multiplier[i + 1] * output_shape.Dims(i + 1); - } - - // Allocate output tensor. - TfLiteTensor& output_tensor = - context->tensors[node->outputs->data[kOutputTensor]]; - - TF_LITE_ENSURE_OK(context, - context->ResizeTensor(context, &output_tensor, - IntArrayFromShape(output_shape))); - - // Copy data. - const int full_size = multiplier.front() * output_shape.Dims(0); - if (full_size > 0) { - std::vector<int> output_index, new_output_index; - int nvals = input_values.dims->data[0]; - output_index.reserve(nvals); - new_output_index.reserve(nvals); - - CalculateFirstParentOutputIndex(first_dimension, multiplier[0], - output_shape.Dims(0), &output_index); - for (int i = 1; i <= attributes->ragged_rank; ++i) { - TF_LITE_ENSURE_OK( - context, CalculateOutputIndex( - *attributes, context, node, i - 1, output_index, - multiplier[i], output_shape.Dims(i), &new_output_index)); - output_index.swap(new_output_index); - new_output_index.clear(); - } - - SetOutput(context, attributes->ragged_rank, output_index, input_values, - default_value, &output_tensor); - } - return kTfLiteOk; -} - -static TfLiteRegistration* GetTfLiteRegistration() { - static TfLiteRegistration r = {Initialize, Free, Prepare, Eval}; - return &r; -} - -} // namespace ragged_tensor_to_tensor - -extern "C" void AddRaggedTensorToTensor(tflite::MutableOpResolver* resolver) { - resolver->AddCustom("RaggedTensorToTensor", - ragged_tensor_to_tensor::GetTfLiteRegistration()); -} - -TfLiteRegistration* Register_RAGGED_TENSOR_TO_TENSOR() { - return ragged_tensor_to_tensor::GetTfLiteRegistration(); -} - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite
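Editorial note on the file removed above: the deleted kernel implements the RaggedTensorToTensor custom op for TFLite. It takes a flat values tensor plus row-partition tensors (FIRST_DIM_SIZE, VALUE_ROWIDS, or ROW_SPLITS), derives the dense output shape, scatters the values into that output, and pads the remainder with a scalar default value. The following is a minimal standalone sketch of the ROW_SPLITS case for one ragged dimension; the function name and signature are hypothetical and it deliberately omits the multi-rank, VALUE_ROWIDS, and int64 handling that the real kernel has.

// Sketch only: densify one ragged dimension described by row_splits into a
// [num_rows x max_width] buffer, padding with default_value.
#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<float> RaggedToDense(const std::vector<float>& values,
                                 const std::vector<int32_t>& row_splits,
                                 float default_value,
                                 int* out_rows, int* out_cols) {
  const int num_rows = static_cast<int>(row_splits.size()) - 1;
  int max_width = 0;
  for (int i = 0; i < num_rows; ++i) {
    max_width = std::max(max_width, row_splits[i + 1] - row_splits[i]);
  }
  // Every cell starts at the default value; rows shorter than max_width
  // therefore end up padded, mirroring the kernel's behaviour.
  std::vector<float> dense(static_cast<size_t>(num_rows) * max_width,
                           default_value);
  for (int i = 0; i < num_rows; ++i) {
    std::copy(values.begin() + row_splits[i],
              values.begin() + row_splits[i + 1],
              dense.begin() + static_cast<size_t>(i) * max_width);
  }
  *out_rows = num_rows;
  *out_cols = max_width;
  return dense;
}

With the row_splits {0, 3, 3, 7, 9}, values {.1 ... .9}, and default value 1.5 used in the deleted RowSplits test further below, this sketch yields the same padded 4x4 output the test expects.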
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.h deleted file mode 100644 index 2cd01298..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.h +++ /dev/null
@@ -1,33 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_RAGGED_TENSOR_TO_TENSOR_TFLITE_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_RAGGED_TENSOR_TO_TENSOR_TFLITE_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/mutable_op_resolver.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { - -extern "C" void AddRaggedTensorToTensor(::tflite::MutableOpResolver* resolver); - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_RAGGED_TENSOR_TO_TENSOR_TFLITE_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc deleted file mode 100644 index 8e10c51..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite_test.cc +++ /dev/null
@@ -1,301 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/* Copyright 2020 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include <initializer_list> -#include <string> -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -// #include "flatbuffers/flexbuffers.h" -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/interpreter.h" -#include "tensorflow/lite/kernels/internal/tensor.h" -#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" -#include "tensorflow/lite/kernels/test_util.h" -#include "tensorflow/lite/schema/schema_generated.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { -TfLiteRegistration* Register_RAGGED_TENSOR_TO_TENSOR(); -} // namespace text -} // namespace custom -} // namespace ops - -namespace { - -class RaggedTensorToTensorOpModel : public SingleOpModel { - public: - RaggedTensorToTensorOpModel(int output_shape_dims, - std::initializer_list<int> values_shape, - std::initializer_list<std::initializer_list<int>> - partition_tensors_shapes, - std::vector<std::string> partition_types, - TensorType value_type = TensorType_FLOAT32, - TensorType index_type = TensorType_INT32) { - // A structure to collect shapes for the input. 
- std::vector<std::vector<int>> shapes; - input_shape_ = AddInput(index_type); - shapes.push_back({output_shape_dims}); - input_values_ = AddInput(value_type); - shapes.emplace_back(values_shape); - input_default_values_ = AddInput(value_type); - shapes.push_back({1}); - for (const auto& p : partition_tensors_shapes) { - partition_tensors_.push_back(AddInput(TensorType_INT32)); - shapes.emplace_back(p); - } - output_ = AddOutput(value_type); - - flexbuffers::Builder fbb; - size_t start = fbb.StartMap(); - { - size_t start = fbb.StartVector("row_partition_types"); - for (const auto& s : partition_types) { - fbb.String(s); - } - fbb.EndVector(start, /*typed=*/true, /*fixed=*/false); - } - fbb.Int("num_row_partition_tensors", partition_types.size()); - fbb.EndMap(start); - fbb.Finish(); - SetCustomOp("RaggedTensorToTensor", fbb.GetBuffer(), - ops::custom::text::Register_RAGGED_TENSOR_TO_TENSOR); - BuildInterpreter(shapes); - } - - std::vector<int> GetOutputShape() { return GetTensorShape(output_); } - - std::vector<float> GetOutputFloat() { return ExtractVector<float>(output_); } - std::vector<int32> GetOutputInt() { return ExtractVector<int32>(output_); } - - void InvokeFloat(const std::vector<int>& shape, - const std::vector<float>& values, - float default_value, - const std::vector<std::vector<int>>& partition_values) { - PopulateTensor(input_shape_, shape); - PopulateTensor(input_values_, values); - PopulateTensor(input_default_values_, {default_value}); - for (int i = 0; i < partition_values.size(); ++i) { - PopulateTensor(partition_tensors_[i], partition_values[i]); - } - SingleOpModel::Invoke(); - } - void InvokeInt(const std::vector<int>& shape, - const std::vector<int32>& values, - int32 default_value, - const std::vector<std::vector<int>>& partition_values) { - PopulateTensor(input_shape_, shape); - PopulateTensor(input_values_, values); - PopulateTensor(input_default_values_, {default_value}); - for (int i = 0; i < partition_values.size(); ++i) { - PopulateTensor(partition_tensors_[i], partition_values[i]); - } - SingleOpModel::Invoke(); - } - - private: - int input_shape_; - int input_values_; - int input_default_values_; - std::vector<int> partition_tensors_; - int output_; -}; - -TEST(RaggedTensorToTensorTest, RaggedTensorToTensor) { - // indices = [2, 1, 0, 3] - // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - // params.shape = [4, None] - RaggedTensorToTensorOpModel model( - 2, // output_shape_dims - {9}, // values_shape - {{1}, {9}}, // partition_tensors_shapes - std::vector<std::string>({"FIRST_DIM_SIZE", "VALUE_ROWIDS"})); - model.InvokeFloat({4, 4}, // shape - {.1, .2, .3, .4, .5, .6, .7, .8, .9}, // values - 1.5, // default_value - std::vector<std::vector<int>>( - {std::vector<int>({4}), - std::vector<int>({0, 0, 0, 2, 2, 2, 2, 3, 3})})); - EXPECT_THAT(model.GetOutputShape(), testing::ElementsAreArray({4, 4})); - EXPECT_THAT(model.GetOutputFloat(), - testing::ElementsAreArray({.1, .2, .3, 1.5, 1.5, 1.5, 1.5, 1.5, - .4, .5, .6, .7, .8, .9, 1.5, 1.5})); -} - -TEST(RaggedTensorToTensorTest, RaggedTensorToTensorRowSplits) { - // indices = [2, 1, 0, 3] - // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - RaggedTensorToTensorOpModel model(2, // output_shape_dims - {9}, // values_shape - {{5}}, // partition_tensors_shapes - std::vector<std::string>({"ROW_SPLITS"})); - model.InvokeFloat( - {4, 4}, // shape - {.1, .2, .3, .4, .5, .6, .7, .8, .9}, // values - 1.5, // default_value - std::vector<std::vector<int>>({std::vector<int>({0, 3, 3, 7, 9})})); - 
EXPECT_THAT(model.GetOutputShape(), testing::ElementsAreArray({4, 4})); - EXPECT_THAT(model.GetOutputFloat(), - testing::ElementsAreArray({.1, .2, .3, 1.5, 1.5, 1.5, 1.5, 1.5, - .4, .5, .6, .7, .8, .9, 1.5, 1.5})); -} - -TEST(RaggedTensorToTensorTest, RaggedTensorToTensor_3DParams) { - // params = [ - // [[]], - // [[.1, .2], [.3]], - // [], - // [[.4, .5], [.6, .7, .8]], - // [[.9]] - // ] - RaggedTensorToTensorOpModel model( - 3, // output_shape_dims - {9}, // values_shape - {{1}, {6}, {9}}, // partition_tensors_shapes - std::vector<std::string>( - {"FIRST_DIM_SIZE", "VALUE_ROWIDS", "VALUE_ROWIDS"})); - model.InvokeFloat( - {5, 2, 3}, // shape - {.1, .2, .3, .4, .5, .6, .7, .8, .9}, // values - 1.5, // default_value - std::vector<std::vector<int>>( - {std::vector<int>({5}), std::vector<int>({0, 1, 1, 3, 3, 4}), - std::vector<int>({1, 1, 2, 3, 3, 4, 4, 4, 5})})); - - EXPECT_THAT(model.GetOutputShape(), testing::ElementsAreArray({5, 2, 3})); - EXPECT_THAT(model.GetOutputFloat(), - testing::ElementsAreArray({1.5, 1.5, 1.5, 1.5, 1.5, 1.5, .1, .2, - 1.5, .3, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, - 1.5, 1.5, .4, .5, 1.5, .6, .7, .8, - .9, 1.5, 1.5, 1.5, 1.5, 1.5})); -} - -TEST(RaggedTensorToTensorOpTest, RaggedTensorToTensor_3DParamsRowSplits) { - // params = [ - // [[]], - // [[.1, .2], [.3]], - // [], - // [[.4, .5], [.6, .7, .8]], - // [[.9]] - // ] - RaggedTensorToTensorOpModel model( - 3, // output_shape_dims - {9}, // values_shape - {{6}, {7}}, // partition_tensors_shapes - std::vector<std::string>({"ROW_SPLITS", "ROW_SPLITS"})); - model.InvokeFloat( - {5, 2, 3}, // shape - {.1, .2, .3, .4, .5, .6, .7, .8, .9}, // values - 1.5, // default_value - std::vector<std::vector<int>>({std::vector<int>({0, 1, 3, 3, 5, 6}), - std::vector<int>({0, 0, 2, 3, 5, 8, 9})})); - EXPECT_THAT(model.GetOutputShape(), testing::ElementsAreArray({5, 2, 3})); - EXPECT_THAT(model.GetOutputFloat(), - testing::ElementsAreArray({1.5, 1.5, 1.5, 1.5, 1.5, 1.5, .1, .2, - 1.5, .3, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, - 1.5, 1.5, .4, .5, 1.5, .6, .7, .8, - .9, 1.5, 1.5, 1.5, 1.5, 1.5})); -} - -TEST(RaggedTensorToTensorTest, RaggedTensorToTensor_3DParamsRowSplits2) { - // params = [ - // [[0, 1, 2], []], - // [], - // [[3]] - // ] - - RaggedTensorToTensorOpModel model( - 3, // output_shape_dims - {4}, // values_shape - {{4}, {4}}, // partition_tensors_shapes - std::vector<std::string>({"ROW_SPLITS", "ROW_SPLITS"}), TensorType_INT32); - model.InvokeInt( - {3, 2, 3}, // shape - {0, 1, 2, 3}, // values - 5, // default_value - std::vector<std::vector<int>>( - {std::vector<int>({0, 2, 2, 3}), std::vector<int>({0, 3, 3, 4})})); - - EXPECT_THAT(model.GetOutputShape(), testing::ElementsAreArray({3, 2, 3})); - - EXPECT_THAT(model.GetOutputInt(), - testing::ElementsAreArray( - {0, 1, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 5, 5, 5, 5, 5})); -} - -TEST(RaggedTensorToTensorTest, RaggedTensorToTensorContractExpanded) { - // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - RaggedTensorToTensorOpModel model( - 2, // output_shape_dims - {9}, // values_shape - {{1}, {9}}, // partition_tensors_shapes - std::vector<std::string>({"FIRST_DIM_SIZE", "VALUE_ROWIDS"})); - model.InvokeFloat({3, 5}, // shape - {.1, .2, .3, .4, .5, .6, .7, .8, .9}, // values - 1.5, // default_value - std::vector<std::vector<int>>( - {std::vector<int>({4}), - std::vector<int>({0, 0, 0, 2, 2, 2, 2, 3, 3})})); - EXPECT_THAT(model.GetOutputShape(), testing::ElementsAreArray({3, 5})); - - EXPECT_THAT(model.GetOutputFloat(), - testing::ElementsAreArray({.1, .2, .3, 1.5, 1.5, // - 
1.5, 1.5, 1.5, 1.5, 1.5, // - .4, .5, .6, .7, 1.5})); -} - -// Adds a dense dimension. -TEST(RaggedTensorToTensorTest, RaggedTensorToTensorContractExpandedDense) { - // params = [[.1, .2, .3], [], [.4, .5, .6, .7], [.8, .9]] - RaggedTensorToTensorOpModel model( - 3, // output_shape_dims - {9, 2}, // values_shape - {{1}, {9}}, // partition_tensors_shapes - std::vector<std::string>({"FIRST_DIM_SIZE", "VALUE_ROWIDS"})); - - model.InvokeFloat({3, 5, 2}, // shape - {.1, 1.1, .2, 1.2, .3, 1.3, .4, 1.4, .5, 1.5, .6, 1.6, .7, - 1.7, .8, 1.8, .9, 1.9}, // values - 1.5, // default_value - std::vector<std::vector<int>>( - {std::vector<int>({4}), - std::vector<int>({0, 0, 0, 2, 2, 2, 2, 3, 3})})); - - EXPECT_THAT(model.GetOutputShape(), testing::ElementsAreArray({3, 5, 2})); - EXPECT_THAT(model.GetOutputFloat(), - testing::ElementsAreArray( - {.1, 1.1, .2, 1.2, .3, 1.3, 1.5, 1.5, 1.5, 1.5, // - 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, // - .4, 1.4, .5, 1.5, .6, 1.6, .7, 1.7, 1.5, 1.5})); -} -} // namespace -} // namespace tflite
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc index aa17d77..5fb2361f 100644 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc +++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.cc
@@ -1,4 +1,4 @@ -// Copyright 2021 TF.Text Authors. +// Copyright 2023 TF.Text Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -21,14 +21,12 @@ namespace { template <typename T> -void RegexSplitImpl(absl::string_view input, - const RE2& re2, - bool include_delimiter, - const RE2& include_delim_regex, +void RegexSplitImpl(absl::string_view input, const RE2& re2, + bool include_delimiter, const RE2& include_delim_regex, std::vector<absl::string_view>* tokens, std::vector<T>* begin_offsets, std::vector<T>* end_offsets) { - absl::string_view leftover(input.data()); + absl::string_view leftover = input; absl::string_view last_end = leftover; // Keep looking for split points until we have reached the end of the input. @@ -47,32 +45,30 @@ tokens->push_back(token); // Mark the end of the last token begin_offsets->push_back(token.data() - input.data()); - end_offsets->push_back(token.data() + token.length() - input.begin()); + end_offsets->push_back(token.data() + token.length() - input.data()); } if (should_include_delim) { // If desired, include the deliminator as a token. tokens->push_back(extracted_delim_token); // Mark the end of the token at the end of the beginning of the delimiter. - begin_offsets->push_back(extracted_delim_token.data() - input.begin()); + begin_offsets->push_back(extracted_delim_token.data() - input.data()); end_offsets->push_back(extracted_delim_token.data() + - extracted_delim_token.length() - input.begin()); + extracted_delim_token.length() - input.data()); } } // Close the last token. if (!leftover.empty()) { tokens->push_back(leftover); - begin_offsets->push_back(leftover.data() - input.begin()); - end_offsets->push_back(leftover.data() + leftover.length() - input.begin()); + begin_offsets->push_back(leftover.data() - input.data()); + end_offsets->push_back(leftover.data() + leftover.length() - input.data()); } } } // namespace -void RegexSplit(absl::string_view input, - const RE2& re2, - bool include_delimiter, +void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, const RE2& include_delim_regex, std::vector<absl::string_view>* tokens, std::vector<long>* begin_offsets, // NOLINT @@ -81,9 +77,7 @@ begin_offsets, end_offsets); } -void RegexSplit(absl::string_view input, - const RE2& re2, - bool include_delimiter, +void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, const RE2& include_delim_regex, std::vector<absl::string_view>* tokens, std::vector<long long>* begin_offsets, // NOLINT
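Editorial note on the hunk above: the change builds `leftover` as a copy of the whole string_view (preserving its length) instead of from `input.data()` alone, which would re-measure the string and require NUL termination, and it computes token offsets as byte distances from `input.data()` rather than `input.begin()`. A small hedged sketch of that offset arithmetic, with a hypothetical helper that is not part of the TF.Text API:

// Sketch: begin/end offsets of a token are byte distances between the token's
// data pointer and the start of the original input. Assumes the token points
// into input's buffer, as RE2 sub-matches do.
#include <cstdint>
#include "absl/strings/string_view.h"

struct TokenSpan {
  int64_t begin;
  int64_t end;
};

inline TokenSpan OffsetsOf(absl::string_view input, absl::string_view token) {
  const int64_t begin = token.data() - input.data();
  return {begin, begin + static_cast<int64_t>(token.size())};
}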
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h index e9df472..b16fa2f 100644 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h +++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split.h
@@ -1,4 +1,4 @@ -// Copyright 2021 TF.Text Authors. +// Copyright 2023 TF.Text Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -24,21 +24,17 @@ namespace tensorflow { namespace text { -void RegexSplit(absl::string_view input, - const RE2& re2, - bool include_delimiter, +void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, const RE2& include_delim_regex, std::vector<absl::string_view>* tokens, std::vector<long>* begin_offsets, // NOLINT std::vector<long>* end_offsets); // NOLINT -void RegexSplit(absl::string_view input, - const RE2& re2, - bool include_delimiter, +void RegexSplit(absl::string_view input, const RE2& re2, bool include_delimiter, const RE2& include_delim_regex, std::vector<absl::string_view>* tokens, std::vector<long long>* begin_offsets, // NOLINT - std::vector<long long>* end_offsets); // NOLINT + std::vector<long long>* end_offsets); // NOLINT } // namespace text } // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_kernels.cc deleted file mode 100644 index f7ee942..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_kernels.cc +++ /dev/null
@@ -1,200 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <memory> - -#include "absl/memory/memory.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow_text/core/kernels/regex_split.h" - -namespace tensorflow { -namespace text { - -using ::tensorflow::Status; - -class RegexSplitOp : public tensorflow::OpKernel { - public: - explicit RegexSplitOp(tensorflow::OpKernelConstruction* ctx) - : tensorflow::OpKernel(ctx) {} - - void Compute(tensorflow::OpKernelContext* ctx) override { - bool should_keep_delim; - std::shared_ptr<RE2> delim_re; - std::shared_ptr<RE2> keep_delim_re; - GetRegexFromInput(ctx, &delim_re, &keep_delim_re); - should_keep_delim = keep_delim_re->pattern().empty() ? false : true; - - const Tensor* input_tensor; - OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); - const auto& input_flat = input_tensor->flat<tstring>(); - - std::vector<int64> begin_offsets; - std::vector<int64> end_offsets; - std::vector<absl::string_view> tokens; - std::vector<int64> row_splits; - row_splits.push_back(0); - - for (size_t i = 0; i < input_flat.size(); ++i) { - RegexSplit(absl::string_view(input_flat(i).data()), *delim_re, - should_keep_delim, *keep_delim_re, &tokens, &begin_offsets, - &end_offsets); - row_splits.push_back(begin_offsets.size()); - } - - // Emit the flat Tensors needed to construct RaggedTensors for tokens, - // start, end offsets. - std::vector<int64> tokens_shape; - tokens_shape.push_back(tokens.size()); - - std::vector<int64> offsets_shape; - offsets_shape.push_back(begin_offsets.size()); - - std::vector<int64> row_splits_shape; - row_splits_shape.push_back(row_splits.size()); - - Tensor* output_tokens_tensor = nullptr; - OP_REQUIRES_OK(ctx, - ctx->allocate_output("tokens", TensorShape(tokens_shape), - &output_tokens_tensor)); - auto output_tokens = output_tokens_tensor->flat<tstring>(); - - Tensor* output_begin_offsets_tensor = nullptr; - OP_REQUIRES_OK( - ctx, ctx->allocate_output("begin_offsets", TensorShape(offsets_shape), - &output_begin_offsets_tensor)); - auto output_begin_offsets = output_begin_offsets_tensor->flat<int64>(); - - Tensor* output_end_offsets_tensor = nullptr; - OP_REQUIRES_OK( - ctx, ctx->allocate_output("end_offsets", TensorShape(offsets_shape), - &output_end_offsets_tensor)); - auto output_end_offsets = output_end_offsets_tensor->flat<int64>(); - - Tensor* output_row_splits_tensor = nullptr; - OP_REQUIRES_OK( - ctx, ctx->allocate_output("row_splits", TensorShape(row_splits_shape), - &output_row_splits_tensor)); - auto output_row_splits = output_row_splits_tensor->flat<int64>(); - - // Copy outputs to Tensors. 
- for (size_t i = 0; i < tokens.size(); ++i) { - const auto& token = tokens[i]; - output_tokens(i) = tstring(token.data(), token.length()); - } - - for (size_t i = 0; i < begin_offsets.size(); ++i) { - output_begin_offsets(i) = begin_offsets[i]; - } - - for (size_t i = 0; i < end_offsets.size(); ++i) { - output_end_offsets(i) = end_offsets[i]; - } - - for (size_t i = 0; i < row_splits.size(); ++i) { - output_row_splits(i) = row_splits[i]; - } - } - - private: - void GetRegexFromInput(tensorflow::OpKernelContext* ctx, - std::shared_ptr<RE2>* delim_re, - std::shared_ptr<RE2>* keep_delim_re) { - const Tensor* delim_regex_pattern_tensor; - OP_REQUIRES_OK( - ctx, ctx->input("delim_regex_pattern", &delim_regex_pattern_tensor)); - OP_REQUIRES(ctx, - TensorShapeUtils::IsScalar(delim_regex_pattern_tensor->shape()), - errors::InvalidArgument( - "Pattern must be scalar, but received ", - delim_regex_pattern_tensor->shape().DebugString())); - const string delim_regex_pattern = - delim_regex_pattern_tensor->flat<tstring>()(0); - *delim_re = CachedDelimRE2(delim_regex_pattern); - OP_REQUIRES( - ctx, (*delim_re)->ok(), - errors::InvalidArgument("Invalid pattern: ", delim_regex_pattern, - ", error: ", (*delim_re)->error())); - - const Tensor* keep_delim_regex_pattern_tensor; - OP_REQUIRES_OK(ctx, ctx->input("keep_delim_regex_pattern", - &keep_delim_regex_pattern_tensor)); - OP_REQUIRES( - ctx, - TensorShapeUtils::IsScalar(keep_delim_regex_pattern_tensor->shape()), - errors::InvalidArgument( - "Pattern must be scalar, but received ", - keep_delim_regex_pattern_tensor->shape().DebugString())); - const string keep_delim_regex_pattern = - keep_delim_regex_pattern_tensor->flat<tstring>()(0); - *keep_delim_re = CachedKeepDelimRE2(keep_delim_regex_pattern); - OP_REQUIRES( - ctx, (*keep_delim_re)->ok(), - errors::InvalidArgument("Invalid pattern: ", keep_delim_regex_pattern, - ", error: ", (*keep_delim_re)->error())); - } - - std::shared_ptr<RE2> CachedDelimRE2(const string& pattern) { - { - tf_shared_lock l(delim_mu_); - if (delim_re_ != nullptr && delim_re_->pattern() == pattern) { - return delim_re_; - } - } - // Construct the new RE2 object before acquiring the lock. - auto regex = std::make_shared<RE2>(pattern); - { - mutex_lock l(delim_mu_); - // Swap instead of assigning so that we destruct the old - // RE2 object (when necessary) after releasing the lock. - delim_re_.swap(regex); - return delim_re_; - } - } - - std::shared_ptr<RE2> CachedKeepDelimRE2(const string& pattern) { - { - tf_shared_lock l(keep_delim_mu_); - if (keep_delim_re_ != nullptr && keep_delim_re_->pattern() == pattern) { - return keep_delim_re_; - } - } - // Construct the new RE2 object before acquiring the lock. - auto regex = std::make_shared<RE2>(pattern); - { - mutex_lock l(keep_delim_mu_); - // Swap instead of assigning so that we destruct the old - // RE2 object (when necessary) after releasing the lock. - keep_delim_re_.swap(regex); - return keep_delim_re_; - } - } - - mutex delim_mu_; - std::shared_ptr<RE2> delim_re_ TF_GUARDED_BY(delim_mu_); - - mutex keep_delim_mu_; - std::shared_ptr<RE2> keep_delim_re_ TF_GUARDED_BY(keep_delim_mu_); - - TF_DISALLOW_COPY_AND_ASSIGN(RegexSplitOp); -}; - -REGISTER_KERNEL_BUILDER( - Name("RegexSplitWithOffsets").Device(tensorflow::DEVICE_CPU), - RegexSplitOp); - -} // namespace text -} // namespace tensorflow
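Editorial note on the file removed above: the deleted RegexSplitOp caches each compiled RE2 per pattern behind a shared/exclusive mutex pair, compiling the new RE2 outside the lock and swapping it in so the old object is destroyed only after the lock is released. A minimal sketch of that caching pattern written against std::shared_mutex instead of tensorflow::mutex; illustrative only, not the kernel's code:

// Sketch of a compile-once regex cache.
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <string>
#include "re2/re2.h"

class CachedRegex {
 public:
  std::shared_ptr<RE2> Get(const std::string& pattern) {
    {
      std::shared_lock<std::shared_mutex> lock(mu_);  // cheap read path
      if (re_ != nullptr && re_->pattern() == pattern) return re_;
    }
    // Compile outside the exclusive lock; compilation can be expensive.
    auto fresh = std::make_shared<RE2>(pattern);
    {
      std::unique_lock<std::shared_mutex> lock(mu_);
      re_.swap(fresh);  // the old RE2 now lives in `fresh`
      return re_;
    }
    // `fresh` (holding the previous RE2, if any) is destroyed here, after the
    // exclusive lock has been released.
  }

 private:
  std::shared_mutex mu_;
  std::shared_ptr<RE2> re_;
};

The deleted kernel keeps two such caches, one for the split pattern and one for the keep-delimiter pattern.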
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_test.cc deleted file mode 100644 index d353c09..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/regex_split_test.cc +++ /dev/null
@@ -1,77 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/regex_split.h" - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "absl/strings/string_view.h" -#include "re2/re2.h" -#include "tensorflow/core/platform/tstring.h" - -namespace tensorflow { -namespace text { -namespace { - -std::vector<absl::string_view> RunTest(const tstring& input, - const tstring& regex, - const tstring& delim_regex) { - RE2 re2((absl::string_view(regex))); - RE2 include_delim_re2((absl::string_view(delim_regex))); - - std::vector<int64> begin_offsets; - std::vector<int64> end_offsets; - std::vector<absl::string_view> tokens; - - RegexSplit(input, re2, true, include_delim_re2, &tokens, &begin_offsets, - &end_offsets); - return tokens; -} - -TEST(RegexSplitTest, JapaneseAndWhitespace) { - tstring regex = "(\\p{Hiragana}+|\\p{Katakana}+|\\s)"; - tstring delim_regex = "(\\p{Hiragana}+|\\p{Katakana}+)"; - tstring input = "He said フランスです"; - auto extracted_tokens = RunTest(input, regex, delim_regex); - EXPECT_THAT(extracted_tokens, testing::ElementsAreArray({ - "He", - "said", - "フランス", - "です", - })); -} - -TEST(RegexSplitTest, Japanese) { - tstring regex = "(\\p{Hiragana}+|\\p{Katakana}+)"; - tstring input = "He said フランスです"; - auto extracted_tokens = RunTest(input, regex, regex); - EXPECT_THAT(extracted_tokens, testing::ElementsAreArray({ - "He said ", - "フランス", - "です", - })); -} - -TEST(RegexSplitTest, ChineseHan) { - tstring regex = "(\\p{Han})"; - tstring input = "敵人變盟友背後盤算"; - auto extracted_tokens = RunTest(input, regex, regex); - EXPECT_THAT(extracted_tokens, - testing::ElementsAreArray( - {"敵", "人", "變", "盟", "友", "背", "後", "盤", "算"})); -} - -} // namespace -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel.cc deleted file mode 100644 index db53bc9..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel.cc +++ /dev/null
@@ -1,220 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <limits> -#include <memory> -#include <string> -#include <vector> - -#include "tensorflow/core/framework/lookup_interface.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/register_types.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/platform/logging.h" - -namespace tensorflow { -namespace text { - -namespace {} // namespace - -// ROUGE-L implementation based on -// https://www.microsoft.com/en-us/research/publication/ -// rouge-a-package-for-automatic-evaluation-of-summaries/ -template <typename SPLITS_TYPE, typename VALUES_TYPE> -class RougeLOp : public OpKernel { - public: - using ConstFlatSplits = typename TTypes<SPLITS_TYPE>::ConstFlat; - using ConstFlatValues = typename TTypes<VALUES_TYPE>::ConstFlat; - - explicit RougeLOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - const Tensor& hyp_tensor = ctx->input(0); - const auto hyp_tensor_flat = hyp_tensor.flat<VALUES_TYPE>(); - const Tensor& hyp_splits = ctx->input(1); - const auto hyp_splits_flat = hyp_splits.flat<SPLITS_TYPE>(); - - const Tensor& ref_tensor = ctx->input(2); - const auto ref_tensor_flat = ref_tensor.flat<VALUES_TYPE>(); - const Tensor& ref_splits = ctx->input(3); - const auto ref_splits_flat = ref_splits.flat<SPLITS_TYPE>(); - - const Tensor& alpha_tensor = ctx->input(4); - const auto alpha_scalar = alpha_tensor.scalar<float>(); - const float alpha = alpha_scalar(); - - // Alpha must be <=1. - OP_REQUIRES(ctx, alpha <= 1, - errors::InvalidArgument("alpha must be <1 but was=", alpha)); - - // Ref and Hyp must have the same number of rows. - OP_REQUIRES(ctx, ref_splits_flat.size() == hyp_splits_flat.size(), - errors::InvalidArgument( - "ref splits len=", ref_splits_flat.size(), - "must equal hyp splits len=", hyp_splits_flat.size())); - - // All inputs must be vectors. - OP_REQUIRES(ctx, TensorShapeUtils::IsVector(hyp_tensor.shape()), - errors::InvalidArgument("hypotheses values must be a vector")); - OP_REQUIRES(ctx, TensorShapeUtils::IsVector(ref_tensor.shape()), - errors::InvalidArgument("references values must be a vector")); - OP_REQUIRES(ctx, TensorShapeUtils::IsVector(hyp_splits.shape()), - errors::InvalidArgument("hypotheses splits must be a vector")); - OP_REQUIRES(ctx, TensorShapeUtils::IsVector(ref_splits.shape()), - errors::InvalidArgument("references splits must be a vector")); - // Ref and Hyp must have at least one split. 
- OP_REQUIRES(ctx, ref_splits_flat.size() > 0, - errors::InvalidArgument( - "ref splits len=0; must have at least 1 split")); - - // Output is a dense Tensor containing one row per input row. - TensorShape output_shape({ref_splits_flat.size() - 1}); - - // Allocate the F-Measure output tensor. - Tensor* f_measure_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_output("f_measure", output_shape, - &f_measure_tensor)); - auto f_measures_flat = f_measure_tensor->flat<float>(); - - // Allocate the P-Measure output tensor. - Tensor* p_measure_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_output("p_measure", output_shape, - &p_measure_tensor)); - auto p_measures_flat = p_measure_tensor->flat<float>(); - - // Allocate the R-Measure output tensor. - Tensor* r_measure_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_output("r_measure", output_shape, - &r_measure_tensor)); - auto r_measures_flat = r_measure_tensor->flat<float>(); - - // Iterate over the splits, skipping the first split as it is always zero. - for (int i = 1; i < hyp_splits_flat.size(); i++) { - // Length of hyp and ref. - SPLITS_TYPE lhyp = hyp_splits_flat(i) - hyp_splits_flat(i - 1); - SPLITS_TYPE lref = ref_splits_flat(i) - ref_splits_flat(i - 1); - // Length of longest common substring. - int32 llcs = LongestCommonSubsequenceLength( - hyp_splits_flat(i - 1), hyp_splits_flat(i), hyp_tensor_flat, - ref_splits_flat(i - 1), ref_splits_flat(i), ref_tensor_flat); - auto measures = ComputeMeasures(lhyp, lref, llcs, alpha); - f_measures_flat(i - 1) = std::get<0>(measures); - p_measures_flat(i - 1) = std::get<1>(measures); - r_measures_flat(i - 1) = std::get<2>(measures); - } - } - - private: - // By using LCS, the ROUGE-L algorithm does not require consecutive matches - // but rather credits the order of N-grams. - int32 LongestCommonSubsequenceLength(const SPLITS_TYPE hyp_i, - const SPLITS_TYPE hyp_j, - const ConstFlatValues& hyp, - const SPLITS_TYPE ref_i, - const SPLITS_TYPE ref_j, - const ConstFlatValues& ref) { - SPLITS_TYPE lhyp = hyp_j - hyp_i; - SPLITS_TYPE lref = ref_j - ref_i; - // Create a scratch matrix to keep track of the LCS seen so far using DP. - // http://www.algorithmist.com/index.php/Longest_Common_Subsequence - Tensor scratch(DT_INT32, {lhyp + 2, lref + 2}); - auto scratch2d = scratch.matrix<int32>(); - for (SPLITS_TYPE x = hyp_i; x <= hyp_j + 1; x++) { - for (SPLITS_TYPE y = ref_i; y <= ref_j + 1; y++) { - SPLITS_TYPE a = x - hyp_i; - SPLITS_TYPE b = y - ref_i; - if (a == 0 || b == 0) { - // If in first row or column, we write a zero to the table. - scratch2d(a, b) = 0; - } else if (x == hyp_j + 1 || y == ref_j + 1 || - hyp(x - 1) != ref(y - 1)) { - // If in the last row or column, or if the tokens are not equal, - // carry the largest score seen in the cell above or to the left of - // the current cell. - scratch2d(a, b) = - std::max({scratch2d(a - 1, b), scratch2d(a, b - 1)}); - } else { - // If tokens are equal, we are part of a subsequence, so increment the - // diagonal score. 
- scratch2d(a, b) = scratch2d(a - 1, b - 1) + 1; - } - } - } - return scratch2d(lhyp, lref); - } - - std::tuple<float, float, float> ComputeMeasures(const SPLITS_TYPE lhyp_int, - const SPLITS_TYPE lref_int, - const int32 llcs_int, - const float alpha) { - const float lhyp = static_cast<float>(lhyp_int); - const float lref = static_cast<float>(lref_int); - const float llcs = static_cast<float>(llcs_int); - const float p_lcs = llcs / (lhyp + 1e-12); - const float r_lcs = llcs / (lref + 1e-12); - // Use the tensor2tensor formulation if the alpha value is <0, - // which does not make sense as a weighted average term. - const float f_lcs = alpha < 0 ? ComputeTensor2TensorF(p_lcs, r_lcs) - : ComputeOfficialF(p_lcs, r_lcs, alpha); - return std::make_tuple(f_lcs, p_lcs, r_lcs); - } - - float ComputeTensor2TensorF(const float p_lcs, const float r_lcs) { - const float beta = p_lcs / (r_lcs + 1e-12); - const float numerator = (1 + (beta * beta)) * r_lcs * p_lcs; - const float denominator = r_lcs + ((beta * beta) * p_lcs); - if (denominator > 0) { - return numerator / denominator; - } - return 0; - } - - float ComputeOfficialF(const float p_lcs, - const float r_lcs, - const float alpha) { - float denominator = (alpha * r_lcs + (1 - alpha) * p_lcs); - if (denominator > 0) { - return (p_lcs * r_lcs) / denominator; - } - return denominator; - } - - TF_DISALLOW_COPY_AND_ASSIGN(RougeLOp); -}; - -#define REGISTER(VALUES_TYPE) \ - REGISTER_KERNEL_BUILDER(Name("RougeL") \ - .Device(DEVICE_CPU) \ - .TypeConstraint<int32>("Tsplits") \ - .TypeConstraint<VALUES_TYPE>("Tvalues"), \ - RougeLOp<int32, VALUES_TYPE>); \ - REGISTER_KERNEL_BUILDER(Name("RougeL") \ - .Device(DEVICE_CPU) \ - .TypeConstraint<int64>("Tsplits") \ - .TypeConstraint<VALUES_TYPE>("Tvalues"), \ - RougeLOp<int64, VALUES_TYPE>); - -TF_CALL_int32(REGISTER); -TF_CALL_int64(REGISTER); -TF_CALL_string(REGISTER); -#undef REGISTER - -} // namespace text -} // namespace tensorflow
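Editorial note on the file removed above: the deleted RougeLOp scores each hypothesis/reference row with ROUGE-L, where the longest-common-subsequence length of the token sequences gives recall (llcs/lref), precision (llcs/lhyp), and an alpha-weighted F-measure. A small standalone sketch of the same computation over token vectors; the helpers are hypothetical and it omits the tensor2tensor-style F used by the op when alpha is negative:

// Sketch: LCS length by dynamic programming, then P/R/F measures per row.
#include <algorithm>
#include <string>
#include <tuple>
#include <vector>

int LcsLength(const std::vector<std::string>& hyp,
              const std::vector<std::string>& ref) {
  std::vector<std::vector<int>> dp(hyp.size() + 1,
                                   std::vector<int>(ref.size() + 1, 0));
  for (size_t i = 1; i <= hyp.size(); ++i) {
    for (size_t j = 1; j <= ref.size(); ++j) {
      dp[i][j] = (hyp[i - 1] == ref[j - 1])
                     ? dp[i - 1][j - 1] + 1
                     : std::max(dp[i - 1][j], dp[i][j - 1]);
    }
  }
  return dp[hyp.size()][ref.size()];
}

// Returns {f, p, r} using the "official" alpha-weighted formulation for
// alpha in [0, 1], matching ComputeOfficialF in the deleted kernel.
std::tuple<float, float, float> RougeL(const std::vector<std::string>& hyp,
                                       const std::vector<std::string>& ref,
                                       float alpha) {
  const float llcs = static_cast<float>(LcsLength(hyp, ref));
  const float p = llcs / (hyp.size() + 1e-12f);
  const float r = llcs / (ref.size() + 1e-12f);
  const float denom = alpha * r + (1 - alpha) * p;
  const float f = denom > 0 ? (p * r) / denom : 0.0f;
  return {f, p, r};
}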
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel_test.cc deleted file mode 100644 index e218b416..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/rouge_l_kernel_test.cc +++ /dev/null
@@ -1,45 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/shape_inference_testutil.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace { - -TEST(RougeLFMeasureOpTest, ShapeFn) { - ShapeInferenceTestOp op("RougeL"); - - INFER_OK(op, "[?];[3];[?];[3];[]", "[2];[2];[2]"); - INFER_OK(op, "[5];[3];[?];[3];[]", "[2];[2];[2]"); - INFER_OK(op, "[?];[3];[8];[3];[]", "[2];[2];[2]"); - INFER_OK(op, "[5];[3];[8];[3];[]", "[2];[2];[2]"); - INFER_OK(op, "[5];[3];[8];?;[]", "[2];[2];[2]"); - INFER_OK(op, "[5];?;[8];[3];[]", "[2];[2];[2]"); - INFER_OK(op, "[5];[?];[8];[?];[]", "[?];[?];[?]"); - INFER_OK(op, "?;?;?;?;?", "[?];[?];[?]"); - INFER_ERROR("Dimension 0 in both shapes must be equal, but are 3 and 2.", op, - "[5];[3];[8];[2];[]"); - INFER_ERROR("Shape must be rank 0 but is rank 1", op, "[5];[3];[8];[3];[1]"); -} - -} // namespace -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels.cc deleted file mode 100644 index 180a82c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels.cc +++ /dev/null
@@ -1,267 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <string> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/ucnv_err.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow_text/core/kernels/sentence_breaking_utils.h" -#include "tensorflow_text/core/kernels/sentence_fragmenter.h" - -using ::tensorflow::tstring; -using ::tensorflow::errors::InvalidArgument; - -namespace tensorflow { -namespace text { - -// TODO(thuang513): This is copied from unicode_ops.cc, move this to a separate -// util lib in tensorflow and reuse it here instead. -namespace { -// Lifecycle wrapper for UConverter making it easier to use with thread_local. -// TODO(gregbillock): Consider whether to use the higher-level convert API and -// create a specialized fast code path for UTF8. -class WrappedConverter { - public: - WrappedConverter() {} - - ~WrappedConverter() { - if (converter_) { - ucnv_close(converter_); - } - } - - void init(const string& name) { - if (converter_ && name == name_) { - // Note: this reset is not typically needed, but if not done, then in some - // cases the cached converter will maintain state of input endianness - // which isn't valid from input to input in every batched case. 
- ucnv_reset(converter_); - return; - } - - if (converter_) { - ucnv_close(converter_); - converter_ = nullptr; - name_ = ""; - } - - UErrorCode status = U_ZERO_ERROR; - converter_ = ucnv_open(name.c_str(), &status); - if (U_FAILURE(status)) { - if (converter_) { - ucnv_close(converter_); - converter_ = nullptr; - } - } else { - name_ = name; - } - } - - UConverter* converter_ = nullptr; - string name_; -}; - -struct ErrorOptions { - UChar32 subst = 0xFFFD; - bool elide_replacement = false; - bool replace_control_chars = false; - bool error_on_malformatting = false; -}; - -Status GetErrorOptions(OpKernelConstruction* context, ErrorOptions* out) { - *out = ErrorOptions(); - - string error_policy; - TF_RETURN_IF_ERROR(context->GetAttr("errors", &error_policy)); - - if (error_policy == "replace") { - out->elide_replacement = false; - } else if (error_policy == "ignore") { - out->elide_replacement = true; - } else if (error_policy == "strict") { - out->error_on_malformatting = true; - } else { - return InvalidArgument( - "errors policy must be one of 'strict', 'replace', or 'ignore'"); - } - - int32 replacement_char; - TF_RETURN_IF_ERROR(context->GetAttr("replacement_char", &replacement_char)); - - if (replacement_char >= UCHAR_MIN_VALUE && - replacement_char <= UCHAR_MAX_VALUE) { - out->subst = replacement_char; - } else { - return InvalidArgument("replacement_char out of unicode codepoint range"); - } - - if (context->HasAttr("replace_control_characters")) { - TF_RETURN_IF_ERROR(context->GetAttr("replace_control_characters", - &(out->replace_control_chars))); - } - - return Status::OK(); -} - -inline bool ShouldHandleFormatError(const ErrorOptions& error_options, - UChar32 ch, - bool format_error) { - return ((error_options.replace_control_chars && ch <= 0x1F) || format_error); -} - -} // namespace - -class SentenceFragmentsOp : public OpKernel { - public: - explicit SentenceFragmentsOp(OpKernelConstruction* context) - : OpKernel(context) { - OP_REQUIRES_OK(context, GetErrorOptions(context, &error_options_)); - - OP_REQUIRES_OK(context, - context->GetAttr("input_encoding", &input_encoding_)); - // Make a temporary UConverter to ensure it will create without error - // at execution time (and to warm any data caches the converter needs). - // This instance is not used. 
- std::unique_ptr<WrappedConverter> input_encoder = - absl::make_unique<WrappedConverter>(); - input_encoder->init(input_encoding_); - OP_REQUIRES( - context, input_encoder->converter_, - InvalidArgument("Could not create converter for input encoding: " + - input_encoding_)); - } - - void Compute(::tensorflow::OpKernelContext* context) override { -#define DECLARE_AND_VALIDATE_INPUT_VECTOR(name, dtype) \ - const Tensor* name##_tensor; \ - OP_REQUIRES_OK(context, context->input(#name, &name##_tensor)); \ - OP_REQUIRES(context, TensorShapeUtils::IsVector(name##_tensor->shape()), \ - InvalidArgument( \ - absl::StrCat("'", #name, "' must be a vector, got shape: ", \ - name##_tensor->shape().DebugString()))); \ - const auto& name = name##_tensor->vec<dtype>(); - - DECLARE_AND_VALIDATE_INPUT_VECTOR(row_lengths, int64); - DECLARE_AND_VALIDATE_INPUT_VECTOR(token_start, int64); - DECLARE_AND_VALIDATE_INPUT_VECTOR(token_end, int64); - DECLARE_AND_VALIDATE_INPUT_VECTOR(token_word, tstring); - DECLARE_AND_VALIDATE_INPUT_VECTOR(token_properties, int64); - -#undef DECLARE_AND_VALIDATE_INPUT_TENSOR - - static thread_local std::unique_ptr<WrappedConverter> input_encoder; - if (!input_encoder) { - input_encoder = absl::make_unique<WrappedConverter>(); - } - input_encoder->init(input_encoding_); - OP_REQUIRES( - context, input_encoder->converter_, - InvalidArgument("Could not create converter for input encoding: " + - input_encoding_)); - - UConverter* converter = input_encoder->converter_; - UnicodeUtil util(converter); - - int num_elements = 0; - for (int i = 0; i < row_lengths.size(); ++i) { - num_elements += row_lengths(i); - } - OP_REQUIRES(context, - num_elements == token_start.size() && - token_start.size() == token_end.size() && - token_end.size() == token_word.size(), - InvalidArgument(absl::StrCat( - "num_elements(", num_elements, "), token_start(", - token_start.size(), "), token_end(", token_end.size(), - "), token_word(", token_word.size(), - ") must all be the same size."))); - - // Iterate through the text - int token_index = 0; - int num_fragments = 0; - std::vector<std::vector<SentenceFragment>> fragments; - for (int i = 0; i < row_lengths.size(); ++i) { - std::vector<Token> tokens; - Document doc(&tokens); - for (int j = 0; j < row_lengths(i); ++j) { - doc.AddToken( - token_word(token_index), token_start(token_index), - token_end(token_index), Token::SPACE_BREAK, - static_cast<Token::TextProperty>(token_properties(token_index))); - ++token_index; - } - - // Find fragments. 
- SentenceFragmenter fragmenter(&doc, &util); - std::vector<SentenceFragment> frags; - OP_REQUIRES_OK(context, fragmenter.FindFragments(&frags)); - - num_fragments += frags.size(); - fragments.push_back(std::move(frags)); - } - - std::vector<int64> fragment_shape; - fragment_shape.push_back(num_fragments); - - std::vector<int64> doc_batch_shape; - doc_batch_shape.push_back(fragments.size()); - -#define DECLARE_OUTPUT_TENSOR(name, out_shape) \ - Tensor* name##_tensor = nullptr; \ - OP_REQUIRES_OK(context, context->allocate_output( \ - #name, TensorShape(out_shape), &name##_tensor)); \ - auto name = name##_tensor->vec<int64>(); - - DECLARE_OUTPUT_TENSOR(fragment_start, fragment_shape); - DECLARE_OUTPUT_TENSOR(fragment_end, fragment_shape); - DECLARE_OUTPUT_TENSOR(fragment_properties, fragment_shape); - DECLARE_OUTPUT_TENSOR(terminal_punc_token, fragment_shape); - DECLARE_OUTPUT_TENSOR(output_row_lengths, doc_batch_shape); - -#undef DECLARE_OUTPUT_TENSOR - - // output_row_splits should have shape of - // [number of fragments over the entire batch] - int element_index = 0; - // Iterate through all the documents - for (int i = 0; i < fragments.size(); ++i) { - const std::vector<SentenceFragment>& fragments_in_doc = fragments[i]; - // Iterate through all the fragments of a document - for (int j = 0; j < fragments_in_doc.size(); ++j) { - const SentenceFragment& fragment = fragments_in_doc[j]; - fragment_start(element_index) = fragment.start; - fragment_end(element_index) = fragment.limit; - fragment_properties(element_index) = fragment.properties; - terminal_punc_token(element_index) = fragment.terminal_punc_token; - ++element_index; - } - output_row_lengths(i) = fragments_in_doc.size(); - } - } - - private: - string input_encoding_; - ErrorOptions error_options_; -}; - -REGISTER_KERNEL_BUILDER(Name("SentenceFragments").Device(DEVICE_CPU), - SentenceFragmentsOp); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels_v2.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels_v2.cc deleted file mode 100644 index 65ff46b..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_kernels_v2.cc +++ /dev/null
@@ -1,85 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <string> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow_text/core/kernels/sentence_fragmenter_v2.h" - -using ::tensorflow::tstring; - -namespace tensorflow { -namespace text { - -class SentenceFragmentsOpV2 : public OpKernel { - public: - explicit SentenceFragmentsOpV2(OpKernelConstruction* context) - : OpKernel(context) {} - - void Compute(::tensorflow::OpKernelContext* context) override { - const Tensor* document_tensor; - OP_REQUIRES_OK(context, context->input("doc", &document_tensor)); - const auto& document = document_tensor->vec<tstring>(); - - std::vector<int64> fragment_start; - std::vector<int64> fragment_end; - std::vector<int64> fragment_properties; - std::vector<int64> terminal_punc_token; - std::vector<int64> output_row_lengths; - - // Iterate through all the documents and find fragments. - for (int i = 0; i < document.size(); ++i) { - // Find fragments. - SentenceFragmenterV2 fragmenter(document(i)); - std::vector<SentenceFragment> frags; - - OP_REQUIRES_OK(context, fragmenter.FindFragments(&frags)); - - for (const auto& f : frags) { - fragment_start.push_back(f.start); - fragment_end.push_back(f.limit); - fragment_properties.push_back(f.properties); - terminal_punc_token.push_back(f.terminal_punc_token); - } - output_row_lengths.push_back(frags.size()); - } - -#define DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(name, dtype) \ - int64 name##_size = name.size(); \ - Tensor* name##_tensor = nullptr; \ - OP_REQUIRES_OK(context, \ - context->allocate_output(#name, TensorShape({name##_size}), \ - &name##_tensor)); \ - auto name##_data = name##_tensor->flat<dtype>().data(); \ - memcpy(name##_data, name.data(), name##_size * sizeof(dtype)); - - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(fragment_start, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(fragment_end, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(fragment_properties, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(terminal_punc_token, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_row_lengths, int64); - -#undef DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR - } -}; - -REGISTER_KERNEL_BUILDER(Name("SentenceFragmentsV2").Device(DEVICE_CPU), - SentenceFragmentsOpV2); - -} // namespace text -} // namespace tensorflow
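The SentenceFragmentsOpV2 removed above emits its fragment fields as flat int64 vectors plus one output_row_lengths entry per input document. A minimal standalone sketch of regrouping those flat outputs into per-document spans; plain C++ with no TensorFlow types, and the function and variable names are illustrative only, not part of the library:

#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Regroup flat (start, limit) pairs into one vector of fragments per
// document, using the per-document row lengths emitted by the kernel above.
std::vector<std::vector<std::pair<int64_t, int64_t>>> Regroup(
    const std::vector<int64_t>& fragment_start,
    const std::vector<int64_t>& fragment_end,
    const std::vector<int64_t>& output_row_lengths) {
  std::vector<std::vector<std::pair<int64_t, int64_t>>> per_doc;
  std::size_t offset = 0;
  for (int64_t len : output_row_lengths) {
    std::vector<std::pair<int64_t, int64_t>> doc;
    for (int64_t j = 0; j < len; ++j, ++offset) {
      doc.emplace_back(fragment_start[offset], fragment_end[offset]);
    }
    per_doc.push_back(std::move(doc));
  }
  return per_doc;
}

int main() {
  // Two documents: the first produced two fragments, the second one.
  auto grouped = Regroup({0, 12, 0}, {11, 20, 8}, {2, 1});
  std::cout << grouped.size() << " documents\n";  // prints "2 documents"
}

The same regrouping applies to fragment_properties and terminal_punc_token, which the kernel emits in the same flat order.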
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.cc deleted file mode 100644 index 2937fe2f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.cc +++ /dev/null
@@ -1,246 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/sentence_breaking_utils.h" - -#include <string> - -#include "absl/strings/str_cat.h" -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/utypes.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" - -using ::tensorflow::Status; - -namespace tensorflow { -namespace text { - -Status UnicodeUtil::GetOneUChar(const absl::string_view& input, - bool* has_more_than_one_char, - UChar32* result) const { - UErrorCode status = U_ZERO_ERROR; - const char* source = input.data(); - const char* limit = input.data() + input.length(); - if (!converter_) { - return tensorflow::errors::Internal( - absl::StrCat("Converter has not been initialized!")); - } - *result = ucnv_getNextUChar(converter_, &source, limit, &status); - - if (U_FAILURE(status)) { - return tensorflow::errors::Internal( - absl::StrCat("Failed to decode string, error status=", status)); - } - - if (source != limit) { - *has_more_than_one_char = true; - } else { - *has_more_than_one_char = false; - } - - return ::tensorflow::Status::OK(); -} - -Status UnicodeUtil::IsTerminalPunc(const absl::string_view& input, - bool* result) const { - *result = false; - const auto& ellipsis_status = IsEllipsis(input, result); - // If there was a error decoding, or if we found an ellipsis, then return. - if (!ellipsis_status.ok()) - return ellipsis_status; - if (*result) - return Status::OK(); - - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); - if (!status.ok()) - return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); - } - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. 
- switch (char_value) { - case 0x055C: // Armenian exclamation mark - case 0x055E: // Armenian question mark - case 0x17d4: // Khmer sign khan - case 0x037E: // Greek question mark - case 0x2026: // ellipsis - *result = true; - return Status::OK(); - } - - USentenceBreak sb_property = static_cast<USentenceBreak>( - u_getIntPropertyValue(char_value, UCHAR_SENTENCE_BREAK)); - *result = sb_property == U_SB_ATERM || sb_property == U_SB_STERM; - return Status::OK(); -} - -Status UnicodeUtil::IsClosePunc(const absl::string_view& input, - bool* result) const { - *result = false; - if (input == "''") { - *result = true; - return Status::OK(); - } - - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); - if (!status.ok()) - return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); - } - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. - switch (char_value) { - case '>': - case ']': - case '`': - case 64831: // Ornate right parenthesis - case 65282: // fullwidth quotation mark - case 65287: // fullwidth apostrophe - *result = true; - return Status::OK(); - } - - ULineBreak lb_property = static_cast<ULineBreak>( - u_getIntPropertyValue(char_value, UCHAR_LINE_BREAK)); - - *result = lb_property == U_LB_CLOSE_PUNCTUATION || - lb_property == U_LB_CLOSE_PARENTHESIS || - lb_property == U_LB_QUOTATION; - return Status::OK(); -} - -Status UnicodeUtil::IsOpenParen(const absl::string_view& input, - bool* result) const { - *result = false; - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); - if (!status.ok()) - return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); - } - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. - switch (char_value) { - case '<': - case 64830: // Ornate left parenthesis - *result = true; - return Status::OK(); - } - - ULineBreak lb_property = static_cast<ULineBreak>( - u_getIntPropertyValue(char_value, UCHAR_LINE_BREAK)); - *result = lb_property == U_LB_OPEN_PUNCTUATION; - return Status::OK(); -} - -Status UnicodeUtil::IsCloseParen(const absl::string_view& input, - bool* result) const { - *result = false; - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); - if (!status.ok()) - return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); - } - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. 
- switch (char_value) { - case '>': - case 64831: // Ornate right parenthesis - *result = true; - return Status::OK(); - } - - ULineBreak lb_property = static_cast<ULineBreak>( - u_getIntPropertyValue(char_value, UCHAR_LINE_BREAK)); - *result = lb_property == U_LB_CLOSE_PUNCTUATION || - lb_property == U_LB_CLOSE_PARENTHESIS; - return Status::OK(); -} - -Status UnicodeUtil::IsPunctuationWord(const absl::string_view& input, - bool* result) const { - *result = false; - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); - if (!status.ok()) - return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); - } - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. - switch (char_value) { - case '`': - case '<': - case '>': - case '~': - case 5741: - *result = true; - return Status::OK(); - } - - *result = u_ispunct(char_value) || - u_hasBinaryProperty(char_value, UCHAR_DASH) || - u_hasBinaryProperty(char_value, UCHAR_HYPHEN); - return Status::OK(); -} - -Status UnicodeUtil::IsEllipsis(const absl::string_view& input, - bool* result) const { - *result = false; - if (input == "...") { - *result = true; - return Status::OK(); - } - - bool has_more_than_one_char = false; - UChar32 char_value; - const auto& status = GetOneUChar(input, &has_more_than_one_char, &char_value); - if (!status.ok()) - return status; - if (has_more_than_one_char) { - *result = false; - return Status::OK(); - } - - *result = char_value == 0x2026; - return Status::OK(); -} - -} // namespace text -} // namespace tensorflow
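Once the input has been decoded to a single codepoint, UnicodeUtil::IsTerminalPunc above reduces to a short exception list plus one ICU sentence-break property lookup. A condensed sketch of just that classification step; it assumes the same icu4c header the removed file includes, the helper name is illustrative, and the string-level "..." and multi-codepoint handling in the original are omitted:

#include "icu4c/source/common/unicode/uchar.h"

// Condensed from the removed IsTerminalPunc: characters the ICU property
// tables miss, then the UCHAR_SENTENCE_BREAK property for everything else.
bool IsTerminalPuncCodepoint(UChar32 char_value) {
  switch (char_value) {
    case 0x055C:  // Armenian exclamation mark
    case 0x055E:  // Armenian question mark
    case 0x17D4:  // Khmer sign khan
    case 0x037E:  // Greek question mark
    case 0x2026:  // horizontal ellipsis
      return true;
  }
  USentenceBreak sb_property = static_cast<USentenceBreak>(
      u_getIntPropertyValue(char_value, UCHAR_SENTENCE_BREAK));
  return sb_property == U_SB_ATERM || sb_property == U_SB_STERM;
}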
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.h deleted file mode 100644 index d15abf6..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils.h +++ /dev/null
@@ -1,73 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_BREAKING_UTILS_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_BREAKING_UTILS_H_ - -#include <string> -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/ucnv.h" -#include "icu4c/source/common/unicode/ucnv_err.h" -#include "icu4c/source/common/unicode/utypes.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -// A class of utils for identifying certain classes and properties of unicode -// characters. -class UnicodeUtil { - public: - // `converter` not owned. - explicit UnicodeUtil(UConverter* converter) : converter_(converter) {} - - // Returns true iff a string is terminal punctuation. - ::tensorflow::Status IsTerminalPunc(const absl::string_view& input, - bool* result) const; - - // Returns true iff a string is close punctuation (close quote or close - // paren). - ::tensorflow::Status IsClosePunc(const absl::string_view& input, - bool* result) const; - - // Returns true iff a string is an open paren. - ::tensorflow::Status IsOpenParen(const absl::string_view& input, - bool* result) const; - - // Returns true iff a string is a close paren. - ::tensorflow::Status IsCloseParen(const absl::string_view& input, - bool* result) const; - - // Returns true iff a word is made of punctuation characters only. - ::tensorflow::Status IsPunctuationWord(const absl::string_view& input, - bool* result) const; - - // Returns true iff a string is an ellipsis token ("..."). - ::tensorflow::Status IsEllipsis(const absl::string_view& input, - bool* result) const; - - private: - ::tensorflow::Status GetOneUChar(const absl::string_view&, - bool* has_more_than_one_char, - UChar32* result) const; - - // not owned. mutable because UConverter contains some internal options and - // buffer. - mutable UConverter* converter_; -}; - -} // namespace text -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_BREAKING_UTILS_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils_test.cc deleted file mode 100644 index 14fc095..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_breaking_utils_test.cc +++ /dev/null
@@ -1,581 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/sentence_breaking_utils.h" - -#include <memory> -#include <string> -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/ucnv.h" -#include "icu4c/source/common/unicode/ucnv_err.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/uniset.h" -#include "icu4c/source/common/unicode/unistr.h" -#include "icu4c/source/common/unicode/uset.h" -#include "icu4c/source/common/unicode/utypes.h" - -namespace tensorflow { -namespace text { -namespace { - -class SentenceBreakingUtilsTest { - protected: - UConverter* GetUConverter() { - constexpr char name[] = "UTF-8"; - UErrorCode status = U_ZERO_ERROR; - UConverter* converter = ucnv_open(name, &status); - if (U_FAILURE(status)) { - if (converter) { - ucnv_close(converter); - } - return nullptr; - } - return converter; - } -}; - -class SentenceBreakingUtilsParamTest : public SentenceBreakingUtilsTest, - public ::testing::TestWithParam<UChar> { - protected: - void SetUp() override { - converter_ = SentenceBreakingUtilsTest::GetUConverter(); - ASSERT_NE(converter_, nullptr); - } - - void TearDown() override { ucnv_close(converter_); } - - std::string StringFromUnicodeChar(UChar32 input) { - std::string result; - icu::UnicodeString test_unicode_string(input); - test_unicode_string.toUTF8String(result); - return result; - } - - UConverter* converter_; -}; - -class IsTerminalPuncParamTest : public SentenceBreakingUtilsParamTest {}; - -class IsTerminalPuncTest : public SentenceBreakingUtilsTest, - public ::testing::Test {}; - -const UChar is_terminal_punc_test_cases[] = { - 0x055C, // Armenian exclamation mark - 0x055E, // Armenian question mark - 0x0589, // Armenian full stop - 0x061F, // Arabic question mark - 0x06D4, // Arabic full stop - 0x0700, // Syriabc end of paragraph - 0x0701, // Syriac supralinear full stop - 0x0702, // Syriac sublinear full stop - 0x1362, // Ethiopic full stop - 0x1367, // Ethiopic question mark - 0x1368, // Ethiopic paragraph separator - 0x104A, // Myanmar sign little section - 0x104B, // Myanmar sign section - 0x166E, // Canadian syllabics full stop - 0x17d4, // Khmer sign khan - 0x1803, // Mongolian full stop - 0x1809, // Mongolian Manchu full stop - 0x1944, // Limbu exclamation mark - 0x1945, // Limbu question mark - 0x203C, // double exclamation mark - 0x203D, // interrobang - 0x2047, // double question mark - 0x2048, // question exclamation mark - 0x2049, // exclamation question mark - 0x3002, // ideographic full stop - 0x037E, // Greek question mark - 0xFE52, // small full stop - 0xFE56, // small question mark - 0xFE57, // small exclamation mark - 0xFF01, // fullwidth exclamation mark - 0xFF0E, // fullwidth full stop - 0xFF1F, // fullwidth question mark - 0xFF61, // halfwidth ideographic full stop - 0x2026, // ellipsis - 0x0964, - 
0x0965, // Devanagari danda..Devanagari double -}; - -TEST_P(IsTerminalPuncParamTest, IsTerminalPunc) { - UnicodeUtil util(converter_); - std::string test_string = StringFromUnicodeChar(GetParam()); - bool result = false; - EXPECT_TRUE(util.IsTerminalPunc(test_string, &result).ok()); - EXPECT_TRUE(result); -} - -INSTANTIATE_TEST_SUITE_P(IsTerminalPuncTest, - IsTerminalPuncParamTest, - ::testing::ValuesIn(is_terminal_punc_test_cases)); - -TEST_F(IsTerminalPuncTest, IsMultiCharEllipseTerminalPunc) { - UConverter* converter = SentenceBreakingUtilsTest::GetUConverter(); - ASSERT_NE(converter, nullptr); - UnicodeUtil util(converter); - std::string test_string = "..."; - bool result; - EXPECT_TRUE(util.IsTerminalPunc(test_string, &result).ok()); - EXPECT_TRUE(result); - ucnv_close(converter); -} - -TEST_F(IsTerminalPuncTest, TestMultiUnicodeChars) { - UConverter* converter = SentenceBreakingUtilsTest::GetUConverter(); - ASSERT_NE(converter, nullptr); - UnicodeUtil util(converter); - std::string test_string = "never gonna let you decode"; - bool result; - EXPECT_TRUE(util.IsTerminalPunc(test_string, &result).ok()); - EXPECT_FALSE(result); - ucnv_close(converter); -} - -TEST_F(IsTerminalPuncTest, TestInvalidConverter) { - UErrorCode status = U_ZERO_ERROR; - UConverter* converter = ucnv_open("cant find me", &status); - UnicodeUtil util(converter); - std::string test_string = "."; - bool result; - EXPECT_FALSE(util.IsTerminalPunc(test_string, &result).ok()); - ucnv_close(converter); -} - -class ClosePuncParamTest : public SentenceBreakingUtilsParamTest {}; - -const UChar close_punc_test_cases[] = { - 0x29, 0x5D, 0x3E, 0x7D, - 0x207E, // superscript right parenthesis - 0x208E, // subscript right parenthesis - 0x27E7, // mathematical right white square bracket - 0x27E9, // mathematical right angle bracket - 0x27EB, // mathematical right double angle bracket - 0x2984, // right white curly bracket - 0x2986, // right white parenthesis - 0x2988, // Z notation right image bracket - 0x298A, // Z notation right binding bracket - 0x298C, // right square bracket with underbar - 0x298E, // right square bracket with tick in top corner - 0x2990, // right square bracket with tick in bottom corner - 0x2992, // right angle bracket with dot - 0x2994, // right arc greater-than bracket - 0x2996, // double right arc less-than bracket - 0x2998, // right black tortoise shell bracket - 0x29D9, // right wiggly fence - 0x29DB, // right double wiggly fence - 0x29FD, // right-pointing curved angle bracket - 0x3009, // CJK right angle bracket - 0x300B, // CJK right double angle bracket - 0x3011, // CJK right black lenticular bracket - 0x3015, // CJK right tortoise shell bracket - 0x3017, // CJK right white lenticular bracket - 0x3019, // CJK right white tortoise shell bracket - 0x301B, // CJK right white square bracket - 0xFD3F, // Ornate right parenthesis - 0xFE5A, // small right parenthesis - 0xFE5C, // small right curly bracket - 0xFF09, // fullwidth right parenthesis - 0xFF3D, // fullwidth right square bracket - 0xFF5D, // fullwidth right curly bracket - 0x27, 0x60, 0x22, - 0xFF07, // fullwidth apostrophe - 0xFF02, // fullwidth quotation mark - 0x2019, // right single quotation mark (English, others) - 0x201D, // right double quotation mark (English, others) - 0x2018, // left single quotation mark (Czech, German, Slovak) - 0x201C, // left double quotation mark (Czech, German, Slovak) - 0x203A, // single right-pointing angle quotation mark (French, others) - 0x00BB, // right-pointing double angle quotation mark (French, 
others) - 0x2039, // single left-pointing angle quotation mark (Slovenian, others) - 0x00AB, // left-pointing double angle quotation mark (Slovenian, others) - 0x300D, // right corner bracket (East Asian languages) - 0xfe42, // presentation form for vertical right corner bracket - 0xFF63, // halfwidth right corner bracket (East Asian languages) - 0x300F, // right white corner bracket (East Asian languages) - 0xfe44, // presentation form for vertical right white corner bracket - 0x301F, // low double prime quotation mark (East Asian languages) - 0x301E, // close double prime (East Asian languages written horizontally) -}; - -TEST_P(ClosePuncParamTest, IsClosePunc) { - UnicodeUtil util(converter_); - std::string test_string = StringFromUnicodeChar(GetParam()); - bool result = false; - EXPECT_TRUE(util.IsClosePunc(test_string, &result).ok()); - EXPECT_TRUE(result); -} - -INSTANTIATE_TEST_SUITE_P(IsClosePuncParamTest, - ClosePuncParamTest, - ::testing::ValuesIn(close_punc_test_cases)); - -class OpenParenParamTest : public SentenceBreakingUtilsParamTest {}; - -const UChar open_paren_test_cases[] = { - '(', '[', '<', '{', - 0x207D, // superscript left parenthesis - 0x208D, // subscript left parenthesis - 0x27E6, // mathematical left white square bracket - 0x27E8, // mathematical left angle bracket - 0x27EA, // mathematical left double angle bracket - 0x2983, // left white curly bracket - 0x2985, // left white parenthesis - 0x2987, // Z notation left image bracket - 0x2989, // Z notation left binding bracket - 0x298B, // left square bracket with underbar - 0x298D, // left square bracket with tick in top corner - 0x298F, // left square bracket with tick in bottom corner - 0x2991, // left angle bracket with dot - 0x2993, // left arc less-than bracket - 0x2995, // double left arc greater-than bracket - 0x2997, // left black tortoise shell bracket - 0x29D8, // left wiggly fence - 0x29DA, // left double wiggly fence - 0x29FC, // left-pointing curved angle bracket - 0x3008, // CJK left angle bracket - 0x300A, // CJK left double angle bracket - 0x3010, // CJK left black lenticular bracket - 0x3014, // CJK left tortoise shell bracket - 0x3016, // CJK left white lenticular bracket - 0x3018, // CJK left white tortoise shell bracket - 0x301A, // CJK left white square bracket - 0xFD3E, // Ornate left parenthesis - 0xFE59, // small left parenthesis - 0xFE5B, // small left curly bracket - 0xFF08, // fullwidth left parenthesis - 0xFF3B, // fullwidth left square bracket - 0xFF5B, // fullwidth left curly bracket -}; - -TEST_P(OpenParenParamTest, IsOpenParen) { - UnicodeUtil util(converter_); - std::string test_string = StringFromUnicodeChar(GetParam()); - bool result = false; - EXPECT_TRUE(util.IsOpenParen(test_string, &result).ok()); - EXPECT_TRUE(result); -} - -INSTANTIATE_TEST_SUITE_P(IsOpenParenParamTest, - OpenParenParamTest, - ::testing::ValuesIn(open_paren_test_cases)); - -class CloseParenParamTest : public SentenceBreakingUtilsParamTest {}; - -const UChar close_paren_test_cases[] = { - ')', ']', '>', '}', - 0x207E, // superscript right parenthesis - 0x208E, // subscript right parenthesis - 0x27E7, // mathematical right white square bracket - 0x27E9, // mathematical right angle bracket - 0x27EB, // mathematical right double angle bracket - 0x2984, // right white curly bracket - 0x2986, // right white parenthesis - 0x2988, // Z notation right image bracket - 0x298A, // Z notation right binding bracket - 0x298C, // right square bracket with underbar - 0x298E, // right square bracket with tick in top corner - 
0x2990, // right square bracket with tick in bottom corner - 0x2992, // right angle bracket with dot - 0x2994, // right arc greater-than bracket - 0x2996, // double right arc less-than bracket - 0x2998, // right black tortoise shell bracket - 0x29D9, // right wiggly fence - 0x29DB, // right double wiggly fence - 0x29FD, // right-pointing curved angle bracket - 0x3009, // CJK right angle bracket - 0x300B, // CJK right double angle bracket - 0x3011, // CJK right black lenticular bracket - 0x3015, // CJK right tortoise shell bracket - 0x3017, // CJK right white lenticular bracket - 0x3019, // CJK right white tortoise shell bracket - 0x301B, // CJK right white square bracket - 0xFD3F, // Ornate right parenthesis - 0xFE5A, // small right parenthesis - 0xFE5C, // small right curly bracket - 0xFF09, // fullwidth right parenthesis - 0xFF3D, // fullwidth right square bracket - 0xFF5D, // fullwidth right curly bracket -}; - -TEST_P(CloseParenParamTest, IsCloseParen) { - UnicodeUtil util(converter_); - std::string test_string = StringFromUnicodeChar(GetParam()); - bool result = false; - EXPECT_TRUE(util.IsCloseParen(test_string, &result).ok()); - EXPECT_TRUE(result); -} - -INSTANTIATE_TEST_SUITE_P(IsCloseParenParamTest, - CloseParenParamTest, - ::testing::ValuesIn(close_paren_test_cases)); - -class IsPunctuationWordParamTest : public SentenceBreakingUtilsParamTest {}; - -const UChar punc_word_test_cases[] = { - '(', '[', '<', '{', - 0x207D, // superscript left parenthesis - 0x208D, // subscript left parenthesis - 0x27E6, // mathematical left white square bracket - 0x27E8, // mathematical left angle bracket - 0x27EA, // mathematical left double angle bracket - 0x2983, // left white curly bracket - 0x2985, // left white parenthesis - 0x2987, // Z notation left image bracket - 0x2989, // Z notation left binding bracket - 0x298B, // left square bracket with underbar - 0x298D, // left square bracket with tick in top corner - 0x298F, // left square bracket with tick in bottom corner - 0x2991, // left angle bracket with dot - 0x2993, // left arc less-than bracket - 0x2995, // double left arc greater-than bracket - 0x2997, // left black tortoise shell bracket - 0x29D8, // left wiggly fence - 0x29DA, // left double wiggly fence - 0x29FC, // left-pointing curved angle bracket - 0x3008, // CJK left angle bracket - 0x300A, // CJK left double angle bracket - 0x3010, // CJK left black lenticular bracket - 0x3014, // CJK left tortoise shell bracket - 0x3016, // CJK left white lenticular bracket - 0x3018, // CJK left white tortoise shell bracket - 0x301A, // CJK left white square bracket - 0xFD3E, // Ornate left parenthesis - 0xFE59, // small left parenthesis - 0xFE5B, // small left curly bracket - 0xFF08, // fullwidth left parenthesis - 0xFF3B, // fullwidth left square bracket - 0xFF5B, // fullwidth left curly bracket - '"', '\'', '`', - 0xFF07, // fullwidth apostrophe - 0xFF02, // fullwidth quotation mark - 0x2018, // left single quotation mark (English, others) - 0x201C, // left double quotation mark (English, others) - 0x201B, // single high-reveresed-9 quotation mark (PropList.txt) - 0x201A, // single low-9 quotation mark (Czech, German, Slovak) - 0x201E, // double low-9 quotation mark (Czech, German, Slovak) - 0x201F, // double high-reversed-9 quotation mark (PropList.txt) - 0x2019, // right single quotation mark (Danish, Finnish, Swedish, Norw.) - 0x201D, // right double quotation mark (Danish, Finnish, Swedish, Norw.) 
- 0x2039, // single left-pointing angle quotation mark (French, others) - 0x00AB, // left-pointing double angle quotation mark (French, others) - 0x203A, // single right-pointing angle quotation mark (Slovenian, others) - 0x00BB, // right-pointing double angle quotation mark (Slovenian, others) - 0x300C, // left corner bracket (East Asian languages) - 0xFE41, // presentation form for vertical left corner bracket - 0xFF62, // halfwidth left corner bracket (East Asian languages) - 0x300E, // left white corner bracket (East Asian languages) - 0xFE43, // presentation form for vertical left white corner bracket - 0x301D, // reversed double prime quotation mark (East Asian langs, horiz.) - ')', ']', '>', '}', - 0x207E, // superscript right parenthesis - 0x208E, // subscript right parenthesis - 0x27E7, // mathematical right white square bracket - 0x27E9, // mathematical right angle bracket - 0x27EB, // mathematical right double angle bracket - 0x2984, // right white curly bracket - 0x2986, // right white parenthesis - 0x2988, // Z notation right image bracket - 0x298A, // Z notation right binding bracket - 0x298C, // right square bracket with underbar - 0x298E, // right square bracket with tick in top corner - 0x2990, // right square bracket with tick in bottom corner - 0x2992, // right angle bracket with dot - 0x2994, // right arc greater-than bracket - 0x2996, // double right arc less-than bracket - 0x2998, // right black tortoise shell bracket - 0x29D9, // right wiggly fence - 0x29DB, // right double wiggly fence - 0x29FD, // right-pointing curved angle bracket - 0x3009, // CJK right angle bracket - 0x300B, // CJK right double angle bracket - 0x3011, // CJK right black lenticular bracket - 0x3015, // CJK right tortoise shell bracket - 0x3017, // CJK right white lenticular bracket - 0x3019, // CJK right white tortoise shell bracket - 0x301B, // CJK right white square bracket - 0xFD3F, // Ornate right parenthesis - 0xFE5A, // small right parenthesis - 0xFE5C, // small right curly bracket - 0xFF09, // fullwidth right parenthesis - 0xFF3D, // fullwidth right square bracket - 0xFF5D, // fullwidth right curly bracket - '\'', '"', '`', - 0xFF07, // fullwidth apostrophe - 0xFF02, // fullwidth quotation mark - 0x2019, // right single quotation mark (English, others) - 0x201D, // right double quotation mark (English, others) - 0x2018, // left single quotation mark (Czech, German, Slovak) - 0x201C, // left double quotation mark (Czech, German, Slovak) - 0x203A, // single right-pointing angle quotation mark (French, others) - 0x00BB, // right-pointing double angle quotation mark (French, others) - 0x2039, // single left-pointing angle quotation mark (Slovenian, others) - 0x00AB, // left-pointing double angle quotation mark (Slovenian, others) - 0x300D, // right corner bracket (East Asian languages) - 0xfe42, // presentation form for vertical right corner bracket - 0xFF63, // halfwidth right corner bracket (East Asian languages) - 0x300F, // right white corner bracket (East Asian languages) - 0xfe44, // presentation form for vertical right white corner bracket - 0x301F, // low double prime quotation mark (East Asian languages) - 0x301E, // close double prime (East Asian languages written horizontally) - 0x00A1, // Spanish inverted exclamation mark - 0x00BF, // Spanish inverted question mark - '.', '!', '?', - 0x055C, // Armenian exclamation mark - 0x055E, // Armenian question mark - 0x0589, // Armenian full stop - 0x061F, // Arabic question mark - 0x06D4, // Arabic full stop - 0x0700, // Syriac end of 
paragraph - 0x0701, // Syriac supralinear full stop - 0x0702, // Syriac sublinear full stop - 0x0964, // Devanagari danda..Devanagari double danda - 0x0965, - 0x1362, // Ethiopic full stop - 0x1367, // Ethiopic question mark - 0x1368, // Ethiopic paragraph separator - 0x104A, // Myanmar sign little section - 0x104B, // Myanmar sign section - 0x166E, // Canadian syllabics full stop - 0x17d4, // Khmer sign khan - 0x1803, // Mongolian full stop - 0x1809, // Mongolian Manchu full stop - 0x1944, // Limbu exclamation mark - 0x1945, // Limbu question mark - 0x203C, // double exclamation mark - 0x203D, // interrobang - 0x2047, // double question mark - 0x2048, // question exclamation mark - 0x2049, // exclamation question mark - 0x3002, // ideographic full stop - 0x037E, // Greek question mark - 0xFE52, // small full stop - 0xFE56, // small question mark - 0xFE57, // small exclamation mark - 0xFF01, // fullwidth exclamation mark - 0xFF0E, // fullwidth full stop - 0xFF1F, // fullwidth question mark - 0xFF61, // halfwidth ideographic full stop - 0x2026, // ellipsis - 0x30fb, // Katakana middle dot - 0xff65, // halfwidth Katakana middle dot - 0x2040, // character tie - '-', '~', - 0x058a, // Armenian hyphen - 0x1806, // Mongolian todo soft hyphen - 0x2010, // hyphen..horizontal bar - 0x2011, 0x2012, 0x2013, 0x2014, 0x2015, - 0x2053, // swung dash -- from Table 6-3 of Unicode book - 0x207b, // superscript minus - 0x208b, // subscript minus - 0x2212, // minus sign - 0x301c, // wave dash - 0x3030, // wavy dash - 0xfe31, // presentation form for vertical em dash..en dash - 0xfe32, - 0xfe58, // small em dash - 0xfe63, // small hyphen-minus - 0xff0d, // fullwidth hyphen-minus - ',', ':', ';', - 0x00b7, // middle dot - 0x0387, // Greek ano teleia - 0x05c3, // Hebrew punctuation sof pasuq - 0x060c, // Arabic comma - 0x061b, // Arabic semicolon - 0x066b, // Arabic decimal separator - 0x066c, // Arabic thousands separator - 0x0703, // Syriac contraction and others - 0x0704, 0x0705, 0x0706, 0x0707, 0x0708, 0x0709, 0x70a, - 0x070c, // Syric harklean metobelus - 0x0e5a, // Thai character angkhankhu - 0x0e5b, // Thai character khomut - 0x0f08, // Tibetan mark sbrul shad - 0x0f0d, // Tibetan mark shad..Tibetan mark rgya gram shad - 0x0f0e, 0x0f0f, 0x0f10, 0x0f11, 0x0f12, - 0x1361, // Ethiopic wordspace - 0x1363, // other Ethiopic chars - 0x1364, 0x1365, 0x1366, - 0x166d, // Canadian syllabics chi sign - 0x16eb, // Runic single punctuation..Runic cross punctuation - 0x16ed, - 0x17d5, // Khmer sign camnuc pii huuh and other - 0x17d6, - 0x17da, // Khmer sign koomut - 0x1802, // Mongolian comma - 0x1804, // Mongolian four dots and other - 0x1805, - 0x1808, // Mongolian manchu comma - 0x3001, // ideographic comma - 0xfe50, // small comma and others - 0xfe51, - 0xfe54, // small semicolon and other - 0xfe55, - 0xff0c, // fullwidth comma - 0xff0e, // fullwidth stop..fullwidth solidus - 0xff0f, - 0xff1a, // fullwidth colon..fullwidth semicolon - 0xff1b, - 0xff64, // halfwidth ideographic comma - 0x2016, // double vertical line - 0x2032, 0x2033, - 0x2034, // prime..triple prime - 0xfe61, // small asterisk - 0xfe68, // small reverse solidus - 0xff3c, // fullwidth reverse solidus -}; - -TEST_P(IsPunctuationWordParamTest, IsPunctuation) { - UnicodeUtil util(converter_); - std::string test_string = StringFromUnicodeChar(GetParam()); - bool result = false; - EXPECT_TRUE(util.IsPunctuationWord(test_string, &result).ok()); - EXPECT_TRUE(result); -} - -INSTANTIATE_TEST_SUITE_P(IsPuncWordParamTest, - IsPunctuationWordParamTest, - 
::testing::ValuesIn(punc_word_test_cases)); - -class IsEllipsisTest : public SentenceBreakingUtilsTest, - public ::testing::Test { - protected: - void SetUp() override { - converter_ = SentenceBreakingUtilsTest::GetUConverter(); - } - - void TearDown() override { ucnv_close(converter_); } - - UConverter* converter_; -}; - -TEST_F(IsEllipsisTest, IsEllipsis) { - UnicodeUtil util(converter_); - bool result = false; - EXPECT_TRUE(util.IsEllipsis("...", &result).ok()); - EXPECT_TRUE(result); - - EXPECT_TRUE(util.IsEllipsis("…", &result).ok()); - EXPECT_TRUE(result); - - EXPECT_TRUE(util.IsEllipsis("@", &result).ok()); - EXPECT_FALSE(result); -} - -} // namespace -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.cc deleted file mode 100644 index e0224f6..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.cc +++ /dev/null
@@ -1,440 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/sentence_fragmenter.h" -#include <string> -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow_text/core/kernels/sentence_breaking_utils.h" - -using ::tensorflow::Status; - -namespace tensorflow { -namespace text { -namespace { - -// Sets a property of a sentence fragment. -void SetFragmentProperty(SentenceFragment::Property property, - SentenceFragment* fragment) { - fragment->properties = fragment->properties | property; -} - -// Returns true iff a token has any of the given properties. -bool TokenHasProperty(uint32 properties, const Token& token) { - return token.text_properties() & properties; -} - -// Returns true iff a token has the ACRONYM text property and token.word() -// ends with a period. -bool IsPeriodSeparatedAcronym(const Token& token) { - return TokenHasProperty(Token::ACRONYM, token) && - (!token.word().empty() && token.word().back() == '.'); -} - -// Returns true iff the token can appear after a space in a sentence-terminal -// token sequence. -Status SpaceAllowedBeforeToken(const UnicodeUtil* util, - const Token& token, - bool* result) { - const tstring& word = token.word(); - bool is_ellipsis = false; - TF_RETURN_IF_ERROR(util->IsEllipsis(word, &is_ellipsis)); - - bool is_terminal_punc = false; - TF_RETURN_IF_ERROR(util->IsTerminalPunc(word, &is_terminal_punc)); - - bool is_close_paren = false; - TF_RETURN_IF_ERROR(util->IsCloseParen(word, &is_close_paren)); - - *result = (TokenHasProperty(Token::EMOTICON, token) || - (is_ellipsis || is_terminal_punc || is_close_paren)); - return Status::OK(); -} -} // namespace - -class SentenceFragmenter::FragmentBoundaryMatch { - public: - FragmentBoundaryMatch() { Reset(); } - - // Goes to initial state. - void Reset() { - state_ = INITIAL_STATE; - first_terminal_punc_index_ = -1; - first_close_punc_index_ = -1; - limit_index_ = -1; - } - - // Follows the state transition for the token at the given index. Returns - // true for success, or false if there was no valid transition. 
- Status Advance(const UnicodeUtil* util, - const Document& document, - int index, - bool* result) { - const Token& token = document.tokens()[index]; - const tstring& word = token.word(); - bool no_transition = false; - - bool is_terminal_punc = false; - TF_RETURN_IF_ERROR(util->IsTerminalPunc(word, &is_terminal_punc)); - - bool is_ellipsis = false; - TF_RETURN_IF_ERROR(util->IsEllipsis(word, &is_ellipsis)); - - bool is_close_punc = false; - TF_RETURN_IF_ERROR(util->IsClosePunc(word, &is_close_punc)); - - switch (state_) { - case INITIAL_STATE: - if (is_terminal_punc || is_ellipsis || - IsPeriodSeparatedAcronym(token) || - TokenHasProperty(Token::EMOTICON, token)) { - first_terminal_punc_index_ = index; - state_ = COLLECTING_TERMINAL_PUNC; - } - break; - case COLLECTING_TERMINAL_PUNC: - - if (is_terminal_punc || is_ellipsis || - TokenHasProperty(Token::EMOTICON, token)) { - // Stay in COLLECTING_TERMINAL_PUNC state. - } else if (is_close_punc) { - first_close_punc_index_ = index; - state_ = COLLECTING_CLOSE_PUNC; - } else { - no_transition = true; - } - break; - case COLLECTING_CLOSE_PUNC: - if (is_close_punc || is_ellipsis || - TokenHasProperty(Token::EMOTICON, token)) { - // Stay in COLLECTING_CLOSE_PUNC state. We effectively ignore - // emoticons and ellipses and continue to accept closing punctuation - // after them. - } else { - no_transition = true; - } - break; - } - - if (no_transition) { - *result = false; - return Status::OK(); - } else { - limit_index_ = index + 1; - if (state_ == COLLECTING_TERMINAL_PUNC) { - // We've gotten terminal punctuation, but no close punctuation yet. - first_close_punc_index_ = limit_index_; - } - *result = true; - return Status::OK(); - } - } - - // Returns true iff we have matched at least one terminal punctuation - // character. - bool GotTerminalPunc() const { return first_terminal_punc_index_ >= 0; } - - // Field accessors. - int first_terminal_punc_index() const { return first_terminal_punc_index_; } - int first_close_punc_index() const { return first_close_punc_index_; } - int limit_index() const { return limit_index_; } - - private: - // Match state. - enum MatchState { - INITIAL_STATE = 0, - COLLECTING_TERMINAL_PUNC, - COLLECTING_CLOSE_PUNC - }; - MatchState state_ = INITIAL_STATE; - - // First terminal punctuation mark matched; may be an acronym. - // -1 for not found. - int first_terminal_punc_index_ = -1; - - // First closing punctuation mark matched. -1 for not found. - int first_close_punc_index_ = -1; - - // First token after the terminal sequence. - int limit_index_ = -1; -}; - -Status SentenceFragmenter::FindFragments( - std::vector<SentenceFragment>* result) { - // Partition tokens into sentence fragments. - for (int i_start = 0; i_start < document_->tokens().size();) { - SentenceFragment fragment; - - // Match regexp for fragment boundary. - FragmentBoundaryMatch match; - TF_RETURN_IF_ERROR(FindNextFragmentBoundary(i_start, &match)); - - // Update 'latest_open_paren_is_sentential_' for the tokens in this - // fragment. - TF_RETURN_IF_ERROR( - UpdateLatestOpenParenForFragment(i_start, match.limit_index())); - - // Add a new sentence fragment up to this boundary. - TF_RETURN_IF_ERROR(FillInFragmentFields(i_start, match, &fragment)); - - result->push_back(std::move(fragment)); - i_start = match.limit_index(); - } - return Status::OK(); -} - -// This method is essentially a control layer on top of a simple state machine -// that matches an end-of-fragment regexp. 
This method finds the next token to -// feed to the state machine, and handles embedded whitespace. The main -// complexity is that a space may delimit end-of-match, or be embedded in the -// termination sequence. When we encounter a space, we record the match found so -// far, but also continue matching. We return the longer match if it succeeds, -// else fall back to the earlier one. Note that the lookahead can incur at most -// 2n cost. -// -// E.g., suppose we're given: x? !!!y. We encounter the space after "x?" and -// have to look ahead all the way to "y" before realizing that the longer match -// fails. We put a fragment boundary after "x?", and next time around, we again -// scan "!!!" looking for a fragment boundary. Since we failed to find one last -// time, we'll fail again this time and therefore continue past "y" to find the -// next boundary. We will not try to scan "!!!" a third time. -Status SentenceFragmenter::FindNextFragmentBoundary( - int i_start, - SentenceFragmenter::FragmentBoundaryMatch* result) const { - FragmentBoundaryMatch current_match; - FragmentBoundaryMatch previous_match; - - for (int i = i_start; i < static_cast<int>(document_->tokens().size()); ++i) { - const auto& token = document_->tokens()[i]; - if (current_match.GotTerminalPunc() && i > i_start && - token.break_level() >= Token::SPACE_BREAK) { - // Got terminal punctuation and a space delimiter, so match is valid. - bool space_allowed_before_token = false; - TF_RETURN_IF_ERROR( - SpaceAllowedBeforeToken(util_, token, &space_allowed_before_token)); - if (space_allowed_before_token) { - // Remember this match. Try to extend it. - previous_match = current_match; - } else { - // Stop here. We're not allowed to extend the match in this case. - break; - } - } - bool got_transition = false; - TF_RETURN_IF_ERROR( - current_match.Advance(util_, *document_, i, &got_transition)); - if (!got_transition) { - if (previous_match.GotTerminalPunc()) { - // Extension failed. Return previous match. - *result = previous_match; - return Status::OK(); - } else { - // Start matching again from scratch. - current_match.Reset(); - - // Reprocess current token since it might be terminal punctuation. No - // infinite loop, because can't be "no transition" from INITIAL_STATE. - --i; - } - } - } - *result = current_match; - return Status::OK(); -} - -// Keep track of whether the latest open parenthesis seen so far appears to be -// sentence-initial. This is useful because if it is *non-sentence-initial*, -// then any terminal punctuation before the corresponding close paren is -// probably not a sentence boundary. Example: -// -// Mushrooms (they're fungi!!) are delicious. -// (Mushrooms are fungi!!) -// -// In the first case, the open paren is non-sentence-initial, and therefore -// the "!!)" is not a sentence boundary. In the second case, the open paren *is* -// sentence-initial, and so the "!!)" is a sentence boundary. -// -// Of course, we don't know true sentence boundaries, so we make the -// approximation that an open paren is sentence-initial iff it is -// fragment-initial. This will be wrong if the open paren occurs after terminal -// punctuation that turns out not to be a sentence boundary, e.g., -// "Yahoo! (known for search, etc.) blah", but this is not expected to happen -// often. 
-Status SentenceFragmenter::UpdateLatestOpenParenForFragment(int i_start, - int i_end) { - for (int i = i_end; i > i_start; --i) { - const auto& token = document_->tokens()[i - 1]; - bool is_open_paren = false; - TF_RETURN_IF_ERROR(util_->IsOpenParen(token.word(), &is_open_paren)); - if (is_open_paren) { - // Make the approximation that this open paren is sentence-initial iff it - // is fragment-initial. - latest_open_paren_is_sentential_ = (i - 1 == i_start); - break; - } - } - - return Status::OK(); -} - -Status SentenceFragmenter::FillInFragmentFields( - int i_start, - const FragmentBoundaryMatch& match, - SentenceFragment* fragment) const { - // Set the fragment's boundaries. - fragment->start = i_start; - fragment->limit = match.limit_index(); - - // Set the fragment's properties. - if (match.GotTerminalPunc()) { - // TERMINAL_PUNC. - SetFragmentProperty(SentenceFragment::TERMINAL_PUNC, fragment); - int terminal_punc_index = -1; - TF_RETURN_IF_ERROR( - GetAdjustedFirstTerminalPuncIndex(match, &terminal_punc_index)); - bool has_unattachable_terminal_punc = false; - TF_RETURN_IF_ERROR( - HasUnattachableTerminalPunc(match, &has_unattachable_terminal_punc)); - bool has_close_paren = false; - TF_RETURN_IF_ERROR(HasCloseParen(match, &has_close_paren)); - - fragment->terminal_punc_token = terminal_punc_index; - // MULTIPLE_TERMINAL_PUNC. - if (has_unattachable_terminal_punc) { - SetFragmentProperty(SentenceFragment::MULTIPLE_TERMINAL_PUNC, fragment); - } - - // HAS_CLOSE_PAREN & HAS_SENTENTIAL_CLOSE_PAREN. - if (has_close_paren) { - SetFragmentProperty(SentenceFragment::HAS_CLOSE_PAREN, fragment); - - if (latest_open_paren_is_sentential_) { - SetFragmentProperty(SentenceFragment::HAS_SENTENTIAL_CLOSE_PAREN, - fragment); - } - } - } - - return Status::OK(); -} - -// The standard first terminal punctuation index is just -// match.first_terminal_punc_index(). But if there is an ambiguous terminal -// punctuation mark (ellipsis) followed by an unambiguous one (.!?), then we -// treat the ellipsis as part of the sentence, and return the index of the first -// unambiguous punctuation mark after it. Example: -// -// He agreed...! -// -// We treat "!" as the first terminal punctuation mark; the ellipsis acts as -// left context. -Status SentenceFragmenter::GetAdjustedFirstTerminalPuncIndex( - const FragmentBoundaryMatch& match, - int* result) const { - // Get terminal punctuation span. - int i1 = match.first_terminal_punc_index(); - if (i1 < 0) { - *result = i1; - return Status::OK(); - } - int i2 = match.first_close_punc_index(); - - for (int i = i2; i > i1; --i) { - const auto& token = document_->tokens()[i - 1]; - bool is_ellipsis = false; - TF_RETURN_IF_ERROR(util_->IsEllipsis(token.word(), &is_ellipsis)); - if (is_ellipsis || TokenHasProperty(Token::EMOTICON, token)) { - if (i == i2) { - // Ellipsis is last terminal punctuation mark. No adjustment. - *result = i1; - return Status::OK(); - } else { - // Ellipsis is not the last terminal punctuation mark. Return the index - // of the terminal punctuation mark after it. - *result = i; // current token = i - 1 - return Status::OK(); - } - } - } - - // No ellipsis. - *result = i1; - return Status::OK(); -} - -// Example of an an "unattachable" terminal punctuation mark: -// -// He agreed!? -// -// The "?" is "unattachable" in that it can't be part of the word "agreed" -// because of the intervening "!", and therefore strongly suggests this is a -// true sentence boundary. 
The terminal punctuation mark must be unambiguous -// (.!?), as ambiguous ones (ellipsis/emoticon) do not necessarily imply a -// sentence boundary. -Status SentenceFragmenter::HasUnattachableTerminalPunc( - const FragmentBoundaryMatch& match, - bool* result) const { - *result = false; - // Get terminal punctuation span. - int i1 = match.first_terminal_punc_index(); - if (i1 < 0) { - *result = false; - return Status::OK(); - } - int i2 = match.first_close_punc_index(); - - // Iterate over the second and later punctuation marks. - for (int i = i1 + 1; i < i2; ++i) { - const auto& token = document_->tokens()[i]; - bool is_punctuation = false; - TF_RETURN_IF_ERROR(util_->IsPunctuationWord(token.word(), &is_punctuation)); - bool is_ellipsis = false; - TF_RETURN_IF_ERROR(util_->IsEllipsis(token.word(), &is_ellipsis)); - if (is_punctuation && !is_ellipsis && - !TokenHasProperty(Token::EMOTICON, token)) { - // Found an unattachable, unambiguous terminal punctuation mark. - *result = true; - return Status::OK(); - } - } - - *result = false; - return Status::OK(); -} - -Status SentenceFragmenter::HasCloseParen(const FragmentBoundaryMatch& match, - bool* result) const { - *result = false; - // Get close punctuation span. - int i1 = match.first_close_punc_index(); - if (i1 < 0) { - *result = false; - return Status::OK(); - } - int i2 = match.limit_index(); - - for (int i = i1; i < i2; ++i) { - const auto& token = document_->tokens()[i]; - bool is_close_paren = false; - TF_RETURN_IF_ERROR(util_->IsCloseParen(token.word(), &is_close_paren)); - if (is_close_paren) { - *result = true; - return Status::OK(); - } - } - *result = false; - return Status::OK(); -} - -} // namespace text -} // namespace tensorflow
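The fallback behaviour that FindNextFragmentBoundary describes above (record the match when a space follows terminal punctuation, keep extending, and fall back if the extension fails) is easiest to see on a toy input. Below is a minimal, self-contained C++ sketch, assuming plain ASCII tokens and hand-rolled IsTerminal/IsClose helpers in place of Document, Token and UnicodeUtil; the names Boundary and FindBoundary are illustrative only, not part of the deleted implementation.

// Toy version of the "terminal_punc+ close_punc*" boundary match with the
// space fallback described above. All helpers here are simplified stand-ins.
#include <iostream>
#include <string>
#include <vector>

namespace {

bool IsTerminal(const std::string& tok) {
  return tok == "." || tok == "!" || tok == "?" || tok == "...";
}

bool IsClose(const std::string& tok) {
  return tok == ")" || tok == "]" || tok == "\"" || tok == "'";
}

struct Boundary {
  int limit = -1;             // first token after the matched sequence
  bool got_terminal = false;  // matched at least one terminal punctuation mark
};

// 'space_before[i]' says whether token i was preceded by whitespace. When a
// space is seen after terminal punctuation, remember the match so far, keep
// extending, and fall back to the remembered match if the extension fails.
Boundary FindBoundary(const std::vector<std::string>& toks,
                      const std::vector<bool>& space_before, int start) {
  enum State { INITIAL, TERMINAL, CLOSE } state = INITIAL;
  Boundary current, previous;
  for (int i = start; i < static_cast<int>(toks.size()); ++i) {
    if (current.got_terminal && i > start && space_before[i]) {
      if (IsTerminal(toks[i]) || IsClose(toks[i])) {
        previous = current;  // remember this match; try to extend it
      } else {
        break;               // not allowed to extend across this space
      }
    }
    bool ok = true;
    switch (state) {
      case INITIAL:
        if (IsTerminal(toks[i])) { state = TERMINAL; current.got_terminal = true; }
        break;
      case TERMINAL:
        if (IsTerminal(toks[i])) { /* stay */ }
        else if (IsClose(toks[i])) { state = CLOSE; }
        else { ok = false; }
        break;
      case CLOSE:
        if (IsClose(toks[i])) { /* stay */ } else { ok = false; }
        break;
    }
    if (!ok) {
      if (previous.got_terminal) return previous;  // extension failed: fall back
      state = INITIAL;
      current = Boundary();
      --i;  // reprocess this token; no infinite loop, INITIAL always advances
    } else {
      current.limit = i + 1;
    }
  }
  return current;
}

}  // namespace

int main() {
  // Tokens of "x? !!!y": the space before the first "!" records a boundary
  // after "?", the attempted extension through "!!!" fails at "y".
  std::vector<std::string> toks = {"x", "?", "!", "!", "!", "y"};
  std::vector<bool> space = {false, false, true, false, false, false};
  Boundary b = FindBoundary(toks, space, 0);
  std::cout << "limit=" << b.limit << " got_terminal=" << b.got_terminal << "\n";
  return 0;
}

Run on this input it reports limit=2, i.e. a fragment boundary right after "x?", matching the worked example in the comment above.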
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.h deleted file mode 100644 index 88b8198..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter.h +++ /dev/null
@@ -1,232 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// A class to split up a document into sentence fragments. A sentence -// fragment is a token sequence whose end is potentially an end-of-sentence. -// -// Example: -// -// Document text: -// John said, "I.B.M. went up 5 points today." -// -// SentenceFragments: -// (1) John said, "I.B.M. -// (2) went up 5 points today." -// -// Fragment boundaries are induced by punctuation and paragraph breaks. - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_FRAGMENTER_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_FRAGMENTER_H_ - -#include <string> -#include <vector> - -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow_text/core/kernels/sentence_breaking_utils.h" - -namespace tensorflow { -namespace text { - -class Token { - public: - enum BreakLevel { - NO_BREAK = 0, // No separation between tokens. - SPACE_BREAK = 1, // Tokens separated by space. - LINE_BREAK = 2, // Tokens separated by line break. - SENTENCE_BREAK = 3, // Tokens separated by sentence break. - PARAGRAPH_BREAK = 4, // Tokens separated by paragraph break. - SECTION_BREAK = 10, // Tokens separated by section break. - CHAPTER_BREAK = 20, // Tokens separated by chapter break. - }; - - // Bitmask for properties of the token text. - enum TextProperty { - NONE = 0x00, - - // Token is ill-formed if: - // - // All tokens in a paragraph are marked as ill-formed if it has too few - // non-punctuation tokens in a paragraph (currently, a heading must have - // at least 2 tokens, and a non-heading must have at least 8). - // - // All tokens in a paragraph are marked as ill-formed if it lacks terminal - // sentence ending punctuation(e.g.: . ! ? …) or an emoticon (e.g.: ':)', - // ':D'). - // Exception: If a paragraph ends in an introductory punctuation - // character (','':' ';'), we say that it is an introductory paragraph. - // If it is followed by a "simple" HTML list (one whose list items have - // no substructure, such as embedded tables), then we keep both the - // introductory paragraph and the entire list. If not, we keep the - // introductory paragraph if it is followed by a well-formed paragraph. - // - // All tokens in a paragraph are marked as ill-formed if it contains the - // copyright sign (C in a circle) as this usually indicates a copyright - // notice, and is therefore effectively boilerplate. - ILL_FORMED = 0x01, - - // Indicates that the token is a part of the page title (<title> tag) or - // a heading (<hN> tag). - TITLE = 0x40, - HEADING = 0x02, - - // Text style. Determined from HTML tags only (<b>, etc), not from CSS. - BOLD = 0x04, - ITALIC = 0x08, - UNDERLINED = 0x10, - - // Indicates that the token is a part of a list. Currently set only for - // "simple" HTML lists (have no embedded paragraph boundaries) that are - // preceded by an introductory paragraph (ends in colon or a few other - // characters). - LIST = 0x20, - - // Token is an emoticon. 
- EMOTICON = 0x80, - - // Token was identified by Lexer as an acronym. Lexer identifies period-, - // hyphen-, and space-separated acronyms: "U.S.", "U-S", and "U S". - // Lexer normalizes all three to "US", but the token.word field - // normalizes only space-separated acronyms. - ACRONYM = 0x100, - - // Indicates that the token (or part of the token) is a covered by at - // least one hyperlink. More information of the hyperlink is stored in the - // first token covered by the hyperlink. - HYPERLINK = 0x200, - }; - - Token(const tstring& word, - uint32 start, - uint32 end, - BreakLevel break_level, - TextProperty text_properties) - : word_(word), - start_(start), - end_(end), - break_level_(break_level), - text_properties_(text_properties) {} - - const tstring& word() const { return word_; } - const uint32 start() const { return start_; } - const uint32 end() const { return end_; } - const BreakLevel break_level() const { return break_level_; } - const TextProperty text_properties() const { return text_properties_; } - - private: - const tstring& word_; - uint32 start_; - uint32 end_; - BreakLevel break_level_; - TextProperty text_properties_; -}; - -class Document { - public: - // Does NOT take ownership of 'tokens'. - Document(std::vector<Token>* tokens) : tokens_(tokens) {} - - void AddToken(const tstring& word, - uint32 start, - uint32 end, - Token::BreakLevel break_level, - Token::TextProperty text_properties) { - tokens_->emplace_back(word, start, end, break_level, text_properties); - } - - const std::vector<Token>& tokens() const { return *tokens_; } - - private: - // not owned - std::vector<Token>* tokens_; -}; - -struct SentenceFragment { - int start; - int limit; - - enum Property { - TERMINAL_PUNC = 0x0001, // ends with terminal punctuation - MULTIPLE_TERMINAL_PUNC = 0x0002, // e.g.: She said what?! - HAS_CLOSE_PAREN = 0x0004, // e.g.: Mushrooms (they're fungi!!) - HAS_SENTENTIAL_CLOSE_PAREN = 0x0008, // e.g.: (Mushrooms are fungi!) - }; - // A mask of the above listed properties. - uint32 properties = 0; - int terminal_punc_token = -1; -}; - -// Utility class for splitting documents into a list of sentence fragments. -class SentenceFragmenter { - public: - // Constructs a fragmenter to process a specific part of a document. - SentenceFragmenter(const Document* document, UnicodeUtil* util) - : document_(document), util_(util) {} - - // Finds sentence fragments in the [start_, limit_) range of the associated - // document. - ::tensorflow::Status FindFragments(std::vector<SentenceFragment>* result); - - private: - // State for matching a fragment-boundary regexp against a token sequence. - // The regexp is: terminal_punc+ close_punc*. - class FragmentBoundaryMatch; - - // Matches a fragment-boundary regexp against the tokens starting at - // 'i_start'. Returns the longest match found; will be non-empty as long as - // 'i_start' was not already at the end of the associated token range. - ::tensorflow::Status FindNextFragmentBoundary( - int i_start, - FragmentBoundaryMatch* result) const; - - // Updates 'latest_open_paren_is_sentential_' for the tokens in the given - // fragment. - ::tensorflow::Status UpdateLatestOpenParenForFragment(int i_start, int i_end); - - // Populates a sentence fragment with the tokens from 'i_start' to the end - // of the given FragmentBoundaryMatch. 
- ::tensorflow::Status FillInFragmentFields(int i_start, - const FragmentBoundaryMatch& match, - SentenceFragment* fragment) const; - - // Returns the adjusted first terminal punctuation index in a - // FragmentBoundaryMatch. - ::tensorflow::Status GetAdjustedFirstTerminalPuncIndex( - const FragmentBoundaryMatch& match, - int* result) const; - - // Returns true iff a FragmentBoundaryMatch has an "unattachable" terminal - // punctuation mark. - ::tensorflow::Status HasUnattachableTerminalPunc( - const FragmentBoundaryMatch& match, - bool* result) const; - - // Returns true iff a FragmentBoundaryMatch has a close paren in its closing - // punctuation. - ::tensorflow::Status HasCloseParen(const FragmentBoundaryMatch& match, - bool* result) const; - - // Whether the latest open paren seen so far appears to be sentence-initial. - // See UpdateLatestOpenParenForFragment() in the .cc file for details. - bool latest_open_paren_is_sentential_ = false; - - const Document* document_ = nullptr; // not owned - UnicodeUtil* util_ = nullptr; // not owned - - // TODO(thuang513): DISALLOW_COPY_AND_ASSIGN(SentenceFragmenter); -}; - -} // namespace text -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_FRAGMENTER_H_
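The SentenceFragment::properties field above (like Token::TextProperty) is an ordinary bitmask: SetFragmentProperty in the .cc file ORs a Property bit into it, and consumers test individual bits rather than comparing whole values. A tiny standalone sketch, with the enum values copied from SentenceFragment and everything else hypothetical:

#include <cstdint>
#include <iostream>

// Values copied from SentenceFragment::Property in the deleted header.
enum Property : uint32_t {
  TERMINAL_PUNC = 0x0001,
  MULTIPLE_TERMINAL_PUNC = 0x0002,
  HAS_CLOSE_PAREN = 0x0004,
  HAS_SENTENTIAL_CLOSE_PAREN = 0x0008,
};

int main() {
  uint32_t properties = 0;
  properties |= TERMINAL_PUNC;    // what SetFragmentProperty() does
  properties |= HAS_CLOSE_PAREN;

  // Readers test single bits.
  bool ends_sentence = (properties & TERMINAL_PUNC) != 0;
  bool close_paren = (properties & HAS_CLOSE_PAREN) != 0;
  bool sentential_paren = (properties & HAS_SENTENTIAL_CLOSE_PAREN) != 0;
  std::cout << ends_sentence << close_paren << sentential_paren << "\n";  // prints 110
  return 0;
}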
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.cc deleted file mode 100644 index 6c6786d8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.cc +++ /dev/null
@@ -1,708 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/sentence_fragmenter_v2.h" - -#include <string> - -#include "absl/strings/match.h" -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -void ConsumeOneUChar(const absl::string_view& input, - UChar32* result, - int* offset) { - const char* source = input.data(); - - int input_length = input.length(); - U8_NEXT_OR_FFFD(source, *offset, input_length, *result); -} - -bool IsTerminalPunc(const absl::string_view& input, int* offset) { - *offset = 0; - bool is_ellipsis = IsEllipsis(input, offset); - if (is_ellipsis) - return true; - - *offset = 0; - UChar32 char_value; - ConsumeOneUChar(input, &char_value, offset); - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. - switch (char_value) { - case 0x055C: // Armenian exclamation mark - case 0x055E: // Armenian question mark - case 0x17d4: // Khmer sign khan - case 0x037E: // Greek question mark - case 0x2026: // ellipsis - return true; - } - - USentenceBreak sb_property = static_cast<USentenceBreak>( - u_getIntPropertyValue(char_value, UCHAR_SENTENCE_BREAK)); - return sb_property == U_SB_ATERM || sb_property == U_SB_STERM; -} - -bool IsClosePunc(const absl::string_view& input, int* offset) { - *offset = 0; - - if (absl::StartsWith(input, "''")) { - *offset += absl::string_view("''").length(); - return true; - } - - UChar32 char_value; - ConsumeOneUChar(input, &char_value, offset); - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. - switch (char_value) { - case '>': - case ']': - case '`': - case 64831: // Ornate right parenthesis - case 65282: // fullwidth quotation mark - case 65287: // fullwidth apostrophe - return true; - } - - ULineBreak lb_property = static_cast<ULineBreak>( - u_getIntPropertyValue(char_value, UCHAR_LINE_BREAK)); - - return lb_property == U_LB_CLOSE_PUNCTUATION || - lb_property == U_LB_CLOSE_PARENTHESIS || lb_property == U_LB_QUOTATION; -} - -bool IsOpenParen(const absl::string_view& input) { - int offset = 0; - UChar32 char_value; - ConsumeOneUChar(input, &char_value, &offset); - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. 
- switch (char_value) { - case '<': - case 64830: // Ornate left parenthesis - return true; - } - - ULineBreak lb_property = static_cast<ULineBreak>( - u_getIntPropertyValue(char_value, UCHAR_LINE_BREAK)); - return lb_property == U_LB_OPEN_PUNCTUATION; -} - -bool IsCloseParen(const absl::string_view& input) { - int offset = 0; - - UChar32 char_value; - ConsumeOneUChar(input, &char_value, &offset); - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. - switch (char_value) { - case '>': - case 64831: // Ornate right parenthesis - return true; - } - - ULineBreak lb_property = static_cast<ULineBreak>( - u_getIntPropertyValue(char_value, UCHAR_LINE_BREAK)); - return lb_property == U_LB_CLOSE_PUNCTUATION || - lb_property == U_LB_CLOSE_PARENTHESIS; -} - -bool IsPunctuationWord(const absl::string_view& input) { - int offset = 0; - UChar32 char_value; - ConsumeOneUChar(input, &char_value, &offset); - - // These are unicode characters that should be considered in this category but - // are not covered by any of the ICU properties. - switch (char_value) { - case '`': - case '<': - case '>': - case '~': - case 5741: - return true; - } - - return u_ispunct(char_value) || u_hasBinaryProperty(char_value, UCHAR_DASH) || - u_hasBinaryProperty(char_value, UCHAR_HYPHEN); -} - -bool IsEllipsis(const absl::string_view& input, int* offset) { - *offset = 0; - if (absl::StartsWith(input, "...")) { - *offset += absl::string_view("...").length(); - return true; - } - - const UChar32 kEllipsisCharValue = 0x2026; - UChar32 char_value; - ConsumeOneUChar(input, &char_value, offset); - - return char_value == kEllipsisCharValue; -} - -inline bool IsAcronymComponent(const absl::string_view& input, int index) { - return (input.data()[index] >= 'A' && input.data()[index] <= 'Z') && - input.data()[index + 1] == '.'; -} - -bool IsPeriodSeparatedAcronym(const absl::string_view& input, int* offset) { - bool result = false; - string data = input.data(); - for (int i = 0; i < static_cast<int>(input.length()) - 1; i += 2) { - if (IsAcronymComponent(input, i)) { - *offset = i + 2; - if (*offset > 2) { - result = true; - } - } else { - break; - } - } - return result; -} - -bool IsEmoticon(const absl::string_view& input, int* offset) { - *offset = 0; - static std::vector<std::string> emoticon_list = {":(:)", - ":)", - ":(", - ":o)", - ":]", - ":3", - ":>", - "=]", - "=)", - ":}", - ":^)", - ":-D", - ":-)))))", - ":-))))", - ":-)))", - ":-))", - ":-)", - ">:[", - ":-(", - ":(", - ":-c", - ":c", - ":-<", - ":<", - ":-[", - ":[", - ":{", - ";(", - ":-||", - ":@", - ">:(", - ":'-(", - ":'(", - ":'-)", - ":')", - "D:<", - ">:O", - ":-O", - ":-o", - ":*", - ":-*", - ":^*", - ";-)", - ";)", - "*-)", - "*)", - ";-]", - ";]", - ";^)", - ":-,", - ">:P", - ":-P", - ":p", - "=p", - ":-p", - "=p", - ":P", - "=P", - ";p", - ";-p", - ";P", - ";-P", - ">:\\", - ">:/", - ":-/", - ":-.", - ":/", - ":\\", - "=/", - "=\\", - ":|", - ":-|", - ":$", - ":-#", - ":#", - "O:-)", - "0:-)", - "0:)", - "0;^)", - ">:)", - ">;)", - ">:-)", - "}:-)", - "}:)", - "3:-)", - ">_>^", - "^<_<", - "|;-)", - "|-O", - ":-J", - ":-&", - ":&", - "#-)", - "<3", - "8-)", - "^_^", - ":D", - ":-D", - "=D", - "^_^;;", - "O=)", - "}=)", - "B)", - "B-)", - "=|", - "-_-", - "o_o;", - "u_u", - ":-\\", - ":s", - ":S", - ":-s", - ":-S", - ";*", - ";-*" - "=(", - ">.<", - ">:-(", - ">:(", - ">=(", - ";_;", - "T_T", - "='(", - ">_<", - "D:", - ":o", - ":-o", - "=o", - "o.o", - ":O", - ":-O", - "=O", - "O.O", 
- "x_x", - "X-(", - "X(", - "X-o", - "X-O", - ":X)", - "(=^.^=)", - "(=^..^=)", - "=^_^=", - "-<@%", - ":(|)", - "(]:{", - "<\\3", - "~@~", - "8'(", - "XD", - "DX"}; - - for (int i = 0; i < static_cast<int>(emoticon_list.size()); ++i) { - if (absl::StartsWith(input, emoticon_list[i])) { - *offset = emoticon_list[i].length(); - return true; - } - } - return false; -} - -// Returns true iff the punctuation input can appear after a space in a -// sentence-terminal punctuation sequence. -bool SpaceAllowedBeforeChar(const absl::string_view& input) { - int offset = 0; - bool is_terminal_punc = IsTerminalPunc(input, &offset); - bool is_close_paren = IsCloseParen(input); - bool is_emoticon = IsEmoticon(input, &offset); - return is_terminal_punc || is_close_paren || is_emoticon; -} - -bool IsWhiteSpace(const absl::string_view& input) { - int offset = 0; - - if (absl::StartsWith(input, " ")) { - return true; - } else if (absl::StartsWith(input, "\n")) { - return true; - } else if (absl::StartsWith(input, " ")) { - return true; - } - - UChar32 char_value; - ConsumeOneUChar(input, &char_value, &offset); - - return u_isUWhiteSpace(char_value); -} - -// Follows the state transition for the slice at the given index. Returns true -// for success, or false if there was no valid transition. -bool FragmentBoundaryMatch::Advance(int index, absl::string_view slice) { - int temp_offset; - // By defualt offset is the next character. - int offset = 1; - bool no_transition = false; - bool is_terminal_punc = IsTerminalPunc(slice, &temp_offset); - if (is_terminal_punc) { - offset = temp_offset; - } - - bool is_ellipsis = IsEllipsis(slice, &temp_offset); - if (is_ellipsis) { - offset = temp_offset; - } - bool is_close_punc = IsClosePunc(slice, &temp_offset); - if (is_close_punc) { - offset = temp_offset; - } - bool is_acronym = IsPeriodSeparatedAcronym(slice, &temp_offset); - if (is_acronym) { - is_terminal_punc = false; - offset = temp_offset; - } - bool is_emoticon = IsEmoticon(slice, &temp_offset); - if (is_emoticon) { - is_terminal_punc = false; - offset = temp_offset; - } - - switch (state_) { - case INITIAL_STATE: - if (is_terminal_punc || is_acronym || is_emoticon) { - first_terminal_punc_index_ = index; - state_ = COLLECTING_TERMINAL_PUNC; - } - break; - case COLLECTING_TERMINAL_PUNC: - if (is_terminal_punc || is_emoticon) { - // Stay in COLLECTING_TERMINAL_PUNC state. - } else if (is_close_punc) { - first_close_punc_index_ = index; - state_ = COLLECTING_CLOSE_PUNC; - } else { - no_transition = true; - } - break; - case COLLECTING_CLOSE_PUNC: - if (is_close_punc || is_ellipsis || is_emoticon) { - // Stay in COLLECTING_CLOSE_PUNC state. We effectively ignore - // emoticons and ellipses and continue to accept closing punctuation - // after them. - } else { - no_transition = true; - } - break; - } - - if (no_transition) { - return false; - } else { - limit_index_ = index + offset; - if (state_ == COLLECTING_TERMINAL_PUNC) { - // We've gotten terminal punctuation, but no close punctuation yet. - first_close_punc_index_ = limit_index_; - } - return true; - } -} - -// Sets a property of a sentence fragment. -void SetFragmentProperty(SentenceFragment::Property property, - SentenceFragment* fragment) { - fragment->properties = fragment->properties | property; -} - -Status SentenceFragmenterV2::FindFragments( - std::vector<SentenceFragment>* result) { - // Partition document into sentence fragments. 
- for (int i_start = 0; i_start < static_cast<int>(document_.size());) { - bool is_white_space = IsWhiteSpace(document_.substr(i_start)); - if (is_white_space) { - ++i_start; - continue; - } - - SentenceFragment fragment; - - // Match regexp for fragment boundary. - FragmentBoundaryMatch match = FindNextFragmentBoundary(i_start); - - // Update 'latest_open_paren_is_sentential_' for this fragment. - UpdateLatestOpenParenForFragment(i_start, match.limit_index()); - - // Add a new sentence fragment up to this boundary. - FillInFragmentFields(i_start, match, &fragment); - - result->push_back(std::move(fragment)); - i_start = match.limit_index(); - } - return Status::OK(); -} - -// This method is essentially a control layer on top of a simple state machine -// that matches an end-of-fragment regexp. This method finds the next slice of -// text to feed to the state machine, and handles embedded whitespace. The main -// complexity is that a space may delimit end-of-match, or be embedded in the -// termination sequence. When we encounter a space, we record the match found so -// far, but also continue matching. We return the longer match if it succeeds, -// else fall back to the earlier one. Note that the lookahead can incur at most -// 2n cost. -// -// E.g., suppose we're given: x? !!!y. We encounter the space after "x?" and -// have to look ahead all the way to "y" before realizing that the longer match -// fails. We put a fragment boundary after "x?", and next time around, we again -// scan "!!!" looking for a fragment boundary. Since we failed to find one last -// time, we'll fail again this time and therefore continue past "y" to find the -// next boundary. We will not try to scan "!!!" a third time. - -FragmentBoundaryMatch SentenceFragmenterV2::FindNextFragmentBoundary( - int doc_index) const { - FragmentBoundaryMatch current_match; - FragmentBoundaryMatch previous_match; - - for (int i = doc_index; i < static_cast<int>(document_.size()); ++i) { - absl::string_view slice = document_.substr(i); - if (current_match.GotTerminalPunc() && i > doc_index) { - // Got terminal punctuation and a space delimiter, so match is valid. - bool space_allowed_before_char = SpaceAllowedBeforeChar(slice); - if (space_allowed_before_char) { - // Remember this match. Try to extend it. - previous_match = current_match; - } else { - // Stop here. We're not allowed to extend the match in this case. - break; - } - } - bool got_transition = current_match.Advance(i, slice); - if (!got_transition) { - if (previous_match.GotTerminalPunc()) { - // Extension failed. Return previous match. - return previous_match; - } else { - // Start matching again from scratch. - current_match.Reset(); - - // Reprocess current character since it might be terminal punctuation. - // No infinite loop, because can't be "no transition" from - // INITIAL_STATE. - --i; - } - } else { - i = current_match.limit_index() - 1; - } - } - return current_match; -} - -// Keep track of whether the latest open parenthesis seen so far appears to be -// sentence-initial. This is useful because if it is *non-sentence-initial*, -// then any terminal punctuation before the corresponding close paren is -// probably not a sentence boundary. Example: -// -// Mushrooms (they're fungi!!) are delicious. -// (Mushrooms are fungi!!) -// -// In the first case, the open paren is non-sentence-initial, and therefore -// the "!!)" is not a sentence boundary. In the second case, the open paren *is* -// sentence-initial, and so the "!!)" is a sentence boundary. 
-// -// Of course, we don't know true sentence boundaries, so we make the -// approximation that an open paren is sentence-initial iff it is -// fragment-initial. This will be wrong if the open paren occurs after terminal -// punctuation that turns out not to be a sentence boundary, e.g., -// "Yahoo! (known for search, etc.) blah", but this is not expected to happen -// often. -void SentenceFragmenterV2::UpdateLatestOpenParenForFragment(int i_start, - int i_end) { - for (int i = i_end; i > i_start; --i) { - absl::string_view slice = document_.substr(i); - if (IsOpenParen(slice)) { - // Make the approximation that this open paren is sentence-initial iff it - // is fragment-initial. - latest_open_paren_is_sentential_ = (i == i_start); - break; - } - } -} - -void SentenceFragmenterV2::FillInFragmentFields( - int i_start, - const FragmentBoundaryMatch& match, - SentenceFragment* fragment) const { - // Set the fragment's boundaries. - fragment->start = i_start; - fragment->limit = match.limit_index(); - - // Set the fragment's properties. - if (match.GotTerminalPunc()) { - // TERMINAL_PUNC. - SetFragmentProperty(SentenceFragment::TERMINAL_PUNC, fragment); - int terminal_punc_index = GetAdjustedFirstTerminalPuncIndex(match); - - bool has_unattachable_terminal_punc = HasUnattachableTerminalPunc(match); - bool has_close_paren = HasCloseParen(match); - - fragment->terminal_punc_token = terminal_punc_index; - // MULTIPLE_TERMINAL_PUNC. - if (has_unattachable_terminal_punc) { - SetFragmentProperty(SentenceFragment::MULTIPLE_TERMINAL_PUNC, fragment); - } - - // HAS_CLOSE_PAREN & HAS_SENTENTIAL_CLOSE_PAREN. - if (has_close_paren) { - SetFragmentProperty(SentenceFragment::HAS_CLOSE_PAREN, fragment); - - if (latest_open_paren_is_sentential_) { - SetFragmentProperty(SentenceFragment::HAS_SENTENTIAL_CLOSE_PAREN, - fragment); - } - } - } -} - -// The standard first terminal punctuation index is just -// match.first_terminal_punc_index(). But if there is an ambiguous terminal -// punctuation mark (ellipsis) followed by an unambiguous one (.!?), then we -// treat the ellipsis as part of the sentence, and return the index of the first -// unambiguous punctuation mark after it. Example: -// -// He agreed...! -// -// We treat "!" as the first terminal punctuation mark; the ellipsis acts as -// left context. -int SentenceFragmenterV2::GetAdjustedFirstTerminalPuncIndex( - const FragmentBoundaryMatch& match) const { - // Get terminal punctuation span. - int i1 = match.first_terminal_punc_index(); - if (i1 < 0) { - return i1; - } - int i2 = match.first_close_punc_index(); - - for (int i = i2; i > i1; --i) { - absl::string_view slice = document_.substr(i); - int temp_offset = 0; - bool is_ellipsis = IsEllipsis(slice, &temp_offset); - bool is_emoticon = IsEmoticon(slice, &temp_offset); - if (is_ellipsis || is_emoticon) { - if (i == i2) { - // Ellipsis is last terminal punctuation mark. No adjustment. - return i1; - } else { - // Ellipsis is not the last terminal punctuation mark. Return the index - // of the terminal punctuation mark after it. - return i; // current character = i - 1 - } - } - } - // No ellipsis. - return i1; -} - -// Example of an an "unattachable" terminal punctuation mark: -// -// He agreed!? -// -// The "?" is "unattachable" in that it can't be part of the word "agreed" -// because of the intervening "!", and therefore strongly suggests this is a -// true sentence boundary. 
The terminal punctuation mark must be unambiguous -// (.!?), as ambiguous ones (ellipsis/emoticon) do not necessarily imply a -// sentence boundary. -bool SentenceFragmenterV2::HasUnattachableTerminalPunc( - const FragmentBoundaryMatch& match) const { - // Get terminal punctuation span. - int i1 = match.first_terminal_punc_index(); - if (i1 < 0) { - return false; - } - // Check where second and later punctuation marks start - absl::string_view start_slice = document_.substr(i1); - int temp_offset = 0; - bool is_ellipsis = IsEllipsis(start_slice, &temp_offset); - if (is_ellipsis) { - i1 += temp_offset - 1; - } - bool is_emoticon = IsEmoticon(start_slice, &temp_offset); - if (is_emoticon) { - i1 += temp_offset - 1; - } - - int i2 = match.first_close_punc_index(); - - // Iterate over the second and later punctuation marks. - for (int i = i1 + 1; i < i2; ++i) { - absl::string_view slice = document_.substr(i); - bool is_punctuation = IsPunctuationWord(slice); - is_ellipsis = IsEllipsis(slice, &temp_offset); - if (is_ellipsis) { - i += temp_offset - 1; - } - is_emoticon = IsEmoticon(slice, &temp_offset); - if (is_emoticon) { - i += temp_offset - 1; - } - if (is_punctuation && !is_ellipsis && !is_emoticon) { - // Found an unattachable, unambiguous terminal punctuation mark. - return true; - } - } - return false; -} - -bool SentenceFragmenterV2::HasCloseParen( - const FragmentBoundaryMatch& match) const { - // Get close punctuation span. - int i1 = match.first_close_punc_index(); - if (i1 < 0) { - return false; - } - int i2 = match.limit_index(); - - for (int i = i1; i < i2; ++i) { - absl::string_view slice = document_.substr(i); - if (IsCloseParen(slice)) { - return true; - } - } - return false; -} - -} // namespace text -} // namespace tensorflow
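Unlike the token-based fragmenter above, this v2 implementation classifies raw UTF-8 one code point at a time with ICU: ConsumeOneUChar decodes via U8_NEXT_OR_FFFD, and IsTerminalPunc/IsClosePunc then consult the Sentence_Break and Line_Break properties (plus small special-case tables). Below is a standalone sketch of that core step, assuming ICU4C is available with the usual unicode/ include prefix (the deleted file reaches it via icu4c/source/common/ instead) and linking against icuuc.

#include <iostream>
#include <string>

#include "unicode/uchar.h"
#include "unicode/utf8.h"

// Decode the first code point of 'input' and test its Sentence_Break
// property, roughly what ConsumeOneUChar() + IsTerminalPunc() do above,
// minus the ellipsis handling and the hard-coded exception list.
bool StartsWithTerminalPunc(const std::string& input) {
  if (input.empty()) return false;
  int32_t offset = 0;
  UChar32 c;
  // Substitutes U+FFFD for ill-formed UTF-8 instead of failing.
  U8_NEXT_OR_FFFD(input.data(), offset, static_cast<int32_t>(input.size()), c);
  int32_t sb = u_getIntPropertyValue(c, UCHAR_SENTENCE_BREAK);
  return sb == U_SB_ATERM || sb == U_SB_STERM;
}

int main() {
  std::cout << StartsWithTerminalPunc(".") << "\n";             // 1 (ATerm)
  std::cout << StartsWithTerminalPunc("!") << "\n";             // 1 (STerm)
  std::cout << StartsWithTerminalPunc("a") << "\n";             // 0
  std::cout << StartsWithTerminalPunc("\xE3\x80\x82") << "\n";  // 1
  return 0;
}

The bytes "\xE3\x80\x82" are U+3002, the ideographic full stop, one of the terminal punctuation characters the deleted tests enumerate.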
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.h deleted file mode 100644 index 94903e3..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2.h +++ /dev/null
@@ -1,201 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// Updated version of sentence fragmenter and util functions to split up a -// document into sentence fragments. A sentence fragment is a string whose end -// is potentially an end-of-sentence. The original version of -// sentence_fragmenter operates on tokens and defines the start and end of -// fragments using token indices, while sentence_fragmenter_v2 operates on a -// string_view sliding window of the text and defines the start and end of a -// fragment based on the character offset. -// -// Example: -// -// Document text: -// John said, "I.B.M. went up 5 points today." -// -// SentenceFragments: -// (1) John said, "I.B.M. -// (2) went up 5 points today." -// -// Fragment boundaries are induced by punctuation and paragraph breaks. - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_FRAGMENTER_V2_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_FRAGMENTER_V2_H_ - -#include <vector> - -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/utypes.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -// A class of utils for identifying certain classes and properties of unicode -// characters. These utils are included in the header for use in tests. - -// Returns true iff a string is terminal punctuation. -bool IsTerminalPunc(const absl::string_view& input, int* offset); - -// Returns true iff a string is close punctuation (close quote or close -// paren). -bool IsClosePunc(const absl::string_view& input, int* offset); - -// Returns true iff a string is an open paren. -bool IsOpenParen(const absl::string_view& input); - -// Returns true iff a string is a close paren. -bool IsCloseParen(const absl::string_view& input); - -// Returns true iff a word is made of punctuation characters only. -bool IsPunctuationWord(const absl::string_view& input); - -// Returns true iff a string is an ellipsis ("..."). -bool IsEllipsis(const absl::string_view& input, int* offset); - -// Returns true iff a string is a period separated acronym (ex: "A.B.C."). -bool IsPeriodSeparatedAcronym(const absl::string_view& input, int* offset); - -// Returns true iff a string is an emoticon (ex: ":-)"). -bool IsEmoticon(const absl::string_view& input, int* offset); - -bool SpaceAllowedBeforeChar(const absl::string_view& input); - -void ConsumeOneUChar(const absl::string_view& input, - UChar32* result, - int* offset); - -// Returns true iff a string is white space. -bool IsWhiteSpace(const absl::string_view& input); - -class FragmentBoundaryMatch { - public: - FragmentBoundaryMatch() {} - - // Goes to initial state. - void Reset() { - state_ = INITIAL_STATE; - first_terminal_punc_index_ = -1; - first_close_punc_index_ = -1; - limit_index_ = -1; - } - - // Follows the state transition for the slice at - // the given index. Returns true for success, or - // false if there was no valid transition. 
- bool Advance(int index, absl::string_view slice); - - // Returns true iff we have matched at least one terminal punctuation - // character. - bool GotTerminalPunc() const { return first_terminal_punc_index_ >= 0; } - - // Field accessors. - int first_terminal_punc_index() const { return first_terminal_punc_index_; } - int first_close_punc_index() const { return first_close_punc_index_; } - int limit_index() const { return limit_index_; } - - // Match state. - enum MatchState { - INITIAL_STATE = 0, - COLLECTING_TERMINAL_PUNC, - COLLECTING_CLOSE_PUNC - }; - - MatchState state() const { return state_; } - - private: - MatchState state_ = INITIAL_STATE; - - // First terminal punctuation mark matched; may be an acronym. - // -1 for not found. - int first_terminal_punc_index_ = -1; - - // First closing punctuation mark matched. -1 for not found. - int first_close_punc_index_ = -1; - - // First character after the terminal sequence. - int limit_index_ = -1; -}; - -struct SentenceFragment { - int start; - int limit; - - enum Property { - TERMINAL_PUNC = 0x0001, // ends with terminal punctuation - MULTIPLE_TERMINAL_PUNC = 0x0002, // e.g.: She said what?! - HAS_CLOSE_PAREN = 0x0004, // e.g.: Mushrooms (they're fungi!!) - HAS_SENTENTIAL_CLOSE_PAREN = 0x0008, // e.g.: (Mushrooms are fungi!) - }; - // A mask of the above listed properties. - uint32 properties = 0; - int terminal_punc_token = -1; -}; - -// Utility class for splitting documents into a list of sentence fragments. -class SentenceFragmenterV2 { - public: - // Constructs a fragmenter to process a specific part of a document. - SentenceFragmenterV2(absl::string_view document) : document_(document) {} - - // Finds sentence fragments in the [start_, limit_) range of the associated - // document. - ::tensorflow::Status FindFragments(std::vector<SentenceFragment>* result); - - private: - // State for matching a fragment-boundary regexp against a character sequence. - // The regexp is: terminal_punc+ close_punc*. - - // Matches a fragment-boundary regexp against a slice of the document starting - // at 'doc_index'. Returns the longest match found; will be non-empty as long - // as 'doc_index' was not already at the end of the associated document. - FragmentBoundaryMatch FindNextFragmentBoundary(int doc_index) const; - - // Updates 'latest_open_paren_is_sentential_' for the given - // fragment. - void UpdateLatestOpenParenForFragment(int i_start, int i_end); - - // Populates a sentence fragment with the text from 'i_start' to the end - // of the given FragmentBoundaryMatch. - void FillInFragmentFields(int i_start, - const FragmentBoundaryMatch& match, - SentenceFragment* fragment) const; - - // Returns the adjusted first terminal punctuation index in a - // FragmentBoundaryMatch. - int GetAdjustedFirstTerminalPuncIndex( - const FragmentBoundaryMatch& match) const; - - // Returns true iff a FragmentBoundaryMatch has an "unattachable" terminal - // punctuation mark. - bool HasUnattachableTerminalPunc(const FragmentBoundaryMatch& match) const; - - // Returns true iff a FragmentBoundaryMatch has a close paren in its closing - // punctuation. - bool HasCloseParen(const FragmentBoundaryMatch& match) const; - - // Whether the latest open paren seen so far appears to be sentence-initial. - // See UpdateLatestOpenParenForFragment() in the .cc file for details. 
- bool latest_open_paren_is_sentential_ = false; - - absl::string_view document_ = nullptr; // not owned - - // TODO(thuang513): DISALLOW_COPY_AND_ASSIGN(SentenceFragmenter); -}; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_SENTENCE_FRAGMENTER_V2_H_
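The v2 fragmenter above reports fragments as byte offsets into the original string_view. The test file deleted next exercises this interface directly; for orientation, here is a hedged usage sketch mirroring its Basic case (it would only compile against the deleted header inside the TensorFlow build, so treat it as illustrative):

#include <iostream>
#include <string>
#include <vector>

#include "tensorflow_text/core/kernels/sentence_fragmenter_v2.h"

int main() {
  std::string text = "Hello. Foo bar!";
  tensorflow::text::SentenceFragmenterV2 fragmenter(text);
  std::vector<tensorflow::text::SentenceFragment> fragments;
  if (!fragmenter.FindFragments(&fragments).ok()) return 1;
  for (const auto& f : fragments) {
    // start/limit are byte offsets into 'text'.
    std::cout << text.substr(f.start, f.limit - f.start) << "\n";
  }
  return 0;
}

With this input the Basic test below expects fragments [0, 6) and [7, 15), i.e. "Hello." and "Foo bar!".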
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2_test.cc deleted file mode 100644 index 32f45ea..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentence_fragmenter_v2_test.cc +++ /dev/null
@@ -1,1098 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/sentence_fragmenter_v2.h" - -#include <string> -#include <vector> - -#include <gtest/gtest.h> -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/unistr.h" - -namespace tensorflow { -namespace text { -namespace { - -class SentenceBreakingUtilsParamTest : public ::testing::TestWithParam<UChar> { - protected: - std::string StringFromUnicodeChar(UChar32 input) { - std::string result; - icu::UnicodeString test_unicode_string(input); - test_unicode_string.toUTF8String(result); - return result; - } -}; - -class SentenceBreakingUtilsStringParamTest - : public ::testing::TestWithParam<const char*> {}; - -class IsTerminalPuncParamTest : public SentenceBreakingUtilsParamTest {}; - -class IsTerminalPuncTest : public ::testing::Test {}; - -const UChar is_terminal_punc_test_cases[] = { - 0x055C, // Armenian exclamation mark - 0x055E, // Armenian question mark - 0x0589, // Armenian full stop - 0x061F, // Arabic question mark - 0x06D4, // Arabic full stop - 0x0700, // Syriabc end of paragraph - 0x0701, // Syriac supralinear full stop - 0x0702, // Syriac sublinear full stop - 0x1362, // Ethiopic full stop - 0x1367, // Ethiopic question mark - 0x1368, // Ethiopic paragraph separator - 0x104A, // Myanmar sign little section - 0x104B, // Myanmar sign section - 0x166E, // Canadian syllabics full stop - 0x17d4, // Khmer sign khan - 0x1803, // Mongolian full stop - 0x1809, // Mongolian Manchu full stop - 0x1944, // Limbu exclamation mark - 0x1945, // Limbu question mark - 0x203C, // double exclamation mark - 0x203D, // interrobang - 0x2047, // double question mark - 0x2048, // question exclamation mark - 0x2049, // exclamation question mark - 0x3002, // ideographic full stop - 0x037E, // Greek question mark - 0xFE52, // small full stop - 0xFE56, // small question mark - 0xFE57, // small exclamation mark - 0xFF01, // fullwidth exclamation mark - 0xFF0E, // fullwidth full stop - 0xFF1F, // fullwidth question mark - 0xFF61, // halfwidth ideographic full stop - 0x2026, // ellipsis - 0x0964, - 0x0965, // Devanagari danda..Devanagari double -}; - -TEST_P(IsTerminalPuncParamTest, IsTerminalPunc) { - std::string test_string = StringFromUnicodeChar(GetParam()); - int offset; - EXPECT_TRUE(IsTerminalPunc(test_string, &offset)); -} - -INSTANTIATE_TEST_SUITE_P(IsTerminalPuncTest, - IsTerminalPuncParamTest, - ::testing::ValuesIn(is_terminal_punc_test_cases)); - -TEST_F(IsTerminalPuncTest, IsMultiCharEllipseTerminalPunc) { - std::string test_string = "..."; - int offset; - EXPECT_TRUE(IsTerminalPunc(test_string, &offset)); -} - -TEST_F(IsTerminalPuncTest, TestMultiUnicodeChars) { - std::string test_string = "never gonna let you decode"; - int offset; - EXPECT_FALSE(IsTerminalPunc(test_string, &offset)); -} - -struct ClosePuncOffsetPairs { - const UChar 
close_punc; - const int offset; -}; - -class SentenceBreakingUtilsClosePuncPairParamTest - : public ::testing::TestWithParam<ClosePuncOffsetPairs> { - protected: - std::string StringFromUnicodeChar(UChar32 input) { - std::string result; - icu::UnicodeString test_unicode_string(input); - test_unicode_string.toUTF8String(result); - return result; - } -}; - -class ClosePuncParamTest : public SentenceBreakingUtilsClosePuncPairParamTest { -}; - -const ClosePuncOffsetPairs close_punc_test_cases[] = { - {0x29, 1}, - {0x5D, 1}, - {0x3E, 1}, - {0x7D, 1}, - {0x207E, 3}, // superscript right parenthesis - {0x208E, 3}, // subscript right parenthesis - {0x27E7, 3}, // mathematical right white square bracket - {0x27E9, 3}, // mathematical right angle bracket - {0x27EB, 3}, // mathematical right double angle bracket - {0x2984, 3}, // right white curly bracket - {0x2986, 3}, // right white parenthesis - {0x2988, 3}, // Z notation right image bracket - {0x298A, 3}, // Z notation right binding bracket - {0x298C, 3}, // right square bracket with underbar - {0x298E, 3}, // right square bracket with tick in top corner - {0x2990, 3}, // right square bracket with tick in bottom corner - {0x2992, 3}, // right angle bracket with dot - {0x2994, 3}, // right arc greater-than bracket - {0x2996, 3}, // double right arc less-than bracket - {0x2998, 3}, // right black tortoise shell bracket - {0x29D9, 3}, // right wiggly fence - {0x29DB, 3}, // right double wiggly fence - {0x29FD, 3}, // right-pointing curved angle bracket - {0x3009, 3}, // CJK right angle bracket - {0x300B, 3}, // CJK right double angle bracket - {0x3011, 3}, // CJK right black lenticular bracket - {0x3015, 3}, // CJK right tortoise shell bracket - {0x3017, 3}, // CJK right white lenticular bracket - {0x3019, 3}, // CJK right white tortoise shell bracket - {0x301B, 3}, // CJK right white square bracket - {0xFD3F, 3}, // Ornate right parenthesis - {0xFE5A, 3}, // small right parenthesis - {0xFE5C, 3}, // small right curly bracket - {0xFF09, 3}, // fullwidth right parenthesis - {0xFF3D, 3}, // fullwidth right square bracket - {0xFF5D, 3}, // fullwidth right curly bracket - {0x27, 1}, - {0x60, 1}, - {0x22, 1}, - {0xFF07, 3}, // fullwidth apostrophe - {0xFF02, 3}, // fullwidth quotation mark - {0x2019, 3}, // right single quotation mark (English, others) - {0x201D, 3}, // right double quotation mark (English, others) - {0x2018, 3}, // left single quotation mark (Czech, German, Slovak) - {0x201C, 3}, // left double quotation mark (Czech, German, Slovak) - {0x203A, 3}, // single right-pointing angle quotation mark (French, others) - {0x00BB, 2}, // right-pointing double angle quotation mark (French, others) - {0x2039, 3}, // single left-pointing angle quotation mark (Slovenian, - // others) - {0x00AB, 2}, // left-pointing double angle quotation mark (Slovenian, - // others) - {0x300D, 3}, // right corner bracket (East Asian languages) - {0xfe42, 3}, // presentation form for vertical right corner bracket - {0xFF63, 3}, // halfwidth right corner bracket (East Asian languages) - {0x300F, 3}, // right white corner bracket (East Asian languages) - {0xfe44, 3}, // presentation form for vertical right white corner bracket - {0x301F, 3}, // low double prime quotation mark (East Asian languages) - {0x301E, 3} // close double prime (East Asian languages written - // horizontally) -}; - -TEST_P(ClosePuncParamTest, IsClosePunc) { - ClosePuncOffsetPairs test_punc = GetParam(); - std::string test_string = StringFromUnicodeChar(test_punc.close_punc); - int expected_offset 
= test_punc.offset; - int offset; - EXPECT_TRUE(IsClosePunc(test_string, &offset)); - EXPECT_EQ(offset, expected_offset); -} - -INSTANTIATE_TEST_SUITE_P(IsClosePuncParamTest, - ClosePuncParamTest, - ::testing::ValuesIn(close_punc_test_cases)); - -class OpenParenParamTest : public SentenceBreakingUtilsParamTest {}; - -const UChar open_paren_test_cases[] = { - '(', '[', '<', '{', - 0x207D, // superscript left parenthesis - 0x208D, // subscript left parenthesis - 0x27E6, // mathematical left white square bracket - 0x27E8, // mathematical left angle bracket - 0x27EA, // mathematical left double angle bracket - 0x2983, // left white curly bracket - 0x2985, // left white parenthesis - 0x2987, // Z notation left image bracket - 0x2989, // Z notation left binding bracket - 0x298B, // left square bracket with underbar - 0x298D, // left square bracket with tick in top corner - 0x298F, // left square bracket with tick in bottom corner - 0x2991, // left angle bracket with dot - 0x2993, // left arc less-than bracket - 0x2995, // double left arc greater-than bracket - 0x2997, // left black tortoise shell bracket - 0x29D8, // left wiggly fence - 0x29DA, // left double wiggly fence - 0x29FC, // left-pointing curved angle bracket - 0x3008, // CJK left angle bracket - 0x300A, // CJK left double angle bracket - 0x3010, // CJK left black lenticular bracket - 0x3014, // CJK left tortoise shell bracket - 0x3016, // CJK left white lenticular bracket - 0x3018, // CJK left white tortoise shell bracket - 0x301A, // CJK left white square bracket - 0xFD3E, // Ornate left parenthesis - 0xFE59, // small left parenthesis - 0xFE5B, // small left curly bracket - 0xFF08, // fullwidth left parenthesis - 0xFF3B, // fullwidth left square bracket - 0xFF5B, // fullwidth left curly bracket -}; - -TEST_P(OpenParenParamTest, IsOpenParen) { - std::string test_string = StringFromUnicodeChar(GetParam()); - EXPECT_TRUE(IsOpenParen(test_string)); -} - -INSTANTIATE_TEST_SUITE_P(IsOpenParenParamTest, - OpenParenParamTest, - ::testing::ValuesIn(open_paren_test_cases)); - -class CloseParenParamTest : public SentenceBreakingUtilsParamTest {}; - -const UChar close_paren_test_cases[] = { - ')', ']', '>', '}', - 0x207E, // superscript right parenthesis - 0x208E, // subscript right parenthesis - 0x27E7, // mathematical right white square bracket - 0x27E9, // mathematical right angle bracket - 0x27EB, // mathematical right double angle bracket - 0x2984, // right white curly bracket - 0x2986, // right white parenthesis - 0x2988, // Z notation right image bracket - 0x298A, // Z notation right binding bracket - 0x298C, // right square bracket with underbar - 0x298E, // right square bracket with tick in top corner - 0x2990, // right square bracket with tick in bottom corner - 0x2992, // right angle bracket with dot - 0x2994, // right arc greater-than bracket - 0x2996, // double right arc less-than bracket - 0x2998, // right black tortoise shell bracket - 0x29D9, // right wiggly fence - 0x29DB, // right double wiggly fence - 0x29FD, // right-pointing curved angle bracket - 0x3009, // CJK right angle bracket - 0x300B, // CJK right double angle bracket - 0x3011, // CJK right black lenticular bracket - 0x3015, // CJK right tortoise shell bracket - 0x3017, // CJK right white lenticular bracket - 0x3019, // CJK right white tortoise shell bracket - 0x301B, // CJK right white square bracket - 0xFD3F, // Ornate right parenthesis - 0xFE5A, // small right parenthesis - 0xFE5C, // small right curly bracket - 0xFF09, // fullwidth right parenthesis - 0xFF3D, // 
fullwidth right square bracket - 0xFF5D, // fullwidth right curly bracket -}; - -TEST_P(CloseParenParamTest, IsCloseParen) { - std::string test_string = StringFromUnicodeChar(GetParam()); - EXPECT_TRUE(IsCloseParen(test_string)); -} - -INSTANTIATE_TEST_SUITE_P(IsCloseParenParamTest, - CloseParenParamTest, - ::testing::ValuesIn(close_paren_test_cases)); - -class IsPunctuationWordParamTest : public SentenceBreakingUtilsParamTest {}; - -const UChar punc_word_test_cases[] = { - '(', '[', '<', '{', - 0x207D, // superscript left parenthesis - 0x208D, // subscript left parenthesis - 0x27E6, // mathematical left white square bracket - 0x27E8, // mathematical left angle bracket - 0x27EA, // mathematical left double angle bracket - 0x2983, // left white curly bracket - 0x2985, // left white parenthesis - 0x2987, // Z notation left image bracket - 0x2989, // Z notation left binding bracket - 0x298B, // left square bracket with underbar - 0x298D, // left square bracket with tick in top corner - 0x298F, // left square bracket with tick in bottom corner - 0x2991, // left angle bracket with dot - 0x2993, // left arc less-than bracket - 0x2995, // double left arc greater-than bracket - 0x2997, // left black tortoise shell bracket - 0x29D8, // left wiggly fence - 0x29DA, // left double wiggly fence - 0x29FC, // left-pointing curved angle bracket - 0x3008, // CJK left angle bracket - 0x300A, // CJK left double angle bracket - 0x3010, // CJK left black lenticular bracket - 0x3014, // CJK left tortoise shell bracket - 0x3016, // CJK left white lenticular bracket - 0x3018, // CJK left white tortoise shell bracket - 0x301A, // CJK left white square bracket - 0xFD3E, // Ornate left parenthesis - 0xFE59, // small left parenthesis - 0xFE5B, // small left curly bracket - 0xFF08, // fullwidth left parenthesis - 0xFF3B, // fullwidth left square bracket - 0xFF5B, // fullwidth left curly bracket - '"', '\'', '`', - 0xFF07, // fullwidth apostrophe - 0xFF02, // fullwidth quotation mark - 0x2018, // left single quotation mark (English, others) - 0x201C, // left double quotation mark (English, others) - 0x201B, // single high-reveresed-9 quotation mark (PropList.txt) - 0x201A, // single low-9 quotation mark (Czech, German, Slovak) - 0x201E, // double low-9 quotation mark (Czech, German, Slovak) - 0x201F, // double high-reversed-9 quotation mark (PropList.txt) - 0x2019, // right single quotation mark (Danish, Finnish, Swedish, Norw.) - 0x201D, // right double quotation mark (Danish, Finnish, Swedish, Norw.) - 0x2039, // single left-pointing angle quotation mark (French, others) - 0x00AB, // left-pointing double angle quotation mark (French, others) - 0x203A, // single right-pointing angle quotation mark (Slovenian, others) - 0x00BB, // right-pointing double angle quotation mark (Slovenian, others) - 0x300C, // left corner bracket (East Asian languages) - 0xFE41, // presentation form for vertical left corner bracket - 0xFF62, // halfwidth left corner bracket (East Asian languages) - 0x300E, // left white corner bracket (East Asian languages) - 0xFE43, // presentation form for vertical left white corner bracket - 0x301D, // reversed double prime quotation mark (East Asian langs, horiz.) 
- ')', ']', '>', '}', - 0x207E, // superscript right parenthesis - 0x208E, // subscript right parenthesis - 0x27E7, // mathematical right white square bracket - 0x27E9, // mathematical right angle bracket - 0x27EB, // mathematical right double angle bracket - 0x2984, // right white curly bracket - 0x2986, // right white parenthesis - 0x2988, // Z notation right image bracket - 0x298A, // Z notation right binding bracket - 0x298C, // right square bracket with underbar - 0x298E, // right square bracket with tick in top corner - 0x2990, // right square bracket with tick in bottom corner - 0x2992, // right angle bracket with dot - 0x2994, // right arc greater-than bracket - 0x2996, // double right arc less-than bracket - 0x2998, // right black tortoise shell bracket - 0x29D9, // right wiggly fence - 0x29DB, // right double wiggly fence - 0x29FD, // right-pointing curved angle bracket - 0x3009, // CJK right angle bracket - 0x300B, // CJK right double angle bracket - 0x3011, // CJK right black lenticular bracket - 0x3015, // CJK right tortoise shell bracket - 0x3017, // CJK right white lenticular bracket - 0x3019, // CJK right white tortoise shell bracket - 0x301B, // CJK right white square bracket - 0xFD3F, // Ornate right parenthesis - 0xFE5A, // small right parenthesis - 0xFE5C, // small right curly bracket - 0xFF09, // fullwidth right parenthesis - 0xFF3D, // fullwidth right square bracket - 0xFF5D, // fullwidth right curly bracket - '\'', '"', '`', - 0xFF07, // fullwidth apostrophe - 0xFF02, // fullwidth quotation mark - 0x2019, // right single quotation mark (English, others) - 0x201D, // right double quotation mark (English, others) - 0x2018, // left single quotation mark (Czech, German, Slovak) - 0x201C, // left double quotation mark (Czech, German, Slovak) - 0x203A, // single right-pointing angle quotation mark (French, others) - 0x00BB, // right-pointing double angle quotation mark (French, others) - 0x2039, // single left-pointing angle quotation mark (Slovenian, others) - 0x00AB, // left-pointing double angle quotation mark (Slovenian, others) - 0x300D, // right corner bracket (East Asian languages) - 0xfe42, // presentation form for vertical right corner bracket - 0xFF63, // halfwidth right corner bracket (East Asian languages) - 0x300F, // right white corner bracket (East Asian languages) - 0xfe44, // presentation form for vertical right white corner bracket - 0x301F, // low double prime quotation mark (East Asian languages) - 0x301E, // close double prime (East Asian languages written horizontally) - 0x00A1, // Spanish inverted exclamation mark - 0x00BF, // Spanish inverted question mark - '.', '!', '?', - 0x055C, // Armenian exclamation mark - 0x055E, // Armenian question mark - 0x0589, // Armenian full stop - 0x061F, // Arabic question mark - 0x06D4, // Arabic full stop - 0x0700, // Syriac end of paragraph - 0x0701, // Syriac supralinear full stop - 0x0702, // Syriac sublinear full stop - 0x0964, // Devanagari danda..Devanagari double danda - 0x0965, - 0x1362, // Ethiopic full stop - 0x1367, // Ethiopic question mark - 0x1368, // Ethiopic paragraph separator - 0x104A, // Myanmar sign little section - 0x104B, // Myanmar sign section - 0x166E, // Canadian syllabics full stop - 0x17d4, // Khmer sign khan - 0x1803, // Mongolian full stop - 0x1809, // Mongolian Manchu full stop - 0x1944, // Limbu exclamation mark - 0x1945, // Limbu question mark - 0x203C, // double exclamation mark - 0x203D, // interrobang - 0x2047, // double question mark - 0x2048, // question exclamation mark - 
0x2049, // exclamation question mark - 0x3002, // ideographic full stop - 0x037E, // Greek question mark - 0xFE52, // small full stop - 0xFE56, // small question mark - 0xFE57, // small exclamation mark - 0xFF01, // fullwidth exclamation mark - 0xFF0E, // fullwidth full stop - 0xFF1F, // fullwidth question mark - 0xFF61, // halfwidth ideographic full stop - 0x2026, // ellipsis - 0x30fb, // Katakana middle dot - 0xff65, // halfwidth Katakana middle dot - 0x2040, // character tie - '-', '~', - 0x058a, // Armenian hyphen - 0x1806, // Mongolian todo soft hyphen - 0x2010, // hyphen..horizontal bar - 0x2011, 0x2012, 0x2013, 0x2014, 0x2015, - 0x2053, // swung dash -- from Table 6-3 of Unicode book - 0x207b, // superscript minus - 0x208b, // subscript minus - 0x2212, // minus sign - 0x301c, // wave dash - 0x3030, // wavy dash - 0xfe31, // presentation form for vertical em dash..en dash - 0xfe32, - 0xfe58, // small em dash - 0xfe63, // small hyphen-minus - 0xff0d, // fullwidth hyphen-minus - ',', ':', ';', - 0x00b7, // middle dot - 0x0387, // Greek ano teleia - 0x05c3, // Hebrew punctuation sof pasuq - 0x060c, // Arabic comma - 0x061b, // Arabic semicolon - 0x066b, // Arabic decimal separator - 0x066c, // Arabic thousands separator - 0x0703, // Syriac contraction and others - 0x0704, 0x0705, 0x0706, 0x0707, 0x0708, 0x0709, 0x70a, - 0x070c, // Syric harklean metobelus - 0x0e5a, // Thai character angkhankhu - 0x0e5b, // Thai character khomut - 0x0f08, // Tibetan mark sbrul shad - 0x0f0d, // Tibetan mark shad..Tibetan mark rgya gram shad - 0x0f0e, 0x0f0f, 0x0f10, 0x0f11, 0x0f12, - 0x1361, // Ethiopic wordspace - 0x1363, // other Ethiopic chars - 0x1364, 0x1365, 0x1366, - 0x166d, // Canadian syllabics chi sign - 0x16eb, // Runic single punctuation..Runic cross punctuation - 0x16ed, - 0x17d5, // Khmer sign camnuc pii huuh and other - 0x17d6, - 0x17da, // Khmer sign koomut - 0x1802, // Mongolian comma - 0x1804, // Mongolian four dots and other - 0x1805, - 0x1808, // Mongolian manchu comma - 0x3001, // ideographic comma - 0xfe50, // small comma and others - 0xfe51, - 0xfe54, // small semicolon and other - 0xfe55, - 0xff0c, // fullwidth comma - 0xff0e, // fullwidth stop..fullwidth solidus - 0xff0f, - 0xff1a, // fullwidth colon..fullwidth semicolon - 0xff1b, - 0xff64, // halfwidth ideographic comma - 0x2016, // double vertical line - 0x2032, 0x2033, - 0x2034, // prime..triple prime - 0xfe61, // small asterisk - 0xfe68, // small reverse solidus - 0xff3c, // fullwidth reverse solidus -}; - -TEST_P(IsPunctuationWordParamTest, IsPunctuation) { - std::string test_string = StringFromUnicodeChar(GetParam()); - EXPECT_TRUE(IsPunctuationWord(test_string)); -} - -INSTANTIATE_TEST_SUITE_P(IsPuncWordParamTest, - IsPunctuationWordParamTest, - ::testing::ValuesIn(punc_word_test_cases)); - -class IsEllipsisTest : public ::testing::Test {}; - -TEST_F(IsEllipsisTest, IsEllipsis) { - int offset; - EXPECT_TRUE(IsEllipsis("...", &offset)); - EXPECT_EQ(offset, 3); - EXPECT_TRUE(IsEllipsis("…", &offset)); - EXPECT_EQ(offset, 3); - EXPECT_FALSE(IsEllipsis("@", &offset)); - EXPECT_EQ(offset, 1); -} - -class IsWhiteSpaceTest : public ::testing::Test {}; - -TEST_F(IsWhiteSpaceTest, IsWhiteSpace) { - EXPECT_TRUE(IsWhiteSpace(" ")); - - EXPECT_TRUE(IsWhiteSpace("\n")); - - EXPECT_TRUE(IsWhiteSpace(" ")); - - EXPECT_FALSE(IsWhiteSpace("@")); - - EXPECT_FALSE(IsWhiteSpace("w")); -} - -class IsAcronymTest : public ::testing::Test {}; - -TEST_F(IsAcronymTest, IsAcronym) { - int offset = 0; - EXPECT_TRUE(IsPeriodSeparatedAcronym("U.S.", 
&offset)); - EXPECT_EQ(offset, 4); - - offset = 0; - EXPECT_TRUE(IsPeriodSeparatedAcronym("E.A.T.", &offset)); - EXPECT_EQ(offset, 6); - - offset = 0; - EXPECT_TRUE(IsPeriodSeparatedAcronym("A.B.C.D.E.F.", &offset)); - EXPECT_EQ(offset, 12); - - offset = 0; - EXPECT_FALSE(IsPeriodSeparatedAcronym("X.", &offset)); - - EXPECT_FALSE(IsPeriodSeparatedAcronym("US", &offset)); - - EXPECT_FALSE(IsPeriodSeparatedAcronym("U-S", &offset)); -} - -class EmoticonParamTest : public SentenceBreakingUtilsStringParamTest {}; - -static const char* const emoticon_test_cases[] = {":(:)", - ":)", - ":(", - ":o)", - ":]", - ":3", - ":>", - "=]", - "=)", - ":}", - ":^)", - ":-D", - ":-)))))", - ":-))))", - ":-)))", - ":-))", - ":-)", - ">:[", - ":-(", - ":(", - ":-c", - ":c", - ":-<", - ":<", - ":-[", - ":[", - ":{", - ";(", - ":-||", - ":@", - ">:(", - ":'-(", - ":'(", - ":'-)", - ":')", - "D:<", - ">:O", - ":-O", - ":-o", - ":*", - ":-*", - ":^*", - ";-)", - ";)", - "*-)", - "*)", - ";-]", - ";]", - ";^)", - ":-,", - ">:P", - ":-P", - ":p", - "=p", - ":-p", - "=p", - ":P", - "=P", - ";p", - ";-p", - ";P", - ";-P", - ">:\\", - ">:/", - ":-/", - ":-.", - ":/", - ":\\", - "=/", - "=\\", - ":|", - ":-|", - ":$", - ":-#", - ":#", - "O:-)", - "0:-)", - "0:)", - "0;^)", - ">:)", - ">;)", - ">:-)", - "}:-)", - "}:)", - "3:-)", - ">_>^", - "^<_<", - "|;-)", - "|-O", - ":-J", - ":-&", - ":&", - "#-)", - "<3", - "8-)", - "^_^", - ":D", - ":-D", - "=D", - "^_^;;", - "O=)", - "}=)", - "B)", - "B-)", - "=|", - "-_-", - "o_o;", - "u_u", - ":-\\", - ":s", - ":S", - ":-s", - ":-S", - ";*", - ";-*" - "=(", - ">.<", - ">:-(", - ">:(", - ">=(", - ";_;", - "T_T", - "='(", - ">_<", - "D:", - ":o", - ":-o", - "=o", - "o.o", - ":O", - ":-O", - "=O", - "O.O", - "x_x", - "X-(", - "X(", - "X-o", - "X-O", - ":X)", - "(=^.^=)", - "(=^..^=)", - "=^_^=", - "-<@%", - ":(|)", - "(]:{", - "<\\3", - "~@~", - "8'(", - "XD", - "DX"}; - -TEST_P(EmoticonParamTest, IsEmoticon) { - int offset = 0; - EXPECT_TRUE(IsEmoticon(GetParam(), &offset)); -} - -INSTANTIATE_TEST_SUITE_P(IsEmoticonParamTest, - EmoticonParamTest, - ::testing::ValuesIn(emoticon_test_cases)); - -class IsEmoticonTest : public ::testing::Test {}; - -TEST_F(IsEmoticonTest, IsEmoticon) { - int offset = 0; - - EXPECT_TRUE(IsEmoticon(">:-(", &offset)); - - EXPECT_FALSE(IsEmoticon("w", &offset)); - - EXPECT_FALSE(IsEmoticon(":", &offset)); -} - -TEST(SentenceFragmenterTest, Basic) { - // 1 - // 012345678901234 - string test_input = "Hello. Foo bar!"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 6); - EXPECT_EQ(fragments[1].start, 7); - EXPECT_EQ(fragments[1].limit, 15); -} - -TEST(SentenceFragmenterTest, BasicEllipsis) { - // 1 - // 012345678901234 - string test_input = "Hello...foo bar"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 8); - EXPECT_EQ(fragments[1].start, 8); - EXPECT_EQ(fragments[1].limit, 15); -} - -TEST(SentenceFragmenterTest, Parentheses) { - // 1 2 - // 012345678901234567890123456789 - string test_input = "Hello (who are you...) 
foo bar"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 22); - EXPECT_EQ(fragments[1].start, 23); - EXPECT_EQ(fragments[1].limit, 30); -} - -TEST(SentenceFragmenterTest, MidFragmentParentheses) { - // 1 2 - // 012345678901234567890123456789 - string test_input = "Hello (who are you) world? Foo bar"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 26); - EXPECT_EQ(fragments[1].start, 27); - EXPECT_EQ(fragments[1].limit, 34); -} - -TEST(SentenceFragmenterTest, PunctuationAfterParentheses) { - // 1 2 - // 01234567890123456789012345678 - string test_input = "Hello (who are you)? Foo bar!"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 20); - EXPECT_EQ(fragments[1].start, 21); - EXPECT_EQ(fragments[1].limit, 29); -} - -TEST(SentenceFragmenterTest, ManyFinalPunctuations) { - // 1 2 - // 0123456789012345678901234 - string test_input = "Hello!!!!! Who are you??"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 10); - EXPECT_EQ(fragments[1].start, 11); - EXPECT_EQ(fragments[1].limit, 24); -} - -TEST(SentenceFragmenterTest, NewLine) { - // 1 2 3 - // 012345678901234567890 1 23456 7 89012 3 45678 - string test_input = "Who let the dogs out?\r\nWho?\r\nWho?\r\nWho?"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 21); - EXPECT_EQ(fragments[1].start, 23); - EXPECT_EQ(fragments[1].limit, 27); - EXPECT_EQ(fragments[2].start, 29); - EXPECT_EQ(fragments[2].limit, 33); - EXPECT_EQ(fragments[3].start, 35); - EXPECT_EQ(fragments[3].limit, 39); -} - -TEST(SentenceFragmenterTest, WhiteSpaceInPunctuation) { - // 1 2 - // 0123456789012345678901234 - string test_input = "Hello?? !!! 
Who are you??"; - SentenceFragmenterV2 fragmenter(test_input); - std::vector<SentenceFragment> fragments; - EXPECT_TRUE(fragmenter.FindFragments(&fragments).ok()); - EXPECT_EQ(fragments[0].start, 0); - EXPECT_EQ(fragments[0].limit, 7); - EXPECT_EQ(fragments[1].start, 8); - EXPECT_EQ(fragments[1].limit, 11); - EXPECT_EQ(fragments[2].start, 12); - EXPECT_EQ(fragments[2].limit, 25); -} - -} // namespace - -TEST(FragmentBoundaryMatchTest, NoStateChange) { - FragmentBoundaryMatch f; - // || - // 012345678901234 - string test_input = "Hello...foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_FALSE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), -1); - EXPECT_EQ(f.first_close_punc_index(), -1); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::INITIAL_STATE); -} - -TEST(FragmentBoundaryMatchTest, BasicEllipsis) { - FragmentBoundaryMatch f; - // | | - // 0123456789 - string test_input = "...foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 3); - EXPECT_EQ(f.limit_index(), 3); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, BasicPeriod) { - FragmentBoundaryMatch f; - // || - // 0123456789 - string test_input = ". Foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 1); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, BasicAcronym) { - FragmentBoundaryMatch f; - // | | - // 0123456789 - string test_input = "A.B. xyz"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 4); - EXPECT_EQ(f.limit_index(), 4); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, LongerAcronym) { - FragmentBoundaryMatch f; - // | | - // 0123456789 - string test_input = "I.B.M. yo"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 6); - EXPECT_EQ(f.limit_index(), 6); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, Emoticon) { - FragmentBoundaryMatch f; - // | | - // 0123456789012 - string test_input = ">:-( hello..."; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 4); - EXPECT_EQ(f.limit_index(), 4); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, ParensWithEllipsis) { - FragmentBoundaryMatch f; - // || - // 0123456789012345 - string test_input = ".foo...) 
foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 1); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, ClosingParenWithEllipsis) { - FragmentBoundaryMatch f; - // | | - // 012345678901 - string test_input = "...) foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 3); - EXPECT_EQ(f.limit_index(), 3); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, BeginAndEndParenWithEllipsis) { - FragmentBoundaryMatch f; - // || - // 0123456789012 - string test_input = "(...) foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_FALSE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), -1); - EXPECT_EQ(f.first_close_punc_index(), -1); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::INITIAL_STATE); - - // | | - // 0123456789012 - test_input = "...) foo bar"; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 3); - EXPECT_EQ(f.limit_index(), 3); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, AcronymInSentence) { - FragmentBoundaryMatch f; - // | | - // 0123456789012 - string test_input = "U.S. don't be surprised."; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 4); - EXPECT_EQ(f.limit_index(), 4); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, HelloWithEllipsis) { - FragmentBoundaryMatch f; - // || - // 01234567890 - string test_input = "o...foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_FALSE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), -1); - EXPECT_EQ(f.first_close_punc_index(), -1); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::INITIAL_STATE); - - // | | - // 0123456789 - test_input = "...foo bar"; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 3); - EXPECT_EQ(f.limit_index(), 3); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -TEST(FragmentBoundaryMatchTest, ThreeStatesWithClosigParen) { - FragmentBoundaryMatch f; - // || - // 0123456789012 - string test_input = "w...) foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_FALSE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), -1); - EXPECT_EQ(f.first_close_punc_index(), -1); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::INITIAL_STATE); - - // | | - // 0123456789012 - test_input = "...) 
foo bar"; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 3); - EXPECT_EQ(f.limit_index(), 3); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); - - // || - // 0123456789012 - test_input = ") foo bar"; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 0); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_CLOSE_PUNC); - - // || - // 0123456789012 - test_input = " foo bar"; - EXPECT_FALSE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 0); - EXPECT_EQ(f.limit_index(), 1); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_CLOSE_PUNC); -} - -TEST(FragmentBoundaryMatchTest, NoTransition) { - FragmentBoundaryMatch f; - // | | - // 0123456789012 - string test_input = "...foo bar"; - int index = 0; - EXPECT_TRUE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 3); - EXPECT_EQ(f.limit_index(), 3); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); - - // || - // 0123456789012 - test_input = "foo bar"; - EXPECT_FALSE(f.Advance(index, test_input)); - EXPECT_TRUE(f.GotTerminalPunc()); - EXPECT_EQ(f.first_terminal_punc_index(), 0); - EXPECT_EQ(f.first_close_punc_index(), 3); - EXPECT_EQ(f.limit_index(), 3); - EXPECT_EQ(f.state(), FragmentBoundaryMatch::COLLECTING_TERMINAL_PUNC); -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentencepiece_kernels.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentencepiece_kernels.cc deleted file mode 100644 index a7ee974..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/sentencepiece_kernels.cc +++ /dev/null
@@ -1,729 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "absl/base/attributes.h" -#include "absl/base/optimization.h" -#include "absl/base/thread_annotations.h" -#include "absl/container/flat_hash_map.h" -#include "absl/meta/type_traits.h" -#include "absl/strings/string_view.h" -#include "absl/synchronization/mutex.h" -#include "absl/types/span.h" -#include "src/sentencepiece.pb.h" -#include "src/sentencepiece_model.pb.h" -#include "src/sentencepiece_processor.h" -#include "tensorflow/core/framework/bounds_check.h" -#include "tensorflow/core/framework/dataset_stateful_op_allowlist.h" -#include "tensorflow/core/framework/device_base.h" -#include "tensorflow/core/framework/node_def_util.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/framework/types.pb.h" -#include "tensorflow/core/graph/graph_def_builder.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/refcount.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/errors.h" -#include "tensorflow/core/platform/macros.h" -#include "tensorflow/core/platform/mutex.h" -#include "tensorflow/core/platform/thread_annotations.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow/core/util/work_sharder.h" - -namespace tensorflow { -namespace text { - -namespace { - -// Our resource object that will hold the SentencePiece processor. -struct SentencepieceResource : public ResourceBase { - sentencepiece::SentencePieceProcessor processor; - int64 memory_used; - bool add_bos = false; - bool add_eos = false; - bool reverse = false; - mutable absl::Mutex mu; - - string DebugString() const override { return "Sentencepiece Resource"; } - - int64 MemoryUsed() const override { return memory_used; } - - bool SameOptions(bool add_bos, bool add_eos, bool reverse) const { - return (add_bos == this->add_bos) && (add_eos == this->add_eos) && - (reverse == this->reverse); - } - - Status AsGraphDef(GraphDefBuilder* builder, Node** out) const override { - absl::ReaderMutexLock l(&mu); - // We set use_node_name_sharing with a unique node name so that the resource - // can outlive the kernel. This means that the lifetime of the re-created - // resource will be tied to the lifetime of the resource manager it is - // created in. 
- static std::atomic<int64> counter(0); - std::string unique_node_name = strings::StrCat( - "SentencepieceResourceFromGraphDef", "/", counter.fetch_add(1)); - std::string model = processor.model_proto().SerializeAsString(); - *out = ops::SourceOp("SentencepieceOp", - builder->opts() - .WithName(unique_node_name) - .WithAttr("model", model) - .WithAttr("use_node_name_sharing", true)); - return Status::OK(); - } -}; - -// According to .../tensorflow/core/util/work_sharder.cc, this values determines -// how much to shard. It assumes each cost unit is 1ns, and the minimum cost -// per shard is 10000 (10us). -// TODO(broken) Determine a medium cost of a call to the SentencePiece processor -constexpr int64 kCostPerUnit = 10000; - -::tensorflow::Status ToTFStatus(const ::util::Status& s) { - if (s.ok()) - return ::tensorflow::Status(); - return ::tensorflow::Status(static_cast<::tensorflow::error::Code>(s.code()), - ::tensorflow::string(s.message())); -} - -template <typename T> -T GetPieceOrId(const sentencepiece::SentencePieceText::SentencePiece& sp); - -template <> -tensorflow::tstring GetPieceOrId<tensorflow::tstring>( - const sentencepiece::SentencePieceText::SentencePiece& sp) { - return sp.piece(); -} - -template <> -int32 GetPieceOrId<int32>( - const sentencepiece::SentencePieceText::SentencePiece& sp) { - return sp.id(); -} - -tensorflow::Status HandleExtraOptions(OpKernelContext* ctx, - SentencepieceResource* sp) { - const Tensor* add_bos_tensor = nullptr; - TF_RETURN_IF_ERROR(ctx->input("add_bos", &add_bos_tensor)); - const bool add_bos = add_bos_tensor->scalar<bool>()(); - - const Tensor* add_eos_tensor = nullptr; - TF_RETURN_IF_ERROR(ctx->input("add_eos", &add_eos_tensor)); - const bool add_eos = add_eos_tensor->scalar<bool>()(); - - const Tensor* reverse_tensor = nullptr; - TF_RETURN_IF_ERROR(ctx->input("reverse", &reverse_tensor)); - const bool reverse = reverse_tensor->scalar<bool>()(); - - { - // Because we expect most of the time no change in these options, we grab - // the reader lock once and do a quick check first. - absl::ReaderMutexLock l(&sp->mu); - if (sp->SameOptions(add_bos, add_eos, reverse)) { - return Status::OK(); - } - } - - absl::WriterMutexLock lock(&sp->mu); - if (sp->SameOptions(add_bos, add_eos, reverse)) { - return Status::OK(); - } - string options; - sp->add_bos = add_bos; - if (sp->add_bos) { - absl::StrAppend(&options, "bos"); - } - sp->add_eos = add_eos; - if (sp->add_eos) { - if (!options.empty()) { - absl::StrAppend(&options, ":"); - } - absl::StrAppend(&options, "eos"); - } - sp->reverse = reverse; - if (sp->reverse) { - if (!options.empty()) { - absl::StrAppend(&options, ":"); - } - absl::StrAppend(&options, "reverse"); - } - - TF_RETURN_IF_ERROR(ToTFStatus(sp->processor.SetEncodeExtraOptions(options))); - TF_RETURN_IF_ERROR(ToTFStatus(sp->processor.SetDecodeExtraOptions(options))); - - return Status::OK(); -} - -} // namespace - -class SentencepieceOp : public OpKernel { - public: - explicit SentencepieceOp(OpKernelConstruction* ctx) - : OpKernel(ctx), sp_set_(false) { - OP_REQUIRES_OK(ctx, ctx->allocate_temp(tensorflow::DT_STRING, - tensorflow::TensorShape({2}), &sp_)); - OP_REQUIRES_OK( - ctx, ctx->GetAttr("use_node_name_sharing", &use_node_name_sharing_)); - } - - ~SentencepieceOp() override { - // If the table object was not shared, delete it. 
- if (sp_set_ && cinfo_.resource_is_private_to_kernel()) { - if (!cinfo_.resource_manager() - ->template Delete<SentencepieceResource>(cinfo_.container(), - cinfo_.name()) - .ok()) { - // Do nothing; the resource may have been deleted by session resets. - } - } - } - - void Compute(OpKernelContext* ctx) override { - absl::MutexLock lock(&mu_); - - if (!sp_set_) { - OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def(), - use_node_name_sharing_)); - } - - auto creator = - [ctx, this](SentencepieceResource** resource) - ABSL_EXCLUSIVE_LOCKS_REQUIRED(mu_) { - SentencepieceResource* sp = new SentencepieceResource(); - - string model_proto_attr; - TF_RETURN_IF_ERROR( - GetNodeAttr(this->def(), "model", &model_proto_attr)); - - if (TF_PREDICT_FALSE(model_proto_attr.empty())) { - return Status(tensorflow::errors::InvalidArgument( - "Model argument must be specified.")); - } - // Loads serialized sentencepiece model proto to enable embedding - // the relatively small sentencepiece model proto into the - // tensorflow graph such that the tensorflow graph is - // self-contained. - TF_RETURN_IF_ERROR(ToTFStatus( - sp->processor.LoadFromSerializedProto(model_proto_attr))); - // TODO(broken): Determine a better computation of what the memory - // requirements for the processor are. - sp->memory_used = model_proto_attr.size(); - - if (ctx->track_allocations()) { - ctx->record_persistent_memory_allocation(sp->MemoryUsed()); - } - - *resource = sp; - return Status::OK(); - }; - - // Register the ResourceType alias. - SentencepieceResource* resource = nullptr; - OP_REQUIRES_OK( - ctx, cinfo_.resource_manager() - ->template LookupOrCreate<SentencepieceResource>( - cinfo_.container(), cinfo_.name(), &resource, creator)); - core::ScopedUnref unref_me(resource); - - // Put a handle to resource in the output tensor (the other aliases will - // have the same handle). 
- Tensor* handle; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &handle)); - handle->scalar<ResourceHandle>()() = - MakeResourceHandle<SentencepieceResource>(ctx, cinfo_.container(), - cinfo_.name()); - sp_set_ = true; - } - - private: - absl::Mutex mu_; - Tensor sp_ ABSL_GUARDED_BY(mu_); - bool sp_set_ ABSL_GUARDED_BY(mu_); - ContainerInfo cinfo_; - bool use_node_name_sharing_; - TF_DISALLOW_COPY_AND_ASSIGN(SentencepieceOp); -}; - -REGISTER_KERNEL_BUILDER(Name("SentencepieceOp").Device(DEVICE_CPU), - tensorflow::text::SentencepieceOp); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("SentencepieceOp"); - -template <typename T, typename Tsplits> -class SentencepieceTokenizeOp : public OpKernel { - public: - explicit SentencepieceTokenizeOp(OpKernelConstruction* ctx) : OpKernel(ctx) { - ctx->GetAttr("return_nbest", &return_nbest_).IgnoreError(); - } - - void Compute(OpKernelContext* ctx) override { - SentencepieceResource* sp; - const Tensor& resource_tensor = ctx->input(0); - ResourceHandle resource_handle(resource_tensor.scalar<ResourceHandle>()()); - OP_REQUIRES_OK( - ctx, ctx->resource_manager()->Lookup<SentencepieceResource>( - resource_handle.container(), resource_handle.name(), &sp)); - core::ScopedUnref unref_me(sp); - - const Tensor& input_values_tensor = ctx->input(1); - const auto input_values_flat = - input_values_tensor.flat<tensorflow::tstring>(); - const int64 num_of_input_values = input_values_flat.size(); - - const Tensor* nbest_size_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->input("nbest_size", &nbest_size_tensor)); - const Tensor* alpha_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->input("alpha", &alpha_tensor)); - - OP_REQUIRES_OK(ctx, HandleExtraOptions(ctx, sp)); - - if (return_nbest_) { - OP_REQUIRES(ctx, nbest_size_tensor->dims() == 0, - errors::InvalidArgument( - "When return_nbest is true nbest_size must " - "be a scalar; got", - nbest_size_tensor->shape().DebugString(), "instead")); - OP_REQUIRES(ctx, nbest_size_tensor->scalar<int32>()() >= 1, - errors::InvalidArgument( - "When return_nbest is true nbest_size must be >= 1; got ", - nbest_size_tensor->scalar<int32>()())); - } - - std::vector<std::vector<typename std::conditional< - std::is_same<T, tstring>::value, std::string, T>::type>> - tokens(return_nbest_ ? 0 : num_of_input_values); - std::vector<std::vector<std::vector<typename std::conditional< - std::is_same<T, tstring>::value, std::string, T>::type>>> - nbest_tokens(return_nbest_ ? num_of_input_values : 0); - const bool return_nbest = return_nbest_; - const auto& worker_threads = - *(ctx->device()->tensorflow_cpu_worker_threads()); - ::tensorflow::Shard( - worker_threads.num_threads, // max parallelism - worker_threads.workers, // thread pool - num_of_input_values, // total number of data to process. - kCostPerUnit, // cost per unit - [ctx, sp, &input_values_flat, &tokens, &nbest_tokens, - &nbest_size_tensor, &alpha_tensor, - return_nbest](int64 start, int64 limit) { - absl::ReaderMutexLock lock(&sp->mu); - for (int i = start; i < limit; ++i) { - const int32 nbest_size = nbest_size_tensor->dims() == 1 - ? nbest_size_tensor->vec<int32>()(i) - : nbest_size_tensor->scalar<int32>()(); - if (return_nbest) { - OP_REQUIRES_OK(ctx, ToTFStatus(sp->processor.NBestEncode( - input_values_flat(i), nbest_size, - &nbest_tokens[i]))); - } else if (nbest_size == 0 || nbest_size == 1) { - OP_REQUIRES_OK(ctx, ToTFStatus(sp->processor.Encode( - input_values_flat(i), &tokens[i]))); - } else { - const float alpha = alpha_tensor->dims() == 1 - ? 
alpha_tensor->vec<float>()(i) - : alpha_tensor->scalar<float>()(); - OP_REQUIRES_OK(ctx, ToTFStatus(sp->processor.SampleEncode( - input_values_flat(i), nbest_size, alpha, - &tokens[i]))); - } - } - }); - - if (return_nbest_) { - for (auto& col : nbest_tokens) { - for (auto& row : col) { - tokens.push_back(std::move(row)); - } - } - nbest_tokens.clear(); - } - int64 total_tokens = 0; - for (auto& tokens_row : tokens) { - total_tokens += tokens_row.size(); - } - - Tensor* output_values_tensor = nullptr; - Tensor* output_splits_tensor = nullptr; - - OP_REQUIRES_OK( - ctx, ctx->allocate_output(0, {total_tokens}, &output_values_tensor)); - int64 splits_size = tokens.size() + 1; - OP_REQUIRES_OK( - ctx, ctx->allocate_output(1, {splits_size}, &output_splits_tensor)); - - auto values_tensor_flat = output_values_tensor->vec<T>(); - auto splits_tensor_flat = output_splits_tensor->vec<Tsplits>(); - - int i = 0; - splits_tensor_flat(0) = 0; - for (int row = 0; row < tokens.size(); ++row) { - for (int col = 0; col < tokens[row].size(); ++col, ++i) { - values_tensor_flat(i) = tokens[row][col]; - } - splits_tensor_flat(row + 1) = i; - } - } - - bool return_nbest_{false}; -}; - -REGISTER_KERNEL_BUILDER(Name("SentencepieceTokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<int32>("out_type") - .TypeConstraint<int32>("Tsplits"), - SentencepieceTokenizeOp<int32, int32>); -REGISTER_KERNEL_BUILDER(Name("SentencepieceTokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<tensorflow::tstring>("out_type") - .TypeConstraint<int32>("Tsplits"), - SentencepieceTokenizeOp<tensorflow::tstring, int32>); -REGISTER_KERNEL_BUILDER(Name("SentencepieceTokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<int32>("out_type") - .TypeConstraint<int64>("Tsplits"), - SentencepieceTokenizeOp<int32, int64>); -REGISTER_KERNEL_BUILDER(Name("SentencepieceTokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<tensorflow::tstring>("out_type") - .TypeConstraint<int64>("Tsplits"), - SentencepieceTokenizeOp<tensorflow::tstring, int64>); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("SentencepieceTokenizeOp"); - -template <typename T, typename Tsplits> -class SentencepieceTokenizeWithOffsetsOp : public OpKernel { - public: - explicit SentencepieceTokenizeWithOffsetsOp(OpKernelConstruction* ctx) - : OpKernel(ctx) { - ctx->GetAttr("return_nbest", &return_nbest_).IgnoreError(); - } - - void Compute(OpKernelContext* ctx) override { - SentencepieceResource* sp; - const Tensor& resource_tensor = ctx->input(0); - ResourceHandle resource_handle(resource_tensor.scalar<ResourceHandle>()()); - OP_REQUIRES_OK( - ctx, ctx->resource_manager()->Lookup<SentencepieceResource>( - resource_handle.container(), resource_handle.name(), &sp)); - core::ScopedUnref unref_me(sp); - - const Tensor& input_values_tensor = ctx->input(1); - const auto input_values_flat = - input_values_tensor.flat<tensorflow::tstring>(); - const int64 num_of_input_values = input_values_flat.size(); - - const Tensor* nbest_size_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->input("nbest_size", &nbest_size_tensor)); - const Tensor* alpha_tensor = nullptr; - OP_REQUIRES_OK(ctx, ctx->input("alpha", &alpha_tensor)); - - OP_REQUIRES_OK(ctx, HandleExtraOptions(ctx, sp)); - - if (return_nbest_) { - OP_REQUIRES(ctx, nbest_size_tensor->dims() == 0, - errors::InvalidArgument( - "When return_nbest is true nbest_size must " - "be a scalar; got", - nbest_size_tensor->shape().DebugString(), "instead")); - OP_REQUIRES(ctx, nbest_size_tensor->scalar<int32>()() >= 1, - errors::InvalidArgument( - "When return_nbest is 
true nbest_size must be >= 1; got ", - nbest_size_tensor->scalar<int32>()())); - } - - std::vector<sentencepiece::SentencePieceText> results( - return_nbest_ ? 0 : num_of_input_values); - std::vector<sentencepiece::NBestSentencePieceText> nbest_results( - return_nbest_ ? num_of_input_values : 0); - const bool return_nbest = return_nbest_; - const auto& worker_threads = - *(ctx->device()->tensorflow_cpu_worker_threads()); - ::tensorflow::Shard( - worker_threads.num_threads, // max parallelism - worker_threads.workers, // thread pool - num_of_input_values, // total number of data to process. - kCostPerUnit, - [ctx, sp, &input_values_flat, &results, &nbest_results, - &nbest_size_tensor, &alpha_tensor, - return_nbest](int64 start, int64 limit) { - absl::ReaderMutexLock lock(&sp->mu); - for (int i = start; i < limit; ++i) { - const int32 nbest_size = nbest_size_tensor->dims() == 1 - ? nbest_size_tensor->vec<int32>()(i) - : nbest_size_tensor->scalar<int32>()(); - if (return_nbest) { - OP_REQUIRES_OK(ctx, ToTFStatus(sp->processor.NBestEncode( - input_values_flat(i), nbest_size, - &nbest_results[i]))); - } else if (nbest_size == 0 || nbest_size == 1) { - OP_REQUIRES_OK(ctx, ToTFStatus(sp->processor.Encode( - input_values_flat(i), &results[i]))); - } else { - const float alpha = alpha_tensor->dims() == 1 - ? alpha_tensor->vec<float>()(i) - : alpha_tensor->scalar<float>()(); - OP_REQUIRES_OK(ctx, ToTFStatus(sp->processor.SampleEncode( - input_values_flat(i), nbest_size, alpha, - &results[i]))); - } - } - }); - - if (return_nbest_) { - for (auto& nbest : nbest_results) { - for (auto& result : nbest.nbests()) { - results.push_back(std::move(result)); - } - } - } - int64 total_tokens = 0; - for (auto& sp_result : results) { - total_tokens += sp_result.pieces_size(); - } - - Tensor* output_values_tensor = nullptr; - Tensor* output_splits_tensor = nullptr; - Tensor* output_starts_tensor = nullptr; - Tensor* output_limits_tensor = nullptr; - - OP_REQUIRES_OK( - ctx, ctx->allocate_output(0, {total_tokens}, &output_values_tensor)); - int64 splits_size = results.size() + 1; - OP_REQUIRES_OK( - ctx, ctx->allocate_output(1, {splits_size}, &output_splits_tensor)); - OP_REQUIRES_OK( - ctx, ctx->allocate_output(2, {total_tokens}, &output_starts_tensor)); - OP_REQUIRES_OK( - ctx, ctx->allocate_output(3, {total_tokens}, &output_limits_tensor)); - - auto values_tensor_flat = output_values_tensor->vec<T>(); - auto splits_tensor_flat = output_splits_tensor->vec<Tsplits>(); - auto starts_tensor_flat = output_starts_tensor->vec<int64>(); - auto limits_tensor_flat = output_limits_tensor->vec<int64>(); - - int i = 0; - splits_tensor_flat(0) = 0; - for (int row = 0; row < results.size(); ++row) { - for (auto& sp : results[row].pieces()) { - values_tensor_flat(i) = GetPieceOrId<T>(sp); - starts_tensor_flat(i) = sp.begin(); - limits_tensor_flat(i) = sp.end(); - ++i; - } - splits_tensor_flat(row + 1) = i; - } - } - - bool return_nbest_{false}; -}; - -REGISTER_KERNEL_BUILDER(Name("SentencepieceTokenizeWithOffsetsOp") - .Device(DEVICE_CPU) - .TypeConstraint<int32>("out_type") - .TypeConstraint<int32>("Tsplits"), - SentencepieceTokenizeWithOffsetsOp<int32, int32>); -REGISTER_KERNEL_BUILDER( - Name("SentencepieceTokenizeWithOffsetsOp") - .Device(DEVICE_CPU) - .TypeConstraint<tensorflow::tstring>("out_type") - .TypeConstraint<int32>("Tsplits"), - SentencepieceTokenizeWithOffsetsOp<tensorflow::tstring, int32>); -REGISTER_KERNEL_BUILDER(Name("SentencepieceTokenizeWithOffsetsOp") - .Device(DEVICE_CPU) - 
.TypeConstraint<int32>("out_type") - .TypeConstraint<int64>("Tsplits"), - SentencepieceTokenizeWithOffsetsOp<int32, int64>); -REGISTER_KERNEL_BUILDER( - Name("SentencepieceTokenizeWithOffsetsOp") - .Device(DEVICE_CPU) - .TypeConstraint<tensorflow::tstring>("out_type") - .TypeConstraint<int64>("Tsplits"), - SentencepieceTokenizeWithOffsetsOp<tensorflow::tstring, int64>); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("SentencepieceTokenizeWithOffsetsOp"); - -template <typename T, typename Tsplits> -class SentencepieceDetokenizeOp : public OpKernel { - public: - explicit SentencepieceDetokenizeOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - SentencepieceResource* sp; - const Tensor& resource_tensor = ctx->input(0); - ResourceHandle resource_handle(resource_tensor.scalar<ResourceHandle>()()); - OP_REQUIRES_OK( - ctx, ctx->resource_manager()->Lookup<SentencepieceResource>( - resource_handle.container(), resource_handle.name(), &sp)); - core::ScopedUnref unref_me(sp); - - const Tensor& input_values_tensor = ctx->input(1); - const auto input_values_flat = input_values_tensor.flat<T>(); - const Tensor& input_splits_tensor = ctx->input(2); - const auto input_splits_flat = input_splits_tensor.flat<Tsplits>(); - const int64 num_of_sentences = input_splits_flat.size() - 1; - - OP_REQUIRES_OK(ctx, HandleExtraOptions(ctx, sp)); - - Tensor* output_tensor; - OP_REQUIRES_OK(ctx, - ctx->allocate_output(0, {num_of_sentences}, &output_tensor)); - auto output_flat = output_tensor->flat<tensorflow::tstring>(); - - const auto& worker_threads = - *(ctx->device()->tensorflow_cpu_worker_threads()); - ::tensorflow::Shard( - worker_threads.num_threads, // max parallelism - worker_threads.workers, // thread pool - num_of_sentences, // total number of data to process. 
- kCostPerUnit, - [ctx, sp, &input_values_flat, &input_splits_flat, &output_flat]( - int64 start, int64 limit) { - absl::ReaderMutexLock lock(&sp->mu); - for (int i = start; i < limit; ++i) { - if (i + 1 >= input_splits_flat.size()) { - ctx->CtxFailure(errors::OutOfRange("Invalid splits; ", i)); - return; - } - if (input_splits_flat(i) > input_values_flat.size()) { - ctx->CtxFailure(errors::OutOfRange( - "Splits and values do not match; split ", - input_splits_flat(i), "but values size is ", - input_values_flat.size())); - return; - } - const std::vector<typename std::conditional< - std::is_same<T, tstring>::value, std::string, T>::type> - pieces(&input_values_flat(input_splits_flat(i)), - &input_values_flat(input_splits_flat(i + 1))); - std::string output_flat_str; - OP_REQUIRES_OK(ctx, ToTFStatus(sp->processor.Decode( - pieces, &output_flat_str))); - output_flat(i) = output_flat_str; - } - }); - } -}; - -REGISTER_KERNEL_BUILDER(Name("SentencepieceDetokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<int32>("T") - .TypeConstraint<int32>("Tsplits"), - SentencepieceDetokenizeOp<int32, int32>); -REGISTER_KERNEL_BUILDER(Name("SentencepieceDetokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<tensorflow::tstring>("T") - .TypeConstraint<int32>("Tsplits"), - SentencepieceDetokenizeOp<tensorflow::tstring, int32>); -REGISTER_KERNEL_BUILDER(Name("SentencepieceDetokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<int32>("T") - .TypeConstraint<int64>("Tsplits"), - SentencepieceDetokenizeOp<int32, int64>); -REGISTER_KERNEL_BUILDER(Name("SentencepieceDetokenizeOp") - .Device(DEVICE_CPU) - .TypeConstraint<tensorflow::tstring>("T") - .TypeConstraint<int64>("Tsplits"), - SentencepieceDetokenizeOp<tensorflow::tstring, int64>); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("SentencepieceDetokenizeOp"); - -class SentencepieceVocabSizeOp : public OpKernel { - public: - explicit SentencepieceVocabSizeOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - SentencepieceResource* sp; - const Tensor& resource_tensor = ctx->input(0); - ResourceHandle resource_handle(resource_tensor.scalar<ResourceHandle>()()); - OP_REQUIRES_OK( - ctx, ctx->resource_manager()->Lookup<SentencepieceResource>( - resource_handle.container(), resource_handle.name(), &sp)); - core::ScopedUnref unref_me(sp); - - Tensor* output_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {}, &output_tensor)); - output_tensor->scalar<int32>()() = sp->processor.GetPieceSize(); - } -}; - -REGISTER_KERNEL_BUILDER(Name("SentencepieceVocabSizeOp").Device(DEVICE_CPU), - SentencepieceVocabSizeOp); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("SentencepieceVocabSizeOp"); - -class SentencepieceIdToStringOp : public OpKernel { - public: - explicit SentencepieceIdToStringOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - SentencepieceResource* sp; - const Tensor& resource_tensor = ctx->input(0); - ResourceHandle resource_handle(resource_tensor.scalar<ResourceHandle>()()); - OP_REQUIRES_OK( - ctx, ctx->resource_manager()->Lookup<SentencepieceResource>( - resource_handle.container(), resource_handle.name(), &sp)); - core::ScopedUnref unref_me(sp); - - const Tensor& input_tensor = ctx->input(1); - const auto input_tensor_flat = input_tensor.flat<int32>(); - Tensor* output_tensor; - OP_REQUIRES_OK( - ctx, ctx->allocate_output(0, input_tensor.shape(), &output_tensor)); - auto output_tensor_flat = output_tensor->flat<tensorflow::tstring>(); - - absl::ReaderMutexLock 
lock(&sp->mu); - for (int i = 0; i < input_tensor_flat.size(); ++i) { - output_tensor_flat(i) = sp->processor.IdToPiece(input_tensor_flat(i)); - } - } -}; - -REGISTER_KERNEL_BUILDER(Name("SentencepieceIdToStringOp").Device(DEVICE_CPU), - SentencepieceIdToStringOp); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("SentencepieceIdToStringOp"); - -class SentencepieceStringToIdOp : public OpKernel { - public: - explicit SentencepieceStringToIdOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - SentencepieceResource* sp; - const Tensor& resource_tensor = ctx->input(0); - ResourceHandle resource_handle(resource_tensor.scalar<ResourceHandle>()()); - OP_REQUIRES_OK( - ctx, ctx->resource_manager()->Lookup<SentencepieceResource>( - resource_handle.container(), resource_handle.name(), &sp)); - core::ScopedUnref unref_me(sp); - - const Tensor& input_tensor = ctx->input(1); - const auto input_tensor_flat = input_tensor.flat<tensorflow::tstring>(); - Tensor* output_tensor; - OP_REQUIRES_OK( - ctx, ctx->allocate_output(0, input_tensor.shape(), &output_tensor)); - auto output_tensor_flat = output_tensor->flat<int32>(); - - absl::ReaderMutexLock lock(&sp->mu); - for (int i = 0; i < input_tensor_flat.size(); ++i) { - output_tensor_flat(i) = sp->processor.PieceToId(input_tensor_flat(i)); - } - } -}; - -REGISTER_KERNEL_BUILDER(Name("SentencepieceStringToIdOp").Device(DEVICE_CPU), - SentencepieceStringToIdOp); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("SentencepieceStringToIdOp"); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.cc deleted file mode 100644 index 1cae768..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.cc +++ /dev/null
@@ -1,105 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/spanning_tree_iterator.h" - -namespace tensorflow { -namespace text { - -SpanningTreeIterator::SpanningTreeIterator(bool forest) : forest_(forest) {} - -bool SpanningTreeIterator::HasCycle(const SourceList& sources) { - // Flags for whether each node has already been searched. - searched_.assign(sources.size(), false); - - // Flags for whether the search is currently visiting each node. - visiting_.assign(sources.size(), false); - - // Search upwards from each node to find cycles. - for (uint32 initial_node = 0; initial_node < sources.size(); ++initial_node) { - // Search upwards to try to find a cycle. - uint32 current_node = initial_node; - while (true) { - if (searched_[current_node]) - break; // already searched - if (visiting_[current_node]) - return true; // revisiting implies cycle - visiting_[current_node] = true; // mark as being currently visited - const uint32 source_node = sources[current_node]; - if (source_node == current_node) - break; // self-loops are roots - current_node = source_node; // advance upwards - } - - // No cycle; search upwards again to update flags. - current_node = initial_node; - while (true) { - if (searched_[current_node]) - break; // already searched - searched_[current_node] = true; - visiting_[current_node] = false; - const uint32 source_node = sources[current_node]; - if (source_node == current_node) - break; // self-loops are roots - current_node = source_node; // advance upwards - } - } - - return false; -} - -uint32 SpanningTreeIterator::NumRoots(const SourceList& sources) { - uint32 num_roots = 0; - for (uint32 node = 0; node < sources.size(); ++node) { - num_roots += (node == sources[node]); - } - return num_roots; -} - -bool SpanningTreeIterator::NextSourceList(SourceList* sources) { - const uint32 num_nodes = sources->size(); - for (uint32 i = 0; i < num_nodes; ++i) { - const uint32 new_source = ++(*sources)[i]; - if (new_source < num_nodes) - return true; // absorbed in this digit - (*sources)[i] = 0; // overflowed this digit, carry to next digit - } - return false; // overflowed the last digit -} - -bool SpanningTreeIterator::NextTree(SourceList* sources) { - // Iterate source lists, skipping non-trees. - while (NextSourceList(sources)) { - // Check the number of roots. - const uint32 num_roots = NumRoots(*sources); - if (forest_) { - if (num_roots == 0) - continue; - } else { - if (num_roots != 1) - continue; - } - - // Check for cycles. - if (HasCycle(*sources)) - continue; - - // Acyclic and rooted, therefore tree. - return true; - } - return false; -} - -} // namespace text -} // namespace tensorflow
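HasCycle in the file deleted above walks upward from every node, marking nodes "visiting" on the way up and "searched" on a second pass so each node is traversed only a constant number of times; hitting a node still marked "visiting" means a cycle. A dependency-free restatement of that walk, as a sketch that assumes the same self-loop-as-root convention:

// Sketch only: sources[i] is the parent of node i; a self-loop marks a root.
#include <cassert>
#include <cstdint>
#include <vector>

bool HasCycle(const std::vector<uint32_t>& sources) {
  std::vector<bool> searched(sources.size(), false);
  std::vector<bool> visiting(sources.size(), false);
  for (uint32_t start = 0; start < sources.size(); ++start) {
    uint32_t node = start;
    while (!searched[node]) {
      if (visiting[node]) return true;   // reached a node on the current path
      visiting[node] = true;
      if (sources[node] == node) break;  // self-loop: hit a root
      node = sources[node];
    }
    // Second pass: mark the walked path as fully searched.
    node = start;
    while (!searched[node]) {
      searched[node] = true;
      visiting[node] = false;
      if (sources[node] == node) break;
      node = sources[node];
    }
  }
  return false;
}

int main() {
  assert(!HasCycle({0, 0, 1}));  // 0 is a root, 1 -> 0, 2 -> 1: a tree
  assert(HasCycle({1, 2, 0}));   // 0 -> 1 -> 2 -> 0: a 3-cycle
  return 0;
}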
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.h deleted file mode 100644 index 89edc95..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator.h +++ /dev/null
@@ -1,78 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_SPANNING_TREE_ITERATOR_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_SPANNING_TREE_ITERATOR_H_ - -#include <vector> - -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -// A class that iterates over all possible spanning trees of a complete digraph. -// Thread-compatible. Useful for brute-force comparison tests. -// -// TODO(terrykoo): Try using Prufer sequences, which are more efficient to -// enumerate as there are no non-trees to filter out. -class SpanningTreeIterator { - public: - // An array that provides the source of the inbound arc for each node. Roots - // are represented as self-loops. - using SourceList = std::vector<uint32>; - - // Creates a spanning tree iterator. If |forest| is true, then this iterates - // over forests instead of trees (i.e., multiple roots are allowed). - explicit SpanningTreeIterator(bool forest); - - // Applies the |functor| to all spanning trees (or forests, if |forest_| is - // true) of a complete digraph containing |num_nodes| nodes. Each tree is - // passed to the |functor| as a SourceList. - template <class Functor> - void ForEachTree(uint32 num_nodes, Functor functor) { - // Conveniently, the all-zero vector represents a valid tree. - SourceList sources(num_nodes, 0); - do { - functor(sources); - } while (NextTree(&sources)); - } - - private: - // Returns true if the |sources| contains a cycle. - bool HasCycle(const SourceList& sources); - - // Returns the number of roots in the |sources|. - static uint32 NumRoots(const SourceList& sources); - - // Advances |sources| to the next source list, or returns false if there are - // no more source lists. - static bool NextSourceList(SourceList* sources); - - // Advances |sources| to the next tree (or forest, if |forest_| is true), or - // returns false if there are no more trees. - bool NextTree(SourceList* sources); - - // If true, iterate over spanning forests instead of spanning trees. - const bool forest_; - - // Workspaces used by the search in HasCycle(). - std::vector<bool> searched_; - std::vector<bool> visiting_; -}; - -} // namespace text -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_SPANNING_TREE_ITERATOR_H_
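The ForEachTree contract documented in the header deleted above rests on enumerating every possible SourceList and filtering out non-trees: the all-zero list is the first candidate, and NextSourceList advances it like a base-num_nodes odometer with carry. A small sketch of that enumeration step (standard C++ only, not the deleted class):

// Sketch only: each node's source is a digit in [0, num_nodes); incrementing
// with carry visits every possible source list exactly once.
#include <cassert>
#include <cstdint>
#include <vector>

bool NextSourceList(std::vector<uint32_t>* sources) {
  const uint32_t num_nodes = static_cast<uint32_t>(sources->size());
  for (uint32_t i = 0; i < num_nodes; ++i) {
    if (++(*sources)[i] < num_nodes) return true;  // no carry needed
    (*sources)[i] = 0;                             // carry into the next digit
  }
  return false;  // wrapped around: enumeration finished
}

int main() {
  std::vector<uint32_t> s(2, 0);  // 2 nodes -> 2^2 = 4 possible source lists
  int count = 1;                  // the all-zero list is the first candidate
  while (NextSourceList(&s)) ++count;
  assert(count == 4);
  return 0;
}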
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator_test.cc deleted file mode 100644 index 4000117..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/spanning_tree_iterator_test.cc +++ /dev/null
@@ -1,142 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/spanning_tree_iterator.h" - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/platform/logging.h" - -namespace tensorflow { -namespace text { - -// Testing rig. When the bool parameter is true, iterates over spanning forests -// instead of spanning trees. -class SpanningTreeIteratorTest : public ::testing::TestWithParam<bool> { - protected: - using SourceList = SpanningTreeIterator::SourceList; - - // Returns |base|^|exponent|. Computes the value as an integer to avoid - // rounding issues. - static int Pow(int base, int exponent) { - double real_product = 1.0; - int product = 1; - for (int i = 0; i < exponent; ++i) { - product *= base; - real_product *= base; - } - CHECK_EQ(product, real_product) << "Overflow detected."; - return product; - } - - // Expects that the number of possible spanning trees for a complete digraph - // of |num_nodes| nodes is |expected_num_trees|. - void ExpectNumTrees(int num_nodes, int expected_num_trees) { - int actual_num_trees = 0; - iterator_.ForEachTree( - num_nodes, [&](const SourceList& sources) { ++actual_num_trees; }); - LOG(INFO) << "num_nodes=" << num_nodes - << " expected_num_trees=" << expected_num_trees - << " actual_num_trees=" << actual_num_trees; - EXPECT_EQ(expected_num_trees, actual_num_trees); - } - - // Expects that the set of possible spanning trees for a complete digraph of - // |num_nodes| nodes is |expected_trees|. - void ExpectTrees(int num_nodes, const std::set<SourceList>& expected_trees) { - std::set<SourceList> actual_trees; - iterator_.ForEachTree(num_nodes, [&](const SourceList& sources) { - CHECK(actual_trees.insert(sources).second); - }); - EXPECT_EQ(expected_trees, actual_trees); - } - - // Instance for tests. Shared across assertions in a test to exercise reuse. - SpanningTreeIterator iterator_{GetParam()}; -}; - -INSTANTIATE_TEST_SUITE_P(AllowForest, - SpanningTreeIteratorTest, - ::testing::Bool()); - -TEST_P(SpanningTreeIteratorTest, NumberOfTrees) { - // According to Cayley's formula, the number of undirected spanning trees on a - // complete graph of n nodes is n^{n-2}: - // https://en.wikipedia.org/wiki/Cayley%27s_formula - // - // To count the number of directed spanning trees, note that each undirected - // spanning tree gives rise to n directed spanning trees: choose one of the n - // nodes as the root, and then orient arcs outwards. Therefore, the number of - // directed spanning trees on a complete digraph of n nodes is n^{n-1}. - // - // To count the number of directed spanning forests, consider undirected - // spanning trees on a complete graph of n+1 nodes. Arbitrarily select one - // node as the artificial root, orient arcs outwards, and then delete the - // artificial root and its outbound arcs. The result is a directed spanning - // forest on n nodes. 
Therefore, the number of directed spanning forests on a - // complete digraph of n nodes is (n+1)^{n-1}. - for (int num_nodes = 1; num_nodes <= 7; ++num_nodes) { - if (GetParam()) { // forest - ExpectNumTrees(num_nodes, Pow(num_nodes + 1, num_nodes - 1)); - } else { // tree - ExpectNumTrees(num_nodes, Pow(num_nodes, num_nodes - 1)); - } - } -} - -TEST_P(SpanningTreeIteratorTest, OneNodeDigraph) { - ExpectTrees(1, {{0}}); -} - -TEST_P(SpanningTreeIteratorTest, TwoNodeDigraph) { - if (GetParam()) { // forest - ExpectTrees(2, {{0, 0}, {0, 1}, {1, 1}}); // {0, 1} is two-root structure - } else { // tree - ExpectTrees(2, {{0, 0}, {1, 1}}); - } -} - -TEST_P(SpanningTreeIteratorTest, ThreeNodeDigraph) { - if (GetParam()) { // forest - ExpectTrees(3, {{0, 0, 0}, - {0, 0, 1}, - {0, 0, 2}, // 2-root - {0, 1, 0}, // 2-root - {0, 1, 1}, // 2-root - {0, 1, 2}, // 3-root - {0, 2, 0}, - {0, 2, 2}, // 2-root - {1, 1, 0}, - {1, 1, 1}, - {1, 1, 2}, // 2-root - {1, 2, 2}, - {2, 0, 2}, - {2, 1, 1}, - {2, 1, 2}, // 2-root - {2, 2, 2}}); - } else { // tree - ExpectTrees(3, {{0, 0, 0}, - {0, 0, 1}, - {0, 2, 0}, - {1, 1, 0}, - {1, 1, 1}, - {1, 2, 2}, - {2, 0, 2}, - {2, 1, 1}, - {2, 2, 2}}); - } -} - -} // namespace text -} // namespace tensorflow
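The comment block in the deleted test above derives the expected counts from Cayley's formula: n^(n-1) directed spanning trees and (n+1)^(n-1) directed spanning forests on a complete digraph of n nodes, which is why ThreeNodeDigraph enumerates exactly 9 trees and 16 forests. A quick arithmetic check of those numbers (plain C++; the overflow guard from the deleted Pow helper is omitted in this sketch):

// Sketch only: trees(n) = n^(n-1), forests(n) = (n+1)^(n-1).
#include <cassert>

int Pow(int base, int exponent) {
  int product = 1;
  for (int i = 0; i < exponent; ++i) product *= base;
  return product;
}

int main() {
  assert(Pow(3, 3 - 1) == 9);       // directed spanning trees, n = 3
  assert(Pow(3 + 1, 3 - 1) == 16);  // directed spanning forests, n = 3
  assert(Pow(4, 4 - 1) == 64);      // n = 4 trees
  assert(Pow(4 + 1, 4 - 1) == 125); // n = 4 forests
  return 0;
}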
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc deleted file mode 100644 index b0ab1df..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/split_merge_tokenize_kernel.cc +++ /dev/null
@@ -1,219 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <limits> -#include <memory> -#include <string> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -namespace { - -// Returns the length (number of bytes) of the UTF8 code point starting at src, -// by reading only the byte from address src. -// -// The result is a number from the set {1, 2, 3, 4}. -int OneCharLen(const char* src) { - // On most platforms, char is unsigned by default, but iOS is an exception. - // The cast below makes sure we always interpret *src as an unsigned char. - return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4" - [(*(reinterpret_cast<const unsigned char*>(src)) & 0xFF) >> 4]; -} - -bool GetUTF8Chars(absl::string_view text, - std::vector<absl::string_view>* chars) { - const char* start = text.data(); - const char* end = text.data() + text.size(); - while (start < end) { - const int char_length = OneCharLen(start); - if (char_length <= 0) { - return false; - } - chars->emplace_back(start, char_length); - start += char_length; - } - return true; -} - -bool IsBreakChar(absl::string_view text) { - UChar32 c; - int position = 0; - U8_NEXT_OR_FFFD(text.data(), position, text.length(), c); - return u_isUWhiteSpace(c); -} - -Status TokenizeByLabel(const absl::string_view& text, - const Tensor& labels_tensor, - bool force_split_at_break_character, - std::vector<std::string>* tokens, - std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_tokens) { - std::vector<absl::string_view> chars; - if (!GetUTF8Chars(text, &chars)) { - return Status(error::Code::INVALID_ARGUMENT, - absl::StrCat("Input string is not utf8 valid: ", text)); - } - - if (chars.size() > labels_tensor.dim_size(0)) { - return Status(error::Code::INVALID_ARGUMENT, - absl::StrCat("Number of labels ", labels_tensor.dim_size(0), - " is insufficient for text ", text)); - } - - const int split_label = 0; - bool last_character_is_break_character = false; - int start = 0; - bool has_new_token_generated_for_text = false; - const auto& labels = labels_tensor.unaligned_flat<int32>(); - for (int i = 0; i < chars.size(); ++i) { - const bool is_break_character = IsBreakChar(chars[i]); - if (!is_break_character) { - if (labels(i) == split_label || !has_new_token_generated_for_text || - (last_character_is_break_character && - force_split_at_break_character)) { - tokens->emplace_back(chars[i].data(), chars[i].length()); - begin_offset->push_back(start); - end_offset->push_back(start + chars[i].length()); - *num_tokens += 1; - has_new_token_generated_for_text = true; - } else { - 
tokens->back().append(chars[i].data(), chars[i].length()); - end_offset->back() = start + chars[i].length(); - } - } - - start += chars[i].length(); - last_character_is_break_character = is_break_character; - } - - return Status::OK(); -} - -} // namespace - -class SplitMergeTokenizeWithOffsetsOp : public OpKernel { - public: - explicit SplitMergeTokenizeWithOffsetsOp(OpKernelConstruction* ctx) - : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("force_split_at_break_character", - &force_split_at_break_character_)); - } - - void Compute(OpKernelContext* ctx) override { - const Tensor* input_values; - OP_REQUIRES_OK(ctx, ctx->input("input_values", &input_values)); - - const Tensor* labels; - OP_REQUIRES_OK(ctx, ctx->input("labels", &labels)); - const Tensor* row_splits; - OP_REQUIRES_OK(ctx, ctx->input("row_splits", &row_splits)); - OP_REQUIRES(ctx, input_values->dim_size(0) == row_splits->dim_size(0) - 1, - errors::InvalidArgument( - "Expecting row_splits have ", input_values->dim_size(0) + 1, - " elements, got ", row_splits->dim_size(0))); - - std::vector<string> tokens; - std::vector<int> begin_offset; - std::vector<int> end_offset; - std::vector<int> output_row_splits(1, 0); - - // Iterate through all the values and tokenize them. - const auto& values_vec = input_values->flat<tstring>(); - const auto& row_splits_vec = row_splits->flat<int32>(); - for (int i = 0; i < values_vec.size(); ++i) { - // Tokenize into tokens and record the offset locations. - int num_tokens = 0; - OP_REQUIRES_OK( - ctx, TokenizeByLabel( - values_vec(i), - labels->Slice(row_splits_vec(i), row_splits_vec(i + 1)), - force_split_at_break_character_, &tokens, &begin_offset, - &end_offset, &num_tokens)); - - // Record the row splits. - output_row_splits.push_back(num_tokens + output_row_splits.back()); - } - - std::vector<int64> output_tokens_shape; - output_tokens_shape.push_back(tokens.size()); - - std::vector<int64> output_row_splits_shape; - output_row_splits_shape.push_back(output_row_splits.size()); - - Tensor* output_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("output_values", - TensorShape(output_tokens_shape), - &output_values)); - auto output_values_vec = output_values->vec<tstring>(); - - Tensor* output_row_splits_tensor; - OP_REQUIRES_OK(ctx, - ctx->allocate_output("output_row_splits", - TensorShape(output_row_splits_shape), - &output_row_splits_tensor)); - auto output_row_splits_vec = output_row_splits_tensor->vec<int64>(); - - Tensor* start_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values", - TensorShape(output_tokens_shape), - &start_values)); - auto start_values_vec = start_values->vec<int64>(); - - Tensor* limit_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values", - TensorShape(output_tokens_shape), - &limit_values)); - auto limit_values_vec = limit_values->vec<int64>(); - - for (int i = 0; i < tokens.size(); ++i) { - output_values_vec(i) = tokens[i]; - } - - for (int i = 0; i < output_row_splits.size(); ++i) { - output_row_splits_vec(i) = output_row_splits[i]; - } - - for (int i = 0; i < begin_offset.size(); ++i) { - start_values_vec(i) = begin_offset[i]; - } - - for (int i = 0; i < end_offset.size(); ++i) { - limit_values_vec(i) = end_offset[i]; - } - } - - private: - bool force_split_at_break_character_; - - TF_DISALLOW_COPY_AND_ASSIGN(SplitMergeTokenizeWithOffsetsOp); -}; - -REGISTER_KERNEL_BUILDER( - Name("SplitMergeTokenizeWithOffsets").Device(DEVICE_CPU), - SplitMergeTokenizeWithOffsetsOp); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.cc deleted file mode 100644 index f0f5e99..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.cc +++ /dev/null
@@ -1,74 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -using ::testing::MakeMatcher; -using ::testing::Matcher; -using ::testing::MatchResultListener; - -namespace tensorflow { -namespace text_kernels_test_util { - -bool TensorEqMatcher::MatchAndExplain( - Tensor actual, - ::testing::MatchResultListener* listener) const { - string expect_values = expect_.SummarizeValue(expect_.NumElements()); - string actual_values = actual.SummarizeValue(actual.NumElements()); - if (expect_.dtype() != actual.dtype() || expect_.shape() != actual.shape() || - expect_values != actual_values) { - *listener << "\n dtype=" << DataTypeString(actual.dtype()); - *listener << "\n shape=" << actual.shape().DebugString(); - *listener << "\n values=" << actual_values; - return false; - } - return true; -} - -void TensorEqMatcher::DescribeTo(::std::ostream* gmock_os) const { - *gmock_os << "dtype=" << DataTypeString(expect_.dtype()) - << "\n shape=" << expect_.shape().DebugString() - << "\n values=" - << expect_.SummarizeValue(expect_.NumElements()); -} - -void TensorEqMatcher::DescribeNegationTo(::std::ostream* gmock_os) const { - *gmock_os << "is not equal to " << expect_.DebugString(); -} - -bool TensorHasShapeMatcher::MatchAndExplain( - Tensor actual, - ::testing::MatchResultListener* listener) const { - if (expect_ != actual.shape()) { - *listener << "\n shape=" << actual.shape().DebugString(); - return false; - } - return true; -} - -void TensorHasShapeMatcher::DescribeTo(::std::ostream* gmock_os) const { - *gmock_os << "shape=" << expect_.DebugString(); -} - -void TensorHasShapeMatcher::DescribeNegationTo(::std::ostream* gmock_os) const { - *gmock_os << "shape!=" << expect_.DebugString(); -} - -Matcher<Tensor> TensorHasShape(const TensorShape& shape) { - // MakeMatcher takes ownership of the TensorHasShapeMatcher. - return MakeMatcher(new TensorHasShapeMatcher(shape)); -} - -} // namespace text_kernels_test_util -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.h deleted file mode 100644 index 89b885b..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/text_kernels_test_util.h +++ /dev/null
@@ -1,124 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// GMock matchers for testing text kernels: -// TensorHasShapeAndValues<DTYPE>({dim1, ..., dimN}, {v1, v2, ..., vN}); -// VectorEq<DTYPE>({v1, v2, ..., vN}); -// MatrixEq<DTYPE>({{v1_1, ..., v1_M}, ..., {vN_1, ..., vN_M}}); -// TensorHasShape({dim1, ..., dimN}); - -#ifndef TENSORFLOW_TEXT_CORE_KERNELS_TEXT_KERNELS_TEST_UTIL_H_ -#define TENSORFLOW_TEXT_CORE_KERNELS_TEXT_KERNELS_TEST_UTIL_H_ - -#include <gmock/gmock.h> -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_testutil.h" - -namespace tensorflow { -namespace text_kernels_test_util { - -// GMock MatcherInterface for testing tensor equality. -class TensorEqMatcher : public ::testing::MatcherInterface<Tensor> { - public: - explicit TensorEqMatcher(const Tensor& expect) : expect_(expect) {} - bool MatchAndExplain(Tensor actual, - ::testing::MatchResultListener* listener) const override; - void DescribeTo(::std::ostream* gmock_os) const override; - void DescribeNegationTo(::std::ostream* gmock_os) const override; - - private: - Tensor expect_; -}; - -// GMock MatcherInterface for testing tensor shapes. -class TensorHasShapeMatcher : public ::testing::MatcherInterface<Tensor> { - public: - explicit TensorHasShapeMatcher(const TensorShape& expect) : expect_(expect) {} - bool MatchAndExplain(Tensor actual, - ::testing::MatchResultListener* listener) const override; - void DescribeTo(::std::ostream* gmock_os) const override; - void DescribeNegationTo(::std::ostream* gmock_os) const override; - - private: - TensorShape expect_; -}; - -// Returns a gmock matcher that checks whether a given tensor has the specified -// dtype, values, and shape. dtype is specified using the template parameter. -// values are specified as a flattened vector. -// Example: -// EXPECT_THAT(*GetOutput(0), -// TensorHasShapeAndValues<int64>({3, 2}, {1, 2, 3, 4, 5, 6}); -template <typename DTYPE> -::testing::Matcher<Tensor> TensorHasShapeAndValues( - const TensorShape& shape, - const std::vector<DTYPE>& values) { - Tensor expect = test::AsTensor<DTYPE>(values, shape); - // MakeMatcher takes ownership of the TensorEqMatcher. - return ::testing::MakeMatcher(new TensorEqMatcher(expect)); -} - -// Returns a gmock matcher that checks whether a given tensor is a 1-D tensor -// with the specified dtype and values. dtype is specified using the template -// parameter. -// Example: -// EXPECT_THAT(*GetOutput(0), -// VectorEq<int64>({1, 2, 3, 4, 5, 6}); -template <typename DTYPE> -::testing::Matcher<Tensor> VectorEq(const std::vector<DTYPE>& values) { - int64 nvals = values.size(); - Tensor expect = test::AsTensor<DTYPE>(values, {nvals}); - // MakeMatcher takes ownership of the TensorEqMatcher. - return ::testing::MakeMatcher(new TensorEqMatcher(expect)); -} - -// Returns a gmock matcher that checks whether a given tensor is a 2-D tensor -// with the specified dtype and values. 
dtype is specified using the template -// parameter. values are specified as a nested vector. All rows of the values -// vector must have the same length. The values vector may not be empty, -// since we can't infer the number of columns for an empty matrix; to test -// empty matrices, use the more general TensorHasShapeAndValues() instead. -// Example: -// EXPECT_THAT(*GetOutput(0), -// MatrixEq<int64>({{1, 2, 3}, {4, 5, 6}}); -template <typename DTYPE> -::testing::Matcher<Tensor> MatrixEq( - const std::vector<std::vector<DTYPE>>& values) { - int64 nrows = values.size(); - CHECK_GT(nrows, 0) // Crash OK - << "Invalid use of MatrixEq: to test empty matrices, use " - << "TensorHasShapeAndValues<dtype>{{0, ndims}, {}} instead."; - int64 ncols = values[0].size(); - std::vector<DTYPE> flat; - for (const auto& row : values) { - CHECK_EQ(ncols, row.size()) // Crash OK - << "Invalid use of MatrixEq: all rows must have equal length"; - flat.insert(flat.end(), row.begin(), row.end()); - } - Tensor expect = test::AsTensor<DTYPE>(flat, TensorShape({nrows, ncols})); - // MakeMatcher takes ownership of the TensorEqMatcher. - return ::testing::MakeMatcher(new TensorEqMatcher(expect)); -} - -// Returns a gmock matcher that checks whether a given tensor has a specified -// shape. -// Example: -// EXPECT_THAT(*GetOutput(0), TensorHasShape({2, 8}); -::testing::Matcher<Tensor> TensorHasShape(const TensorShape& shape); - -} // namespace text_kernels_test_util -} // namespace tensorflow - -#endif // TENSORFLOW_TEXT_CORE_KERNELS_TEXT_KERNELS_TEST_UTIL_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc deleted file mode 100644 index 65099251e..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/tokenizer_from_logits_kernel.cc +++ /dev/null
@@ -1,227 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <limits> -#include <memory> -#include <string> -#include <vector> - -#include "absl/strings/str_cat.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -namespace { - -// Returns the length (number of bytes) of the UTF8 code point starting at src, -// by reading only the byte from address src. -// -// The result is a number from the set {1, 2, 3, 4}. -int OneCharLen(const char* src) { - // On most platforms, char is unsigned by default, but iOS is an exception. - // The cast below makes sure we always interpret *src as an unsigned char. - return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4" - [(*(reinterpret_cast<const unsigned char*>(src)) & 0xFF) >> 4]; -} - -bool GetUTF8Chars(absl::string_view text, - std::vector<absl::string_view>* chars) { - const char* start = text.data(); - const char* end = text.data() + text.size(); - while (start < end) { - const int char_length = OneCharLen(start); - if (char_length <= 0) { - return false; - } - chars->emplace_back(start, char_length); - start += char_length; - } - return true; -} - -bool IsBreakChar(absl::string_view text) { - UChar32 c; - int position = 0; - U8_NEXT_OR_FFFD(text.data(), position, text.length(), c); - return u_isUWhiteSpace(c); -} - -// Tokenizes text, the input string #(batch_index). Knowing the batch_index -// allows us to retrieve the corresponding data from logits. I.e., the logits -// for the i-th character from text are logits(batch_index, i, 0) (for the -// "split" action) and logits(batch_index, i, 1) (for the "merge" action). 
-Status TokenizeByLogits(const absl::string_view& text, - const TTypes<const float, 3>::Tensor& logits, - int batch_index, - bool force_split_at_break_character, - std::vector<std::string>* tokens, - std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_tokens) { - std::vector<absl::string_view> chars; - if (!GetUTF8Chars(text, &chars)) { - return Status(error::Code::INVALID_ARGUMENT, - absl::StrCat("Input string is not utf8 valid: ", text)); - } - - if (chars.size() > logits.dimension(1)) { - return Status(error::Code::INVALID_ARGUMENT, - absl::StrCat("Number of logits, ", logits.dimension(1), - ", is insufficient for text \"", text, "\"")); - } - - bool last_character_is_break_character = false; - int start = 0; - bool has_new_token_generated_for_text = false; - for (int i = 0; i < chars.size(); ++i) { - const bool is_break_character = IsBreakChar(chars[i]); - if (!is_break_character) { - const float logit_split = logits(batch_index, i, 0); - const float logit_merge = logits(batch_index, i, 1); - if ((logit_split > logit_merge) || !has_new_token_generated_for_text || - (last_character_is_break_character && - force_split_at_break_character)) { - tokens->emplace_back(chars[i].data(), chars[i].length()); - begin_offset->push_back(start); - end_offset->push_back(start + chars[i].length()); - *num_tokens += 1; - has_new_token_generated_for_text = true; - } else { - tokens->back().append(chars[i].data(), chars[i].length()); - end_offset->back() = start + chars[i].length(); - } - } - - start += chars[i].length(); - last_character_is_break_character = is_break_character; - } - - return Status::OK(); -} - -} // namespace - -class TokenizerFromLogitsOp : public OpKernel { - public: - explicit TokenizerFromLogitsOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - - void Compute(OpKernelContext* ctx) override { - const Tensor* strings; - OP_REQUIRES_OK(ctx, ctx->input("strings", &strings)); - const Tensor* logits; - OP_REQUIRES_OK(ctx, ctx->input("logits", &logits)); - OP_REQUIRES(ctx, strings->dim_size(0) == logits->dim_size(0), - errors::InvalidArgument("Expecting logits to have ", - strings->dim_size(0), " rows, got ", - logits->dim_size(0))); - const Tensor* force_split_at_break_character; - OP_REQUIRES_OK(ctx, ctx->input("force_split_at_break_character", - &force_split_at_break_character)); - const bool force_split_at_break_character_bool = - force_split_at_break_character->scalar<bool>()(); - - std::vector<string> tokens; - std::vector<int> begin_offset; - std::vector<int> end_offset; - std::vector<int> output_row_splits(1, 0); - - // Tensor to access values from logits. - const TTypes<const float, 3>::Tensor logits_tensor = - logits->tensor<float, 3>(); - - // Iterate through all the values and tokenize them. - const auto& strings_vec = strings->flat<tstring>(); - OP_REQUIRES(ctx, logits_tensor.dimension(0) >= strings_vec.size(), - errors::Internal( - "Bad logits dimension #0: ", logits_tensor.dimension(0), - " < ", strings_vec.size())); - // Dimension #1 of logits will be checked inside TokenizeByLogits. - OP_REQUIRES(ctx, logits_tensor.dimension(2) == 2, - errors::Internal("Bad logits dimension #2: ", - logits_tensor.dimension(2), " != 2")); - for (int i = 0; i < strings_vec.size(); ++i) { - // Tokenize into tokens and record the offset locations. - int num_tokens = 0; - OP_REQUIRES_OK( - ctx, TokenizeByLogits(strings_vec(i), logits_tensor, i, - force_split_at_break_character_bool, &tokens, - &begin_offset, &end_offset, &num_tokens)); - - // Record the row splits. 
- output_row_splits.push_back(num_tokens + output_row_splits.back()); - } - - std::vector<int64> output_tokens_shape; - output_tokens_shape.push_back(tokens.size()); - - std::vector<int64> output_row_splits_shape; - output_row_splits_shape.push_back(output_row_splits.size()); - - Tensor* output_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("output_values", - TensorShape(output_tokens_shape), - &output_values)); - auto output_values_vec = output_values->vec<tstring>(); - - Tensor* output_row_splits_tensor; - OP_REQUIRES_OK(ctx, ctx->allocate_output( - "row_splits", TensorShape(output_row_splits_shape), - &output_row_splits_tensor)); - auto output_row_splits_vec = output_row_splits_tensor->vec<int64>(); - - Tensor* start_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values", - TensorShape(output_tokens_shape), - &start_values)); - auto start_values_vec = start_values->vec<int64>(); - - Tensor* limit_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values", - TensorShape(output_tokens_shape), - &limit_values)); - auto limit_values_vec = limit_values->vec<int64>(); - - for (int i = 0; i < tokens.size(); ++i) { - output_values_vec(i) = tokens[i]; - } - - for (int i = 0; i < output_row_splits.size(); ++i) { - output_row_splits_vec(i) = output_row_splits[i]; - } - - for (int i = 0; i < begin_offset.size(); ++i) { - start_values_vec(i) = begin_offset[i]; - } - - for (int i = 0; i < end_offset.size(); ++i) { - limit_values_vec(i) = end_offset[i]; - } - } - - private: - TF_DISALLOW_COPY_AND_ASSIGN(TokenizerFromLogitsOp); -}; - -REGISTER_KERNEL_BUILDER(Name("TokenizerFromLogits").Device(DEVICE_CPU), - TokenizerFromLogitsOp); - -} // namespace text -} // namespace tensorflow
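Both deleted tokenizer kernels build their ragged outputs the same way: output_row_splits starts at 0 and, after each input string, appends the running token count. A tiny sketch of that bookkeeping, with illustrative per-string counts:

// Sketch of the RaggedTensor row_splits bookkeeping:
// row_splits[i+1] - row_splits[i] == number of tokens produced for string i.
#include <iostream>
#include <vector>

int main() {
  std::vector<int> tokens_per_string = {3, 0, 2};  // hypothetical counts
  std::vector<long long> row_splits(1, 0);
  for (int n : tokens_per_string)
    row_splits.push_back(n + row_splits.back());
  for (long long s : row_splits) std::cout << s << " ";  // prints: 0 3 3 5
  std::cout << "\n";
}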
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel.cc deleted file mode 100644 index e4287916..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel.cc +++ /dev/null
@@ -1,189 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <string.h> - -#include <vector> - -#include "icu4c/source/common/unicode/errorcode.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/uscript.h" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/lookup_interface.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/macros.h" - -namespace tensorflow { -namespace text { - -template <typename SPLITS_TYPE> -class UnicodeScriptTokenizeWithOffsetsOp : public OpKernel { - public: - explicit UnicodeScriptTokenizeWithOffsetsOp(OpKernelConstruction* ctx) - : OpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("keep_whitespace", &keep_whitespace_)); - } - - /** - * Breaks a series of codepoints into individual groups based on the script - * code as defined by ICU. - * - * We gain a dimension while tokenizing since a series of integer codepoints - * is tokenized into different codepoint groups. - * - * This accepts two input tensors: a rank 1 tensor of codepoint values and - * a single rank 1 tensor of splits which determine where each string begins - * and ends from the provided codepoints. - */ - void Compute(OpKernelContext* context) override { - // Get inputs - const Tensor& input_values_tensor = context->input(0); - const auto input_values_flat = input_values_tensor.flat<int32>(); - const Tensor& input_splits_tensor = context->input(1); - const auto input_splits_flat = input_splits_tensor.flat<SPLITS_TYPE>(); - - // Since we limit to a 2-D input (flat_values of rank 1 and a single splits - // tensor), our output dimension will always be 3-D (flat_values of rank 1 - // with two splits - inner for the tokenized values and the outer for those - // grouped by the original strings). - // A few things to note: - // 1) The values and inner splits of the tokenized strings have an unknown - // length, as well as the offsets, so we allocate them at the end. - // 2) The outer splits of the tokenized strings matches that of the offset - // splits. Thus, we will only return one set and use it for all of them. - // 3) The outer splits shape will match the original input_splits. 
- Tensor* output_outer_splits_tensor; - OP_REQUIRES_OK(context, - context->allocate_output("output_outer_splits", - input_splits_tensor.shape(), - &output_outer_splits_tensor)); - auto output_outer_splits_flat = - output_outer_splits_tensor->flat<SPLITS_TYPE>(); - - std::vector<int32> output_values; - std::vector<SPLITS_TYPE> output_values_inner_splits; - std::vector<int64> output_offset_starts; - std::vector<int64> output_offset_limits; - - // Loop over the codepoints (a split at a time) and create splits of tokens. - icu::ErrorCode status; - for (int splits_idx = 0; splits_idx < input_splits_flat.size() - 1; - splits_idx++) { - output_outer_splits_flat(splits_idx) = output_offset_starts.size(); - UScriptCode prev_script = USCRIPT_INVALID_CODE; - bool token_has_start_set = false; - int32 curr_skipped_spaces = 0; // Used when computing the end of a token - const int curr_word_start_idx = input_splits_flat(splits_idx); - bool was_space = false; - for (int values_idx = curr_word_start_idx; - values_idx < input_splits_flat(splits_idx + 1); values_idx++) { - const int32 input_value = input_values_flat(values_idx); - const bool is_space = u_isUWhiteSpace(input_value); - UScriptCode script = uscript_getScript(input_value, status); - // Split these failures out as if they are a different code and ignore - // the error. - if (status.isFailure()) { - status.reset(); - script = USCRIPT_INVALID_CODE; - } - // Split out a new token if the unicode script changes from the - // previous token. - if (script != prev_script || - (keep_whitespace_ && is_space != was_space)) { - if (token_has_start_set) { - output_offset_limits.push_back(values_idx - curr_word_start_idx - - curr_skipped_spaces); - } - prev_script = script; - token_has_start_set = false; - } - // Only copy characters other than whitespace. Because of this, also do - // not start new tokens until a character other than a space is reached. - if (!is_space || keep_whitespace_) { - if (!token_has_start_set) { - // Set token start offset relative to current string. - output_offset_starts.push_back(values_idx - curr_word_start_idx); - // Set split to indicate start of a new token. - output_values_inner_splits.push_back(output_values.size()); - token_has_start_set = true; - } - output_values.push_back(input_value); - } - if (!keep_whitespace_) { - if (is_space) { - curr_skipped_spaces++; - } else { - curr_skipped_spaces = 0; - } - } - was_space = is_space; - } - // Looping through the codepoints for current tokens complete. Now set the - // last limit of out last token (if we found a start earlier). - if (token_has_start_set) { - output_offset_limits.push_back(input_splits_flat(splits_idx + 1) - - curr_word_start_idx - - curr_skipped_spaces); - } - } - // Now set the closing value of our splits. - output_outer_splits_flat(input_splits_flat.size() - 1) = - output_offset_starts.size(); - output_values_inner_splits.push_back(output_values.size()); - -// Allocate output & fill output tensors. 
-#define DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(name, dtype) \ - int64 name##_size = name.size(); \ - Tensor* name##_tensor = nullptr; \ - OP_REQUIRES_OK(context, \ - context->allocate_output(#name, TensorShape({name##_size}), \ - &name##_tensor)); \ - auto name##_data = name##_tensor->flat<dtype>().data(); \ - memcpy(name##_data, name.data(), name##_size * sizeof(dtype)); - - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values, int32); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values_inner_splits, - SPLITS_TYPE); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_starts, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_limits, int64); - -#undef DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR - } - - private: - bool keep_whitespace_; - - TF_DISALLOW_COPY_AND_ASSIGN(UnicodeScriptTokenizeWithOffsetsOp); -}; - -REGISTER_KERNEL_BUILDER(Name("UnicodeScriptTokenizeWithOffsets") - .Device(DEVICE_CPU) - .TypeConstraint<int32>("Tsplits"), - UnicodeScriptTokenizeWithOffsetsOp<int32>); -REGISTER_KERNEL_BUILDER(Name("UnicodeScriptTokenizeWithOffsets") - .Device(DEVICE_CPU) - .TypeConstraint<int64>("Tsplits"), - UnicodeScriptTokenizeWithOffsetsOp<int64>); - -} // namespace text -} // namespace tensorflow
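A simplified sketch of the script-change rule the deleted UnicodeScriptTokenizeWithOffsets kernel uses: a new token starts whenever uscript_getScript reports a different script than the previous codepoint. The sketch assumes a standard ICU installation (link with -licuuc) and, for brevity, drops whitespace outright instead of modeling the keep_whitespace attribute and offset bookkeeping.

// Group codepoints into tokens by ICU script code changes.
#include <unicode/uchar.h>
#include <unicode/uscript.h>
#include <unicode/utypes.h>

#include <iostream>
#include <vector>

std::vector<std::vector<UChar32>> SplitByScript(
    const std::vector<UChar32>& codepoints) {
  std::vector<std::vector<UChar32>> groups;
  UScriptCode prev = USCRIPT_INVALID_CODE;
  for (UChar32 cp : codepoints) {
    UErrorCode status = U_ZERO_ERROR;
    UScriptCode script = uscript_getScript(cp, &status);
    if (U_FAILURE(status)) script = USCRIPT_INVALID_CODE;
    if (u_isUWhiteSpace(cp)) {   // drop whitespace; force a split afterwards
      prev = USCRIPT_INVALID_CODE;
      continue;
    }
    if (script != prev) groups.emplace_back();  // script change -> new token
    groups.back().push_back(cp);
    prev = script;
  }
  return groups;
}

int main() {
  // "ab αβ": Latin and Greek codepoints end up in separate tokens.
  std::vector<UChar32> input = {'a', 'b', ' ', 0x03B1, 0x03B2};
  std::cout << SplitByScript(input).size() << " tokens\n";  // "2 tokens"
}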
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel_test.cc deleted file mode 100644 index ec712e8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/unicode_script_tokenize_kernel_test.cc +++ /dev/null
@@ -1,68 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -namespace tensorflow { -namespace text { - -using tensorflow::FakeInput; -using tensorflow::NodeDefBuilder; -using tensorflow::Status; -using tensorflow::TensorShape; -using tensorflow::text_kernels_test_util::VectorEq; - -class UnicodeScriptTokenizeWithOffsetsKernelTest - : public tensorflow::OpsTestBase { - public: - void MakeOp() { - TF_ASSERT_OK(NodeDefBuilder("tested_op", "UnicodeScriptTokenizeWithOffsets") - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } -}; - -TEST_F(UnicodeScriptTokenizeWithOffsetsKernelTest, Test) { - MakeOp(); - AddInputFromArray<int32>(TensorShape({6}), {111, 112, 32, 116, 117, 118}); - AddInputFromArray<int64>(TensorShape({3}), {0, 4, 6}); - TF_ASSERT_OK(RunOpKernel()); - - std::vector<int32> expected_values({111, 112, 116, 117, 118}); - std::vector<int64> expected_values_inner_splits({0, 2, 3, 5}); - std::vector<int64> expected_offset_starts({0, 3, 0}); - std::vector<int64> expected_offset_limits({2, 4, 2}); - std::vector<int64> output_outer_splits({0, 2, 3}); - EXPECT_THAT(*GetOutput(0), VectorEq(expected_values)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_values_inner_splits)); - EXPECT_THAT(*GetOutput(2), VectorEq(expected_offset_starts)); - EXPECT_THAT(*GetOutput(3), VectorEq(expected_offset_limits)); - EXPECT_THAT(*GetOutput(4), VectorEq(output_outer_splits)); -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel.cc deleted file mode 100644 index 9545d67e..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel.cc +++ /dev/null
@@ -1,161 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <string.h> - -#include <vector> - -#include "icu4c/source/common/unicode/uchar.h" -#include "tensorflow/core/framework/kernel_def_builder.h" -#include "tensorflow/core/framework/lookup_interface.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_types.h" -#include "tensorflow/core/framework/types.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow/core/platform/macros.h" - -namespace tensorflow { -namespace text { - -template <typename SPLITS_TYPE> -class WhitespaceTokenizeWithOffsetsOp : public OpKernel { - public: - explicit WhitespaceTokenizeWithOffsetsOp(OpKernelConstruction* ctx) - : OpKernel(ctx) {} - - /** - * Breaks a series of codepoints into individual groups based on the script - * code. - * - * We gain a dimension while tokenizing since a series of integer codepoints - * is tokenized into different codepoint groups. - * - * This accepts two input tensors: a rank 1 tensor of codepoint values and - * a single rank 1 tensor of splits which determine where each string begins - * and ends from the provided codepoints. - */ - void Compute(OpKernelContext* context) override { - // Get inputs - const Tensor& input_values_tensor = context->input(0); - const auto input_values_flat = input_values_tensor.flat<int32>(); - const Tensor& input_splits_tensor = context->input(1); - const auto input_splits_flat = input_splits_tensor.flat<SPLITS_TYPE>(); - - // Since we limit to a 2-D input (flat_values of rank 1 and a single splits - // tensor), our output dimension will always be 3-D (flat_values of rank 1 - // with two splits - inner for the tokenized values and the outer for those - // grouped by the original strings). - // A few things to note: - // 1) The values and inner splits of the tokenized strings have an unknown - // length, as well as the offsets, so we allocate them at the end. - // 2) The outer splits of the tokenized strings matches that of the offset - // splits. Thus, we will only return one set and use it for all of them. - // 3) The outer splits shape will match the original input_splits. - Tensor* output_outer_splits_tensor; - OP_REQUIRES_OK(context, - context->allocate_output("output_outer_splits", - input_splits_tensor.shape(), - &output_outer_splits_tensor)); - auto output_outer_splits_flat = - output_outer_splits_tensor->flat<SPLITS_TYPE>(); - - std::vector<int32> output_values; - std::vector<SPLITS_TYPE> output_values_inner_splits; - std::vector<int64> output_offset_starts; - std::vector<int64> output_offset_limits; - - // Loop over the codepoints (a split at a time) and create splits of tokens. 
- for (int splits_idx = 0; splits_idx < input_splits_flat.size() - 1; - splits_idx++) { - output_outer_splits_flat(splits_idx) = output_offset_starts.size(); - bool token_has_start_set = false; - int32 curr_skipped_spaces = 0; // Used when computing the end of a token - const int curr_word_start_idx = input_splits_flat(splits_idx); - for (int values_idx = curr_word_start_idx; - values_idx < input_splits_flat(splits_idx + 1); values_idx++) { - // End current token if we find whitespace - if (u_isUWhiteSpace(input_values_flat(values_idx))) { - if (token_has_start_set) { - output_offset_limits.push_back(values_idx - curr_word_start_idx - - curr_skipped_spaces); - } - token_has_start_set = false; - ++curr_skipped_spaces; - } else { - // Non whitespace. Start a new token if needed, and append the - // codepoint to our current token. - if (!token_has_start_set) { - // Set token start offset relative to current string. - output_offset_starts.push_back(values_idx - curr_word_start_idx); - // Set split to indicate start of a new token. - output_values_inner_splits.push_back(output_values.size()); - token_has_start_set = true; - } - output_values.push_back(input_values_flat(values_idx)); - curr_skipped_spaces = 0; - } - } - // Looping through the codepoints for current tokens complete. Now set the - // last limit of out last token (if we found a start earlier). - if (token_has_start_set) { - output_offset_limits.push_back(input_splits_flat(splits_idx + 1) - - curr_word_start_idx - - curr_skipped_spaces); - } - } - // Now set the closing value of our splits. - output_outer_splits_flat(input_splits_flat.size() - 1) = - output_offset_starts.size(); - output_values_inner_splits.push_back(output_values.size()); - -// Allocate output & fill output tensors. -#define DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(name, dtype) \ - int64 name##_size = name.size(); \ - Tensor* name##_tensor = nullptr; \ - OP_REQUIRES_OK(context, \ - context->allocate_output(#name, TensorShape({name##_size}), \ - &name##_tensor)); \ - auto name##_data = name##_tensor->flat<dtype>().data(); \ - memcpy(name##_data, name.data(), name##_size * sizeof(dtype)); - - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values, int32); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_values_inner_splits, - SPLITS_TYPE); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_starts, int64); - DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR(output_offset_limits, int64); - -#undef DECLARE_ALLOCATE_AND_FILL_OUTPUT_TENSOR - } - - private: - TF_DISALLOW_COPY_AND_ASSIGN(WhitespaceTokenizeWithOffsetsOp); -}; - -REGISTER_KERNEL_BUILDER(Name("WhitespaceTokenizeWithOffsets") - .Device(DEVICE_CPU) - .TypeConstraint<int32>("Tsplits"), - WhitespaceTokenizeWithOffsetsOp<int32>); -REGISTER_KERNEL_BUILDER(Name("WhitespaceTokenizeWithOffsets") - .Device(DEVICE_CPU) - .TypeConstraint<int64>("Tsplits"), - WhitespaceTokenizeWithOffsetsOp<int64>); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel_test.cc deleted file mode 100644 index 86a3be8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenize_kernel_test.cc +++ /dev/null
@@ -1,67 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <vector> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow_text/core/kernels/text_kernels_test_util.h" - -namespace tensorflow { -namespace text { - -using tensorflow::FakeInput; -using tensorflow::NodeDefBuilder; -using tensorflow::Status; -using tensorflow::TensorShape; -using tensorflow::text_kernels_test_util::VectorEq; - -class WhitespaceTokenizeWithOffsetsKernelTest : public tensorflow::OpsTestBase { - public: - void MakeOp() { - TF_ASSERT_OK(NodeDefBuilder("tested_op", "WhitespaceTokenizeWithOffsets") - .Input(FakeInput()) - .Input(FakeInput()) - .Finalize(node_def())); - TF_ASSERT_OK(InitOp()); - } -}; - -TEST_F(WhitespaceTokenizeWithOffsetsKernelTest, Test) { - MakeOp(); - AddInputFromArray<int32>(TensorShape({6}), {111, 112, 32, 116, 117, 118}); - AddInputFromArray<int64>(TensorShape({3}), {0, 4, 6}); - TF_ASSERT_OK(RunOpKernel()); - - std::vector<int32> expected_values({111, 112, 116, 117, 118}); - std::vector<int64> expected_values_inner_splits({0, 2, 3, 5}); - std::vector<int64> expected_offset_starts({0, 3, 0}); - std::vector<int64> expected_offset_limits({2, 4, 2}); - std::vector<int64> output_outer_splits({0, 2, 3}); - EXPECT_THAT(*GetOutput(0), VectorEq(expected_values)); - EXPECT_THAT(*GetOutput(1), VectorEq(expected_values_inner_splits)); - EXPECT_THAT(*GetOutput(2), VectorEq(expected_offset_starts)); - EXPECT_THAT(*GetOutput(3), VectorEq(expected_offset_limits)); - EXPECT_THAT(*GetOutput(4), VectorEq(output_outer_splits)); -} - -} // namespace text -} // namespace tensorflow
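For reference, the fixture shared by the unicode-script and whitespace kernel tests above decodes as follows (a small standalone sketch, not part of either test):

// Decoding the test fixture: codepoints {111,112,32,116,117,118} spell
// "op tuv", and row splits {0,4,6} mark two input strings, "op t" and "uv".
#include <iostream>
#include <string>
#include <vector>

int main() {
  const std::vector<int> codepoints = {111, 112, 32, 116, 117, 118};
  const std::vector<int> splits = {0, 4, 6};
  for (size_t s = 0; s + 1 < splits.size(); ++s) {
    std::string piece;
    for (int i = splits[s]; i < splits[s + 1]; ++i)
      piece.push_back(static_cast<char>(codepoints[i]));
    std::cout << '"' << piece << '"' << "\n";  // "op t" then "uv"
  }
  // Expected tokens: {"op", "t"} from the first string and {"uv"} from the
  // second, which is why output_outer_splits is {0, 2, 3}.
}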
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.cc deleted file mode 100644 index 10aed7da..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.cc +++ /dev/null
@@ -1,86 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/whitespace_tokenizer.h" - -#include <string> -#include <vector> - -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/appendable.h" -#include "icu4c/source/common/unicode/bytestream.h" -#include "icu4c/source/common/unicode/edits.h" -#include "icu4c/source/common/unicode/normalizer2.h" -#include "icu4c/source/common/unicode/schriter.h" -#include "icu4c/source/common/unicode/stringoptions.h" -#include "icu4c/source/common/unicode/stringpiece.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/ucnv.h" -#include "icu4c/source/common/unicode/ucnv_err.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/uniset.h" -#include "icu4c/source/common/unicode/unistr.h" -#include "icu4c/source/common/unicode/uset.h" -#include "icu4c/source/common/unicode/utf.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "icu4c/source/common/unicode/utypes.h" - -namespace tensorflow { -namespace text { - -void WhitespaceTokenizer::Tokenize(const absl::string_view input, - std::vector<std::string>* tokens) { - std::vector<int> start_offsets, end_offsets; - Tokenize(input, tokens, &start_offsets, &end_offsets); -} - -void WhitespaceTokenizer::Tokenize(const absl::string_view input, - std::vector<std::string>* tokens, - std::vector<int>* start_offsets, - std::vector<int>* end_offsets) { - const int input_size = input.size(); - int position = 0, prev_position = 0; - UChar32 codepoint; - bool inside_token = false; - while (position < input_size) { - prev_position = position; - U8_NEXT(input, position, input_size, codepoint); - if (config_.IsWhitespace(codepoint)) { - if (inside_token) { - int end_pos = position - 1; - end_offsets->push_back(end_pos); - int start_pos = start_offsets->back(); - std::string token(input.substr(start_pos, end_pos - start_pos)); - tokens->push_back(token); - inside_token = false; - } - } else { - if (!inside_token) { - start_offsets->push_back(prev_position); - inside_token = true; - } - } - } - // save final word - if (inside_token) { - int end_pos = position; - end_offsets->push_back(end_pos); - int start_pos = start_offsets->back(); - std::string token(input.substr(start_pos, end_pos - start_pos)); - tokens->push_back(token); - } -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.h deleted file mode 100644 index 4fd41d5..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer.h +++ /dev/null
@@ -1,112 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_H_ - -#include <string> -#include <vector> - -#include "absl/strings/string_view.h" -#include "icu4c/source/common/unicode/umachine.h" - -namespace tensorflow { -namespace text { - -// Helper class for working with the WhitespaceaTokenizer config. The -// config is essentially a bit array stored in characters, where each bit in -// the char represents a Unicode character and whether or not it is considered -// as whitespace. -// -// This bit array contains all codepoints up to the largest whitespace -// character. So any codepoint larger than the array is not whitespace, and -// a lookup is simply using the codepoint value as the index. The first 3 bits -// of the codepoint indicate which bit in a character is the value located, and -// using the rest of the bits of the codepoint we can determine which -// character the particular codepoint is located at. -class WhitespaceTokenizerConfig { - public: - // This object does not own the config, so make certain it exists for the - // lifetime of the class. - WhitespaceTokenizerConfig(const absl::string_view config) - : config_(config), max_codepoint_(config.length() * 8) {} - WhitespaceTokenizerConfig(const std::string* config) - : config_(*config), max_codepoint_(config->length() * 8) {} - - inline bool IsWhitespace(const UChar32 codepoint) const { - return codepoint <= max_codepoint_ && - config_[codepoint >> 3] & (1 << (char)(codepoint & 0x7)); - } - - private: - const absl::string_view config_; - const int max_codepoint_; -}; - -class WhitespaceTokenizer { - public: - // Creates an instance. - // - // Args: - // * config: A WhitespaceTokenizerConfig which should be created using the - // WhitespaceTokenizerConfigBuilder - WhitespaceTokenizer(const WhitespaceTokenizerConfig& cfg) : config_(cfg) {} - - // Tokenizes a string (or series of character codepoints) by whitespace. - // - // Example: - // input = "Show me the way." - // tokens = ["Show", "me", "the", "way."] - // start_offsets = [0, 5, 8, 12] - // end_offsets = [4, 7, 11, 16] - // - // The input should be UTF-8 but the tokenization is performed on Unicode - // codepoints. - // - // Args: - // * input: The UTF-8 string of an input. - // * tokens: The output tokens. - // * start_offsets: The start offsets of output tokens in the input - // text, in utf-8 bytes. - // * end_offsets: The end offsets of output tokens in the input - // text, in utf-8 bytes. - // Note: the start offsets are inclusive and the end offsets are exclusive. - void Tokenize(const absl::string_view input, - std::vector<std::string>* tokens, - std::vector<int>* start_offsets, - std::vector<int>* end_offsets); - - // Tokenizes a string (or series of character codepoints) by whitespace. - // - // Example: - // input = "Show me the way." 
- // output = ["Show", "me", "the", "way."] - // - // The input should be UTF-8 but the tokenization is performed on Unicode - // codepoints. - // - // Args: - // * input: The UTF-8 string of an input. - // * tokens: The output tokens. - void Tokenize(const absl::string_view input, - std::vector<std::string>* tokens); - - private: - const WhitespaceTokenizerConfig config_; -}; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc deleted file mode 100644 index 16c3b93..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.cc +++ /dev/null
@@ -1,76 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" - -#include <iterator> -#include <string> - -#include "icu4c/source/common/unicode/appendable.h" -#include "icu4c/source/common/unicode/bytestream.h" -#include "icu4c/source/common/unicode/edits.h" -#include "icu4c/source/common/unicode/normalizer2.h" -#include "icu4c/source/common/unicode/schriter.h" -#include "icu4c/source/common/unicode/stringoptions.h" -#include "icu4c/source/common/unicode/stringpiece.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/ucnv.h" -#include "icu4c/source/common/unicode/ucnv_err.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/uniset.h" -#include "icu4c/source/common/unicode/unistr.h" -#include "icu4c/source/common/unicode/uset.h" -#include "icu4c/source/common/unicode/utf.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "icu4c/source/common/unicode/utypes.h" - -namespace tensorflow { -namespace text { - -std::string BuildWhitespaceString() { - icu::UnicodeString unicode_string; - icu::UnicodeStringAppendable appendable_unicode_string(unicode_string); - // The maximum codepoint in Unicode is 0x0010FFFF. - for (UChar32 cp = 0; cp <= 0x0010FFFF; ++cp) { - if (U_IS_UNICODE_CHAR(cp) && u_isUWhiteSpace(cp)) { - appendable_unicode_string.appendCodePoint(cp); - } - } - std::string str; - unicode_string.toUTF8String(str); - return str; -} - -std::string BuildWhitespaceTokenizerConfig() { - // The maximum codepoint in Unicode is 0x0010FFFF. - UChar32 max_unicode_char = 0x0010FFFF; - // The string will hold our bit array - std::string bitset((max_unicode_char >> 3) + 1, 0); - auto bitdata = bitset.begin(); - UChar32 largest_whitespace = 0; - int shift = 0; - for (UChar32 cp = 0; cp <= max_unicode_char; ++cp, ++shift) { - if (shift == 8) { - ++bitdata; - shift = 0; - } - bool is_whitespace = U_IS_UNICODE_CHAR(cp) && u_isUWhiteSpace(cp); - largest_whitespace = is_whitespace ? cp : largest_whitespace; - *bitdata |= is_whitespace << shift; - } - return bitset.substr(0, (largest_whitespace >> 3) + 1); -} - -} // namespace text -} // namespace tensorflow
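How the deleted builder and tokenizer were meant to fit together, as a usage sketch rather than code from the repository; it assumes the headers above are still on the include path and mirrors the "Show me the way." example documented in whitespace_tokenizer.h.

// Build the whitespace config once, then tokenize UTF-8 strings with it.
#include <iostream>
#include <string>
#include <vector>

#include "tensorflow_text/core/kernels/whitespace_tokenizer.h"
#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h"

int main() {
  using tensorflow::text::BuildWhitespaceTokenizerConfig;
  using tensorflow::text::WhitespaceTokenizer;
  using tensorflow::text::WhitespaceTokenizerConfig;

  // The config object does not own the bytes, so keep them alive here.
  const std::string config_bytes = BuildWhitespaceTokenizerConfig();
  WhitespaceTokenizerConfig config(config_bytes);
  WhitespaceTokenizer tokenizer(config);

  std::vector<std::string> tokens;
  std::vector<int> starts, ends;
  tokenizer.Tokenize("Show me the way.", &tokens, &starts, &ends);
  for (size_t i = 0; i < tokens.size(); ++i) {
    // Expected per the header comment: "Show" [0, 4), "me" [5, 7), ...
    std::cout << tokens[i] << " [" << starts[i] << ", " << ends[i] << ")\n";
  }
}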
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h deleted file mode 100644 index 1d41210..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h +++ /dev/null
@@ -1,43 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_CONFIG_BUILDER_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_CONFIG_BUILDER_H_ - -#include <string> - -namespace tensorflow { -namespace text { - -// Builds a WhitespaceTokenizer config object. This contains the Unicode -// codepoints which are considered whitespaces. -// -// The config object is a series of bytes, where each bit represents a Unicode -// character and is 1 if it is a whitespace character, and 0 otherwise. -// -// Returns: -// The bytes of the config as a string. -std::string BuildWhitespaceTokenizerConfig(); - -// Builds a string full of all the whitespace characters. It is mainly used -// for testing and validation. -// -// Returns: -// A string of Unicode whitespace characters. -std::string BuildWhitespaceString(); - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_CONFIG_BUILDER_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder_test.cc deleted file mode 100644 index e1f789c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_config_builder_test.cc +++ /dev/null
@@ -1,89 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" - -#include <string> - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "icu4c/source/common/unicode/appendable.h" -#include "icu4c/source/common/unicode/bytestream.h" -#include "icu4c/source/common/unicode/edits.h" -#include "icu4c/source/common/unicode/normalizer2.h" -#include "icu4c/source/common/unicode/schriter.h" -#include "icu4c/source/common/unicode/stringoptions.h" -#include "icu4c/source/common/unicode/stringpiece.h" -#include "icu4c/source/common/unicode/uchar.h" -#include "icu4c/source/common/unicode/ucnv.h" -#include "icu4c/source/common/unicode/ucnv_err.h" -#include "icu4c/source/common/unicode/umachine.h" -#include "icu4c/source/common/unicode/uniset.h" -#include "icu4c/source/common/unicode/unistr.h" -#include "icu4c/source/common/unicode/uset.h" -#include "icu4c/source/common/unicode/utf.h" -#include "icu4c/source/common/unicode/utf8.h" -#include "icu4c/source/common/unicode/utypes.h" -#include "tensorflow/core/platform/types.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer.h" - -namespace tensorflow { -namespace text { -namespace { - -TEST(WhitespaceTokenizerConfigBuilderTest, BuildWhitespaceString) { - std::string result = BuildWhitespaceString(); - EXPECT_THAT(result, ::testing::HasSubstr(" ")); - EXPECT_THAT(result, ::testing::HasSubstr("\n")); -} - -TEST(WhitespaceTokenizerConfigBuilderTest, - BuildWhitespaceTokenizerConfig_AllWhitespacePresent) { - std::string whitespaces = BuildWhitespaceString(); - icu::UnicodeString codepoints = icu::UnicodeString::fromUTF8(whitespaces); - std::string config = BuildWhitespaceTokenizerConfig(); - // verify all whitepaces are present - WhitespaceTokenizerConfig cfg(config); - for (int i = 0; i < codepoints.length(); ++i) { - EXPECT_TRUE(cfg.IsWhitespace(codepoints[i])); - } -} - -TEST(WhitespaceTokenizerConfigBuilderTest, - BuildWhitespaceTokenizerConfig_MinSize) { - std::string whitespaces = BuildWhitespaceString(); - icu::UnicodeString codepoints = icu::UnicodeString::fromUTF8(whitespaces); - std::string config = BuildWhitespaceTokenizerConfig(); - // verify we are the minimum perfect hash - auto largest_cp = codepoints[codepoints.length() - 1]; - EXPECT_EQ(config.length(), (largest_cp / 8) + 1); -} - -TEST(WhitespaceTokenizerConfigBuilderTest, - BuildWhitespaceTokenizerConfig_VerifyCount) { - std::string whitespaces = BuildWhitespaceString(); - icu::UnicodeString codepoints = icu::UnicodeString::fromUTF8(whitespaces); - std::string config = BuildWhitespaceTokenizerConfig(); - // verify we have the correct number of true values (rest will be false) - int count = 0; - WhitespaceTokenizerConfig cfg(config); - for (int i = 0; i < config.length() * 8; ++i) { - count += cfg.IsWhitespace(i) ? 1 : 0; - } - EXPECT_EQ(count, codepoints.length()); -} - -} // namespace -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel.cc deleted file mode 100644 index bda41c3..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel.cc +++ /dev/null
@@ -1,27 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/whitespace_tokenizer_kernel.h" - -#include "tensorflow/core/framework/op_kernel.h" - -namespace tensorflow { -namespace text { - -REGISTER_KERNEL_BUILDER(Name(WhitespaceTokenizeWithOffsetsV2OpKernel::OpName()) - .Device(tensorflow::DEVICE_CPU), - WhitespaceTokenizeWithOffsetsV2OpKernel); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel.h deleted file mode 100644 index 4dff723..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel.h +++ /dev/null
@@ -1,33 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZE_KERNEL_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZE_KERNEL_H_ - -#include "tensorflow/lite/kernels/shim/tf_op_shim.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h" - -namespace tensorflow { -namespace text { - -class WhitespaceTokenizeWithOffsetsV2OpKernel - : public tflite::shim::TfOpKernel<WhitespaceTokenizeWithOffsetsV2Op> { - public: - using TfOpKernel::TfOpKernel; -}; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZE_KERNEL_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h deleted file mode 100644 index 6a8b7c0..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h +++ /dev/null
@@ -1,220 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_KERNEL_TEMPLATE_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_KERNEL_TEMPLATE_H_ - -#include <iostream> -#include <vector> - -#include "absl/status/status.h" -#include "absl/status/statusor.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/lite/kernels/shim/op_kernel.h" -#include "tensorflow/lite/kernels/shim/shape.h" -#include "tensorflow/lite/kernels/shim/status_macros.h" -#include "tensorflow/lite/kernels/shim/tensor_view.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer.h" - -namespace tensorflow { -namespace text { - -template <tflite::shim::Runtime Rt> -class WhitespaceTokenizeWithOffsetsV2Op - : public tflite::shim::OpKernelShim<WhitespaceTokenizeWithOffsetsV2Op, Rt> { - private: - enum Inputs { kInputValues = 0, kInputConfig }; - enum Outputs { - kOutputTokens = 0, - kOutputRowSplits, - kOutputStartOffsets, - kOutputEndOffsets - }; - - using typename tflite::shim::OpKernelShim<WhitespaceTokenizeWithOffsetsV2Op, - Rt>::InitContext; - using typename tflite::shim::OpKernelShim<WhitespaceTokenizeWithOffsetsV2Op, - Rt>::InvokeContext; - using typename tflite::shim::OpKernelShim<WhitespaceTokenizeWithOffsetsV2Op, - Rt>::ShapeInferenceContext; - - public: - WhitespaceTokenizeWithOffsetsV2Op() = default; - static const char kOpName[]; - static const char kDoc[]; - - // Attributes declaration (syntax: https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Attrs() { return {}; } - - // Inputs declaration (syntax: https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Inputs(); - - // Outputs declaration (syntax: https://www.tensorflow.org/guide/create_op) - static std::vector<std::string> Outputs(); - - // Initializes the op - absl::Status Init(InitContext* context) { return absl::OkStatus(); } - - // Runs the operation - absl::Status Invoke(InvokeContext* context); - - // Shape inference - static absl::Status ShapeInference(ShapeInferenceContext* c); - - protected: - template <typename BufferType, typename DType> - inline absl::Status FillOutputTensor(const std::vector<BufferType>& buffer, - const int index, - InvokeContext* context); -}; - -template <tflite::shim::Runtime Rt> -std::vector<std::string> WhitespaceTokenizeWithOffsetsV2Op<Rt>::Inputs() { - return {"input_values: string", "input_config: string"}; -} - -template <tflite::shim::Runtime Rt> -std::vector<std::string> WhitespaceTokenizeWithOffsetsV2Op<Rt>::Outputs() { - return {"output_tokens: string", "output_row_splits: int64", - "output_start_offsets: int32", "output_end_offsets: int32"}; -} - -template <tflite::shim::Runtime Rt> -absl::Status WhitespaceTokenizeWithOffsetsV2Op<Rt>::ShapeInference( - ShapeInferenceContext* c) { - using tflite::shim::Shape; - const auto input_values_shape_status = c->GetInputShape(kInputValues); - if 
(!input_values_shape_status.ok()) { - return input_values_shape_status.status(); - } - const Shape& input_values_shape = *input_values_shape_status; - - const auto rank_1_shape = Shape({Shape::kUnknownDim}); - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputTokens, rank_1_shape)); - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputStartOffsets, rank_1_shape)); - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputEndOffsets, rank_1_shape)); - const int num_splits = Shape::AddDims(1, input_values_shape.Dim(0)); - SH_RETURN_IF_ERROR(c->SetOutputShape(kOutputRowSplits, Shape({num_splits}))); - - return absl::OkStatus(); -} - -template <tflite::shim::Runtime Rt> -absl::Status WhitespaceTokenizeWithOffsetsV2Op<Rt>::Invoke( - InvokeContext* context) { - // Inputs - const auto values_statusor = context->GetInput(kInputValues); - if (!values_statusor.ok()) { - return values_statusor.status(); - } - const auto values = (*values_statusor)->template As<tensorflow::tstring, 1>(); - - const auto cfg_statusor = context->GetInput(kInputConfig); - if (!cfg_statusor.ok()) { - return cfg_statusor.status(); - } - const absl::string_view config = - (*cfg_statusor)->template AsScalar<tensorflow::tstring>(); - WhitespaceTokenizer tokenizer(config); - - // Outputs - std::vector<std::string> tokens; - std::vector<int64_t> row_splits; - std::vector<int32_t> start_offsets; - std::vector<int32_t> end_offsets; - - // Iterate through all the values and wordpiece tokenize them. - row_splits.push_back(0); - for (int i = 0; i < values.Dim(0); ++i) { - // Tokenize into subwords and record the offset locations. - const int orig_num_tokens = tokens.size(); - tokenizer.Tokenize(values(i), &tokens, &start_offsets, &end_offsets); - const int delta_num_tokens = tokens.size() - orig_num_tokens; - // Record the row splits. - row_splits.push_back(delta_num_tokens + row_splits.back()); - } - - // Allocate output & fill output tensors. - SH_RETURN_IF_ERROR(FillOutputTensor<std::string, tensorflow::tstring>( - tokens, kOutputTokens, context)); - SH_RETURN_IF_ERROR(FillOutputTensor<int64_t, int64_t>( - row_splits, kOutputRowSplits, context)); - SH_RETURN_IF_ERROR(FillOutputTensor<int32_t, int32_t>( - start_offsets, kOutputStartOffsets, context)); - SH_RETURN_IF_ERROR(FillOutputTensor<int32_t, int32_t>( - end_offsets, kOutputEndOffsets, context)); - - return absl::OkStatus(); -} - -template <tflite::shim::Runtime Rt> -template <typename BufferType, typename DType> -absl::Status WhitespaceTokenizeWithOffsetsV2Op<Rt>::FillOutputTensor( - const std::vector<BufferType>& buffer, - const int index, - InvokeContext* context) { - SH_ASSIGN_OR_RETURN( - const auto tensorview, - context->GetOutput( - index, tflite::shim::Shape({static_cast<int>(buffer.size())}))); - auto data = tensorview->template As<DType, 1>(); - // TODO(broken): investigate using memcpy like previous WST - for (int i = 0; i < buffer.size(); ++i) - data(i) = buffer.at(i); - return absl::OkStatus(); -} - -// Static member definitions. -// These can be inlined once the toolchain is bumped up to C++17 - -template <tflite::shim::Runtime Rt> -const char WhitespaceTokenizeWithOffsetsV2Op<Rt>::kOpName[] = - "TFText>WhitespaceTokenizeWithOffsetsV2"; - -template <tflite::shim::Runtime Rt> -const char WhitespaceTokenizeWithOffsetsV2Op<Rt>::kDoc[] = R"doc( - Splits a string into tokens based off of Unicode whitespaces. It also returns - the relative byte offsets for each token. 
- - ### Example: - - ```python - >>> splitter = WhitespaceTokenizer() - >>> tokens, starts, ends = splitter.tokenize_with_offsets("a bb ccc") - >>> print(tokens.numpy(), starts.numpy(), ends.numpy()) - [b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] - ``` - - Args: - input_values: 1D Tensor of strings to tokenize. - input_config: A string representing a WhitespaceTokenizerConfig. - - Returns: - * output_tokens: 1D tensor containing the tokens for all input strings. - A 2D RaggedTensor can be constructed from this and output_row_splits. - * output_row_splits: 1D int tensor with the row splits that allow us to - build RaggedTensors from output_tokens, output_start_offsets, and - output_end_offsets. - * output_start_offsets: 1D tensor containing the inclusive start byte offset - for each token in all input strings. Corresponds 1:1 with output_tokens. - A 2D RaggedTensor can be constructed from this and output_row_splits. - * output_end_offsets: 1D tensor containing the exclusive end byte offset for - each token in all input strings. Corresponds 1:1 with output_tokens. - A 2D RaggedTensor can be constructed from this and output_row_splits. - )doc"; - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_KERNEL_TEMPLATE_H_
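The deleted kernel template above flattens all tokens into a single output vector and emits output_row_splits starting at 0, where each later entry adds that input string's token count. A standalone sketch (standard C++ only; the data values are illustrative) of how a caller turns the flat outputs back into per-input rows:

```cpp
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
  // Flat outputs as the kernel would produce them for inputs {"a bb", "ccc"}.
  std::vector<std::string> tokens = {"a", "bb", "ccc"};
  std::vector<int64_t> row_splits = {0, 2, 3};  // row i is [row_splits[i], row_splits[i+1])

  for (size_t row = 0; row + 1 < row_splits.size(); ++row) {
    std::cout << "row " << row << ":";
    for (int64_t j = row_splits[row]; j < row_splits[row + 1]; ++j) {
      std::cout << " " << tokens[j];
    }
    std::cout << "\n";
  }
  return 0;
}
```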
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_test.cc deleted file mode 100644 index e7be52e..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_test.cc +++ /dev/null
@@ -1,67 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/whitespace_tokenizer.h" - -#include <gmock/gmock.h> -#include <gtest/gtest.h> -#include "absl/flags/flag.h" -#include "absl/status/status.h" -#include "absl/status/statusor.h" -#include "tensorflow/core/platform/env.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" - -namespace tensorflow { -namespace text { -namespace { - -using ::testing::ElementsAre; - -TEST(WhitespaceTokenizerTest, TokenizeWithOffsets) { - absl::string_view input("I heard the news today"); - std::vector<std::string> output_tokens; - std::vector<int> output_start_offsets; - std::vector<int> output_end_offsets; - std::string config(BuildWhitespaceTokenizerConfig()); - WhitespaceTokenizer t(&config); - t.Tokenize(input, &output_tokens, &output_start_offsets, &output_end_offsets); - EXPECT_THAT(output_tokens, ElementsAre("I", "heard", "the", "news", "today")); - EXPECT_THAT(output_start_offsets, ElementsAre(0, 2, 8, 12, 17)); - EXPECT_THAT(output_end_offsets, ElementsAre(1, 7, 11, 16, 22)); -} - -TEST(WhitespaceTokenizerTest, Tokenize) { - absl::string_view input("I heard the news today"); - std::vector<std::string> output_tokens; - std::string config = BuildWhitespaceTokenizerConfig(); - WhitespaceTokenizer t(&config); - t.Tokenize(input, &output_tokens); - EXPECT_THAT(output_tokens, ElementsAre("I", "heard", "the", "news", "today")); -} - -TEST(WhitespaceTokenizerTest, Internationalization) { - absl::string_view input("la灯 灯a 瀮b"); - std::vector<std::string> output_tokens; - std::vector<int> output_start_offsets; - std::vector<int> output_end_offsets; - std::string config = BuildWhitespaceTokenizerConfig(); - WhitespaceTokenizer t(&config); - t.Tokenize(input, &output_tokens, &output_start_offsets, &output_end_offsets); - EXPECT_THAT(output_start_offsets, ElementsAre(0, 6, 11)); - EXPECT_THAT(output_end_offsets, ElementsAre(5, 10, 15)); -} - -} // namespace -} // namespace text -} // namespace tensorflow
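The Internationalization test above encodes an easy point to miss: start and end offsets are byte offsets into the UTF-8 input, not character offsets, so each three-byte CJK character advances the offsets by three. A tiny standalone check of that arithmetic (the source file must be UTF-8 encoded):

```cpp
#include <cassert>
#include <string>

int main() {
  // UTF-8 byte lengths, matching the offsets asserted by the deleted test.
  assert(std::string("la灯").size() == 5);  // 'l' + 'a' + one 3-byte CJK char
  assert(std::string("灯a").size() == 4);
  assert(std::string("瀮b").size() == 4);
  return 0;
}
```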
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_tflite.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_tflite.cc deleted file mode 100644 index 3c0e4bf4..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_tflite.cc +++ /dev/null
@@ -1,34 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow_text/core/kernels/whitespace_tokenizer_tflite.h" - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/kernels/shim/tflite_op_shim.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer_kernel_template.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { - -extern "C" void AddWhitespaceTokenize(tflite::MutableOpResolver* resolver) { - tflite::shim::TfLiteOpKernel< - tensorflow::text::WhitespaceTokenizeWithOffsetsV2Op>::Add(resolver); -} - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_tflite.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_tflite.h deleted file mode 100644 index 99d3e56..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/whitespace_tokenizer_tflite.h +++ /dev/null
@@ -1,33 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_TFLITE_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_TFLITE_H_ - -#include "tensorflow/lite/c/common.h" -#include "tensorflow/lite/mutable_op_resolver.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace text { - -extern "C" void AddWhitespaceTokenize(::tflite::MutableOpResolver* resolver); - -} // namespace text -} // namespace custom -} // namespace ops -} // namespace tflite - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_KERNELS_WHITESPACE_TOKENIZER_TFLITE_H_
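The two deleted TFLite files above declare and define AddWhitespaceTokenize(), which registers the TFText>WhitespaceTokenizeWithOffsetsV2 kernel on a MutableOpResolver. A hedged sketch of how a client could wire it into an interpreter; the model path and the surrounding TFLite setup are illustrative assumptions, not part of this patch:

```cpp
// Sketch only. Assumes a TFLite build providing BuiltinOpResolver and
// InterpreterBuilder; "model.tflite" is a placeholder path.
#include <memory>

#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/kernels/register.h"
#include "tensorflow/lite/model.h"
#include "tensorflow_text/core/kernels/whitespace_tokenizer_tflite.h"

int main() {
  auto model = tflite::FlatBufferModel::BuildFromFile("model.tflite");
  if (model == nullptr) return 1;

  // BuiltinOpResolver derives from MutableOpResolver, so the custom
  // whitespace-tokenize kernel can be added alongside the builtins.
  tflite::ops::builtin::BuiltinOpResolver resolver;
  tflite::ops::custom::text::AddWhitespaceTokenize(&resolver);

  std::unique_ptr<tflite::Interpreter> interpreter;
  tflite::InterpreterBuilder(*model, resolver)(&interpreter);
  return interpreter != nullptr ? 0 : 1;
}
```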
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel.cc deleted file mode 100644 index 535c695..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel.cc +++ /dev/null
@@ -1,311 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <limits> -#include <memory> -#include <string> -#include <vector> - -#include "tensorflow/core/framework/dataset_stateful_op_allowlist.h" -#include "tensorflow/core/framework/lookup_interface.h" -#include "tensorflow/core/framework/op_kernel.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/core/threadpool.h" -#include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/platform/logging.h" -#include "tensorflow_text/core/kernels/wordpiece_tokenizer.h" - -namespace tensorflow { -namespace text { - -namespace { -string GetWordSplitChar(OpKernelConstruction* ctx) { - string suffix_indicator; - ([=](string* c) -> void { - OP_REQUIRES_OK(ctx, ctx->GetAttr("suffix_indicator", c)); - })(&suffix_indicator); - return suffix_indicator; -} - -int32 GetMaxCharsPerWord(OpKernelConstruction* ctx) { - int32 max_chars_per_word; - ([=](int32* c) -> void { - OP_REQUIRES_OK(ctx, ctx->GetAttr("max_bytes_per_word", c)); - })(&max_chars_per_word); - return max_chars_per_word; -} - -int32 GetMaxCharsPerToken(OpKernelConstruction* ctx) { - int32 max_chars_per_token; - ([=](int32* c) -> void { - OP_REQUIRES_OK(ctx, ctx->GetAttr("max_chars_per_token", c)); - })(&max_chars_per_token); - return max_chars_per_token; -} - -bool GetShouldUseUnknownToken(OpKernelConstruction* ctx) { - bool use_unknown_token; - ([=](bool* c) -> void { - OP_REQUIRES_OK(ctx, ctx->GetAttr("use_unknown_token", c)); - })(&use_unknown_token); - return use_unknown_token; -} - -string GetUnknownToken(OpKernelConstruction* ctx) { - string unknown_token; - ([=](string* c) -> void { - OP_REQUIRES_OK(ctx, ctx->GetAttr("unknown_token", c)); - })(&unknown_token); - return unknown_token; -} - -bool GetSplitUnknownCharacters(OpKernelConstruction* ctx) { - bool split_unknown_characters; - ([=](bool* c) -> void { - OP_REQUIRES_OK(ctx, ctx->GetAttr("split_unknown_characters", c)); - })(&split_unknown_characters); - return split_unknown_characters; -} - -Status GetTableHandle(const string& input_name, - OpKernelContext* ctx, - string* container, - string* table_handle) { - { - mutex* mu; - TF_RETURN_IF_ERROR(ctx->input_ref_mutex(input_name, &mu)); - mutex_lock l(*mu); - Tensor tensor; - TF_RETURN_IF_ERROR(ctx->mutable_input(input_name, &tensor, true)); - if (tensor.NumElements() != 2) { - return errors::InvalidArgument( - "Lookup table handle must be scalar, but had shape: ", - tensor.shape().DebugString()); - } - auto h = tensor.flat<tstring>(); - *container = h(0); - *table_handle = h(1); - } - return Status::OK(); -} - -// Gets the LookupTable stored in the ctx->resource_manager() with key -// passed by attribute with name input_name, returns null if the table -// doesn't exist. 
-Status GetLookupTable(const string& input_name, - OpKernelContext* ctx, - lookup::LookupInterface** table) { - string container; - string table_handle; - DataType handle_dtype; - TF_RETURN_IF_ERROR(ctx->input_dtype(input_name, &handle_dtype)); - if (handle_dtype == DT_RESOURCE) { - ResourceHandle handle; - TF_RETURN_IF_ERROR(HandleFromInput(ctx, input_name, &handle)); - return LookupResource(ctx, handle, table); - } else { - TF_RETURN_IF_ERROR( - GetTableHandle(input_name, ctx, &container, &table_handle)); - return ctx->resource_manager()->Lookup(container, table_handle, table); - } -} - -class LookupTableVocab : public WordpieceVocab { - public: - LookupTableVocab(lookup::LookupInterface* table, OpKernelContext* ctx); - - virtual LookupStatus Contains(const absl::string_view key, bool* value) const; - - private: - // not owned - mutable lookup::LookupInterface* table_; - OpKernelContext* ctx_; - Tensor default_value_; -}; - -Status ToStatus(const LookupStatus& status) { - if (status.success) { - return Status::OK(); - } - - return errors::InvalidArgument(status.error_msg); -} - -constexpr int64 kOutOfVocabValue = -1; - -LookupTableVocab::LookupTableVocab(lookup::LookupInterface* table, - OpKernelContext* ctx) - : table_(table), ctx_(ctx), default_value_(DT_INT64, TensorShape({1})) { - default_value_.flat<int64>()(0) = kOutOfVocabValue; -} - -LookupStatus LookupTableVocab::Contains(const absl::string_view key, - bool* value) const { - if (value == nullptr) { - return LookupStatus("Bad 'value' param."); - } - Tensor keys(DT_STRING, TensorShape({1})); - keys.flat<tstring>()(0) = tstring(key.data(), key.size()); - Tensor values(DT_INT64, TensorShape({1})); - auto status = table_->Find(ctx_, keys, &values, default_value_); - if (!status.ok()) - return LookupStatus(status.error_message()); - - if (static_cast<int64>(values.flat<int64>()(0)) != kOutOfVocabValue) { - *value = true; - return LookupStatus::OK(); - } - *value = false; - return LookupStatus::OK(); -} - -} // namespace - -class WordpieceTokenizeWithOffsetsOp : public OpKernel { - public: - explicit WordpieceTokenizeWithOffsetsOp(OpKernelConstruction* ctx) - : OpKernel(ctx), - suffix_indicator_(GetWordSplitChar(ctx)), - max_bytes_per_word_(GetMaxCharsPerWord(ctx)), - max_chars_per_token_(GetMaxCharsPerToken(ctx)), - use_unknown_token_(GetShouldUseUnknownToken(ctx)), - unknown_token_(GetUnknownToken(ctx)), - split_unknown_characters_(GetSplitUnknownCharacters(ctx)) { - string output_row_partition_type; - OP_REQUIRES_OK(ctx, ctx->GetAttr("output_row_partition_type", - &output_row_partition_type)); - if (output_row_partition_type == "row_lengths") { - row_partition_type_ = ROW_LENGTHS; - } else if (output_row_partition_type == "row_splits") { - row_partition_type_ = ROW_SPLITS; - } else { - OP_REQUIRES( - ctx, false, - errors::Internal("Unexpected value for output_row_partition_type")); - } - } - - void Compute(OpKernelContext* ctx) override { - const Tensor* input_values; - OP_REQUIRES_OK(ctx, ctx->input("input_values", &input_values)); - const auto& values_vec = input_values->flat<tstring>(); - - lookup::LookupInterface* lookup_table; - OP_REQUIRES_OK(ctx, - GetLookupTable("vocab_lookup_table", ctx, &lookup_table)); - core::ScopedUnref unref_me(lookup_table); - LookupTableVocab vocab_map(lookup_table, ctx); - - std::vector<string> subwords; - std::vector<int> begin_offset; - std::vector<int> end_offset; - std::vector<int> row_partition; - - if (row_partition_type_ == ROW_SPLITS) { - row_partition.push_back(0); - } - - // Iterate 
through all the values and wordpiece tokenize them. - for (int i = 0; i < values_vec.size(); ++i) { - // Tokenize into subwords and record the offset locations. - int num_wordpieces = 0; - OP_REQUIRES_OK( - ctx, ToStatus(WordpieceTokenize( - values_vec(i), max_bytes_per_word_, max_chars_per_token_, - suffix_indicator_, use_unknown_token_, unknown_token_, - split_unknown_characters_, &vocab_map, &subwords, - &begin_offset, &end_offset, &num_wordpieces))); - - // Record the row splits. - switch (row_partition_type_) { - case ROW_LENGTHS: - row_partition.push_back(num_wordpieces); - break; - case ROW_SPLITS: - row_partition.push_back(num_wordpieces + row_partition.back()); - break; - } - } - - std::vector<int64> output_subwords_shape; - output_subwords_shape.push_back(subwords.size()); - - std::vector<int64> output_row_partition_shape; - output_row_partition_shape.push_back(row_partition.size()); - - Tensor* output_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("output_values", - TensorShape(output_subwords_shape), - &output_values)); - auto output_values_vec = output_values->vec<tstring>(); - - Tensor* output_row_partition; - OP_REQUIRES_OK(ctx, - ctx->allocate_output("output_row_lengths", - TensorShape(output_row_partition_shape), - &output_row_partition)); - auto output_row_partition_vec = output_row_partition->vec<int64>(); - - Tensor* start_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("start_values", - TensorShape(output_subwords_shape), - &start_values)); - auto start_values_vec = start_values->vec<int64>(); - - Tensor* limit_values; - OP_REQUIRES_OK(ctx, ctx->allocate_output("limit_values", - TensorShape(output_subwords_shape), - &limit_values)); - auto limit_values_vec = limit_values->vec<int64>(); - - for (int i = 0; i < subwords.size(); ++i) { - output_values_vec(i) = subwords[i]; - } - - for (int i = 0; i < row_partition.size(); ++i) { - output_row_partition_vec(i) = row_partition[i]; - } - - for (int i = 0; i < begin_offset.size(); ++i) { - start_values_vec(i) = begin_offset[i]; - } - - for (int i = 0; i < end_offset.size(); ++i) { - limit_values_vec(i) = end_offset[i]; - } - } - - private: - enum RowPartitionType { ROW_LENGTHS, ROW_SPLITS }; - - const string suffix_indicator_; - const int max_bytes_per_word_; - const int max_chars_per_token_; - const bool use_unknown_token_; - const string unknown_token_; - const bool split_unknown_characters_; - RowPartitionType row_partition_type_; - - TF_DISALLOW_COPY_AND_ASSIGN(WordpieceTokenizeWithOffsetsOp); -}; - -REGISTER_KERNEL_BUILDER(Name("WordpieceTokenizeWithOffsets").Device(DEVICE_CPU), - WordpieceTokenizeWithOffsetsOp); -ALLOW_STATEFUL_OP_FOR_DATASET_FUNCTIONS("WordpieceTokenizeWithOffsets"); - -} // namespace text -} // namespace tensorflow
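The deleted wordpiece kernel above selects between two row-partition encodings via the output_row_partition_type attr: row_lengths records one token count per input, while row_splits records cumulative boundaries (one more entry than there are inputs). A small standalone sketch of the two encodings for the same token counts:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Number of wordpieces produced for each of three input values.
  const std::vector<int> num_wordpieces = {2, 0, 3};

  std::vector<int64_t> row_lengths;       // one entry per input
  std::vector<int64_t> row_splits = {0};  // one more entry than inputs
  for (int n : num_wordpieces) {
    row_lengths.push_back(n);
    row_splits.push_back(n + row_splits.back());
  }

  // row_lengths: 2 0 3   |   row_splits: 0 2 2 5
  for (auto v : row_lengths) std::cout << v << " ";
  std::cout << "| ";
  for (auto v : row_splits) std::cout << v << " ";
  std::cout << "\n";
  return 0;
}
```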
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel_test.cc deleted file mode 100644 index 31bf958d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_kernel_test.cc +++ /dev/null
@@ -1,53 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/fake_input.h" -#include "tensorflow/core/framework/node_def_builder.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/shape_inference_testutil.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/kernels/ops_testutil.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace { - -TEST(WordpieceTokenizeWithOffsetsOpTest, ShapeFn) { - // WordpieceTokenizeWithOffsets(input_values, vocab_lookup_table) -> - // [output_values, output_row_lengths, start_values, limit_values] - ShapeInferenceTestOp op("WordpieceTokenizeWithOffsets"); - auto& attr = *op.node_def.mutable_attr(); - - attr["output_row_partition_type"].set_s("row_lengths"); - INFER_OK(op, "?;?", "[?];[?];[?];[?]"); - INFER_OK(op, "[?];?", "[?];[d0_0];[?];[?]"); - INFER_OK(op, "[?];[]", "[?];[d0_0];[?];[?]"); - INFER_OK(op, "[5];?", "[?];[d0_0];[?];[?]"); - INFER_OK(op, "[5];[]", "[?];[d0_0];[?];[?]"); - INFER_ERROR("Shape must be rank 1 but is rank 0", op, "[];?"); - INFER_ERROR("Shape must be rank 1 but is rank 2", op, "[1,2];?"); - INFER_ERROR("Shape must be rank 0 but is rank 1", op, "?;[1]"); - - attr["output_row_partition_type"].set_s("row_splits"); - INFER_OK(op, "?;?", "[?];[?];[?];[?]"); - INFER_OK(op, "[?];?", "[?];[?];[?];[?]"); - INFER_OK(op, "[?];[]", "[?];[?];[?];[?]"); - INFER_OK(op, "[5];?", "[?];[6];[?];[?]"); - INFER_OK(op, "[5];[]", "[?];[6];[?];[?]"); -} - -} // namespace -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc index b125daa..74d30ef 100644 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc +++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.cc
@@ -1,4 +1,4 @@ -// Copyright 2021 TF.Text Authors. +// Copyright 2023 TF.Text Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -17,29 +17,22 @@ #include "absl/strings/str_cat.h" #include "absl/strings/str_join.h" #include "absl/strings/string_view.h" -#include "third_party/icu/source/common/unicode/utf8.h" +#include "icu4c/source/common/unicode/utf8.h" namespace tensorflow { namespace text { namespace { -LookupStatus Lookup(int byte_start, - int byte_end, +LookupStatus Lookup(int byte_start, int byte_end, const absl::string_view& token, const std::string& suffix_indicator, - const WordpieceVocab* vocab_map, - bool* in_vocab) { + const WordpieceVocab* vocab_map, bool* in_vocab) { int byte_len = byte_end - byte_start; absl::string_view substr(token.data() + byte_start, byte_len); - std::string lookup_value; - if (byte_start > 0) { - lookup_value = absl::StrCat(suffix_indicator, substr); - } else { - // absl::CopyToString - lookup_value.assign(substr.begin(), substr.end()); - } - return vocab_map->Contains(lookup_value, in_vocab); + return vocab_map->Contains( + byte_start > 0 ? absl::StrCat(suffix_indicator, substr) : substr, + in_vocab); } // Sets byte_end to the longest byte sequence which: @@ -47,15 +40,11 @@ // 2) is in the vocab OR if split_unknown_characters is true, is a single // UTF8 character. // If no match is found, found_match is set to false. -LookupStatus LongestMatchStartingAt(int byte_start, - const absl::string_view& token, - const std::string& suffix_indicator, - const int max_chars_per_subtoken, - bool split_unknown_characters, - const WordpieceVocab* vocab_map, - int* byte_end, - bool* found_match, - bool* match_is_unknown_character) { +LookupStatus LongestMatchStartingAt( + int byte_start, const absl::string_view& token, + const std::string& suffix_indicator, const int max_chars_per_subtoken, + bool split_unknown_characters, const WordpieceVocab* vocab_map, + int* byte_end, bool* found_match, bool* match_is_unknown_character) { *match_is_unknown_character = false; *found_match = false; const char* token_bytes = token.data(); @@ -78,8 +67,7 @@ bool in_vocab; auto status = Lookup(byte_start, byte_ends[i], token, suffix_indicator, vocab_map, &in_vocab); - if (!status.success) - return status; + if (!status.success) return status; if (in_vocab) { *byte_end = byte_ends[i]; *found_match = true; @@ -102,8 +90,7 @@ const std::string& unknown_token, std::vector<std::string>* subwords, std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_word_pieces) { + std::vector<int>* end_offset, int* num_word_pieces) { begin_offset->push_back(0); if (use_unknown_token) { subwords->push_back(unknown_token); @@ -119,12 +106,9 @@ // When a subword is found, this helper function will add the outputs to // 'subwords', 'begin_offset' and 'end_offset'. -void AddWord(const absl::string_view& token, - int byte_start, - int byte_end, +void AddWord(const absl::string_view& token, int byte_start, int byte_end, const std::string& suffix_indicator, - std::vector<std::string>* subwords, - std::vector<int>* begin_offset, + std::vector<std::string>* subwords, std::vector<int>* begin_offset, std::vector<int>* end_offset) { begin_offset->push_back(byte_start); int len = byte_end - byte_start; @@ -141,10 +125,8 @@ // Adds a single unknown character subword, found when split_unknown_characters // is true. 
-void AddUnknownCharacter(const absl::string_view& token, - int byte_start, - int byte_end, - const std::string& suffix_indicator, +void AddUnknownCharacter(const absl::string_view& token, int byte_start, + int byte_end, const std::string& suffix_indicator, bool use_unknown_token, const std::string& unknown_token, std::vector<std::string>* subwords, @@ -171,18 +153,13 @@ } } -LookupStatus TokenizeL2RGreedy(const absl::string_view& token, - const int max_bytes_per_token, - const int max_chars_per_subtoken, - const std::string& suffix_indicator, - bool use_unknown_token, - const std::string& unknown_token, - bool split_unknown_characters, - const WordpieceVocab* vocab_map, - std::vector<std::string>* subwords, - std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_word_pieces) { +LookupStatus TokenizeL2RGreedy( + const absl::string_view& token, const int max_bytes_per_token, + const int max_chars_per_subtoken, const std::string& suffix_indicator, + bool use_unknown_token, const std::string& unknown_token, + bool split_unknown_characters, const WordpieceVocab* vocab_map, + std::vector<std::string>* subwords, std::vector<int>* begin_offset, + std::vector<int>* end_offset, int* num_word_pieces) { std::vector<std::string> candidate_subwords; std::vector<int> candidate_begin_offsets; std::vector<int> candidate_end_offsets; @@ -195,8 +172,7 @@ byte_start, token, suffix_indicator, max_chars_per_subtoken, split_unknown_characters, vocab_map, &byte_end, &found_subword, &match_is_unknown_character); - if (!status.success) - return status; + if (!status.success) return status; if (found_subword) { if (match_is_unknown_character) { AddUnknownCharacter(token, byte_start, byte_end, suffix_indicator, @@ -227,18 +203,13 @@ } // namespace -LookupStatus WordpieceTokenize(const absl::string_view& token, - const int max_bytes_per_token, - const int max_chars_per_subtoken, - const std::string& suffix_indicator, - bool use_unknown_token, - const std::string& unknown_token, - bool split_unknown_characters, - const WordpieceVocab* vocab_map, - std::vector<std::string>* subwords, - std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_word_pieces) { +LookupStatus WordpieceTokenize( + const absl::string_view& token, const int max_bytes_per_token, + const int max_chars_per_subtoken, const std::string& suffix_indicator, + bool use_unknown_token, const std::string& unknown_token, + bool split_unknown_characters, const WordpieceVocab* vocab_map, + std::vector<std::string>* subwords, std::vector<int>* begin_offset, + std::vector<int>* end_offset, int* num_word_pieces) { int token_len = token.size(); if (token_len > max_bytes_per_token) { begin_offset->push_back(0); @@ -258,16 +229,12 @@ begin_offset, end_offset, num_word_pieces); } -LookupStatus WordpieceTokenize(const absl::string_view& token, - const int max_bytes_per_token, - const std::string& suffix_indicator, - bool use_unknown_token, - const std::string& unknown_token, - const WordpieceVocab* vocab_map, - std::vector<std::string>* subwords, - std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_word_pieces) { +LookupStatus WordpieceTokenize( + const absl::string_view& token, const int max_bytes_per_token, + const std::string& suffix_indicator, bool use_unknown_token, + const std::string& unknown_token, const WordpieceVocab* vocab_map, + std::vector<std::string>* subwords, std::vector<int>* begin_offset, + std::vector<int>* end_offset, int* num_word_pieces) { return WordpieceTokenize(token, 
max_bytes_per_token, /* max_chars_per_subtoken= */ 0, suffix_indicator, use_unknown_token, unknown_token,
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h index 464386c..c888aeb 100644 --- a/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h +++ b/third_party/tensorflow-text/src/tensorflow_text/core/kernels/wordpiece_tokenizer.h
@@ -1,4 +1,4 @@ -// Copyright 2021 TF.Text Authors. +// Copyright 2023 TF.Text Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #define TENSORFLOW_TEXT_CORE_KERNELS_WORDPIECE_TOKENIZER_H_ #include <string> +#include <utility> #include <vector> #include "absl/strings/string_view.h" @@ -25,7 +26,7 @@ struct LookupStatus { LookupStatus() : error_msg(""), success(true) {} - LookupStatus(const std::string& msg) : error_msg(msg), success(false) {} + LookupStatus(std::string msg) : error_msg(std::move(msg)), success(false) {} std::string error_msg; bool success; @@ -39,31 +40,22 @@ bool* value) const = 0; }; -LookupStatus WordpieceTokenize(const absl::string_view& token, - const int max_bytes_per_token, - const int max_chars_per_subtoken, - const std::string& suffix_indicator, - bool use_unknown_token, - const std::string& unknown_token, - bool split_unknown_characters, - const WordpieceVocab* vocab_map, - std::vector<std::string>* subwords, - std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_word_pieces); +LookupStatus WordpieceTokenize( + const absl::string_view& token, const int max_bytes_per_token, + const int max_chars_per_subtoken, const std::string& suffix_indicator, + bool use_unknown_token, const std::string& unknown_token, + bool split_unknown_characters, const WordpieceVocab* vocab_map, + std::vector<std::string>* subwords, std::vector<int>* begin_offset, + std::vector<int>* end_offset, int* num_word_pieces); // As above but with `max_bytes_per_subtoken` unknown, // and split_unknown_characters=false. (For backwards compatability.) -LookupStatus WordpieceTokenize(const absl::string_view& token, - const int max_bytes_per_token, - const std::string& suffix_indicator, - bool use_unknown_token, - const std::string& unknown_token, - const WordpieceVocab* vocab_map, - std::vector<std::string>* subwords, - std::vector<int>* begin_offset, - std::vector<int>* end_offset, - int* num_word_pieces); +LookupStatus WordpieceTokenize( + const absl::string_view& token, const int max_bytes_per_token, + const std::string& suffix_indicator, bool use_unknown_token, + const std::string& unknown_token, const WordpieceVocab* vocab_map, + std::vector<std::string>* subwords, std::vector<int>* begin_offset, + std::vector<int>* end_offset, int* num_word_pieces); } // namespace text } // namespace tensorflow
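The header diff above keeps the public surface intact: WordpieceVocab is a pure-virtual lookup interface, and WordpieceTokenize is the entry point (the second overload passes 0 for max_chars_per_subtoken and false for split_unknown_characters). A hedged usage sketch with a set-backed vocab; the vocabulary contents and the "##" suffix indicator are illustrative:

```cpp
// Sketch only: assumes the TF.Text header above is available and linked.
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "absl/strings/string_view.h"
#include "tensorflow_text/core/kernels/wordpiece_tokenizer.h"

namespace tt = tensorflow::text;

// Minimal vocab backed by a std::set; lookups always "succeed".
class SetVocab : public tt::WordpieceVocab {
 public:
  explicit SetVocab(std::set<std::string> words) : words_(std::move(words)) {}
  tt::LookupStatus Contains(const absl::string_view key,
                            bool* value) const override {
    *value = words_.count(std::string(key.data(), key.size())) > 0;
    return tt::LookupStatus();  // default-constructed status means success
  }

 private:
  std::set<std::string> words_;
};

int main() {
  SetVocab vocab({"un", "##aff", "##able"});
  std::vector<std::string> subwords;
  std::vector<int> begin_offset, end_offset;
  int num_word_pieces = 0;

  tt::LookupStatus status = tt::WordpieceTokenize(
      "unaffable", /*max_bytes_per_token=*/100, /*suffix_indicator=*/"##",
      /*use_unknown_token=*/true, /*unknown_token=*/"[UNK]", &vocab,
      &subwords, &begin_offset, &end_offset, &num_word_pieces);
  if (!status.success) return 1;

  for (const auto& sw : subwords) std::cout << sw << " ";  // un ##aff ##able
  std::cout << "\n";
  return 0;
}
```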
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/constrained_sequence_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/constrained_sequence_op.cc deleted file mode 100644 index 515153c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/constrained_sequence_op.cc +++ /dev/null
@@ -1,94 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -REGISTER_OP("ConstrainedSequence") - .Attr("Tin: {int32, int64}") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .Attr("use_viterbi: bool") - .Attr("use_log_space: bool") - .Attr("use_start_and_end_states: bool") - .Input("scores: float") - .Input("sequence_lengths: Tin") - .Input("allowed_transitions: bool") - .Input("transition_weights: float") - .Output("states: int32") - .Output("states_splits: Tsplits") - - // TODO(b/122968457): Implement a shape function. - .Doc(R"doc( -Constrains a set of predictions based on a set of legal transitions and/or a -set of transition weights, returning the legal sequence that maximizes the -product of the state scores and the transition weights using the chained -conditional random field algorithm. (In case of a tie, the state with a higher -index will be chosen.) - -This op takes in a set of scores and outputs the most likely legal sequence -for each batch element, where the most likely legal sequence is determined by -the optional 'allowed_transitions' and 'transition_weights' tensors. - -The 'allowed_transition' tensor may be omitted; if it is, all sequence states -will be allowed to transition to all other sequence states. If the tensor is -provided it must be of the size [num_states+1][num_states+1]. - -allowed_transitions[i][j] is true if the transition from state i to state -j is allowed for i and j in 0...(num_states). -allowed_transitions[num_states][j] is true if the sequence is allowed to -start from state j. -allowed_transitions[i][num_states] is true if the sequence is allowed to -end on state i. -allowed_transitions[num_states][num_states] is ignored. - -The 'transition_weights' tensor may be omitted; if it is, all transitions will -be weighted with a value of 1.0. If the tensor is provided it must be of the -size [num_states+1][num_states+1]. - -transition_weights[i][j] is the coefficient that a candidate transition score -will be multiplied by if that transition is from state i to state j. -transition_weights[num_states][j] is the coefficient that will be used -if the transition starts with state j. -transition_weights[i][num_states] is the coefficient that will be used -if the final state in the sequence is state i. -transition_weights[num_states][num_states] is ignored. - -This op outputs a RaggedTensor value and splits pair. - -scores: <float>[batch_size, num_steps, |num_states|] A tensor of scores, where - `scores[b, t, s]` is the predicted score for transitioning to state `s` - at step `t` for batch `b`. The |num_states| dimension must correspond - to the num_states attribute for this op. -sequence_lengths: <{int32, int64}>[batch_size] A tensor containing the length - of each sequence in the batch. 
-allowed_transitions: <bool>[num_states+1, num_states+1] A boolean matrix of - allowed transitions, or an empty matrix '[]' to allow all transitions. -transition_weights: <float>[num_states+1, num_states+1] A float matrix of score - coefficients, or an empty matrix '[]' to weight all transitions equally. -states: <int32>[batch_size, max_sequence_length] OR <int32>[total_num_states] - A set of sequence outputs representing the most likely valid sequences - for each batch. If `output_ragged_tensor` is false, this will be in - [batch_size, max_sequence_length] form; if `output_ragged_tensor` is - true, this will be a RaggedTensor data vector of shape - [total_num_states]. -states_splits: <int64>[batch_size+1] A RaggedTensor splits vector. If - `output_ragged_tensor` is true, then the state sequence for input `i` - is stored in `states[states_splits[i]:states_splits[i+1]]`. If - `output_ragged_tensor` is false, this tensor will be empty and can be - ignored. -)doc"); - -} // namespace tensorflow
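The deleted ConstrainedSequence registration documents the (num_states+1) x (num_states+1) layout of allowed_transitions: the extra row flags allowed start states, the extra column flags allowed end states, and the corner entry is ignored. A small standalone sketch of that indexing for num_states = 2 (the concrete transition choices are illustrative):

```cpp
#include <iostream>

int main() {
  constexpr int kNumStates = 2;
  // allowed[i][j]          : transition i -> j is legal (i, j < kNumStates)
  // allowed[kNumStates][j] : sequences may start in state j
  // allowed[i][kNumStates] : sequences may end in state i
  // allowed[kNumStates][kNumStates] is ignored.
  const bool allowed[kNumStates + 1][kNumStates + 1] = {
      {true, true, true},    // from state 0: 0->0, 0->1, and may end in 0
      {false, true, true},   // from state 1: only 1->1, and may end in 1
      {true, false, false},  // start states: only state 0 may start
  };
  std::cout << "0 -> 1 legal: " << allowed[0][1] << "\n";
  std::cout << "may start in state 1: " << allowed[kNumStates][1] << "\n";
  return 0;
}
```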
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/fast_wordpiece_tokenizer_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/fast_wordpiece_tokenizer_op.cc deleted file mode 100644 index 541892a3..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/fast_wordpiece_tokenizer_op.cc +++ /dev/null
@@ -1,31 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_OPS_FAST_WORDPIECE_TOKENIZER_OP_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_OPS_FAST_WORDPIECE_TOKENIZER_OP_H_ - -#include "tensorflow/lite/kernels/shim/tf_op_shim.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_kernel.h" - -namespace tensorflow { -namespace text { - -REGISTER_TF_OP_SHIM(FastWordpieceTokenizeWithOffsetsOpKernel); - -REGISTER_TF_OP_SHIM(FastWordpieceDetokenizeOpKernel); - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_OPS_FAST_WORDPIECE_TOKENIZER_OP_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/fast_wordpiece_tokenizer_op_test.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/fast_wordpiece_tokenizer_op_test.cc deleted file mode 100644 index c950d816..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/fast_wordpiece_tokenizer_op_test.cc +++ /dev/null
@@ -1,56 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/shape_inference_testutil.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/platform/test.h" - -namespace tensorflow { -namespace { - -TEST(FastWordpieceTokenizeWithOffsetsOpTest, ShapeFn) { - // FastWordpieceTokenizeWithOffsets(input_values, wp_model) -> - // [output_values, output_ids, output_row_splits, start_values, - // end_values] - ShapeInferenceTestOp op("FastWordpieceTokenizeWithOffsets"); - - INFER_OK(op, "?;?", "[?];[?];[?];[?];[?]"); - INFER_OK(op, "[?];?", "[?];[?];[?];[?];[?]"); - INFER_OK(op, "[5];?", "[?];[?];[6];[?];[?]"); - INFER_OK(op, "[6];[?]", "[?];[?];[7];[?];[?]"); - INFER_ERROR("Shape must be rank 1", op, "[];?"); - INFER_ERROR("Shape must be rank 1", op, "[1,2];?"); - INFER_ERROR("Shape must be rank 1", op, "?;[]"); - INFER_ERROR("Shape must be rank 1", op, "?;[?,?]"); -} - -TEST(FastWordpieceDetokenizeOpTest, ShapeFn) { - // FastWordpieceTokenizeWithOffsets(input_values, input_row_splits, wp_model) - // -> [output_values] - ShapeInferenceTestOp op("TFText>FastWordpieceDetokenize"); - INFER_OK(op, "?;?;?", "[?]"); - INFER_OK(op, "[?];[?];?", "[?]"); - INFER_OK(op, "[5];[?];?", "[?]"); - INFER_OK(op, "[6];[?];[?]", "[?]"); - INFER_ERROR("Shape must be rank 1", op, "[];?;?"); - INFER_ERROR("Shape must be rank 1", op, "[1,2];?;?"); - INFER_ERROR("Shape must be rank 1", op, "?;[];?"); - INFER_ERROR("Shape must be rank 1", op, "?;[?,?];?"); -} - -} // namespace -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/mst_ops.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/mst_ops.cc deleted file mode 100644 index 1c271ab..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/mst_ops.cc +++ /dev/null
@@ -1,85 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -namespace text { - -REGISTER_OP("MaxSpanningTree") - .Attr("T: {int32, float, double}") - .Attr("forest: bool = false") - .Input("num_nodes: int32") - .Input("scores: T") - .Output("max_scores: T") - .Output("argmax_sources: int32") - .SetShapeFn([](tensorflow::shape_inference::InferenceContext* context) { - tensorflow::shape_inference::ShapeHandle num_nodes; - tensorflow::shape_inference::ShapeHandle scores; - TF_RETURN_IF_ERROR(context->WithRank(context->input(0), 1, &num_nodes)); - TF_RETURN_IF_ERROR(context->WithRank(context->input(1), 3, &scores)); - - // Extract dimensions while asserting that they match. - tensorflow::shape_inference::DimensionHandle batch_size; // aka "B" - TF_RETURN_IF_ERROR(context->Merge(context->Dim(num_nodes, 0), - context->Dim(scores, 0), &batch_size)); - tensorflow::shape_inference::DimensionHandle max_nodes; // aka "M" - TF_RETURN_IF_ERROR(context->Merge(context->Dim(scores, 1), - context->Dim(scores, 2), &max_nodes)); - - context->set_output(0, context->Vector(batch_size)); - context->set_output(1, context->Matrix(batch_size, max_nodes)); - return tensorflow::Status::OK(); - }) - .Doc(R"doc( -Finds the maximum directed spanning tree of a digraph. - -Given a batch of directed graphs with scored arcs and root selections, solves -for the maximum spanning tree of each digraph, where the score of a tree is -defined as the sum of the scores of the arcs and roots making up the tree. - -Returns the score of the maximum spanning tree of each digraph, as well as the -arcs and roots in that tree. Each digraph in a batch may contain a different -number of nodes, so the sizes of the digraphs must be provided as an input. - -Note that this operation is only differentiable w.r.t. its |scores| input and -its |max_scores| output. - -The code here is intended for NLP applications, but attempts to remain -agnostic to particular NLP tasks (such as dependency parsing). - -forest: If true, solves for a maximum spanning forest instead of a maximum - spanning tree, where a spanning forest is a set of disjoint trees that - span the nodes of the digraph. -num_nodes: [B] vector where entry b is number of nodes in the b'th digraph. -scores: [B,M,M] tensor where entry b,t,s is the score of the arc from node s to - node t in the b'th directed graph if s!=t, or the score of selecting - node t as a root in the b'th digraph if s==t. This uniform tenosor - requires that M is >= num_nodes[b] for all b (ie. all graphs in the - batch), and ignores entries b,s,t where s or t is >= num_nodes[b]. - Arcs or root selections with non-finite score are treated as - nonexistent. -max_scores: [B] vector where entry b is the score of the maximum spanning tree - of the b'th digraph. 
-argmax_sources: [B,M] matrix where entry b,t is the source of the arc inbound to - t in the maximum spanning tree of the b'th digraph, or t if t is - a root. Entries b,t where t is >= num_nodes[b] are set to -1. - Quickly finding the roots can be done as: - tf.equal(tf.map_fn(lambda x: tf.range(tf.size(x)), - argmax_sources), argmax_sources) -)doc"); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/normalize_ops.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/normalize_ops.cc deleted file mode 100644 index 1170826..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/normalize_ops.cc +++ /dev/null
@@ -1,92 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -namespace text { - -REGISTER_OP("CaseFoldUTF8") - .Input("input: string") - .Output("output: string") - .SetShapeFn(::tensorflow::shape_inference::UnchangedShape) - .Doc(R"doc( -Applies case folding to every UTF8 string in input_tensor. The input is a dense -tensor of any shape and the output has the same shape as the input. - -For example if: - - input = [ 'The Quick-Brown', - 'CAT jumped over', - 'the lazy dog !! '] - - output = [ 'The quick-brown', - 'cat jumped over', - 'the lazy dog !! '] -)doc"); - -REGISTER_OP("NormalizeUTF8") - .Input("input: string") - .Attr("normalization_form: string") - .Output("output: string") - .SetShapeFn(::tensorflow::shape_inference::UnchangedShape) - .Doc(R"doc( -Normalizes each UTF8 string in the input tensor using 'normalization_form' -rules. - -See http://unicode.org/reports/tr15/ -)doc"); - -REGISTER_OP("NormalizeUTF8WithOffsetsMap") - .Input("input: string") - .Attr("normalization_form: string") - .Output("output: string") - .Output("offsets_map: variant") - .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { - c->set_output(0, c->input(0)); - c->set_output(1, c->input(0)); - return Status::OK(); - }) - .Doc(R"doc( -Normalizes each UTF8 string in the input tensor using 'normalization_form' -rules. Returns the normalized strings in the output tensor and a tensor of the -same shape containing offsets_map variant, which can be used to map the post- -normalized string offsets to pre-normalized string offsets. - -See http://unicode.org/reports/tr15/ -)doc"); - -REGISTER_OP("FindSourceOffsets") - .Input("offsets_map: variant") - .Input("input_offsets_values: int64") - .Input("input_offsets_splits: Tsplits") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .Output("output_offsets_values: int64") - .SetShapeFn([](::tensorflow::shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); - c->set_output(0, c->input(1)); - return Status::OK(); - }) - .Doc(R"doc( -Map the post-normalized string offsets in the input tensor to the pre-normalized -string offsets using an input tensor containing offsets_map variant. -)doc"); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/regex_split_ops.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/regex_split_ops.cc deleted file mode 100644 index 36e6ae5..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/regex_split_ops.cc +++ /dev/null
@@ -1,51 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -namespace text { - -Status RegexSplitOpShape(shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - - for (int i = 0; i < c->num_outputs(); ++i) { - c->set_output(i, c->UnknownShapeOfRank(1)); - } - return Status::OK(); -} - -REGISTER_OP("RegexSplitWithOffsets") - .Input("input: string") - .Input("delim_regex_pattern: string") - .Input("keep_delim_regex_pattern: string") - .Output("tokens: string") - .Output("begin_offsets: int64") - .Output("end_offsets: int64") - .Output("row_splits: int64") - .SetShapeFn(RegexSplitOpShape) - .Doc(R"doc( -Split strings using a regex as the delimiter. - -See https://github.com/google/re2/wiki/Syntax for the full list of supported -expressions. -)doc"); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/rouge_l_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/rouge_l_op.cc deleted file mode 100644 index ac9a3ff..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/rouge_l_op.cc +++ /dev/null
@@ -1,102 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -using shape_inference::DimensionHandle; -using shape_inference::InferenceContext; -using shape_inference::ShapeHandle; - -Status RougeLShapeFn(InferenceContext* c); - -REGISTER_OP("RougeL") - .Input("hyp_values: Tvalues") - .Input("hyp_splits: Tsplits") - .Input("ref_values: Tvalues") - .Input("ref_splits: Tsplits") - .Input("alpha: float") - .Output("f_measure: float") - .Output("p_measure: float") - .Output("r_measure: float") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .Attr("Tvalues: type") - .SetShapeFn(RougeLShapeFn) - .Doc(R"doc( -Computes the LCS-based F-measure score between the hypotheses and references. - - Source: https://www.microsoft.com/en-us/research/publication/rouge-a-package-for-automatic-evaluation-of-summaries/ - -This Op does not impose any tokenization scheme, in order to give callers -more flexibility. - -An F-Measure is computed for each (hyp, ref) pair. As such, there must be an -equal number of sentences in the hypotheses and references. - -The alpha parameter is used to weight precision and recall. A value of .5 -represents matches the default value of the ROUGE-1.5.5.pl script. Negative -values will trigger a compatibility mode with tensor2tensor ROUGE. - -A convenient way to compute ROUGE-L over a batch of sentences is to tokenize -them into tf.RaggedTensor format and then call this method with -tokens.values and tokens.row_splits. - -The output is a 1D Tensor of shape [S-1], where S is the number of sentence -splits. 
- -hyp_values: a 1D Tensor of shape [H] containing all hypothesis tokens -hyp_splits: a 1D Tensor of shape [S] containing hypothesis sentence splits -ref_values: a 1D Tensor of shape [R] containing all reference tokens -ref_splits: a 1D Tensor of shape [S] containing reference sentence splits -alpha: a 0D scalar Tensor containing the value of the Alpha parameter -f_measure: a 1D Tensor of shape [S-1] containing LCS F-measure scores -p_measure: a 1D Tensor of shape [S-1] containing LCS P-measure scores -r_measure: a 1D Tensor of shape [S-1] containing LCS R-measure scores -)doc"); - -Status RougeLShapeFn(InferenceContext* c) { - ShapeHandle unused; - - // Check rank of inner values - ShapeHandle hyp_values_shape = c->input(0); - ShapeHandle hyp_splits_shape = c->input(1); - ShapeHandle ref_values_shape = c->input(2); - ShapeHandle ref_splits_shape = c->input(3); - ShapeHandle beta_shape = c->input(4); - - TF_RETURN_IF_ERROR(c->WithRank(hyp_values_shape, 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(hyp_splits_shape, 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(ref_values_shape, 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(ref_splits_shape, 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(beta_shape, 0, &unused)); - - ShapeHandle output_nrows_plus_one; - TF_RETURN_IF_ERROR( - c->Merge(hyp_splits_shape, ref_splits_shape, &output_nrows_plus_one)); - - // Output shape is a 1-D tensor with size equal to number of splits minus 1. - DimensionHandle dim; - TF_RETURN_IF_ERROR(c->Subtract(c->Dim(output_nrows_plus_one, 0), 1, &dim)); - - // All outputs have the same shape. - c->set_output(0, c->Vector(dim)); - c->set_output(1, c->Vector(dim)); - c->set_output(2, c->Vector(dim)); - - return Status::OK(); -} - -} // namespace tensorflow
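Note: the RougeL op doc above describes the batched recipe of tokenizing into a tf.RaggedTensor and passing values plus row_splits. A hedged sketch of that recipe, assuming the upstream Python wrapper `tensorflow_text.metrics.rouge_l` (the module path and return fields are assumptions):

```python
# Sketch only: ROUGE-L over a batch of tokenized sentences, following the
# ragged-tensor recipe in the op documentation above.
import tensorflow as tf
import tensorflow_text as tf_text  # assumed upstream package

hypotheses = tf.ragged.constant([["the", "cat", "sat"], ["hello", "world"]])
references = tf.ragged.constant([["the", "cat", "was", "sat"], ["hello", "there"]])

# alpha weights precision vs. recall; 0.5 mirrors the ROUGE-1.5.5.pl default
# mentioned in the doc string above, negative values mirror tensor2tensor.
f_measure, p_measure, r_measure = tf_text.metrics.rouge_l(
    hypotheses, references, alpha=0.5)
print(f_measure)  # shape [2]: one score per (hyp, ref) pair
print(p_measure)
print(r_measure)
```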
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentence_breaking_ops.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentence_breaking_ops.cc deleted file mode 100644 index 4eaf7de..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentence_breaking_ops.cc +++ /dev/null
@@ -1,50 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -Status SentenceFragmentShapeFn( - ::tensorflow::shape_inference::InferenceContext* c) { - for (int i = 0; i < c->num_outputs(); ++i) { - c->set_output(i, c->UnknownShapeOfRank(1)); - } - - return Status::OK(); -} - -REGISTER_OP("SentenceFragments") - .Attr("input_encoding: string") - .Attr("errors: {'strict', 'replace', 'ignore'} = 'replace'") - .Attr("replacement_char: int = 65533") // 0xFFFD unicode replacement char - .Attr("replace_control_characters: bool = false") - .Input("row_lengths: int64") - .Input("token_start: int64") - .Input("token_end: int64") - .Input("token_word: string") - .Input("token_properties: int64") - .Output("fragment_start: int64") - .Output("fragment_end: int64") - .Output("fragment_properties: int64") - .Output("terminal_punc_token: int64") - .Output("output_row_lengths: int64") - .SetShapeFn(SentenceFragmentShapeFn); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentence_breaking_ops_v2.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentence_breaking_ops_v2.cc deleted file mode 100644 index 32c4088..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentence_breaking_ops_v2.cc +++ /dev/null
@@ -1,45 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/lib/core/status.h" - -namespace tensorflow { -namespace text { - -Status SentenceFragmentV2ShapeFn( - ::tensorflow::shape_inference::InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - - for (int i = 0; i < c->num_outputs(); ++i) { - c->set_output(i, c->UnknownShapeOfRank(1)); - } - - return Status::OK(); -} - -REGISTER_OP("SentenceFragmentsV2") - .Input("doc: string") - .Output("fragment_start: int64") - .Output("fragment_end: int64") - .Output("fragment_properties: int64") - .Output("terminal_punc_token: int64") - .Output("output_row_lengths: int64") - .SetShapeFn(SentenceFragmentV2ShapeFn); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentencepiece_ops.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentencepiece_ops.cc deleted file mode 100644 index 7b9a077a..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/sentencepiece_ops.cc +++ /dev/null
@@ -1,173 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/lib/core/errors.h" - -namespace tensorflow { -namespace text { - -using errors::InvalidArgument; -using shape_inference::InferenceContext; - -REGISTER_OP("SentencepieceOp") - .Attr("model: string = ''") - .Attr("container: string = ''") - .Attr("shared_name: string = ''") - .Attr("use_node_name_sharing: bool = false") - .Output("handle: resource") - .SetIsStateful() - .SetShapeFn([](InferenceContext* c) { - c->set_output(0, c->Scalar()); - return Status::OK(); - }); - -REGISTER_OP("SentencepieceTokenizeOp") - .Input("sp_handle: resource") - .Input("input: string") - .Input("nbest_size: int32") - .Input("alpha: float") - .Input("add_bos: bool") - .Input("add_eos: bool") - .Input("reverse: bool") - .Attr("out_type: {int32, string} = DT_INT32") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .Attr("return_nbest: bool = false") - .Output("output_values: out_type") - .Output("output_splits: Tsplits") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused)); - - c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); - bool return_nbest = false; - if (c->GetAttr("return_nbest", &return_nbest).ok() && return_nbest) { - c->set_output(1, c->Vector(c->UnknownDim())); - } else { - shape_inference::DimensionHandle num_splits; - TF_RETURN_IF_ERROR(c->Add(c->NumElements(c->input(1)), 1, &num_splits)); - c->set_output(1, c->Vector(num_splits)); - } - return Status::OK(); - }); - -REGISTER_OP("SentencepieceTokenizeWithOffsetsOp") - .Input("sp_handle: resource") - .Input("input: string") - .Input("nbest_size: int32") - .Input("alpha: float") - .Input("add_bos: bool") - .Input("add_eos: bool") - .Input("reverse: bool") - .Attr("out_type: {int32, string} = DT_INT32") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .Attr("return_nbest: bool = false") - .Output("output_values: out_type") - .Output("output_splits: Tsplits") - .Output("output_offset_starts: int64") - .Output("output_offset_limits: int64") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - 
TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(6), 0, &unused)); - - c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); - - bool return_nbest = false; - if (c->GetAttr("return_nbest", &return_nbest).ok() && return_nbest) { - c->set_output(1, c->Vector(c->UnknownDim())); - } else { - shape_inference::DimensionHandle num_splits; - TF_RETURN_IF_ERROR(c->Add(c->NumElements(c->input(1)), 1, &num_splits)); - c->set_output(1, c->Vector(num_splits)); - } - c->set_output(2, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(3, c->Vector(InferenceContext::kUnknownDim)); - return Status::OK(); - }); - -REGISTER_OP("SentencepieceDetokenizeOp") - .Input("sp_handle: resource") - .Input("input_values: T") - .Input("input_splits: Tsplits") - .Input("add_bos: bool") - .Input("add_eos: bool") - .Input("reverse: bool") - .Attr("T: {int32, string} = DT_INT32") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .Output("output: string") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(5), 0, &unused)); - - shape_inference::DimensionHandle dim; - TF_RETURN_IF_ERROR(c->Subtract(c->NumElements(c->input(2)), 1, &dim)); - c->set_output(0, c->Vector(dim)); - return Status::OK(); - }); - -REGISTER_OP("SentencepieceVocabSizeOp") - .Input("sp_handle: resource") - .Output("vocab_size: int32") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - c->set_output(0, c->Scalar()); - return Status::OK(); - }); - -REGISTER_OP("SentencepieceIdToStringOp") - .Input("sp_handle: resource") - .Input("input: int32") - .Output("values: string") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - c->set_output(0, c->input(1)); - return Status::OK(); - }); - -REGISTER_OP("SentencepieceStringToIdOp") - .Input("sp_handle: resource") - .Input("input: string") - .Output("values: int32") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - c->set_output(0, c->input(1)); - return Status::OK(); - }); - -} // namespace text -} // namespace tensorflow
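Note: the resource-based Sentencepiece ops deleted above (tokenize, detokenize, vocab size, id/string lookup) are wrapped upstream by a single Python class. A minimal sketch, assuming `tensorflow_text.SentencepieceTokenizer` and a serialized SentencePiece model proto; `sp_model` below is a placeholder, not something defined in this change:

```python
# Sketch only: the removed Sentencepiece*Op kernels back this upstream API.
import tensorflow as tf
import tensorflow_text as tf_text  # assumed upstream package

# sp_model: bytes of a trained SentencePiece model proto (placeholder).
tokenizer = tf_text.SentencepieceTokenizer(model=sp_model, out_type=tf.int32)

ids = tokenizer.tokenize(["hello world", "sentencepiece models"])
print(ids.to_list())                # ragged int32 ids, one row per input string
print(tokenizer.vocab_size())       # scalar, backed by SentencepieceVocabSizeOp
print(tokenizer.id_to_string(ids))  # ragged strings, same shape as ids
print(tokenizer.detokenize(ids))    # round-trips back to the input strings
```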
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/split_merge_tokenize_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/split_merge_tokenize_op.cc deleted file mode 100644 index 718ca92..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/split_merge_tokenize_op.cc +++ /dev/null
@@ -1,96 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -namespace text { - -using shape_inference::DimensionHandle; -using shape_inference::InferenceContext; -using shape_inference::ShapeHandle; - -Status SplitMergeTokenizeWithOffsetsShapeFn(InferenceContext* c); - -REGISTER_OP("SplitMergeTokenizeWithOffsets") - .Input("input_values: string") - .Input("labels: int32") - .Input("row_splits: int32") - .Attr("force_split_at_break_character: bool = true") - .Output("output_values: string") - .Output("output_row_splits: int64") - .Output("start_values: int64") - .Output("limit_values: int64") - .SetShapeFn(SplitMergeTokenizeWithOffsetsShapeFn) - .Doc(R"doc( - Segment input string according to the given split(0)/merge(1) labels of each - character in the input string. - - ### Example: - - ```python - >>> strs = ["Itis", - "thanksgiving"] - >>> labels = [0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1] - >>> row_splits = [0, 4, 16] - >>> words, row_splits, start, end = create_token(strs, labels) - >>> RaggedTensor.from_row_splits(words, row_splits) - [['It', 'is'], ['thanks', 'giving']] - >>> RaggedTensor.from_row_splits(start, row_splits) - start = [[[0, 2], [0, 6]]] - >>> RaggedTensor.from_row_splits(end, row_splits) - end = [[[2, 4], [6, 11]]] - ``` - - Args: - input_values: 1D Tensor of strings to tokenize with. - labels: 1D Tensor of split merge labels. - row_splits: row_splits together with labels forms a 2D ragged tensor, the - ith row corresponds to the split/merge labels for input_values[i]. - force_split_at_break_character: bool indicates whether to force start a - new word after seeing a ICU defined whitespace character. - - Returns: - * output_values: 1D tensor containing the tokens for all input strings. - A 2D RaggedTensor can be constructed from this and output_row_splits. - * output_row_splits: 1D tensor containing row split offsets indicating the - start and end offsets in the output values for each input string. - * start_values: 1D tensor containing the inclusive start byte offset for - each token in all input strings. Corresponds 1:1 with output_values. - A 2D RaggedTensor can be constructed from this and output_row_splits. - * limit_values: 1D tensor containing the exclusive end byte offset for - each token in all input strings. Corresponds 1:1 with output_values. - A 2D RaggedTensor can be constructed from this and output_row_splits. 
-)doc"); - -Status SplitMergeTokenizeWithOffsetsShapeFn(InferenceContext* c) { - ShapeHandle input_values = c->input(0); - ShapeHandle labels = c->input(1); - ShapeHandle row_splits = c->input(2); - TF_RETURN_IF_ERROR(c->WithRank(input_values, 1, &input_values)); - TF_RETURN_IF_ERROR(c->WithRank(labels, 1, &labels)); - TF_RETURN_IF_ERROR(c->WithRank(row_splits, 1, &row_splits)); - DimensionHandle num_input_values = c->Dim(input_values, 0); - c->set_output(0, c->UnknownShapeOfRank(1)); // output_values - DimensionHandle num_splits; - TF_RETURN_IF_ERROR(c->Add(num_input_values, 1, &num_splits)); - c->set_output(1, c->Vector(num_splits)); // row_splits - c->set_output(2, c->UnknownShapeOfRank(1)); // start_values - c->set_output(3, c->UnknownShapeOfRank(1)); // limit_values - return Status::OK(); -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/tokenizer_from_logits_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/tokenizer_from_logits_op.cc deleted file mode 100644 index 93971119..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/tokenizer_from_logits_op.cc +++ /dev/null
@@ -1,122 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { -namespace text { - -using shape_inference::DimensionHandle; -using shape_inference::InferenceContext; -using shape_inference::ShapeHandle; - -Status TokenizerFromLogitsShapeFn(InferenceContext* c); - -REGISTER_OP("TokenizerFromLogits") - .Input("strings: string") - .Input("logits: float") - .Input("force_split_at_break_character: bool") - .Output("output_values: string") - .Output("row_splits: int64") - .Output("start_values: int64") - .Output("limit_values: int64") - .SetShapeFn(TokenizerFromLogitsShapeFn) - .Doc(R"doc( - Segment input string according to the given split(0)/merge(1) labels of each - character in the input string. - - ### Example: - - ```python - >>> strings = ["IloveFlume!", "and tensorflow"]) - >>> labels = [ - [ - # I - 0, - # love - 0, 1, 1, 1, - # Flume - 0, 1, 1, 1, 1, - # ! - 0, - # paddings - 0, 0, 0 - ], [ - # and - 0, 1, 1, - # ' ' - 1, - # tensorflow - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 - ]] - >>> tokenizer = TokenizerFromLogits() - >>> token_values, rows_splits, start_values, limit_values = ( - gen_tokenizer_from_logits.tokenizer_from_logits(strings, labels) - >>> RaggedTensor.from_row_splits(token_values, row_splits) - [["I", "love", "Flume", "!"], ["and", "tensorflow"]] - >>> RaggedTensor.from_row_splits(start_values, row_splits) - >>> [[0, 1, 5, 10], [0, 4]] - >>> RaggedTensor.from_row_splits(limit_values, row_splits) - >>> [[1, 5, 10, 11], [3, 14]] - ``` - - Args: - strings: 1D Tensor of strings to tokenize with. - logits: 3D Tensor; logits[i,j,0] is the logit for the split action for j-th - character of strings[i]. logits[i,j,1] is the logit for the merge action - for that same character. For each character, we pick the action with the - greatest logit. Split starts a new word at this character and merge adds - this character to the previous word. The shape of this tensor should be - (n, m, 2) where n is the number of strings, and m is greater or equal with - the number of characters from each strings[i]. As the elements of the - strings tensor may have different lengths (in UTF-8 chars), padding may be - required to get a dense vector; for each row, the extra (padding) pairs of - logits are ignored. - force_split_at_break_character: bool scalar, indicates whether to force - start a new word after seeing an ICU defined whitespace character. - - Returns: - * token_values: 1D tensor containing the tokens for all input strings. - A 2D RaggedTensor can be constructed from this and row_splits. - * row_splits: 1D tensor containing row split offsets indicating the - start and end offsets in the output values for each input string. - * start_values: 1D tensor containing the inclusive start byte offset for - each token in all input strings. Corresponds 1:1 with output_values. 
- A 2D RaggedTensor can be constructed from this and row_splits. - * limit_values: 1D tensor containing the exclusive end byte offset for - each token in all input strings. Corresponds 1:1 with output_values. - A 2D RaggedTensor can be constructed from this and row_splits. -)doc"); - -Status TokenizerFromLogitsShapeFn(InferenceContext* c) { - ShapeHandle strings = c->input(0); - ShapeHandle logits = c->input(1); - ShapeHandle force_split_at_break_character = c->input(2); - TF_RETURN_IF_ERROR(c->WithRank(strings, 1, &strings)); - TF_RETURN_IF_ERROR(c->WithRank(logits, 3, &logits)); - TF_RETURN_IF_ERROR(c->WithRank(force_split_at_break_character, 0, - &force_split_at_break_character)); - DimensionHandle num_strings = c->Dim(strings, 0); - c->set_output(0, c->UnknownShapeOfRank(1)); // output_values - DimensionHandle num_splits; - TF_RETURN_IF_ERROR(c->Add(num_strings, 1, &num_splits)); - c->set_output(1, c->Vector(num_splits)); // row_splits - c->set_output(2, c->UnknownShapeOfRank(1)); // start_values - c->set_output(3, c->UnknownShapeOfRank(1)); // limit_values - return Status::OK(); -} - -} // namespace text -} // namespace tensorflow
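Note: the TokenizerFromLogits op deleted above consumes per-character logits of shape (n, m, 2), where index 0 is the split action and index 1 is the merge action. A hedged sketch of the wrapper-level usage, assuming the upstream class `tensorflow_text.SplitMergeFromLogitsTokenizer` (the class name and exact behavior are assumptions):

```python
# Sketch only: per-character [split_logit, merge_logit] pairs drive the
# segmentation; argmax over the last axis picks the action for each char.
import tensorflow as tf
import tensorflow_text as tf_text  # assumed upstream package

strings = tf.constant(["ab cd"])
logits = tf.constant([[[5.0, -3.0],    # 'a' -> split (start a new word)
                       [-3.0, 5.0],    # 'b' -> merge
                       [-3.0, 5.0],    # ' ' -> merge (whitespace is dropped)
                       [5.0, -3.0],    # 'c' -> split (start a new word)
                       [-3.0, 5.0]]])  # 'd' -> merge

tokenizer = tf_text.SplitMergeFromLogitsTokenizer(
    force_split_at_break_character=True)
print(tokenizer.tokenize(strings, logits).to_list())  # expected [[b'ab', b'cd']]
```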
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/unicode_script_tokenize_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/unicode_script_tokenize_op.cc deleted file mode 100644 index 588a7f69..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/unicode_script_tokenize_op.cc +++ /dev/null
@@ -1,56 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <string> -#include <vector> - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -namespace shape_inference { -class InferenceContext; -} // namespace shape_inference - -namespace text { - -using shape_inference::InferenceContext; - -REGISTER_OP("UnicodeScriptTokenizeWithOffsets") - .Input("input_values: int32") - .Input("input_splits: Tsplits") - .Output("output_values: int32") - .Output("output_values_inner_splits: Tsplits") - .Output("output_offset_starts: int64") - .Output("output_offset_limits: int64") - .Output("output_outer_splits: Tsplits") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .Attr("keep_whitespace: bool = false") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - - c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(2, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(3, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(4, c->Vector(InferenceContext::kUnknownDim)); - return Status::OK(); - }); - -} // namespace text -} // namespace tensorflow
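Note: the UnicodeScriptTokenizeWithOffsets op deleted above operates on pre-decoded codepoints (int32 values plus row splits); upstream, string inputs are handled by a wrapper class. A sketch assuming `tensorflow_text.UnicodeScriptTokenizer`:

```python
# Sketch only: the string-level wrapper assumed to sit on top of the removed
# codepoint-level op.
import tensorflow_text as tf_text  # assumed upstream package

tokenizer = tf_text.UnicodeScriptTokenizer(keep_whitespace=False)
tokens, starts, ends = tokenizer.tokenize_with_offsets(
    ["everything not saved will be lost.", "general kenobi"])
print(tokens.to_list())  # words and punctuation, split on script boundaries
print(starts.to_list())  # inclusive start byte offsets
print(ends.to_list())    # exclusive end byte offsets
```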
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/whitespace_tokenize_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/whitespace_tokenize_op.cc deleted file mode 100644 index 1eb680a3..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/whitespace_tokenize_op.cc +++ /dev/null
@@ -1,55 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <string> -#include <vector> - -#include "tensorflow/core/framework/common_shape_fns.h" -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -namespace shape_inference { -class InferenceContext; -} // namespace shape_inference - -namespace text { - -using shape_inference::InferenceContext; - -REGISTER_OP("WhitespaceTokenizeWithOffsets") - .Input("input_values: int32") - .Input("input_splits: Tsplits") - .Output("output_values: int32") - .Output("output_values_inner_splits: Tsplits") - .Output("output_offset_starts: int64") - .Output("output_offset_limits: int64") - .Output("output_outer_splits: Tsplits") - .Attr("Tsplits: {int32, int64} = DT_INT64") - .SetShapeFn([](InferenceContext* c) { - shape_inference::ShapeHandle unused; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 1, &unused)); - TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 1, &unused)); - - c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(1, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(2, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(3, c->Vector(InferenceContext::kUnknownDim)); - c->set_output(4, c->Vector(InferenceContext::kUnknownDim)); - return Status::OK(); - }); - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/whitespace_tokenizer_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/whitespace_tokenizer_op.cc deleted file mode 100644 index d60c25d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/whitespace_tokenizer_op.cc +++ /dev/null
@@ -1,29 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef THIRD_PARTY_TENSORFLOW_TEXT_CORE_OPS_WHITESPACE_TOKENIZER_OP_H_ -#define THIRD_PARTY_TENSORFLOW_TEXT_CORE_OPS_WHITESPACE_TOKENIZER_OP_H_ - -#include "tensorflow/lite/kernels/shim/tf_op_shim.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer_kernel.h" - -namespace tensorflow { -namespace text { - -REGISTER_TF_OP_SHIM(WhitespaceTokenizeWithOffsetsV2OpKernel); - -} // namespace text -} // namespace tensorflow - -#endif // THIRD_PARTY_TENSORFLOW_TEXT_CORE_OPS_WHITESPACE_TOKENIZER_OP_H_
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/ops/wordpiece_op.cc b/third_party/tensorflow-text/src/tensorflow_text/core/ops/wordpiece_op.cc deleted file mode 100644 index aac35a6..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/ops/wordpiece_op.cc +++ /dev/null
@@ -1,116 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "tensorflow/core/framework/op.h" -#include "tensorflow/core/framework/shape_inference.h" - -namespace tensorflow { - -using shape_inference::DimensionHandle; -using shape_inference::InferenceContext; -using shape_inference::ShapeHandle; - -Status WordpieceTokenizeWithOffsetsShapeFn(InferenceContext* c); - -REGISTER_OP("WordpieceTokenizeWithOffsets") - .Input("input_values: string") - .Input("vocab_lookup_table: resource") - .Attr("suffix_indicator: string") - .Attr("max_bytes_per_word: int") - .Attr("max_chars_per_token: int = 0") - .Attr("use_unknown_token: bool") - .Attr("unknown_token: string") - .Attr("split_unknown_characters: bool = false") - .Attr( - "output_row_partition_type: {'row_lengths', 'row_splits'}" - " = 'row_lengths'") - .Output("output_values: string") - .Output("output_row_lengths: int64") - .Output("start_values: int64") - .Output("limit_values: int64") - .SetShapeFn(WordpieceTokenizeWithOffsetsShapeFn) - .Doc(R"doc( - Tokenizes tokens into sub-word pieces based off of a vocabulary. - - `wordpiece_tokenize_with_offsets` returns the relative offsets. - - ### Example: - - ```python - >>> tokens = ['don', '\'t', 'treadness'] - >>> wordpiece, row_lengths, start, end = wordpiece_tokenize_with_offset( - ... tokens, vocab, '##', 100, False, '') - >>> RaggedTensor.from_row_lengths(wordpiece, row_lengths) - [['don', '\'', 't'], ['tread', '##ness']] - >>> RaggedTensor.from_row_lengths(start, row_lengths) - start = [[[0, 3, 4], [0, 5]]] - >>> RaggedTensor.from_row_lengths(end, row_lengths) - end = [[[3, 4, 5], [5, 10]]] - ``` - - Args: - input_values: 1D Tensor of strings to tokenize with. - vocab_lookup_table: Resource tensor for a lookup table implementing the - LookupInterface. - suffix_indicator: Characters prepended to a wordpiece to - indicate that it is a suffix to another subword. - max_bytes_per_word: Max size of input token. - max_chars_per_token: Max size of output tokens. A non-positive value - means the max size is not known. - use_unknown_token: Whether unknown_token should be used. - unknown_token: The value to use when an unknown token is found. - split_unknown_characters: Whether individual unknown unicode characters - should be split out as subtokens. - output_row_partition_type: Indicates what row-partitioning tensor should - be returned by the op. If this is set to 'row_splits', then the - `output_row_lengths` output will contain row-splits instead of - row-lengths. - - Returns: - * output_values: 1D tensor containing the wordpieces for all input strings. - A 2D RaggedTensor can be constructed from this and output_row_lengths. - * output_row_lengths: 1D int tensor indicating the number of wordpieces - corresponding with each input string. If output_row_partition_type is - row_splits, then this will contain row split offsets instead. 
- * start_values: 1D tensor containing the inclusive start byte offset for - each wordpiece in all input strings. Corresponds 1:1 with output_values. - A 2D RaggedTensor can be constructed from this and output_row_lengths. - * limit_values: 1D tensor containing the exclusive end byte offset for - each wordpiece in all input strings. Corresponds 1:1 with output_values. - A 2D RaggedTensor can be constructed from this and output_row_lengths. -)doc"); - -Status WordpieceTokenizeWithOffsetsShapeFn(InferenceContext* c) { - ShapeHandle input_values = c->input(0); - ShapeHandle vocab_lookup_table = c->input(1); - string output_row_partition_type; - TF_RETURN_IF_ERROR(c->WithRank(input_values, 1, &input_values)); - TF_RETURN_IF_ERROR(c->WithRank(vocab_lookup_table, 0, &vocab_lookup_table)); - TF_RETURN_IF_ERROR( - c->GetAttr("output_row_partition_type", &output_row_partition_type)); - DimensionHandle num_input_values = c->Dim(input_values, 0); - c->set_output(0, c->UnknownShapeOfRank(1)); // output_values - if (output_row_partition_type == "row_lengths") { - c->set_output(1, c->Vector(num_input_values)); // row_lengths - } else { - DimensionHandle num_splits; - TF_RETURN_IF_ERROR(c->Add(num_input_values, 1, &num_splits)); - c->set_output(1, c->Vector(num_splits)); // row_splits - } - c->set_output(2, c->UnknownShapeOfRank(1)); // start_values - c->set_output(3, c->UnknownShapeOfRank(1)); // limit_values - return Status::OK(); -} - -} // namespace tensorflow
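Note: the WordpieceTokenizeWithOffsets op deleted above takes a vocabulary lookup table resource; upstream, the usual entry point is the WordpieceTokenizer class. A sketch assuming `tensorflow_text.WordpieceTokenizer` on top of a StaticHashTable (the vocabulary contents are illustrative):

```python
# Sketch only: wordpiece tokenization against a toy vocabulary.
import tensorflow as tf
import tensorflow_text as tf_text  # assumed upstream package

vocab = ["don", "##'", "##t", "tread", "##ness", "[UNK]"]
init = tf.lookup.KeyValueTensorInitializer(
    keys=vocab, values=tf.range(len(vocab), dtype=tf.int64))
table = tf.lookup.StaticHashTable(init, default_value=-1)

tokenizer = tf_text.WordpieceTokenizer(
    table, suffix_indicator="##", max_bytes_per_word=100,
    token_out_type=tf.string, unknown_token="[UNK]")

tokens, starts, ends = tokenizer.tokenize_with_offsets(["don't", "treadness"])
print(tokens.to_list())  # e.g. [[b'don', b"##'", b'##t'], [b'tread', b'##ness']]
print(starts.to_list())  # inclusive start byte offsets per wordpiece
print(ends.to_list())    # exclusive end byte offsets per wordpiece
```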
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/BUILD b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/BUILD deleted file mode 100644 index f2db478..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/BUILD +++ /dev/null
@@ -1,85 +0,0 @@ -# Code that exposes C++ libraries to Python via pybind11. - -load("@org_tensorflow//tensorflow:tensorflow.bzl", "pybind_extension") - -licenses(["notice"]) - -package(default_visibility = [ - "//tensorflow_text:__subpackages__", -]) - -pybind_extension( - name = "tflite_registrar", - srcs = [ - "tflite_registrar.cc", - ], - additional_exported_symbols = [ - "AddFastWordpieceDetokenize", - "AddFastWordpieceTokenize", - "AddNgramsStringJoin", - "AddWhitespaceTokenize", - ], - module_name = "tflite_registrar", - deps = [ - "@pybind11", - # lite:framework tensorflow dep, - # lite/c:common tensorflow dep, - # lite/kernels:builtin_ops tensorflow dep, - "//tensorflow_text/core/kernels:tflite_ops", - ], -) - -pybind_extension( - name = "pywrap_fast_wordpiece_tokenizer_model_builder", - srcs = ["pywrap_fast_wordpiece_tokenizer_model_builder.cc"], - additional_exported_symbols = [ - "BuildFastWordpieceModel", - ], - copts = ["-fexceptions"], - features = ["-use_header_modules"], - module_name = "pywrap_fast_wordpiece_tokenizer_model_builder", - srcs_version = "PY3ONLY", - deps = [ - "//tensorflow_text/core/kernels:fast_wordpiece_tokenizer_model_builder", - "@pybind11", - ], -) - -py_test( - name = "pywrap_fast_wordpiece_tokenizer_model_builder_test", - srcs = ["pywrap_fast_wordpiece_tokenizer_model_builder_test.py"], - data = [ - "//tensorflow_text:python/ops/test_data/fast_wordpiece_tokenizer_model.fb", - ], - python_version = "PY3", - deps = [ - ":pywrap_fast_wordpiece_tokenizer_model_builder", - # tensorflow package dep, - ], -) - -pybind_extension( - name = "pywrap_whitespace_tokenizer_config_builder", - srcs = ["pywrap_whitespace_tokenizer_config_builder.cc"], - additional_exported_symbols = [ - "BuildWhitespaceTokenizerConfig", - ], - copts = ["-fexceptions"], - features = ["-use_header_modules"], - module_name = "pywrap_whitespace_tokenizer_config_builder", - srcs_version = "PY3ONLY", - deps = [ - "//tensorflow_text/core/kernels:whitespace_tokenizer_config_builder", - "@pybind11", - ], -) - -py_test( - name = "pywrap_whitespace_tokenizer_config_builder_test", - srcs = ["pywrap_whitespace_tokenizer_config_builder_test.py"], - python_version = "PY3", - deps = [ - ":pywrap_whitespace_tokenizer_config_builder", - # tensorflow package dep, - ], -)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc deleted file mode 100644 index 43fa6497..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder.cc +++ /dev/null
@@ -1,43 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <stdexcept> - -// #include "include/pybind11/pybind11.h" -// #include "include/pybind11/stl.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_model_builder.h" - -namespace tensorflow { -namespace text { - -namespace py = pybind11; - -PYBIND11_MODULE(pywrap_fast_wordpiece_tokenizer_model_builder, m) { - m.def("build_fast_wordpiece_model", - [](const std::vector<std::string>& vocab, int max_bytes_per_token, - const std::string& suffix_indicator, const std::string& unk_token, - bool no_pretokenization, bool support_detokenization) { - const auto result = BuildModelAndExportToFlatBuffer( - vocab, max_bytes_per_token, suffix_indicator, unk_token, - no_pretokenization, support_detokenization); - if (!result.status().ok()) { - // Propagate the error to the Python code. - throw std::runtime_error(std::string(result.status().message())); - } - return py::bytes(*result); - }); -} - -} // namespace text -} // namespace tensorflow
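Note: the binding deleted above exposes a single builder function to Python; its call shape can be read off the lambda signature, and the unit test below exercises the same entry point. A hedged sketch with illustrative argument values:

```python
# Sketch only: mirrors the pybind11 signature in the removed file above.
from tensorflow_text.core.pybinds import (
    pywrap_fast_wordpiece_tokenizer_model_builder as builder)

vocab = ["a", "abc", "##de", "##f", "<unk>"]
model_fb = builder.build_fast_wordpiece_model(
    vocab,   # vocabulary, one entry per wordpiece
    100,     # max_bytes_per_token
    "##",    # suffix_indicator
    "<unk>", # unk_token
    True,    # no_pretokenization
    False)   # support_detokenization
# model_fb is a flatbuffer (bytes) used to configure the fast wordpiece
# tokenizer; an unk_token missing from the vocab raises RuntimeError.
print(type(model_fb), len(model_fb))
```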
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder_test.py b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder_test.py deleted file mode 100644 index df76dca..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_fast_wordpiece_tokenizer_model_builder_test.py +++ /dev/null
@@ -1,64 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Tests for pywrap_fast_wordpiece_tokenizer_model_builder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import test_util -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow_text.core.pybinds import pywrap_fast_wordpiece_tokenizer_model_builder - -EXPECTED_MODEL_BUFFER_PATH = "third_party/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb" - - -class PywrapFastWordpieceBuilderTest(test_util.TensorFlowTestCase): - - def test_build(self): - vocab = [ - "a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", "##ghz", - "<unk>" - ] - max_bytes_per_token = 100 - suffix_indicator = "##" - unk_token = "<unk>" - expected_model_buffer = gfile.GFile(EXPECTED_MODEL_BUFFER_PATH, "rb").read() - self.assertEqual( - pywrap_fast_wordpiece_tokenizer_model_builder - .build_fast_wordpiece_model( - vocab, max_bytes_per_token, suffix_indicator, unk_token, True, - False), - expected_model_buffer) - - def test_build_throw_exception_unk_token_not_in_vocab(self): - vocab = [ - "a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", "##ghz" - ] - max_bytes_per_token = 100 - suffix_indicator = "##" - unk_token = "<unk>" - with self.assertRaisesRegex(RuntimeError, - "Cannot find unk_token in the vocab!"): - (pywrap_fast_wordpiece_tokenizer_model_builder - .build_fast_wordpiece_model( - vocab, max_bytes_per_token, suffix_indicator, unk_token, True, - False)) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc deleted file mode 100644 index 66b4062f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder.cc +++ /dev/null
@@ -1,34 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include <iostream> -#include <stdexcept> -// #include "include/pybind11/pybind11.h" -// #include "include/pybind11/stl.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer_config_builder.h" - -namespace tensorflow { -namespace text { - -namespace py = pybind11; - -PYBIND11_MODULE(pywrap_whitespace_tokenizer_config_builder, m) { - m.def("build_whitespace_tokenizer_config", []() { - const auto result = BuildWhitespaceTokenizerConfig(); - return py::bytes(result); - }); -} - -} // namespace text -} // namespace tensorflow
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder_test.py b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder_test.py deleted file mode 100644 index e4e1740..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/pywrap_whitespace_tokenizer_config_builder_test.py +++ /dev/null
@@ -1,49 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -r"""Tests for pywrap_whitespace_tokenizer_config_builder.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import test_util -from tensorflow.python.platform import test -from tensorflow_text.core.pybinds import pywrap_whitespace_tokenizer_config_builder as pywrap_builder - - -class PywrapFastWordpieceBuilderTest(test_util.TensorFlowTestCase): - - # This is not supposed to be an exhaustive test. That is done with the - # builder test. We just want to sanity check a couple values to show we have - # received something. - def test_build(self): - # check non-empty - config = pywrap_builder.build_whitespace_tokenizer_config() - self.assertNotEmpty(config) - # check space character is whitespace - character = ord(' ') - bits = config[character >> 3] - mask = 1 << (character & 0x7) - self.assertGreater(bits & mask, 0) - # check letter is not whitespace - character = ord('a') - bits = config[character >> 3] - mask = 1 << (character & 0x7) - self.assertEqual(bits & mask, 0) - - -if __name__ == '__main__': - test.main()
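Note: as the test above shows, the config returned by build_whitespace_tokenizer_config is a packed bitmask over Unicode codepoints, with a set bit meaning "is whitespace". A small sketch that factors out that bit arithmetic (the helper name is hypothetical):

```python
# Sketch only: query the whitespace bitmask produced by the builder above.
from tensorflow_text.core.pybinds import (
    pywrap_whitespace_tokenizer_config_builder as pywrap_builder)

def is_whitespace(config: bytes, codepoint: int) -> bool:
    """True if `codepoint` is marked as whitespace in the packed config."""
    # One bit per codepoint: byte index is codepoint // 8, bit index is
    # codepoint % 8 (same layout as in the unit test above).
    return bool(config[codepoint >> 3] & (1 << (codepoint & 0x7)))

config = pywrap_builder.build_whitespace_tokenizer_config()
print(is_whitespace(config, ord(" ")))  # True
print(is_whitespace(config, ord("a")))  # False
```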
diff --git a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc b/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc deleted file mode 100644 index 99b5cc8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/core/pybinds/tflite_registrar.cc +++ /dev/null
@@ -1,76 +0,0 @@ -// Copyright 2021 TF.Text Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// #include "include/pybind11/pybind11.h" -// #include "include/pybind11/pytypes.h" -#include "tensorflow_text/core/kernels/fast_wordpiece_tokenizer_tflite.h" -#include "tensorflow_text/core/kernels/ngrams_tflite.h" -#include "tensorflow_text/core/kernels/ragged_tensor_to_tensor_tflite.h" -#include "tensorflow_text/core/kernels/whitespace_tokenizer_tflite.h" - -PYBIND11_MODULE(tflite_registrar, m) { - m.doc() = R"pbdoc( - tflite_registrar - A module with a Python wrapper for TFLite TFText ops. - )pbdoc"; - m.attr("_allowed_symbols") = pybind11::make_tuple( - "AddFastWordpieceTokenize", "AddFastWordpieceDetokenize", - "AddNgramsStringJoin", "AddRaggedTensorToTensor", "AddWhitespaceTokenize", - "SELECT_TFTEXT_OPS"); - m.def( - "AddFastWordpieceTokenize", - [](uintptr_t resolver) { - tflite::ops::custom::text::AddFastWordpieceTokenize( - reinterpret_cast<tflite::MutableOpResolver*>(resolver)); - }, - R"pbdoc( - The function that adds FastWordpieceTokenize to the TFLite interpreter. - )pbdoc"); - m.def( - "AddFastWordpieceDetokenize", - [](uintptr_t resolver) { - tflite::ops::custom::text::AddFastWordpieceDetokenize( - reinterpret_cast<tflite::MutableOpResolver*>(resolver)); - }, - R"pbdoc( - The function that adds FastWordpieceDetokenize to the TFLite interpreter. - )pbdoc"); - m.def( - "AddNgramsStringJoin", - [](uintptr_t resolver) { - tflite::ops::custom::text::AddNgramsStringJoin( - reinterpret_cast<tflite::MutableOpResolver*>(resolver)); - }, - R"pbdoc( - The function that adds AddNgramsStringJoin to the TFLite interpreter. - )pbdoc"); - m.def( - "AddRaggedTensorToTensor", - [](uintptr_t resolver) { - tflite::ops::custom::text::AddRaggedTensorToTensor( - reinterpret_cast<tflite::MutableOpResolver*>(resolver)); - }, - R"pbdoc( - The function that adds AddRaggedTensorToTensor to the TFLite interpreter. - )pbdoc"); - m.def( - "AddWhitespaceTokenize", - [](uintptr_t resolver) { - tflite::ops::custom::text::AddWhitespaceTokenize( - reinterpret_cast<tflite::MutableOpResolver*>(resolver)); - }, - R"pbdoc( - The function that adds AddWhitespaceTokenize to the TFLite interpreter. - )pbdoc"); -}
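Note: the registrar deleted above is normally consumed from Python when constructing a TFLite interpreter that contains TF.Text custom ops; each Add* function receives the address of the interpreter's MutableOpResolver. A hedged sketch, assuming the InterpreterWithCustomOps API and an already-converted model (`model_content` is a placeholder):

```python
# Sketch only: wiring the removed registrar's functions into a TFLite
# interpreter via custom_op_registerers.
from tensorflow.lite.python import interpreter as interpreter_lib
from tensorflow_text.core.pybinds import tflite_registrar

# model_content: bytes of a TFLite flatbuffer that uses TF.Text custom ops
# (placeholder, produced elsewhere by the TFLite converter).
interp = interpreter_lib.InterpreterWithCustomOps(
    model_content=model_content,
    custom_op_registerers=[
        tflite_registrar.AddWhitespaceTokenize,
        tflite_registrar.AddFastWordpieceTokenize,
    ])
interp.allocate_tensors()
```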
diff --git a/third_party/tensorflow-text/src/tensorflow_text/public_names_test.py b/third_party/tensorflow-text/src/tensorflow_text/public_names_test.py deleted file mode 100644 index fcfad68f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/public_names_test.py +++ /dev/null
@@ -1,59 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Test that the expected symbols are made public by tensorflow_text. - -Each public module should have an _allowed_symbols attribute, listing the -public symbols for that module; and that list should match the actual list -of public symbols in that module. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import types - -import tensorflow_text as tensorflow_text -from tensorflow.python.platform import test - - -class PublicNamesTest(test.TestCase): - - def check_names(self, module, prefix="tf_text."): - self.assertTrue( - hasattr(module, "_allowed_symbols"), - "Expected to find _allowed_symbols in %s" % prefix) - - actual_symbols = set( - name for name in module.__dict__ if not name.startswith("_")) - missing_names = set(module._allowed_symbols) - set(actual_symbols) - extra_names = set(actual_symbols) - set(module._allowed_symbols) - - self.assertEqual(extra_names, set(), - "Unexpected symbol(s) exported by %s" % prefix) - self.assertEqual(missing_names, set(), - "Missing expected symbol(s) in %s" % prefix) - - for (name, value) in module.__dict__.items(): - if isinstance(value, types.ModuleType) and not name.startswith("_"): - self.check_names(value, prefix + name + ".") - - def testPublicNames(self): - self.check_names(tensorflow_text) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/python/__init__.py deleted file mode 100644 index 3db2895..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/__init__.py +++ /dev/null
@@ -1,16 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Empty file required by setuptools.find_packages to recognize this as a package
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/__init__.py deleted file mode 100644 index 962e22f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/__init__.py +++ /dev/null
@@ -1,19 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tf.Text benchmarks.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/benchmark_utils.py b/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/benchmark_utils.py deleted file mode 100644 index ad3ffc2..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/benchmark_utils.py +++ /dev/null
@@ -1,265 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Benchmarking utils for TF.Text ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import time - -import tensorflow_datasets as tfds - -from tensorflow.python.client import session -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import variables as variables_lib -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import benchmark -from tensorflow_text.python import ops as text_ops -# [internal] import xprof_session -from tensorflow.python.util import tf_inspect - - -class OpsBaseBenchmark(benchmark.Benchmark): - """Base class for op benchmarks.""" - - def __init__(self): - super(OpsBaseBenchmark, self).__init__() - self.input_data = None - self.batch_size = None - self.use_tf_function = False - - def _get_method_name(self): - """Returns the calling method name.""" - - # Find the caller method (outermost Benchmark class) - stack = tf_inspect.stack() - name = None - for frame in stack[::-1]: - f_locals = frame[0].f_locals - f_self = f_locals.get('self', None) - if isinstance(f_self, benchmark.Benchmark): - name = frame[3] - break - if name is None: - raise ValueError('Unable to determine the method name.') - - return name - - def load_input_data(self, batch_size): - """Loads the IMDB dataset and sets up the input data to run the ops on.""" - - self.batch_size = batch_size - data = tfds.load( - 'imdb_reviews/plain_text', split=tfds.Split.TRAIN).batch(batch_size) - # The input data has shape [batch_size, data] and the op is run multiple - # iterations over the first batch - self.batch_number = 1 - - if context.executing_eagerly(): - self.iterator = data.as_numpy_iterator() - self.input_data = [x['text'] for x in self.iterator][0] - else: - self.iterator = dataset_ops.make_initializable_iterator(data) - self.input_data = self.iterator.get_next()['text'] - - def run_and_report(self, - fn, - iters, - burn_iters, - xprof_enabled=False, - benchmark_name=None, - **kwargs): - """Runs the benchmark and reports results. - - Args: - fn: Function to be benchmarked. - iters: Number of iterations to run the benchmark. - burn_iters: Number of warm-up iterations to run to reach a stable state. - xprof_enabled: Enables xprof traces. - benchmark_name: Overwrites the default name. - **kwargs: Kwargs to the benchmarked function. - - Returns: - Dict which contains the wall time report for the runned op. 
- """ - name = benchmark_name or self._get_method_name() - - if context.executing_eagerly(): - self._run_and_report_eagerly(fn, iters, burn_iters, name, xprof_enabled, - **kwargs) - else: - self._run_and_report_graphmode(fn, iters, burn_iters, name, xprof_enabled, - **kwargs) - - def _convert_to_ragged_inputs(self, inputs): - """Transforms the text batch inputs to a ragged shape.""" - if isinstance(self.input_data, ragged_tensor.RaggedTensor): - return inputs - - inputs = text_ops.WhitespaceTokenizer().tokenize(inputs) - return inputs - - def run_and_report_ragged_vs_dense(self, - fn, - iters, - burn_iters, - xprof_enabled=False, - **kwargs): - """Runs the op on ragged inputs and on its dense counterpart for comparison.""" - ragged_data = self._convert_to_ragged_inputs(self.input_data) - - self.input_data = ragged_data - self.run_and_report( - fn, - iters, - burn_iters, - xprof_enabled, - benchmark_name=self._get_method_name() + '_ragged', - **kwargs) - - self.input_data = ragged_data.to_tensor() - self.run_and_report( - fn, - iters, - burn_iters, - xprof_enabled, - benchmark_name=self._get_method_name() + '_dense', - **kwargs) - - self.load_input_data(self.batch_size) - - def _run_and_report_eagerly(self, - fn, - iters, - burn_iters, - benchmark_name, - xprof_enabled=False, - **kwargs): - """Runs and reports benchmarks eagerly.""" - if self.input_data is None: - raise ValueError( - 'Input data is missing for {} benchmark'.format(benchmark_name)) - - @def_function.function - def tf_func(): - fn(self.input_data, **kwargs) - - def func(): - fn(self.input_data, **kwargs) - - op = tf_func if self.use_tf_function else func - - for _ in range(burn_iters): - op() - - def run_benchmark(): - total_time = 0 - for _ in range(iters): - start = time.time() - op() - total_time += time.time() - start - - return total_time - - total_time = run_benchmark() - mean_time = total_time / iters - benchmark_name = benchmark_name + ('_function' - if self.use_tf_function else '_eager') - metrics = [] - extras = {'sec_per_batch': total_time / iters} - if hasattr(self, 'batch_number'): - extras.update({'batches_per_sec': self.batch_number / mean_time}) - metrics.append({ - 'name': 'batches_per_sec', - 'value': self.batch_number / mean_time - }) - - if xprof_enabled: - extras.update(self._run_with_xprof(run_benchmark)) - - self.report_benchmark( - wall_time=mean_time, - name=benchmark_name, - extras=extras, - metrics=metrics) - - def _run_with_xprof(self, benchmark_fn): - output = {} - xprof = xprof_session.XprofSession() - xprof.start_session(enable_python_tracer=True) - _ = benchmark_fn() - output['xprof_link'] = xprof.end_session_and_get_url() - - return output - - def _run_and_report_graphmode(self, fn, iters, burn_iters, benchmark_name, - xprof_enabled, **kwargs): - """Runs and reports benchmarks in graph mode.""" - if self.input_data is None: - raise ValueError( - 'Input data is missing for {} benchmark'.format(benchmark_name)) - - # Uses the benchmark config to disable the static graph optimizations - with session.Session(config=benchmark.benchmark_config()) as sess: - if hasattr(self, 'iterator'): - sess.run(self.iterator.initializer) - - sess.run(lookup_ops.tables_initializer()) - sess.run(variables_lib.global_variables_initializer()) - - inputs = sess.run(self.input_data) - placeholder = array_ops.placeholder(dtypes.string, - tensor_shape.TensorShape({None})) - op_feed_dict = {placeholder: inputs} - benchmark_op = fn(placeholder, **kwargs) - - def run_benchmark(): - for _ in range(burn_iters): - 
sess.run(benchmark_op, op_feed_dict) - total_time = 0 - for _ in range(iters): - start_time = time.time() - sess.run(benchmark_op, op_feed_dict) - total_time += time.time() - start_time - - return total_time - - total_time = run_benchmark() - mean_time = total_time / iters - extras = {'sec_per_batch': mean_time} - - metrics = [] - if hasattr(self, 'batch_number'): - extras.update({'batches_per_sec': self.batch_number / mean_time}) - metrics.append({ - 'name': 'batches_per_sec', - 'value': self.batch_number / mean_time - }) - - if xprof_enabled: - extras.update(self._run_with_xprof(run_benchmark)) - - self.report_benchmark( - wall_time=mean_time, - name=benchmark_name + '_graph', - extras=extras, - metrics=metrics)
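For readers skimming this deletion, the core measurement pattern that the removed OpsBaseBenchmark harness implemented (in both _run_and_report_eagerly and _run_and_report_graphmode) is small enough to sketch standalone: a fixed number of warm-up calls to reach a steady state, then timed iterations whose mean wall time is reported, plus batches-per-second when a batch count is known. The sketch below is a simplified, framework-free illustration of that loop under those assumptions, not a drop-in replacement for the deleted code; benchmark_op_fn, iters, burn_iters, and batch_number simply mirror the deleted method's parameters.

import time

def run_and_report(benchmark_op_fn, iters, burn_iters, batch_number=None):
    # Warm-up calls to reach a steady state; excluded from timing,
    # as in the deleted _run_and_report_* methods.
    for _ in range(burn_iters):
        benchmark_op_fn()
    # Timed iterations: accumulate wall time around each call.
    total_time = 0.0
    for _ in range(iters):
        start = time.time()
        benchmark_op_fn()
        total_time += time.time() - start
    mean_time = total_time / iters
    extras = {'sec_per_batch': mean_time}
    if batch_number is not None:
        extras['batches_per_sec'] = batch_number / mean_time
    return mean_time, extras

A hypothetical call such as run_and_report(lambda: my_op(data), iters=1000, burn_iters=10, batch_number=1) corresponds to the flag defaults used by the deleted ops_benchmarks.py below.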
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/ops_benchmarks.py b/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/ops_benchmarks.py deleted file mode 100644 index 98da1cd..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/ops_benchmarks.py +++ /dev/null
@@ -1,200 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Microbenchmarks for tokenizers on IMDB dataset.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import app -from absl import flags -import numpy as np - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.platform import test -from tensorflow_text.python import ops as text_ops -from tensorflow_text.python.benchmarks import benchmark_utils - - -FLAGS = flags.FLAGS -flags.DEFINE_integer("run_iters", 1000, "Number of iterations to run") -flags.DEFINE_integer("burn_iters", 10, "Number of warmup runs") -flags.DEFINE_integer("batch_size", 32, "The size of a batch") -flags.DEFINE_boolean("run_eagerly", True, "Run in eager mode") -flags.DEFINE_boolean( - "use_tf_function", True, - "Wraps the op in a tf.function. Only works when eager mode is enabled") -flags.DEFINE_boolean("xprof_tracing", False, "Enables xprof tracing") -flags.DEFINE_boolean("with_offsets", False, - "Runs the op with offsets additionally") -flags.DEFINE_boolean( - "ragged_vs_dense", False, - "Run the tokenizers using ragged inputs and its dense counterpart") - - -class OpsBenchmark(benchmark_utils.OpsBaseBenchmark): - """Benchmarks for various ops in TF Text.""" - - def __init__(self): - if not FLAGS.run_eagerly: - ops.disable_eager_execution() - - self.use_tf_function = FLAGS.use_tf_function - self.load_input_data(FLAGS.batch_size) - - def _run(self, op, kwargs=None): - if FLAGS.ragged_vs_dense: - self.run_and_report_ragged_vs_dense( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing, - **(kwargs or {})) - return - - self.run_and_report( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing, - **(kwargs or {})) - - def benchmark_ngrams(self): - self.input_data = text_ops.WhitespaceTokenizer().tokenize(self.input_data) - - self._run( - text_ops.ngrams, { - "width": 2, - "axis": -1, - "reduction_type": text_ops.Reduction.STRING_JOIN, - "string_separator": "|" - }) - - def benchmark_sliding_window(self): - self.input_data = text_ops.WhitespaceTokenizer().tokenize(self.input_data) - - self._run(text_ops.sliding_window, {"width": 3, "axis": -1}) - - def benchmark_case_fold_utf8(self): - self._run(text_ops.case_fold_utf8) - - def benchmark_normalize_utf8(self): - self._run(text_ops.normalize_utf8, {"normalization_form": "NFKC"}) - - def benchmark_normalize_utf8_with_offsets(self): - if FLAGS.with_offsets: - self._run(text_ops.normalize_utf8_with_offsets_map, - {"normalization_form": "NFKC"}) - - def benchmark_coerce_to_structurally_valid_utf8(self): - if FLAGS.ragged_vs_dense: - return - - # The input here is a valid UTF-8 input - self._run(text_ops.coerce_to_structurally_valid_utf8) - - def benchmark_pad_along_dimension(self): - self.input_data = 
text_ops.WhitespaceTokenizer().tokenize(self.input_data) - - self._run(text_ops.pad_along_dimension, { - "axis": -1, - "right_pad": ["RP"], - "left_pad": ["LP"] - }) - - def benchmark_state_based_sentence_breaking(self): - if FLAGS.ragged_vs_dense: - return - - # TODO(b/167267653): Remove custom input(line below) when the bug is fixed - self.input_data = constant_op.constant(["Hello (who are you)? Foo bar!"]) - - sentence_breaker = text_ops.StateBasedSentenceBreaker() - self._run(sentence_breaker.break_sentences) - - def benchmark_create_feature_bitmask(self): - if FLAGS.ragged_vs_dense: - return - - self.input_data = array_ops.placeholder_with_default( - constant_op.constant([[[True, True, False], [True, False, False]], - [[False, False, True], [True, False, True]]]), - shape=None) - - self._run(text_ops.create_feature_bitmask) - - -class ConstrainedSequenceOpsBenchmark(benchmark_utils.OpsBaseBenchmark): - """Benchmarks for constrained sequence ops in TF Text.""" - - def __init__(self): - if not FLAGS.run_eagerly: - ops.disable_eager_execution() - - self.use_tf_function = FLAGS.use_tf_function - self.load_input_data(FLAGS.batch_size) - - def load_input_data(self, batch_size): - scores = [[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]] - - self.input_data = constant_op.constant([scores, scores, scores], - dtype=np.float32) - self.transition_weights = constant_op.constant( - [[.1, .2, .3, .4, .1], [.5, .6, .7, .8, .1], [.9, 1, .15, 1, .1], - [.25, .35, .45, .55, .5], [.1, .5, .1, .1, 1]], - dtype=np.float32) - - self.allowed_transitions = constant_op.constant( - [[True, True, True, True, True], [True, True, True, True, True], - [True, False, True, False, True], [True, True, True, True, True], - [True, True, True, True, False]]) - - def _run(self, op, kwargs=None): - self.run_and_report( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing, - **(kwargs or {})) - - def benchmark_greedy_constrained_sequence(self): - if FLAGS.ragged_vs_dense: - return - - self._run( - text_ops.greedy_constrained_sequence, { - "transition_weights": self.transition_weights, - "allowed_transitions": self.allowed_transitions, - "use_log_space": True, - "use_start_and_end_states": True - }) - - def benchmark_viterb_constrained_sequence(self): - if FLAGS.ragged_vs_dense: - return - - self._run( - text_ops.greedy_constrained_sequence, { - "transition_weights": self.transition_weights, - "allowed_transitions": self.allowed_transitions, - "use_log_space": True, - "use_start_and_end_states": True - }) - - -if __name__ == "__main__": - app.run(test.main())
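The ragged_vs_dense flag handled above explains the '_ragged'/'_dense' suffixes on the reported names: the text batch is tokenized into a tf.RaggedTensor, the op is timed on that input, and then timed again on the padded dense tensor produced by to_tensor(). The following is a minimal sketch of that comparison, assuming tensorflow_text is installed; fn and string_batch are illustrative names, not identifiers from the deleted files.

import time
import tensorflow_text as tf_text  # assumed installed; the deleted code imported tensorflow_text.python.ops

def time_ragged_vs_dense(fn, string_batch, iters, burn_iters, **kwargs):
    # Tokenizing the string batch yields a tf.RaggedTensor; to_tensor() gives
    # the padded dense counterpart, matching the two runs performed by the
    # deleted run_and_report_ragged_vs_dense().
    ragged = tf_text.WhitespaceTokenizer().tokenize(string_batch)
    results = {}
    for name, inputs in (('ragged', ragged), ('dense', ragged.to_tensor())):
        for _ in range(burn_iters):  # warm-up, excluded from timing
            fn(inputs, **kwargs)
        total = 0.0
        for _ in range(iters):
            start = time.time()
            fn(inputs, **kwargs)
            total += time.time() - start
        results[name] = total / iters  # mean seconds per batch
    return results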
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/test_data/uncased_L-12_H-768_A-12/vocab.txt b/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/test_data/uncased_L-12_H-768_A-12/vocab.txt deleted file mode 100644 index fb14027..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/test_data/uncased_L-12_H-768_A-12/vocab.txt +++ /dev/null
@@ -1,30522 +0,0 @@ -[PAD] -[unused0] -[unused1] -[unused2] -[unused3] -[unused4] -[unused5] -[unused6] -[unused7] -[unused8] -[unused9] -[unused10] -[unused11] -[unused12] -[unused13] -[unused14] -[unused15] -[unused16] -[unused17] -[unused18] -[unused19] -[unused20] -[unused21] -[unused22] -[unused23] -[unused24] -[unused25] -[unused26] -[unused27] -[unused28] -[unused29] -[unused30] -[unused31] -[unused32] -[unused33] -[unused34] -[unused35] -[unused36] -[unused37] -[unused38] -[unused39] -[unused40] -[unused41] -[unused42] -[unused43] -[unused44] -[unused45] -[unused46] -[unused47] -[unused48] -[unused49] -[unused50] -[unused51] -[unused52] -[unused53] -[unused54] -[unused55] -[unused56] -[unused57] -[unused58] -[unused59] -[unused60] -[unused61] -[unused62] -[unused63] -[unused64] -[unused65] -[unused66] -[unused67] -[unused68] -[unused69] -[unused70] -[unused71] -[unused72] -[unused73] -[unused74] -[unused75] -[unused76] -[unused77] -[unused78] -[unused79] -[unused80] -[unused81] -[unused82] -[unused83] -[unused84] -[unused85] -[unused86] -[unused87] -[unused88] -[unused89] -[unused90] -[unused91] -[unused92] -[unused93] -[unused94] -[unused95] -[unused96] -[unused97] -[unused98] -[UNK] -[CLS] -[SEP] -[MASK] -[unused99] -[unused100] -[unused101] -[unused102] -[unused103] -[unused104] -[unused105] -[unused106] -[unused107] -[unused108] -[unused109] -[unused110] -[unused111] -[unused112] -[unused113] -[unused114] -[unused115] -[unused116] -[unused117] -[unused118] -[unused119] -[unused120] -[unused121] -[unused122] -[unused123] -[unused124] -[unused125] -[unused126] -[unused127] -[unused128] -[unused129] -[unused130] -[unused131] -[unused132] -[unused133] -[unused134] -[unused135] -[unused136] -[unused137] -[unused138] -[unused139] -[unused140] -[unused141] -[unused142] -[unused143] -[unused144] -[unused145] -[unused146] -[unused147] -[unused148] -[unused149] -[unused150] -[unused151] -[unused152] -[unused153] -[unused154] -[unused155] -[unused156] -[unused157] -[unused158] -[unused159] -[unused160] -[unused161] -[unused162] -[unused163] -[unused164] -[unused165] -[unused166] -[unused167] -[unused168] -[unused169] -[unused170] -[unused171] -[unused172] -[unused173] -[unused174] -[unused175] -[unused176] -[unused177] -[unused178] -[unused179] -[unused180] -[unused181] -[unused182] -[unused183] -[unused184] -[unused185] -[unused186] -[unused187] -[unused188] -[unused189] -[unused190] -[unused191] -[unused192] -[unused193] -[unused194] -[unused195] -[unused196] -[unused197] -[unused198] -[unused199] -[unused200] -[unused201] -[unused202] -[unused203] -[unused204] -[unused205] -[unused206] -[unused207] -[unused208] -[unused209] -[unused210] -[unused211] -[unused212] -[unused213] -[unused214] -[unused215] -[unused216] -[unused217] -[unused218] -[unused219] -[unused220] -[unused221] -[unused222] -[unused223] -[unused224] -[unused225] -[unused226] -[unused227] -[unused228] -[unused229] -[unused230] -[unused231] -[unused232] -[unused233] -[unused234] -[unused235] -[unused236] -[unused237] -[unused238] -[unused239] -[unused240] -[unused241] -[unused242] -[unused243] -[unused244] -[unused245] -[unused246] -[unused247] -[unused248] -[unused249] -[unused250] -[unused251] -[unused252] -[unused253] -[unused254] -[unused255] -[unused256] -[unused257] -[unused258] -[unused259] -[unused260] -[unused261] -[unused262] -[unused263] -[unused264] -[unused265] -[unused266] -[unused267] -[unused268] -[unused269] -[unused270] -[unused271] -[unused272] -[unused273] -[unused274] -[unused275] -[unused276] 
-[unused277] -[unused278] -[unused279] -[unused280] -[unused281] -[unused282] -[unused283] -[unused284] -[unused285] -[unused286] -[unused287] -[unused288] -[unused289] -[unused290] -[unused291] -[unused292] -[unused293] -[unused294] -[unused295] -[unused296] -[unused297] -[unused298] -[unused299] -[unused300] -[unused301] -[unused302] -[unused303] -[unused304] -[unused305] -[unused306] -[unused307] -[unused308] -[unused309] -[unused310] -[unused311] -[unused312] -[unused313] -[unused314] -[unused315] -[unused316] -[unused317] -[unused318] -[unused319] -[unused320] -[unused321] -[unused322] -[unused323] -[unused324] -[unused325] -[unused326] -[unused327] -[unused328] -[unused329] -[unused330] -[unused331] -[unused332] -[unused333] -[unused334] -[unused335] -[unused336] -[unused337] -[unused338] -[unused339] -[unused340] -[unused341] -[unused342] -[unused343] -[unused344] -[unused345] -[unused346] -[unused347] -[unused348] -[unused349] -[unused350] -[unused351] -[unused352] -[unused353] -[unused354] -[unused355] -[unused356] -[unused357] -[unused358] -[unused359] -[unused360] -[unused361] -[unused362] -[unused363] -[unused364] -[unused365] -[unused366] -[unused367] -[unused368] -[unused369] -[unused370] -[unused371] -[unused372] -[unused373] -[unused374] -[unused375] -[unused376] -[unused377] -[unused378] -[unused379] -[unused380] -[unused381] -[unused382] -[unused383] -[unused384] -[unused385] -[unused386] -[unused387] -[unused388] -[unused389] -[unused390] -[unused391] -[unused392] -[unused393] -[unused394] -[unused395] -[unused396] -[unused397] -[unused398] -[unused399] -[unused400] -[unused401] -[unused402] -[unused403] -[unused404] -[unused405] -[unused406] -[unused407] -[unused408] -[unused409] -[unused410] -[unused411] -[unused412] -[unused413] -[unused414] -[unused415] -[unused416] -[unused417] -[unused418] -[unused419] -[unused420] -[unused421] -[unused422] -[unused423] -[unused424] -[unused425] -[unused426] -[unused427] -[unused428] -[unused429] -[unused430] -[unused431] -[unused432] -[unused433] -[unused434] -[unused435] -[unused436] -[unused437] -[unused438] -[unused439] -[unused440] -[unused441] -[unused442] -[unused443] -[unused444] -[unused445] -[unused446] -[unused447] -[unused448] -[unused449] -[unused450] -[unused451] -[unused452] -[unused453] -[unused454] -[unused455] -[unused456] -[unused457] -[unused458] -[unused459] -[unused460] -[unused461] -[unused462] -[unused463] -[unused464] -[unused465] -[unused466] -[unused467] -[unused468] -[unused469] -[unused470] -[unused471] -[unused472] -[unused473] -[unused474] -[unused475] -[unused476] -[unused477] -[unused478] -[unused479] -[unused480] -[unused481] -[unused482] -[unused483] -[unused484] -[unused485] -[unused486] -[unused487] -[unused488] -[unused489] -[unused490] -[unused491] -[unused492] -[unused493] -[unused494] -[unused495] -[unused496] -[unused497] -[unused498] -[unused499] -[unused500] -[unused501] -[unused502] -[unused503] -[unused504] -[unused505] -[unused506] -[unused507] -[unused508] -[unused509] -[unused510] -[unused511] -[unused512] -[unused513] -[unused514] -[unused515] -[unused516] -[unused517] -[unused518] -[unused519] -[unused520] -[unused521] -[unused522] -[unused523] -[unused524] -[unused525] -[unused526] -[unused527] -[unused528] -[unused529] -[unused530] -[unused531] -[unused532] -[unused533] -[unused534] -[unused535] -[unused536] -[unused537] -[unused538] -[unused539] -[unused540] -[unused541] -[unused542] -[unused543] -[unused544] -[unused545] -[unused546] -[unused547] -[unused548] -[unused549] 
-[unused550] -[unused551] -[unused552] -[unused553] -[unused554] -[unused555] -[unused556] -[unused557] -[unused558] -[unused559] -[unused560] -[unused561] -[unused562] -[unused563] -[unused564] -[unused565] -[unused566] -[unused567] -[unused568] -[unused569] -[unused570] -[unused571] -[unused572] -[unused573] -[unused574] -[unused575] -[unused576] -[unused577] -[unused578] -[unused579] -[unused580] -[unused581] -[unused582] -[unused583] -[unused584] -[unused585] -[unused586] -[unused587] -[unused588] -[unused589] -[unused590] -[unused591] -[unused592] -[unused593] -[unused594] -[unused595] -[unused596] -[unused597] -[unused598] -[unused599] -[unused600] -[unused601] -[unused602] -[unused603] -[unused604] -[unused605] -[unused606] -[unused607] -[unused608] -[unused609] -[unused610] -[unused611] -[unused612] -[unused613] -[unused614] -[unused615] -[unused616] -[unused617] -[unused618] -[unused619] -[unused620] -[unused621] -[unused622] -[unused623] -[unused624] -[unused625] -[unused626] -[unused627] -[unused628] -[unused629] -[unused630] -[unused631] -[unused632] -[unused633] -[unused634] -[unused635] -[unused636] -[unused637] -[unused638] -[unused639] -[unused640] -[unused641] -[unused642] -[unused643] -[unused644] -[unused645] -[unused646] -[unused647] -[unused648] -[unused649] -[unused650] -[unused651] -[unused652] -[unused653] -[unused654] -[unused655] -[unused656] -[unused657] -[unused658] -[unused659] -[unused660] -[unused661] -[unused662] -[unused663] -[unused664] -[unused665] -[unused666] -[unused667] -[unused668] -[unused669] -[unused670] -[unused671] -[unused672] -[unused673] -[unused674] -[unused675] -[unused676] -[unused677] -[unused678] -[unused679] -[unused680] -[unused681] -[unused682] -[unused683] -[unused684] -[unused685] -[unused686] -[unused687] -[unused688] -[unused689] -[unused690] -[unused691] -[unused692] -[unused693] -[unused694] -[unused695] -[unused696] -[unused697] -[unused698] -[unused699] -[unused700] -[unused701] -[unused702] -[unused703] -[unused704] -[unused705] -[unused706] -[unused707] -[unused708] -[unused709] -[unused710] -[unused711] -[unused712] -[unused713] -[unused714] -[unused715] -[unused716] -[unused717] -[unused718] -[unused719] -[unused720] -[unused721] -[unused722] -[unused723] -[unused724] -[unused725] -[unused726] -[unused727] -[unused728] -[unused729] -[unused730] -[unused731] -[unused732] -[unused733] -[unused734] -[unused735] -[unused736] -[unused737] -[unused738] -[unused739] -[unused740] -[unused741] -[unused742] -[unused743] -[unused744] -[unused745] -[unused746] -[unused747] -[unused748] -[unused749] -[unused750] -[unused751] -[unused752] -[unused753] -[unused754] -[unused755] -[unused756] -[unused757] -[unused758] -[unused759] -[unused760] -[unused761] -[unused762] -[unused763] -[unused764] -[unused765] -[unused766] -[unused767] -[unused768] -[unused769] -[unused770] -[unused771] -[unused772] -[unused773] -[unused774] -[unused775] -[unused776] -[unused777] -[unused778] -[unused779] -[unused780] -[unused781] -[unused782] -[unused783] -[unused784] -[unused785] -[unused786] -[unused787] -[unused788] -[unused789] -[unused790] -[unused791] -[unused792] -[unused793] -[unused794] -[unused795] -[unused796] -[unused797] -[unused798] -[unused799] -[unused800] -[unused801] -[unused802] -[unused803] -[unused804] -[unused805] -[unused806] -[unused807] -[unused808] -[unused809] -[unused810] -[unused811] -[unused812] -[unused813] -[unused814] -[unused815] -[unused816] -[unused817] -[unused818] -[unused819] -[unused820] -[unused821] -[unused822] 
-[unused823] -[unused824] -[unused825] -[unused826] -[unused827] -[unused828] -[unused829] -[unused830] -[unused831] -[unused832] -[unused833] -[unused834] -[unused835] -[unused836] -[unused837] -[unused838] -[unused839] -[unused840] -[unused841] -[unused842] -[unused843] -[unused844] -[unused845] -[unused846] -[unused847] -[unused848] -[unused849] -[unused850] -[unused851] -[unused852] -[unused853] -[unused854] -[unused855] -[unused856] -[unused857] -[unused858] -[unused859] -[unused860] -[unused861] -[unused862] -[unused863] -[unused864] -[unused865] -[unused866] -[unused867] -[unused868] -[unused869] -[unused870] -[unused871] -[unused872] -[unused873] -[unused874] -[unused875] -[unused876] -[unused877] -[unused878] -[unused879] -[unused880] -[unused881] -[unused882] -[unused883] -[unused884] -[unused885] -[unused886] -[unused887] -[unused888] -[unused889] -[unused890] -[unused891] -[unused892] -[unused893] -[unused894] -[unused895] -[unused896] -[unused897] -[unused898] -[unused899] -[unused900] -[unused901] -[unused902] -[unused903] -[unused904] -[unused905] -[unused906] -[unused907] -[unused908] -[unused909] -[unused910] -[unused911] -[unused912] -[unused913] -[unused914] -[unused915] -[unused916] -[unused917] -[unused918] -[unused919] -[unused920] -[unused921] -[unused922] -[unused923] -[unused924] -[unused925] -[unused926] -[unused927] -[unused928] -[unused929] -[unused930] -[unused931] -[unused932] -[unused933] -[unused934] -[unused935] -[unused936] -[unused937] -[unused938] -[unused939] -[unused940] -[unused941] -[unused942] -[unused943] -[unused944] -[unused945] -[unused946] -[unused947] -[unused948] -[unused949] -[unused950] -[unused951] -[unused952] -[unused953] -[unused954] -[unused955] -[unused956] -[unused957] -[unused958] -[unused959] -[unused960] -[unused961] -[unused962] -[unused963] -[unused964] -[unused965] -[unused966] -[unused967] -[unused968] -[unused969] -[unused970] -[unused971] -[unused972] -[unused973] -[unused974] -[unused975] -[unused976] -[unused977] -[unused978] -[unused979] -[unused980] -[unused981] -[unused982] -[unused983] -[unused984] -[unused985] -[unused986] -[unused987] -[unused988] -[unused989] -[unused990] -[unused991] -[unused992] -[unused993] -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? 
-@ -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¡ -¢ -£ -¤ -¥ -¦ -§ -¨ -© -ª -« -¬ -® -° -± -² -³ -´ -µ -¶ -· -¹ -º -» -¼ -½ -¾ -¿ -× -ß -æ -ð -÷ -ø -þ -đ -ħ -ı -ł -ŋ -œ -ƒ -ɐ -ɑ -ɒ -ɔ -ɕ -ə -ɛ -ɡ -ɣ -ɨ -ɪ -ɫ -ɬ -ɯ -ɲ -ɴ -ɹ -ɾ -ʀ -ʁ -ʂ -ʃ -ʉ -ʊ -ʋ -ʌ -ʎ -ʐ -ʑ -ʒ -ʔ -ʰ -ʲ -ʳ -ʷ -ʸ -ʻ -ʼ -ʾ -ʿ -ˈ -ː -ˡ -ˢ -ˣ -ˤ -α -β -γ -δ -ε -ζ -η -θ -ι -κ -λ -μ -ν -ξ -ο -π -ρ -ς -σ -τ -υ -φ -χ -ψ -ω -а -б -в -г -д -е -ж -з -и -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -ђ -є -і -ј -љ -њ -ћ -ӏ -ա -բ -գ -դ -ե -թ -ի -լ -կ -հ -մ -յ -ն -ո -պ -ս -վ -տ -ր -ւ -ք -־ -א -ב -ג -ד -ה -ו -ז -ח -ט -י -ך -כ -ל -ם -מ -ן -נ -ס -ע -ף -פ -ץ -צ -ק -ר -ש -ת -، -ء -ا -ب -ة -ت -ث -ج -ح -خ -د -ذ -ر -ز -س -ش -ص -ض -ط -ظ -ع -غ -ـ -ف -ق -ك -ل -م -ن -ه -و -ى -ي -ٹ -پ -چ -ک -گ -ں -ھ -ہ -ی -ے -अ -आ -उ -ए -क -ख -ग -च -ज -ट -ड -ण -त -थ -द -ध -न -प -ब -भ -म -य -र -ल -व -श -ष -स -ह -ा -ि -ी -ो -। -॥ -ং -অ -আ -ই -উ -এ -ও -ক -খ -গ -চ -ছ -জ -ট -ড -ণ -ত -থ -দ -ধ -ন -প -ব -ভ -ম -য -র -ল -শ -ষ -স -হ -া -ি -ী -ে -க -ச -ட -த -ந -ன -ப -ம -ய -ர -ல -ள -வ -ா -ி -ு -ே -ை -ನ -ರ -ಾ -ක -ය -ර -ල -ව -ා -ก -ง -ต -ท -น -พ -ม -ย -ร -ล -ว -ส -อ -า -เ -་ -། -ག -ང -ད -ན -པ -བ -མ -འ -ར -ལ -ས -မ -ა -ბ -გ -დ -ე -ვ -თ -ი -კ -ლ -მ -ნ -ო -რ -ს -ტ -უ -ᄀ -ᄂ -ᄃ -ᄅ -ᄆ -ᄇ -ᄉ -ᄊ -ᄋ -ᄌ -ᄎ -ᄏ -ᄐ -ᄑ -ᄒ -ᅡ -ᅢ -ᅥ -ᅦ -ᅧ -ᅩ -ᅪ -ᅭ -ᅮ -ᅯ -ᅲ -ᅳ -ᅴ -ᅵ -ᆨ -ᆫ -ᆯ -ᆷ -ᆸ -ᆼ -ᴬ -ᴮ -ᴰ -ᴵ -ᴺ -ᵀ -ᵃ -ᵇ -ᵈ -ᵉ -ᵍ -ᵏ -ᵐ -ᵒ -ᵖ -ᵗ -ᵘ -ᵢ -ᵣ -ᵤ -ᵥ -ᶜ -ᶠ -‐ -‑ -‒ -– -— -― -‖ -‘ -’ -‚ -“ -” -„ -† -‡ -• -… -‰ -′ -″ -› -‿ -⁄ -⁰ -ⁱ -⁴ -⁵ -⁶ -⁷ -⁸ -⁹ -⁺ -⁻ -ⁿ -₀ -₁ -₂ -₃ -₄ -₅ -₆ -₇ -₈ -₉ -₊ -₍ -₎ -ₐ -ₑ -ₒ -ₓ -ₕ -ₖ -ₗ -ₘ -ₙ -ₚ -ₛ -ₜ -₤ -₩ -€ -₱ -₹ -ℓ -№ -ℝ -™ -⅓ -⅔ -← -↑ -→ -↓ -↔ -↦ -⇄ -⇌ -⇒ -∂ -∅ -∆ -∇ -∈ -− -∗ -∘ -√ -∞ -∧ -∨ -∩ -∪ -≈ -≡ -≤ -≥ -⊂ -⊆ -⊕ -⊗ -⋅ -─ -│ -■ -▪ -● -★ -☆ -☉ -♠ -♣ -♥ -♦ -♭ -♯ -⟨ -⟩ -ⱼ -⺩ -⺼ -⽥ -、 -。 -〈 -〉 -《 -》 -「 -」 -『 -』 -〜 -あ -い -う -え -お -か -き -く -け -こ -さ -し -す -せ -そ -た -ち -っ -つ -て -と -な -に -ぬ -ね -の -は -ひ -ふ -へ -ほ -ま -み -む -め -も -や -ゆ -よ -ら -り -る -れ -ろ -を -ん -ァ -ア -ィ -イ -ウ -ェ -エ -オ -カ -キ -ク -ケ -コ -サ -シ -ス -セ -タ -チ -ッ -ツ -テ -ト -ナ -ニ -ノ -ハ -ヒ -フ -ヘ -ホ -マ -ミ -ム -メ -モ -ャ -ュ -ョ -ラ -リ -ル -レ -ロ -ワ -ン -・ -ー -一 -三 -上 -下 -不 -世 -中 -主 -久 -之 -也 -事 -二 -五 -井 -京 -人 -亻 -仁 -介 -代 -仮 -伊 -会 -佐 -侍 -保 -信 -健 -元 -光 -八 -公 -内 -出 -分 -前 -劉 -力 -加 -勝 -北 -区 -十 -千 -南 -博 -原 -口 -古 -史 -司 -合 -吉 -同 -名 -和 -囗 -四 -国 -國 -土 -地 -坂 -城 -堂 -場 -士 -夏 -外 -大 -天 -太 -夫 -奈 -女 -子 -学 -宀 -宇 -安 -宗 -定 -宣 -宮 -家 -宿 -寺 -將 -小 -尚 -山 -岡 -島 -崎 -川 -州 -巿 -帝 -平 -年 -幸 -广 -弘 -張 -彳 -後 -御 -德 -心 -忄 -志 -忠 -愛 -成 -我 -戦 -戸 -手 -扌 -政 -文 -新 -方 -日 -明 -星 -春 -昭 -智 -曲 -書 -月 -有 -朝 -木 -本 -李 -村 -東 -松 -林 -森 -楊 -樹 -橋 -歌 -止 -正 -武 -比 -氏 -民 -水 -氵 -氷 -永 -江 -沢 -河 -治 -法 -海 -清 -漢 -瀬 -火 -版 -犬 -王 -生 -田 -男 -疒 -発 -白 -的 -皇 -目 -相 -省 -真 -石 -示 -社 -神 -福 -禾 -秀 -秋 -空 -立 -章 -竹 -糹 -美 -義 -耳 -良 -艹 -花 -英 -華 -葉 -藤 -行 -街 -西 -見 -訁 -語 -谷 -貝 -貴 -車 -軍 -辶 -道 -郎 -郡 -部 -都 -里 -野 -金 -鈴 -镇 -長 -門 -間 -阝 -阿 -陳 -陽 -雄 -青 -面 -風 -食 -香 -馬 -高 -龍 -龸 -fi -fl -! -( -) -, -- -. -/ -: -? 
-~ -the -of -and -in -to -was -he -is -as -for -on -with -that -it -his -by -at -from -her -##s -she -you -had -an -were -but -be -this -are -not -my -they -one -which -or -have -him -me -first -all -also -their -has -up -who -out -been -when -after -there -into -new -two -its -##a -time -would -no -what -about -said -we -over -then -other -so -more -##e -can -if -like -back -them -only -some -could -##i -where -just -##ing -during -before -##n -do -##o -made -school -through -than -now -years -most -world -may -between -down -well -three -##d -year -while -will -##ed -##r -##y -later -##t -city -under -around -did -such -being -used -state -people -part -know -against -your -many -second -university -both -national -##er -these -don -known -off -way -until -re -how -even -get -head -... -didn -##ly -team -american -because -de -##l -born -united -film -since -still -long -work -south -us -became -any -high -again -day -family -see -right -man -eyes -house -season -war -states -including -took -life -north -same -each -called -name -much -place -however -go -four -group -another -found -won -area -here -going -10 -away -series -left -home -music -best -make -hand -number -company -several -never -last -john -000 -very -album -take -end -good -too -following -released -game -played -little -began -district -##m -old -want -those -side -held -own -early -county -ll -league -use -west -##u -face -think -##es -2010 -government -##h -march -came -small -general -town -june -##on -line -based -something -##k -september -thought -looked -along -international -2011 -air -july -club -went -january -october -our -august -april -york -12 -few -2012 -2008 -east -show -member -college -2009 -father -public -##us -come -men -five -set -station -church -##c -next -former -november -room -party -located -december -2013 -age -got -2007 -##g -system -let -love -2006 -though -every -2014 -look -song -water -century -without -body -black -night -within -great -women -single -ve -building -large -population -river -named -band -white -started -##an -once -15 -20 -should -18 -2015 -service -top -built -british -open -death -king -moved -local -times -children -february -book -why -11 -door -need -president -order -final -road -wasn -although -due -major -died -village -third -knew -2016 -asked -turned -st -wanted -say -##p -together -received -main -son -served -different -##en -behind -himself -felt -members -power -football -law -voice -play -##in -near -park -history -30 -having -2005 -16 -##man -saw -mother -##al -army -point -front -help -english -street -art -late -hands -games -award -##ia -young -14 -put -published -country -division -across -told -13 -often -ever -french -london -center -six -red -2017 -led -days -include -light -25 -find -tell -among -species -really -according -central -half -2004 -form -original -gave -office -making -enough -lost -full -opened -must -included -live -given -german -player -run -business -woman -community -cup -might -million -land -2000 -court -development -17 -short -round -ii -km -seen -class -story -always -become -sure -research -almost -director -council -la -##2 -career -things -using -island -##z -couldn -car -##is -24 -close -force -##1 -better -free -support -control -field -students -2003 -education -married -##b -nothing -worked -others -record -big -inside -level -anything -continued -give -james -##3 -military -established -non -returned -feel -does -title -written -thing -feet -william -far -co -association -hard -already -2002 -##ra -championship 
-human -western -100 -##na -department -hall -role -various -production -21 -19 -heart -2001 -living -fire -version -##ers -##f -television -royal -##4 -produced -working -act -case -society -region -present -radio -period -looking -least -total -keep -england -wife -program -per -brother -mind -special -22 -##le -am -works -soon -##6 -political -george -services -taken -created -##7 -further -able -reached -david -union -joined -upon -done -important -social -information -either -##ic -##x -appeared -position -ground -lead -rock -dark -election -23 -board -france -hair -course -arms -site -police -girl -instead -real -sound -##v -words -moment -##te -someone -##8 -summer -project -announced -san -less -wrote -past -followed -##5 -blue -founded -al -finally -india -taking -records -america -##ne -1999 -design -considered -northern -god -stop -battle -toward -european -outside -described -track -today -playing -language -28 -call -26 -heard -professional -low -australia -miles -california -win -yet -green -##ie -trying -blood -##ton -southern -science -maybe -everything -match -square -27 -mouth -video -race -recorded -leave -above -##9 -daughter -points -space -1998 -museum -change -middle -common -##0 -move -tv -post -##ta -lake -seven -tried -elected -closed -ten -paul -minister -##th -months -start -chief -return -canada -person -sea -release -similar -modern -brought -rest -hit -formed -mr -##la -1997 -floor -event -doing -thomas -1996 -robert -care -killed -training -star -week -needed -turn -finished -railway -rather -news -health -sent -example -ran -term -michael -coming -currently -yes -forces -despite -gold -areas -50 -stage -fact -29 -dead -says -popular -2018 -originally -germany -probably -developed -result -pulled -friend -stood -money -running -mi -signed -word -songs -child -eventually -met -tour -average -teams -minutes -festival -current -deep -kind -1995 -decided -usually -eastern -seemed -##ness -episode -bed -added -table -indian -private -charles -route -available -idea -throughout -centre -addition -appointed -style -1994 -books -eight -construction -press -mean -wall -friends -remained -schools -study -##ch -##um -institute -oh -chinese -sometimes -events -possible -1992 -australian -type -brown -forward -talk -process -food -debut -seat -performance -committee -features -character -arts -herself -else -lot -strong -russian -range -hours -peter -arm -##da -morning -dr -sold -##ry -quickly -directed -1993 -guitar -china -##w -31 -list -##ma -performed -media -uk -players -smile -##rs -myself -40 -placed -coach -province -towards -wouldn -leading -whole -boy -official -designed -grand -census -##el -europe -attack -japanese -henry -1991 -##re -##os -cross -getting -alone -action -lower -network -wide -washington -japan -1990 -hospital -believe -changed -sister -##ar -hold -gone -sir -hadn -ship -##ka -studies -academy -shot -rights -below -base -bad -involved -kept -largest -##ist -bank -future -especially -beginning -mark -movement -section -female -magazine -plan -professor -lord -longer -##ian -sat -walked -hill -actually -civil -energy -model -families -size -thus -aircraft -completed -includes -data -captain -##or -fight -vocals -featured -richard -bridge -fourth -1989 -officer -stone -hear -##ism -means -medical -groups -management -self -lips -competition -entire -lived -technology -leaving -federal -tournament -bit -passed -hot -independent -awards -kingdom -mary -spent -fine -doesn -reported -##ling -jack -fall -raised -itself -stay -true -studio -1988 
-sports -replaced -paris -systems -saint -leader -theatre -whose -market -capital -parents -spanish -canadian -earth -##ity -cut -degree -writing -bay -christian -awarded -natural -higher -bill -##as -coast -provided -previous -senior -ft -valley -organization -stopped -onto -countries -parts -conference -queen -security -interest -saying -allowed -master -earlier -phone -matter -smith -winning -try -happened -moving -campaign -los -##ley -breath -nearly -mid -1987 -certain -girls -date -italian -african -standing -fell -artist -##ted -shows -deal -mine -industry -1986 -##ng -everyone -republic -provide -collection -library -student -##ville -primary -owned -older -via -heavy -1st -makes -##able -attention -anyone -africa -##ri -stated -length -ended -fingers -command -staff -skin -foreign -opening -governor -okay -medal -kill -sun -cover -job -1985 -introduced -chest -hell -feeling -##ies -success -meet -reason -standard -meeting -novel -1984 -trade -source -buildings -##land -rose -guy -goal -##ur -chapter -native -husband -previously -unit -limited -entered -weeks -producer -operations -mountain -takes -covered -forced -related -roman -complete -successful -key -texas -cold -##ya -channel -1980 -traditional -films -dance -clear -approximately -500 -nine -van -prince -question -active -tracks -ireland -regional -silver -author -personal -sense -operation -##ine -economic -1983 -holding -twenty -isbn -additional -speed -hour -edition -regular -historic -places -whom -shook -movie -km² -secretary -prior -report -chicago -read -foundation -view -engine -scored -1982 -units -ask -airport -property -ready -immediately -lady -month -listed -contract -##de -manager -themselves -lines -##ki -navy -writer -meant -##ts -runs -##ro -practice -championships -singer -glass -commission -required -forest -starting -culture -generally -giving -access -attended -test -couple -stand -catholic -martin -caught -executive -##less -eye -##ey -thinking -chair -quite -shoulder -1979 -hope -decision -plays -defeated -municipality -whether -structure -offered -slowly -pain -ice -direction -##ion -paper -mission -1981 -mostly -200 -noted -individual -managed -nature -lives -plant -##ha -helped -except -studied -computer -figure -relationship -issue -significant -loss -die -smiled -gun -ago -highest -1972 -##am -male -bring -goals -mexico -problem -distance -commercial -completely -location -annual -famous -drive -1976 -neck -1978 -surface -caused -italy -understand -greek -highway -wrong -hotel -comes -appearance -joseph -double -issues -musical -companies -castle -income -review -assembly -bass -initially -parliament -artists -experience -1974 -particular -walk -foot -engineering -talking -window -dropped -##ter -miss -baby -boys -break -1975 -stars -edge -remember -policy -carried -train -stadium -bar -sex -angeles -evidence -##ge -becoming -assistant -soviet -1977 -upper -step -wing -1970 -youth -financial -reach -##ll -actor -numerous -##se -##st -nodded -arrived -##ation -minute -##nt -believed -sorry -complex -beautiful -victory -associated -temple -1968 -1973 -chance -perhaps -metal -##son -1945 -bishop -##et -lee -launched -particularly -tree -le -retired -subject -prize -contains -yeah -theory -empire -##ce -suddenly -waiting -trust -recording -##to -happy -terms -camp -champion -1971 -religious -pass -zealand -names -2nd -port -ancient -tom -corner -represented -watch -legal -anti -justice -cause -watched -brothers -45 -material -changes -simply -response -louis -fast -##ting -answer -60 -historical 
-1969 -stories -straight -create -feature -increased -rate -administration -virginia -el -activities -cultural -overall -winner -programs -basketball -legs -guard -beyond -cast -doctor -mm -flight -results -remains -cost -effect -winter -##ble -larger -islands -problems -chairman -grew -commander -isn -1967 -pay -failed -selected -hurt -fort -box -regiment -majority -journal -35 -edward -plans -##ke -##ni -shown -pretty -irish -characters -directly -scene -likely -operated -allow -spring -##j -junior -matches -looks -mike -houses -fellow -##tion -beach -marriage -##ham -##ive -rules -oil -65 -florida -expected -nearby -congress -sam -peace -recent -iii -wait -subsequently -cell -##do -variety -serving -agreed -please -poor -joe -pacific -attempt -wood -democratic -piece -prime -##ca -rural -mile -touch -appears -township -1964 -1966 -soldiers -##men -##ized -1965 -pennsylvania -closer -fighting -claimed -score -jones -physical -editor -##ous -filled -genus -specific -sitting -super -mom -##va -therefore -supported -status -fear -cases -store -meaning -wales -minor -spain -tower -focus -vice -frank -follow -parish -separate -golden -horse -fifth -remaining -branch -32 -presented -stared -##id -uses -secret -forms -##co -baseball -exactly -##ck -choice -note -discovered -travel -composed -truth -russia -ball -color -kiss -dad -wind -continue -ring -referred -numbers -digital -greater -##ns -metres -slightly -direct -increase -1960 -responsible -crew -rule -trees -troops -##no -broke -goes -individuals -hundred -weight -creek -sleep -memory -defense -provides -ordered -code -value -jewish -windows -1944 -safe -judge -whatever -corps -realized -growing -pre -##ga -cities -alexander -gaze -lies -spread -scott -letter -showed -situation -mayor -transport -watching -workers -extended -##li -expression -normal -##ment -chart -multiple -border -##ba -host -##ner -daily -mrs -walls -piano -##ko -heat -cannot -##ate -earned -products -drama -era -authority -seasons -join -grade -##io -sign -difficult -machine -1963 -territory -mainly -##wood -stations -squadron -1962 -stepped -iron -19th -##led -serve -appear -sky -speak -broken -charge -knowledge -kilometres -removed -ships -article -campus -simple -##ty -pushed -britain -##ve -leaves -recently -cd -soft -boston -latter -easy -acquired -poland -##sa -quality -officers -presence -planned -nations -mass -broadcast -jean -share -image -influence -wild -offer -emperor -electric -reading -headed -ability -promoted -yellow -ministry -1942 -throat -smaller -politician -##by -latin -spoke -cars -williams -males -lack -pop -80 -##ier -acting -seeing -consists -##ti -estate -1961 -pressure -johnson -newspaper -jr -chris -olympics -online -conditions -beat -elements -walking -vote -##field -needs -carolina -text -featuring -global -block -shirt -levels -francisco -purpose -females -et -dutch -duke -ahead -gas -twice -safety -serious -turning -highly -lieutenant -firm -maria -amount -mixed -daniel -proposed -perfect -agreement -affairs -3rd -seconds -contemporary -paid -1943 -prison -save -kitchen -label -administrative -intended -constructed -academic -nice -teacher -races -1956 -formerly -corporation -ben -nation -issued -shut -1958 -drums -housing -victoria -seems -opera -1959 -graduated -function -von -mentioned -picked -build -recognized -shortly -protection -picture -notable -exchange -elections -1980s -loved -percent -racing -fish -elizabeth -garden -volume -hockey -1941 -beside -settled -##ford -1940 -competed -replied -drew -1948 -actress -marine 
-scotland -steel -glanced -farm -steve -1957 -risk -tonight -positive -magic -singles -effects -gray -screen -dog -##ja -residents -bus -sides -none -secondary -literature -polish -destroyed -flying -founder -households -1939 -lay -reserve -usa -gallery -##ler -1946 -industrial -younger -approach -appearances -urban -ones -1950 -finish -avenue -powerful -fully -growth -page -honor -jersey -projects -advanced -revealed -basic -90 -infantry -pair -equipment -visit -33 -evening -search -grant -effort -solo -treatment -buried -republican -primarily -bottom -owner -1970s -israel -gives -jim -dream -bob -remain -spot -70 -notes -produce -champions -contact -ed -soul -accepted -ways -del -##ally -losing -split -price -capacity -basis -trial -questions -##ina -1955 -20th -guess -officially -memorial -naval -initial -##ization -whispered -median -engineer -##ful -sydney -##go -columbia -strength -300 -1952 -tears -senate -00 -card -asian -agent -1947 -software -44 -draw -warm -supposed -com -pro -##il -transferred -leaned -##at -candidate -escape -mountains -asia -potential -activity -entertainment -seem -traffic -jackson -murder -36 -slow -product -orchestra -haven -agency -bbc -taught -website -comedy -unable -storm -planning -albums -rugby -environment -scientific -grabbed -protect -##hi -boat -typically -1954 -1953 -damage -principal -divided -dedicated -mount -ohio -##berg -pick -fought -driver -##der -empty -shoulders -sort -thank -berlin -prominent -account -freedom -necessary -efforts -alex -headquarters -follows -alongside -des -simon -andrew -suggested -operating -learning -steps -1949 -sweet -technical -begin -easily -34 -teeth -speaking -settlement -scale -##sh -renamed -ray -max -enemy -semi -joint -compared -##rd -scottish -leadership -analysis -offers -georgia -pieces -captured -animal -deputy -guest -organized -##lin -tony -combined -method -challenge -1960s -huge -wants -battalion -sons -rise -crime -types -facilities -telling -path -1951 -platform -sit -1990s -##lo -tells -assigned -rich -pull -##ot -commonly -alive -##za -letters -concept -conducted -wearing -happen -bought -becomes -holy -gets -ocean -defeat -languages -purchased -coffee -occurred -titled -##q -declared -applied -sciences -concert -sounds -jazz -brain -##me -painting -fleet -tax -nick -##ius -michigan -count -animals -leaders -episodes -##line -content -##den -birth -##it -clubs -64 -palace -critical -refused -fair -leg -laughed -returning -surrounding -participated -formation -lifted -pointed -connected -rome -medicine -laid -taylor -santa -powers -adam -tall -shared -focused -knowing -yards -entrance -falls -##wa -calling -##ad -sources -chosen -beneath -resources -yard -##ite -nominated -silence -zone -defined -##que -gained -thirty -38 -bodies -moon -##ard -adopted -christmas -widely -register -apart -iran -premier -serves -du -unknown -parties -##les -generation -##ff -continues -quick -fields -brigade -quiet -teaching -clothes -impact -weapons -partner -flat -theater -supreme -1938 -37 -relations -##tor -plants -suffered -1936 -wilson -kids -begins -##age -1918 -seats -armed -internet -models -worth -laws -400 -communities -classes -background -knows -thanks -quarter -reaching -humans -carry -killing -format -kong -hong -setting -75 -architecture -disease -railroad -inc -possibly -wish -arthur -thoughts -harry -doors -density -##di -crowd -illinois -stomach -tone -unique -reports -anyway -##ir -liberal -der -vehicle -thick -dry -drug -faced -largely -facility -theme -holds -creation -strange -colonel -##mi 
-revolution -bell -politics -turns -silent -rail -relief -independence -combat -shape -write -determined -sales -learned -4th -finger -oxford -providing -1937 -heritage -fiction -situated -designated -allowing -distribution -hosted -##est -sight -interview -estimated -reduced -##ria -toronto -footballer -keeping -guys -damn -claim -motion -sport -sixth -stayed -##ze -en -rear -receive -handed -twelve -dress -audience -granted -brazil -##well -spirit -##ated -noticed -etc -olympic -representative -eric -tight -trouble -reviews -drink -vampire -missing -roles -ranked -newly -household -finals -wave -critics -##ee -phase -massachusetts -pilot -unlike -philadelphia -bright -guns -crown -organizations -roof -42 -respectively -clearly -tongue -marked -circle -fox -korea -bronze -brian -expanded -sexual -supply -yourself -inspired -labour -fc -##ah -reference -vision -draft -connection -brand -reasons -1935 -classic -driving -trip -jesus -cells -entry -1920 -neither -trail -claims -atlantic -orders -labor -nose -afraid -identified -intelligence -calls -cancer -attacked -passing -stephen -positions -imperial -grey -jason -39 -sunday -48 -swedish -avoid -extra -uncle -message -covers -allows -surprise -materials -fame -hunter -##ji -1930 -citizens -figures -davis -environmental -confirmed -shit -titles -di -performing -difference -acts -attacks -##ov -existing -votes -opportunity -nor -shop -entirely -trains -opposite -pakistan -##pa -develop -resulted -representatives -actions -reality -pressed -##ish -barely -wine -conversation -faculty -northwest -ends -documentary -nuclear -stock -grace -sets -eat -alternative -##ps -bag -resulting -creating -surprised -cemetery -1919 -drop -finding -sarah -cricket -streets -tradition -ride -1933 -exhibition -target -ear -explained -rain -composer -injury -apartment -municipal -educational -occupied -netherlands -clean -billion -constitution -learn -1914 -maximum -classical -francis -lose -opposition -jose -ontario -bear -core -hills -rolled -ending -drawn -permanent -fun -##tes -##lla -lewis -sites -chamber -ryan -##way -scoring -height -1934 -##house -lyrics -staring -55 -officials -1917 -snow -oldest -##tic -orange -##ger -qualified -interior -apparently -succeeded -thousand -dinner -lights -existence -fans -heavily -41 -greatest -conservative -send -bowl -plus -enter -catch -##un -economy -duty -1929 -speech -authorities -princess -performances -versions -shall -graduate -pictures -effective -remembered -poetry -desk -crossed -starring -starts -passenger -sharp -##ant -acres -ass -weather -falling -rank -fund -supporting -check -adult -publishing -heads -cm -southeast -lane -##burg -application -bc -##ura -les -condition -transfer -prevent -display -ex -regions -earl -federation -cool -relatively -answered -besides -1928 -obtained -portion -##town -mix -##ding -reaction -liked -dean -express -peak -1932 -##tte -counter -religion -chain -rare -miller -convention -aid -lie -vehicles -mobile -perform -squad -wonder -lying -crazy -sword -##ping -attempted -centuries -weren -philosophy -category -##ize -anna -interested -47 -sweden -wolf -frequently -abandoned -kg -literary -alliance -task -entitled -##ay -threw -promotion -factory -tiny -soccer -visited -matt -fm -achieved -52 -defence -internal -persian -43 -methods -##ging -arrested -otherwise -cambridge -programming -villages -elementary -districts -rooms -criminal -conflict -worry -trained -1931 -attempts -waited -signal -bird -truck -subsequent -programme -##ol -ad -49 -communist -details -faith -sector 
-patrick -carrying -laugh -##ss -controlled -korean -showing -origin -fuel -evil -1927 -##ent -brief -identity -darkness -address -pool -missed -publication -web -planet -ian -anne -wings -invited -##tt -briefly -standards -kissed -##be -ideas -climate -causing -walter -worse -albert -articles -winners -desire -aged -northeast -dangerous -gate -doubt -1922 -wooden -multi -##ky -poet -rising -funding -46 -communications -communication -violence -copies -prepared -ford -investigation -skills -1924 -pulling -electronic -##ak -##ial -##han -containing -ultimately -offices -singing -understanding -restaurant -tomorrow -fashion -christ -ward -da -pope -stands -5th -flow -studios -aired -commissioned -contained -exist -fresh -americans -##per -wrestling -approved -kid -employed -respect -suit -1925 -angel -asking -increasing -frame -angry -selling -1950s -thin -finds -##nd -temperature -statement -ali -explain -inhabitants -towns -extensive -narrow -51 -jane -flowers -images -promise -somewhere -object -fly -closely -##ls -1912 -bureau -cape -1926 -weekly -presidential -legislative -1921 -##ai -##au -launch -founding -##ny -978 -##ring -artillery -strike -un -institutions -roll -writers -landing -chose -kevin -anymore -pp -##ut -attorney -fit -dan -billboard -receiving -agricultural -breaking -sought -dave -admitted -lands -mexican -##bury -charlie -specifically -hole -iv -howard -credit -moscow -roads -accident -1923 -proved -wear -struck -hey -guards -stuff -slid -expansion -1915 -cat -anthony -##kin -melbourne -opposed -sub -southwest -architect -failure -plane -1916 -##ron -map -camera -tank -listen -regarding -wet -introduction -metropolitan -link -ep -fighter -inch -grown -gene -anger -fixed -buy -dvd -khan -domestic -worldwide -chapel -mill -functions -examples -##head -developing -1910 -turkey -hits -pocket -antonio -papers -grow -unless -circuit -18th -concerned -attached -journalist -selection -journey -converted -provincial -painted -hearing -aren -bands -negative -aside -wondered -knight -lap -survey -ma -##ow -noise -billy -##ium -shooting -guide -bedroom -priest -resistance -motor -homes -sounded -giant -##mer -150 -scenes -equal -comic -patients -hidden -solid -actual -bringing -afternoon -touched -funds -wedding -consisted -marie -canal -sr -kim -treaty -turkish -recognition -residence -cathedral -broad -knees -incident -shaped -fired -norwegian -handle -cheek -contest -represent -##pe -representing -beauty -##sen -birds -advantage -emergency -wrapped -drawing -notice -pink -broadcasting -##ong -somehow -bachelor -seventh -collected -registered -establishment -alan -assumed -chemical -personnel -roger -retirement -jeff -portuguese -wore -tied -device -threat -progress -advance -##ised -banks -hired -manchester -nfl -teachers -structures -forever -##bo -tennis -helping -saturday -sale -applications -junction -hip -incorporated -neighborhood -dressed -ceremony -##ds -influenced -hers -visual -stairs -decades -inner -kansas -hung -hoped -gain -scheduled -downtown -engaged -austria -clock -norway -certainly -pale -protected -1913 -victor -employees -plate -putting -surrounded -##ists -finishing -blues -tropical -##ries -minnesota -consider -philippines -accept -54 -retrieved -1900 -concern -anderson -properties -institution -gordon -successfully -vietnam -##dy -backing -outstanding -muslim -crossing -folk -producing -usual -demand -occurs -observed -lawyer -educated -##ana -kelly -string -pleasure -budget -items -quietly -colorado -philip -typical -##worth -derived -600 -survived 
-asks -mental -##ide -56 -jake -jews -distinguished -ltd -1911 -sri -extremely -53 -athletic -loud -thousands -worried -shadow -transportation -horses -weapon -arena -importance -users -tim -objects -contributed -dragon -douglas -aware -senator -johnny -jordan -sisters -engines -flag -investment -samuel -shock -capable -clark -row -wheel -refers -session -familiar -biggest -wins -hate -maintained -drove -hamilton -request -expressed -injured -underground -churches -walker -wars -tunnel -passes -stupid -agriculture -softly -cabinet -regarded -joining -indiana -##ea -##ms -push -dates -spend -behavior -woods -protein -gently -chase -morgan -mention -burning -wake -combination -occur -mirror -leads -jimmy -indeed -impossible -singapore -paintings -covering -##nes -soldier -locations -attendance -sell -historian -wisconsin -invasion -argued -painter -diego -changing -egypt -##don -experienced -inches -##ku -missouri -vol -grounds -spoken -switzerland -##gan -reform -rolling -ha -forget -massive -resigned -burned -allen -tennessee -locked -values -improved -##mo -wounded -universe -sick -dating -facing -pack -purchase -user -##pur -moments -##ul -merged -anniversary -1908 -coal -brick -understood -causes -dynasty -queensland -establish -stores -crisis -promote -hoping -views -cards -referee -extension -##si -raise -arizona -improve -colonial -formal -charged -##rt -palm -lucky -hide -rescue -faces -95 -feelings -candidates -juan -##ell -goods -6th -courses -weekend -59 -luke -cash -fallen -##om -delivered -affected -installed -carefully -tries -swiss -hollywood -costs -lincoln -responsibility -##he -shore -file -proper -normally -maryland -assistance -jump -constant -offering -friendly -waters -persons -realize -contain -trophy -800 -partnership -factor -58 -musicians -cry -bound -oregon -indicated -hero -houston -medium -##ure -consisting -somewhat -##ara -57 -cycle -##che -beer -moore -frederick -gotten -eleven -worst -weak -approached -arranged -chin -loan -universal -bond -fifteen -pattern -disappeared -##ney -translated -##zed -lip -arab -capture -interests -insurance -##chi -shifted -cave -prix -warning -sections -courts -coat -plot -smell -feed -golf -favorite -maintain -knife -vs -voted -degrees -finance -quebec -opinion -translation -manner -ruled -operate -productions -choose -musician -discovery -confused -tired -separated -stream -techniques -committed -attend -ranking -kings -throw -passengers -measure -horror -fan -mining -sand -danger -salt -calm -decade -dam -require -runner -##ik -rush -associate -greece -##ker -rivers -consecutive -matthew -##ski -sighed -sq -documents -steam -edited -closing -tie -accused -1905 -##ini -islamic -distributed -directors -organisation -bruce -7th -breathing -mad -lit -arrival -concrete -taste -08 -composition -shaking -faster -amateur -adjacent -stating -1906 -twin -flew -##ran -tokyo -publications -##tone -obviously -ridge -storage -1907 -carl -pages -concluded -desert -driven -universities -ages -terminal -sequence -borough -250 -constituency -creative -cousin -economics -dreams -margaret -notably -reduce -montreal -mode -17th -ears -saved -jan -vocal -##ica -1909 -andy -##jo -riding -roughly -threatened -##ise -meters -meanwhile -landed -compete -repeated -grass -czech -regularly -charges -tea -sudden -appeal -##ung -solution -describes -pierre -classification -glad -parking -##ning -belt -physics -99 -rachel -add -hungarian -participate -expedition -damaged -gift -childhood -85 -fifty -##red -mathematics -jumped -letting -defensive -mph -##ux 
-meditation -poetic -##late -##nga -##ake -porto -listeners -dominance -kendra -mona -chandler -factions -22nd -salisbury -attitudes -derivative -##ido -##haus -intake -paced -javier -illustrator -barrels -bias -cockpit -burnett -dreamed -ensuing -##anda -receptors -someday -hawkins -mattered -##lal -slavic -1799 -jesuit -cameroon -wasted -tai -wax -lowering -victorious -freaking -outright -hancock -librarian -sensing -bald -calcium -myers -tablet -announcing -barack -shipyard -pharmaceutical -##uan -greenwich -flush -medley -patches -wolfgang -pt -speeches -acquiring -exams -nikolai -##gg -hayden -kannada -##type -reilly -##pt -waitress -abdomen -devastated -capped -pseudonym -pharmacy -fulfill -paraguay -1796 -clicked -##trom -archipelago -syndicated -##hman -lumber -orgasm -rejection -clifford -lorraine -advent -mafia -rodney -brock -##ght -##used -##elia -cassette -chamberlain -despair -mongolia -sensors -developmental -upstream -##eg -##alis -spanning -165 -trombone -basque -seeded -interred -renewable -rhys -leapt -revision -molecule -##ages -chord -vicious -nord -shivered -23rd -arlington -debts -corpus -sunrise -bays -blackburn -centimetres -##uded -shuddered -gm -strangely -gripping -cartoons -isabelle -orbital -##ppa -seals -proving -##lton -refusal -strengthened -bust -assisting -baghdad -batsman -portrayal -mara -pushes -spears -og -##cock -reside -nathaniel -brennan -1776 -confirmation -caucus -##worthy -markings -yemen -nobles -ku -lazy -viewer -catalan -encompasses -sawyer -##fall -sparked -substances -patents -braves -arranger -evacuation -sergio -persuade -dover -tolerance -penguin -cum -jockey -insufficient -townships -occupying -declining -plural -processed -projection -puppet -flanders -introduces -liability -##yon -gymnastics -antwerp -taipei -hobart -candles -jeep -wes -observers -126 -chaplain -bundle -glorious -##hine -hazel -flung -sol -excavations -dumped -stares -sh -bangalore -triangular -icelandic -intervals -expressing -turbine -##vers -songwriting -crafts -##igo -jasmine -ditch -rite -##ways -entertaining -comply -sorrow -wrestlers -basel -emirates -marian -rivera -helpful -##some -caution -downward -networking -##atory -##tered -darted -genocide -emergence -replies -specializing -spokesman -convenient -unlocked -fading -augustine -concentrations -resemblance -elijah -investigator -andhra -##uda -promotes -bean -##rrell -fleeing -wan -simone -announcer -##ame -##bby -lydia -weaver -132 -residency -modification -##fest -stretches -##ast -alternatively -nat -lowe -lacks -##ented -pam -tile -concealed -inferior -abdullah -residences -tissues -vengeance -##ided -moisture -peculiar -groove -zip -bologna -jennings -ninja -oversaw -zombies -pumping -batch -livingston -emerald -installations -1797 -peel -nitrogen -rama -##fying -##star -schooling -strands -responding -werner -##ost -lime -casa -accurately -targeting -##rod -underway -##uru -hemisphere -lester -##yard -occupies -2d -griffith -angrily -reorganized -##owing -courtney -deposited -##dd -##30 -estadio -##ifies -dunn -exiled -##ying -checks -##combe -##о -##fly -successes -unexpectedly -blu -assessed -##flower -##ه -observing -sacked -spiders -kn -##tail -mu -nodes -prosperity -audrey -divisional -155 -broncos -tangled -adjust -feeds -erosion -paolo -surf -directory -snatched -humid -admiralty -screwed -gt -reddish -##nese -modules -trench -lamps -bind -leah -bucks -competes -##nz -##form -transcription -##uc -isles -violently -clutching -pga -cyclist -inflation -flats -ragged -unnecessary -##hian -stubborn 
-coordinated -harriet -baba -disqualified -330 -insect -wolfe -##fies -reinforcements -rocked -duel -winked -embraced -bricks -##raj -hiatus -defeats -pending -brightly -jealousy -##xton -##hm -##uki -lena -gdp -colorful -##dley -stein -kidney -##shu -underwear -wanderers -##haw -##icus -guardians -m³ -roared -habits -##wise -permits -gp -uranium -punished -disguise -bundesliga -elise -dundee -erotic -partisan -pi -collectors -float -individually -rendering -behavioral -bucharest -ser -hare -valerie -corporal -nutrition -proportional -##isa -immense -##kis -pavement -##zie -##eld -sutherland -crouched -1775 -##lp -suzuki -trades -endurance -operas -crosby -prayed -priory -rory -socially -##urn -gujarat -##pu -walton -cube -pasha -privilege -lennon -floods -thorne -waterfall -nipple -scouting -approve -##lov -minorities -voter -dwight -extensions -assure -ballroom -slap -dripping -privileges -rejoined -confessed -demonstrating -patriotic -yell -investor -##uth -pagan -slumped -squares -##cle -##kins -confront -bert -embarrassment -##aid -aston -urging -sweater -starr -yuri -brains -williamson -commuter -mortar -structured -selfish -exports -##jon -cds -##him -unfinished -##rre -mortgage -destinations -##nagar -canoe -solitary -buchanan -delays -magistrate -fk -##pling -motivation -##lier -##vier -recruiting -assess -##mouth -malik -antique -1791 -pius -rahman -reich -tub -zhou -smashed -airs -galway -xii -conditioning -honduras -discharged -dexter -##pf -lionel -129 -debates -lemon -tiffany -volunteered -dom -dioxide -procession -devi -sic -tremendous -advertisements -colts -transferring -verdict -hanover -decommissioned -utter -relate -pac -racism -##top -beacon -limp -similarity -terra -occurrence -ant -##how -becky -capt -updates -armament -richie -pal -##graph -halloween -mayo -##ssen -##bone -cara -serena -fcc -dolls -obligations -##dling -violated -lafayette -jakarta -exploitation -##ime -infamous -iconic -##lah -##park -kitty -moody -reginald -dread -spill -crystals -olivier -modeled -bluff -equilibrium -separating -notices -ordnance -extinction -onset -cosmic -attachment -sammy -expose -privy -anchored -##bil -abbott -admits -bending -baritone -emmanuel -policeman -vaughan -winged -climax -dresses -denny -polytechnic -mohamed -burmese -authentic -nikki -genetics -grandparents -homestead -gaza -postponed -metacritic -una -##sby -##bat -unstable -dissertation -##rial -##cian -curls -obscure -uncovered -bronx -praying -disappearing -##hoe -prehistoric -coke -turret -mutations -nonprofit -pits -monaco -##ي -##usion -prominently -dispatched -podium -##mir -uci -##uation -133 -fortifications -birthplace -kendall -##lby -##oll -preacher -rack -goodman -##rman -persistent -##ott -countless -jaime -recorder -lexington -persecution -jumps -renewal -wagons -##11 -crushing -##holder -decorations -##lake -abundance -wrath -laundry -£1 -garde -##rp -jeanne -beetles -peasant -##sl -splitting -caste -sergei -##rer -##ema -scripts -##ively -rub -satellites -##vor -inscribed -verlag -scrapped -gale -packages -chick -potato -slogan -kathleen -arabs -##culture -counterparts -reminiscent -choral -##tead -rand -retains -bushes -dane -accomplish -courtesy -closes -##oth -slaughter -hague -krakow -lawson -tailed -elias -ginger -##ttes -canopy -betrayal -rebuilding -turf -##hof -frowning -allegiance -brigades -kicks -rebuild -polls -alias -nationalism -td -rowan -audition -bowie -fortunately -recognizes -harp -dillon -horrified -##oro -renault -##tics -ropes -##α -presumed -rewarded -infrared -wiping 
-accelerated -illustration -##rid -presses -practitioners -badminton -##iard -detained -##tera -recognizing -relates -misery -##sies -##tly -reproduction -piercing -potatoes -thornton -esther -manners -hbo -##aan -ours -bullshit -ernie -perennial -sensitivity -illuminated -rupert -##jin -##iss -##ear -rfc -nassau -##dock -staggered -socialism -##haven -appointments -nonsense -prestige -sharma -haul -##tical -solidarity -gps -##ook -##rata -igor -pedestrian -##uit -baxter -tenants -wires -medication -unlimited -guiding -impacts -diabetes -##rama -sasha -pas -clive -extraction -131 -continually -constraints -##bilities -sonata -hunted -sixteenth -chu -planting -quote -mayer -pretended -abs -spat -##hua -ceramic -##cci -curtains -pigs -pitching -##dad -latvian -sore -dayton -##sted -##qi -patrols -slice -playground -##nted -shone -stool -apparatus -inadequate -mates -treason -##ija -desires -##liga -##croft -somalia -laurent -mir -leonardo -oracle -grape -obliged -chevrolet -thirteenth -stunning -enthusiastic -##ede -accounted -concludes -currents -basil -##kovic -drought -##rica -mai -##aire -shove -posting -##shed -pilgrimage -humorous -packing -fry -pencil -wines -smells -144 -marilyn -aching -newest -clung -bon -neighbours -sanctioned -##pie -mug -##stock -drowning -##mma -hydraulic -##vil -hiring -reminder -lilly -investigators -##ncies -sour -##eous -compulsory -packet -##rion -##graphic -##elle -cannes -##inate -depressed -##rit -heroic -importantly -theresa -##tled -conway -saturn -marginal -rae -##xia -corresponds -royce -pact -jasper -explosives -packaging -aluminium -##ttered -denotes -rhythmic -spans -assignments -hereditary -outlined -originating -sundays -lad -reissued -greeting -beatrice -##dic -pillar -marcos -plots -handbook -alcoholic -judiciary -avant -slides -extract -masculine -blur -##eum -##force -homage -trembled -owens -hymn -trey -omega -signaling -socks -accumulated -reacted -attic -theo -lining -angie -distraction -primera -talbot -##key -1200 -ti -creativity -billed -##hey -deacon -eduardo -identifies -proposition -dizzy -gunner -hogan -##yam -##pping -##hol -ja -##chan -jensen -reconstructed -##berger -clearance -darius -##nier -abe -harlem -plea -dei -circled -emotionally -notation -fascist -neville -exceeded -upwards -viable -ducks -##fo -workforce -racer -limiting -shri -##lson -possesses -1600 -kerr -moths -devastating -laden -disturbing -locking -##cture -gal -fearing -accreditation -flavor -aide -1870s -mountainous -##baum -melt -##ures -motel -texture -servers -soda -##mb -herd -##nium -erect -puzzled -hum -peggy -examinations -gould -testified -geoff -ren -devised -sacks -##law -denial -posters -grunted -cesar -tutor -ec -gerry -offerings -byrne -falcons -combinations -ct -incoming -pardon -rocking -26th -avengers -flared -mankind -seller -uttar -loch -nadia -stroking -exposing -##hd -fertile -ancestral -instituted -##has -noises -prophecy -taxation -eminent -vivid -pol -##bol -dart -indirect -multimedia -notebook -upside -displaying -adrenaline -referenced -geometric -##iving -progression -##ddy -blunt -announce -##far -implementing -##lav -aggression -liaison -cooler -cares -headache -plantations -gorge -dots -impulse -thickness -ashamed -averaging -kathy -obligation -precursor -137 -fowler -symmetry -thee -225 -hears -##rai -undergoing -ads -butcher -bowler -##lip -cigarettes -subscription -goodness -##ically -browne -##hos -##tech -kyoto -donor -##erty -damaging -friction -drifting -expeditions -hardened -prostitution -152 -fauna -blankets -claw 
-tossing -snarled -butterflies -recruits -investigative -coated -healed -138 -communal -hai -xiii -academics -boone -psychologist -restless -lahore -stephens -mba -brendan -foreigners -printer -##pc -ached -explode -27th -deed -scratched -dared -##pole -cardiac -1780 -okinawa -proto -commando -compelled -oddly -electrons -##base -replica -thanksgiving -##rist -sheila -deliberate -stafford -tidal -representations -hercules -ou -##path -##iated -kidnapping -lenses -##tling -deficit -samoa -mouths -consuming -computational -maze -granting -smirk -razor -fixture -ideals -inviting -aiden -nominal -##vs -issuing -julio -pitt -ramsey -docks -##oss -exhaust -##owed -bavarian -draped -anterior -mating -ethiopian -explores -noticing -##nton -discarded -convenience -hoffman -endowment -beasts -cartridge -mormon -paternal -probe -sleeves -interfere -lump -deadline -##rail -jenks -bulldogs -scrap -alternating -justified -reproductive -nam -seize -descending -secretariat -kirby -coupe -grouped -smash -panther -sedan -tapping -##18 -lola -cheer -germanic -unfortunate -##eter -unrelated -##fan -subordinate -##sdale -suzanne -advertisement -##ility -horsepower -##lda -cautiously -discourse -luigi -##mans -##fields -noun -prevalent -mao -schneider -everett -surround -governorate -kira -##avia -westward -##take -misty -rails -sustainability -134 -unused -##rating -packs -toast -unwilling -regulate -thy -suffrage -nile -awe -assam -definitions -travelers -affordable -##rb -conferred -sells -undefeated -beneficial -torso -basal -repeating -remixes -##pass -bahrain -cables -fang -##itated -excavated -numbering -statutory -##rey -deluxe -##lian -forested -ramirez -derbyshire -zeus -slamming -transfers -astronomer -banana -lottery -berg -histories -bamboo -##uchi -resurrection -posterior -bowls -vaguely -##thi -thou -preserving -tensed -offence -##inas -meyrick -callum -ridden -watt -langdon -tying -lowland -snorted -daring -truman -##hale -##girl -aura -overly -filing -weighing -goa -infections -philanthropist -saunders -eponymous -##owski -latitude -perspectives -reviewing -mets -commandant -radial -##kha -flashlight -reliability -koch -vowels -amazed -ada -elaine -supper -##rth -##encies -predator -debated -soviets -cola -##boards -##nah -compartment -crooked -arbitrary -fourteenth -##ctive -havana -majors -steelers -clips -profitable -ambush -exited -packers -##tile -nude -cracks -fungi -##е -limb -trousers -josie -shelby -tens -frederic -##ος -definite -smoothly -constellation -insult -baton -discs -lingering -##nco -conclusions -lent -staging -becker -grandpa -shaky -##tron -einstein -obstacles -sk -adverse -elle -economically -##moto -mccartney -thor -dismissal -motions -readings -nostrils -treatise -##pace -squeezing -evidently -prolonged -1783 -venezuelan -je -marguerite -beirut -takeover -shareholders -##vent -denise -digit -airplay -norse -##bbling -imaginary -pills -hubert -blaze -vacated -eliminating -##ello -vine -mansfield -##tty -retrospective -barrow -borne -clutch -bail -forensic -weaving -##nett -##witz -desktop -citadel -promotions -worrying -dorset -ieee -subdivided -##iating -manned -expeditionary -pickup -synod -chuckle -185 -barney -##rz -##ffin -functionality -karachi -litigation -meanings -uc -lick -turbo -anders -##ffed -execute -curl -oppose -ankles -typhoon -##د -##ache -##asia -linguistics -compassion -pressures -grazing -perfection -##iting -immunity -monopoly -muddy -backgrounds -136 -namibia -francesca -monitors -attracting -stunt -tuition -##ии -vegetable -##mates -##quent -mgm 
-jen -complexes -forts -##ond -cellar -bites -seventeenth -royals -flemish -failures -mast -charities -##cular -peruvian -capitals -macmillan -ipswich -outward -frigate -postgraduate -folds -employing -##ouse -concurrently -fiery -##tai -contingent -nightmares -monumental -nicaragua -##kowski -lizard -mal -fielding -gig -reject -##pad -harding -##ipe -coastline -##cin -##nos -beethoven -humphrey -innovations -##tam -##nge -norris -doris -solicitor -huang -obey -141 -##lc -niagara -##tton -shelves -aug -bourbon -curry -nightclub -specifications -hilton -##ndo -centennial -dispersed -worm -neglected -briggs -sm -font -kuala -uneasy -plc -##nstein -##bound -##aking -##burgh -awaiting -pronunciation -##bbed -##quest -eh -optimal -zhu -raped -greens -presided -brenda -worries -##life -venetian -marxist -turnout -##lius -refined -braced -sins -grasped -sunderland -nickel -speculated -lowell -cyrillic -communism -fundraising -resembling -colonists -mutant -freddie -usc -##mos -gratitude -##run -mural -##lous -chemist -wi -reminds -28th -steals -tess -pietro -##ingen -promoter -ri -microphone -honoured -rai -sant -##qui -feather -##nson -burlington -kurdish -terrorists -deborah -sickness -##wed -##eet -hazard -irritated -desperation -veil -clarity -##rik -jewels -xv -##gged -##ows -##cup -berkshire -unfair -mysteries -orchid -winced -exhaustion -renovations -stranded -obe -infinity -##nies -adapt -redevelopment -thanked -registry -olga -domingo -noir -tudor -ole -##atus -commenting -behaviors -##ais -crisp -pauline -probable -stirling -wigan -##bian -paralympics -panting -surpassed -##rew -luca -barred -pony -famed -##sters -cassandra -waiter -carolyn -exported -##orted -andres -destructive -deeds -jonah -castles -vacancy -suv -##glass -1788 -orchard -yep -famine -belarusian -sprang -##forth -skinny -##mis -administrators -rotterdam -zambia -zhao -boiler -discoveries -##ride -##physics -lucius -disappointing -outreach -spoon -##frame -qualifications -unanimously -enjoys -regency -##iidae -stade -realism -veterinary -rodgers -dump -alain -chestnut -castile -censorship -rumble -gibbs -##itor -communion -reggae -inactivated -logs -loads -##houses -homosexual -##iano -ale -informs -##cas -phrases -plaster -linebacker -ambrose -kaiser -fascinated -850 -limerick -recruitment -forge -mastered -##nding -leinster -rooted -threaten -##strom -borneo -##hes -suggestions -scholarships -propeller -documentaries -patronage -coats -constructing -invest -neurons -comet -entirety -shouts -identities -annoying -unchanged -wary -##antly -##ogy -neat -oversight -##kos -phillies -replay -constance -##kka -incarnation -humble -skies -minus -##acy -smithsonian -##chel -guerrilla -jar -cadets -##plate -surplus -audit -##aru -cracking -joanna -louisa -pacing -##lights -intentionally -##iri -diner -nwa -imprint -australians -tong -unprecedented -bunker -naive -specialists -ark -nichols -railing -leaked -pedal -##uka -shrub -longing -roofs -v8 -captains -neural -tuned -##ntal -##jet -emission -medina -frantic -codex -definitive -sid -abolition -intensified -stocks -enrique -sustain -genoa -oxide -##written -clues -cha -##gers -tributaries -fragment -venom -##rity -##ente -##sca -muffled -vain -sire -laos -##ingly -##hana -hastily -snapping -surfaced -sentiment -motive -##oft -contests -approximate -mesa -luckily -dinosaur -exchanges -propelled -accord -bourne -relieve -tow -masks -offended -##ues -cynthia -##mmer -rains -bartender -zinc -reviewers -lois -##sai -legged -arrogant -rafe -rosie -comprise -handicap -blockade 
-inlet -lagoon -copied -drilling -shelley -petals -##inian -mandarin -obsolete -##inated -onward -arguably -productivity -cindy -praising -seldom -busch -discusses -raleigh -shortage -ranged -stanton -encouragement -firstly -conceded -overs -temporal -##uke -cbe -##bos -woo -certainty -pumps -##pton -stalked -##uli -lizzie -periodic -thieves -weaker -##night -gases -shoving -chooses -wc -##chemical -prompting -weights -##kill -robust -flanked -sticky -hu -tuberculosis -##eb -##eal -christchurch -resembled -wallet -reese -inappropriate -pictured -distract -fixing -fiddle -giggled -burger -heirs -hairy -mechanic -torque -apache -obsessed -chiefly -cheng -logging -##tag -extracted -meaningful -numb -##vsky -gloucestershire -reminding -##bay -unite -##lit -breeds -diminished -clown -glove -1860s -##ن -##ug -archibald -focal -freelance -sliced -depiction -##yk -organism -switches -sights -stray -crawling -##ril -lever -leningrad -interpretations -loops -anytime -reel -alicia -delighted -##ech -inhaled -xiv -suitcase -bernie -vega -licenses -northampton -exclusion -induction -monasteries -racecourse -homosexuality -##right -##sfield -##rky -dimitri -michele -alternatives -ions -commentators -genuinely -objected -pork -hospitality -fencing -stephan -warships -peripheral -wit -drunken -wrinkled -quentin -spends -departing -chung -numerical -spokesperson -##zone -johannesburg -caliber -killers -##udge -assumes -neatly -demographic -abigail -bloc -##vel -mounting -##lain -bentley -slightest -xu -recipients -##jk -merlin -##writer -seniors -prisons -blinking -hindwings -flickered -kappa -##hel -80s -strengthening -appealing -brewing -gypsy -mali -lashes -hulk -unpleasant -harassment -bio -treaties -predict -instrumentation -pulp -troupe -boiling -mantle -##ffe -ins -##vn -dividing -handles -verbs -##onal -coconut -senegal -340 -thorough -gum -momentarily -##sto -cocaine -panicked -destined -##turing -teatro -denying -weary -captained -mans -##hawks -##code -wakefield -bollywood -thankfully -##16 -cyril -##wu -amendments -##bahn -consultation -stud -reflections -kindness -1787 -internally -##ovo -tex -mosaic -distribute -paddy -seeming -143 -##hic -piers -##15 -##mura -##verse -popularly -winger -kang -sentinel -mccoy -##anza -covenant -##bag -verge -fireworks -suppress -thrilled -dominate -##jar -swansea -##60 -142 -reconciliation -##ndi -stiffened -cue -dorian -##uf -damascus -amor -ida -foremost -##aga -porsche -unseen -dir -##had -##azi -stony -lexi -melodies -##nko -angular -integer -podcast -ants -inherent -jaws -justify -persona -##olved -josephine -##nr -##ressed -customary -flashes -gala -cyrus -glaring -backyard -ariel -physiology -greenland -html -stir -avon -atletico -finch -methodology -ked -##lent -mas -catholicism -townsend -branding -quincy -fits -containers -1777 -ashore -aragon -##19 -forearm -poisoning -##sd -adopting -conquer -grinding -amnesty -keller -finances -evaluate -forged -lankan -instincts -##uto -guam -bosnian -photographed -workplace -desirable -protector -##dog -allocation -intently -encourages -willy -##sten -bodyguard -electro -brighter -##ν -bihar -##chev -lasts -opener -amphibious -sal -verde -arte -##cope -captivity -vocabulary -yields -##tted -agreeing -desmond -pioneered -##chus -strap -campaigned -railroads -##ович -emblem -##dre -stormed -501 -##ulous -marijuana -northumberland -##gn -##nath -bowen -landmarks -beaumont -##qua -danube -##bler -attorneys -th -ge -flyers -critique -villains -cass -mutation -acc -##0s -colombo -mckay -motif -sampling -concluding 
-syndicate -##rell -neon -stables -ds -warnings -clint -mourning -wilkinson -##tated -merrill -leopard -evenings -exhaled -emil -sonia -ezra -discrete -stove -farrell -fifteenth -prescribed -superhero -##rier -worms -helm -wren -##duction -##hc -expo -##rator -hq -unfamiliar -antony -prevents -acceleration -fiercely -mari -painfully -calculations -cheaper -ign -clifton -irvine -davenport -mozambique -##np -pierced -##evich -wonders -##wig -##cate -##iling -crusade -ware -##uel -enzymes -reasonably -mls -##coe -mater -ambition -bunny -eliot -kernel -##fin -asphalt -headmaster -torah -aden -lush -pins -waived -##care -##yas -joao -substrate -enforce -##grad -##ules -alvarez -selections -epidemic -tempted -##bit -bremen -translates -ensured -waterfront -29th -forrest -manny -malone -kramer -reigning -cookies -simpler -absorption -205 -engraved -##ffy -evaluated -1778 -haze -146 -comforting -crossover -##abe -thorn -##rift -##imo -##pop -suppression -fatigue -cutter -##tr -201 -wurttemberg -##orf -enforced -hovering -proprietary -gb -samurai -syllable -ascent -lacey -tick -lars -tractor -merchandise -rep -bouncing -defendants -##yre -huntington -##ground -##oko -standardized -##hor -##hima -assassinated -nu -predecessors -rainy -liar -assurance -lyrical -##uga -secondly -flattened -ios -parameter -undercover -##mity -bordeaux -punish -ridges -markers -exodus -inactive -hesitate -debbie -nyc -pledge -savoy -nagar -offset -organist -##tium -hesse -marin -converting -##iver -diagram -propulsion -pu -validity -reverted -supportive -##dc -ministries -clans -responds -proclamation -##inae -##ø -##rea -ein -pleading -patriot -sf -birch -islanders -strauss -hates -##dh -brandenburg -concession -rd -##ob -1900s -killings -textbook -antiquity -cinematography -wharf -embarrassing -setup -creed -farmland -inequality -centred -signatures -fallon -370 -##ingham -##uts -ceylon -gazing -directive -laurie -##tern -globally -##uated -##dent -allah -excavation -threads -##cross -148 -frantically -icc -utilize -determines -respiratory -thoughtful -receptions -##dicate -merging -chandra -seine -147 -builders -builds -diagnostic -dev -visibility -goddamn -analyses -dhaka -cho -proves -chancel -concurrent -curiously -canadians -pumped -restoring -1850s -turtles -jaguar -sinister -spinal -traction -declan -vows -1784 -glowed -capitalism -swirling -install -universidad -##lder -##oat -soloist -##genic -##oor -coincidence -beginnings -nissan -dip -resorts -caucasus -combustion -infectious -##eno -pigeon -serpent -##itating -conclude -masked -salad -jew -##gr -surreal -toni -##wc -harmonica -151 -##gins -##etic -##coat -fishermen -intending -bravery -##wave -klaus -titan -wembley -taiwanese -ransom -40th -incorrect -hussein -eyelids -jp -cooke -dramas -utilities -##etta -##print -eisenhower -principally -granada -lana -##rak -openings -concord -##bl -bethany -connie -morality -sega -##mons -##nard -earnings -##kara -##cine -wii -communes -##rel -coma -composing -softened -severed -grapes -##17 -nguyen -analyzed -warlord -hubbard -heavenly -behave -slovenian -##hit -##ony -hailed -filmmakers -trance -caldwell -skye -unrest -coward -likelihood -##aging -bern -sci -taliban -honolulu -propose -##wang -1700 -browser -imagining -cobra -contributes -dukes -instinctively -conan -violinist -##ores -accessories -gradual -##amp -quotes -sioux -##dating -undertake -intercepted -sparkling -compressed -139 -fungus -tombs -haley -imposing -rests -degradation -lincolnshire -retailers -wetlands -tulsa -distributor -dungeon -nun 
-greenhouse -convey -atlantis -aft -exits -oman -dresser -lyons -##sti -joking -eddy -judgement -omitted -digits -##cts -##game -juniors -##rae -cents -stricken -une -##ngo -wizards -weir -breton -nan -technician -fibers -liking -royalty -##cca -154 -persia -terribly -magician -##rable -##unt -vance -cafeteria -booker -camille -warmer -##static -consume -cavern -gaps -compass -contemporaries -foyer -soothing -graveyard -maj -plunged -blush -##wear -cascade -demonstrates -ordinance -##nov -boyle -##lana -rockefeller -shaken -banjo -izzy -##ense -breathless -vines -##32 -##eman -alterations -chromosome -dwellings -feudal -mole -153 -catalonia -relics -tenant -mandated -##fm -fridge -hats -honesty -patented -raul -heap -cruisers -accusing -enlightenment -infants -wherein -chatham -contractors -zen -affinity -hc -osborne -piston -156 -traps -maturity -##rana -lagos -##zal -peering -##nay -attendant -dealers -protocols -subset -prospects -biographical -##cre -artery -##zers -insignia -nuns -endured -##eration -recommend -schwartz -serbs -berger -cromwell -crossroads -##ctor -enduring -clasped -grounded -##bine -marseille -twitched -abel -choke -https -catalyst -moldova -italians -##tist -disastrous -wee -##oured -##nti -wwf -nope -##piration -##asa -expresses -thumbs -167 -##nza -coca -1781 -cheating -##ption -skipped -sensory -heidelberg -spies -satan -dangers -semifinal -202 -bohemia -whitish -confusing -shipbuilding -relies -surgeons -landings -ravi -baku -moor -suffix -alejandro -##yana -litre -upheld -##unk -rajasthan -##rek -coaster -insists -posture -scenarios -etienne -favoured -appoint -transgender -elephants -poked -greenwood -defences -fulfilled -militant -somali -1758 -chalk -potent -##ucci -migrants -wink -assistants -nos -restriction -activism -niger -##ario -colon -shaun -##sat -daphne -##erated -swam -congregations -reprise -considerations -magnet -playable -xvi -##р -overthrow -tobias -knob -chavez -coding -##mers -propped -katrina -orient -newcomer -##suke -temperate -##pool -farmhouse -interrogation -##vd -committing -##vert -forthcoming -strawberry -joaquin -macau -ponds -shocking -siberia -##cellular -chant -contributors -##nant -##ologists -sped -absorb -hail -1782 -spared -##hore -barbados -karate -opus -originates -saul -##xie -evergreen -leaped -##rock -correlation -exaggerated -weekday -unification -bump -tracing -brig -afb -pathways -utilizing -##ners -mod -mb -disturbance -kneeling -##stad -##guchi -100th -pune -##thy -decreasing -168 -manipulation -miriam -academia -ecosystem -occupational -rbi -##lem -rift -##14 -rotary -stacked -incorporation -awakening -generators -guerrero -racist -##omy -cyber -derivatives -culminated -allie -annals -panzer -sainte -wikipedia -pops -zu -austro -##vate -algerian -politely -nicholson -mornings -educate -tastes -thrill -dartmouth -##gating -db -##jee -regan -differing -concentrating -choreography -divinity -##media -pledged -alexandre -routing -gregor -madeline -##idal -apocalypse -##hora -gunfire -culminating -elves -fined -liang -lam -programmed -tar -guessing -transparency -gabrielle -##gna -cancellation -flexibility -##lining -accession -shea -stronghold -nets -specializes -##rgan -abused -hasan -sgt -ling -exceeding -##₄ -admiration -supermarket -##ark -photographers -specialised -tilt -resonance -hmm -perfume -380 -sami -threatens -garland -botany -guarding -boiled -greet -puppy -russo -supplier -wilmington -vibrant -vijay -##bius -paralympic -grumbled -paige -faa -licking -margins -hurricanes -##gong -fest -grenade -ripping 
-##uz -counseling -weigh -##sian -needles -wiltshire -edison -costly -##not -fulton -tramway -redesigned -staffordshire -cache -gasping -watkins -sleepy -candidacy -##group -monkeys -timeline -throbbing -##bid -##sos -berth -uzbekistan -vanderbilt -bothering -overturned -ballots -gem -##iger -sunglasses -subscribers -hooker -compelling -ang -exceptionally -saloon -stab -##rdi -carla -terrifying -rom -##vision -coil -##oids -satisfying -vendors -31st -mackay -deities -overlooked -ambient -bahamas -felipe -olympia -whirled -botanist -advertised -tugging -##dden -disciples -morales -unionist -rites -foley -morse -motives -creepy -##₀ -soo -##sz -bargain -highness -frightening -turnpike -tory -reorganization -##cer -depict -biographer -##walk -unopposed -manifesto -##gles -institut -emile -accidental -kapoor -##dam -kilkenny -cortex -lively -##13 -romanesque -jain -shan -cannons -##ood -##ske -petrol -echoing -amalgamated -disappears -cautious -proposes -sanctions -trenton -##ر -flotilla -aus -contempt -tor -canary -cote -theirs -##hun -conceptual -deleted -fascinating -paso -blazing -elf -honourable -hutchinson -##eiro -##outh -##zin -surveyor -tee -amidst -wooded -reissue -intro -##ono -cobb -shelters -newsletter -hanson -brace -encoding -confiscated -dem -caravan -marino -scroll -melodic -cows -imam -##adi -##aneous -northward -searches -biodiversity -cora -310 -roaring -##bers -connell -theologian -halo -compose -pathetic -unmarried -dynamo -##oot -az -calculation -toulouse -deserves -humour -nr -forgiveness -tam -undergone -martyr -pamela -myths -whore -counselor -hicks -290 -heavens -battleship -electromagnetic -##bbs -stellar -establishments -presley -hopped -##chin -temptation -90s -wills -nas -##yuan -nhs -##nya -seminars -##yev -adaptations -gong -asher -lex -indicator -sikh -tobago -cites -goin -##yte -satirical -##gies -characterised -correspond -bubbles -lure -participates -##vid -eruption -skate -therapeutic -1785 -canals -wholesale -defaulted -sac -460 -petit -##zzled -virgil -leak -ravens -256 -portraying -##yx -ghetto -creators -dams -portray -vicente -##rington -fae -namesake -bounty -##arium -joachim -##ota -##iser -aforementioned -axle -snout -depended -dismantled -reuben -480 -##ibly -gallagher -##lau -##pd -earnest -##ieu -##iary -inflicted -objections -##llar -asa -gritted -##athy -jericho -##sea -##was -flick -underside -ceramics -undead -substituted -195 -eastward -undoubtedly -wheeled -chimney -##iche -guinness -cb -##ager -siding -##bell -traitor -baptiste -disguised -inauguration -149 -tipperary -choreographer -perched -warmed -stationary -eco -##ike -##ntes -bacterial -##aurus -flores -phosphate -##core -attacker -invaders -alvin -intersects -a1 -indirectly -immigrated -businessmen -cornelius -valves -narrated -pill -sober -ul -nationale -monastic -applicants -scenery -##jack -161 -motifs -constitutes -cpu -##osh -jurisdictions -sd -tuning -irritation -woven -##uddin -fertility -gao -##erie -antagonist -impatient -glacial -hides -boarded -denominations -interception -##jas -cookie -nicola -##tee -algebraic -marquess -bahn -parole -buyers -bait -turbines -paperwork -bestowed -natasha -renee -oceans -purchases -157 -vaccine -215 -##tock -fixtures -playhouse -integrate -jai -oswald -intellectuals -##cky -booked -nests -mortimer -##isi -obsession -sept -##gler -##sum -440 -scrutiny -simultaneous -squinted -##shin -collects -oven -shankar -penned -remarkably -##я -slips -luggage -spectral -1786 -collaborations -louie -consolidation -##ailed -##ivating -420 -hoover 
-blackpool -harness -ignition -vest -tails -belmont -mongol -skinner -##nae -visually -mage -derry -##tism -##unce -stevie -transitional -##rdy -redskins -drying -prep -prospective -##21 -annoyance -oversee -##loaded -fills -##books -##iki -announces -fda -scowled -respects -prasad -mystic -tucson -##vale -revue -springer -bankrupt -1772 -aristotle -salvatore -habsburg -##geny -dal -natal -nut -pod -chewing -darts -moroccan -walkover -rosario -lenin -punjabi -##ße -grossed -scattering -wired -invasive -hui -polynomial -corridors -wakes -gina -portrays -##cratic -arid -retreating -erich -irwin -sniper -##dha -linen -lindsey -maneuver -butch -shutting -socio -bounce -commemorative -postseason -jeremiah -pines -275 -mystical -beads -bp -abbas -furnace -bidding -consulted -assaulted -empirical -rubble -enclosure -sob -weakly -cancel -polly -yielded -##emann -curly -prediction -battered -70s -vhs -jacqueline -render -sails -barked -detailing -grayson -riga -sloane -raging -##yah -herbs -bravo -##athlon -alloy -giggle -imminent -suffers -assumptions -waltz -##itate -accomplishments -##ited -bathing -remixed -deception -prefix -##emia -deepest -##tier -##eis -balkan -frogs -##rong -slab -##pate -philosophers -peterborough -grains -imports -dickinson -rwanda -##atics -1774 -dirk -lan -tablets -##rove -clone -##rice -caretaker -hostilities -mclean -##gre -regimental -treasures -norms -impose -tsar -tango -diplomacy -variously -complain -192 -recognise -arrests -1779 -celestial -pulitzer -##dus -bing -libretto -##moor -adele -splash -##rite -expectation -lds -confronts -##izer -spontaneous -harmful -wedge -entrepreneurs -buyer -##ope -bilingual -translate -rugged -conner -circulated -uae -eaton -##gra -##zzle -lingered -lockheed -vishnu -reelection -alonso -##oom -joints -yankee -headline -cooperate -heinz -laureate -invading -##sford -echoes -scandinavian -##dham -hugging -vitamin -salute -micah -hind -trader -##sper -radioactive -##ndra -militants -poisoned -ratified -remark -campeonato -deprived -wander -prop -##dong -outlook -##tani -##rix -##eye -chiang -darcy -##oping -mandolin -spice -statesman -babylon -182 -walled -forgetting -afro -##cap -158 -giorgio -buffer -##polis -planetary -##gis -overlap -terminals -kinda -centenary -##bir -arising -manipulate -elm -ke -1770 -ak -##tad -chrysler -mapped -moose -pomeranian -quad -macarthur -assemblies -shoreline -recalls -stratford -##rted -noticeable -##evic -imp -##rita -##sque -accustomed -supplying -tents -disgusted -vogue -sipped -filters -khz -reno -selecting -luftwaffe -mcmahon -tyne -masterpiece -carriages -collided -dunes -exercised -flare -remembers -muzzle -##mobile -heck -##rson -burgess -lunged -middleton -boycott -bilateral -##sity -hazardous -lumpur -multiplayer -spotlight -jackets -goldman -liege -porcelain -rag -waterford -benz -attracts -hopeful -battling -ottomans -kensington -baked -hymns -cheyenne -lattice -levine -borrow -polymer -clashes -michaels -monitored -commitments -denounced -##25 -##von -cavity -##oney -hobby -akin -##holders -futures -intricate -cornish -patty -##oned -illegally -dolphin -##lag -barlow -yellowish -maddie -apologized -luton -plagued -##puram -nana -##rds -sway -fanny -łodz -##rino -psi -suspicions -hanged -##eding -initiate -charlton -##por -nak -competent -235 -analytical -annex -wardrobe -reservations -##rma -sect -162 -fairfax -hedge -piled -buckingham -uneven -bauer -simplicity -snyder -interpret -accountability -donors -moderately -byrd -continents -##cite -##max -disciple -hr -jamaican -ping 
-nominees -##uss -mongolian -diver -attackers -eagerly -ideological -pillows -miracles -apartheid -revolver -sulfur -clinics -moran -163 -##enko -ile -katy -rhetoric -##icated -chronology -recycling -##hrer -elongated -mughal -pascal -profiles -vibration -databases -domination -##fare -##rant -matthias -digest -rehearsal -polling -weiss -initiation -reeves -clinging -flourished -impress -ngo -##hoff -##ume -buckley -symposium -rhythms -weed -emphasize -transforming -##taking -##gence -##yman -accountant -analyze -flicker -foil -priesthood -voluntarily -decreases -##80 -##hya -slater -sv -charting -mcgill -##lde -moreno -##iu -besieged -zur -robes -##phic -admitting -api -deported -turmoil -peyton -earthquakes -##ares -nationalists -beau -clair -brethren -interrupt -welch -curated -galerie -requesting -164 -##ested -impending -steward -viper -##vina -complaining -beautifully -brandy -foam -nl -1660 -##cake -alessandro -punches -laced -explanations -##lim -attribute -clit -reggie -discomfort -##cards -smoothed -whales -##cene -adler -countered -duffy -disciplinary -widening -recipe -reliance -conducts -goats -gradient -preaching -##shaw -matilda -quasi -striped -meridian -cannabis -cordoba -certificates -##agh -##tering -graffiti -hangs -pilgrims -repeats -##ych -revive -urine -etat -##hawk -fueled -belts -fuzzy -susceptible -##hang -mauritius -salle -sincere -beers -hooks -##cki -arbitration -entrusted -advise -sniffed -seminar -junk -donnell -processors -principality -strapped -celia -mendoza -everton -fortunes -prejudice -starving -reassigned -steamer -##lund -tuck -evenly -foreman -##ffen -dans -375 -envisioned -slit -##xy -baseman -liberia -rosemary -##weed -electrified -periodically -potassium -stride -contexts -sperm -slade -mariners -influx -bianca -subcommittee -##rane -spilling -icao -estuary -##nock -delivers -iphone -##ulata -isa -mira -bohemian -dessert -##sbury -welcoming -proudly -slowing -##chs -musee -ascension -russ -##vian -waits -##psy -africans -exploit -##morphic -gov -eccentric -crab -peck -##ull -entrances -formidable -marketplace -groom -bolted -metabolism -patton -robbins -courier -payload -endure -##ifier -andes -refrigerator -##pr -ornate -##uca -ruthless -illegitimate -masonry -strasbourg -bikes -adobe -##³ -apples -quintet -willingly -niche -bakery -corpses -energetic -##cliffe -##sser -##ards -177 -centimeters -centro -fuscous -cretaceous -rancho -##yde -andrei -telecom -tottenham -oasis -ordination -vulnerability -presiding -corey -cp -penguins -sims -##pis -malawi -piss -##48 -correction -##cked -##ffle -##ryn -countdown -detectives -psychiatrist -psychedelic -dinosaurs -blouse -##get -choi -vowed -##oz -randomly -##pol -49ers -scrub -blanche -bruins -dusseldorf -##using -unwanted -##ums -212 -dominique -elevations -headlights -om -laguna -##oga -1750 -famously -ignorance -shrewsbury -##aine -ajax -breuning -che -confederacy -greco -overhaul -##screen -paz -skirts -disagreement -cruelty -jagged -phoebe -shifter -hovered -viruses -##wes -mandy -##lined -##gc -landlord -squirrel -dashed -##ι -ornamental -gag -wally -grange -literal -spurs -undisclosed -proceeding -yin -##text -billie -orphan -spanned -humidity -indy -weighted -presentations -explosions -lucian -##tary -vaughn -hindus -##anga -##hell -psycho -171 -daytona -protects -efficiently -rematch -sly -tandem -##oya -rebranded -impaired -hee -metropolis -peach -godfrey -diaspora -ethnicity -prosperous -gleaming -dar -grossing -playback -##rden -stripe -pistols -##tain -births -labelled -##cating -172 
-rudy -alba -##onne -aquarium -hostility -##gb -##tase -shudder -sumatra -hardest -lakers -consonant -creeping -demos -homicide -capsule -zeke -liberties -expulsion -pueblo -##comb -trait -transporting -##ddin -##neck -##yna -depart -gregg -mold -ledge -hangar -oldham -playboy -termination -analysts -gmbh -romero -##itic -insist -cradle -filthy -brightness -slash -shootout -deposed -bordering -##truct -isis -microwave -tumbled -sheltered -cathy -werewolves -messy -andersen -convex -clapped -clinched -satire -wasting -edo -vc -rufus -##jak -mont -##etti -poznan -##keeping -restructuring -transverse -##rland -azerbaijani -slovene -gestures -roommate -choking -shear -##quist -vanguard -oblivious -##hiro -disagreed -baptism -##lich -coliseum -##aceae -salvage -societe -cory -locke -relocation -relying -versailles -ahl -swelling -##elo -cheerful -##word -##edes -gin -sarajevo -obstacle -diverted -##nac -messed -thoroughbred -fluttered -utrecht -chewed -acquaintance -assassins -dispatch -mirza -##wart -nike -salzburg -swell -yen -##gee -idle -ligue -samson -##nds -##igh -playful -spawned -##cise -tease -##case -burgundy -##bot -stirring -skeptical -interceptions -marathi -##dies -bedrooms -aroused -pinch -##lik -preferences -tattoos -buster -digitally -projecting -rust -##ital -kitten -priorities -addison -pseudo -##guard -dusk -icons -sermon -##psis -##iba -bt -##lift -##xt -ju -truce -rink -##dah -##wy -defects -psychiatry -offences -calculate -glucose -##iful -##rized -##unda -francaise -##hari -richest -warwickshire -carly -1763 -purity -redemption -lending -##cious -muse -bruises -cerebral -aero -carving -##name -preface -terminology -invade -monty -##int -anarchist -blurred -##iled -rossi -treats -guts -shu -foothills -ballads -undertaking -premise -cecilia -affiliates -blasted -conditional -wilder -minors -drone -rudolph -buffy -swallowing -horton -attested -##hop -rutherford -howell -primetime -livery -penal -##bis -minimize -hydro -wrecked -wrought -palazzo -##gling -cans -vernacular -friedman -nobleman -shale -walnut -danielle -##ection -##tley -sears -##kumar -chords -lend -flipping -streamed -por -dracula -gallons -sacrifices -gamble -orphanage -##iman -mckenzie -##gible -boxers -daly -##balls -##ان -208 -##ific -##rative -##iq -exploited -slated -##uity -circling -hillary -pinched -goldberg -provost -campaigning -lim -piles -ironically -jong -mohan -successors -usaf -##tem -##ught -autobiographical -haute -preserves -##ending -acquitted -comparisons -203 -hydroelectric -gangs -cypriot -torpedoes -rushes -chrome -derive -bumps -instability -fiat -pets -##mbe -silas -dye -reckless -settler -##itation -info -heats -##writing -176 -canonical -maltese -fins -mushroom -stacy -aspen -avid -##kur -##loading -vickers -gaston -hillside -statutes -wilde -gail -kung -sabine -comfortably -motorcycles -##rgo -169 -pneumonia -fetch -##sonic -axel -faintly -parallels -##oop -mclaren -spouse -compton -interdisciplinary -miner -##eni -181 -clamped -##chal -##llah -separates -versa -##mler -scarborough -labrador -##lity -##osing -rutgers -hurdles -como -166 -burt -divers -##100 -wichita -cade -coincided -##erson -bruised -mla -##pper -vineyard -##ili -##brush -notch -mentioning -jase -hearted -kits -doe -##acle -pomerania -##ady -ronan -seizure -pavel -problematic -##zaki -domenico -##ulin -catering -penelope -dependence -parental -emilio -ministerial -atkinson -##bolic -clarkson -chargers -colby -grill -peeked -arises -summon -##aged -fools -##grapher -faculties -qaeda -##vial -garner -refurbished 
-##hwa -geelong -disasters -nudged -bs -shareholder -lori -algae -reinstated -rot -##ades -##nous -invites -stainless -183 -inclusive -##itude -diocesan -til -##icz -denomination -##xa -benton -floral -registers -##ider -##erman -##kell -absurd -brunei -guangzhou -hitter -retaliation -##uled -##eve -blanc -nh -consistency -contamination -##eres -##rner -dire -palermo -broadcasters -diaries -inspire -vols -brewer -tightening -ky -mixtape -hormone -##tok -stokes -##color -##dly -##ssi -pg -##ometer -##lington -sanitation -##tility -intercontinental -apps -##adt -¹⁄₂ -cylinders -economies -favourable -unison -croix -gertrude -odyssey -vanity -dangling -##logists -upgrades -dice -middleweight -practitioner -##ight -206 -henrik -parlor -orion -angered -lac -python -blurted -##rri -sensual -intends -swings -angled -##phs -husky -attain -peerage -precinct -textiles -cheltenham -shuffled -dai -confess -tasting -bhutan -##riation -tyrone -segregation -abrupt -ruiz -##rish -smirked -blackwell -confidential -browning -amounted -##put -vase -scarce -fabulous -raided -staple -guyana -unemployed -glider -shay -##tow -carmine -troll -intervene -squash -superstar -##uce -cylindrical -len -roadway -researched -handy -##rium -##jana -meta -lao -declares -##rring -##tadt -##elin -##kova -willem -shrubs -napoleonic -realms -skater -qi -volkswagen -##ł -tad -hara -archaeologist -awkwardly -eerie -##kind -wiley -##heimer -##24 -titus -organizers -cfl -crusaders -lama -usb -vent -enraged -thankful -occupants -maximilian -##gaard -possessing -textbooks -##oran -collaborator -quaker -##ulo -avalanche -mono -silky -straits -isaiah -mustang -surged -resolutions -potomac -descend -cl -kilograms -plato -strains -saturdays -##olin -bernstein -##ype -holstein -ponytail -##watch -belize -conversely -heroine -perpetual -##ylus -charcoal -piedmont -glee -negotiating -backdrop -prologue -##jah -##mmy -pasadena -climbs -ramos -sunni -##holm -##tner -##tri -anand -deficiency -hertfordshire -stout -##avi -aperture -orioles -##irs -doncaster -intrigued -bombed -coating -otis -##mat -cocktail -##jit -##eto -amir -arousal -sar -##proof -##act -##ories -dixie -pots -##bow -whereabouts -159 -##fted -drains -bullying -cottages -scripture -coherent -fore -poe -appetite -##uration -sampled -##ators -##dp -derrick -rotor -jays -peacock -installment -##rro -advisors -##coming -rodeo -scotch -##mot -##db -##fen -##vant -ensued -rodrigo -dictatorship -martyrs -twenties -##н -towed -incidence -marta -rainforest -sai -scaled -##cles -oceanic -qualifiers -symphonic -mcbride -dislike -generalized -aubrey -colonization -##iation -##lion -##ssing -disliked -lublin -salesman -##ulates -spherical -whatsoever -sweating -avalon -contention -punt -severity -alderman -atari -##dina -##grant -##rop -scarf -seville -vertices -annexation -fairfield -fascination -inspiring -launches -palatinate -regretted -##rca -feral -##iom -elk -nap -olsen -reddy -yong -##leader -##iae -garment -transports -feng -gracie -outrage -viceroy -insides -##esis -breakup -grady -organizer -softer -grimaced -222 -murals -galicia -arranging -vectors -##rsten -bas -##sb -##cens -sloan -##eka -bitten -ara -fender -nausea -bumped -kris -banquet -comrades -detector -persisted -##llan -adjustment -endowed -cinemas -##shot -sellers -##uman -peek -epa -kindly -neglect -simpsons -talon -mausoleum -runaway -hangul -lookout -##cic -rewards -coughed -acquainted -chloride -##ald -quicker -accordion -neolithic -##qa -artemis -coefficient -lenny -pandora -tx -##xed -ecstasy -litter -segunda 
-chairperson -gemma -hiss -rumor -vow -nasal -antioch -compensate -patiently -transformers -##eded -judo -morrow -penis -posthumous -philips -bandits -husbands -denote -flaming -##any -##phones -langley -yorker -1760 -walters -##uo -##kle -gubernatorial -fatty -samsung -leroy -outlaw -##nine -unpublished -poole -jakob -##ᵢ -##ₙ -crete -distorted -superiority -##dhi -intercept -crust -mig -claus -crashes -positioning -188 -stallion -301 -frontal -armistice -##estinal -elton -aj -encompassing -camel -commemorated -malaria -woodward -calf -cigar -penetrate -##oso -willard -##rno -##uche -illustrate -amusing -convergence -noteworthy -##lma -##rva -journeys -realise -manfred -##sable -410 -##vocation -hearings -fiance -##posed -educators -provoked -adjusting -##cturing -modular -stockton -paterson -vlad -rejects -electors -selena -maureen -##tres -uber -##rce -swirled -##num -proportions -nanny -pawn -naturalist -parma -apostles -awoke -ethel -wen -##bey -monsoon -overview -##inating -mccain -rendition -risky -adorned -##ih -equestrian -germain -nj -conspicuous -confirming -##yoshi -shivering -##imeter -milestone -rumours -flinched -bounds -smacked -token -##bei -lectured -automobiles -##shore -impacted -##iable -nouns -nero -##leaf -ismail -prostitute -trams -##lace -bridget -sud -stimulus -impressions -reins -revolves -##oud -##gned -giro -honeymoon -##swell -criterion -##sms -##uil -libyan -prefers -##osition -211 -preview -sucks -accusation -bursts -metaphor -diffusion -tolerate -faye -betting -cinematographer -liturgical -specials -bitterly -humboldt -##ckle -flux -rattled -##itzer -archaeologists -odor -authorised -marshes -discretion -##ов -alarmed -archaic -inverse -##leton -explorers -##pine -drummond -tsunami -woodlands -##minate -##tland -booklet -insanity -owning -insert -crafted -calculus -##tore -receivers -##bt -stung -##eca -##nched -prevailing -travellers -eyeing -lila -graphs -##borne -178 -julien -##won -morale -adaptive -therapist -erica -cw -libertarian -bowman -pitches -vita -##ional -crook -##ads -##entation -caledonia -mutiny -##sible -1840s -automation -##ß -flock -##pia -ironic -pathology -##imus -remarried -##22 -joker -withstand -energies -##att -shropshire -hostages -madeleine -tentatively -conflicting -mateo -recipes -euros -ol -mercenaries -nico -##ndon -albuquerque -augmented -mythical -bel -freud -##child -cough -##lica -365 -freddy -lillian -genetically -nuremberg -calder -209 -bonn -outdoors -paste -suns -urgency -vin -restraint -tyson -##cera -##selle -barrage -bethlehem -kahn -##par -mounts -nippon -barony -happier -ryu -makeshift -sheldon -blushed -castillo -barking -listener -taped -bethel -fluent -headlines -pornography -rum -disclosure -sighing -mace -doubling -gunther -manly -##plex -rt -interventions -physiological -forwards -emerges -##tooth -##gny -compliment -rib -recession -visibly -barge -faults -connector -exquisite -prefect -##rlin -patio -##cured -elevators -brandt -italics -pena -173 -wasp -satin -ea -botswana -graceful -respectable -##jima -##rter -##oic -franciscan -generates -##dl -alfredo -disgusting -##olate -##iously -sherwood -warns -cod -promo -cheryl -sino -##ة -##escu -twitch -##zhi -brownish -thom -ortiz -##dron -densely -##beat -carmel -reinforce -##bana -187 -anastasia -downhill -vertex -contaminated -remembrance -harmonic -homework -##sol -fiancee -gears -olds -angelica -loft -ramsay -quiz -colliery -sevens -##cape -autism -##hil -walkway -##boats -ruben -abnormal -ounce -khmer -##bbe -zachary -bedside -morphology -punching 
[deleted WordPiece (BERT-style) vocabulary listing: several thousand subword tokens, from "##olar" through "biceps", each originally a separate removed line in the diff]
-contractual -##herton -amend -hurrying -subconscious -##anal -336 -meng -clermont -spawning -##eia -##lub -dignitaries -impetus -snacks -spotting -twigs -##bilis -##cz -##ouk -libertadores -nic -skylar -##aina -##firm -gustave -asean -##anum -dieter -legislatures -flirt -bromley -trolls -umar -##bbies -##tyle -blah -parc -bridgeport -crank -negligence -##nction -46th -constantin -molded -bandages -seriousness -00pm -siegel -carpets -compartments -upbeat -statehood -##dner -##edging -marko -730 -platt -##hane -paving -##iy -1738 -abbess -impatience -limousine -nbl -##talk -441 -lucille -mojo -nightfall -robbers -##nais -karel -brisk -calves -replicate -ascribed -telescopes -##olf -intimidated -##reen -ballast -specialization -##sit -aerodynamic -caliphate -rainer -visionary -##arded -epsilon -##aday -##onte -aggregation -auditory -boosted -reunification -kathmandu -loco -robyn -402 -acknowledges -appointing -humanoid -newell -redeveloped -restraints -##tained -barbarians -chopper -1609 -italiana -##lez -##lho -investigates -wrestlemania -##anies -##bib -690 -##falls -creaked -dragoons -gravely -minions -stupidity -volley -##harat -##week -musik -##eries -##uously -fungal -massimo -semantics -malvern -##ahl -##pee -discourage -embryo -imperialism -1910s -profoundly -##ddled -jiangsu -sparkled -stat -##holz -sweatshirt -tobin -##iction -sneered -##cheon -##oit -brit -causal -smyth -##neuve -diffuse -perrin -silvio -##ipes -##recht -detonated -iqbal -selma -##nism -##zumi -roasted -##riders -tay -##ados -##mament -##mut -##rud -840 -completes -nipples -cfa -flavour -hirsch -##laus -calderon -sneakers -moravian -##ksha -1622 -rq -294 -##imeters -bodo -##isance -##pre -##ronia -anatomical -excerpt -##lke -dh -kunst -##tablished -##scoe -biomass -panted -unharmed -gael -housemates -montpellier -##59 -coa -rodents -tonic -hickory -singleton -##taro -451 -1719 -aldo -breaststroke -dempsey -och -rocco -##cuit -merton -dissemination -midsummer -serials -##idi -haji -polynomials -##rdon -gs -enoch -prematurely -shutter -taunton -£3 -##grating -##inates -archangel -harassed -##asco -326 -archway -dazzling -##ecin -1736 -sumo -wat -##kovich -1086 -honneur -##ently -##nostic -##ttal -##idon -1605 -403 -1716 -blogger -rents -##gnan -hires -##ikh -##dant -howie -##rons -handler -retracted -shocks -1632 -arun -duluth -kepler -trumpeter -##lary -peeking -seasoned -trooper -##mara -laszlo -##iciencies -##rti -heterosexual -##inatory -##ssion -indira -jogging -##inga -##lism -beit -dissatisfaction -malice -##ately -nedra -peeling -##rgeon -47th -stadiums -475 -vertigo -##ains -iced -restroom -##plify -##tub -illustrating -pear -##chner -##sibility -inorganic -rappers -receipts -watery -##kura -lucinda -##oulos -reintroduced -##8th -##tched -gracefully -saxons -nutritional -wastewater -rained -favourites -bedrock -fisted -hallways -likeness -upscale -##lateral -1580 -blinds -prequel -##pps -##tama -deter -humiliating -restraining -tn -vents -1659 -laundering -recess -rosary -tractors -coulter -federer -##ifiers -##plin -persistence -##quitable -geschichte -pendulum -quakers -##beam -bassett -pictorial -buffet -koln -##sitor -drills -reciprocal -shooters -##57 -##cton -##tees -converge -pip -dmitri -donnelly -yamamoto -aqua -azores -demographics -hypnotic -spitfire -suspend -wryly -roderick -##rran -sebastien -##asurable -mavericks -##fles -##200 -himalayan -prodigy -##iance -transvaal -demonstrators -handcuffs -dodged -mcnamara -sublime -1726 -crazed -##efined -##till -ivo -pondered -reconciled -shrill -sava 
-##duk -bal -cad -heresy -jaipur -goran -##nished -341 -lux -shelly -whitehall -##hre -israelis -peacekeeping -##wled -1703 -demetrius -ousted -##arians -##zos -beale -anwar -backstroke -raged -shrinking -cremated -##yck -benign -towing -wadi -darmstadt -landfill -parana -soothe -colleen -sidewalks -mayfair -tumble -hepatitis -ferrer -superstructure -##gingly -##urse -##wee -anthropological -translators -##mies -closeness -hooves -##pw -mondays -##roll -##vita -landscaping -##urized -purification -sock -thorns -thwarted -jalan -tiberius -##taka -saline -##rito -confidently -khyber -sculptors -##ij -brahms -hammersmith -inspectors -battista -fivb -fragmentation -hackney -##uls -arresting -exercising -antoinette -bedfordshire -##zily -dyed -##hema -1656 -racetrack -variability -##tique -1655 -austrians -deteriorating -madman -theorists -aix -lehman -weathered -1731 -decreed -eruptions -1729 -flaw -quinlan -sorbonne -flutes -nunez -1711 -adored -downwards -fable -rasped -1712 -moritz -mouthful -renegade -shivers -stunts -dysfunction -restrain -translit -327 -pancakes -##avio -##cision -##tray -351 -vial -##lden -bain -##maid -##oxide -chihuahua -malacca -vimes -##rba -##rnier -1664 -donnie -plaques -##ually -337 -bangs -floppy -huntsville -loretta -nikolay -##otte -eater -handgun -ubiquitous -##hett -eras -zodiac -1634 -##omorphic -1820s -##zog -cochran -##bula -##lithic -warring -##rada -dalai -excused -blazers -mcconnell -reeling -bot -este -##abi -geese -hoax -taxon -##bla -guitarists -##icon -condemning -hunts -inversion -moffat -taekwondo -##lvis -1624 -stammered -##rest -##rzy -sousa -fundraiser -marylebone -navigable -uptown -cabbage -daniela -salman -shitty -whimper -##kian -##utive -programmers -protections -rm -##rmi -##rued -forceful -##enes -fuss -##tao -##wash -brat -oppressive -reykjavik -spartak -ticking -##inkles -##kiewicz -adolph -horst -maui -protege -straighten -cpc -landau -concourse -clements -resultant -##ando -imaginative -joo -reactivated -##rem -##ffled -##uising -consultative -##guide -flop -kaitlyn -mergers -parenting -somber -##vron -supervise -vidhan -##imum -courtship -exemplified -harmonies -medallist -refining -##rrow -##ка -amara -##hum -780 -goalscorer -sited -overshadowed -rohan -displeasure -secretive -multiplied -osman -##orth -engravings -padre -##kali -##veda -miniatures -mis -##yala -clap -pali -rook -##cana -1692 -57th -antennae -astro -oskar -1628 -bulldog -crotch -hackett -yucatan -##sure -amplifiers -brno -ferrara -migrating -##gree -thanking -turing -##eza -mccann -ting -andersson -onslaught -gaines -ganga -incense -standardization -##mation -sentai -scuba -stuffing -turquoise -waivers -alloys -##vitt -regaining -vaults -##clops -##gizing -digger -furry -memorabilia -probing -##iad -payton -rec -deutschland -filippo -opaque -seamen -zenith -afrikaans -##filtration -disciplined -inspirational -##merie -banco -confuse -grafton -tod -##dgets -championed -simi -anomaly -biplane -##ceptive -electrode -##para -1697 -cleavage -crossbow -swirl -informant -##lars -##osta -afi -bonfire -spec -##oux -lakeside -slump -##culus -##lais -##qvist -##rrigan -1016 -facades -borg -inwardly -cervical -xl -pointedly -050 -stabilization -##odon -chests -1699 -hacked -ctv -orthogonal -suzy -##lastic -gaulle -jacobite -rearview -##cam -##erted -ashby -##drik -##igate -##mise -##zbek -affectionately -canine -disperse -latham -##istles -##ivar -spielberg -##orin -##idium -ezekiel -cid -##sg -durga -middletown -##cina -customized -frontiers -harden -##etano -##zzy -1604 
-bolsheviks -##66 -coloration -yoko -##bedo -briefs -slabs -debra -liquidation -plumage -##oin -blossoms -dementia -subsidy -1611 -proctor -relational -jerseys -parochial -ter -##ici -esa -peshawar -cavalier -loren -cpi -idiots -shamrock -1646 -dutton -malabar -mustache -##endez -##ocytes -referencing -terminates -marche -yarmouth -##sop -acton -mated -seton -subtly -baptised -beige -extremes -jolted -kristina -telecast -##actic -safeguard -waldo -##baldi -##bular -endeavors -sloppy -subterranean -##ensburg -##itung -delicately -pigment -tq -##scu -1626 -##ound -collisions -coveted -herds -##personal -##meister -##nberger -chopra -##ricting -abnormalities -defective -galician -lucie -##dilly -alligator -likened -##genase -burundi -clears -complexion -derelict -deafening -diablo -fingered -champaign -dogg -enlist -isotope -labeling -mrna -##erre -brilliance -marvelous -##ayo -1652 -crawley -ether -footed -dwellers -deserts -hamish -rubs -warlock -skimmed -##lizer -870 -buick -embark -heraldic -irregularities -##ajan -kiara -##kulam -##ieg -antigen -kowalski -##lge -oakley -visitation -##mbit -vt -##suit -1570 -murderers -##miento -##rites -chimneys -##sling -condemn -custer -exchequer -havre -##ghi -fluctuations -##rations -dfb -hendricks -vaccines -##tarian -nietzsche -biking -juicy -##duced -brooding -scrolling -selangor -##ragan -352 -annum -boomed -seminole -sugarcane -##dna -departmental -dismissing -innsbruck -arteries -ashok -batavia -daze -kun -overtook -##rga -##tlan -beheaded -gaddafi -holm -electronically -faulty -galilee -fractures -kobayashi -##lized -gunmen -magma -aramaic -mala -eastenders -inference -messengers -bf -##qu -407 -bathrooms -##vere -1658 -flashbacks -ideally -misunderstood -##jali -##weather -mendez -##grounds -505 -uncanny -##iii -1709 -friendships -##nbc -sacrament -accommodated -reiterated -logistical -pebbles -thumped -##escence -administering -decrees -drafts -##flight -##cased -##tula -futuristic -picket -intimidation -winthrop -##fahan -interfered -339 -afar -francoise -morally -uta -cochin -croft -dwarfs -##bruck -##dents -##nami -biker -##hner -##meral -nano -##isen -##ometric -##pres -##ан -brightened -meek -parcels -securely -gunners -##jhl -##zko -agile -hysteria -##lten -##rcus -bukit -champs -chevy -cuckoo -leith -sadler -theologians -welded -##section -1663 -jj -plurality -xander -##rooms -##formed -shredded -temps -intimately -pau -tormented -##lok -##stellar -1618 -charred -ems -essen -##mmel -alarms -spraying -ascot -blooms -twinkle -##abia -##apes -internment -obsidian -##chaft -snoop -##dav -##ooping -malibu -##tension -quiver -##itia -hays -mcintosh -travers -walsall -##ffie -1623 -beverley -schwarz -plunging -structurally -m3 -rosenthal -vikram -##tsk -770 -ghz -##onda -##tiv -chalmers -groningen -pew -reckon -unicef -##rvis -55th -##gni -1651 -sulawesi -avila -cai -metaphysical -screwing -turbulence -##mberg -augusto -samba -56th -baffled -momentary -toxin -##urian -##wani -aachen -condoms -dali -steppe -##3d -##app -##oed -##year -adolescence -dauphin -electrically -inaccessible -microscopy -nikita -##ega -atv -##cel -##enter -##oles -##oteric -##ы -accountants -punishments -wrongly -bribes -adventurous -clinch -flinders -southland -##hem -##kata -gough -##ciency -lads -soared -##ה -undergoes -deformation -outlawed -rubbish -##arus -##mussen -##nidae -##rzburg -arcs -##ingdon -##tituted -1695 -wheelbase -wheeling -bombardier -campground -zebra -##lices -##oj -##bain -lullaby -##ecure -donetsk -wylie -grenada -##arding -##ης -squinting 
-eireann -opposes -##andra -maximal -runes -##broken -##cuting -##iface -##ror -##rosis -additive -britney -adultery -triggering -##drome -detrimental -aarhus -containment -jc -swapped -vichy -##ioms -madly -##oric -##rag -brant -##ckey -##trix -1560 -1612 -broughton -rustling -##stems -##uder -asbestos -mentoring -##nivorous -finley -leaps -##isan -apical -pry -slits -substitutes -##dict -intuitive -fantasia -insistent -unreasonable -##igen -##vna -domed -hannover -margot -ponder -##zziness -impromptu -jian -lc -rampage -stemming -##eft -andrey -gerais -whichever -amnesia -appropriated -anzac -clicks -modifying -ultimatum -cambrian -maids -verve -yellowstone -##mbs -conservatoire -##scribe -adherence -dinners -spectra -imperfect -mysteriously -sidekick -tatar -tuba -##aks -##ifolia -distrust -##athan -##zle -c2 -ronin -zac -##pse -celaena -instrumentalist -scents -skopje -##mbling -comical -compensated -vidal -condor -intersect -jingle -wavelengths -##urrent -mcqueen -##izzly -carp -weasel -422 -kanye -militias -postdoctoral -eugen -gunslinger -##ɛ -faux -hospice -##for -appalled -derivation -dwarves -##elis -dilapidated -##folk -astoria -philology -##lwyn -##otho -##saka -inducing -philanthropy -##bf -##itative -geek -markedly -sql -##yce -bessie -indices -rn -##flict -495 -frowns -resolving -weightlifting -tugs -cleric -contentious -1653 -mania -rms -##miya -##reate -##ruck -##tucket -bien -eels -marek -##ayton -##cence -discreet -unofficially -##ife -leaks -##bber -1705 -332 -dung -compressor -hillsborough -pandit -shillings -distal -##skin -381 -##tat -##you -nosed -##nir -mangrove -undeveloped -##idia -textures -##inho -##500 -##rise -ae -irritating -nay -amazingly -bancroft -apologetic -compassionate -kata -symphonies -##lovic -airspace -##lch -930 -gifford -precautions -fulfillment -sevilla -vulgar -martinique -##urities -looting -piccolo -tidy -##dermott -quadrant -armchair -incomes -mathematicians -stampede -nilsson -##inking -##scan -foo -quarterfinal -##ostal -shang -shouldered -squirrels -##owe -344 -vinegar -##bner -##rchy -##systems -delaying -##trics -ars -dwyer -rhapsody -sponsoring -##gration -bipolar -cinder -starters -##olio -##urst -421 -signage -##nty -aground -figurative -mons -acquaintances -duets -erroneously -soyuz -elliptic -recreated -##cultural -##quette -##ssed -##tma -##zcz -moderator -scares -##itaire -##stones -##udence -juniper -sighting -##just -##nsen -britten -calabria -ry -bop -cramer -forsyth -stillness -##л -airmen -gathers -unfit -##umber -##upt -taunting -##rip -seeker -streamlined -##bution -holster -schumann -tread -vox -##gano -##onzo -strive -dil -reforming -covent -newbury -predicting -##orro -decorate -tre -##puted -andover -ie -asahi -dept -dunkirk -gills -##tori -buren -huskies -##stis -##stov -abstracts -bets -loosen -##opa -1682 -yearning -##glio -##sir -berman -effortlessly -enamel -napoli -persist -##peration -##uez -attache -elisa -b1 -invitations -##kic -accelerating -reindeer -boardwalk -clutches -nelly -polka -starbucks -##kei -adamant -huey -lough -unbroken -adventurer -embroidery -inspecting -stanza -##ducted -naia -taluka -##pone -##roids -chases -deprivation -florian -##jing -##ppet -earthly -##lib -##ssee -colossal -foreigner -vet -freaks -patrice -rosewood -triassic -upstate -##pkins -dominates -ata -chants -ks -vo -##400 -##bley -##raya -##rmed -555 -agra -infiltrate -##ailing -##ilation -##tzer -##uppe -##werk -binoculars -enthusiast -fujian -squeak -##avs -abolitionist -almeida -boredom -hampstead -marsden -rations -##ands 
-inflated -334 -bonuses -rosalie -patna -##rco -329 -detachments -penitentiary -54th -flourishing -woolf -##dion -##etched -papyrus -##lster -##nsor -##toy -bobbed -dismounted -endelle -inhuman -motorola -tbs -wince -wreath -##ticus -hideout -inspections -sanjay -disgrace -infused -pudding -stalks -##urbed -arsenic -leases -##hyl -##rrard -collarbone -##waite -##wil -dowry -##bant -##edance -genealogical -nitrate -salamanca -scandals -thyroid -necessitated -##! -##" -### -##$ -##% -##& -##' -##( -##) -##* -##+ -##, -##- -##. -##/ -##: -##; -##< -##= -##> -##? -##@ -##[ -##\ -##] -##^ -##_ -##` -##{ -##| -##} -##~ -##¡ -##¢ -##£ -##¤ -##¥ -##¦ -##§ -##¨ -##© -##ª -##« -##¬ -##® -##± -##´ -##µ -##¶ -##· -##º -##» -##¼ -##¾ -##¿ -##æ -##ð -##÷ -##þ -##đ -##ħ -##ŋ -##œ -##ƒ -##ɐ -##ɑ -##ɒ -##ɔ -##ɕ -##ə -##ɡ -##ɣ -##ɨ -##ɪ -##ɫ -##ɬ -##ɯ -##ɲ -##ɴ -##ɹ -##ɾ -##ʀ -##ʁ -##ʂ -##ʃ -##ʉ -##ʊ -##ʋ -##ʌ -##ʎ -##ʐ -##ʑ -##ʒ -##ʔ -##ʰ -##ʲ -##ʳ -##ʷ -##ʸ -##ʻ -##ʼ -##ʾ -##ʿ -##ˈ -##ˡ -##ˢ -##ˣ -##ˤ -##β -##γ -##δ -##ε -##ζ -##θ -##κ -##λ -##μ -##ξ -##ο -##π -##ρ -##σ -##τ -##υ -##φ -##χ -##ψ -##ω -##б -##г -##д -##ж -##з -##м -##п -##с -##у -##ф -##х -##ц -##ч -##ш -##щ -##ъ -##э -##ю -##ђ -##є -##і -##ј -##љ -##њ -##ћ -##ӏ -##ա -##բ -##գ -##դ -##ե -##թ -##ի -##լ -##կ -##հ -##մ -##յ -##ն -##ո -##պ -##ս -##վ -##տ -##ր -##ւ -##ք -##־ -##א -##ב -##ג -##ד -##ו -##ז -##ח -##ט -##י -##ך -##כ -##ל -##ם -##מ -##ן -##נ -##ס -##ע -##ף -##פ -##ץ -##צ -##ק -##ר -##ש -##ת -##، -##ء -##ب -##ت -##ث -##ج -##ح -##خ -##ذ -##ز -##س -##ش -##ص -##ض -##ط -##ظ -##ع -##غ -##ـ -##ف -##ق -##ك -##و -##ى -##ٹ -##پ -##چ -##ک -##گ -##ں -##ھ -##ہ -##ے -##अ -##आ -##उ -##ए -##क -##ख -##ग -##च -##ज -##ट -##ड -##ण -##त -##थ -##द -##ध -##न -##प -##ब -##भ -##म -##य -##र -##ल -##व -##श -##ष -##स -##ह -##ा -##ि -##ी -##ो -##। -##॥ -##ং -##অ -##আ -##ই -##উ -##এ -##ও -##ক -##খ -##গ -##চ -##ছ -##জ -##ট -##ড -##ণ -##ত -##থ -##দ -##ধ -##ন -##প -##ব -##ভ -##ম -##য -##র -##ল -##শ -##ষ -##স -##হ -##া -##ি -##ী -##ে -##க -##ச -##ட -##த -##ந -##ன -##ப -##ம -##ய -##ர -##ல -##ள -##வ -##ா -##ி -##ு -##ே -##ை -##ನ -##ರ -##ಾ -##ක -##ය -##ර -##ල -##ව -##ා -##ก -##ง -##ต -##ท -##น -##พ -##ม -##ย -##ร -##ล -##ว -##ส -##อ -##า -##เ -##་ -##། -##ག -##ང -##ད -##ན -##པ -##བ -##མ -##འ -##ར -##ལ -##ས -##မ -##ა -##ბ -##გ -##დ -##ე -##ვ -##თ -##ი -##კ -##ლ -##მ -##ნ -##ო -##რ -##ს -##ტ -##უ -##ᄀ -##ᄂ -##ᄃ -##ᄅ -##ᄆ -##ᄇ -##ᄉ -##ᄊ -##ᄋ -##ᄌ -##ᄎ -##ᄏ -##ᄐ -##ᄑ -##ᄒ -##ᅡ -##ᅢ -##ᅥ -##ᅦ -##ᅧ -##ᅩ -##ᅪ -##ᅭ -##ᅮ -##ᅯ -##ᅲ -##ᅳ -##ᅴ -##ᅵ -##ᆨ -##ᆫ -##ᆯ -##ᆷ -##ᆸ -##ᆼ -##ᴬ -##ᴮ -##ᴰ -##ᴵ -##ᴺ -##ᵀ -##ᵃ -##ᵇ -##ᵈ -##ᵉ -##ᵍ -##ᵏ -##ᵐ -##ᵒ -##ᵖ -##ᵗ -##ᵘ -##ᵣ -##ᵤ -##ᵥ -##ᶜ -##ᶠ -##‐ -##‑ -##‒ -##– -##— -##― -##‖ -##‘ -##’ -##‚ -##“ -##” -##„ -##† -##‡ -##• -##… -##‰ -##′ -##″ -##› -##‿ -##⁄ -##⁰ -##ⁱ -##⁴ -##⁵ -##⁶ -##⁷ -##⁸ -##⁹ -##⁻ -##ⁿ -##₅ -##₆ -##₇ -##₈ -##₉ -##₊ -##₍ -##₎ -##ₐ -##ₑ -##ₒ -##ₓ -##ₕ -##ₖ -##ₗ -##ₘ -##ₚ -##ₛ -##ₜ -##₤ -##₩ -##€ -##₱ -##₹ -##ℓ -##№ -##ℝ -##™ -##⅓ -##⅔ -##← -##↑ -##→ -##↓ -##↔ -##↦ -##⇄ -##⇌ -##⇒ -##∂ -##∅ -##∆ -##∇ -##∈ -##∗ -##∘ -##√ -##∞ -##∧ -##∨ -##∩ -##∪ -##≈ -##≡ -##≤ -##≥ -##⊂ -##⊆ -##⊕ -##⊗ -##⋅ -##─ -##│ -##■ -##▪ -##● -##★ -##☆ -##☉ -##♠ -##♣ -##♥ -##♦ -##♯ -##⟨ -##⟩ -##ⱼ -##⺩ -##⺼ -##⽥ -##、 -##。 -##〈 -##〉 -##《 -##》 -##「 -##」 -##『 -##』 -##〜 -##あ -##い -##う -##え -##お -##か -##き -##く -##け -##こ -##さ -##し -##す -##せ -##そ -##た -##ち -##っ -##つ -##て -##と -##な -##に -##ぬ -##ね -##の -##は -##ひ -##ふ -##へ -##ほ -##ま -##み -##む -##め -##も -##や -##ゆ -##よ -##ら -##り -##る -##れ -##ろ -##を -##ん -##ァ -##ア -##ィ -##イ -##ウ -##ェ -##エ -##オ -##カ -##キ -##ク -##ケ 
-##コ -##サ -##シ -##ス -##セ -##タ -##チ -##ッ -##ツ -##テ -##ト -##ナ -##ニ -##ノ -##ハ -##ヒ -##フ -##ヘ -##ホ -##マ -##ミ -##ム -##メ -##モ -##ャ -##ュ -##ョ -##ラ -##リ -##ル -##レ -##ロ -##ワ -##ン -##・ -##ー -##一 -##三 -##上 -##下 -##不 -##世 -##中 -##主 -##久 -##之 -##也 -##事 -##二 -##五 -##井 -##京 -##人 -##亻 -##仁 -##介 -##代 -##仮 -##伊 -##会 -##佐 -##侍 -##保 -##信 -##健 -##元 -##光 -##八 -##公 -##内 -##出 -##分 -##前 -##劉 -##力 -##加 -##勝 -##北 -##区 -##十 -##千 -##南 -##博 -##原 -##口 -##古 -##史 -##司 -##合 -##吉 -##同 -##名 -##和 -##囗 -##四 -##国 -##國 -##土 -##地 -##坂 -##城 -##堂 -##場 -##士 -##夏 -##外 -##大 -##天 -##太 -##夫 -##奈 -##女 -##子 -##学 -##宀 -##宇 -##安 -##宗 -##定 -##宣 -##宮 -##家 -##宿 -##寺 -##將 -##小 -##尚 -##山 -##岡 -##島 -##崎 -##川 -##州 -##巿 -##帝 -##平 -##年 -##幸 -##广 -##弘 -##張 -##彳 -##後 -##御 -##德 -##心 -##忄 -##志 -##忠 -##愛 -##成 -##我 -##戦 -##戸 -##手 -##扌 -##政 -##文 -##新 -##方 -##日 -##明 -##星 -##春 -##昭 -##智 -##曲 -##書 -##月 -##有 -##朝 -##木 -##本 -##李 -##村 -##東 -##松 -##林 -##森 -##楊 -##樹 -##橋 -##歌 -##止 -##正 -##武 -##比 -##氏 -##民 -##水 -##氵 -##氷 -##永 -##江 -##沢 -##河 -##治 -##法 -##海 -##清 -##漢 -##瀬 -##火 -##版 -##犬 -##王 -##生 -##田 -##男 -##疒 -##発 -##白 -##的 -##皇 -##目 -##相 -##省 -##真 -##石 -##示 -##社 -##神 -##福 -##禾 -##秀 -##秋 -##空 -##立 -##章 -##竹 -##糹 -##美 -##義 -##耳 -##良 -##艹 -##花 -##英 -##華 -##葉 -##藤 -##行 -##街 -##西 -##見 -##訁 -##語 -##谷 -##貝 -##貴 -##車 -##軍 -##辶 -##道 -##郎 -##郡 -##部 -##都 -##里 -##野 -##金 -##鈴 -##镇 -##長 -##門 -##間 -##阝 -##阿 -##陳 -##陽 -##雄 -##青 -##面 -##風 -##食 -##香 -##馬 -##高 -##龍 -##龸 -##fi -##fl -##! -##( -##) -##, -##- -##. -##/ -##: -##? -##~
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/tokenizers_benchmarks.py b/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/tokenizers_benchmarks.py deleted file mode 100644 index 876d10f0..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/benchmarks/tokenizers_benchmarks.py +++ /dev/null
@@ -1,249 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Microbenchmarks for tokenizers on IMDB dataset.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import app -from absl import flags -import six - -from tensorflow.python.client import session -from tensorflow.python.eager import context -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.ragged import ragged_functional_ops -from tensorflow.python.platform import benchmark -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow_text.python import ops as text_ops -from tensorflow_text.python.benchmarks import benchmark_utils -from tensorflow_text.python.ops.bert_tokenizer import BasicTokenizer - - -FLAGS = flags.FLAGS -flags.DEFINE_integer("run_iters", 1000, "Number of iterations to run") -flags.DEFINE_integer("burn_iters", 10, "Number of warmup runs") -flags.DEFINE_integer("batch_size", 32, "The size of a batch") -flags.DEFINE_boolean("run_eagerly", True, "Run in eager mode") -flags.DEFINE_boolean( - "use_tf_function", True, - "Wraps the op in a tf.function. Only works when eager mode is enabled") -flags.DEFINE_boolean( - "ragged_vs_dense", False, - "Run the tokenizers using ragged inputs and its dense counterpart") -flags.DEFINE_boolean("xprof_tracing", False, "Enables xprof tracing") -flags.DEFINE_boolean("with_offsets", False, - "Runs the tokenize_with_offsets op instead of tokenize") - -# These are needed when generating the parameterized benchmarks and cannot use -# absl FLAGS -_BERT_VOCAB_PATH = "third_party/tensorflow_text/python/benchmarks/test_data/uncased_L-12_H-768_A-12/vocab.txt" -_HUB_MODULE_HANDLE = "third_party/tensorflow_text/python/ops/test_data/segmenter_hub_module" -_SENTENCEPIECE_MODEL_FILE = "third_party/tensorflow_text/python/ops/test_data/test_oss_model.model" - - -class TokenizationBenchmark( - six.with_metaclass(benchmark.ParameterizedBenchmark, - benchmark_utils.OpsBaseBenchmark)): - """Benchmarks for tokenizers.""" - - def __init__(self): - if not FLAGS.run_eagerly: - ops.disable_eager_execution() - - self.use_tf_function = FLAGS.use_tf_function - self.load_input_data(FLAGS.batch_size) - - # Tokenizers to benchmark which do not require a special/extra input can be - # added here as parameters to "_benchmark_parameters". - # This method assumes the tokenizers given implement the Tokenizer class and - # will run benchmarks for the "tokenize" and "tokenize_with_offsets" methods. 
- - # The parameters for each tokenizers are: - # - The tokenizer name - # - The tokenizer class to instantiate - # - The kwargs used in instantiating and initialization of the tokenizer - _benchmark_parameters = [ - ("whitespace_tokenizer", text_ops.WhitespaceTokenizer), - ("unicode_script_tokenizer", text_ops.UnicodeScriptTokenizer), - ("unicode_char_tokenizer", text_ops.UnicodeCharTokenizer), - ("bert_tokenizer", text_ops.BertTokenizer, { - "vocab_lookup_table": _BERT_VOCAB_PATH, - "token_out_type": dtypes.int32, - "lower_case": False - }), - ("hub_module_tokenizer", text_ops.HubModuleTokenizer, { - "hub_module_handle": _HUB_MODULE_HANDLE - }), - ("basic_tokenizer", BasicTokenizer), - ] - - def benchmark(self, tokenizer, kwargs=None): - tokenizer = tokenizer(**(kwargs or {})) - op = tokenizer.tokenize_with_offsets if FLAGS.with_offsets else tokenizer.tokenize - - if FLAGS.ragged_vs_dense: - self.run_and_report_ragged_vs_dense( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing) - return - - self.run_and_report( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing) - - -class CustomInputTokenizationBenchmark(benchmark_utils.OpsBaseBenchmark): - """Benchmarks for tokenizers that require extra preprocessing or inputs.""" - - def __init__(self): - if not FLAGS.run_eagerly: - ops.disable_eager_execution() - - self.use_tf_function = FLAGS.use_tf_function - self.load_input_data(FLAGS.batch_size) - - def _create_table(self, vocab, num_oov=100): - init = lookup_ops.TextFileIdTableInitializer(vocab) - return lookup_ops.StaticVocabularyTableV1(init, num_oov) - - def _run(self, tokenizer, kwargs=None): - op = tokenizer.tokenize_with_offsets if FLAGS.with_offsets else tokenizer.tokenize - - if FLAGS.ragged_vs_dense: - self.run_and_report_ragged_vs_dense( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing, - **(kwargs or {})) - - self.run_and_report( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing, - **(kwargs or {})) - - def benchmark_wordpiece_tokenizer(self): - self.input_data = text_ops.WhitespaceTokenizer().tokenize(self.input_data) - - tokenizer = text_ops.WordpieceTokenizer( - vocab_lookup_table=self._create_table((_BERT_VOCAB_PATH)), - unknown_token=None, - token_out_type=dtypes.int64) - self._run(tokenizer) - - def benchmark_sentencepiece_tokenizer(self): - model = gfile.GFile((_SENTENCEPIECE_MODEL_FILE), "rb").read() - tokenizer = text_ops.SentencepieceTokenizer(model) - self._run(tokenizer) - # TODO(irinabejan): Add benchmark for detokenization - - def _get_char_level_splits(self): - """Get splits that match inputs char level.""" - char_tokenizer = text_ops.UnicodeCharTokenizer() - char_splits = array_ops.zeros_like(char_tokenizer.tokenize(self.input_data)) - - return char_splits - - def benchmark_split_merge_tokenizer(self): - if FLAGS.ragged_vs_dense: - return - - random_seed.set_seed(5) - - char_splits = self._get_char_level_splits() - if not context.executing_eagerly(): - # Evaluate splits as their shape cannot be infered in graph mode - # and are needed for mapping - with session.Session() as sess: - sess.run(self.iterator.initializer) - char_splits = sess.run(char_splits) - - def randomize_splits(inputs): - return random_ops.random_uniform( - inputs.shape, maxval=2, dtype=dtypes.int32) - - labels = ragged_functional_ops.map_flat_values(randomize_splits, - char_splits) - - if not context.executing_eagerly(): - # Evaluate labels computation to exclude these steps 
from op benchmarking - with session.Session() as sess: - labels = sess.run(labels) - - tokenizer = text_ops.SplitMergeTokenizer() - self._run(tokenizer, {"labels": labels}) - - def benchmark_split_merge_from_logits_tokenizer(self): - if FLAGS.ragged_vs_dense: - return - - random_seed.set_seed(5) - - char_splits = self._get_char_level_splits().to_tensor() - if not context.executing_eagerly(): - with session.Session() as sess: - sess.run(self.iterator.initializer) - char_splits = sess.run(char_splits) - - logits = random_ops.random_uniform( - char_splits.shape + (2,), minval=-6, maxval=6, dtype=dtypes.float32) - - if not context.executing_eagerly(): - # Evaluate logits computation to exclude these steps from op benchmarking - with session.Session() as sess: - logits = sess.run(logits) - - tokenizer = text_ops.SplitMergeFromLogitsTokenizer() - self._run(tokenizer, {"logits": logits}) - - -class RegexSplitOpsBenchmark(benchmark_utils.OpsBaseBenchmark): - """Benchmarks for regex split ops.""" - - def __init__(self): - if not FLAGS.run_eagerly: - ops.disable_eager_execution() - - self.use_tf_function = FLAGS.use_tf_function - self.load_input_data(FLAGS.batch_size) - - def benchmark_regex_split_ops(self): - op = text_ops.regex_split_with_offsets if FLAGS.with_offsets else text_ops.regex_split - kwargs = {"delim_regex_pattern": r"[\p{S}|\p{P}]+|\s"} - - self.run_and_report( - op, - FLAGS.run_iters, - FLAGS.burn_iters, - xprof_enabled=FLAGS.xprof_tracing, - **(kwargs or {})) - - -if __name__ == "__main__": - app.run(test.main())
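The deleted benchmark above exercises each tokenizer through either `tokenize` or `tokenize_with_offsets`, selected by the `--with_offsets` flag. A minimal sketch of those two code paths follows, using only the public `tensorflow_text` package; the sentences and variable names are illustrative assumptions, not taken from the benchmark's IMDB input data.

```python
# Minimal sketch of the two paths the --with_offsets flag switches between.
# Assumes the tensorflow-text pip package is installed; inputs are illustrative.
import tensorflow as tf
import tensorflow_text as tf_text

sentences = tf.constant(["It was a bright cold day in April.",
                         "The clocks were striking thirteen."])

tokenizer = tf_text.WhitespaceTokenizer()

# tokenize(): a RaggedTensor of tokens, one row per input string.
tokens = tokenizer.tokenize(sentences)

# tokenize_with_offsets(): the same tokens plus byte offsets into the inputs.
tokens, start_offsets, end_offsets = tokenizer.tokenize_with_offsets(sentences)

print(tokens.to_list())
print(start_offsets.to_list(), end_offsets.to_list())
```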
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/keras/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/python/keras/__init__.py deleted file mode 100644 index 09d31e1..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/keras/__init__.py +++ /dev/null
@@ -1,28 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tensorflow Text Layers for Keras API.""" - -from tensorflow.python.util.all_util import remove_undocumented - -# pylint: disable=wildcard-import -from tensorflow_text.python.keras.layers import * - -# Public symbols in the "tensorflow_text.layers" package. -_allowed_symbols = [ - "layers", -] - -remove_undocumented(__name__, _allowed_symbols)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/__init__.py deleted file mode 100644 index fa0ae8f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/__init__.py +++ /dev/null
@@ -1,28 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tensorflow Text layers for Keras API.""" - -from tensorflow.python.util.all_util import remove_undocumented - -# pylint: disable=wildcard-import -from tensorflow_text.python.keras.layers.todense import * - -# Public symbols in the "tensorflow_text.layers" package. -_allowed_symbols = [ - "ToDense", -] - -remove_undocumented(__name__, _allowed_symbols)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/todense.py b/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/todense.py deleted file mode 100644 index 198c372..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/todense.py +++ /dev/null
@@ -1,111 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""ToDense layer implementation to support composite tensors in keras models. - -Implements the ToDense Keras layer that's to be used in feeding composite -tensors to recurrent layers or embeddings. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -class ToDense(tf.keras.layers.Layer): # pylint: disable=g-classes-have-attributes - """Layer that makes padding and masking a Composite Tensors effortless. - - The layer takes a RaggedTensor or a SparseTensor and converts it to a uniform - tensor by right-padding it or filling in missing values. - - Example: - - ```python - x = tf.keras.layers.Input(shape=(None, None), ragged=True) - y = tf_text.keras.layers.ToDense(mask=True)(x) - model = tf.keras.Model(x, y) - - rt = tf.RaggedTensor.from_nested_row_splits( - flat_values=[10, 11, 12, 13, 14, 15, 16, 17, 18, 19], - nested_row_splits=([0, 1, 1, 5], [0, 3, 3, 5, 9, 10])) - model.predict(rt) - - [[[10, 11, 12, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0]], - [[ 0, 0, 0, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0], [ 0, 0, 0, 0]], - [[ 0, 0, 0, 0], [13, 14, 0, 0], [15, 16, 17, 18], [19, 0, 0, 0]]] - ``` - - Args: - pad_value: A value used to pad and fill in the missing values. Should be a - meaningless value for the input data. Default is '0'. - mask: A Boolean value representing whether to mask the padded values. If - true, no any downstream Masking layer or Embedding layer with - mask_zero=True should be added. Default is 'False'. - shape: If not `None`, the resulting dense tensor will be guaranteed to have - this shape. For RaggedTensor inputs, this is passed to `tf.RaggedTensor`'s - `to_tensor` method. For other tensor types, a `tf.ensure_shape` call is - added to assert that the output has this shape. - **kwargs: kwargs of parent class. - Input shape: Any Ragged or Sparse Tensor is accepted, but it requires the type - of input to be specified via the Input or InputLayer from the Keras API. - Output shape: The output is a uniform tensor having the same shape, in case of - a ragged input or the same dense shape, in case of a sparse input. 
- """ - - def __init__(self, pad_value=0, mask=False, shape=None, **kwargs): - super(ToDense, self).__init__(**kwargs) - - self._pad_value = pad_value - self._mask = mask - self._shape = shape - self._compute_output_and_mask_jointly = True - self._supports_ragged_inputs = True - self.trainable = False - self.masking_layer = tf.keras.layers.Masking(mask_value=self._pad_value) - - def call(self, inputs): - if isinstance(inputs, tf.RaggedTensor): - # Convert the ragged tensor to a padded uniform tensor - outputs = inputs.to_tensor( - default_value=self._pad_value, shape=self._shape) - elif isinstance(inputs, tf.sparse.SparseTensor): - # Fill in the missing value in the sparse_tensor - outputs = tf.sparse.to_dense(inputs, default_value=self._pad_value) - if self._shape is not None: - outputs = tf.ensure_shape(outputs, shape=self._shape) - elif isinstance(inputs, tf.Tensor): - outputs = inputs - if self._shape is not None: - outputs = tf.ensure_shape(outputs, shape=self._shape) - else: - raise TypeError('Unexpected tensor type %s' % type(inputs).__name__) - - if self._mask: - outputs = self.masking_layer(outputs) - - return outputs - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'pad_value': self._pad_value, - 'mask': self._mask, - 'shape': self._shape, - } - base_config = super(ToDense, self).get_config() - return dict(list(base_config.items()) + list(config.items()))
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/todense_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/todense_test.py deleted file mode 100644 index a0b73cb..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/todense_test.py +++ /dev/null
@@ -1,245 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for ToDense Keras layer.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -from keras import keras_parameterized -from keras import testing_utils -import numpy as np -import tensorflow as tf - -from tensorflow.python.framework import test_util -from tensorflow_text.python.keras.layers.todense import ToDense - - -class Final(tf.keras.layers.Layer): - """This is a helper layer that can be used as the last layer in a network for testing purposes.""" - - def call(self, inputs): - return tf.dtypes.cast(inputs, tf.dtypes.float32) - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - base_config = super(Final, self).get_config() - return dict(list(base_config.items())) - - -def get_input_dataset(in_data, out_data=None): - batch_size = in_data.shape[0] - if out_data is None: - return tf.data.Dataset.from_tensor_slices(in_data).batch( - batch_size) - - return tf.data.Dataset.from_tensor_slices( - (in_data, out_data)).batch(batch_size) - - -@keras_parameterized.run_with_all_model_types -@keras_parameterized.run_all_keras_modes -class RaggedTensorsToDenseLayerTest(keras_parameterized.TestCase): - - def SKIP_test_ragged_input_default_padding(self): - input_data = get_input_dataset( - tf.ragged.constant([[1, 2, 3, 4, 5], [2, 3]])) - expected_output = np.array([[1, 2, 3, 4, 5], [2, 3, 0, 0, 0]]) - - layers = [ToDense(), Final()] - model = testing_utils.get_model_from_layers( - layers, - input_shape=(None,), - input_ragged=True, - input_dtype=tf.dtypes.int32) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - run_eagerly=testing_utils.should_run_eagerly()) - output = model.predict(input_data) - self.assertAllEqual(output, expected_output) - - def SKIP_test_ragged_input_with_padding(self): - input_data = get_input_dataset( - tf.ragged.constant([[[1, 2, 3, 4, 5]], [[2], [3]]])) - expected_output = np.array([[[1., 2., 3., 4., 5.], - [-1., -1., -1., -1., -1.]], - [[2., -1., -1., -1., -1.], - [3., -1., -1., -1., -1.]]]) - - layers = [ToDense(pad_value=-1), Final()] - model = testing_utils.get_model_from_layers( - layers, - input_shape=(None, None), - input_ragged=True, - input_dtype=tf.dtypes.int32) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - run_eagerly=testing_utils.should_run_eagerly()) - output = model.predict(input_data) - self.assertAllEqual(output, expected_output) - - def test_ragged_input_pad_and_mask(self): - input_data = tf.ragged.constant([[1, 2, 3, 4, 5], []]) - expected_mask = np.array([True, False]) - - output = ToDense(pad_value=-1, mask=True)(input_data) - self.assertTrue(hasattr(output, "_keras_mask")) - self.assertIsNot(output._keras_mask, None) - self.assertAllEqual( - tf.keras.backend.get_value(output._keras_mask), expected_mask) - - def 
test_ragged_input_shape(self): - input_data = get_input_dataset( - tf.ragged.constant([[1, 2, 3, 4, 5], [2, 3]])) - expected_output = np.array([[1, 2, 3, 4, 5, 0, 0], [2, 3, 0, 0, 0, 0, 0]]) - - layers = [ToDense(shape=[2, 7]), Final()] - model = testing_utils.get_model_from_layers( - layers, - input_shape=(None,), - input_ragged=True, - input_dtype=tf.dtypes.int32) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - run_eagerly=testing_utils.should_run_eagerly()) - output = model.predict(input_data) - self.assertAllEqual(output, expected_output) - - @parameterized.named_parameters( - *test_util.generate_combinations_with_testcase_name(layer=[ - tf.keras.layers.SimpleRNN, tf.compat.v1.keras.layers.GRU, - tf.compat.v1.keras.layers.LSTM, tf.keras.layers.GRU, - tf.keras.layers.LSTM - ])) - def SKIP_test_ragged_input_RNN_layer(self, layer): - input_data = get_input_dataset( - tf.ragged.constant([[1, 2, 3, 4, 5], [5, 6]])) - - layers = [ - ToDense(pad_value=7, mask=True), - tf.keras.layers.Embedding(8, 16), - layer(16), - tf.keras.layers.Dense(3, activation="softmax"), - tf.keras.layers.Dense(1, activation="sigmoid") - ] - model = testing_utils.get_model_from_layers( - layers, - input_shape=(None,), - input_ragged=True, - input_dtype=tf.dtypes.int32) - model.compile( - optimizer="rmsprop", - loss="binary_crossentropy", - metrics=["accuracy"], - run_eagerly=testing_utils.should_run_eagerly()) - - output = model.predict(input_data) - self.assertAllEqual(np.zeros((2, 1)).shape, output.shape) - - -@keras_parameterized.run_with_all_model_types -@keras_parameterized.run_all_keras_modes -class SparseTensorsToDenseLayerTest(keras_parameterized.TestCase): - - def SKIP_test_sparse_input_default_padding(self): - input_data = get_input_dataset( - tf.sparse.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])) - - expected_output = np.array([[1., 0., 0., 0.], [0., 0., 2., 0.], - [0., 0., 0., 0.]]) - - layers = [ToDense(), Final()] - model = testing_utils.get_model_from_layers( - layers, - input_shape=(None,), - input_sparse=True, - input_dtype=tf.dtypes.int32) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - run_eagerly=testing_utils.should_run_eagerly()) - output = model.predict(input_data) - self.assertAllEqual(output, expected_output) - - def SKIP_test_sparse_input_with_padding(self): - input_data = get_input_dataset( - tf.sparse.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])) - - expected_output = np.array([[1., -1., -1., -1.], [-1., -1., 2., -1.], - [-1., -1., -1., -1.]]) - - layers = [ToDense(pad_value=-1, trainable=False), Final()] - model = testing_utils.get_model_from_layers( - layers, - input_shape=(None,), - input_sparse=True, - input_dtype=tf.dtypes.int32) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - run_eagerly=testing_utils.should_run_eagerly()) - output = model.predict(input_data) - self.assertAllEqual(output, expected_output) - - def test_sparse_input_pad_and_mask(self): - input_data = tf.sparse.SparseTensor( - indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4]) - - expected_mask = np.array([True, True, False]) - - output = ToDense(pad_value=-1, mask=True)(input_data) - self.assertTrue(hasattr(output, "_keras_mask")) - self.assertIsNot(output._keras_mask, None) - self.assertAllEqual( - tf.keras.backend.get_value(output._keras_mask), expected_mask) - - def test_sparse_input_shape(self): - input_data = get_input_dataset( - tf.sparse.SparseTensor( - 
indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])) - - expected_output = np.array([[1., 0., 0., 0.], [0., 0., 2., 0.], - [0., 0., 0., 0.]]) - - layers = [ToDense(shape=[3, 4]), Final()] - model = testing_utils.get_model_from_layers( - layers, - input_shape=(None,), - input_sparse=True, - input_dtype=tf.dtypes.int32) - model.compile( - optimizer="sgd", - loss="mse", - metrics=["accuracy"], - run_eagerly=testing_utils.should_run_eagerly()) - output = model.predict(input_data) - self.assertAllEqual(output, expected_output) - - -if __name__ == "__main__": - tf.test.main()
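The `ToDense` docstring above shows the ragged case inside a Keras model; the sparse branch can also be exercised by calling the layer directly in eager mode. A minimal sketch, assuming the `todense` module above is still importable, reusing the sparse input and expectations from the tests above.

```python
# Minimal sketch: the SparseTensor branch of ToDense.call, invoked eagerly.
# Assumes tensorflow_text.python.keras.layers.todense is on the import path.
import tensorflow as tf
from tensorflow_text.python.keras.layers.todense import ToDense

sp = tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]],
                            values=[1, 2],
                            dense_shape=[3, 4])

# pad_value fills the missing entries when no masking is requested.
dense = ToDense(pad_value=-1)(sp)
print(dense.numpy())      # [[ 1 -1 -1 -1] [-1 -1  2 -1] [-1 -1 -1 -1]]

# mask=True additionally attaches a Keras mask marking all-padding rows.
masked = ToDense(pad_value=-1, mask=True)(sp)
print(masked._keras_mask)  # [ True  True False]
```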
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/tokenization_layers.py b/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/tokenization_layers.py deleted file mode 100644 index d98660aa..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/tokenization_layers.py +++ /dev/null
@@ -1,322 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""keras tokenization layers.""" - -import os - -import tensorflow as tf - -from tensorflow.python.keras.utils import tf_utils -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops.ragged import ragged_conversion_ops -from tensorflow_text.python.ops import unicode_script_tokenizer -from tensorflow_text.python.ops import whitespace_tokenizer -from tensorflow_text.python.ops import wordpiece_tokenizer - - -class TokenizerBase(tf.keras.layers.Layer): - """Abstract Layer for tensorflow_text tokenizers. - - Input shape: - N-D (2D as default) tensor with shape: `(batch_size, input_length)`. - - Output shape: - (N+1)-D (3D as default) tensor with shape: - `(batch_size, input_length, token_dim)`. - """ - - def __init__(self, tokenizer_instance, pad_value, squeeze_token_dim, - **kwargs): - if kwargs.get('dtype') is not None and kwargs.get('dtype') != 'string': - raise ValueError('The only valid dtype for %s is string, but got: %s' % - (self.__class__.__name__, kwargs.get('dtype'))) - kwargs['dtype'] = 'string' - super(TokenizerBase, self).__init__(**kwargs) - - self._tokenizer = tokenizer_instance - self._pad_value = pad_value - self._squeeze_token_dim = squeeze_token_dim - - def build(self, input_shape): - # We have to use 'and not ==' here, because input_shape[1] !/== 1 can result - # in None for undefined shape axes. If using 'and !=', this causes the - # expression to evaluate to False instead of True if the shape is undefined; - # the expression needs to evaluate to True in that case. - if ((self._squeeze_token_dim) and (input_shape.ndims > 1) and - (not input_shape[1] == 1)): # pylint: disable=g-comparison-negation - raise RuntimeError( - '`squeeze_token_dim` should be set to False if you are calling this ' - 'layer on a Tensor with inner dimension not equal to 1 (got ' - 'input_shape: %s).' % input_shape) - super(TokenizerBase, self).build(input_shape) - - def _set_tokenizer(self, tokenizer): - self._tokenizer = tokenizer - - def call(self, text_to_be_tokenized): - if self._squeeze_token_dim and (text_to_be_tokenized.shape.ndims > 1): - text_to_be_tokenized = tf.compat.v1.squeeze(text_to_be_tokenized, axis=1) - text = self._tokenizer.tokenize(text_to_be_tokenized) - if self._pad_value is not None: - text = ragged_conversion_ops.to_tensor( - text, default_value=self._pad_value) - return text - - @tf_utils.shape_type_conversion - def compute_output_shape(self, input_shape): - """Computes output shape for the layer. - - Args: - input_shape: Shape tuple (tuple of integers) or list of shape tuples (one - per output tensor of the layer). Shape tuples can include None for free - dimensions, instead of an integer. - - Returns: - Computed output shape(s). 
- """ - input_shape = tf.TensorShape(input_shape).as_list() - shape = [dim for dim in input_shape] - # because the output of tokenization is ragged, the added dimension should - # be set as None - shape.append(None) - return tf.TensorShape(shape) - - def compute_output_signature(self, input_signature): - """Compute the output tensor signature of the layer based on the inputs. - - Args: - input_signature: Single TensorSpec or nested structure of TensorSpec - objects, describing a candidate input for the layer. - - Returns: - Single TensorSpec or nested structure of TensorSpec objects, describing - how the layer would transform the provided input. - - Raises: - TypeError: If input_signature contains a non-TensorSpec object. - """ - - def CheckAndReturnShape(s): - if not isinstance(s, tf.TensorSpec): - raise TypeError('Only TensorSpec signature types are supported, ' - 'but saw signature signature entry: {}.'.format(s)) - return s.shape - - input_shape = CheckAndReturnShape(input_signature) - output_shape = self.compute_output_shape(input_shape) - return tf.TensorSpec(dtype=tf.string, shape=output_shape) - - def get_config(self): - config = { - 'pad_value': self._pad_value, - 'squeeze_token_dim': self._squeeze_token_dim - } - base_config = super(TokenizerBase, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@tf.keras.utils.register_keras_serializable(package='Text') -class UnicodeScriptTokenizer(TokenizerBase): - """Unicode script tokenization layer. - - Splits a string when successive tokens change their Unicode script or change - being whitespace or not. By not keeping the whitespace tokens, this allows - you to split on whitespace, and also to split out tokens from different - scripts (so, for instance, a string with both Latin and Japanese characters - would be split at the boundary of the Latin and Japanese characters in - addition to any whitespace boundaries). - - Attributes: - keep_whitespace: A boolean that specifices whether to emit whitespace - tokens (default `False`). - pad_value: if not None, performs the padding (using pad_value) at the - inner-most dimension (i.e. token dimension) and outputs a padded dense - tensor (default=None). - squeeze_token_dim: Whether to squeeze the dimension added by tokenization. - When this arg is set to False, the output will have an additional inner - dimension added, containing the tokens in each string; when this arg is - True, the layer will attempt to squeeze that dimension out. If you are - passing one string per batch, you probably want to keep this as True; if - you are passing more than one string per batch or are using this layer in - a context like the Keras `TextVectorization` layer which expects a - tf.strings.split()-stype output, this should be False. Defaults to True. - """ - - def __init__(self, - keep_whitespace=False, - pad_value=None, - squeeze_token_dim=True, - **kwargs): - tokenizer_fn = unicode_script_tokenizer.UnicodeScriptTokenizer( - keep_whitespace=keep_whitespace) - super(UnicodeScriptTokenizer, self).__init__( - tokenizer_instance=tokenizer_fn, - squeeze_token_dim=squeeze_token_dim, - pad_value=pad_value, - **kwargs) - - -@tf.keras.utils.register_keras_serializable(package='Text') -class WhitespaceTokenizer(TokenizerBase): - """Whitespace tokenization layer. - - Splits a string into substrings at ICU whitespace boundaries. - - Attributes: - pad_value: if not None, performs the padding (using pad_value) at the - inner-most dimension (i.e. 
token dimension) and outputs a padded dense - tensor (default=None). - squeeze_token_dim: Whether to squeeze the dimension added by tokenization. - When this arg is set to False, the output will have an additional inner - dimension added, containing the tokens in each string; when this arg is - True, the layer will attempt to squeeze that dimension out. If you are - passing one string per batch, you probably want to keep this as True; if - you are passing more than one string per batch or are using this layer in - a context like the Keras `TextVectorization` layer which expects a - tf.strings.split()-stype output, this should be False. Defaults to True. - """ - - def __init__(self, pad_value=None, squeeze_token_dim=True, **kwargs): - tokenizer_fn = whitespace_tokenizer.WhitespaceTokenizer() - super(WhitespaceTokenizer, self).__init__( - tokenizer_instance=tokenizer_fn, - squeeze_token_dim=squeeze_token_dim, - pad_value=pad_value, - **kwargs) - - -@tf.keras.utils.register_keras_serializable(package='Text') -class WordpieceTokenizer(TokenizerBase): - """Splits an already-tokenized tensor of tokens further into WordPiece tokens. - - Splits a set of string tokens into subwords as described in - https://arxiv.org/pdf/1609.08144.pdf. This layer does not build the WordPiece - vocabulary; instead, users should set the vocabulary by either passing it to - the init call or by calling set_vocabulary() after the layer is constructed. - - Attributes: - vocabulary: An optional list of vocabulary terms, or a path to a text file - containing a vocabulary to load into this layer. The file should contain - one token per line. If the list or file contains the same token multiple - times, an error will be thrown. - suffix_indicator: (optional) The characters prepended to a wordpiece to - indicate that it is a suffix to another subword. Default is '##'. - max_bytes_per_word: (optional) Max size of input token. Default is 100. - token_out_type: (optional) The type of the token to return. This can be - `tf.int64` IDs, or `tf.string` subwords. The default is `tf.int64`. - unknown_token: (optional) The string value to substitute for an unknown - token. Default is "[UNK]". If set to `None`, no substitution occurs. - If `token_out_type` is `tf.int64`, the `vocabulary` is used (after - substitution) to convert the unknown token to an integer, resulting in -1 - if `unknown_token` is set to `None` or not contained in the `vocabulary`. - pad_value: if not None, performs the padding (using pad_value) at the - inner-most dimension (i.e. token dimension) and outputs a padded dense - tensor (default=None). - merge_wordpiece_dim: If False, this layer will output a RaggedTensor - with an additional inner 'wordpiece' dimension, containing the wordpieces - for each token. If set to True, this layer will concatenate and squeeze - along that dimension. Defaults to True. 
- """ - - def __init__(self, - vocabulary=None, - suffix_indicator='##', - max_bytes_per_word=100, - token_out_type=tf.string, - unknown_token='[UNK]', - pad_value=None, - merge_wordpiece_dim=True, - **kwargs): - self._suffix_indicator = suffix_indicator - self._max_bytes_per_word = max_bytes_per_word - self._token_out_type = tf.dtypes.as_dtype(token_out_type) - self._unknown_token = unknown_token - self._merge_wordpiece_dim = merge_wordpiece_dim - - self._table = lookup_ops.MutableHashTable( - key_dtype=tf.string, value_dtype=tf.int64, default_value=-1) - - tokenizer_instance = wordpiece_tokenizer.WordpieceTokenizer( - vocab_lookup_table=self._table, - suffix_indicator=self._suffix_indicator, - max_bytes_per_word=self._max_bytes_per_word, - token_out_type=self._token_out_type, - unknown_token=self._unknown_token) - - super(WordpieceTokenizer, self).__init__( - tokenizer_instance=tokenizer_instance, - squeeze_token_dim=False, - pad_value=pad_value, - **kwargs) - - # We need to add the trackable after the superclass was called, since - # it adds the table to a list that is created there. - tracked_table = self._add_trackable(self._table, trainable=False) - # This is a workaround for summary() on this layer. Because the table is - # not mutable during training, the effective number of parameters (and so - # the weight shape) is 0; we add this as an attr so that the parameter - # counting code in the Model object doesn't throw an attribute error. - tracked_table.shape = tf.TensorShape((0,)) - - if vocabulary is not None: - self.set_vocabulary(vocabulary) - - def set_vocabulary(self, vocab): - if isinstance(vocab, (str, bytes)): - vocab = _GetVocabularyFromFile(vocab) - keys = tf.convert_to_tensor(vocab, dtype=tf.string) - values = tf.range(len(vocab), dtype=tf.int64) - op = self._table.insert(keys, values) - if not tf.executing_eagerly(): - tf.compat.v1.get_default_session().run(op) - - def get_config(self): - config = { - 'suffix_indicator': self._suffix_indicator, - 'max_bytes_per_word': self._max_bytes_per_word, - 'token_out_type': self._token_out_type.name, - 'unknown_token': self._unknown_token, - 'vocabulary': None, - } - base_config = super(WordpieceTokenizer, self).get_config() - del base_config['squeeze_token_dim'] - return dict(list(base_config.items()) + list(config.items())) - - def call(self, inputs): - wordpiece_tensor = super(WordpieceTokenizer, self).call(inputs) - if self._merge_wordpiece_dim: - wordpiece_tensor = tf.concat(wordpiece_tensor, -1) - return wordpiece_tensor - - -def _GetVocabularyFromFile(vocabulary_path): - """Read a vocabulary in from a file.""" - vocab = [] - with tf.io.gfile.GFile(vocabulary_path, 'r') as reader: - while True: - # Get the next line (incl. \n), and break if nothing is left to read. - text = reader.readline() - if not text: - break - - # Convert the raw text and strip whitespace. - if isinstance(text, str): - token = text - elif isinstance(text, bytes): - token = text.decode('utf-8', 'ignore') - token = token.rstrip(os.linesep) - vocab.append(token) - return vocab
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/tokenization_layers_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/tokenization_layers_test.py deleted file mode 100644 index 80da3531..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/keras/layers/tokenization_layers_test.py +++ /dev/null
@@ -1,589 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for Keras tokenization_layers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -from keras import keras_parameterized -from keras import testing_utils -import numpy as np -import tensorflow as tf - -from tensorflow_text.python.keras.layers import tokenization_layers - - -@keras_parameterized.run_all_keras_modes -class TokenizationLayersTest(keras_parameterized.TestCase, tf.test.TestCase): - - @parameterized.named_parameters( - { - 'cls': tokenization_layers.UnicodeScriptTokenizer, - 'input_shape': (1,), - 'input_data': [b'I love Flume!'], - 'expected': [[b'I', b'love', b'Flume', b'!']], - 'kwargs': { - 'squeeze_token_dim': False, - }, - 'testcase_name': 'unicode_layer', - }, - { - 'cls': tokenization_layers.WhitespaceTokenizer, - 'input_shape': (1,), - 'input_data': [b'I love Flume!'], - 'expected': [[b'I', b'love', b'Flume!']], - # TODO(raw-pointer): layer test will fail if squeeze_token_dim option - # is disabled. Not sure if it is layer_test limitaitons or the layer - # itself. Fix it when layer_test is updated. - 'kwargs': { - 'squeeze_token_dim': False, - }, - 'testcase_name': 'whitespace_layer', - }, - { - 'cls': - tokenization_layers.WordpieceTokenizer, - 'input_shape': ( - 1, - None, - ), - 'kwargs': { - 'vocabulary': [ - b'don', - b"##'", - b'##t', - b'tread', - b'##ness', - b'hel', - b'##lo', - b'there', - b'my', - b'na', - b'##me', - b'is', - b'ter', - b'##ry', - b'what', - b'##cha', - b'##ma', - b'##call', - b'##it?', - b'you', - b'said', - ], - 'merge_wordpiece_dim': False - }, - 'input_data': - np.array([[b"don't", b'treadness', b'whatchamacallit?']]), - 'expected': [[[b'don', b"##'", b'##t'], [b'tread', b'##ness'], - [b'what', b'##cha', b'##ma', b'##call', b'##it?']]], - 'testcase_name': - 'wordpiece_layer', - }) - def test_tokenizer_layer_sequential(self, - cls, - input_shape, - input_data=None, - expected=None, - kwargs=None): - # TODO(raw-pointer): was there meant to be a wordpiece test that tests the layers - # on an empty zero-value tensor? I think Keras doesn't support that in TF2. - # Or was it meant to test on an empty string? 
- - if not tf.executing_eagerly(): - # In TF1 list-of-lists-of-scalars need to be wrapped in an extra list - # for single-io models, because it tries to disambiguate which - # input to send an input to (which causes issues w/ single io models) - input_data = [input_data] - - output_data = testing_utils.layer_test( - cls, - kwargs=kwargs, - validate_training=False, - input_shape=input_shape, - input_dtype='string', - input_data=input_data, - test_harness=self, - ) - self.assertAllEqual(output_data, expected) - - @parameterized.named_parameters( - { - 'batch_input_shape': (None,), - 'batch_output_shape': (None, None), - 'testcase_name': 'basic_test', - }, { - 'batch_input_shape': (2, 3), - 'batch_output_shape': (2, 3, None), - 'testcase_name': 'multi_dimensional', - }) - def test_compute_output_signature(self, - batch_input_shape, - batch_output_shape): - layer = tokenization_layers.WhitespaceTokenizer() - self.assertEqual( - layer.compute_output_signature( - tf.TensorSpec(batch_input_shape, tf.string)), - tf.TensorSpec(batch_output_shape, tf.string)) - - @parameterized.named_parameters( - { - 'input_data': [b'I love Flume!'], - 'expected_output': [[b'I', b'love', b'Flume!']], - 'input_shape': (1,), - 'testcase_name': 'basic_test', - }, { - 'input_data': [[b'I love Flume!'], [b'Good day']], - 'expected_output': [[b'I', b'love', b'Flume!'], [b'Good', b'day']], - 'input_shape': (1,), - 'testcase_name': 'batch_of_2', - }, { - 'input_data': [[b' '], [b' ']], - 'expected_output': [[], []], - 'input_shape': (1,), - 'testcase_name': 'batch_of_2_all_whitespace', - }, { - 'input_data': np.array([['I love Flume!', 'Good day']]), - 'expected_output': [[[b'I', b'love', b'Flume!'], [b'Good', b'day']]], - 'input_shape': (None,), - 'squeeze_token_dim': False, - 'testcase_name': 'multi_dimensional', - }, { - 'input_data': np.array([[b'I love Flume!', b'Good day']]), - 'expected_output': [[[b'I', b'love', b'Flume!'], - [b'Good', b'day', b'[PAD]']]], - 'pad_value': b'[PAD]', - 'input_shape': (None,), - 'squeeze_token_dim': False, - 'testcase_name': 'multi_dim_with_padding', - }) - def test_whitespace_tokenization_layer(self, - input_data, - expected_output, - input_shape=(None,), - pad_value=None, - squeeze_token_dim=True): - if (not tf.executing_eagerly() and - not isinstance(input_data, np.ndarray)): - # In TF1 list-of-lists-of-scalars need to be wrapped in an extra list - # for single-io models, because it tries to disambiguate which - # input to send an input to (which causes issues w/ single io models) - input_data = [input_data] - - # create a functional API model - i = tf.keras.layers.Input(shape=input_shape, dtype=tf.string) - layer = tokenization_layers.WhitespaceTokenizer( - pad_value=pad_value, squeeze_token_dim=squeeze_token_dim) - o = layer(i) - model = tf.keras.models.Model(i, o) - self.assertAllEqual(model.predict(input_data), expected_output) - - @parameterized.named_parameters( - { - 'input_data': [[b'I love Flume!']], - 'expected_output': [[b'I', b'love', b'Flume', b'!']], - 'input_shape': (1,), - 'testcase_name': 'basic_test', - }, { - 'input_data': [[b'I love Flume!'], [b'Good day']], - 'expected_output': [[b'I', b'love', b'Flume', b'!'], - [b'Good', b'day']], - 'input_shape': (1,), - 'testcase_name': 'batch_of_2', - }, { - 'input_data': [[b' '], [b' ']], - 'expected_output': [[], []], - 'input_shape': (1,), - 'testcase_name': 'batch_of_2_all_whitespace', - }, { - 'input_data': np.array([[b'I love Flume!', b'Good day']]), - 'expected_output': [[[b'I', b'love', b'Flume', b'!'], - [b'Good', 
b'day']]], - 'squeeze_token_dim': False, - 'testcase_name': 'multi_dimensional', - }, { - 'input_data': np.array([[b'I love Flume!', b'Good day']]), - 'expected_output': [[[b'I', b'love', b'Flume', b'!'], - [b'Good', b'day', b'[PAD]', b'[PAD]']]], - 'pad_value': b'[PAD]', - 'input_shape': (None,), - 'squeeze_token_dim': False, - 'testcase_name': 'multi_dim_with_padding', - }) - def test_unicode_tokenization_layer(self, - input_data, - expected_output, - input_shape=(None,), - pad_value=None, - squeeze_token_dim=True): - if (not tf.executing_eagerly() and - not isinstance(input_data, np.ndarray)): - # In TF1 list-of-lists-of-scalars need to be wrapped in an extra list - # for single-io models, because it tries to disambiguate which - # input to send an input to (which causes issues w/ single io models) - input_data = [input_data] - - # create a functional API model - i = tf.keras.layers.Input(shape=input_shape, dtype=tf.string) - layer = tokenization_layers.UnicodeScriptTokenizer( - pad_value=pad_value, squeeze_token_dim=squeeze_token_dim) - o = layer(i) - model = tf.keras.models.Model(i, o) - self.assertAllEqual(model.predict(input_data), expected_output) - - def test_unicode_tokenization_in_text_vec(self): - input_data = [[b'I love Flume!']] - expected_output = [[b'i', b'love', b'flume']] - if (not tf.executing_eagerly() and - not isinstance(input_data, np.ndarray)): - # In TF1 list-of-lists-of-scalars need to be wrapped in an extra list - # for single-io models, because it tries to disambiguate which - # input to send an input to (which causes issues w/ single io models) - input_data = [input_data] - - # create a functional API model - i = tf.keras.layers.Input(shape=(1,), dtype=tf.string) - splitter = tokenization_layers.UnicodeScriptTokenizer() - layer = tf.keras.layers.experimental.preprocessing.TextVectorization( - split=splitter, output_mode=None) - o = layer(i) - model = tf.keras.models.Model(i, o) - - # evaluate the model - self.assertAllEqual(model.predict(input_data), expected_output) - - @parameterized.named_parameters( - { - 'input_data': [[b' ']], - 'expected_output': [[[b'[UNK]']]], - 'vocab_list': [ - b'don', - b"##'", - b'##t', - b'tread', - b'##ness', - b'hel', - b'##lo', - b'there', - b'my', - b'na', - b'##me', - b'is', - b'ter', - b'##ry', - b'what', - b'##cha', - b'##ma', - b'##call', - b'##it?', - b'you', - b'said', - ], - 'input_shape': (1,), - 'merge_wordpiece_dim': False, - 'testcase_name': 'basic_test_whitespace', - }, - { - 'input_data': [[b"don't"]], - 'expected_output': [[[b'don', b"##'", b'##t']]], - 'vocab_list': [ - b'don', - b"##'", - b'##t', - b'tread', - b'##ness', - b'hel', - b'##lo', - b'there', - b'my', - b'na', - b'##me', - b'is', - b'ter', - b'##ry', - b'what', - b'##cha', - b'##ma', - b'##call', - b'##it?', - b'you', - b'said', - ], - 'input_shape': (1,), - 'merge_wordpiece_dim': False, - 'testcase_name': 'basic_test', - }, - { - 'input_data': - np.array([[b"don't", b'treadness', b'whatchamacallit?']]), - 'expected_output': [[[b'don', b"##'", b'##t'], [ - b'tread', b'##ness' - ], [b'what', b'##cha', b'##ma', b'##call', b'##it?']]], - 'vocab_list': [ - b'don', - b"##'", - b'##t', - b'tread', - b'##ness', - b'hel', - b'##lo', - b'there', - b'my', - b'na', - b'##me', - b'is', - b'ter', - b'##ry', - b'what', - b'##cha', - b'##ma', - b'##call', - b'##it?', - b'you', - b'said', - ], - 'input_shape': (3,), - 'merge_wordpiece_dim': - False, - 'testcase_name': - 'multi_dimensional', - }, - { - 'input_data': - np.array([[b"don't", b'treadness', 
b'whatchamacallit?']]), - 'expected_output': [[[ - b'don', b"##'", b'##t', b'[PAD]', b'[PAD]' - ], [b'tread', b'##ness', b'[PAD]', b'[PAD]', b'[PAD]' - ], [b'what', b'##cha', b'##ma', b'##call', b'##it?']]], - 'vocab_list': [ - b'don', - b"##'", - b'##t', - b'tread', - b'##ness', - b'hel', - b'##lo', - b'there', - b'my', - b'na', - b'##me', - b'is', - b'ter', - b'##ry', - b'what', - b'##cha', - b'##ma', - b'##call', - b'##it?', - b'you', - b'said', - ], - 'pad_value': - b'[PAD]', - 'input_shape': (3,), - 'merge_wordpiece_dim': - False, - 'testcase_name': - 'multi_dim_with_padding', - }, - ) - def test_wordpiece_tokenization_layer(self, - input_data, - expected_output, - vocab_list, - input_shape=(None,), - pad_value=None, - merge_wordpiece_dim=False): - if (not tf.executing_eagerly() and - not isinstance(input_data, np.ndarray)): - # In TF1 list-of-lists-of-scalars need to be wrapped in an extra list - # for single-io models, because it tries to disambiguate which - # input to send an input to (which causes issues w/ single io models) - input_data = [input_data] - - # create a functional API model - i = tf.keras.layers.Input(shape=input_shape, dtype=tf.string) - layer = tokenization_layers.WordpieceTokenizer( - vocabulary=vocab_list, - pad_value=pad_value, - merge_wordpiece_dim=merge_wordpiece_dim) - o = layer(i) - model = tf.keras.models.Model(i, o) - self.assertAllEqual(model.predict(input_data), expected_output) - - @parameterized.named_parameters({ - 'input_data': [[b'I love Flume!'], [b'Good day']], - 'expected_output': [[b'I', b'love', b'Flume!'], [b'Good', b'day']], - 'input_shape': (1,), - 'testcase_name': 'batch_of_2', - }) - def test_whitespace_tokenization_multi_layer(self, - input_data, - expected_output, - input_shape=(None,), - pad_value=None, - squeeze_token_dim=True): - if not tf.executing_eagerly(): - # In TF1 list-of-lists-of-scalars need to be wrapped in an extra list - # for single-io models, because it tries to disambiguate which - # input to send an input to (which causes issues w/ single io models) - input_data = [input_data] - - # create a functional API model - i = tf.keras.layers.Input(shape=input_shape, dtype=tf.string) - layer1 = tokenization_layers.WhitespaceTokenizer( - pad_value=pad_value, squeeze_token_dim=squeeze_token_dim) - z = layer1(i) - layer2 = tokenization_layers.WhitespaceTokenizer( - pad_value=pad_value, squeeze_token_dim=squeeze_token_dim) - o = layer2(i) - model = tf.keras.models.Model(i, [z, o]) - out1, out2 = model.predict(input_data) - self.assertAllEqual(out1, expected_output) - self.assertAllEqual(out2, expected_output) - - @parameterized.named_parameters({ - 'input_data': [[b'I love Flume!'], [b'Good day']], - 'expected_output': [[b'I', b'love', b'Flume', b'!'], [b'Good', b'day']], - 'input_shape': (1,), - 'testcase_name': 'batch_of_2', - }) - def test_unicode_tokenization_multi_layer(self, - input_data, - expected_output, - input_shape=(None,), - pad_value=None, - squeeze_token_dim=True): - if not tf.executing_eagerly(): - # In TF1 list-of-lists-of-scalars need to be wrapped in an extra list - # for single-io models, because it tries to disambiguate which - # input to send an input to (which causes issues w/ single io models) - input_data = [input_data] - - # create a functional API model - i = tf.keras.layers.Input(shape=input_shape, dtype=tf.string) - layer1 = tokenization_layers.UnicodeScriptTokenizer( - pad_value=pad_value, squeeze_token_dim=squeeze_token_dim) - z = layer1(i) - layer2 = tokenization_layers.UnicodeScriptTokenizer( - 
pad_value=pad_value, squeeze_token_dim=squeeze_token_dim) - o = layer2(i) - model = tf.keras.models.Model(i, [z, o]) - out1, out2 = model.predict(input_data) - self.assertAllEqual(out1, expected_output) - self.assertAllEqual(out2, expected_output) - - @parameterized.named_parameters( - { - 'input_data': - np.array([[b"don't", b'treadness', b'whatchamacallit?']]), - 'expected_output': [[[b'don', b"##'", b'##t'], [ - b'tread', b'##ness' - ], [b'what', b'##cha', b'##ma', b'##call', b'##it?']]], - 'vocab_list': [ - b'don', - b"##'", - b'##t', - b'tread', - b'##ness', - b'hel', - b'##lo', - b'there', - b'my', - b'na', - b'##me', - b'is', - b'ter', - b'##ry', - b'what', - b'##cha', - b'##ma', - b'##call', - b'##it?', - b'you', - b'said', - ], - 'input_shape': (3,), - 'merge_wordpiece_dim': - False, - 'testcase_name': - 'multi_dimensional', - }, - { - 'input_data': - np.array([[b"don't", b'treadness', b'whatchamacallit?']]), - 'expected_output': [[[b'don', b"##'", b'##t'], [ - b'tread', b'##ness' - ], [b'what', b'##cha', b'##ma', b'##call', b'##it?']]], - 'vocab_list': [ - b'don', - b"##'", - b'##t', - b'tread', - b'##ness', - b'hel', - b'##lo', - b'there', - b'my', - b'na', - b'##me', - b'is', - b'ter', - b'##ry', - b'what', - b'##cha', - b'##ma', - b'##call', - b'##it?', - b'you', - b'said', - ], - 'input_shape': (3,), - 'merge_wordpiece_dim': - True, - 'testcase_name': - 'merge', - }, - ) - def test_wordpiece_tokenization_multi_layer(self, - input_data, - expected_output, - vocab_list, - input_shape=(None,), - pad_value=None, - merge_wordpiece_dim=False): - # create a functional API model - i = tf.keras.layers.Input(shape=input_shape, dtype=tf.string) - layer1 = tokenization_layers.WordpieceTokenizer( - vocabulary=vocab_list, - pad_value=pad_value, - merge_wordpiece_dim=merge_wordpiece_dim) - o1 = layer1(i) - layer2 = tokenization_layers.WordpieceTokenizer( - vocabulary=vocab_list, - pad_value=pad_value, - merge_wordpiece_dim=merge_wordpiece_dim) - o2 = layer2(i) - model = tf.keras.models.Model(i, [o1, o2]) - - out1, out2 = model.predict(input_data) - self.assertAllEqual(out1, expected_output) - self.assertAllEqual(out2, expected_output) - -if __name__ == '__main__': - tf.test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/metrics/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/python/metrics/__init__.py deleted file mode 100644 index ff098a7f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/metrics/__init__.py +++ /dev/null
@@ -1,27 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tensorflow text-processing metrics.""" -from tensorflow.python.util.all_util import remove_undocumented - -# pylint: disable=wildcard-import -from tensorflow_text.python.metrics.text_similarity_metric_ops import * - -# Public symbols in the "tensorflow_text.metrics" package. -_allowed_symbols = [ - "rouge_l", -] - -remove_undocumented(__name__, _allowed_symbols)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/metrics/text_similarity_metric_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/metrics/text_similarity_metric_ops.py deleted file mode 100644 index 02968e81..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/metrics/text_similarity_metric_ops.py +++ /dev/null
@@ -1,92 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops to compute similarity metrics between texts.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import ops -from tensorflow.python.ops.ragged import ragged_tensor - -# pylint: disable=g-bad-import-order -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_text_similarity_metric_ops = load_library.load_op_library(resource_loader.get_path_to_datafile('_text_similarity_metric_ops.so')) - - -def rouge_l(hypotheses, references, alpha=None): - """Computes LCS-based similarity score between the hypotheses and references. - - The Rouge-L metric is a score from 0 to 1 indicating how similar two sequences - are, based on the length of the longest common subsequence (LCS). In - particular, Rouge-L is the weighted harmonic mean (or f-measure) combining - the LCS precision (the percentage of the hypothesis sequence covered by the - LCS) and the LCS recall (the percentage of the reference sequence covered by - the LCS). - - Source: https://www.microsoft.com/en-us/research/publication/ - rouge-a-package-for-automatic-evaluation-of-summaries/ - - This method returns the F-measure, Precision, and Recall for each - (hypothesis, reference) pair. - - Alpha is used as a weight for the harmonic mean of precision and recall. A - value of 0 means recall is more important and 1 means precision is - more important. Leaving alpha unset implies alpha=.5, which is the default in - the official ROUGE-1.5.5.pl script. Setting alpha to a negative number - triggers a compatibility mode with the tensor2tensor implementation of - ROUGE-L. - - >>> hypotheses = tf.ragged.constant([["a","b"]]) - >>> references = tf.ragged.constant([["b"]]) - >>> f, p, r = rouge_l(hypotheses, references, alpha=1) - >>> print("f: %s, p: %s, r: %s" % (f, p, r)) - f: tf.Tensor([0.5], shape=(1,), dtype=float32), - p: tf.Tensor([0.5], shape=(1,), dtype=float32), - r: tf.Tensor([1.], shape=(1,), dtype=float32) - - Args: - hypotheses: A RaggedTensor with shape [N, (hyp_sentence_len)] and integer or - string values. - references: A RaggedTensor with shape [N, (ref_sentence_len)] and integer or - string values. - alpha: optional float parameter for weighting - - Returns: - an (f_measure, p_measure, r_measure) tuple, where each element is a - vector of floats with shape [N]. The i-th float in each vector contains - the similarity measure of hypotheses[i] and references[i]. 
- """ - if not isinstance(hypotheses, ragged_tensor.RaggedTensor): - raise ValueError('hypotheses must be a RaggedTensor') - if not isinstance(references, ragged_tensor.RaggedTensor): - raise ValueError('references must be a RaggedTensor') - if hypotheses.ragged_rank != 1: - raise ValueError('hypotheses.ragged_rank must be 1') - if references.ragged_rank != 1: - raise ValueError('references.ragged_rank must be 1') - if alpha is None: - alpha = .5 - if isinstance(alpha, (float, int)) and alpha > 1: - raise ValueError('alpha cannot be greater than 1') - with ops.name_scope(None, 'RougeL', [hypotheses, references]): - return gen_text_similarity_metric_ops.rouge_l( - hypotheses.values, - hypotheses.row_splits, - references.values, - references.row_splits, - alpha)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/metrics/text_similarity_metric_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/metrics/text_similarity_metric_ops_test.py deleted file mode 100644 index 68d93a8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/metrics/text_similarity_metric_ops_test.py +++ /dev/null
@@ -1,361 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# encoding=utf-8 -"""Tests for text_similarity_metric_ops op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re -from absl.testing import parameterized -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.metrics import text_similarity_metric_ops - - -def _tokenize_whitespace(text): - """Tokenizes text by splitting on whitespace.""" - return text.split() - - -def _tokenize_155_compat(text): - """Tokenizes text in a manner that is consistent with ROUGE-1.5.5.pl.""" - text = re.sub(r"-", " - ", text) - text = re.sub(r"[^A-Za-z0-9\-]", " ", text) - tokens = text.split() - tokens = [t for t in tokens if re.match(r"^[a-z0-9$]", t)] - return tokens - - -_TEST_HYPOTHESES = ( - "the #### transcript is a written version of each day 's cnn " - "student news program use this transcript to help students with " - "reading comprehension and vocabulary use the weekly newsquiz " - "to test your knowledge of storie s you saw on cnn student " - "news", - "a u.s. citizen was killed in a a shootout in mississippi in " - "#### he was shot in the head and died in a bath tub in omaha , " - "louisiana authorities are investigating the death\",", - "nelson mandela is a women 's advocate for women , nelson " - "mandela says nelson mandela is a women 's advocate for women " - "she says women do n't know how women are women", - "the captain of the delta flight was en route to <unk> airport " - ", the coast guard says the plane was carrying ### passengers " - "and ## crew members the plane was en route from atlanta to the " - "dominican republic") - - -_TEST_REFERENCES = ( - "this page includes the show transcript use the transcript to " - "help students with reading comprehension and vocabulary at the " - "bottom of the page , comment for a chance to be mentioned on " - "cnn student news . you must be a teacher or a student age # # " - "or older to request a mention on the cnn student news roll " - "call . the weekly newsquiz tests students ' knowledge of even " - "ts in the news", - "the fugitive who killed the marshal was \" extremely dangerous " - ", \" u.s. marshals service director says deputy u.s. 
marshal " - "josie wells , ## , died after trying to arrest jamie croom \" " - "before he 'd go back to jail , he said , he 'd rather be dead, " - "\" croom 's sister says", - "cnn 's kelly wallace wonders why women too often do n't lift " - "each up in the workplace author of \" the woman code \" says " - "women need to start operating like the boys women need to " - "realize they win when they help other women get ahead , says " - "author", - "delta air lines flight #### skidded into a fence last week at " - "a laguardia airport beset by winter weather the ntsb says the " - "crew reported they did not sense any deceleration from the " - "wheel brake upon landing") - - -class TextSimilarityMetricOpsTest(test_util.TensorFlowTestCase, - parameterized.TestCase): - - @parameterized.parameters([ - # Corner-case - dict( - hyp=[[]], - ref=[[]], - expected_f_measures=[0], - expected_p_measures=[0], - expected_r_measures=[0], - value_dtype=dtypes.int32, - ), - # Corner-case - dict( - hyp=[], - ref=[], - expected_f_measures=[], - expected_p_measures=[], - expected_r_measures=[], - value_dtype=dtypes.int32, - ), - # Corner-case - dict( - hyp=[[]], - ref=[[1, 2, 3]], - expected_f_measures=[0], - expected_p_measures=[0], - expected_r_measures=[0], - value_dtype=dtypes.int32, - ), - # Corner-case - dict( - hyp=[[1, 2, 3]], - ref=[[]], - expected_f_measures=[0], - expected_p_measures=[0], - expected_r_measures=[0], - value_dtype=dtypes.int32, - ), - # Identical case - dict( - hyp=[[1, 2, 3, 4, 5, 1, 6, 7, 0], - [1, 2, 3, 4, 5, 1, 6]], - ref=[[1, 2, 3, 4, 5, 1, 6, 7, 0], - [1, 2, 3, 4, 5, 1, 6]], - expected_f_measures=[1.0, 1.0], - expected_p_measures=[1.0, 1.0], - expected_r_measures=[1.0, 1.0], - ), - # Disjoint case - dict( - hyp=[[1, 2, 3, 4, 5, 1, 6, 7, 0], - [1, 2, 3, 4, 5, 1, 6, 8, 7]], - ref=[[8, 9, 10, 11, 12, 13, 14, 15, 16, 17], - [9, 10, 11, 12, 13, 14, 15, 16, 17, 0]], - expected_f_measures=[0.0, 0.0], - expected_p_measures=[0.0, 0.0], - expected_r_measures=[0.0, 0.0], - ), - # Basic case (alpha=-1) - dict( - hyp=[["a", "b",]], - ref=[["b"]], - expected_f_measures=[.555], - expected_p_measures=[.5], - expected_r_measures=[1.0], - alpha=-1, - ), - # Basic case (alpha=0) - dict( - hyp=[["a", "b",]], - ref=[["b"]], - expected_f_measures=[1.0], - expected_p_measures=[.5], - expected_r_measures=[1.0], - alpha=0, - ), - # Basic case (alpha=1) - dict( - hyp=[["a", "b",]], - ref=[["b"]], - expected_f_measures=[.5], - expected_p_measures=[.5], - expected_r_measures=[1.0], - alpha=1, - ), - # Basic case (alpha=.5) - dict( - hyp=[["a", "b",]], - ref=[["b"]], - expected_f_measures=[.666], - expected_p_measures=[.5], - expected_r_measures=[1.0], - alpha=.5, - ), - # Basic case (alpha=.8) - dict( - hyp=[["a", "b",]], - ref=[["b"]], - expected_f_measures=[.555], - expected_p_measures=[.5], - expected_r_measures=[1.0], - alpha=.8, - ), - # Partial overlap case 1 - dict( - hyp=[[1, 2, 3, 4, 5, 1, 6, 7, 0], - [1, 2, 3, 4, 5, 1, 6, 8, 7]], - ref=[[1, 9, 2, 3, 4, 5, 1, 10, 6, 7], - [1, 9, 2, 3, 4, 5, 1, 10, 6, 7]], - expected_f_measures=[.837, .837], - expected_p_measures=[.889, .889], - expected_r_measures=[.8, .8], - alpha=-1, - ), - # Partial overlap case 2 - dict( - hyp=[["12", "23", "34", "45"]], - ref=[["12", "23"]], - expected_f_measures=[.555], - expected_p_measures=[.5], - expected_r_measures=[1.0], - alpha=-1, - ), - # Obscured sequence case - dict( - hyp=[[1, 2, 3]], - ref=[[1, 2, 3, 2, 3]], - expected_f_measures=[.671], - expected_p_measures=[1.0], - expected_r_measures=[.6], - alpha=-1, - ), - # 
Thorough test case for Alpha=.5 (default; same as ROUGE-1.5.5.pl). - # - # The official ROUGE-1.5.5.pl script computes the following scores for - # these examples: - # - # f=[.345, .076, .177, .247] - # p=[.452, .091, .219, .243] - # r=[.279, .065, .149, .250] - dict( - hyp=_TEST_HYPOTHESES, - ref=_TEST_REFERENCES, - expected_f_measures=[.345, .076, .177, .253], - expected_p_measures=[.452, .091, .219, .257], - expected_r_measures=[.279, .065, .149, .250], - tokenize_fn=_tokenize_155_compat, - ), - # Same as above case but with Alpha=0. - dict( - hyp=_TEST_HYPOTHESES, - ref=_TEST_REFERENCES, - expected_f_measures=[.279, .065, .149, .250], - expected_p_measures=[.452, .091, .219, .257], - expected_r_measures=[.279, .065, .149, .250], - tokenize_fn=_tokenize_155_compat, - alpha=0, - ), - # Same as above case but with Alpha=1 - dict( - hyp=_TEST_HYPOTHESES, - ref=_TEST_REFERENCES, - expected_f_measures=[.452, .091, .219, .257], - expected_p_measures=[.452, .091, .219, .257], - expected_r_measures=[.279, .065, .149, .250], - tokenize_fn=_tokenize_155_compat, - alpha=1, - ), - # Thorough test case for Alpha=-1 (same as tensor2tensor). - # - # A popular unofficial implementation of ROUGE-L on Github also reports - # these values: - # https://github.com/pltrdy/rouge/blob/master/tests/data.json - # - # f=[.287, .083, .137, .240] - # p=[.442, .118, .188, .237] - # r=[.257, .074, .122, .243] - dict( - hyp=_TEST_HYPOTHESES, - ref=_TEST_REFERENCES, - expected_f_measures=[.287, .083, .137, .240], - expected_p_measures=[.442, .118, .188, .237], - expected_r_measures=[.257, .074, .122, .243], - alpha=-1, - tokenize_fn=_tokenize_whitespace - ), - ]) - def testRougeLOp(self, hyp, ref, expected_f_measures, expected_p_measures, - expected_r_measures, value_dtype=None, alpha=None, - tokenize_fn=None): - if tokenize_fn: - hyp = [tokenize_fn(h) for h in hyp] - ref = [tokenize_fn(r) for r in ref] - tokens_hyp = ragged_factory_ops.constant(hyp, dtype=value_dtype, - ragged_rank=1) - tokens_ref = ragged_factory_ops.constant(ref, dtype=value_dtype, - ragged_rank=1) - forward = text_similarity_metric_ops.rouge_l( - tokens_hyp, tokens_ref, alpha=alpha) - # Check tuple ordering+naming. - self.assertIs(forward.f_measure, forward[0]) - self.assertIs(forward.p_measure, forward[1]) - self.assertIs(forward.r_measure, forward[2]) - # Check actual vs expected values. - self.assertAllClose(forward.f_measure, expected_f_measures, atol=1e-3) - self.assertAllClose(forward.p_measure, expected_p_measures, atol=1e-3) - self.assertAllClose(forward.r_measure, expected_r_measures, atol=1e-3) - # Reverse alpha. - if alpha is None or alpha < 0: - reverse_alpha = alpha - else: - reverse_alpha = 1 - alpha - # Now pass the arguments in reverse. 
- reverse = text_similarity_metric_ops.rouge_l(tokens_ref, tokens_hyp, - alpha=reverse_alpha) - self.assertAllClose(reverse.f_measure, expected_f_measures, atol=1e-3) - self.assertAllClose(reverse.p_measure, expected_r_measures, atol=1e-3) - self.assertAllClose(reverse.r_measure, expected_p_measures, atol=1e-3) - - @parameterized.parameters([ - # Corner-case (input not ragged) - dict( - hyp=[], - ref=[], - ), - # Corner-case (input not ragged) - dict( - hyp=[[]], - ref=[], - ), - # Corner-case (input not ragged) - dict( - hyp=[], - ref=[[]], - ), - ]) - def testRougeLOp_notRagged(self, hyp, ref): - # Note: ragged_factory_ops.constant returns a tf.Tensor for flat input lists - tokens_hyp = ragged_factory_ops.constant(hyp, dtype=dtypes.int32) - tokens_ref = ragged_factory_ops.constant(ref, dtype=dtypes.int32) - with self.assertRaises(ValueError): - text_similarity_metric_ops.rouge_l(tokens_hyp, tokens_ref) - - @parameterized.parameters([ - # Corner-case (ref is ragged rank 2) - dict( - hyp=[[1, 2, 3]], - ref=[[[1], []], [[1, 2]]], - ), - # Corner-case (hyp is ragged rank 2) - dict( - hyp=[[[1], []], [[1, 2]]], - ref=[[1, 2, 3]], - ), - ]) - def testRougeLOp_raggedRank2(self, hyp, ref): - with self.assertRaises(ValueError): - text_similarity_metric_ops.rouge_l(hyp, ref) - - def testRougeLOp_alphaValues(self): - hyp = ragged_factory_ops.constant([[1, 2]], dtype=dtypes.int32) - ref = ragged_factory_ops.constant([[2, 3]], dtype=dtypes.int32) - text_similarity_metric_ops.rouge_l(hyp, ref, alpha=-1) - text_similarity_metric_ops.rouge_l(hyp, ref, alpha=0) - text_similarity_metric_ops.rouge_l(hyp, ref, alpha=.5) - text_similarity_metric_ops.rouge_l(hyp, ref, alpha=1) - with self.assertRaises(ValueError): - text_similarity_metric_ops.rouge_l(hyp, ref, alpha=1.00001) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/numpy/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/python/numpy/__init__.py deleted file mode 100644 index 4cc813a..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/numpy/__init__.py +++ /dev/null
@@ -1,18 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Numpy-based code for text processing.""" - -from tensorflow_text.python.numpy import viterbi_decode
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/numpy/viterbi_decode.py b/third_party/tensorflow-text/src/tensorflow_text/python/numpy/viterbi_decode.py deleted file mode 100644 index 3b26a4c3..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/numpy/viterbi_decode.py +++ /dev/null
@@ -1,167 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Helper functions for decoding Viterbi sequences outside of Tensorflow. - -viterbi_decode provides known-tested snippets for Viterbi decoding in log and -standard space for use outside of a Tensorflow graph. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - - -def decode(score, - transition_params=None, - allowed_transitions=None, - use_log_space=True, - use_start_and_end_states=False): - """Decode the highest scoring sequence of tags. - - This function uses numpy instead of Tensorflow ops, and so cannot be used - inside a Tensorflow graph or function. - - Args: - score: A [seq_len, num_tags] matrix of unary potentials. - transition_params: A [num_tags, num_tags] matrix of binary potentials. - allowed_transitions: A [num_tags, num_tags] matrix where FALSE indicates - a transition that cannot be taken. - use_log_space: Whether to perform the Viterbi calculation in logarithmic - space. - use_start_and_end_states: If True, add an implicit 'start' and 'end' state - to the start and end of the given sequence. If this is True, - transition_params should contain an extra row and column, representing - potentials for starting/ending a sequence with a given state. These values - should occupy the outermost row and column of the transition_params - matrix. - - Returns: - viterbi: A [seq_len] list of integers containing the highest scoring tag - indices. - viterbi_score: A float containing the score for the Viterbi sequence. - """ - if transition_params is None: - num_tags = score.shape[-1] - if use_log_space: - transition_params = np.zeros(num_tags, num_tags) - else: - transition_params = np.ones(num_tags, num_tags) - - if allowed_transitions is not None: - if use_log_space: - transition_mask = np.where(allowed_transitions, 1, -float("inf")) - else: - transition_mask = np.where(allowed_transitions, 1, 0.0) - - transition_params = transition_params * transition_mask - - if use_log_space: - return _decode_in_log_space(score, transition_params, - use_start_and_end_states) - else: - return _decode_in_exp_space(score, transition_params, - use_start_and_end_states) - - -def _decode_in_log_space(score, transition_params, use_start_and_end_states): - """Perform Viterbi decoding in log space.""" - trellis = np.zeros_like(score) - backpointers = np.zeros_like(score, dtype=np.int32) - - if use_start_and_end_states: - start_potentials = transition_params[-1, :-1] - end_potentials = transition_params[:-1, -1] - transition_potentials = transition_params[:-1, :-1] - else: - transition_potentials = transition_params - - # Calculate the start value. - if use_start_and_end_states: - trellis[0] = score[0] + start_potentials - else: - trellis[0] = score[0] - - # Calculate intermediate values. 
- for t in range(1, score.shape[0]): - v = np.expand_dims(trellis[t - 1], 1) + transition_potentials - trellis[t] = score[t] + np.max(v, 0) - backpointers[t] = np.argmax(v, 0) - - # If we are using explicit start and end states, change the final scores - # based on the final state's potentials. - if use_start_and_end_states: - final_scores = trellis[-1] + end_potentials - else: - final_scores = trellis[-1] - - viterbi = [np.argmax(final_scores)] - for bp in reversed(backpointers[1:]): - viterbi.append(bp[viterbi[-1]]) - viterbi.reverse() - - viterbi_score = np.max(final_scores) - - return viterbi, viterbi_score - - -def _decode_in_exp_space(score, transition_params, use_start_and_end_states): - """Perform Viterbi decoding in exp space.""" - if np.any(transition_params < 0): - raise ValueError("Transition params must be non-negative in exp space.") - trellis = np.zeros_like(score) - backpointers = np.zeros_like(score, dtype=np.int32) - max_scores = np.zeros(score.shape[0]) - - if use_start_and_end_states: - start_potentials = transition_params[-1, :-1] - end_potentials = transition_params[:-1, -1] - transition_potentials = transition_params[:-1, :-1] - else: - transition_potentials = transition_params - - # Calculate the start value. - if use_start_and_end_states: - trellis[0] = score[0] * start_potentials - else: - trellis[0] = score[0] - - max_scores[0] = np.max(trellis[0]) - trellis[0] = trellis[0] / max_scores[0] - - # Calculate intermediate values. - for t in range(1, score.shape[0]): - v = np.expand_dims(trellis[t - 1], 1) * transition_potentials - trellis[t] = score[t] * np.max(v, 0) - backpointers[t] = np.argmax(v, 0) - max_scores[t] = np.max(trellis[t]) - trellis[t] = trellis[t] / max_scores[t] - - # If we are using explicit start and end states, change the final scores - # based on the final state's potentials. - if use_start_and_end_states: - final_scores = trellis[-1] * end_potentials - else: - final_scores = trellis[-1] - - viterbi = [np.argmax(final_scores)] - for bp in reversed(backpointers[1:]): - viterbi.append(bp[viterbi[-1]]) - viterbi.reverse() - - viterbi_score = np.max(final_scores) * np.prod(max_scores) - return viterbi, viterbi_score
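As a small worked example of the helper removed above (values invented), decoding two steps over two tags in log space, with the binary potentials passed explicitly; note the transition_params=None default path would need np.zeros((num_tags, num_tags)) rather than np.zeros(num_tags, num_tags) to run:

import numpy as np
from tensorflow_text.python.numpy import viterbi_decode

# Two steps, two tags. transitions[i][j] is the log-space potential for
# moving from tag i to tag j; -10.0 strongly penalises the 0 -> 1 move.
scores = np.array([[1.0, 3.0],
                   [2.0, 1.0]])
transitions = np.array([[0.0, -10.0],
                        [0.0,   0.0]])

tags, total = viterbi_decode.decode(scores, transitions, use_log_space=True)
# Step 0 picks tag 1 (3.0); the free 1 -> 0 move then lets step 1 pick
# tag 0 (2.0), so tags == [1, 0] and total == 3.0 + 0.0 + 2.0 == 5.0.
print(tags, total)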
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/numpy/viterbi_decode_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/numpy/viterbi_decode_test.py deleted file mode 100644 index 8ff6605..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/numpy/viterbi_decode_test.py +++ /dev/null
@@ -1,300 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensorflow_text.python.numpy.viterbi_decode.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import absltest - -import numpy as np - -from tensorflow_text.python.numpy import viterbi_decode - - -class ViterbiDecodeTest(absltest.TestCase): - - def test_viterbi_in_log_space(self): - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - x = -float('inf') - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_params = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, x, 10.0, x], - [-7.0, 7.0, -8.0, 8.0]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - - # STEP 1: - # Starting scores are {10.0, 12.0, 6.0, 4.0} - # Raw scores are: {13.0, 12.0, 11.0, 10.0} - # - # To get the weighted scores, add the column of the final state to - # the raw score. - # - # Final state 0: (13.0) Weighted scores are {12.0, 16.0, 18.0, 6.0} - # New totals are {22, 28, 24, 10} [max 28 from 1] - # - # Final state 1: (12.0) Weighted scores are {13.0, 9.0, X, 19.0}, - # New totals are {23, 21, X, 23} [max 23 from 3] - # - # Final state 2: (11.0) Weighted scores are {9, 15, 21, 3}, - # New totals are {19, 27, 27, 7} [max 27 from 2] - # - # Final state 3: (10.0) Weighted scores are {12, 6, X, 18}, - # New totals are {19, 18, X, 22} [max 25 from 3] - # - # Top scores are [28, 26, 27, 25] from [1, 3, 2, 3]. - # Final state is [0] with a sequence of [1->0]. 
- - sequence, score = viterbi_decode.decode(scores, transition_params) - self.assertAlmostEqual(28.0, score) - self.assertEqual([1, 0], sequence) - - def test_viterbi_with_allowed_transitions(self): - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_params = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 100.0, 10.0, 200.0], - [-7.0, 7.0, -8.0, 8.0]]) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - - # STEP 1: - # Starting scores are {10.0, 12.0, 6.0, 4.0} - # Raw scores are: {13.0, 12.0, 11.0, 10.0} - # - # Final state 0: (13.0) Weighted scores are {12.0, 16.0, 18.0, 6.0} - # New totals are {22, 28, 24, 10} [max 28 from 1] - # - # Final state 1: (12.0) Weighted scores are {13.0, 9.0, X, 19.0}, - # New totals are {23, 21, X, 23} [max 23 from 3] - # - # Final state 2: (11.0) Weighted scores are {9, 15, 21, 3}, - # New totals are {19, 27, 27, 7} [max 27 from 2] - # - # Final state 3: (10.0) Weighted scores are {12, 6, X, 18}, - # New totals are {19, 18, X, 22} [max 22 from 3] - # - # Top scores are [28, 26, 27, 25] from [1, 3, 2, 3]. - # Final state is [0] with a sequence of [1->0]. - - sequence, score = viterbi_decode.decode(scores, transition_params, - allowed_transitions) - self.assertAlmostEqual(28.0, score) - self.assertEqual([1, 0], sequence) - - def test_viterbi_in_log_space_with_start_and_end(self): - scores = np.array([[10.0, 12.0, 7.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - x = -float('inf') - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_params = np.array([[-1.0, 1.0, -2.0, 2.0, 0.0], - [ 3.0, -3.0, 4.0, -4.0, 0.0], - [ 5.0, x, 10.0, x, x], - [-7.0, 7.0, -8.0, 8.0, 0.0], - [ 0.0, x, 2.0, 3.0, 0.0]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - - # STEP 1: - # All scores should be summed with the last row in the weight tensor, so the - # 'real' scores are: - # B0: { 10.0, X, 9.0, 7.0} - # - # STEP 2: - # Raw scores are: {13.0, 12.0, 11.0, 10.0} - # - # Final state 0: (13.0) Weighted scores are {12.0, 16.0, 18.0, 6.0} - # New totals are {22, X, 27, 18} [max 27 from 2] - # - # Final state 1: (12.0) Weighted scores are {13.0, 9.0, X, 19.0}, - # New totals are {23, X, X, 26} [max 26 from 3] - # - # Final state 2: (11.0) Weighted scores are {9, 15, 21, 3}, - # New totals are {19, X, 30, 10} [max 30 from 2] - # - # Final state 3: (10.0) Weighted scores are {12, 6, X, 18}, - # New totals are {19, X, X, 25} [max 25 from 3] - # - # Top scores are [27, 26, 30, 25] from [2, 3, 2, 3]. - # 2->OUT is X, so final scores are [27, 26, X, 25] for a - # final state of [0] with a sequence of [2->0]. 
- - sequence, score = viterbi_decode.decode( - scores, transition_params, use_start_and_end_states=True) - self.assertAlmostEqual(27.0, score) - self.assertEqual([2, 0], sequence) - - def test_viterbi_in_exp_space(self): - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - x = 0.0 - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_params = np.array([[ .1, .2, .3, .4], - [ .5, .6, .7, .8], - [ .9, x, .15, x], - [.25, .35, .45, .55]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - - # STEP 1: - # Starting scores are {10.0, 12.0, 6.0, 4.0} - # Raw scores are: {13.0, 12.0, 11.0, 10.0} - # - # Final state 0: (13.0) Weighted scores are {1.3, 6.5, 11.7, 3.25} - # New totals are {13, 78, 70.2, 13} [max 78 from 1] - # - # Final state 1: (12.0) Weighted scores are {2.4, 7.2, 0, 4.2}, - # New totals are {24, 86.4, 0, 16.8} [max 86.4 from 1] - # - # Final state 2: (11.0) Weighted scores are {3.3, 7.7, 1.65, 4.95}, - # New totals are {33, 92.4, 9.9, 19.8} [max 92.4 from 1] - # - # Final state 3: (10.0) Weighted scores are {4, 8, 0, 5.5}, - # New totals are {40, 96, 0, 22} [max 96 from 1] - # - # Top scores are [78, 86.4, 92.4, 96] from [1, 1, 1, 1]. - # Final state is [3] with a sequence of [1->3]. - - sequence, score = viterbi_decode.decode( - scores, transition_params, use_log_space=False) - self.assertAlmostEqual(96.0, score) - self.assertEqual([1, 3], sequence) - - def test_viterbi_in_exp_space_with_allowed_transitions(self): - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_params = np.array([[ .1, .2, .3, .4], - [ .5, .6, .7, .8], - [ .9, .5, .15, .5], - [.25, .35, .45, .55]]) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - - # STEP 1: - # Starting scores are {10.0, 12.0, 6.0, 4.0} - # Raw scores are: {13.0, 12.0, 11.0, 10.0} - # - # Final state 0: (13.0) Weighted scores are {1.3, 6.5, 11.7, 3.25} - # New totals are {13, 78, 70.2, 13} [max 78 from 1] - # - # Final state 1: (12.0) Weighted scores are {2.4, 7.2, 0, 4.2}, - # New totals are {24, 86.4, 0, 16.8} [max 86.4 from 1] - # - # Final state 2: (11.0) Weighted scores are {3.3, 7.7, 1.65, 4.95}, - # New totals are {33, 92.4, 9.9, 19.8} [max 92.4 from 1] - # - # Final state 3: (10.0) Weighted scores are {4, 8, 0, 5.5}, - # New totals are {40, 96, 0, 22} [max 96 from 1] - # - # Top scores are [78, 86.4, 92.4, 96] from [1, 1, 1, 1]. - # Final state is [3] with a sequence of [1->3]. 
- - sequence, score = viterbi_decode.decode( - scores, transition_params, allowed_transitions, use_log_space=False) - self.assertAlmostEqual(96.0, score) - self.assertEqual([1, 3], sequence) - - def test_viterbi_in_exp_space_with_start_and_end(self): - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - x = 0.0 - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_params = np.array([[ .1, .2, .3, .4, .1], - [ .5, .6, .7, .8, .1], - [ .9, x, .15, x, .1], - [.25, .35, .45, .55, .5], - [ .1, .5, .1, .1, x]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - - # STEP 1: - # Starting scores are {.5, 6.0, .6, .4} - # Raw scores are: {13.0, 12.0, 11.0, 10.0} - # - # Final state 0: (13.0) Weighted scores are {1.3, 6.5, 11.7, 3.25} - # New totals are {0.13, 39, 7.02, 1.3} [max 39 from 1] - # - # Final state 1: (12.0) Weighted scores are {2.4, 7.2, 0, 4.2}, - # New totals are {0.24, 43.2, 0, 1.68} [max 43.2 from 1] - # - # Final state 2: (11.0) Weighted scores are {3.3, 7.7, 1.65, 4.95}, - # New totals are {0.33, 46.2, 0.99, 1.98} [max 46.2 from 1] - # - # Final state 3: (10.0) Weighted scores are {4, 8, 0, 5.5}, - # New totals are {0.4, 48, 0, 2.2} [max 48 from 1] - # - # Top scores are [39, 43.2, 46.2, 48] from [1, 1, 1, 1]. - # Final multiplication results in [3.9, 4.32, 4.62, 24] - # Final state is [3] with a sequence of [1->3]. - - sequence, score = viterbi_decode.decode( - scores, - transition_params, - use_log_space=False, - use_start_and_end_states=True) - self.assertAlmostEqual(24.0, score) - self.assertEqual([1, 3], sequence) - - def test_viterbi_in_exp_space_with_negative_weights_fails(self): - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - x = 0.0 - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_params = np.array([[ .1, .2, .3, .4], - [ .5, -.6, .7, .8], - [ .9, x, .15, x], - [.25, .35, .45, .55]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - - with self.assertRaises(ValueError): - _, _ = viterbi_decode.decode( - scores, transition_params, use_log_space=False) - - -if __name__ == '__main__': - absltest.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/__init__.py deleted file mode 100644 index c3cc3f6fe..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/__init__.py +++ /dev/null
@@ -1,71 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Various TensorFlow ops related to text-processing.""" - -# pylint: disable=g-import-not-at-top,g-statement-before-imports -try: - from tensorflow.python.ops.ragged import ragged_ops as _ragged_ops -except ImportError: - pass -from tensorflow_text.core.pybinds.pywrap_fast_wordpiece_tokenizer_model_builder import build_fast_wordpiece_model -from tensorflow_text.python.ops.bert_tokenizer import BertTokenizer -from tensorflow_text.python.ops.create_feature_bitmask_op import create_feature_bitmask -from tensorflow_text.python.ops.fast_wordpiece_tokenizer import FastWordpieceTokenizer -from tensorflow_text.python.ops.greedy_constrained_sequence_op import greedy_constrained_sequence -from tensorflow_text.python.ops.hub_module_splitter import HubModuleSplitter -from tensorflow_text.python.ops.hub_module_tokenizer import HubModuleTokenizer -from tensorflow_text.python.ops.item_selector_ops import FirstNItemSelector -from tensorflow_text.python.ops.item_selector_ops import RandomItemSelector -from tensorflow_text.python.ops.masking_ops import mask_language_model -from tensorflow_text.python.ops.masking_ops import MaskValuesChooser -from tensorflow_text.python.ops.mst_ops import max_spanning_tree -from tensorflow_text.python.ops.mst_ops import max_spanning_tree_gradient -from tensorflow_text.python.ops.ngrams_op import ngrams -from tensorflow_text.python.ops.ngrams_op import Reduction -from tensorflow_text.python.ops.normalize_ops import case_fold_utf8 -from tensorflow_text.python.ops.normalize_ops import find_source_offsets -from tensorflow_text.python.ops.normalize_ops import normalize_utf8 -from tensorflow_text.python.ops.normalize_ops import normalize_utf8_with_offsets_map -from tensorflow_text.python.ops.pad_along_dimension_op import pad_along_dimension -from tensorflow_text.python.ops.pad_model_inputs_ops import pad_model_inputs -from tensorflow_text.python.ops.pointer_ops import gather_with_default -from tensorflow_text.python.ops.pointer_ops import span_alignment -from tensorflow_text.python.ops.pointer_ops import span_overlaps -from tensorflow_text.python.ops.regex_split_ops import regex_split -from tensorflow_text.python.ops.regex_split_ops import regex_split_with_offsets -from tensorflow_text.python.ops.regex_split_ops import RegexSplitter -from tensorflow_text.python.ops.segment_combiner_ops import combine_segments -from tensorflow_text.python.ops.sentence_breaking_ops import sentence_fragments -from tensorflow_text.python.ops.sentencepiece_tokenizer import SentencepieceTokenizer -from tensorflow_text.python.ops.sliding_window_op import sliding_window -from tensorflow_text.python.ops.split_merge_from_logits_tokenizer import SplitMergeFromLogitsTokenizer -from tensorflow_text.python.ops.split_merge_tokenizer import SplitMergeTokenizer -from tensorflow_text.python.ops.splitter import Splitter -from tensorflow_text.python.ops.splitter import SplitterWithOffsets -from 
tensorflow_text.python.ops.state_based_sentence_breaker_op import StateBasedSentenceBreaker -from tensorflow_text.python.ops.string_ops import coerce_to_structurally_valid_utf8 -from tensorflow_text.python.ops.tokenization import Detokenizer -from tensorflow_text.python.ops.tokenization import Tokenizer -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets -from tensorflow_text.python.ops.trimmer_ops import RoundRobinTrimmer -from tensorflow_text.python.ops.trimmer_ops import WaterfallTrimmer -from tensorflow_text.python.ops.unicode_char_tokenizer import UnicodeCharTokenizer -from tensorflow_text.python.ops.unicode_script_tokenizer import UnicodeScriptTokenizer -from tensorflow_text.python.ops.viterbi_constrained_sequence_op import viterbi_constrained_sequence -from tensorflow_text.python.ops.whitespace_tokenizer import WhitespaceTokenizer -from tensorflow_text.python.ops.wordpiece_tokenizer import WordpieceTokenizer -from tensorflow_text.python.ops.wordshape_ops import WordShape -from tensorflow_text.python.ops.wordshape_ops import wordshape
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/bert_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/bert_tokenizer.py deleted file mode 100644 index a4ef9b8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/bert_tokenizer.py +++ /dev/null
@@ -1,326 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Basic tokenization ops for BERT preprocessing.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy - - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import string_ops -from tensorflow_text.python.ops import regex_split_ops -from tensorflow_text.python.ops.normalize_ops import case_fold_utf8 -from tensorflow_text.python.ops.normalize_ops import normalize_utf8 -from tensorflow_text.python.ops.tokenization import Detokenizer -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets -from tensorflow_text.python.ops.wordpiece_tokenizer import WordpieceTokenizer - -_tf_text_bert_tokenizer_op_create_counter = monitoring.Counter( - "/nlx/api/python/bert_tokenizer_create_counter", - "Counter for number of BertTokenizers created in Python.") - -_DELIM_REGEX = [ - r"\s+", - r"|".join([ - r"[!-/]", - r"[:-@]", - r"[\[-`]", - r"[{-~]", - r"[\p{P}]", - ]), - r"|".join([ - r"[\x{4E00}-\x{9FFF}]", - r"[\x{3400}-\x{4DBF}]", - r"[\x{20000}-\x{2A6DF}]", - r"[\x{2A700}-\x{2B73F}]", - r"[\x{2B740}-\x{2B81F}]", - r"[\x{2B820}-\x{2CEAF}]", - r"[\x{F900}-\x{FAFF}]", - r"[\x{2F800}-\x{2FA1F}]", - ]), -] - -_DELIM_REGEX_PATTERN = "|".join(_DELIM_REGEX) -_KEEP_DELIM_NO_WHITESPACE = copy.deepcopy(_DELIM_REGEX) -_KEEP_DELIM_NO_WHITESPACE.remove(r"\s+") -_UNUSED_TOKEN_REGEX = "\\[unused\\d+\\]" -_KEEP_DELIM_NO_WHITESPACE_PATTERN = "|".join(_KEEP_DELIM_NO_WHITESPACE) - - -class BasicTokenizer(TokenizerWithOffsets): - r"""Basic tokenizer for for tokenizing text. - - A basic tokenizer that tokenizes using some deterministic rules: - - For most languages, this tokenizer will split on whitespace. - - For Chinese, Japanese, and Korean characters, this tokenizer will split on - Unicode characters. - - Example: - >>> text_inputs = [b'taste the rustisc indiefrost'] - >>> tokenizer = BasicTokenizer( - ... lower_case=False, normalization_form='NFC') - >>> tokenizer.tokenize(text_inputs) - <tf.RaggedTensor [[b'taste', b'the', b'rustisc', b'indiefrost']]> - - Attributes: - lower_case: bool - If true, a preprocessing step is added to lowercase the - text, which also applies NFD normalization and strip accents from - characters. - keep_whitespace: bool - If true, preserves whitespace characters instead of - stripping them away. - normalization_form: If set to a valid value and lower_case=False, the input - text will be normalized to `normalization_form`. See normalize_utf8() op - for a list of valid values. - preserve_unused_token: If true, text in the regex format "\\[unused\\d+\\]" - will be treated as a token and thus remain preserved as-is to be looked up - in the vocabulary. 
- """ - - def __init__(self, - lower_case=False, - keep_whitespace=False, - normalization_form=None, - preserve_unused_token=False): - self._lower_case = lower_case - if not keep_whitespace: - self._keep_delim_regex_pattern = _KEEP_DELIM_NO_WHITESPACE_PATTERN - else: - self._keep_delim_regex_pattern = _DELIM_REGEX_PATTERN - - if lower_case and normalization_form not in [None, "NFD"]: - raise ValueError("`lower_case` strips accents. When `lower_case` is set, " - "`normalization_form` is 'NFD'.") - self._normalization_form = normalization_form - - if preserve_unused_token: - self._delim_regex_pattern = "|".join( - [_UNUSED_TOKEN_REGEX, _DELIM_REGEX_PATTERN]) - self._keep_delim_regex_pattern = "|".join( - [_UNUSED_TOKEN_REGEX, self._keep_delim_regex_pattern]) - else: - self._delim_regex_pattern = _DELIM_REGEX_PATTERN - - def tokenize(self, text_input): - tokens, _, _ = self.tokenize_with_offsets(text_input) - return tokens - - def tokenize_with_offsets(self, text_input): - """Performs basic word tokenization for BERT. - - Args: - text_input: A `Tensor` or `RaggedTensor` of untokenized UTF-8 strings. - - Returns: - A `RaggedTensor` of tokenized strings from text_input. - """ - # lowercase and strip accents (if option is set) - if self._lower_case: - text_input = self.lower_case(text_input) - else: - # utf8 normalization - if self._normalization_form is not None: - text_input = normalize_utf8(text_input, self._normalization_form) - - # strip out control characters - text_input = string_ops.regex_replace(text_input, r"\p{Cc}|\p{Cf}", " ") - return regex_split_ops.regex_split_with_offsets( - text_input, self._delim_regex_pattern, self._keep_delim_regex_pattern, - "BertBasicTokenizer") - - def lower_case(self, text_input): - """Lower-cases the `text_input'.""" - text_input = case_fold_utf8(text_input) - text_input = normalize_utf8(text_input, "NFD") - text_input = string_ops.regex_replace(text_input, r"\p{Mn}", "") - return text_input - - -class AccentPreservingBasicTokenizer(BasicTokenizer): - """I18n-friendly tokenizer that keeps accent characters during lowercasing.""" - - def __init__(self, *args, **kwargs): - super(AccentPreservingBasicTokenizer, self).__init__(*args, **kwargs) - - def lower_case(self, text_input): - return string_ops.string_lower(text_input, encoding="utf-8") - - -class BertTokenizer(TokenizerWithOffsets, Detokenizer): - r"""Tokenizer used for BERT. - - This tokenizer applies an end-to-end, text string to wordpiece tokenization. - It first applies basic tokenization, followed by wordpiece - tokenization. - - See `WordpieceTokenizer` for details on the subword tokenization. - - For an example of use, see - https://www.tensorflow.org/text/guide/bert_preprocessing_guide - - Attributes: - vocab_lookup_table: A lookup table implementing the LookupInterface - containing the vocabulary of subwords or a string which is the file path - to the vocab.txt file. - suffix_indicator: (optional) The characters prepended to a wordpiece to - indicate that it is a suffix to another subword. Default is '##'. - max_bytes_per_word: (optional) Max size of input token. Default is 100. - max_chars_per_token: (optional) Max size of subwords, excluding suffix - indicator. If known, providing this improves the efficiency of decoding - long words. - token_out_type: (optional) The type of the token to return. This can be - `tf.int64` IDs, or `tf.string` subwords. The default is `tf.int64`. - unknown_token: (optional) The value to use when an unknown token is found. - Default is "[UNK]". 
If this is set to a string, and `token_out_type` is - `tf.int64`, the `vocab_lookup_table` is used to convert the - `unknown_token` to an integer. If this is set to `None`, out-of-vocabulary - tokens are left as is. - split_unknown_characters: (optional) Whether to split out single unknown - characters as subtokens. If False (default), words containing unknown - characters will be treated as single unknown tokens. - lower_case: bool - If true, a preprocessing step is added to lowercase the - text, apply NFD normalization, and strip accents characters. - keep_whitespace: bool - If true, preserves whitespace characters instead of - stripping them away. - normalization_form: If set to a valid value and lower_case=False, the input - text will be normalized to `normalization_form`. See normalize_utf8() op - for a list of valid values. - preserve_unused_token: If true, text in the regex format `\\[unused\\d+\\]` - will be treated as a token and thus remain preserved as is to be looked up - in the vocabulary. - basic_tokenizer_class: If set, the class to use instead of BasicTokenizer - """ - - def __init__(self, - vocab_lookup_table, - suffix_indicator="##", - max_bytes_per_word=100, - max_chars_per_token=None, - token_out_type=dtypes.int64, - unknown_token="[UNK]", - split_unknown_characters=False, - lower_case=False, - keep_whitespace=False, - normalization_form=None, - preserve_unused_token=False, - basic_tokenizer_class=BasicTokenizer): - super(BertTokenizer, self).__init__() - _tf_text_bert_tokenizer_op_create_counter.get_cell().increase_by(1) - - self._basic_tokenizer = basic_tokenizer_class(lower_case, keep_whitespace, - normalization_form, - preserve_unused_token) - self._wordpiece_tokenizer = WordpieceTokenizer( - vocab_lookup_table, suffix_indicator, max_bytes_per_word, - max_chars_per_token, token_out_type, unknown_token, - split_unknown_characters) - - def tokenize_with_offsets(self, text_input): - r"""Tokenizes a tensor of string tokens into subword tokens for BERT. - - Example: - >>> import pathlib - >>> pathlib.Path('/tmp/tok_vocab.txt').write_text( - ... "they ##' ##re the great ##est".replace(' ', '\n')) - >>> tokenizer = BertTokenizer( - ... vocab_lookup_table='/tmp/tok_vocab.txt') - >>> text_inputs = tf.constant(['greatest'.encode('utf-8')]) - >>> tokenizer.tokenize_with_offsets(text_inputs) - (<tf.RaggedTensor [[[4, 5]]]>, - <tf.RaggedTensor [[[0, 5]]]>, - <tf.RaggedTensor [[[5, 8]]]>) - - Args: - text_input: input: A `Tensor` or `RaggedTensor` of untokenized UTF-8 - strings. - - Returns: - A tuple of `RaggedTensor`s where the first element is the tokens where - `tokens[i1...iN, j]`, the second element is the starting offsets, the - third element is the end offset. (Please look at `tokenize` for details - on tokens.) - - """ - tokens, begin, _ = self._basic_tokenizer.tokenize_with_offsets(text_input) - wordpieces, wp_begin, wp_end = ( - self._wordpiece_tokenizer.tokenize_with_offsets(tokens)) - begin_expanded = array_ops.expand_dims(begin, axis=2) - final_begin = begin_expanded + wp_begin - final_end = begin_expanded + wp_end - return wordpieces, final_begin, final_end - - def tokenize(self, text_input): - r"""Tokenizes a tensor of string tokens into subword tokens for BERT. - - Example: - >>> import pathlib - >>> pathlib.Path('/tmp/tok_vocab.txt').write_text( - ... "they ##' ##re the great ##est".replace(' ', '\n')) - >>> tokenizer = BertTokenizer( - ... 
vocab_lookup_table='/tmp/tok_vocab.txt') - >>> text_inputs = tf.constant(['greatest'.encode('utf-8') ]) - >>> tokenizer.tokenize(text_inputs) - <tf.RaggedTensor [[[4, 5]]]> - - Args: - text_input: input: A `Tensor` or `RaggedTensor` of untokenized UTF-8 - strings. - - Returns: - A `RaggedTensor` of tokens where `tokens[i1...iN, j]` is the string - contents (or ID in the vocab_lookup_table representing that string) - of the `jth` token in `input[i1...iN]` - """ - tokens = self._basic_tokenizer.tokenize(text_input) - return self._wordpiece_tokenizer.tokenize(tokens) - - def detokenize(self, token_ids): - r"""Convert a `Tensor` or `RaggedTensor` of wordpiece IDs to string-words. - - See `WordpieceTokenizer.detokenize` for details. - - Note: `BertTokenizer.tokenize`/`BertTokenizer.detokenize` does not round - trip losslessly. The result of `detokenize` will not, in general, have the - same content or offsets as the input to `tokenize`. This is because the - "basic tokenization" step, that splits the strings into words before - applying the `WordpieceTokenizer`, includes irreversible - steps like lower-casing and splitting on punctuation. `WordpieceTokenizer` - on the other hand **is** reversible. - - Note: This method assumes wordpiece IDs are dense on the interval - `[0, vocab_size)`. - - Example: - >>> import pathlib - >>> pathlib.Path('/tmp/tok_vocab.txt').write_text( - ... "they ##' ##re the great ##est".replace(' ', '\n')) - >>> tokenizer = BertTokenizer( - ... vocab_lookup_table='/tmp/tok_vocab.txt') - >>> text_inputs = tf.constant(['greatest'.encode('utf-8')]) - >>> tokenizer.detokenize([[4, 5]]) - <tf.RaggedTensor [[b'greatest']]> - - Args: - token_ids: A `RaggedTensor` or `Tensor` with an int dtype. - - Returns: - A `RaggedTensor` with dtype `string` and the same rank as the input - `token_ids`. - """ - return self._wordpiece_tokenizer.detokenize(token_ids)
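The docstrings above describe BERT tokenization as a two-stage pipeline: BasicTokenizer splits the input on whitespace, punctuation and CJK characters, and WordpieceTokenizer then splits each resulting word into subwords against a vocabulary. A minimal usage sketch against the upstream tensorflow_text package, assuming it is installed; the vocab file path and its contents are hypothetical toy values:

import tensorflow as tf
import tensorflow_text as tf_text

# Write a toy vocabulary; '/tmp/tok_vocab.txt' is a hypothetical path.
with open('/tmp/tok_vocab.txt', 'w') as f:
    f.write('\n'.join(["the", "great", "##est", "[UNK]"]))

tokenizer = tf_text.BertTokenizer('/tmp/tok_vocab.txt', token_out_type=tf.string)
print(tokenizer.tokenize(["the greatest"]))
# Expected, roughly: <tf.RaggedTensor [[[b'the'], [b'great', b'##est']]]>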
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/bert_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/bert_tokenizer_test.py deleted file mode 100644 index 3a5af77d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/bert_tokenizer_test.py +++ /dev/null
@@ -1,440 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# encoding=utf-8 -r"""Tests for BertTokenizer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_map_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import test -from tensorflow_text.python.ops import bert_tokenizer - - -def _utf8(x): - return x.encode('utf-8') - - -# TODO(thuang513): It appears there isn't a Ragged version of substr; consider -# checking this into core TF. -def _ragged_substr(text_input, begin, end): - text_input_flat = None - if ragged_tensor.is_ragged(text_input): - text_input_flat = text_input.flat_values - else: - text_input_flat = text_input - - def _ragged_tile(x): - input_text, indices = x - multiple = math_ops.reduce_sum(indices.row_lengths()) - return array_ops.tile([input_text], [multiple]) - - broadcasted_text = ragged_map_ops.map_fn( - _ragged_tile, - (text_input_flat, begin), - dtype=ragged_tensor.RaggedTensorType(dtype=dtypes.string, ragged_rank=1), - infer_shape=False, - ) - size = math_ops.sub( - array_ops.squeeze(end.flat_values), array_ops.squeeze(begin.flat_values)) - new_tokens = string_ops.substr_v2(broadcasted_text, - array_ops.squeeze(begin.flat_values), size) - return begin.with_flat_values(new_tokens.flat_values) - - -_VOCAB = [ - b'[unused1]', - b'[unused23]', - b"'", - b'##%', - b'##af', - b'##book', - b'##c', - b'##fr', - b'##hey', - b'##is', - b'##o', - b'##ost', - b'##s', - b'##tri', - b'##y', - b'$', - b'%', - b'&', - b'(', - b')', - b'*', - b'-', - b'.', - b'20', - b':', - b'?', - b'[CLS]', - b'[SEP]', - _utf8(u'國'), - _utf8(u'暐'), - _utf8(u'瀚'), - _utf8(u'韓'), - _utf8(u'食'), - _utf8(u'黃'), - _utf8(u'🤔'), - _utf8(u'🤣'), - b'^', - b'a', - b'ago', - b'among', - b'an', - b'and', - b'are', - b'aren', - b'awesome', - b'between', - b'candy', - b'china', - b'companies', - b'company', - b'crushed', - b'dug', - b'earnings', - b'engaged', - b'even', - b'few', - b'forecast', - b'getting', - b'had', - b'han', - b'has', - b'hers', - b'high', - b'hit', - b'hs', - b'hurting', - b'in', - b'indie', - b'is', - b'isn', - b'ka', - b'ku', - b'major', - b'maker', - b'moth', - b'nearly', - b'new', - b'now', - b'president', - b'record', - b'regulators', - b'reported', - b'rift', - b'rust', - b'sales', - b'shares', - b'slightly', - b'sprint', - b'states', - b'stock', - b't', - b'taste', - b'tension', - b'that', - b'the', - b'this', - b'today', - b'told', - 
b'topped', - b'trade', - b'trump', - b'united', - b'up', - b'weeks', - b'what', - b'why', - b'with', - b'year', - b'yo', - b'yu', - _utf8(u'\u7231'), - _utf8(u'\u4e0a'), - _utf8(u'\u4e00'), - _utf8(u'\u4e2a'), - _utf8(u'\u4e0d'), - _utf8(u'\u56de'), - _utf8(u'\u5bb6'), - _utf8(u'\u7684'), - _utf8(u'\u4eba'), -] - - -def _create_table(vocab, num_oov=1): - init = lookup_ops.KeyValueTensorInitializer( - vocab, - math_ops.range( - array_ops.size(vocab, out_type=dtypes.int64), dtype=dtypes.int64), - key_dtype=dtypes.string, - value_dtype=dtypes.int64) - return lookup_ops.StaticVocabularyTableV1( - init, num_oov, lookup_key_dtype=dtypes.string) - - -class BertTokenizerTest(test_util.TensorFlowTestCase, parameterized.TestCase): - - def test_bert_tokenizer_outputs(self): - text_inputs = constant_op.constant([_utf8('Test')]) - vocab = _VOCAB - table = _create_table(vocab, 2) - self.evaluate(table.initializer) - tokenizer = bert_tokenizer.BertTokenizer( - table, - token_out_type=dtypes.int32) - results = tokenizer.tokenize(text_inputs) - self.assertAllEqual(results.dtype, dtypes.int32) - - @parameterized.parameters([ - dict( - text_inputs=[ - _utf8(u'taste the rustisc indiefrost'), - _utf8(u'Han Kuo-yu (韓國食)🤔'), - _utf8(u'Añade la información del formulario y tus preguntas'), - ], - expected_tokens=[[b'taste', b'the', b'rustisc', b'indiefrost'], - [ - b'Han', b'Kuo', b'-', b'yu', b'(', - b'\xe9\x9f\x93', b'\xe5\x9c\x8b', - b'\xe9\xa3\x9f', b')', b'\xf0\x9f\xa4\x94' - ], - [ - b'A\xc3\xb1ade', b'la', b'informaci\xc3\xb3n', - b'del', b'formulario', b'y', b'tus', b'preguntas' - ]], - ), - dict( - text_inputs=[ - _utf8(u'UNwant\u00E9d,running'), - _utf8(u'Añade la información del formulario y tus preguntas'), - ], - expected_tokens=[[b'unwanted', b',', b'running'], - [ - b'anade', b'la', b'informacion', b'del', - b'formulario', b'y', b'tus', b'preguntas' - ]], - lower_case=True, - # `lower_case` doesn't let you override the `normalization_form` - normalization_form=None, - ), - dict( - text_inputs=[ - _utf8(u'Añade la información del formulario y tus preguntas') - ], - expected_tokens=[[ - b'An\xcc\x83ade', b'la', b'informacio\xcc\x81n', b'del', - b'formulario', b'y', b'tus', b'preguntas' - ]], - normalization_form='NFD', - ), - # Test CJK are tokenized by unicode characters - dict( - text_inputs=[ - _utf8(u'香港では4日'), - _utf8(u'영어독해 자만심 왜 문제일까'), - _utf8(u'據港媒《東網》報導') - ], - expected_tokens=[ - [_utf8(u'香'), - _utf8(u'港'), - _utf8(u'では4'), - _utf8(u'日')], - [ - _utf8(u'영어독해'), - _utf8(u'자만심'), - _utf8(u'왜'), - _utf8(u'문제일까'), - ], - [ - _utf8(u'據'), - _utf8(u'港'), - _utf8(u'媒'), - _utf8(u'《'), - _utf8(u'東'), - _utf8(u'網'), - _utf8(u'》'), - _utf8(u'報'), - _utf8(u'導') - ], - ], - normalization_form=None, - ), - # Test Katakana followed by Hiragana. 
- dict( - text_inputs=[_utf8(u'のテキストとして')], - expected_tokens=[ - [_utf8(u'のテキストとして')], - ], - normalization_form=None, - ), - ]) - @test_util.run_in_graph_and_eager_modes - def test_basic_tokenize(self, - text_inputs, - expected_tokens, - lower_case=False, - normalization_form='NFC'): - text_inputs = ragged_factory_ops.constant(text_inputs) - tokenizer = bert_tokenizer.BasicTokenizer( - lower_case=lower_case, normalization_form=normalization_form) - tokens = tokenizer.tokenize(text_inputs) - self.assertAllEqual(tokens, expected_tokens) - - @parameterized.parameters([ - dict( - text_inputs=[ - b'taste the rustisc indiefrost', - _utf8(u'Han Kuo-yu (韓國食)🤔'), - _utf8(u'dugtrio had an awesome 🤣 dugbook'), - b'yo^what$is*up?', - b'mothaf*&%ka', - ], - expected=[[[b'taste'], [b'the'], [b'rust', b'##is', b'##c'], - [b'indie', b'##fr', b'##ost']], - [[b'han'], [b'ku', b'##o'], [b'-'], [b'yu'], [b'('], - [_utf8(u'韓')], [_utf8(u'國')], [_utf8(u'食')], [b')'], - [_utf8(u'🤔')]], - [[b'dug', b'##tri', b'##o'], [b'had'], [b'an'], - [b'awesome'], [_utf8(u'🤣')], [b'dug', b'##book']], - [[b'yo'], [b'^'], [b'what'], [b'$'], [b'is'], [b'*'], - [b'up'], [b'?']], - [[b'moth', b'##af'], [b'*'], [b'&'], [b'%'], [b'ka']]], - expected_extracted=[[[b'taste'], [b'the'], [b'rust', b'is', b'c'], - [b'indie', b'fr', b'ost']], - [[b'Han'], [b'Ku', b'o'], [b'-'], [b'yu'], [b'('], - [_utf8(u'韓')], [_utf8(u'國')], [_utf8(u'食')], - [b')'], [_utf8(u'🤔')]], - [[b'dug', b'tri', b'o'], [b'had'], [b'an'], - [b'awesome'], [_utf8(u'🤣')], [b'dug', b'book']], - [[b'yo'], [b'^'], [b'what'], [b'$'], [b'is'], - [b'*'], [b'up'], [b'?']], - [[b'moth', b'af'], [b'*'], [b'&'], [b'%'], - [b'ka']]], - lower_case=True, - ), - # Test when we are expecting multiple OOV vocab ids and tf.string just - # maps out [UNK] token. 
- dict( - text_inputs=[ - b'mothaf*&%ka cantfindme whodis', - ], - expected=[[[b'moth', b'##af'], [b'*'], [b'&'], [b'%'], [b'ka'], - [b'[UNK]'], [b'[UNK]']]], - expected_extracted=[[[b'moth', b'af'], [b'*'], [b'&'], [b'%'], - [b'ka'], [b'cantfindme'], [b'whodis']]], - lower_case=True, - num_oov=2, - ), - dict( - text_inputs=[ - b'candy', - ], - expected=[[[b'candy']]], - lower_case=True, - num_oov=2, - ), - dict( - text_inputs=[ - _utf8(u'爱上一个不回家的人'), - ], - expected=[[[_utf8(u'爱')], [_utf8(u'上')], [_utf8(u'一')], [_utf8(u'个')], - [_utf8(u'不')], [_utf8(u'回')], [_utf8(u'家')], [_utf8(u'的')], - [_utf8(u'人')]]], - lower_case=True, - num_oov=2, - ), - # Test 'preserve_unused_token' option - dict( - text_inputs=[ - b'taste the rustisc indiefrost [unused1]', - _utf8(u'爱上一个不回家的人[unused23]'), - ], - expected=[[[b'taste'], [b'the'], [b'rust', b'##is', b'##c'], - [b'indie', b'##fr', b'##ost'], [b'[unused1]']], - [[_utf8(u'爱')], [_utf8(u'上')], [_utf8(u'一')], [_utf8(u'个')], - [_utf8(u'不')], [_utf8(u'回')], [_utf8(u'家')], [_utf8(u'的')], - [_utf8(u'人')], [b'[unused23]']]], - preserve_unused_token=True, - ), - ]) - @test_util.run_in_graph_and_eager_modes - def test_bert_tokenizer(self, - text_inputs, - expected, - vocab=None, - expected_extracted=None, - lower_case=True, - num_oov=1, - preserve_unused_token=False): - text_inputs = constant_op.constant(text_inputs) - if not vocab: - vocab = _VOCAB - table = _create_table(vocab, num_oov) - self.evaluate(table.initializer) - tokenizer = bert_tokenizer.BertTokenizer( - table, - token_out_type=dtypes.string, - lower_case=lower_case, - preserve_unused_token=preserve_unused_token) - results = tokenizer.tokenize(text_inputs) - self.assertAllEqual(results, expected) - - # Verify that the int ids are the same. - expected_rt = ragged_factory_ops.constant(expected) - expected_int = table.lookup(expected_rt.flat_values) - expected_int_rt = ragged_tensor.RaggedTensor.from_nested_row_splits( - expected_int, expected_rt.nested_row_splits) - int_tokenizer = bert_tokenizer.BertTokenizer( - vocab_lookup_table=table, - token_out_type=dtypes.int64, - lower_case=lower_case, - preserve_unused_token=preserve_unused_token) - results_int = int_tokenizer.tokenize(text_inputs) - self.assertAllEqual(results_int, expected_int_rt) - - # Verify that the offsets can extract the expected tokens - _, begin, end = tokenizer.tokenize_with_offsets(text_inputs) - - extracted_wordpieces = _ragged_substr(text_inputs, begin, end) - if expected_extracted: - self.assertAllEqual(extracted_wordpieces, expected_extracted) - else: - # The extracted won't have any wordpieces with '##' prefix. Strip them - # out. - stripped_prefix_flat = string_ops.regex_replace(expected_rt.flat_values, - '##', '') - stripped_prefix = expected_rt.with_flat_values(stripped_prefix_flat) - self.assertAllEqual(extracted_wordpieces, stripped_prefix) - - -if __name__ == '__main__': - test.main()
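The `_ragged_substr` helper in the deleted test above checks that the begin/end values returned by `tokenize_with_offsets` are byte offsets into the original strings. A smaller sketch of the same check, using the same hypothetical toy vocabulary as above (the `flat_values` iteration assumes a single input string):

import tensorflow as tf
import tensorflow_text as tf_text

with open('/tmp/tok_vocab.txt', 'w') as f:   # hypothetical toy vocab file
    f.write('\n'.join(["the", "great", "##est", "[UNK]"]))

tokenizer = tf_text.BertTokenizer('/tmp/tok_vocab.txt', token_out_type=tf.string)
text = b'the greatest'
tokens, begin, end = tokenizer.tokenize_with_offsets([text])
# Slicing the input at each (begin, end) pair recovers the wordpiece text,
# minus any '##' suffix markers.
for b, e in zip(begin.flat_values.numpy(), end.flat_values.numpy()):
    print(text[b:e])   # b'the', b'great', b'est'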
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/coerce_to_valid_utf8_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/coerce_to_valid_utf8_op_test.py deleted file mode 100644 index 98278a75..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/coerce_to_valid_utf8_op_test.py +++ /dev/null
@@ -1,51 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for Utf8Chars Op from string_ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.platform import test -from tensorflow_text.python.ops import string_ops - - -class CoerceToUtf8Test(test.TestCase): - - def testCoercetoStructurallyValidOnValidInput(self): - with self.test_session(): - utf8 = string_ops.coerce_to_structurally_valid_utf8(["abc"]) - self.assertAllEqual(utf8, [b"abc"]) - - def testCoercetoStructurallyValidOnValidInputWithDefault(self): - with self.test_session(): - utf8 = string_ops.coerce_to_structurally_valid_utf8(["abc"], "?") - self.assertAllEqual(utf8, [b"abc"]) - - def testCoercetoStructurallyValidOnInvalidInput(self): - with self.test_session(): - utf8 = string_ops.coerce_to_structurally_valid_utf8([b"abc\xfd"]) - self.assertAllEqual(utf8, [u"abc�".encode("utf-8")]) - - def testCoercetoStructurallyValidOnInvalidInputWithDefault(self): - with self.test_session(): - utf8 = string_ops.coerce_to_structurally_valid_utf8([b"abc\xfd"], "?") - self.assertAllEqual(utf8, [b"abc?"]) - - -if __name__ == "__main__": - test.main()
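For reference, the behaviour exercised by the deleted test above is available through the top-level tensorflow_text export. A short sketch, assuming the upstream package is installed:

import tensorflow_text as tf_text

# Structurally invalid bytes are replaced with U+FFFD by default, or with an
# explicit replacement character when one is given.
print(tf_text.coerce_to_structurally_valid_utf8([b"abc\xfd"]))        # [b'abc\xef\xbf\xbd']
print(tf_text.coerce_to_structurally_valid_utf8([b"abc\xfd"], "?"))   # [b'abc?']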
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/create_feature_bitmask_op.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/create_feature_bitmask_op.py deleted file mode 100644 index d2940b2..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/create_feature_bitmask_op.py +++ /dev/null
@@ -1,122 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tokenize text ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops - -# The maximum number of bits that can be encoded by create_feature_bitmask -# in each datatype. -_max_bits = { - dtypes.uint8: 8, - dtypes.int8: 7, - dtypes.uint16: 16, - dtypes.int16: 15, - dtypes.int32: 31, - dtypes.int64: 63, -} - - -def create_feature_bitmask(tensor, dtype=dtypes.int32, name=None): - """Packs the innermost dimension of a boolean tensor into integer values. - - `result[i1...iN]` is the integer formed by interpreting the booleans - `tensor[i1...iN, 0:num_bits]` as individual bits, with big-endian order. - E.g., if `tensor[i1...iN, 0:num_bits] = [True, False, False, True, False]`, - then `result[i1...iN] = 0b10010 = 18`. The return tensor is of type `dtype`, - if specified; if `dtype` is not set, `int32` will be used. - - If `num_bits` is too large to fit in `dtype`, then an exception is raised - when this op is called (if `num_bits` is statically known) or when it is - evaluated (if `num_bits` is not statically known). - - Args: - tensor: `<bool>[D1...DN, num_bits]` The boolean tensor whose innermost - dimension should be packed to form integer values. - dtype: The datatype to output for this op (optional). - name: The name for this op (optional). - - Returns: - `<dtype> [D1...DN]` - An integer tensor formed by interpreting the innermost dimension of - `tensor` as individual bits. - - Raises: - ValueError: If the data to be packed is too large for the chosen data - type. - ValueError: If the data to be packed is not boolean. - InvalidArgumentError: If the input tensor is a list, or the dtype is not a - supported integer type. - - Examples: - - >>> assert create_feature_bitmask([True, False, False, True]) == 0b1001 - >>> create_feature_bitmask([[True, False], [False, True], [True, True]]) - <tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 1, 3], dtype=int32)> - """ - with ops.name_scope(name, 'CreateFeatureBitmask', [tensor]): - if (isinstance(tensor, (list, tuple)) and tensor and - isinstance(tensor[0], ops.Tensor)): - raise errors.InvalidArgumentError( - None, None, - 'CreateFeatureBitmask does not support lists of tensors. 
Consider ' - 'using tf.stack(list,-1) to create a single tensor before invoking ' - 'this op.') - - tensor = ops.convert_to_tensor(tensor, dtypes.bool, 'tensor') - - if dtype not in _max_bits.keys(): - raise errors.InvalidArgumentError( - None, None, 'dtype must be one of: [%s], was %s' % - (sorted(_max_bits.items(), key=lambda kv: kv[1]), dtype.name)) - - integer_data = math_ops.cast(tensor, dtype=dtype) - shape = tensor.shape - if shape.ndims is not None and shape.dims[-1].value is not None: - num_bits = shape.dims[-1].value - if num_bits > 63: - raise ValueError( - 'data.shape[-1] must be less than 64, is %d.' % num_bits) - elif num_bits > _max_bits[dtype]: - raise ValueError( - 'data.shape[-1] is too large for %s (was %d, cannot exceed %d); ' - 'consider switching condense_boolean_tensor to a larger ' - 'dtype.' % (dtype.name, num_bits, _max_bits[dtype])) - bit_masks = constant_op.constant( - [2**pos for pos in range(num_bits - 1, -1, -1)], dtype) - else: - bit_masks = constant_op.constant( - [2**pos for pos in range(_max_bits[dtype] - 1, -1, -1)], dtype) - num_bits = array_ops.shape(tensor)[-1] - with ops.control_dependencies([ - check_ops.assert_less_equal( - num_bits, - _max_bits[dtype], - message='data.shape[-1] is too large for %s (cannot exceed %s)' % - (dtype.name, _max_bits[dtype])) - ]): - # The second slice ("[:num_bits]") is a no-op unless num_bits==0. - bit_masks = bit_masks[-num_bits:][:num_bits] - return math_ops.reduce_sum(integer_data * bit_masks, axis=-1)
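The packing rule in the docstring above (the innermost booleans are read as big-endian bits) can be illustrated with a few lines of NumPy; `feature_bitmask` below is a hypothetical stand-in used only to show the arithmetic, not the TensorFlow op itself:

import numpy as np

def feature_bitmask(bools):
    bools = np.asarray(bools, dtype=np.int64)
    num_bits = bools.shape[-1]
    weights = 2 ** np.arange(num_bits - 1, -1, -1)   # big-endian bit weights
    return (bools * weights).sum(axis=-1)

print(feature_bitmask([True, False, False, True]))                     # 9 == 0b1001
print(feature_bitmask([[True, False], [False, True], [True, True]]))   # [2 1 3]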
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/create_feature_bitmask_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/create_feature_bitmask_op_test.py deleted file mode 100644 index 7e505f0..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/create_feature_bitmask_op_test.py +++ /dev/null
@@ -1,159 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for create_feature_bitmask_op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import create_feature_bitmask_op - - -@test_util.run_all_in_graph_and_eager_modes -class CreateFeatureBitmaskOpTest(test_util.TensorFlowTestCase): - - def test_docstring_example1(self): - data = [True, False, False, True] - result = create_feature_bitmask_op.create_feature_bitmask(data) - self.assertAllEqual(result, 0b1001) - - def test_docstring_example2(self): - data = [[True, False], [False, True], [True, True]] - result = create_feature_bitmask_op.create_feature_bitmask(data) - expected_result = constant_op.constant([0b10, 0b01, 0b11]) - self.assertAllEqual(result, expected_result) - - def test_feature_bitmask_single_dim_single_tensor(self): - """Test that the op can reduce a single-dimension tensor to a constant.""" - data = constant_op.constant([True, False]) - result = create_feature_bitmask_op.create_feature_bitmask(data) - - expected_result = constant_op.constant(2) - self.assertAllEqual(expected_result, result) - - def test_feature_bitmask_multiple_tensors_stack(self): - """Test that the op can reduce a stacked list of tensors.""" - data_1 = constant_op.constant([True, False]) - data_2 = constant_op.constant([False, True]) - stack_data = array_ops.stack([data_1, data_2], -1) - - expected_result = constant_op.constant([2, 1]) - result = create_feature_bitmask_op.create_feature_bitmask(stack_data) - self.assertAllEqual(expected_result, result) - - def test_feature_bitmask_multi_dim_single_tensor(self): - """Test that the op can reduce a multi-dimension tensor.""" - data = constant_op.constant([[True, True, False], [True, False, False]]) - result = create_feature_bitmask_op.create_feature_bitmask(data) - - expected_result = constant_op.constant([6, 4]) - self.assertAllEqual(expected_result, result) - - def test_feature_bitmask_3_dim_single_tensor(self): - """Test that the op can reduce a 3-dimension tensor.""" - data = constant_op.constant([[[True, True, False], [True, False, False]], - [[False, False, True], [True, False, True]]]) - result = create_feature_bitmask_op.create_feature_bitmask(data) - - expected_result = constant_op.constant([[6, 4], [1, 5]]) - self.assertAllEqual(expected_result, result) - - def test_feature_bitmask_multiple_tensors_multi_dim_stack(self): - """Test that the op can reduce a stacked list of multi-dim tensors.""" - data_1 = constant_op.constant([[True, False], [False, True]]) - data_2 = constant_op.constant([[False, True], [True, True]]) - stack_data = 
array_ops.stack([data_1, data_2], -1) - - expected_result = constant_op.constant([[2, 1], [1, 3]]) - result = create_feature_bitmask_op.create_feature_bitmask(stack_data) - self.assertAllEqual(expected_result, result) - - def test_supports_tensors_with_unknown_shape(self): - """Test that the op handles tensors with unknown shape.""" - data = array_ops.placeholder_with_default( - constant_op.constant([[[True, True, False], [True, False, False]], - [[False, False, True], [True, False, True]]]), - shape=None) - result = create_feature_bitmask_op.create_feature_bitmask(data) - - expected_result = constant_op.constant([[6, 4], [1, 5]]) - - self.assertAllEqual(expected_result, result) - - def test_feature_bitmask_multiple_tensors_error(self): - """Test that the op errors when presented with a single tensor.""" - data_1 = constant_op.constant([True, False]) - data_2 = constant_op.constant([True, True]) - list_data = [data_1, data_2] - error_message = 'CreateFeatureBitmask does not support lists of tensors.*' - - with self.assertRaisesRegexp(errors.InvalidArgumentError, error_message): - _ = create_feature_bitmask_op.create_feature_bitmask(list_data) - - def test_unsupported_dtype_type(self): - data = constant_op.constant([True, False]) - bad_dtype = dtypes.uint32 - error_message = 'dtype must be one of: .*, was %s' % bad_dtype.name - - with self.assertRaisesRegexp(errors.InvalidArgumentError, error_message): - _ = create_feature_bitmask_op.create_feature_bitmask( - data, dtype=bad_dtype) - - def test_unsupported_input_type(self): - data = constant_op.constant([1.0, 0.0]) - error_message = ('Tensor conversion requested dtype bool for Tensor' - ' with dtype float32: .*') - - with self.assertRaisesRegexp(ValueError, error_message): - _ = create_feature_bitmask_op.create_feature_bitmask(data) - - def test_larger_than_max_shape(self): - data = array_ops.fill([2, 64], False) - error_message = r'data.shape\[-1\] must be less than 64, is 64.' - - with self.assertRaisesRegexp(ValueError, error_message): - _ = create_feature_bitmask_op.create_feature_bitmask(data) - - def test_larger_than_dtype_shape(self): - data = array_ops.fill([2, 9], False) - error_message = (r'data.shape\[-1\] is too large for %s \(was 9, cannot ' - r'exceed 8\).*') % dtypes.uint8.name - - with self.assertRaisesRegexp(ValueError, error_message): - _ = create_feature_bitmask_op.create_feature_bitmask( - data, dtype=dtypes.uint8) - - def test_larger_than_dtype_shape_at_runtime(self): - data = array_ops.placeholder_with_default( - array_ops.fill([2, 9], False), shape=None) - error_message = (r'.*data.shape\[-1\] is too large for %s.*' % - dtypes.uint8.name) - - with self.assertRaisesRegexp((errors.InvalidArgumentError, ValueError), - error_message): - self.evaluate( - create_feature_bitmask_op.create_feature_bitmask( - data, dtype=dtypes.uint8)) - - -if __name__ == '__main__': - test.main()
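As the error message checked above suggests, a Python list of boolean tensors has to be stacked along the last axis before it can be packed. A brief sketch, assuming the upstream tensorflow_text package:

import tensorflow as tf
import tensorflow_text as tf_text

data_1 = tf.constant([True, False])
data_2 = tf.constant([False, True])
# Stack along the last axis so each row holds the bits of one output value.
stacked = tf.stack([data_1, data_2], -1)           # [[True, False], [False, True]]
print(tf_text.create_feature_bitmask(stacked))     # tf.Tensor([2 1], ...)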
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/fast_wordpiece_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/fast_wordpiece_tokenizer.py deleted file mode 100644 index 7b31479..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/fast_wordpiece_tokenizer.py +++ /dev/null
@@ -1,300 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops to tokenize words into subwords.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor -from tensorflow_text.core.pybinds import pywrap_fast_wordpiece_tokenizer_model_builder -from tensorflow_text.python.ops.tokenization import Detokenizer -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -# pylint: disable=g-bad-import-order -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_fast_wordpiece_tokenizer = load_library.load_op_library(resource_loader.get_path_to_datafile('_fast_wordpiece_tokenizer.so')) - -_tf_text_fast_wordpiece_tokenizer_op_create_counter = monitoring.Counter( - '/nlx/api/python/fast_wordpiece_tokenizer_create_counter', - 'Counter for number of FastWordpieceTokenizers created in Python.') - - -class FastWordpieceTokenizer(TokenizerWithOffsets, Detokenizer): - """Tokenizes a tensor of UTF-8 string tokens into subword pieces. - - It employs the linear (as opposed to quadratic) WordPiece algorithm. - - Differences compared to the classic WordpieceTokenizer (as of 11/2020): - (1) `unknown_token` cannot be None or empty. That means if a word is too long - or cannot be tokenized, FastWordpieceTokenizer always returns - `unknown_token`. In constrast, the original WordpieceTokenizer would - return the original word if `unknown_token` is empty or None. - (2) `unknown_token` must be included in the vocabulary. - (3) When `unknown_token` is returned, in tokenize_with_offsets(), the result - end_offset is set to be the length of the original input word. In - contrast, when `unknown_token` is returned by the original - WordpieceTokenizer, the end_offset is set to be the length of the - `unknown_token` string. - (4) `split_unknown_characters` is not supported. - (5) `max_chars_per_token` is not used or needed. - """ - - def __init__(self, - vocab=None, - suffix_indicator='##', - max_bytes_per_word=100, - token_out_type=dtypes.int64, - unknown_token='[UNK]', - no_pretokenization=False, - support_detokenization=False, - model_buffer=None): - """Initializes the FastWordpieceTokenizer. - - Two ways to initialize: - * (preferred) use a precompiled `model_buffer`. - * use `vocab`, `suffix_indicator`, `max_bytes_per_word`, `unknown_token`, - and `no_pretokenization`. - - Args: - vocab: (optional) The list of tokens in the vocabulary. 
- suffix_indicator: (optional) The characters prepended to a wordpiece to - indicate that it is a suffix to another subword. - max_bytes_per_word: (optional) Max size of input token. - token_out_type: (optional) The type of the token to return. This can be - `tf.int64` or `tf.int32` IDs, or `tf.string` subwords. - unknown_token: (optional) The string value to substitute for an unknown - token. It must be included in `vocab`. - no_pretokenization: (optional) By default, the input is split on - whitespaces and punctuations before applying the Wordpiece tokenization. - When true, the input is assumed to be pretokenized already. - support_detokenization: (optional) Whether to make the tokenizer support - doing detokenization. Setting it to true expands the size of the model - flatbuffer. As a reference, when using 120k multilingual BERT WordPiece - vocab, the flatbuffer's size increases from ~5MB to ~6MB. - model_buffer: (optional) Bytes object that contains the wordpiece model in - flatbuffer format (see fast_wordpiece_tokenizer_model.fbs). If not - `None`, all other arguments (except `token_output_type`) are ignored. - """ - super(FastWordpieceTokenizer, self).__init__() - _tf_text_fast_wordpiece_tokenizer_op_create_counter.get_cell().increase_by( - 1) - - if model_buffer is None: - model_buffer = (pywrap_fast_wordpiece_tokenizer_model_builder - .build_fast_wordpiece_model( - vocab, max_bytes_per_word, suffix_indicator, - unknown_token, no_pretokenization, - support_detokenization)) - # Use uint8 tensor as a buffer for the model to avoid any possible changes, - # for example truncation by '\0'. - self._model = constant_op.constant(list(model_buffer), dtype=dtypes.uint8) - - self._token_out_type = token_out_type - - def tokenize(self, input): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 string tokens further into subword tokens. - - ### Example 1, single word tokenization: - >>> vocab = ["they", "##'", "##re", "the", "great", "##est", "[UNK]"] - >>> tokenizer = FastWordpieceTokenizer(vocab, token_out_type=tf.string, - ... no_pretokenization=True) - >>> tokens = [["they're", "the", "greatest"]] - >>> tokenizer.tokenize(tokens) - <tf.RaggedTensor [[[b'they', b"##'", b'##re'], [b'the'], - [b'great', b'##est']]]> - - ### Example 2, general text tokenization (pre-tokenization on - ### punctuation and whitespace followed by WordPiece tokenization): - >>> vocab = ["they", "##'", "##re", "the", "great", "##est", "[UNK]", - ... "'", "re"] - >>> tokenizer = FastWordpieceTokenizer(vocab, token_out_type=tf.string) - >>> tokens = [["they're the greatest", "the greatest"]] - >>> tokenizer.tokenize(tokens) - <tf.RaggedTensor [[[b'they', b"'", b're', b'the', b'great', b'##est'], - [b'the', b'great', b'##est']]]> - - Args: - input: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A `RaggedTensor` of tokens where `tokens[i, j]` is the j-th token - (i.e., wordpiece) for `input[i]` (i.e., the i-th input word). This token - is either the actual token string content, or the corresponding integer - id, i.e., the index of that token string in the vocabulary. This choice - is controlled by the `token_out_type` parameter passed to the initializer - method. - """ - # TODO(xysong): Optimize below by calling different overload kernels. - subword, _, _ = self.tokenize_with_offsets(input) - return subword - - def tokenize_with_offsets(self, input): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 string tokens further into subword tokens. 
- - ### Example 1, single word tokenization: - >>> vocab = ["they", "##'", "##re", "the", "great", "##est", "[UNK]"] - >>> tokenizer = FastWordpieceTokenizer(vocab, token_out_type=tf.string, - ... no_pretokenization=True) - >>> tokens = [["they're", "the", "greatest"]] - >>> subtokens, starts, ends = tokenizer.tokenize_with_offsets(tokens) - >>> subtokens - <tf.RaggedTensor [[[b'they', b"##'", b'##re'], [b'the'], - [b'great', b'##est']]]> - >>> starts - <tf.RaggedTensor [[[0, 4, 5], [0], [0, 5]]]> - >>> ends - <tf.RaggedTensor [[[4, 5, 7], [3], [5, 8]]]> - - ### Example 2, general text tokenization (pre-tokenization on - ### punctuation and whitespace followed by WordPiece tokenization): - >>> vocab = ["they", "##'", "##re", "the", "great", "##est", "[UNK]", - ... "'", "re"] - >>> tokenizer = FastWordpieceTokenizer(vocab, token_out_type=tf.string) - >>> tokens = [["they're the greatest", "the greatest"]] - >>> subtokens, starts, ends = tokenizer.tokenize_with_offsets(tokens) - >>> subtokens - <tf.RaggedTensor [[[b'they', b"'", b're', b'the', b'great', b'##est'], - [b'the', b'great', b'##est']]]> - >>> starts - <tf.RaggedTensor [[[0, 4, 5, 8, 12, 17], [0, 4, 9]]]> - >>> ends - <tf.RaggedTensor [[[4, 5, 7, 11, 17, 20], [3, 9, 12]]]> - - Args: - input: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - tokens: is a `RaggedTensor`, where `tokens[i, j]` is the j-th token - (i.e., wordpiece) for `input[i]` (i.e., the i-th input word). This - token is either the actual token string content, or the corresponding - integer id, i.e., the index of that token string in the vocabulary. - This choice is controlled by the `token_out_type` parameter passed to - the initializer method. - start_offsets[i1...iN, j]: is a `RaggedTensor` of the byte offsets - for the inclusive start of the `jth` token in `input[i1...iN]`. - end_offsets[i1...iN, j]: is a `RaggedTensor` of the byte offsets for - the exclusive end of the `jth` token in `input[i`...iN]` (exclusive, - i.e., first byte after the end of the token). - """ - name = None - with ops.name_scope(name, 'FastWordpieceTokenizeWithOffsets', - [input, self._model]): - # Check that the types are expected and the ragged rank is appropriate. - tokens = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - rank = tokens.shape.ndims - if rank is None: - raise ValueError('input must have a known rank.') - - if rank == 0: - wordpieces, starts, ends = self.tokenize_with_offsets( - array_ops.stack([tokens])) - return wordpieces.values, starts.values, ends.values - - elif rank > 1: - if not ragged_tensor.is_ragged(tokens): - tokens = ragged_tensor.RaggedTensor.from_tensor( - tokens, ragged_rank=rank - 1) - wordpieces, starts, ends = self.tokenize_with_offsets( - tokens.flat_values) - wordpieces = wordpieces.with_row_splits_dtype(tokens.row_splits.dtype) - starts = starts.with_row_splits_dtype(tokens.row_splits.dtype) - ends = ends.with_row_splits_dtype(tokens.row_splits.dtype) - return (tokens.with_flat_values(wordpieces), - tokens.with_flat_values(starts), tokens.with_flat_values(ends)) - - # Tokenize the tokens into subwords. - # TODO(xysong): Optimize below by calling different overload kernels. 
- subwords, subword_ids, row_splits, starts, ends = ( - gen_fast_wordpiece_tokenizer.fast_wordpiece_tokenize_with_offsets( - input_values=tokens, wp_model=self._model)) - - if self._token_out_type == dtypes.int64: - values = math_ops.cast(subword_ids, dtypes.int64) - elif self._token_out_type == dtypes.int32: - values = math_ops.cast(subword_ids, dtypes.int32) - else: - values = subwords - - wordpieces = RaggedTensor.from_row_splits( - values, row_splits, validate=False) - starts = RaggedTensor.from_row_splits(starts, row_splits, validate=False) - ends = RaggedTensor.from_row_splits(ends, row_splits, validate=False) - - return wordpieces, starts, ends - - def detokenize(self, input): # pylint: disable=redefined-builtin - """Detokenizes a tensor of int64 or int32 subword ids into sentences. - - Detokenize and tokenize an input string returns itself when the input string - is normalized and the tokenized wordpieces don't contain `<unk>`. - - ### Example: - >>> vocab = ["they", "##'", "##re", "the", "great", "##est", "[UNK]", - ... "'", "re", "ok"] - >>> tokenizer = FastWordpieceTokenizer(vocab, support_detokenization=True) - >>> ids = tf.ragged.constant([[0, 1, 2, 3, 4, 5], [9]]) - >>> tokenizer.detokenize(ids) - <tf.Tensor: shape=(2,), dtype=string, - ... numpy=array([b"they're the greatest", b'ok'], dtype=object)> - >>> ragged_ids = tf.ragged.constant([[[0, 1, 2, 3, 4, 5], [9]], [[4, 5]]]) - >>> tokenizer.detokenize(ragged_ids) - <tf.RaggedTensor [[b"they're the greatest", b'ok'], [b'greatest']]> - - Args: - input: An N-dimensional `Tensor` or `RaggedTensor` of int64 or int32. - - Returns: - A `RaggedTensor` of sentences that has N - 1 dimension when N > 1. - Otherwise, a string tensor. - """ - name = None - with ops.name_scope(name, 'FastWordpieceDetokenize', [input, self._model]): - # Check that the types are expected and the ragged rank is appropriate. - subword_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - subword_ids = math_ops.cast(subword_ids, dtypes.int32) - rank = subword_ids.shape.ndims - if rank is None: - raise ValueError('input must have a known rank.') - - if rank < 2: - words = self.detokenize(array_ops.stack([subword_ids])) - return words[0] - - if not ragged_tensor.is_ragged(subword_ids): - subword_ids = ragged_tensor.RaggedTensor.from_tensor( - subword_ids, ragged_rank=rank - 1) - nested_row_splits = subword_ids.nested_row_splits - # Detokenize the wordpiece ids to texts. - words = ( - gen_fast_wordpiece_tokenizer.tf_text_fast_wordpiece_detokenize( - input_values=subword_ids.flat_values, - input_row_splits=nested_row_splits[-1], - wp_model=self._model)) - words = RaggedTensor.from_nested_row_splits( - words, nested_row_splits[:-1], validate=False) - - return words
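The docstrings above describe the linear-time WordPiece tokenizer with optional pre-tokenization and detokenization support. A short usage sketch against the upstream tensorflow_text package, with a hypothetical toy vocabulary:

import tensorflow as tf
import tensorflow_text as tf_text

vocab = ["they", "##'", "##re", "the", "great", "##est", "[UNK]", "'", "re"]
tokenizer = tf_text.FastWordpieceTokenizer(vocab, support_detokenization=True)

# Pre-tokenization on whitespace/punctuation, then WordPiece on each word.
ids = tokenizer.tokenize(["the greatest"])
print(ids)                          # roughly: [[3, 4, 5]] (indices into vocab)
print(tokenizer.detokenize(ids))    # roughly: [b'the greatest']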
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/fast_wordpiece_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/fast_wordpiece_tokenizer_test.py deleted file mode 100644 index 0fd0c6e..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/fast_wordpiece_tokenizer_test.py +++ /dev/null
@@ -1,793 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# encoding=utf-8 -"""Tests for fast_wordpiece_tokenizer op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl import flags -from absl.testing import parameterized -import numpy as np -import tensorflow as tf -import tensorflow_text as tf_text - -from tensorflow.lite.python import interpreter -from tensorflow.python.data.kernel_tests import test_base -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.eager import def_function -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow_text.python.ops.fast_wordpiece_tokenizer import FastWordpieceTokenizer - -FLAGS = flags.FLAGS - - -def _Utf8(char): - return char.encode("utf-8") - - -_ENGLISH_VOCAB = [ - b"don", - b"##'", - b"##t", - b"tread", - b"##ness", - b"hel", - b"##lo", - b"there", - b"my", - b"na", - b"##me", - b"is", - b"ter", - b"##ry", - b"what", - b"##cha", - b"##ma", - b"##call", - b"##it?", - b"you", - b"said", - b"[UNK]", -] - -_CHINESE_VOCAB = [ - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者"), - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福"), - "[UNK]", -] - -_MIXED_LANG_VOCAB = [ - b"don", - b"##'", - b"##t", - b"tread", - b"##ness", - b"hel", - b"##lo", - b"there", - b"my", - b"na", - b"##me", - b"is", - b"ter", - b"##ry", - b"what", - b"##cha", - b"##ma", - b"##call", - b"##it?", - b"you", - b"said", - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者"), - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福"), - "[UNK]", -] - -_RUSSIAN_VOCAB = [ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), - "[UNK]", -] - -# Vocab with Unicode chars that crashed ICU in the past. 
-_DEATH_VOCAB = [ - _Utf8(u"क"), - _Utf8(u"##र"), - _Utf8(u"##े"), - _Utf8(u"##ं"), - b"##*", - _Utf8(u"##👇"), - "[UNK]", -] - - -def _GetTokensFromWordpieceOffsets(tokens, begin_indices, end_indices): - begin_indices = begin_indices.to_list() - end_indices = end_indices.to_list() - result = [] - for docs_idx in range(0, len(tokens)): - tokens_in_doc = [] - for tokens_idx in range(0, len(tokens[docs_idx])): - token = bytes(tokens[docs_idx][tokens_idx]) - begin_offsets = begin_indices[docs_idx][tokens_idx] - end_offsets = end_indices[docs_idx][tokens_idx] - tokens_in_doc.append(b"".join( - [token[begin:end] for begin, end in zip(begin_offsets, end_offsets)])) - result.append(tokens_in_doc) - return result - - -class FastWordpieceOpOriginalTest(test_util.TensorFlowTestCase, - parameterized.TestCase): - """Adapted from the original WordpieceTokenizer tests.""" - - @parameterized.parameters([ - # Basic case - dict( - tokens=[[_Utf8(u"купиха")]], - expected_subwords=[[[ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), - ]]], - vocab=_RUSSIAN_VOCAB, - ), - dict( - tokens=[[b"don't", b"treadness"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread", b"##ness"]]], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=[[b"hello", b"there", b"my", b"name", b"is", b"terry"], - [b"whatchamacallit?", b"you", b"said"]], - expected_subwords=[[[b"hel", b"##lo"], [b"there"], [b"my"], - [b"na", b"##me"], [b"is"], [b"ter", b"##ry"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"], - [b"you"], [b"said"]]], - vocab=_ENGLISH_VOCAB, - ), - # Basic case w/ unknown token - dict( - tokens=[[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread"], [b"[UNK]"], - [b"[UNK]"]]], - vocab=_ENGLISH_VOCAB, - ), - # Basic case w/ int id lookup - dict( - tokens=[[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]], - token_out_type=dtypes.int64, - expected_subwords=[[[0, 1, 2], [3], [21], [21]]], - vocab=_ENGLISH_VOCAB, - ), - # Chinese test case - dict( - tokens=[[ - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者") - ], - [ - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福") - ]], - expected_subwords=[[[_Utf8(u"貿")], [_Utf8(u"易")], [_Utf8(u"戰")], - [_Utf8(u"最")], [_Utf8(u"大")], [_Utf8(u"受")], - [_Utf8(u"益")], [_Utf8(u"者")]], - [[_Utf8(u"越")], [_Utf8(u"南")], [_Utf8(u"總")], - [_Utf8(u"理")], [_Utf8(u"阮")], [_Utf8(u"春")], - [_Utf8(u"福")]]], - vocab=_CHINESE_VOCAB, - ), - # Mixed lang test cases - dict( - tokens=[ - [ - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者") - ], - [ - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福") - ], - [b"don't", b"treadness"], - ], - expected_subwords=[ - [[_Utf8(u"貿")], [_Utf8(u"易")], [_Utf8(u"戰")], - [_Utf8(u"最")], [_Utf8(u"大")], [_Utf8(u"受")], - [_Utf8(u"益")], [_Utf8(u"者")]], - [[_Utf8(u"越")], [_Utf8(u"南")], [_Utf8(u"總")], - [_Utf8(u"理")], [_Utf8(u"阮")], [_Utf8(u"春")], - [_Utf8(u"福")]], - [[b"don", b"##'", b"##t"], [b"tread", b"##ness"]], - ], - vocab=_MIXED_LANG_VOCAB, - ), - # Test token whose size is > max_bytes_per_word. When "[UNK]" is returned, - # FastWordpieceTokenizer sets the end_offset as the length of the input - # word. This is different from the original WordpieceTokenizer. See the - # comments of the FastWordpieceTokenizer class. 
- dict( - tokens=[[b"don't", b"treadness"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"[UNK]"]]], - vocab=_ENGLISH_VOCAB, - max_bytes_per_word=5, - # Explicitly specify the offsets here because the current way of - # testing offsets would require '[UNK]' to be part of tokens. - expected_start=[[[0, 3, 4], [0]]], - expected_end=[[[3, 4, 5], [9]]], - ), - # Test the token of death usecase. - dict( - tokens=[[_Utf8(u"करें*👇👇")]], - token_out_type=dtypes.string, - expected_subwords=[[[ - _Utf8(u"क"), - _Utf8(u"##र"), - _Utf8(u"##े"), - _Utf8(u"##ं"), b"##*", - _Utf8(u"##👇"), - _Utf8(u"##👇") - ]]], - vocab=_DEATH_VOCAB, - max_bytes_per_word=40, - ), - # Test not splitting out unknown characters. - # (p and ! are unknown) - dict( - tokens=[[b"nap", b"hello!me"]], - expected_subwords=[[[b"[UNK]"], [b"[UNK]"]]], - unknown_token="[UNK]", - vocab=_ENGLISH_VOCAB, - ), - ]) - def testWordPieceOpAndVerifyOffsets(self, - tokens, - expected_subwords, - vocab, - expected_start=None, - expected_end=None, - unknown_token="[UNK]", - token_out_type=dtypes.string, - max_bytes_per_word=100): - tokens_t = ragged_factory_ops.constant(tokens) - tokenizer = FastWordpieceTokenizer( - vocab=vocab, - unknown_token=unknown_token, - token_out_type=token_out_type, - max_bytes_per_word=max_bytes_per_word, - no_pretokenization=True - ) - subwords_t, begin_t, end_t = tokenizer.tokenize_with_offsets(tokens_t) - self.assertAllEqual(subwords_t, expected_subwords) - - # Verify the indices by performing the following: - # - Extract subwords and join them together to form the original tokens. - # - Then compare the extracted tokens and original tokens. - begin, end = (self.evaluate((begin_t, end_t))) - - # If expected start/end offsets were provided, check them explicitly. - # Otherwise test the offsets by extracting subwords using token offsets - # from the original 'tokens' input. 
- if expected_start is None or expected_end is None: - extracted_tokens = _GetTokensFromWordpieceOffsets(tokens, begin, end) - self.assertAllEqual(extracted_tokens, tokens) - else: - self.assertAllEqual(begin, expected_start) - self.assertAllEqual(end, expected_end) - - @parameterized.parameters([ - dict( - tokens=[[[b"don't"], [b"treadness"], - [b"whatchamacallit?", b"you", b"hello"]], [[b"treadness"]]], - expected_subwords=[ - [[[b"don", b"##'", b"##t"]], [[b"tread", b"##ness"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"], [b"you"], - [b"hel", b"##lo"]]], [[[b"tread", b"##ness"]]] - ], - vocab=_ENGLISH_VOCAB, - ), - ]) - def testWordPieceOpWithMultipleRaggedRank(self, - tokens, - expected_subwords, - vocab, - expected_start=None, - expected_end=None, - token_out_type=dtypes.string): - for row_splits_dtype in (dtypes.int32, dtypes.int64): - ragged_tokens = ragged_factory_ops.constant( - tokens, row_splits_dtype=row_splits_dtype) - tokenizer = FastWordpieceTokenizer( - vocab=vocab, token_out_type=token_out_type, - no_pretokenization=True) - subwords = tokenizer.tokenize(ragged_tokens) - self.assertAllEqual(subwords, expected_subwords) - - def testWordPieceOpWithIdReturned(self): - """Let the table determine how to do a lookup on unknown tokens.""" - tokens = ragged_factory_ops.constant( - [[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]]) - tokenizer = FastWordpieceTokenizer( - vocab=_ENGLISH_VOCAB, token_out_type=dtypes.int64, - no_pretokenization=True) - subwords, _, _ = tokenizer.tokenize_with_offsets(tokens) - - self.assertAllEqual(subwords, [[[0, 1, 2], [3], [21], [21]]]) - self.assertEqual(subwords.dtype, dtypes.int64) - - def testWordPieceOpWithInt32IdReturned(self): - """Let the table determine how to do a lookup on unknown tokens.""" - tokens = ragged_factory_ops.constant( - [[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]]) - tokenizer = FastWordpieceTokenizer( - vocab=_ENGLISH_VOCAB, token_out_type=dtypes.int32, - no_pretokenization=True) - subwords, _, _ = tokenizer.tokenize_with_offsets(tokens) - - self.assertAllEqual(subwords, [[[0, 1, 2], [3], [21], [21]]]) - self.assertEqual(subwords.dtype, dtypes.int32) - - # pyformat: disable - @parameterized.parameters([ - dict( - tokens=[[b"don't", b"treadness", b"whatchamacallit?"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread", b"##ness"], - [b"what", b"##cha", b"##ma", b"##call", - b"##it?"]]], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=[[[b"don't"], [b"treadness"], [b"whatchamacallit?"]]], - expected_subwords=[ - [[[b"don", b"##'", b"##t"]], [[b"tread", b"##ness"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"]]] - ], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=[[[b"don't", _Utf8(u"貿")], - [b"treadness", _Utf8(u"大")], - [b"whatchamacallit?", _Utf8(u"福")]]], - expected_subwords=[[[[b"don", b"##'", b"##t"], [_Utf8(u"貿")]], - [[b"tread", b"##ness"], [_Utf8(u"大")]], - [[ - b"what", b"##cha", b"##ma", b"##call", - b"##it?" - ], [_Utf8(u"福")]]]], - vocab=_MIXED_LANG_VOCAB, - ), - # # Vector input - dict( - tokens=[_Utf8(u"купиха")], - expected_subwords=[[ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), - ]], - vocab=_RUSSIAN_VOCAB, - ), - # # Scalar input - dict( - tokens=_Utf8(u"купиха"), - expected_subwords=[ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), - ], - vocab=_RUSSIAN_VOCAB, - ), - # 3D input with 1 ragged dimension. 
- dict( - tokens=[[b"don't", b"treadness", b"whatchamacallit?"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread", b"##ness"], - [b"what", b"##cha", b"##ma", b"##call", - b"##it?"]]], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=ragged_factory_ops.constant_value( - [[[b"don't"], [b"treadness"], [b"whatchamacallit?"]]], - ragged_rank=1), - expected_subwords=[ - [[[b"don", b"##'", b"##t"]], [[b"tread", b"##ness"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"]]] - ], - vocab=_ENGLISH_VOCAB, - ), - # Specifying max_chars_per_token. - dict( - tokens=[[b"don't", b"treadness"]], - max_chars_per_token=5, - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread", b"##ness"]]], - vocab=_ENGLISH_VOCAB + [b"trea", b"##d"], - ), - ]) - # pyformat: enable - def testTensors(self, - tokens, - expected_subwords, - vocab, - max_chars_per_token=None, - expected_start=None, - expected_end=None, - token_out_type=dtypes.string): - tokenizer = FastWordpieceTokenizer( - vocab=vocab, - token_out_type=token_out_type, - no_pretokenization=True - ) - subwords = tokenizer.tokenize(tokens) - self.assertAllEqual(subwords, expected_subwords) - - -# The following WordPiece setup is used in `FastWordpieceOpAdditionalTest` and -# `EndToEndFastWordpieceOpTest`. -_TEST_VOCAB = [ - "a", "abc", "abcdefghi", "##de", "##defgxy", "##deh", "##f", "##ghz", - "<unk>", "," -] -_TEST_MAX_BYTES_PER_WORD = 100 -_TEST_SUFFIX_INDICATOR = "##" -_TEST_UNKNOWN_TOKEN = "<unk>" - -# The same WordPiece model but precompiled in buffer. -_TEST_MODEL_BUFFER_PATH = "tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb" - - -def _LoadTestModelBuffer(): - return gfile.GFile(_TEST_MODEL_BUFFER_PATH, "rb").read() - - -@parameterized.parameters([ - # Test 0: Basic. - dict( - text_inputs=[u"", u"abcdefghz", u"abc", u"abcX"], - expected_outputs=[[], [1, 3, 6, 7], [1], [8]], - ), - # Test 1: 2D input. - dict( - text_inputs=[[u"", u"abcdefghz", u"abc", u"abcX"]], - expected_outputs=[[[], [1, 3, 6, 7], [1], [8]]], - ), - # Test 2: RaggedTensor input. - dict( - text_inputs=ragged_factory_ops.constant_value( - [[u"", u"abcdefghz", u"abc"], [u"abcX"]]), - expected_outputs=[[[], [1, 3, 6, 7], [1]], [[8]]], - ), -]) -class FastWordpieceOpAdditionalTest(test_base.DatasetTestBase, - test_util.TensorFlowTestCase, - parameterized.TestCase): - """Some new tests, including tests on `tf.function`.""" - - def testTokenizerBuiltFromConfig(self, text_inputs, expected_outputs): - tokenizer = FastWordpieceTokenizer( - vocab=_TEST_VOCAB, - max_bytes_per_word=_TEST_MAX_BYTES_PER_WORD, - suffix_indicator=_TEST_SUFFIX_INDICATOR, - unknown_token=_TEST_UNKNOWN_TOKEN, - no_pretokenization=True) - - self.assertAllEqual(tokenizer.tokenize(text_inputs), expected_outputs) - - def testTokenizerBuiltFromModel(self, text_inputs, expected_outputs): - model_buffer = _LoadTestModelBuffer() - tokenizer = FastWordpieceTokenizer(model_buffer=model_buffer) - - self.assertAllEqual(tokenizer.tokenize(text_inputs), expected_outputs) - - def testTokenizerBuiltInsideTfFunctionFromConfig(self, text_inputs, - expected_outputs): - - @def_function.function - def Preprocess(text_input): - tokenizer = FastWordpieceTokenizer( - vocab=_TEST_VOCAB, - max_bytes_per_word=_TEST_MAX_BYTES_PER_WORD, - suffix_indicator=_TEST_SUFFIX_INDICATOR, - unknown_token=_TEST_UNKNOWN_TOKEN, - no_pretokenization=True) - return tokenizer.tokenize(text_input) - - # Basic tests. - self.assertAllEqual(Preprocess(text_inputs), expected_outputs) - - # Test with tf.data.DataSets. 
- dataset = dataset_ops.Dataset.from_tensor_slices(text_inputs) - self.assertDatasetProduces(dataset.map(Preprocess), expected_outputs) - - def testTokenizerBuiltInsideTfFunctionFromModel(self, text_inputs, - expected_outputs): - - @def_function.function - def Preprocess(text_input): - model_buffer = _LoadTestModelBuffer() - tokenizer = FastWordpieceTokenizer(model_buffer=model_buffer) - return tokenizer.tokenize(text_input) - - # Basic tests. - self.assertAllEqual(Preprocess(text_inputs), expected_outputs) - - # Test with tf.data.DataSets. - dataset = dataset_ops.Dataset.from_tensor_slices(text_inputs) - self.assertDatasetProduces(dataset.map(Preprocess), expected_outputs) - - def testTokenizerBuiltOutsideTfFunctionFromConfig(self, text_inputs, - expected_outputs): - tokenizer = FastWordpieceTokenizer( - vocab=_TEST_VOCAB, - max_bytes_per_word=_TEST_MAX_BYTES_PER_WORD, - suffix_indicator=_TEST_SUFFIX_INDICATOR, - unknown_token=_TEST_UNKNOWN_TOKEN, - no_pretokenization=True) - - @def_function.function - def Preprocess(text_input): - return tokenizer.tokenize(text_input) - - # Basic tests. - self.assertAllEqual(Preprocess(text_inputs), expected_outputs) - - # Test with tf.data.DataSets. - dataset = dataset_ops.Dataset.from_tensor_slices(text_inputs) - self.assertDatasetProduces(dataset.map(Preprocess), expected_outputs) - - def testTokenizerBuiltOutsideTfFunctionFromModel(self, text_inputs, - expected_outputs): - model_buffer = _LoadTestModelBuffer() - tokenizer = FastWordpieceTokenizer(model_buffer=model_buffer) - - @def_function.function - def Preprocess(text_input): - return tokenizer.tokenize(text_input) - - # Basic tests. - self.assertAllEqual(Preprocess(text_inputs), expected_outputs) - - # Test with tf.data.DataSets. - dataset = dataset_ops.Dataset.from_tensor_slices(text_inputs) - self.assertDatasetProduces(dataset.map(Preprocess), expected_outputs) - - -@parameterized.parameters([ - # Test 0: Basic. - dict( - text_inputs=[u"abcdefghz abc, abcX"], - expected_outputs=[[1, 3, 6, 7, 1, 9, 8]], - ), - # Test 1: 2D input. - dict( - text_inputs=[[u"abcdefghz abc abcX"]], - expected_outputs=[[[1, 3, 6, 7, 1, 8]]], - ), - # Test 2: RaggedTensor input. - dict( - text_inputs=ragged_factory_ops.constant_value( - [[u"", u"abcdefghz", u"abc"], [u"abcX"]]), - expected_outputs=[[[], [1, 3, 6, 7], [1]], [[8]]], - ), -]) -class EndToEndFastWordpieceOpTest(test_base.DatasetTestBase, - test_util.TensorFlowTestCase, - parameterized.TestCase): - """Test on end-to-end fast WordPiece when input is sentence.""" - - def testTokenizerBuiltFromConfig(self, text_inputs, expected_outputs): - tokenizer = FastWordpieceTokenizer( - vocab=_TEST_VOCAB, - max_bytes_per_word=_TEST_MAX_BYTES_PER_WORD, - suffix_indicator=_TEST_SUFFIX_INDICATOR, - unknown_token=_TEST_UNKNOWN_TOKEN) - self.assertAllEqual(tokenizer.tokenize(text_inputs), expected_outputs) - - -@parameterized.parameters([ - # Test 0: Basic. - dict( - id_inputs=[[1, 3, 6, 7, 1, 9, 8]], # Ids of [[u"abcdefghz abc, abcX"]]. - expected_outputs=[b"abcdefghz abc , <unk>"], - ), - # Test 1: 1D input. - dict( - id_inputs=[1, 3, 6, 7, 1, 9, 8], # Ids of [u"abcdefghz abc, abcX"]. - expected_outputs=b"abcdefghz abc , <unk>", - ), - # Test 2: RaggedTensor input. 
- dict( - id_inputs=ragged_factory_ops.constant_value([[[], [1, 3, 6, 7, 1, 9, 8], - [1]], [[8]]]), - expected_outputs=[[b"", b"abcdefghz abc , <unk>", b"abc"], [b"<unk>"]], - ), -]) -class FastWordpieceDetokenizeOpTest(test_base.DatasetTestBase, - test_util.TensorFlowTestCase, - parameterized.TestCase): - """Test on end-to-end fast WordPiece when input is sentence.""" - - def testTokenizerBuiltFromConfig(self, id_inputs, expected_outputs): - tokenizer = FastWordpieceTokenizer( - vocab=_TEST_VOCAB, - max_bytes_per_word=_TEST_MAX_BYTES_PER_WORD, - suffix_indicator=_TEST_SUFFIX_INDICATOR, - unknown_token=_TEST_UNKNOWN_TOKEN, - support_detokenization=True) - results = tokenizer.detokenize(id_inputs) - self.assertAllEqual(results, expected_outputs) - - -@parameterized.parameters([ - # Test 0: Single-word case. - dict( - no_pretokenization=True, - text_inputs=[["", "abcdefghz", "abc", "abcX"]], - ), - # Test 1: End-to-end case. - dict( - no_pretokenization=False, - text_inputs=[["", "abcdefghz, a", "abcdefghz abc abcX"]], - ), -]) -class FastWordpieceInKerasModelTest(test_util.TensorFlowTestCase, - parameterized.TestCase): - """Tests fast WordPiece when used in a Keras model.""" - - def testTfLiteWordpieceTokenizer( - self, no_pretokenization, text_inputs): - """Checks TFLite conversion and inference.""" - - class TokenizerModel(tf.keras.Model): - - def __init__(self, - vocab, - max_bytes_per_word=100, - suffix_indicator="##", - unknown_token="<unk>", - no_pretokenization=True, - support_detokenization=False, - **kwargs): - super().__init__(**kwargs) - self.wp = FastWordpieceTokenizer( - vocab=vocab, - max_bytes_per_word=max_bytes_per_word, - suffix_indicator=suffix_indicator, - unknown_token=unknown_token, - no_pretokenization=no_pretokenization, - support_detokenization=support_detokenization) - - def call(self, input_tensor, **kwargs): - return self.wp.tokenize(input_tensor).flat_values - - @tf.function( - input_signature=[tf.TensorSpec(shape=[None], dtype=dtypes.int64)]) - def detokenize(self, input_tensor): - return self.wp.detokenize(input_tensor) - - # Test input data. - input_data = np.array(text_inputs) - - # Define a Keras model. - model = TokenizerModel( - _TEST_VOCAB, - _TEST_MAX_BYTES_PER_WORD, - _TEST_SUFFIX_INDICATOR, - _TEST_UNKNOWN_TOKEN, - no_pretokenization, - support_detokenization=True) - # Test tokenization. - # Do TF.Text inference. - tf_result = model(input_data) - - # Convert to TFLite. - converter = tf.lite.TFLiteConverter.from_keras_model(model) - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] - converter.allow_custom_ops = True - tflite_model = converter.convert() - - # Do TFLite inference. - interp = interpreter.InterpreterWithCustomOps( - model_content=tflite_model, - custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS) - interp.allocate_tensors() - input_details = interp.get_input_details() - interp.set_tensor(input_details[0]["index"], input_data) - interp.invoke() - output_details = interp.get_output_details() - tflite_result = interp.get_tensor(output_details[0]["index"]) - - # Assert the results are identical. - self.assertAllEqual(tflite_result, tf_result) - - # Test detokenization. - # Do TF.Text detokenization. - tf_detokenization_result = model.detokenize(tf_result) - # Convert to TFLite. 
- converter = tf.lite.TFLiteConverter.from_concrete_functions( - [model.detokenize.get_concrete_function()], model) - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] - converter.allow_custom_ops = True - tflite_model = converter.convert() - - # Do TFLite detokenization. - interp = interpreter.InterpreterWithCustomOps( - model_content=tflite_model, - custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS) - interp.allocate_tensors() - detokenize = interp.get_signature_runner("serving_default") - tflite_detokenization_result = detokenize( - input_tensor=tf_result)["output_0"] - - # Assert the results are identical. - self.assertAllEqual(tf_detokenization_result, tflite_detokenization_result) - - -if __name__ == "__main__": - test.main()
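The removed fast_wordpiece_tokenizer_test.py above checked, among other things, that the byte offsets returned by tokenize_with_offsets slice each input token back together. A condensed sketch of that invariant, assuming the upstream tensorflow_text package; the small vocabulary below is a hypothetical subset of the test's _ENGLISH_VOCAB:

import tensorflow as tf
import tensorflow_text as tf_text

vocab = [b"don", b"##'", b"##t", b"tread", b"##ness", b"[UNK]"]
tokenizer = tf_text.FastWordpieceTokenizer(vocab=vocab, no_pretokenization=True)

tokens = tf.ragged.constant([[b"don't", b"treadness"]])
pieces, starts, ends = tokenizer.tokenize_with_offsets(tokens)

# Rebuild each token from its wordpiece byte offsets and compare to the input.
for tok, s_list, e_list in zip(tokens[0].numpy(),
                               starts[0].to_list(), ends[0].to_list()):
    rebuilt = b"".join(tok[s:e] for s, e in zip(s_list, e_list))
    assert rebuilt == tok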
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/gather_with_default_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/gather_with_default_op_test.py deleted file mode 100644 index d396c2c2..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/gather_with_default_op_test.py +++ /dev/null
@@ -1,259 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for gather_with_default op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# Dependency imports -from absl.testing import parameterized -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import pointer_ops - - -def _MakeTestTensor(shape, prefix=b'v'): - """Constructs a string tensor with the specified shape, for testing.""" - if not shape: - return prefix - return [ - _MakeTestTensor(shape[1:], b'%s%s' % (prefix, ('%s' % i).encode('ascii'))) - for i in range(shape[0]) - ] - - -@test_util.run_all_in_graph_and_eager_modes -class GatherWithDefaultOpTest(test_util.TensorFlowTestCase, - parameterized.TestCase): - - def testDocStringExample(self): - gathered = pointer_ops.gather_with_default(['a', 'b', 'c', 'd'], - [2, 0, -1, 2, -1], '_') - self.assertAllEqual(gathered, [b'c', b'a', b'_', b'c', b'_']) - - @parameterized.parameters( - (_MakeTestTensor([8]), -1, b'_'), - (_MakeTestTensor([8]), 0, b'_'), - (_MakeTestTensor([8]), 1, b'_'), - (_MakeTestTensor([8]), 6, b'_'), - (_MakeTestTensor([8]), 7, b'_'), - ) - def testScalarIndicesWith1DParams(self, params, indices, default): - indices_t = constant_op.constant(indices, dtype=dtypes.int32) - params_t = constant_op.constant(params) - assert isinstance(indices, int) - gathered = pointer_ops.gather_with_default(params_t, indices_t, default) - expected = default if indices == -1 else params[indices] - - self.assertAllEqual(expected, gathered) - # When there are no -1 indices, check that behavior matches tf.gather. - if indices != -1: - self.assertAllEqual(gathered, array_ops.gather(params_t, indices_t)) - - @parameterized.parameters( - (_MakeTestTensor([3, 2]), -1, [b'_', b'_']), - (_MakeTestTensor([3, 2]), 0, [b'_', b'_']), - (_MakeTestTensor([3, 2]), 1, [b'_', b'_']), - (_MakeTestTensor([3, 2]), 2, [b'_', b'_']), - ) - def testScalarIndicesWith2DParams(self, params, indices, default): - indices_t = constant_op.constant(indices, dtype=dtypes.int32) - params_t = constant_op.constant(params) - assert isinstance(indices, int) - gathered = pointer_ops.gather_with_default(params_t, indices_t, default) - expected = default if indices == -1 else params[indices] - self.assertAllEqual(gathered, expected) - # When there are no -1 indices, check that behavior matches tf.gather. 
- if indices != -1: - self.assertAllEqual(gathered, array_ops.gather(params_t, indices_t)) - - @parameterized.parameters( - # 1D params - (_MakeTestTensor([8]), [], '_'), - (_MakeTestTensor([8]), [0], '_'), - (_MakeTestTensor([8]), [-1], '_'), - (_MakeTestTensor([8]), [6], '_'), - (_MakeTestTensor([8]), [2, 0, 2, -1, 5, -1], '_'), - (_MakeTestTensor([8]), [2, 0, 2, 1, 5, 3], '_'), - # 2D params - (_MakeTestTensor([3, 2]), [], ['_', '_'], [0, 2]), - (_MakeTestTensor([3, 2]), [0], ['_', '_']), - (_MakeTestTensor([3, 2]), [1], ['_', '_']), - (_MakeTestTensor([3, 2]), [-1], ['_', '_']), - (_MakeTestTensor([3, 2]), [2], ['_', '_']), - (_MakeTestTensor([3, 2]), [1, 0, -1, 2, -1], ['_', '_']), - (_MakeTestTensor([3, 2]), [1, 0, 1, 2, 0], ['_', '_']), - ) - def testVectorIndices(self, params, indices, default, expected_shape=None): - indices_t = constant_op.constant(indices, dtype=dtypes.int32) - params_t = constant_op.constant(params) - gathered = pointer_ops.gather_with_default(params_t, indices_t, default) - expected = [default if i == -1 else params[i] for i in indices] - expected = constant_op.constant(expected, shape=expected_shape) - self.assertAllEqual(gathered, expected) - # When there are no -1 indices, check that behavior matches tf.gather. - if not any(i == -1 for i in indices): - self.assertAllEqual(gathered, array_ops.gather(params_t, indices_t)) - - @parameterized.parameters( - # 1D params - (_MakeTestTensor([8]), [], '_'), - (_MakeTestTensor([8]), [[0]], '_'), - (_MakeTestTensor([8]), [[-1]], '_'), - (_MakeTestTensor([8]), [[6]], '_'), - (_MakeTestTensor([8]), [[2, 0], [2, -1], [5, -1]], '_'), - (_MakeTestTensor([8]), [[2, 0], [2, 1], [5, 2]], '_'), - # 2D params - (_MakeTestTensor([3, 2]), [], ['_', '_'], [0, 2]), - (_MakeTestTensor([3, 2]), [[0]], ['_', '_']), - (_MakeTestTensor([3, 2]), [[1]], ['_', '_']), - (_MakeTestTensor([3, 2]), [[-1]], ['_', '_']), - (_MakeTestTensor([3, 2]), [[2]], ['_', '_']), - (_MakeTestTensor([3, 2]), [[1, 0], [-1, 2], [-1, -1]], ['_', '_']), - (_MakeTestTensor([3, 2]), [[1, 0], [1, 2], [0, 0]], ['_', '_']), - ) - def test2DIndices(self, params, indices, default, expected_shape=None): - indices_t = constant_op.constant(indices, dtype=dtypes.int32) - params_t = constant_op.constant(params) - gathered = pointer_ops.gather_with_default(params_t, indices_t, default) - expected = [[default if i == -1 else params[i] - for i in indices_row] - for indices_row in indices] - expected = constant_op.constant(expected, shape=expected_shape) - self.assertAllEqual(gathered, expected) - # When there are no -1 indices, check that behavior matches tf.gather. 
- if not any(i == -1 for index_row in indices for i in index_row): - self.assertAllEqual(gathered, array_ops.gather(params_t, indices_t)) - - def testAxisGreaterThan0(self): - params = [['a0', 'a1', 'a2', 'a3', 'a4'], - ['b0', 'b1', 'b2', 'b3', 'b4'], - ['c0', 'c1', 'c2', 'c3', 'c4']] # pyformat: disable - indices = [2, 0, -1, 4, -1] - gathered = pointer_ops.gather_with_default(params, indices, '__', axis=1) - expected = [[b'a2', b'a0', b'__', b'a4', b'__'], - [b'b2', b'b0', b'__', b'b4', b'__'], - [b'c2', b'c0', b'__', b'c4', b'__']] # pyformat: disable - self.assertAllEqual(gathered, expected) - - def testNegativeAxis(self): - params_1d = _MakeTestTensor(shape=[3]) - params_2d = _MakeTestTensor(shape=[3, 3]) - params_3d = _MakeTestTensor(shape=[3, 3, 3]) - indices = [2, 0, -1, 1, -1] - - gathered1a = pointer_ops.gather_with_default( - params_1d, indices, '__', axis=0) - gathered1b = pointer_ops.gather_with_default( - params_1d, indices, '__', axis=-1) - expected1 = [b'v2', b'v0', b'__', b'v1', b'__'] - - gathered2a = pointer_ops.gather_with_default( - params_2d, indices, ['__', '__', '__'], axis=0) - gathered2b = pointer_ops.gather_with_default( - params_2d, indices, ['__', '__', '__'], axis=-2) - expected2 = [[b'v20', b'v21', b'v22'], - [b'v00', b'v01', b'v02'], - [b'__', b'__', b'__'], - [b'v10', b'v11', b'v12'], - [b'__', b'__', b'__']] # pyformat: disable - - gathered3a = pointer_ops.gather_with_default( - params_2d, indices, '__', axis=1) - gathered3b = pointer_ops.gather_with_default( - params_2d, indices, '__', axis=-1) - expected3 = [[b'v02', b'v00', b'__', b'v01', b'__'], - [b'v12', b'v10', b'__', b'v11', b'__'], - [b'v22', b'v20', b'__', b'v21', b'__']] # pyformat: disable - - gathered4a = pointer_ops.gather_with_default( - params_3d, indices, '__', axis=2) - gathered4b = pointer_ops.gather_with_default( - params_3d, indices, '__', axis=-1) - expected4 = [[[b'v002', b'v000', b'__', b'v001', b'__'], - [b'v012', b'v010', b'__', b'v011', b'__'], - [b'v022', b'v020', b'__', b'v021', b'__']], - [[b'v102', b'v100', b'__', b'v101', b'__'], - [b'v112', b'v110', b'__', b'v111', b'__'], - [b'v122', b'v120', b'__', b'v121', b'__']], - [[b'v202', b'v200', b'__', b'v201', b'__'], - [b'v212', b'v210', b'__', b'v211', b'__'], - [b'v222', b'v220', b'__', b'v221', b'__']]] - - self.assertAllEqual(gathered1a, expected1) - self.assertAllEqual(gathered1b, expected1) - self.assertAllEqual(gathered2a, expected2) - self.assertAllEqual(gathered2b, expected2) - self.assertAllEqual(gathered3a, expected3) - self.assertAllEqual(gathered3b, expected3) - self.assertAllEqual(gathered4a, expected4) - self.assertAllEqual(gathered4b, expected4) - - def testAxisGreaterThan0_BehaviorMatchesTfGather(self): - params = [['a1', 'a2', 'a3', 'a4'], ['b1', 'b2', 'b3', 'b4'], - ['c1', 'c2', 'c3', 'c4']] - indices = [2, 0, 2, 1] - gathered = pointer_ops.gather_with_default(params, indices, '__', axis=1) - expected = array_ops.gather(params, indices, axis=1) - self.assertAllEqual(gathered, expected) - - def testBadDefaultShape(self): - with self.assertRaises((ValueError, errors.InvalidArgumentError)): - pointer_ops.gather_with_default( - params=[0, 1, 2, 3], indices=[0], default=[0]) - with self.assertRaises((ValueError, errors.InvalidArgumentError)): - pointer_ops.gather_with_default( - params=[[0, 1], [2, 3]], indices=[0], default=0) - - def testBadDefaultDtype(self): - with self.assertRaisesRegexp( - TypeError, - 'Expected int32.*|Cannot convert .*'): - pointer_ops.gather_with_default( - params=[0, 1, 2, 3], 
indices=[0], default='a') - - def testBadAxis(self): - with self.assertRaises((ValueError, errors.InvalidArgumentError)): - pointer_ops.gather_with_default( - params=[0, 1, 2, 3], indices=[0], default=-1, axis=1) - with self.assertRaises((ValueError, errors.InvalidArgumentError)): - pointer_ops.gather_with_default( - params=[[0, 1], [2, 3]], indices=[0], default=[0, 0], axis=2) - - def testIndexOutOfRange(self): - # Note: because of the way gather_with_default is implemented, these - # error messages will report values and ranges that are one greater than - # those that were supplied to gather_with_default. - with self.assertRaisesRegexp(errors.InvalidArgumentError, - r'indices\[0\] = .* is not in .*'): - self.evaluate( - pointer_ops.gather_with_default( - params=[0, 1, 2, 3], indices=[4], default=0)) - - with self.assertRaisesRegexp(errors.InvalidArgumentError, - r'indices\[0\] = .* is not in .*'): - self.evaluate( - pointer_ops.gather_with_default( - params=[0, 1, 2, 3], indices=[-2], default=0)) - - -if __name__ == '__main__': - test.main()
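The removed gather_with_default_op_test.py above pins down the op's contract: it behaves like tf.gather except that an index of -1 yields a caller-supplied default value. A rough equivalent for the 1-D, axis-0 case using stock TensorFlow ops (a sketch, not the deleted op's implementation):

import tensorflow as tf

def gather_with_default_1d(params, indices, default):
    # Like tf.gather along axis 0, but an index of -1 selects `default`.
    indices = tf.convert_to_tensor(indices)
    gathered = tf.gather(params, tf.maximum(indices, 0))  # clamp -1 to a valid index
    fill = tf.fill(tf.shape(gathered), default)
    return tf.where(indices < 0, fill, gathered)

# Mirrors the docstring example exercised by the removed tests:
print(gather_with_default_1d(tf.constant(['a', 'b', 'c', 'd']),
                             [2, 0, -1, 2, -1], '_'))
# Expected: [b'c' b'a' b'_' b'c' b'_']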
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/greedy_constrained_sequence_op.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/greedy_constrained_sequence_op.py deleted file mode 100644 index 5daf41b..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/greedy_constrained_sequence_op.py +++ /dev/null
@@ -1,170 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Bulk Greedy Constrained Sequence. - -Constrains a set of predictions based on a set of legal transitions and/or a -set of transition weights, returning the legal sequence that maximizes the -product of the state scores and the transition weights at each step. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_tensor - -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_constrained_sequence_op = load_library.load_op_library(resource_loader.get_path_to_datafile('_constrained_sequence_op.so')) - - -def greedy_constrained_sequence(scores, - sequence_length=None, - allowed_transitions=None, - transition_weights=None, - use_log_space=False, - use_start_and_end_states=False, - name=None): - """Performs greedy constrained sequence on a batch of examples. - - Constrains a set of predictions based on a set of legal transitions - and/or a set of transition weights, returning the legal sequence that - maximizes the product or sum of the state scores and the transition weights - at each step. If `use_log_space` is true, the sum is used; if false, the - product is used. - - This op also takes a parameter `use_start_and_end_states`, which when true - will add an implicit start and end state to each sequence. These implicit - states allow the user to specify additional weights and permitted transitions - to start and end a sequence (so, for instance, if you wanted to forbid your - output from ending in a certain set of states you could do so). - - Inputs to this op can take one of three forms: a single TensorFlow tensor - of scores with no sequence lengths, a TensorFlow tensor of scores along - with a TensorFlow tensor of sequence lengths, or a RaggedTensor. If only the - scores tensor is passed, this op will assume that the sequence lengths are - equal to the size of the tensor (and so use all the data provided). If a - scores tensor and sequence_lengths tensor is provided, the op will only - use the data in the scores tensor as specified by the sequence_lengths tensor. - Finally, if a RaggedTensor is provided, the sequence_lengths will be ignored - and the variable length sequences in the RaggedTensor will be used. - - Args: - scores: `<float32> [batch_size, num_steps, |num_states|]` - A tensor of scores, where `scores[b, t, s]` is the predicted score for - transitioning to state `s` at step `t` for batch `b`. The |num_states| - dimension must correspond to the num_states attribute for this op. This - input may be ragged; if it is ragged, the ragged tensor should have the - same structure [b, t, s] and only axis 1 should be ragged. 
- - sequence_length: `<{int32, int64}>[batch_size]` - A rank-1 tensor representing the length of the output sequence. If None, - and the 'scores' input is not ragged, sequence lengths will be assumed - to be the length of the score tensor. - - allowed_transitions: - if use_start_and_end_states is TRUE: - `<bool>[num_states+1, num_states+1]` - if use_start_and_end_states is FALSE: - `<bool>[num_states, num_states]` - A rank-2 tensor representing allowed transitions. - - allowed_transitions[i][j] is true if the transition from state i to - state j is allowed for i and j in 0...(num_states). - - allowed_transitions[num_states][num_states] is ignored. - If use_start_and_end_states is TRUE: - - allowed_transitions[num_states][j] is true if the sequence is allowed - to start from state j. - - allowed_transitions[i][num_states] is true if the sequence is allowed - to end on state i. - Default - An empty tensor. This allows all sequence states to transition - to all other sequence states. - - transition_weights: - if use_start_and_end_states is TRUE: - `<float32>[num_states+1, num_states+1]` - if use_start_and_end_states is FALSE: - `<float32>[num_states, num_states]` - A rank-2 tensor representing transition weights. - - transition_weights[i][j] is the coefficient that a candidate transition - score will be multiplied by if that transition is from state i to - state j. - - transition_weights[num_states][num_states] is ignored. - If use_start_and_end_states is TRUE: - - transition_weights[num_states][j] is the coefficient that will be used - if the transition starts with state j. - - transition_weights[i][num_states] is the coefficient that will be used - if the final state in the sequence is state i. - Default - An empty tensor. This assigns a wieght of 1.0 all transitions - - use_log_space: Whether to use log space for the calculation. If false, - calculations will be done in exp-space. - - use_start_and_end_states: If True, sequences will have an implicit start - and end state added. - - name: The name scope within which this op should be constructed. - - Returns: - An <int32>[batch_size, (num_steps)] ragged tensor containing the appropriate - sequence of transitions. If a sequence is impossible, the value of the - RaggedTensor for that and all following transitions in that sequence shall - be '-1'. - """ - with ops.name_scope( - name, "BulkViterbiConstrainedSequence", - [scores, sequence_length, allowed_transitions, transition_weights]): - if allowed_transitions is None: - allowed_transitions = [] - - if transition_weights is None: - transition_weights = [] - - score_data = ragged_tensor.convert_to_tensor_or_ragged_tensor( - scores, name="score_data") - - if isinstance(score_data, ragged_tensor.RaggedTensor): - # TODO(momernick): Extend the generated op to support ragged tensors. - dense_scores = score_data.to_tensor(default_value=0) - sequence_lengths = score_data.row_lengths(axis=1) - else: - dense_scores = score_data - # In this case, the core input was a dense tensor. 
- if sequence_length is not None: - sequence_lengths = ops.convert_to_tensor(sequence_length) - else: - batch_size = array_ops.shape(dense_scores)[0] - dense_length = array_ops.shape(dense_scores)[-2] - sequence_lengths = array_ops.ones([batch_size], - dtype=dtypes.int32) * dense_length - - transition_weights = ops.convert_to_tensor(transition_weights) - allowed_transitions = ops.convert_to_tensor( - allowed_transitions, dtype=dtypes.bool) - - output, output_splits = gen_constrained_sequence_op.constrained_sequence( - scores=dense_scores, - sequence_lengths=sequence_lengths, - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_viterbi=False, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - return ragged_tensor.RaggedTensor.from_row_splits( - values=output, row_splits=output_splits)
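The docstring of the removed greedy_constrained_sequence_op.py above describes the decode rule: at each step, combine the state scores with the transition weights out of the previously chosen state (added in log space, multiplied otherwise), mask disallowed transitions, and keep the best state. A simplified NumPy sketch of the log-space case without implicit start/end states (note the real op breaks score ties toward the last state, unlike np.argmax):

import numpy as np

def greedy_decode_log_space(scores, transition_weights, allowed_transitions):
    # scores[t][s] is the score of state s at step t for one batch item.
    masked = np.where(allowed_transitions, transition_weights, -np.inf)
    sequence = [int(np.argmax(scores[0]))]   # first step: no incoming transition
    for step_scores in scores[1:]:
        adjusted = step_scores + masked[sequence[-1]]
        sequence.append(int(np.argmax(adjusted)))
    return sequence

scores = np.array([[10.0, 12.0, 6.0, 4.0],
                   [13.0, 12.0, 11.0, 10.0]])
print(greedy_decode_log_space(scores, np.zeros((4, 4)), np.ones((4, 4), bool)))
# -> [1, 0]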
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/greedy_constrained_sequence_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/greedy_constrained_sequence_op_test.py deleted file mode 100644 index a2e5194..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/greedy_constrained_sequence_op_test.py +++ /dev/null
@@ -1,647 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensorflow_text.greedy_constrained_sequence_op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import greedy_constrained_sequence_op as greedy_op - - -# TODO(b/122968457): Refactor this test logic. -@test_util.run_all_in_graph_and_eager_modes -class GreedyConstrainedSequenceOpTest(test_util.TensorFlowTestCase): - - def _last_max(self, array): - """Helper function that matches the maximum behaviour in the C++ op.""" - score = -float("inf") - index = -1 - for i in range(len(array)): - if array[i] >= score: - index = i - score = array[i] - return index - - def _decode_greedy_sequence(self, - score_data, - transition_params=None, - allowed_transitions=None, - use_log_space=True, - use_start_and_end_states=False): - scores = np.array(score_data) - num_states = scores.shape[-1] - - if transition_params is None: - if use_log_space: - transition_params = np.zeros(num_states, num_states) - else: - transition_params = np.ones(num_states, num_states) - - if allowed_transitions is not None: - if use_log_space: - transition_mask = np.where(allowed_transitions, 1, -float("inf")) - else: - transition_mask = np.where(allowed_transitions, 1, 0.0) - transition_params = transition_params * transition_mask - - # Handle state 0. - current_scores = scores[0] - print(current_scores) - if use_start_and_end_states: - for i in range(num_states): - if use_log_space: - current_scores[i] += transition_params[-1][i] - else: - current_scores[i] *= transition_params[-1][i] - sequence = [self._last_max(current_scores)] - - for score_idx in range(1, len(scores)): - current_scores = scores[score_idx] - for i in range(num_states): - if use_log_space: - current_scores[i] += transition_params[sequence[-1]][i] - else: - current_scores[i] *= transition_params[sequence[-1]][i] - sequence.append(self._last_max(current_scores)) - print(current_scores) - - # Handle the end state if necessary. Since this is greedy, we can - # simply multiply the final current_scores array by the end index - # and overwrite the end of the sequence. 
- if use_start_and_end_states: - for i in range(num_states): - if use_log_space: - current_scores[i] += transition_params[sequence[-1]][-1] - else: - current_scores[i] *= transition_params[sequence[-1]][-1] - sequence[-1] = self._last_max(current_scores) - - return sequence - - def test_sequence_in_exp_space_with_start_end_states_single_batch_item(self): - use_log_space = False - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4, .1], - [ .5, .6, .7, .8, .1], - [ .9, 1, .15, 1, .1], - [.25, .35, .45, .55, .5], - [ .1, .5, .1, .1, 1]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True, True], - [ True, True, True, True, True], - [ True, False, True, False, True], - [ True, True, True, True, True], - [ True, True, True, True, False]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. - single_input = np.array([scores], dtype=np.float32) - single_sequence_op = greedy_op.greedy_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_in_exp_space_with_start_end_states_multi_batch_item(self): - use_log_space = False - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4, .1], - [ .5, .6, .7, .8, .1], - [ .9, 1, .15, 1, .1], - [.25, .35, .45, .55, .5], - [ .1, .5, .1, .1, 1]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True, True], - [ True, True, True, True, True], - [ True, False, True, False, True], - [ True, True, True, True, True], - [ True, True, True, True, False]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = greedy_op.greedy_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_in_exp_space_without_start_end_states_single_batch_item( - self): - use_log_space = False - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4], - [ .5, .6, .7, .8], - [ .9, .1, .15, .2], - [.25, .35, .45, .55]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. - single_input = np.array([scores], dtype=np.float32) - single_sequence_op = greedy_op.greedy_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_in_exp_space_without_start_end_states_multi_batch_item( - self): - use_log_space = False - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4], - [ .5, .6, .7, .8], - [ .9, .1, .15, .2], - [.25, .35, .45, .55]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = greedy_op.greedy_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_in_log_space_with_start_end_states_single_batch_item(self): - use_log_space = True - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 7.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0, 0.0], - [ 3.0, -3.0, 4.0, -4.0, 0.0], - [ 5.0, 1.0, 10.0, 1.0, 1.0], - [-7.0, 7.0, -8.0, 8.0, 0.0], - [ 0.0, 1.0, 2.0, 3.0, 0.0]], - dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True, True], - [ True, True, True, True, True], - [ True, False, True, False, False], - [ True, True, True, True, True], - [ True, False, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. - single_input = np.array([scores], dtype=np.float32) - single_sequence_op = greedy_op.greedy_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_in_log_space_with_start_end_states_multi_batch_item(self): - use_log_space = True - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 7.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0, 0.0], - [ 3.0, -3.0, 4.0, -4.0, 0.0], - [ 5.0, 1.0, 10.0, 1.0, 1.0], - [-7.0, 7.0, -8.0, 8.0, 0.0], - [ 0.0, 1.0, 2.0, 3.0, 0.0]], - dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True, True], - [ True, True, True, True, True], - [ True, False, True, False, False], - [ True, True, True, True, True], - [ True, False, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = greedy_op.greedy_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_in_log_space_without_start_end_states_single_batch_item( - self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. - single_input = np.array([scores], dtype=np.float32) - single_sequence_op = greedy_op.greedy_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_in_log_space_without_start_end_states_multi_batch_item( - self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = greedy_op.greedy_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_with_none_weights_single_batch_item(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. - single_input = np.array([scores], dtype=np.float32) - single_sequence_op = greedy_op.greedy_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_with_none_weights_multi_batch_item(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = greedy_op.greedy_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_with_none_permissions_single_batch_item(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. - single_input = np.array([scores], dtype=np.float32) - single_sequence_op = greedy_op.greedy_constrained_sequence( - single_input, [2], - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_with_none_permissions_multi_input(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = greedy_op.greedy_constrained_sequence( - multiple_input, [2, 2, 2], - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_with_implicit_sequence_lengths(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence = self._decode_greedy_sequence( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = greedy_op.greedy_constrained_sequence( - multiple_input, - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_ragged_inputs(self): - use_log_space = True - use_start_and_end_states = False - input_1 = np.array([[10.0, 13.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0], - [13.0, 12.0, 11.0, 10.0]]) - input_2 = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # TODO(momernick): Extend RT support to lists-of-ndarrays. - scores = ragged_factory_ops.constant([input_1.tolist(), input_2.tolist()]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [3.0, -3.0, 4.0, -4.0], - [5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], - dtype=np.float32) - - allowed_transitions = np.array([[ True, True, True, True], - [ True, True, True, True], - [ True, False, True, False], - [ True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence_1 = self._decode_greedy_sequence( - input_1, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - sequence_2 = self._decode_greedy_sequence( - input_2, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - expected_sequence = ragged_factory_ops.constant([sequence_1, sequence_2]) - - # Test a ragged batch. - ragged_op = greedy_op.greedy_constrained_sequence( - scores, - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - ragged_result = self.evaluate(ragged_op) - self.assertAllEqual(ragged_result, expected_sequence) - - -if __name__ == "__main__": - test.main()
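The deleted tests above all follow the same recipe: per-step state scores, a pairwise transition-weight matrix, and a boolean allowed-transition mask are handed to the greedy decoder, optionally with explicit sequence lengths. A minimal standalone sketch of that call, assuming the op is exported at the top level as tensorflow_text.greedy_constrained_sequence (score and weight values are copied from the tests; everything else is illustrative):

import numpy as np
import tensorflow_text as tf_text

# One batch item with 2 steps and 4 candidate states.
scores = np.array([[[10.0, 12.0, 6.0, 4.0],
                    [13.0, 12.0, 11.0, 10.0]]], dtype=np.float32)
# 4x4 state-to-state weights and permissions (no start/end states).
transition_weights = np.array([[-1.0,  1.0, -2.0,  2.0],
                               [ 3.0, -3.0,  4.0, -4.0],
                               [ 5.0,  1.0, 10.0,  1.0],
                               [-7.0,  7.0, -8.0,  8.0]], dtype=np.float32)
allowed_transitions = np.array([[True, True,  True, True],
                                [True, True,  True, True],
                                [True, False, True, False],
                                [True, True,  True, True]])

decoded = tf_text.greedy_constrained_sequence(
    scores, [2],
    allowed_transitions=allowed_transitions,
    transition_weights=transition_weights,
    use_log_space=True,
    use_start_and_end_states=False)
# `decoded` holds the chosen state index for each of the two steps.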
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_splitter.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_splitter.py
deleted file mode 100644
index 0fa6cb4..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_splitter.py
+++ /dev/null
@@ -1,183 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Splitter that uses a Hub module.""" - -import tensorflow_hub as hub -from tensorflow.python.eager import monitoring -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow_text.python.ops.splitter import SplitterWithOffsets - -_tf_text_hub_module_splitter_create_counter = monitoring.Counter( - '/nlx/api/python/hub_module_splitter_create_counter', - 'Counter for number of HubModuleSplitters created in Python.') - - -class HubModuleSplitter(SplitterWithOffsets): - r"""Splitter that uses a Hub module. - - The TensorFlow graph from the module performs the real work. The Python code - from this class handles the details of interfacing with that module, as well - as the support for ragged tensors and high-rank tensors. - - The Hub module should be supported by `hub.load() - <https://www.tensorflow.org/hub/api_docs/python/hub/load>`_ If a v1 module, it - should have a graph variant with an empty set of tags; we consider that graph - variant to be the module and ignore everything else. The module should have a - signature named `default` that takes a `text` input (a rank-1 tensor of - strings to split into pieces) and returns a dictionary of tensors, let's say - `output_dict`, such that: - - * `output_dict['num_pieces']` is a rank-1 tensor of integers, where - num_pieces[i] is the number of pieces that text[i] was split into. - - * `output_dict['pieces']` is a rank-1 tensor of strings containing all pieces - for text[0] (in order), followed by all pieces for text[1] (in order) and so - on. - - * `output_dict['starts']` is a rank-1 tensor of integers with the byte offsets - where the pieces start (relative to the beginning of the corresponding input - string). - - * `output_dict['end']` is a rank-1 tensor of integers with the byte offsets - right after the end of the tokens (relative to the beginning of the - corresponding input string). - - The output dictionary may contain other tensors (e.g., for debugging) but this - class is not using them. - - Example: - - >>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" - >>> segmenter = HubModuleSplitter(HUB_MODULE) - >>> segmenter.split(["新华社北京"]) - <tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> - - You can also use this tokenizer to return the split strings and their offsets: - - >>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" - >>> segmenter = HubModuleSplitter(HUB_MODULE) - >>> pieces, starts, ends = segmenter.split_with_offsets(["新华社北京"]) - >>> print("pieces: %s starts: %s ends: %s" % (pieces, starts, ends)) - pieces: <tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> - starts: <tf.RaggedTensor [[0, 9]]> - ends: <tf.RaggedTensor [[9, 15]]> - - - Currently, this class also supports an older API, which uses slightly - different key names for the output dictionary. 
For new Hub modules, please - use the API described above. - """ - - def __init__(self, hub_module_handle): - """Initializes a new HubModuleSplitter instance. - - Args: - hub_module_handle: A string handle accepted by hub.load(). Supported - cases include (1) a local path to a directory containing a module, and - (2) a handle to a module uploaded to e.g., https://tfhub.dev. The - module should implement the signature described in the docstring for - this class. - """ - super(HubModuleSplitter, self).__init__() - empty_tags = set() - self._hub_module = hub.load(hub_module_handle, tags=empty_tags) - self._hub_module_signature = self._hub_module.signatures['default'] - _tf_text_hub_module_splitter_create_counter.get_cell().increase_by(1) - - def _predict_pieces(self, input_strs): - output_dict = self._hub_module_signature(text=input_strs) - if 'tokens' in output_dict: - # Use the legacy hub module API. That API was originally intended only - # for tokenization, hence the 'token'-heavy string literals: - pieces = output_dict['tokens'] - num_pieces = output_dict['num_tokens'] - starts = output_dict['starts'] - ends = output_dict['ends'] - else: - pieces = output_dict['pieces'] - num_pieces = output_dict['num_pieces'] - starts = output_dict['starts'] - ends = output_dict['ends'] - - pieces = ragged_tensor.RaggedTensor.from_row_lengths( - pieces, row_lengths=num_pieces) - starts = ragged_tensor.RaggedTensor.from_row_lengths( - starts, row_lengths=num_pieces) - ends = ragged_tensor.RaggedTensor.from_row_lengths( - ends, row_lengths=num_pieces) - return pieces, starts, ends - - def split_with_offsets(self, input_strs): - """Splits a tensor of UTF-8 strings into pieces with [start,end) offsets. - - Args: - input_strs: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A tuple `(pieces, start_offsets, end_offsets)` where: - * `pieces` is a `RaggedTensor` of strings where `pieces[i1...iN, j]` is - the string content of the `j-th` piece in `input_strs[i1...iN]` - * `start_offsets` is a `RaggedTensor` of int64s where - `start_offsets[i1...iN, j]` is the byte offset for the start of the - `j-th` piece in `input_strs[i1...iN]`. - * `end_offsets` is a `RaggedTensor` of int64s where - `end_offsets[i1...iN, j]` is the byte offset immediately after the - end of the `j-th` piece in `input_strs[i...iN]`. - """ - input_strs = ragged_tensor.convert_to_tensor_or_ragged_tensor(input_strs) - rank = input_strs.shape.ndims - if rank is None: - raise ValueError('input must have a known rank.') - - # Currently, the hub_module accepts only rank 1 input tensors, and outputs - # rank 2 pieces/starts/ends. To handle input of different ranks (0, 2, 3, - # etc), we first convert the input into a rank 1 tensor, then run the - # module, and finally convert the output back to the expected shape. - if rank == 0: - # Build a rank 1 input batch with one string. 
- input_batch = array_ops.stack([input_strs]) - # [1, (number pieces)] - pieces, starts, ends = self._predict_pieces(input_batch) - return pieces.flat_values, starts.flat_values, ends.flat_values - elif rank == 1: - return self._predict_pieces(input_strs) - else: - if not ragged_tensor.is_ragged(input_strs): - input_strs = ragged_tensor.RaggedTensor.from_tensor( - input_strs, ragged_rank=rank - 1) - - # [number strings, (number pieces)] - pieces, starts, ends = self._predict_pieces(input_strs.flat_values) - pieces = input_strs.with_flat_values(pieces) - starts = input_strs.with_flat_values(starts) - ends = input_strs.with_flat_values(ends) - return pieces, starts, ends - - def split(self, input_strs): - """Splits a tensor of UTF-8 strings into pieces. - - Args: - input_strs: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A `RaggedTensor` of segmented text. The returned shape is the shape of the - input tensor with an added ragged dimension for the pieces of each string. - """ - pieces, _, _ = self.split_with_offsets(input_strs) - return pieces
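Taken together, the class docstring and split_with_offsets() above define the splitter's whole public surface. A short usage sketch, with the module handle and call pattern lifted from the removed docstring (the top-level tensorflow_text.HubModuleSplitter export name is an assumption here):

import tensorflow_text as tf_text

HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1"  # from the docstring
segmenter = tf_text.HubModuleSplitter(HUB_MODULE)

# Pieces only: one ragged row of UTF-8 byte strings per input string.
pieces = segmenter.split(["新华社北京"])

# Pieces plus [start, end) byte offsets into each input string.
pieces, starts, ends = segmenter.split_with_offsets(["新华社北京"])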
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_splitter_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_splitter_test.py
deleted file mode 100644
index c1611fc6..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_splitter_test.py
+++ /dev/null
@@ -1,122 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# encoding=utf-8 -"""Tests for HubModuleSplitter.""" - -from absl.testing import parameterized - -from tensorflow.python.framework import test_util -from tensorflow.python.lib.io import file_io -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import variables as variables_lib -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow.python.saved_model import save -from tensorflow_text.python.ops import hub_module_splitter - - -def _Utf8(char): - return char.encode("utf-8") - - -@test_util.run_all_in_graph_and_eager_modes -class HubModuleSplitterTest(parameterized.TestCase, test.TestCase): - - @parameterized.parameters([ - # Test scalar input. - dict( - text_input=_Utf8(u"新华社北京"), - expected_pieces=[_Utf8(u"新华社"), _Utf8(u"北京")], - expected_starts=[0, 9], - expected_ends=[9, 15] - ), - # Test rank 1 input. - dict( - text_input=[_Utf8(u"新华社北京"), _Utf8(u"中文测试")], - expected_pieces=[[_Utf8(u"新华社"), _Utf8(u"北京")], - [_Utf8(u"中文"), _Utf8(u"测试")]], - expected_starts=[[0, 9], [0, 6]], - expected_ends=[[9, 15], [6, 12]] - ), - # Test rank 2 ragged input. - dict( - text_input=ragged_factory_ops.constant_value( - [[_Utf8(u"新华社北京"), _Utf8(u"中文测试")], - [_Utf8(u"新华社上海")]]), - expected_pieces=[[[_Utf8(u"新华社"), _Utf8(u"北京")], - [_Utf8(u"中文"), _Utf8(u"测试")]], - [[_Utf8(u"新华社"), _Utf8(u"上海")]]], - expected_starts=[[[0, 9], [0, 6]], [[0, 9]]], - expected_ends=[[[9, 15], [6, 12]], [[9, 15]]] - ), - # Test rank 2 dense input. - dict( - text_input=ragged_factory_ops.constant_value( - [[_Utf8(u"新华社北京"), _Utf8(u"中文测试")], - [_Utf8(u"新华社上海"), _Utf8(u"英国交通")]]), - expected_pieces=[[[_Utf8(u"新华社"), _Utf8(u"北京")], - [_Utf8(u"中文"), _Utf8(u"测试")]], - [[_Utf8(u"新华社"), _Utf8(u"上海")], - [_Utf8(u"英国"), _Utf8(u"交通")]]], - expected_starts=[[[0, 9], [0, 6]], [[0, 9], [0, 6]]], - expected_ends=[[[9, 15], [6, 12]], [[9, 15], [6, 12]]] - ), - # Test ragged input with rank higher than 2. 
- dict( - text_input=ragged_factory_ops.constant_value( - [ - [[_Utf8(u"新华社北京")], [_Utf8(u"中文测试")]], - [[_Utf8(u"新华社上海")]] - ]), - expected_pieces=[ - [[[_Utf8(u"新华社"), _Utf8(u"北京")]], - [[_Utf8(u"中文"), _Utf8(u"测试")]]], - [[[_Utf8(u"新华社"), _Utf8(u"上海")]]]], - expected_starts=[ - [[[0, 9]], [[0, 6]]], - [[[0, 9]]]], - expected_ends=[ - [[[9, 15]], [[6, 12]]], - [[[9, 15]]]] - ) - ]) - def testSplit(self, - text_input, - expected_pieces, - expected_starts, - expected_ends): - hub_module_handle = ("tensorflow_text/python/ops/test_data/" - "segmenter_hub_module") - splitter = hub_module_splitter.HubModuleSplitter(hub_module_handle) - pieces, starts, ends = splitter.split_with_offsets(text_input) - pieces_no_offset = splitter.split(text_input) - self.evaluate(lookup_ops.tables_initializer()) - self.evaluate(variables_lib.global_variables_initializer()) - self.assertAllEqual(expected_pieces, pieces) - self.assertAllEqual(expected_starts, starts) - self.assertAllEqual(expected_ends, ends) - self.assertAllEqual(expected_pieces, pieces_no_offset) - - def exportSavedModel(self): - hub_module_handle = ("tensorflow_text/python/ops/test_data/" - "segmenter_hub_module") - splitter = hub_module_splitter.HubModuleSplitter(hub_module_handle) - save.save(splitter, "ram://saved_model") - self.assertEqual(file_io.file_exists_v2("ram://saved_model"), True) - - -if __name__ == "__main__": - test.main()
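One detail worth spelling out in the expected values above: starts and ends are UTF-8 byte offsets, not character offsets, and every CJK character in the test strings encodes to three bytes. A quick check of the arithmetic (plain Python, no TensorFlow needed):

text = u"新华社北京"
assert len(u"新华社".encode("utf-8")) == 9   # first piece covers bytes [0, 9)
assert len(text.encode("utf-8")) == 15       # second piece ends at byte 15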
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_tokenizer.py
deleted file mode 100644
index 17dc9830..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_tokenizer.py
+++ /dev/null
@@ -1,95 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tokenizer that uses a Hub module.""" - -from tensorflow.python.eager import monitoring -from tensorflow_text.python.ops import hub_module_splitter -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -_tf_text_hub_module_tokenizer_create_counter = monitoring.Counter( - '/nlx/api/python/hub_module_tokenizer_create_counter', - 'Counter for number of HubModuleTokenizers created in Python.') - - -class HubModuleTokenizer(TokenizerWithOffsets): - r"""Tokenizer that uses a Hub module. - - This class is just a wrapper around an internal HubModuleSplitter. It offers - the same functionality, but with 'token'-based method names: e.g., one can use - tokenize() instead of the more general and less informatively named split(). - - Example: - - >>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" - >>> segmenter = HubModuleTokenizer(HUB_MODULE) - >>> segmenter.tokenize(["新华社北京"]) - <tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> - - You can also use this tokenizer to return the split strings and their offsets: - - >>> HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1" - >>> segmenter = HubModuleTokenizer(HUB_MODULE) - >>> pieces, starts, ends = segmenter.tokenize_with_offsets(["新华社北京"]) - >>> print("pieces: %s starts: %s ends: %s" % (pieces, starts, ends)) - pieces: <tf.RaggedTensor [[b'\xe6\x96\xb0\xe5\x8d\x8e\xe7\xa4\xbe', - b'\xe5\x8c\x97\xe4\xba\xac']]> - starts: <tf.RaggedTensor [[0, 9]]> - ends: <tf.RaggedTensor [[9, 15]]> - - """ - - def __init__(self, hub_module_handle): - """Initializes a new HubModuleTokenizer instance. - - Args: - hub_module_handle: A string handle accepted by hub.load(). Supported - cases include (1) a local path to a directory containing a module, and - (2) a handle to a module uploaded to e.g., https://tfhub.dev - """ - super(HubModuleTokenizer, self).__init__() - self._splitter = hub_module_splitter.HubModuleSplitter(hub_module_handle) - - def tokenize_with_offsets(self, input_strs): - """Tokenizes a tensor of UTF-8 strings into words with [start,end) offsets. - - Args: - input_strs: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - * `tokens` is a `RaggedTensor` of strings where `tokens[i1...iN, j]` is - the string content of the `j-th` token in `input_strs[i1...iN]` - * `start_offsets` is a `RaggedTensor` of int64s where - `start_offsets[i1...iN, j]` is the byte offset for the start of the - `j-th` token in `input_strs[i1...iN]`. - * `end_offsets` is a `RaggedTensor` of int64s where - `end_offsets[i1...iN, j]` is the byte offset immediately after the - end of the `j-th` token in `input_strs[i...iN]`. - """ - return self._splitter.split_with_offsets(input_strs) - - def tokenize(self, input_strs): - """Tokenizes a tensor of UTF-8 strings into words. 
- - Args: - input_strs: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A `RaggedTensor` of segmented text. The returned shape is the shape of the - input tensor with an added ragged dimension for tokens of each string. - """ - return self._splitter.split(input_strs)
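Because HubModuleTokenizer simply forwards to HubModuleSplitter, its usage mirrors the splitter sketch above, only with token-flavored method names (handle taken from the docstring; the tensorflow_text.HubModuleTokenizer export name is assumed):

import tensorflow_text as tf_text

HUB_MODULE = "https://tfhub.dev/google/zh_segmentation/1"
tokenizer = tf_text.HubModuleTokenizer(HUB_MODULE)
tokens = tokenizer.tokenize(["新华社北京"])
tokens, starts, ends = tokenizer.tokenize_with_offsets(["新华社北京"])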
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_tokenizer_test.py
deleted file mode 100644
index b43a0ef3..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/hub_module_tokenizer_test.py
+++ /dev/null
@@ -1,113 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# encoding=utf-8 -"""Tests for HubModuleTokenizer.""" - -from absl.testing import parameterized - -from tensorflow.python.framework import test_util -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import variables as variables_lib -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import hub_module_tokenizer # pylint: disable=line-too-long - - -def _Utf8(char): - return char.encode("utf-8") - - -@test_util.run_all_in_graph_and_eager_modes -class HubModuleTokenizerTest(parameterized.TestCase, test.TestCase): - - @parameterized.parameters([ - # Test scalar input. - dict( - text_input=_Utf8(u"新华社北京"), - expected_tokens=[_Utf8(u"新华社"), _Utf8(u"北京")], - expected_starts=[0, 9], - expected_ends=[9, 15] - ), - # Test rank 1 input. - dict( - text_input=[_Utf8(u"新华社北京"), _Utf8(u"中文测试")], - expected_tokens=[[_Utf8(u"新华社"), _Utf8(u"北京")], - [_Utf8(u"中文"), _Utf8(u"测试")]], - expected_starts=[[0, 9], [0, 6]], - expected_ends=[[9, 15], [6, 12]] - ), - # Test rank 2 ragged input. - dict( - text_input=ragged_factory_ops.constant_value( - [[_Utf8(u"新华社北京"), _Utf8(u"中文测试")], - [_Utf8(u"新华社上海")]]), - expected_tokens=[[[_Utf8(u"新华社"), _Utf8(u"北京")], - [_Utf8(u"中文"), _Utf8(u"测试")]], - [[_Utf8(u"新华社"), _Utf8(u"上海")]]], - expected_starts=[[[0, 9], [0, 6]], [[0, 9]]], - expected_ends=[[[9, 15], [6, 12]], [[9, 15]]] - ), - # Test rank 2 dense input. - dict( - text_input=ragged_factory_ops.constant_value( - [[_Utf8(u"新华社北京"), _Utf8(u"中文测试")], - [_Utf8(u"新华社上海"), _Utf8(u"英国交通")]]), - expected_tokens=[[[_Utf8(u"新华社"), _Utf8(u"北京")], - [_Utf8(u"中文"), _Utf8(u"测试")]], - [[_Utf8(u"新华社"), _Utf8(u"上海")], - [_Utf8(u"英国"), _Utf8(u"交通")]]], - expected_starts=[[[0, 9], [0, 6]], [[0, 9], [0, 6]]], - expected_ends=[[[9, 15], [6, 12]], [[9, 15], [6, 12]]] - ), - # Test ragged input with rank higher than 2. 
- dict( - text_input=ragged_factory_ops.constant_value( - [ - [[_Utf8(u"新华社北京")], [_Utf8(u"中文测试")]], - [[_Utf8(u"新华社上海")]] - ]), - expected_tokens=[ - [[[_Utf8(u"新华社"), _Utf8(u"北京")]], - [[_Utf8(u"中文"), _Utf8(u"测试")]]], - [[[_Utf8(u"新华社"), _Utf8(u"上海")]]]], - expected_starts=[ - [[[0, 9]], [[0, 6]]], - [[[0, 9]]]], - expected_ends=[ - [[[9, 15]], [[6, 12]]], - [[[9, 15]]]] - ) - ]) - def testTokenize(self, - text_input, - expected_tokens, - expected_starts, - expected_ends): - hub_module_handle = ("tensorflow_text/python/ops/test_data/" - "segmenter_hub_module") - segmenter = hub_module_tokenizer.HubModuleTokenizer(hub_module_handle) - tokens, starts, ends = segmenter.tokenize_with_offsets(text_input) - tokens_no_offset = segmenter.tokenize(text_input) - self.evaluate(lookup_ops.tables_initializer()) - self.evaluate(variables_lib.global_variables_initializer()) - self.assertAllEqual(expected_tokens, tokens) - self.assertAllEqual(expected_starts, starts) - self.assertAllEqual(expected_ends, ends) - self.assertAllEqual(expected_tokens, tokens_no_offset) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/item_selector_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/item_selector_ops.py
deleted file mode 100644
index 521b09b1..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/item_selector_ops.py
+++ /dev/null
@@ -1,392 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops for selecting items in RaggedTensors.""" -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_array_ops -from tensorflow.python.ops import map_fn -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.ragged import ragged_array_ops -from tensorflow.python.ops.ragged import ragged_functional_ops -from tensorflow.python.ops.ragged import ragged_math_ops -from tensorflow.python.ops.ragged import ragged_tensor - - -class ItemSelector(object): - """A class encapsulating the logic for selecting items. - - `ItemSelector` implementations contain algorithms for selecting items in a - `RaggedTensor`. Users of `ItemSelector` implementations can call - `get_selection_mask()` to retrieve a bool `RaggedTensor` mask indicating the - items that have been selected. For example: - - >>> inputs = tf.ragged.constant([ - ... [1, 2, 3, 4], - ... [100, 200] - ... ]) - >>> tf.random.set_seed(1234) - >>> selector = RandomItemSelector(3, .5) - >>> selected = selector.get_selection_mask(inputs, axis=1) - >>> print(selected) - <tf.RaggedTensor [[False, True, True, False], - [False, True]]> - - - An important use case for these classes is in creating inputs for masked - language model training dataset preparation. See `masked_language_model` for - an example of using the selectors in such a context. - - Subclass writers will typically implement a selection algorithm by overriding - `get_selection_mask()`. - - A helper function `get_selectable()` is provided to help subclass writers - filter out excluded items from selection (e.g. CLS and SEP for bert style - models). This will frequently serve as a prefilter for subclass item - selection (see e.g. the implementation of `RandomItemSelector`. The base class - behavior is to simply return the mask obtained by filtering out items listed - in `unselectable_ids`. - """ - - def __init__(self, unselectable_ids=None): - """Creates an instance of a `ItemSelector`. - - Args: - unselectable_ids: a list, or `Tensor` of ids that are not selectable. 
- """ - if unselectable_ids is None: - unselectable_ids = [] - if isinstance(unselectable_ids, list): - self._unselectable_ids = unselectable_ids - elif isinstance(unselectable_ids, ops.Tensor): - if unselectable_ids.shape.rank not in (1, None): - raise ValueError(f"`unselectable_ids` must have a rank of 1 or None, " - f"but was: {unselectable_ids.shape.rank}") - self._unselectable_ids = array_ops.unstack(unselectable_ids) - else: - raise ValueError("`unselectable_ids` must be either a list or " + - "`1 dimensional Tensor`, instead it is a " + - str(unselectable_ids)) - - @property - def unselectable_ids(self): - return self._unselectable_ids - - def get_selectable(self, input_ids, axis): - """Return a boolean mask of items that can be chosen for selection. - - The default implementation marks all items whose IDs are not in the - `unselectable_ids` list. This can be overridden if there is a need for - a more complex or algorithmic approach for selectability. - - Args: - input_ids: a `RaggedTensor`. - axis: axis to apply selection on. - - Returns: - a `RaggedTensor` with dtype of bool and with shape - `input_ids.shape[:axis]`. Its values are True if the - corresponding item (or broadcasted subitems) should be considered for - masking. In the default implementation, all `input_ids` items that are not - listed in `unselectable_ids` (from the class arg) are considered - selectable. - """ - # merge to the desired axis - input_ids = input_ids.merge_dims(1, axis) if axis > 1 else input_ids - - all_selectable_flats = [ - ragged_functional_ops.map_flat_values(math_ops.not_equal, input_ids, - i).flat_values - for i in self._unselectable_ids - ] - - # if there are no blacklisted ids, mark everything as selectable - if all_selectable_flats: - reduce_flat = math_ops.reduce_all(all_selectable_flats, axis=0) - else: - reduce_flat = array_ops.ones_like( - input_ids.flat_values, dtype=dtypes.bool) - - # reduce to the requested axis and broadcast to match original shape - axis = array_ops.get_positive_axis( - axis, input_ids.ragged_rank + input_ids.flat_values.shape.rank) - results = input_ids.with_flat_values(reduce_flat) - if axis < input_ids.ragged_rank: - reduce_axis = list(range(input_ids.ragged_rank, axis, -1)) - results = math_ops.reduce_all(results, reduce_axis) - - return results - - def get_selection_mask(self, input_ids, axis=1): - """Returns a mask of items that have been selected. - - The default implementation simply returns all items not excluded by - `get_selectable`. - - Args: - input_ids: A `RaggedTensor`. - axis: (optional) An int detailing the dimension to apply selection on. - Default is the 1st dimension. - - Returns: - a `RaggedTensor` with shape `input_ids.shape[:axis]`. Its values are True - if the corresponding item (or broadcasted subitems) should be selected. - """ - return self.get_selectable(input_ids, axis) - - -class RandomItemSelector(ItemSelector): - """An `ItemSelector` implementation that randomly selects items in a batch. - - `RandomItemSelector` randomly selects items in a batch subject to - restrictions given (max_selections_per_batch, selection_rate and - unselectable_ids). - - Example: - >>> vocab = ["[UNK]", "[MASK]", "[RANDOM]", "[CLS]", "[SEP]", - ... "abc", "def", "ghi"] - >>> # Note that commonly in masked language model work, there are - >>> # special tokens we don't want to mask, like CLS, SEP, and probably - >>> # any OOV (out-of-vocab) tokens here called UNK. - >>> # Note that if e.g. 
there are bucketed OOV tokens in the code, - >>> # that might be a use case for overriding `get_selectable()` to - >>> # exclude a range of IDs rather than enumerating them. - >>> tf.random.set_seed(1234) - >>> selector = tf_text.RandomItemSelector( - ... max_selections_per_batch=2, - ... selection_rate=0.2, - ... unselectable_ids=[0, 3, 4]) # indices of UNK, CLS, SEP - >>> selection = selector.get_selection_mask( - ... tf.ragged.constant([[3, 5, 7, 7], [4, 6, 7, 5]]), axis=1) - >>> print(selection) - <tf.RaggedTensor [[False, False, False, True], [False, False, True, False]]> - - The selection has skipped the first elements (the CLS and SEP token codings) - and picked random elements from the other elements of the segments -- if - run with a different random seed the selections might be different. - """ - - def __init__(self, - max_selections_per_batch, - selection_rate, - unselectable_ids=None, - shuffle_fn=None): - """Creates instance of `RandomItemSelector`. - - By default the source of randomness will be the one set by - tf.random.set_seed. Users can adjust this independently by providing - a separate `shuffle_fn` to the selector. - - Args: - max_selections_per_batch: An int of the max number of items to mask out. - selection_rate: The rate at which items are randomly selected. - unselectable_ids: (optional) A list of python ints or 1D `Tensor` of ints - which are ids that will be not be masked. - shuffle_fn: (optional) A function that shuffles a 1D `Tensor`. Default - uses `tf.random.shuffle`. - """ - if selection_rate is None: - raise ValueError("`selection_rate` cannot be None") - if shuffle_fn is None: - self._shuffle_fn = random_ops.random_shuffle - else: - self._shuffle_fn = shuffle_fn - - self._max_selections_per_batch = max_selections_per_batch - self._selection_rate = selection_rate - super(RandomItemSelector, self).__init__(unselectable_ids) - - @property - def shuffle_fn(self): - return self._shuffle_fn - - @property - def max_selections_per_batch(self): - return self._max_selections_per_batch - - @property - def selection_rate(self): - return self._selection_rate - - def get_selection_mask(self, input_ids, axis): - selectable = self.get_selectable(input_ids, axis) - - # Run the selection algorithm on positions RT - positions_flat = math_ops.range(array_ops.size(input_ids.flat_values)) - positions = input_ids.with_flat_values(positions_flat) - # Mask out positions that are not selectable - positions = ragged_array_ops.boolean_mask(positions, selectable) - - # merge to the desired axis - positions = positions.merge_dims(1, axis) if axis > 1 else positions - - # Figure out how many we are going to select - num_to_select = math_ops.ceil( - math_ops.cast(positions.row_lengths(), dtypes.float32) * - self.selection_rate) - num_to_select = math_ops.minimum(num_to_select, - self.max_selections_per_batch) - num_to_select = math_ops.cast(num_to_select, dtypes.int64) - - # Shuffle and trim to items that are going to be selected - def _shuffle_and_trim(x): - positions, top_n = x - if isinstance(positions, ragged_tensor.RaggedTensor): - positions_at_axis = math_ops.range(positions.nrows()) - chosen_positions_at_axis = self._shuffle_fn(positions_at_axis)[:top_n] - return array_ops.gather(positions, chosen_positions_at_axis) - else: - shuffled = self._shuffle_fn(positions) - return shuffled[:top_n] - - selected_for_mask = map_fn.map_fn( - _shuffle_and_trim, (positions, num_to_select), - fn_output_signature=ragged_tensor.RaggedTensorSpec( - ragged_rank=positions.ragged_rank - 1, 
dtype=positions.dtype)) - selected_for_mask.flat_values.set_shape([None]) - - # Construct the result which is a boolean RT - # Scatter 1's to positions that have been selected_for_mask - update_values = array_ops.ones_like(selected_for_mask.flat_values) - update_values = math_ops.cast(update_values, input_ids.dtype) - update_indices = selected_for_mask.flat_values - update_indices = array_ops.expand_dims(update_indices, -1) - update_indices = math_ops.cast(update_indices, input_ids.dtype) - - results_flat = array_ops.zeros_like(input_ids.flat_values) - results_flat = gen_array_ops.tensor_scatter_update( - results_flat, update_indices, update_values) - results = math_ops.cast( - input_ids.with_flat_values(results_flat), dtypes.bool) - - if axis < results.ragged_rank: - reduce_axis = list(range(results.ragged_rank, axis, -1)) - results = math_ops.reduce_all(results, reduce_axis) - return results - - -def _get_row_lengths_merged_to_axis(segments, axis=-1): - """Get the row lengths relative to a desired axis.""" - axis = array_ops.get_positive_axis(axis, segments.shape.ndims) - 1 - row_lengths = ragged_tensor.RaggedTensor.from_nested_row_lengths( - segments.nested_row_lengths()[axis], - segments.nested_row_lengths()[:axis]) - for _ in range(axis): - row_lengths = math_ops.reduce_sum(row_lengths, -1) - return row_lengths - - -def _get_selection_mask(original, num_to_select, axis=-1): - """Get a selection mask given how many items to select.""" - num_to_select = ops.convert_to_tensor(num_to_select) - num_to_select = array_ops.reshape(num_to_select, [-1]) - row_lengths = _get_row_lengths_merged_to_axis(original, axis) - num_to_select = array_ops.broadcast_to(num_to_select, - array_ops.shape(row_lengths)) - num_to_select = math_ops.cast(num_to_select, row_lengths.dtype) - num_to_select = math_ops.minimum(num_to_select, row_lengths) - ones = array_ops.ones_like(ragged_math_ops.range(num_to_select)) - ones = math_ops.cast(ones, dtypes.int32) - zeros_row_length = row_lengths - num_to_select - zeros = math_ops.cast( - array_ops.zeros_like(ragged_math_ops.range(zeros_row_length)), - dtypes.int32) - results = array_ops.concat([ones, zeros], 1) - results = math_ops.cast(results, dtypes.bool) - return results - - -class FirstNItemSelector(ItemSelector): - """An `ItemSelector` that selects the first `n` items in the batch.""" - - def __init__(self, num_to_select, unselectable_ids=None): - """Creates an instance of `FirstNItemSelector`. - - Example: - >>> selector = FirstNItemSelector(2) - >>> selection = selector.get_selection_mask( - ... tf.ragged.constant([[1, 2, 3, 4], [5, 6, 7, 8]]), axis=1) - >>> print(selection) - <tf.RaggedTensor [[True, True, False, False], [True, True, False, False]]> - - This kind of selection mechanism is useful for batch trimming operations, - e.g. for `RoundRobinTrimmer`. - - Args: - num_to_select: An int which is the leading number of items to select. - unselectable_ids: (optional) A list of int ids that cannot be selected. - Default is empty list. 
- """ - super(FirstNItemSelector, self).__init__(unselectable_ids) - self._num_to_select = num_to_select - - def get_selectable(self, input_ids, axis): - """See `get_selectable()` in superclass.""" - selectable = super(FirstNItemSelector, self).get_selectable(input_ids, axis) - axis = array_ops.get_positive_axis( - axis, input_ids.ragged_rank + input_ids.flat_values.shape.rank) - # Create a positions RT and mask out positions that are not selectable - positions_flat = math_ops.range(array_ops.size(input_ids.flat_values)) - positions = input_ids.with_flat_values(positions_flat) - selectable_positions = ragged_array_ops.boolean_mask(positions, selectable) - - # merge to the desired axis - selectable_positions = selectable_positions.merge_dims( - 1, axis) if axis > 1 else selectable_positions - - # Get a selection mask based off of how many items are desired for selection - merged_axis = axis - (axis - 1) - selection_mask = _get_selection_mask(selectable_positions, - self._num_to_select, merged_axis) - # Mask out positions that were not selected. - selected_positions = ragged_array_ops.boolean_mask(selectable_positions, - selection_mask) - - # Now that we have all the positions which were chosen, we recreate a mask - # (matching the original input's shape) where the value is True if it was - # selected. We do this by creating a "all false" RT and scattering true - # values to the positions chosen for selection. - all_true = selected_positions.with_flat_values( - array_ops.ones_like(selected_positions.flat_values)) - all_false = math_ops.cast( - array_ops.zeros(array_ops.shape(input_ids.flat_values)), dtypes.int32) - results_flat = array_ops.tensor_scatter_update( - all_false, array_ops.expand_dims(selected_positions.flat_values, -1), - all_true.flat_values) - results = input_ids.with_flat_values(results_flat) - results = math_ops.cast(results, dtypes.bool) - - # Reduce until input.shape[:axis] - for _ in range(input_ids.shape.ndims - axis - 1): - results = math_ops.reduce_all(results, -1) - return results - - -class NothingSelector(ItemSelector): - """An `ItemSelector` that selects nothing.""" - - def __init__(self): - super(NothingSelector, self).__init__([]) - - def get_selectable(self, tokens, axis): - """Returns a prefilter mask which excludes all items.""" - flat_false_values = math_ops.cast( - array_ops.zeros_like(tokens.flat_values), dtypes.bool) - results = tokens.with_flat_values(flat_false_values) - for _ in range(tokens.ragged_rank - axis): - results = math_ops.reduce_all(results, -1) - return results
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/item_selector_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/item_selector_ops_test.py
deleted file mode 100644
index 8ea0e18..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/item_selector_ops_test.py
+++ /dev/null
@@ -1,276 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for ItemSelectors.""" -import functools - -from absl.testing import parameterized - -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_array_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import item_selector_ops - - -@test_util.run_all_in_graph_and_eager_modes -class FirstNItemSelectorTest(test.TestCase, parameterized.TestCase): - # pyformat: disable - @parameterized.parameters([ - dict( - description="Basic test on 2D `RaggedTensor`", - masking_inputs=[ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40], - [100, 200, 300, 400, 500] - ], - expected_selectable=[ - [1, 2], - [10, 20], - [100, 200] - ], - ), - dict( - description="Test broadcast", - masking_inputs=[ - [[1, 2], [3], [4, 5, 6]], - [[10, 20], [30, 40]], - [[100, 200], [300, 400, 500]] - ], - expected_selectable=[ - [[1, 2], [3]], - [[10, 20], [30, 40]], - [[100, 200], [300, 400, 500]] - ], - ), - dict( - description="Select the first two items. Test broadcast and " + - "dropping nonselectable ids.", - masking_inputs=[ - [[1, 2], [3], [4, 5, 6]], - [[10, 20], [30, 40]], - [[100, 200], [300, 400, 500]] - ], - unselectable_ids=[1, 200], - expected_selectable=[ - [[3], [4, 5, 6]], - [[10, 20], [30, 40]], - [[300, 400, 500]]], - axis=1, - ), - dict( - description="Select the first two items on axis=-1.", - masking_inputs=[ - [[b"hello"], [b"there"]], - [[b"name", b"is"]], - [[b"what", b"time"], [b"is"], [b"it"], [b"?"]], - ], - expected_selectable=[ - [[b"hello"], [b"there"]], - [[b"name", b"is"]], - [[b"what", b"time"], [], [], []]], - axis=-1, - ), - dict( - description="Select the first two items on axis=1.", - masking_inputs=[ - [[b"hello"], [b"there"]], - [[b"name", b"is"]], - [[b"what", b"time"], [b"is"], [b"it"], [b"?"]], - ], - expected_selectable=[ - [[b"hello"], [b"there"]], - [[b"name", b"is"]], - [[b"what", b"time"], [b"is"]] - ], - axis=1, - ), - dict( - description="num_to_select is a 2D Tensor", - masking_inputs=[ - [1, 2, 3], - [4, 5], - [6] - ], - expected_selectable=[ - [1, 2], - [4], - [6], - ], - num_to_select=[[2], [1], [1]], - axis=-1, - ), - ]) - # pyformat: enable - - def testGetSelectable(self, - masking_inputs, - expected_selectable, - num_to_select=2, - unselectable_ids=None, - axis=1, - description=""): - masking_inputs = ragged_factory_ops.constant(masking_inputs) - item_selector = item_selector_ops.FirstNItemSelector( - num_to_select=num_to_select, unselectable_ids=unselectable_ids) - selectable = item_selector.get_selectable(masking_inputs, axis) - actual_selection = ragged_array_ops.boolean_mask(masking_inputs, selectable) - self.assertAllEqual(actual_selection, expected_selectable) - - -@test_util.run_all_in_graph_and_eager_modes -class RandomItemSelectorTest(test.TestCase, parameterized.TestCase): - 
- # pyformat: disable - @parameterized.parameters([ - dict( - description="Basic test on 2D `RaggedTensor`", - masking_inputs=[ - [1, 2, 3, 4, 5, 6], - [10, 20, 30, 40], - [100, 200, 300, 400, 500] - ], - expected_selected_items=[ - [1, 2], - [10, 20], - [100, 200], - ], - ), - dict( - description="Test broadcast", - masking_inputs=[ - [[1, 2], [3], [4, 5, 6]], - [[10, 20], [30, 40]], - [[100, 200], [300, 400, 500]] - ], - expected_selected_items=[ - [[1, 2], [3]], - [[10, 20], [30, 40]], - [[100, 200], [300, 400, 500]] - ], - ), - dict( - description="Select the first two items that don't have " + - "unselectable ids; test that broadcasting works appropriately", - masking_inputs=[ - [[1, 2], [3], [4, 5, 6]], - [[10, 20], [30, 40]], - [[100, 200], [300, 400, 500]] - ], - unselectable_ids=[1, 200], - expected_selected_items=[ - [[3], [4, 5, 6]], - [[10, 20], [30, 40]], - [[300, 400, 500]] - ], - axis=1, - ), - dict( - description="Test shape[:axis+1]", - masking_inputs=[ - [[0, 1], [2, 3], [4, 5]], - [], - [[6, 7]] - ], - expected_selected_items=[ - [[0, 1], [2, 3]], - [], - [[6, 7]], - ], - axis=1, - ), - dict( - description="Test rank 3 ragged tensor selecting on axis=1", - masking_inputs=[ - [[101], [100], [2045], [1012], [102], [100], [2051], - [2003], [2009], [1029], [102]], - [[101], [100], [2292], [1996], [6077], [2041], [1029], - [102], [100], [1029], [102]]], - expected_selected_items=[ - [[101], [100]], - [[101], [100]], - ], - axis=1, - ), - dict( - description="Test rank 3 ragged tensor selecting on axis=1, but " + - "w/ reverse shuffle_fn", - masking_inputs=[ - [[101], [100], [2045], [1012], [102], [100], [2051], - [2003], [2009], [1029], [102]], - [[101], [100], [2292], [1996], [6077], [2041], [1029], - [102], [100], [1029], [102]]], - expected_selected_items=[ - [[1029], [102]], - [[1029], [102]], - ], - axis=1, - shuffle_fn="reverse", - ), - ]) - # pyformat: enable - def testGetSelectionMask(self, - masking_inputs, - expected_selected_items, - unselectable_ids=None, - axis=1, - shuffle_fn="", - description=""): - shuffle_fn = ( - functools.partial(array_ops.reverse, axis=[-1]) - if shuffle_fn == "reverse" else array_ops.identity) - masking_inputs = ragged_factory_ops.constant(masking_inputs) - item_selector = item_selector_ops.RandomItemSelector( - max_selections_per_batch=2, - selection_rate=1, - shuffle_fn=shuffle_fn, - unselectable_ids=unselectable_ids, - ) - selection_mask = item_selector.get_selection_mask(masking_inputs, axis) - selected_items = ragged_array_ops.boolean_mask(masking_inputs, - selection_mask) - self.assertAllEqual(selected_items, expected_selected_items) - - -@test_util.run_all_in_graph_and_eager_modes -class NothingSelectorTest(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - dict( - description="Basic test", - masking_inputs=[[[1, 2], [3], [4, 5, 6]], [[10, 20], [30, 40]], - [[100, 200], [300, 400, 500]]], - unselectable_ids=[1, 200], - expected_selected_items=[[], [], []], - ), - ]) - def testNothingSelector(self, - masking_inputs, - unselectable_ids, - expected_selected_items, - num_to_select=2, - axis=1, - description=""): - masking_inputs = ragged_factory_ops.constant(masking_inputs) - item_selector = item_selector_ops.NothingSelector() - selection_mask = item_selector.get_selectable(masking_inputs, axis) - selected_items = ragged_array_ops.boolean_mask(masking_inputs, - selection_mask) - self.assertAllEqual(selected_items, expected_selected_items) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/masking_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/masking_ops.py
deleted file mode 100644
index eca3d89..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/masking_ops.py
+++ /dev/null
@@ -1,352 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops for applying language model masking dynamically to inputs.""" - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.ragged import ragged_array_ops -from tensorflow.python.ops.ragged import ragged_math_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged import ragged_where_op -from tensorflow_text.python.ops import item_selector_ops - - -# TODO(b/166323018): Replace once tensor_scatter_nd_update for RaggedTensor is -# available. -def _ragged_tensor_scatter_nd_update(params, indices, updates): - """Version of tensor_scatter_nd_update() where the values are ragged.""" - # Create a RT in the shape of `params` and containing the "global" positions. - # Here "global" means the element position in the flat values Tensor. - global_positions_flat = math_ops.range(array_ops.size(params.flat_values)) - global_positions = params.with_flat_values(global_positions_flat) - - global_indices = array_ops.batch_gather(global_positions, indices) - update_indices = global_indices.flat_values - update_indices = array_ops.expand_dims(update_indices, -1) - update_indices = math_ops.cast(update_indices, params.dtype) - params_flat = params.flat_values - update_values = math_ops.cast(updates.flat_values, params_flat.dtype) - results_flat = array_ops.tensor_scatter_update( - params_flat, update_indices, update_values) - return params.with_flat_values(results_flat) - - -def _get_random(positions): - """Get a random tensor like `positions`.""" - flat_random = random_ops.random_uniform( - array_ops.shape(positions.flat_values), 0, 1, dtype=dtypes.float32) - return positions.with_flat_values(flat_random) - - -def _get_selected_item_positions(item_selector, input_ids, axis=1): - """Get the positions of the items that have been selected. - - Args: - item_selector: an instance of `ItemSelector`. - input_ids: a `RaggedTensor` with n dimensions, whose items will be - selected on. - axis: (optional) An int detailing the dimension to apply selection on. - Default is the 1st dimension. - - Returns: - A `RaggedTensor` of int64s, with rank 2, shape - [batch, (num_selections)] and whose values are the positions of items - that have been selected. 
- """ - original_input_ids = input_ids - - # select items for masking - selected_for_mask = item_selector.get_selection_mask(input_ids, axis) - - # create a positions RT - original_input_ids = ( - original_input_ids.merge_dims(1, -1) - if original_input_ids.ragged_rank > 1 else original_input_ids) - positions = ragged_math_ops.range(original_input_ids.row_lengths()) - positions = input_ids.with_flat_values(positions.flat_values) - - # drop out not-masked positions - results = ragged_array_ops.boolean_mask(positions, selected_for_mask) - results = results.merge_dims(1, -1) if results.ragged_rank > 1 else results - return results - - -def mask_language_model( - input_ids, - item_selector, - mask_values_chooser, - axis=1): - """Applies dynamic language model masking. - - `mask_language_model` implements the `Masked LM and Masking Procedure` - described in `BERT: Pre-training of Deep Bidirectional Transformers for - Language Understanding` (https://arxiv.org/pdf/1810.04805.pdf). - `mask_language_model` uses an `ItemSelector` to select the items for masking, - and a `MaskValuesChooser` to assign the values to the selected items. - The purpose of this is to bias the representation towards the actual - observed item. - - Masking is performed on items in an axis. A decision is taken independently at - random to mask with [MASK], mask with random tokens from the full vocab, or - not mask at all. Note that the masking decision is broadcasted to the - sub-dimensions. - - For example, in a RaggedTensor of shape `[batch, (wordpieces)]` and if axis=1, - each wordpiece independently gets masked (or not). - - With the following input: - - ``` - [[b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" ], - [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - ``` - - `mask_language_model` could end up masking individual wordpieces: - - ``` - [[b"[MASK]", b"##onge", b"bob", b"Sq", b"[MASK]", b"##pants" ], - [b"Bar", b"##ack", b"[MASK]", b"##ama"], - [b"[MASK]", b"##vel", b"A", b"##ven", b"##gers"]] - ``` - - Or with random token inserted: - - ``` - [[b"[MASK]", b"##onge", b"bob", b"Sq", b"[MASK]", b"##pants" ], - [b"Bar", b"##ack", b"Sq", b"##ama"], # random token inserted for 'Ob' - [b"Bar", b"##vel", b"A", b"##ven", b"##gers"]] # random token inserted for - # 'Mar' - ``` - - In a RaggedTensor of shape `[batch, (words), (wordpieces)]`, whole words get - masked (or not). If a word gets masked, all its tokens are independently - either replaced by `[MASK]`, by random tokens, or no substitution occurs. - Note that any arbitrary spans that can be constructed by a `RaggedTensor` can - be masked in the same way. 
- - For example, if we have an `RaggedTensor` with shape - `[batch, (token), (wordpieces)]`: - - ``` - [[[b"Sp", "##onge"], [b"bob"], [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"Ob", b"##ama"]], - [[b"Mar", "##vel"], [b"A", b"##ven", b"##gers"]]] - ``` - - `mask_language_model` could mask whole spans (items grouped together - by the same 1st dimension): - - ``` - [[[b"[MASK]", "[MASK]"], [b"bob"], [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"[MASK]", b"[MASK]"]], - [[b"[MASK]", "[MASK]"], [b"A", b"##ven", b"##gers"]]] - ``` - - or insert random items in spans: - - ``` - [[[b"Mar", "##ama"], [b"bob"], [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"##onge", b"##gers"]], - [[b"Ob", "Sp"], [b"A", b"##ven", b"##gers"]]] - ``` - - Args: - input_ids: A `RaggedTensor` of n dimensions (where n >= 2) on which - masking will be applied to items up to dimension 1. - item_selector: An instance of `ItemSelector` that is used for selecting - items to be masked. - mask_values_chooser: An instance of `MaskValuesChooser` which determines the - values assigned to the ids chosen for masking. - axis: the axis where items will be treated atomically for masking. - Returns: - A tuple of (masked_input_ids, masked_positions, masked_ids) where: - - masked_input_ids: A `RaggedTensor` in the same shape and dtype as - `input_ids`, but with items in `masked_positions` possibly replaced - with `mask_token`, random id, or no change. - masked_positions: A `RaggedTensor` of ints with shape - [batch, (num_masked)] containing the positions of items selected for - masking. - masked_ids: A `RaggedTensor` with shape [batch, (num_masked)] and same - type as `input_ids` containing the original values before masking - and thus used as labels for the task. - """ - if not isinstance(item_selector, item_selector_ops.ItemSelector): - raise ValueError("`item_selector` must be an instance of `ItemSelector`") - - if not isinstance(mask_values_chooser, MaskValuesChooser): - raise ValueError("`mask_values_chooser` must be an instance of " + - "`MaskValuesChooser`") - - input_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor(input_ids) - - # Identify the items that are maskable and obtain their positions in the - # rank 2 space. - masked_token_positions = _get_selected_item_positions( - item_selector, input_ids, axis) - - # Flatten everything down to a 2D RaggedTensor - masked_token_positions = ( - masked_token_positions if masked_token_positions.ragged_rank == 1 else - masked_token_positions.merge_dims(1, -1)) - input_ids = ( - input_ids if input_ids.ragged_rank == 1 else input_ids.merge_dims(1, -1)) - - # Gather all the current ids in the places selected for masking. - masked_lm_ids = array_ops.batch_gather(input_ids, masked_token_positions) - - # Figure out what we are going to replace these values with -- either masked - # token, random int id, or do nothing. - mask_values = mask_values_chooser.get_mask_values(masked_lm_ids) - - # scatter the new mask values back to their respective positions - new_input_ids = _ragged_tensor_scatter_nd_update(input_ids, - masked_token_positions, - mask_values) - return new_input_ids, masked_token_positions, masked_lm_ids - - -class MaskValuesChooser(object): - """Assigns values to the items chosen for masking. - - `MaskValuesChooser` encapsulates the logic for deciding the value to assign - items that where chosen for masking. 
The following are the behavior in the - default implementation: - - For `mask_token_rate` of the time, replace the item with the `[MASK]` token: - - ``` - my dog is hairy -> my dog is [MASK] - ``` - - For `random_token_rate` of the time, replace the item with a random word: - - ``` - my dog is hairy -> my dog is apple - ``` - - For `1 - mask_token_rate - random_token_rate` of the time, keep the item - unchanged: - - ``` - my dog is hairy -> my dog is hairy. - ``` - - The default behavior is consistent with the methodology specified in - `Masked LM and Masking Procedure` described in `BERT: Pre-training of Deep - Bidirectional Transformers for Language Understanding` - (https://arxiv.org/pdf/1810.04805.pdf). - - Users may further customize this with behavior through subclassing and - overriding `get_mask_values()`. - """ - - def __init__(self, - vocab_size, - mask_token, - mask_token_rate=0.8, - random_token_rate=0.1): - """Creates an instance of `MaskValueChooser`. - - Args: - vocab_size: size of vocabulary. - mask_token: The id of the mask token. - mask_token_rate: (optional) A float between 0 and 1 which indicates how - often the `mask_token` is substituted for tokens selected for masking. - Default is 0.8, NOTE: `mask_token_rate` + `random_token_rate` <= 1. - random_token_rate: A float between 0 and 1 which indicates how often a - random token is substituted for tokens selected for masking. Default is - 0.1. NOTE: `mask_token_rate` + `random_token_rate` <= 1. - """ - if mask_token_rate is None: - raise ValueError("`mask_token_rate` cannot be None") - if random_token_rate is None: - raise ValueError("`random_token_rate` cannot be None") - self._mask_token_rate = mask_token_rate - self._random_token_rate = random_token_rate - self._mask_token = mask_token - self._vocab_size = vocab_size - - @property - def mask_token(self): - return self._mask_token - - @property - def random_token_rate(self): - return self._random_token_rate - - @property - def vocab_size(self): - return self._vocab_size - - def get_mask_values(self, masked_lm_ids): - """Get the values used for masking, random injection or no-op. - - Args: - masked_lm_ids: a `RaggedTensor` of n dimensions and dtype int32 or int64 - whose values are the ids of items that have been selected for masking. - Returns: - a `RaggedTensor` of the same dtype and shape with `masked_lm_ids` whose - values contain either the mask token, randomly injected token or original - value. - """ - validate_rates = control_flow_ops.Assert( - self._mask_token_rate + self._random_token_rate <= 1, - ["mask_token_rate + random_token_rate must be <= 1"]) - with ops.control_dependencies([validate_rates]): - - # Generate a random number for all mask-able items. Items that should be - # treated atomically (e.g. all wordpieces in a token, span, etc) will have - # the same random number. - random_uniform = _get_random(masked_lm_ids) - - # Merge down to rank 2. 
- random_uniform = ( - random_uniform if random_uniform.ragged_rank == 1 else - random_uniform.merge_dims(1, -1)) - mask_values = masked_lm_ids - - all_mask_flat = array_ops.tile([self._mask_token], - array_ops.shape(mask_values.flat_values)) - - # Maybe add mask token `mask_token_rate`% of the time - should_mask_flat = random_uniform.flat_values < math_ops.cast( - self._mask_token_rate, dtypes.float32) - mask_values = mask_values.with_flat_values( - ragged_where_op.where( - should_mask_flat, - x=math_ops.cast(all_mask_flat, mask_values.flat_values.dtype), - y=mask_values.flat_values)) - - # Maybe inject random token `random_token_rate`% of the time. - all_random_flat = random_ops.random_uniform( - array_ops.shape(mask_values.flat_values), maxval=math_ops.cast( - self._vocab_size, dtypes.float32)) - should_inject_random_flat = math_ops.logical_and( - random_uniform.flat_values > self._mask_token_rate, - random_uniform.flat_values < - self._mask_token_rate + self._random_token_rate) - mask_values = mask_values.with_flat_values( - ragged_where_op.where( - should_inject_random_flat, - x=math_ops.cast(all_random_flat, mask_values.flat_values.dtype), - y=mask_values.flat_values)) - return mask_values
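The file removed above backs the public `mask_language_model` API and its `MaskValuesChooser` companion. As a point of reference, here is a minimal usage sketch; it assumes the separately released `tensorflow-text` pip package still exports `mask_language_model`, `RandomItemSelector`, and `MaskValuesChooser` under the `tensorflow_text` namespace, and the vocabulary ids (including the `[PAD]` id 0 and `[MASK]` id 99) are made up for illustration:

```python
# Minimal sketch, not part of this CL; assumes the tensorflow-text pip
# package exports these symbols under tensorflow_text, as recent releases do.
import tensorflow as tf
import tensorflow_text as tf_text

# Wordpiece ids shaped [batch, (words), (wordpieces)]; all ids hypothetical.
input_ids = tf.ragged.constant([[[1, 2], [3], [4, 5, 6]],
                                [[7, 8], [9, 10]]])

# Pick whole words (axis=1 treats each word's wordpieces atomically).
selector = tf_text.RandomItemSelector(
    max_selections_per_batch=2, selection_rate=0.2, unselectable_ids=[0])

# BERT-style recipe: [MASK] 80% of the time, a random id 10% of the time,
# otherwise keep the original id.
chooser = tf_text.MaskValuesChooser(
    vocab_size=100, mask_token=99, mask_token_rate=0.8, random_token_rate=0.1)

masked_input_ids, masked_positions, masked_ids = tf_text.mask_language_model(
    input_ids, item_selector=selector, mask_values_chooser=chooser)
```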
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/masking_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/masking_ops_test.py
deleted file mode 100644
index e292cfd9..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/masking_ops_test.py
+++ /dev/null
@@ -1,421 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for masking ops.""" -from absl.testing import parameterized - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_functional_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import item_selector_ops -from tensorflow_text.python.ops import masking_ops - -_VOCAB = [ - b"[MASK]", - b"[RANDOM]", - b"[CLS]", - b"[SEP]", - b"##ack", - b"##ama", - b"##gers", - b"##onge", - b"##pants", - b"##uare", - b"##vel", - b"##ven", - b"A", - b"Bar", - b"Hates", - b"Mar", - b"Ob", - b"Patrick", - b"President", - b"Sp", - b"Sq", - b"bob", - b"box", - b"has", - b"highest", - b"is", - b"office", - b"the", -] - -_MASK_TOKEN = _VOCAB.index(b"[MASK]") -_RANDOM_TOKEN = _VOCAB.index(b"[RANDOM]") -_VOCAB_SIZE = len(_VOCAB) - - -def _create_table(vocab, num_oov=1): - init = lookup_ops.KeyValueTensorInitializer( - vocab, - math_ops.range( - array_ops.size(vocab, out_type=dtypes.int64), dtype=dtypes.int64), - key_dtype=dtypes.string, - value_dtype=dtypes.int64) - return lookup_ops.StaticVocabularyTableV1( - init, num_oov, lookup_key_dtype=dtypes.string) - - -class AlwaysRandomValuesChooser(masking_ops.MaskValuesChooser): - - def __init__(self, - vocab_size, - mask_token, - random_token, - mask_token_rate=0.8, - random_token_rate=0.1): - super(AlwaysRandomValuesChooser, self).__init__(1, 0, 0, 1) - self._random_token = random_token - - def get_mask_values(self, masked_lm_ids, **kwargs): - # If we're expecting all random tokens, set to all [RANDOM] - if self.random_token_rate == 1: - flat_mask_values = array_ops.tile( - array_ops.expand_dims(self._random_token, -1), - array_ops.shape(masked_lm_ids.flat_values)) - flat_mask_values = math_ops.cast(flat_mask_values, dtypes.int64) - else: - # Give them all [MASK] values. 
- flat_mask_values = array_ops.tile( - array_ops.expand_dims(self.mask_token, -1), - array_ops.shape(masked_lm_ids.flat_values)) - flat_mask_values = math_ops.cast(flat_mask_values, dtypes.int64) - return masked_lm_ids.with_flat_values(flat_mask_values) - - -@test_util.run_all_in_graph_and_eager_modes -class MaskingOpsTest(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - dict( - description="Masking wordpieces", - masking_inputs=[[ - b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - expected_masked_positions=[[0, 1], [0, 1], [0, 1]], - expected_masked_ids=[[b"Sp", b"##onge"], [b"Bar", b"##ack"], - [b"Mar", b"##vel"]], - expected_input_ids=[[ - b"[MASK]", b"[MASK]", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"[MASK]", b"[MASK]", b"Ob", b"##ama"], - [b"[MASK]", b"[MASK]", b"A", b"##ven", - b"##gers"]], - ), - dict( - description="Masking wordpieces; allow all wordpieces", - masking_inputs=[[ - b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"Bar", b"##ack", b"Ob", - b"##ama"], [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - expected_masked_positions=[[0, 1], [0, 1], [0, 1]], - expected_masked_ids=[[b"Sp", b"##onge"], [b"Bar", b"##ack"], - [b"Mar", b"##vel"]], - expected_input_ids=[[ - b"[MASK]", b"[MASK]", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"[MASK]", b"[MASK]", b"Ob", b"##ama"], - [b"[MASK]", b"[MASK]", b"A", - b"##ven", b"##gers"]], - no_mask_ids=[], - ), - dict( - description="Masking wordpieces w/ no_mask_ids", - masking_inputs=[[ - b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"Bar", b"##ack", b"Ob", - b"##ama"], [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - no_mask_ids=[b"[CLS]", b"[SEP]", b"##onge", b"Mar"], - expected_masked_positions=[[0, 2], [0, 1], [1, 2]], - expected_masked_ids=[[b"Sp", b"bob"], [b"Bar", b"##ack"], - [b"##vel", b"A"]], - expected_input_ids=[[ - b"[MASK]", b"##onge", b"[MASK]", b"Sq", b"##uare", b"##pants" - ], [b"[MASK]", b"[MASK]", b"Ob", - b"##ama"], [b"Mar", b"[MASK]", b"[MASK]", b"##ven", b"##gers"]], - ), - dict( - description=b"Masking whole words, first masked tokens are selected" + - b" as [MASK]", - masking_inputs=[[[b"Sp", "##onge"], [b"bob"], - [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"Ob", b"##ama"]], - [[b"Mar", "##vel"], [b"A", b"##ven", b"##gers"]]], - expected_masked_positions=[[0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4]], - expected_masked_ids=[[b"Sp", b"##onge", b"bob"], - [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - expected_input_ids=[ - [b"[MASK]", b"[MASK]", b"[MASK]", b"Sq", b"##uare", b"##pants"], - [b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]"], - [b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]"] - ], - ), - dict( - description=b"Masking whole words w/ no_mask_ids", - masking_inputs=[[[b"Sp", b"##onge"], [b"bob"], - [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", b"##ack"], [b"Ob", b"##ama"]], - [[b"Mar", b"##vel"], [b"A", b"##ven", b"##gers"]]], - no_mask_ids=[b"##onge", b"Mar"], - expected_masked_positions=[[2, 3, 4, 5], [0, 1, 2, 3], [2, 3, 4]], - expected_masked_ids=[[b"bob", b"Sq", b"##uare", b"##pants"], - [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"A", b"##ven", b"##gers"]], - expected_input_ids=[[ - b"Sp", b"##onge", b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]" - ], [b"[MASK]", b"[MASK]", b"[MASK]", - b"[MASK]"], [b"Mar", b"##vel", b"[MASK]", b"[MASK]", b"[MASK]"]], - axis=1, - ), - dict( - description=b"Masking 
arbitrary spans", - # [batch, (num_spans), (num_tokens), (num_wordpieces)] - masking_inputs=[ - # "Sponge bob" is a single span - [[[b"Sp", b"##onge"], [b"bob"]], [[b"Sq", b"##uare", b"##pants"]], - [[b"Hates"]], [[b"Patrick"]]], - # "Barack Obama"is a single span - [[[b"Bar", b"##ack"], [b"Ob", b"##ama"]], [[b"is"]], - [[b"President"]]], - [[[b"Mar", b"##vel"]], [[b"A", b"##ven", b"##gers"]], [[b"has"]], - [[b"the"]], [[b"highest"]], [[b"box"]], [[b"office"]]], - ], - expected_masked_positions=[[0, 1, 2, 3, 4, 5], [0, 1, 2, 3, 4], - [0, 1, 2, 3, 4]], - expected_masked_ids=[[ - b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"Bar", b"##ack", b"Ob", b"##ama", b"is"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - expected_input_ids=[ - [ - b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]", - b"[MASK]", b"Hates", b"Patrick" - ], - [ - b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]", - b"President" - ], - [ - b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]", b"[MASK]", b"has", - b"the", b"highest", b"box", b"office" - ], - ], - ), - ]) - def testMaskingOps(self, - masking_inputs, - expected_masked_positions, - description, - expected_input_ids=None, - expected_masked_ids=None, - selection_rate=None, - mask_token_rate=1, - random_token_rate=0, - shuffle_fn=None, - no_mask_ids=None, - max_selections_per_batch=10, - axis=1): - - if no_mask_ids: - no_mask_ids = [_VOCAB.index(i) for i in no_mask_ids] - item_selector = item_selector_ops.FirstNItemSelector(2, no_mask_ids) - values_chooser = masking_ops.MaskValuesChooser( - vocab_size=_VOCAB_SIZE, - mask_token=_MASK_TOKEN, - mask_token_rate=mask_token_rate, - random_token_rate=random_token_rate) - - return self.runMaskingOpsTest( - masking_inputs=masking_inputs, - expected_masked_positions=expected_masked_positions, - description=description, - expected_input_ids=expected_input_ids, - expected_masked_ids=expected_masked_ids, - axis=axis, - item_selector=item_selector, - values_chooser=values_chooser, - ) - - @parameterized.parameters([ - dict( - description="Masking wordpieces, no masking, nor random injection " + - "allowed", - masking_inputs=[[ - b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - expected_masked_positions=[[], [], []], - expected_masked_ids=[[], [], []], - expected_input_ids=[[ - b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - ), - ]) - def testNothingSelectedMasker(self, - masking_inputs, - expected_masked_positions, - description, - expected_input_ids=None, - expected_masked_ids=None, - axis=1): - item_selector = item_selector_ops.NothingSelector() - values_chooser = masking_ops.MaskValuesChooser(_VOCAB_SIZE, _MASK_TOKEN, - 0.9, 0.1) - return self.runMaskingOpsTest( - masking_inputs=masking_inputs, - description=description, - expected_input_ids=expected_input_ids, - expected_masked_ids=expected_masked_ids, - expected_masked_positions=expected_masked_positions, - axis=axis, - item_selector=item_selector, - values_chooser=values_chooser, - ) - - @parameterized.parameters([ - dict( - description=b"Masking wordpieces, all random", - masking_inputs=[[ - b"Sp", b"##onge", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - mask_token_rate=0.001, - random_token_rate=0.9, - expected_masked_positions=[[0, 1], [0, 1], [0, 1]], - 
expected_masked_ids=[ - [b"Sp", b"##onge"], - [b"Bar", b"##ack"], - [b"Mar", b"##vel"], - ], - expected_input_ids=[[ - b"[RANDOM]", b"[RANDOM]", b"bob", b"Sq", b"##uare", b"##pants" - ], [b"[RANDOM]", b"[RANDOM]", b"Ob", - b"##ama"], [b"[RANDOM]", b"[RANDOM]", b"A", b"##ven", b"##gers"]], - ), - dict( - description=b"Masking whole words w/ random injections", - masking_inputs=[[[b"Sp", "##onge"], [b"bob"], - [b"Sq", b"##uare", b"##pants"]], - [[b"Bar", "##ack"], [b"Ob", b"##ama"]], - [[b"Mar", "##vel"], [b"A", b"##ven", b"##gers"]]], - mask_token_rate=0, - random_token_rate=1, - expected_masked_positions=[[0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4]], - expected_masked_ids=[[b"Sp", b"##onge", b"bob"], - [b"Bar", b"##ack", b"Ob", b"##ama"], - [b"Mar", b"##vel", b"A", b"##ven", b"##gers"]], - expected_input_ids=[ - [ - b"[RANDOM]", b"[RANDOM]", b"[RANDOM]", b"Sq", b"##uare", - b"##pants" - ], - [b"[RANDOM]", b"[RANDOM]", b"[RANDOM]", b"[RANDOM]"], - [b"[RANDOM]", b"[RANDOM]", b"[RANDOM]", b"[RANDOM]", b"[RANDOM]"], - ]), - ]) - def testRandomMasking(self, - masking_inputs, - expected_masked_positions, - description, - expected_input_ids=None, - expected_masked_ids=None, - selection_rate=None, - mask_token_rate=1, - random_token_rate=None, - shuffle_fn=None, - no_mask_ids=None, - max_selections_per_batch=10, - axis=1): - if no_mask_ids: - no_mask_ids = [_VOCAB.index(i) for i in no_mask_ids] - - item_selector = item_selector_ops.FirstNItemSelector(2, no_mask_ids) - values_chooser = AlwaysRandomValuesChooser( - vocab_size=_VOCAB_SIZE, - mask_token=_MASK_TOKEN, - random_token=_RANDOM_TOKEN, - mask_token_rate=mask_token_rate, - random_token_rate=random_token_rate) - return self.runMaskingOpsTest( - masking_inputs=masking_inputs, - expected_masked_positions=expected_masked_positions, - description=description, - expected_input_ids=expected_input_ids, - expected_masked_ids=expected_masked_ids, - axis=axis, - item_selector=item_selector, - values_chooser=values_chooser, - ) - - def runMaskingOpsTest(self, - masking_inputs, - expected_masked_positions, - description, - expected_input_ids=None, - expected_masked_ids=None, - axis=1, - item_selector=None, - values_chooser=None): - masking_inputs = ragged_factory_ops.constant(masking_inputs) - - # Lookup int IDs - table = _create_table(_VOCAB) - self.evaluate(table.initializer) - - # Transform human-readable string wordpieces into int ids, which is what - # will actually be tested. 
- masking_inputs = ( - ragged_functional_ops.map_flat_values(table.lookup, masking_inputs)) - - actual_input_ids, actual_masked_positions, actual_masked_ids = ( - masking_ops.mask_language_model( - masking_inputs, - axis=axis, - item_selector=item_selector, - mask_values_chooser=values_chooser)) - - self.assertAllEqual(actual_masked_positions, expected_masked_positions) - # Decode back into human readable wordpieces for comparison - actual_masked_ids_flat = array_ops.gather(_VOCAB, - actual_masked_ids.flat_values) - actual_masked_ids = actual_masked_ids.with_flat_values( - actual_masked_ids_flat) - - self.assertAllEqual(actual_masked_ids, expected_masked_ids) - actual_input_ids_flat = array_ops.gather(_VOCAB, - actual_input_ids.flat_values) - actual_input_ids = actual_input_ids.with_flat_values(actual_input_ids_flat) - self.assertAllEqual(actual_input_ids, expected_input_ids) - - def testInvalidRates(self): - with self.assertRaises(errors.InvalidArgumentError): - values_chooser = masking_ops.MaskValuesChooser( - _VOCAB_SIZE, _MASK_TOKEN, - 0.9, 5.6) - self.evaluate(values_chooser.get_mask_values( - ragged_factory_ops.constant([ - [1, 2, 3], [4, 5]]))) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/mst_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/mst_ops.py
deleted file mode 100644
index b5108c3c..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/mst_ops.py
+++ /dev/null
@@ -1,71 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""TensorFlow ops for maximum spanning tree problems.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import standard_ops - -# pylint: disable=g-bad-import-order -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_mst_ops = load_library.load_op_library(resource_loader.get_path_to_datafile('_mst_ops.so')) - -# Re-export the generated MST op. -max_spanning_tree = gen_mst_ops.max_spanning_tree - - -@ops.RegisterGradient("MaxSpanningTree") -def max_spanning_tree_gradient(mst_op, d_loss_d_max_scores, *_): - """Returns a subgradient of the MaximumSpanningTree op. - - Note that MaximumSpanningTree is only differentiable w.r.t. its |scores| input - and its |max_scores| output. - - Args: - mst_op: The MaximumSpanningTree op being differentiated. - d_loss_d_max_scores: [B] vector where entry b is the gradient of the network - loss w.r.t. entry b of the |max_scores| output of the |mst_op|. - *_: The gradients w.r.t. the other outputs; ignored. - - Returns: - 1. None, since the op is not differentiable w.r.t. its |num_nodes| input. - 2. [B,M,M] tensor where entry b,t,s is a subgradient of the network loss - w.r.t. entry b,t,s of the |scores| input, with the same dtype as - |d_loss_d_max_scores|. - """ - dtype = d_loss_d_max_scores.dtype.base_dtype - if dtype is None: - raise errors.InvalidArgumentError("Expected (%s) is not None" % dtype) - - argmax_sources_bxm = mst_op.outputs[1] - input_dim = array_ops.shape(argmax_sources_bxm)[1] # M in the docstring - - # The one-hot argmax is a subgradient of max. Convert the batch of maximal - # spanning trees into 0/1 indicators, then scale them by the relevant output - # gradients from |d_loss_d_max_scores|. Note that |d_loss_d_max_scores| must - # be reshaped in order for it to broadcast across the batch dimension. - indicators_bxmxm = standard_ops.one_hot( - argmax_sources_bxm, input_dim, dtype=dtype) - d_loss_d_max_scores_bx1 = array_ops.expand_dims(d_loss_d_max_scores, -1) - d_loss_d_max_scores_bx1x1 = array_ops.expand_dims(d_loss_d_max_scores_bx1, -1) - d_loss_d_scores_bxmxm = indicators_bxmxm * d_loss_d_max_scores_bx1x1 - return None, d_loss_d_scores_bxmxm
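The MST wrapper deleted above re-exports the `MaxSpanningTree` kernel and registers a subgradient for it. A short call sketch follows, with the scores and expected outputs taken from the test file deleted below; it assumes the op remains available as `tensorflow_text.max_spanning_tree`:

```python
# Sketch only; assumes tensorflow_text still exports max_spanning_tree.
import tensorflow as tf
import tensorflow_text as tf_text

# Two digraphs padded to 4 nodes; scores[b, t, s] scores the arc s -> t,
# and scores[b, r, r] scores r as the root.
num_nodes = tf.constant([4, 3], tf.int32)
scores = tf.constant(
    [[[0, 0, 0, 0], [1, 0, 0, 0], [1, 2, 0, 0], [1, 2, 3, 4]],
     [[4, 3, 2, 9], [0, 0, 2, 9], [0, 0, 0, 9], [9, 9, 9, 9]]], tf.int32)

max_scores, argmax_sources = tf_text.max_spanning_tree(
    num_nodes, scores, forest=False)
# max_scores     == [7, 6]
# argmax_sources == [[3, 0, 1, 3], [0, 2, 0, -1]]  (last entry is a padded node)
```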
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/mst_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/mst_ops_test.py
deleted file mode 100644
index 5dd852f..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/mst_ops_test.py
+++ /dev/null
@@ -1,121 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for maximum spanning tree ops.""" - -import numpy as np - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import test_util -from tensorflow.python.platform import test -from tensorflow_text.python.ops import mst_ops - - -class MstOpsTest(test.TestCase): - """Testing rig.""" - - @test_util.run_all_in_graph_and_eager_modes - def testMaximumSpanningTree(self): - """Tests that the MST op can recover a simple tree.""" - # The first batch element prefers 3 as root, then 3->0->1->2, for a total - # score of 4+2+1=7. The second batch element is smaller and has reversed - # scores, so 0 is root and 0->2->1. - num_nodes = constant_op.constant([4, 3], dtypes.int32) - scores = constant_op.constant([[[0, 0, 0, 0], - [1, 0, 0, 0], - [1, 2, 0, 0], - [1, 2, 3, 4]], - [[4, 3, 2, 9], - [0, 0, 2, 9], - [0, 0, 0, 9], - [9, 9, 9, 9]]], - dtypes.int32) # pyformat: disable - - (max_scores, argmax_sources) = mst_ops.max_spanning_tree( - num_nodes, scores, forest=False) - - self.assertAllEqual(max_scores, [7, 6]) - self.assertAllEqual(argmax_sources, [[3, 0, 1, 3], - [0, 2, 0, -1]]) # pyformat: disable - - @test_util.run_deprecated_v1 - def testMaximumSpanningTreeGradient(self): - """Tests the MST max score gradient.""" - with self.test_session() as session: - num_nodes = constant_op.constant([4, 3], dtypes.int32) - scores = constant_op.constant([[[0, 0, 0, 0], - [1, 0, 0, 0], - [1, 2, 0, 0], - [1, 2, 3, 4]], - [[4, 3, 2, 9], - [0, 0, 2, 9], - [0, 0, 0, 9], - [9, 9, 9, 9]]], - dtypes.int32) # pyformat: disable - - mst_ops.max_spanning_tree(num_nodes, scores, forest=False, name='MST') - mst_op = session.graph.get_operation_by_name('MST') - - d_loss_d_max_scores = constant_op.constant([3, 7], dtypes.float32) - d_loss_d_num_nodes, d_loss_d_scores = ( - mst_ops.max_spanning_tree_gradient(mst_op, d_loss_d_max_scores)) - - # The num_nodes input is non-differentiable. - self.assertIs(d_loss_d_num_nodes, None) - self.assertAllEqual(d_loss_d_scores.eval(), - [[[0, 0, 0, 3], - [3, 0, 0, 0], - [0, 3, 0, 0], - [0, 0, 0, 3]], - [[7, 0, 0, 0], - [0, 0, 7, 0], - [7, 0, 0, 0], - [0, 0, 0, 0]]]) # pyformat: disable - - @test_util.run_deprecated_v1 - def testMaximumSpanningTreeGradientError(self): - """Numerically validates the max score gradient.""" - with self.test_session(): - # The maximum-spanning-tree-score function, as a max of linear functions, - # is piecewise-linear (i.e., faceted). The numerical gradient estimate - # may be inaccurate if the epsilon ball used for the estimate crosses an - # edge from one facet to another. To avoid spurious errors, we manually - # set the sample point so the epsilon ball fits in a facet. Or in other - # words, we set the scores so there is a non-trivial margin between the - # best and second-best trees. 
- scores_raw = [[[0, 0, 0, 0], - [1, 0, 0, 0], - [1, 2, 0, 0], - [1, 2, 3, 4]], - [[4, 3, 2, 9], - [0, 0, 2, 9], - [0, 0, 0, 9], - [9, 9, 9, 9]]] # pyformat: disable - - # Use 64-bit floats to reduce numerical error. - scores = constant_op.constant(scores_raw, dtypes.float64) - init_scores = np.array(scores_raw) - - num_nodes = constant_op.constant([4, 3], dtypes.int32) - max_scores = mst_ops.max_spanning_tree(num_nodes, scores, forest=False)[0] - - gradient_error = test.compute_gradient_error(scores, [2, 4, 4], - max_scores, [2], init_scores) - self.assertIsNot(gradient_error, None) - - -if __name__ == '__main__': - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/ngrams_op.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/ngrams_op.py
deleted file mode 100644
index 52eb601..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/ngrams_op.py
+++ /dev/null
@@ -1,137 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tensorflow ngram operations.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import enum - -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_functional_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow_text.python.ops.sliding_window_op import sliding_window - - -class Reduction(enum.Enum): - """Type of reduction to be done by the n-gram op. - - The supported reductions are as follows: - - * `Reduction.SUM`: Add values in the window. - * `Reduction.MEAN`: Average values in the window. - * `Reduction.STRING_JOIN`: Join strings in the window. - """ - - SUM = 1 - MEAN = 2 - STRING_JOIN = 3 - - -def ngrams(data, - width, - axis=-1, - reduction_type=None, - string_separator=" ", - name=None): - """Create a tensor of n-grams based on the input data `data`. - - Creates a tensor of n-grams based on `data`. The n-grams are of width `width` - and are created along axis `axis`; the n-grams are created by combining - windows of `width` adjacent elements from `data` using `reduction_type`. This - op is intended to cover basic use cases; more complex combinations can be - created using the sliding_window op. - - >>> input_data = tf.ragged.constant([["e", "f", "g"], ["dd", "ee"]]) - >>> ngrams( - ... input_data, - ... width=2, - ... axis=-1, - ... reduction_type=Reduction.STRING_JOIN, - ... string_separator="|") - <tf.RaggedTensor [[b'e|f', b'f|g'], [b'dd|ee']]> - - Args: - data: The data to reduce. - width: The width of the ngram window. If there is not sufficient data to - fill out the ngram window, the resulting ngram will be empty. - axis: The axis to create ngrams along. Note that for string join reductions, - only axis '-1' is supported; for other reductions, any positive or - negative axis can be used. Should be a constant. - reduction_type: A member of the Reduction enum. Should be a constant. - Currently supports: - - * `Reduction.SUM`: Add values in the window. - * `Reduction.MEAN`: Average values in the window. - * `Reduction.STRING_JOIN`: Join strings in the window. - Note that axis must be -1 here. - - string_separator: The separator string used for `Reduction.STRING_JOIN`. - Ignored otherwise. Must be a string constant, not a Tensor. - name: The op name. - - Returns: - A tensor of ngrams. If the input is a tf.Tensor, the output will also - be a tf.Tensor; if the input is a tf.RaggedTensor, the output will be - a tf.RaggedTensor. - - Raises: - InvalidArgumentError: if `reduction_type` is either None or not a Reduction, - or if `reduction_type` is STRING_JOIN and `axis` is not -1. 
- """ - - with ops.name_scope(name, "NGrams", [data, width]): - if reduction_type is None: - raise errors.InvalidArgumentError(None, None, - "reduction_type must be specified.") - - if not isinstance(reduction_type, Reduction): - raise errors.InvalidArgumentError(None, None, - "reduction_type must be a Reduction.") - - # TODO(b/122967921): Lift this restriction after ragged_reduce_join is done. - if reduction_type is Reduction.STRING_JOIN and axis != -1: - raise errors.InvalidArgumentError( - None, None, "%s requires that ngrams' 'axis' parameter be -1." % - Reduction.STRING_JOIN.name) - - windowed_data = sliding_window(data, width, axis) - - if axis < 0: - reduction_axis = axis - else: - reduction_axis = axis + 1 - - # Ragged reduction ops work on both Tensor and RaggedTensor, so we can - # use them here regardless of the type of tensor in 'windowed_data'. - if reduction_type is Reduction.SUM: - return math_ops.reduce_sum(windowed_data, reduction_axis) - elif reduction_type is Reduction.MEAN: - return math_ops.reduce_mean(windowed_data, reduction_axis) - elif reduction_type is Reduction.STRING_JOIN: - if isinstance(data, ragged_tensor.RaggedTensor): - return ragged_functional_ops.map_flat_values( - string_ops.reduce_join, - windowed_data, - axis=axis, - separator=string_separator) - else: - return string_ops.reduce_join( - windowed_data, axis=axis, separator=string_separator)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/ngrams_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/ngrams_op_test.py
deleted file mode 100644
index ebf51fb..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/ngrams_op_test.py
+++ /dev/null
@@ -1,178 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for ngram ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import ngrams_op - - -@test_util.run_all_in_graph_and_eager_modes -class NgramsOpTest(test_util.TensorFlowTestCase): - - def testSumReduction(self): - test_data = constant_op.constant([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=1, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[3.0, 5.0], [30.0, 50.0]] - - self.assertAllEqual(expected_values, op) - - def testRaggedSumReduction(self): - test_data = ragged_factory_ops.constant([[1.0, 2.0, 3.0, 4.0], - [10.0, 20.0, 30.0]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=1, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[3.0, 5.0, 7.0], [30.0, 50.0]] - - self.assertAllEqual(expected_values, op) - - def testRaggedSumReductionAxisZero(self): - test_data = ragged_factory_ops.constant([[1.0, 2.0, 3.0, 4.0], - [10.0, 20.0, 30.0, 40.0]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=0, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[11.0, 22.0, 33.0, 44.0]] - - self.assertAllEqual(expected_values, op) - - def testMeanReduction(self): - test_data = constant_op.constant([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=1, reduction_type=ngrams_op.Reduction.MEAN) - expected_values = [[1.5, 2.5], [15.0, 25.0]] - - self.assertAllEqual(expected_values, op) - - def testRaggedMeanReduction(self): - test_data = ragged_factory_ops.constant([[1.0, 2.0, 3.0, 4.0], - [10.0, 20.0, 30.0]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=-1, reduction_type=ngrams_op.Reduction.MEAN) - expected_values = [[1.5, 2.5, 3.5], [15.0, 25.0]] - - self.assertAllEqual(expected_values, op) - - def testStringJoinReduction(self): - test_data = constant_op.constant([["a", "b", "c"], ["dd", "ee", "ff"]]) - op = ngrams_op.ngrams( - test_data, - width=2, - axis=-1, - reduction_type=ngrams_op.Reduction.STRING_JOIN, - string_separator="|") - expected_values = [[b"a|b", b"b|c"], [b"dd|ee", b"ee|ff"]] - - self.assertAllEqual(expected_values, op) - - def testStringJoinReductionAxisZero(self): - test_data = constant_op.constant(["a", "b", "c"]) - op = ngrams_op.ngrams( - test_data, - width=2, - axis=-1, # The -1 axis is the zero axis here. 
- reduction_type=ngrams_op.Reduction.STRING_JOIN, - string_separator="|") - expected_values = [b"a|b", b"b|c"] - - self.assertAllEqual(expected_values, op) - - def testRaggedStringJoinReduction(self): - test_data = ragged_factory_ops.constant([["a", "b", "c"], ["dd", "ee"]]) - op = ngrams_op.ngrams( - test_data, - width=2, - axis=-1, - reduction_type=ngrams_op.Reduction.STRING_JOIN, - string_separator="|") - expected_values = [[b"a|b", b"b|c"], [b"dd|ee"]] - - self.assertAllEqual(expected_values, op) - - def testReductionWithNegativeAxis(self): - test_data = constant_op.constant([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=-1, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[3.0, 5.0], [30.0, 50.0]] - - self.assertAllEqual(expected_values, op) - - def testReductionOnInnerAxis(self): - test_data = constant_op.constant([[[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]], - [[4.0, 5.0, 6.0], [40.0, 50.0, 60.0]]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=-2, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[[11.0, 22.0, 33.0]], [[44.0, 55.0, 66.0]]] - - self.assertAllEqual(expected_values, op) - - def testRaggedReductionOnInnerAxis(self): - test_data = ragged_factory_ops.constant([[[1.0, 2.0, 3.0, 4.0], - [10.0, 20.0, 30.0, 40.0]], - [[100.0, 200.0], [300.0, 400.0]]]) - op = ngrams_op.ngrams( - test_data, width=2, axis=-2, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[[11.0, 22.0, 33.0, 44.0]], [[400.0, 600.0]]] - - self.assertAllEqual(expected_values, op) - - def testReductionOnAxisWithInsufficientValuesReturnsEmptySet(self): - test_data = constant_op.constant([[1.0, 2.0, 3.0], [10.0, 20.0, 30.0]]) - op = ngrams_op.ngrams( - test_data, width=4, axis=-1, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[], []] - - self.assertAllEqual(expected_values, op) - - def testRaggedReductionOnAxisWithInsufficientValuesReturnsEmptySet(self): - test_data = ragged_factory_ops.constant([[1.0, 2.0, 3.0], - [10.0, 20.0, 30.0, 40.0]]) - op = ngrams_op.ngrams( - test_data, width=4, axis=1, reduction_type=ngrams_op.Reduction.SUM) - expected_values = [[], [100.0]] - - self.assertAllEqual(expected_values, op) - - def testStringJoinReductionFailsWithImproperAxis(self): - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r".*requires that ngrams' 'axis' parameter be -1."): - _ = ngrams_op.ngrams( - data=[], - width=2, - axis=0, - reduction_type=ngrams_op.Reduction.STRING_JOIN) - - def testUnspecifiedReductionTypeFails(self): - with self.assertRaisesRegexp(errors.InvalidArgumentError, - r"reduction_type must be specified."): - _ = ngrams_op.ngrams(data=[], width=2, axis=0) - - def testBadReductionTypeFails(self): - with self.assertRaisesRegexp(errors.InvalidArgumentError, - r"reduction_type must be a Reduction."): - _ = ngrams_op.ngrams(data=[], width=2, axis=0, reduction_type="SUM") - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/normalize_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/normalize_ops.py
deleted file mode 100644
index 460dc7f..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/normalize_ops.py
+++ /dev/null
@@ -1,225 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding=utf-8 -"""Tensorflow lowercasing operation for UTF8 strings.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_conversion_ops -from tensorflow.python.ops.ragged import ragged_tensor - -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_normalize_ops = load_library.load_op_library(resource_loader.get_path_to_datafile('_normalize_ops.so')) - - -# pylint: disable=redefined-builtin -def case_fold_utf8(input, name=None): - """Applies case folding to every UTF-8 string in the input. - - The input is a `Tensor` or `RaggedTensor` of any shape, and the resulting - output has the same shape as the input. Note that NFKC normalization is - implicitly applied to the strings. - - #### Examples: - - >>> # input: <string>[num_strings] - >>> case_fold_utf8(['The Quick-Brown', - ... 'CAT jumped over', - ... 'the lazy dog !! ']) - >>> # output: <string>[num_strings] - <tf.Tensor: shape=(3,), dtype=string, numpy= - array([b'the quick-brown', b'cat jumped over', b'the lazy dog !! '], - dtype=object)> - - Args: - input: A `Tensor` or `RaggedTensor` of UTF-8 encoded strings. - name: The name for this op (optional). - - Returns: - A `Tensor` or `RaggedTensor` of type string, with case-folded contents. - """ - with ops.name_scope(name, "CaseFoldUTF8", [input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor( - input, dtype=dtypes.string) - if ragged_tensor.is_ragged(input_tensor): - result = gen_normalize_ops.case_fold_utf8(input_tensor.flat_values) - return input_tensor.with_flat_values(result) - else: - return gen_normalize_ops.case_fold_utf8(input_tensor) - - -# pylint: disable=redefined-builtin) -def normalize_utf8(input, normalization_form="NFKC", name=None): - r"""Normalizes each UTF-8 string in the input tensor using the specified rule. - - See http://unicode.org/reports/tr15/ - - #### Examples: - - >>> # input: <string>[num_strings] - >>> normalize_utf8(["株式会社", "KADOKAWA"]) - >>> # output: <string>[num_strings] - <tf.Tensor: shape=(2,), dtype=string, numpy= - array([b'\xe6\xa0\xaa\xe5\xbc\x8f\xe4\xbc\x9a\xe7\xa4\xbe', b'KADOKAWA'], - dtype=object)> - - Args: - input: A `Tensor` or `RaggedTensor` of type string. (Must be UTF-8.) - normalization_form: One of the following string values ('NFC', 'NFKC', - 'NFD', 'NFKD'). Default is 'NFKC'. - name: The name for this op (optional). - - Returns: - A `Tensor` or `RaggedTensor` of type string, with normalized contents. 
- """ - with ops.name_scope(name, "NormalizeUTF8", [input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor( - input, dtype=dtypes.string) - if ragged_tensor.is_ragged(input_tensor): - result = gen_normalize_ops.normalize_utf8(input_tensor.flat_values, - normalization_form) - return input_tensor.with_flat_values(result) - else: - return gen_normalize_ops.normalize_utf8(input_tensor, normalization_form) - - -# pylint: disable=redefined-builtin) -def normalize_utf8_with_offsets_map(input, - normalization_form="NFKC", - name=None): - r"""Normalizes each UTF-8 string in the input tensor using the specified rule. - - Returns normalized strings and an offset map used by another operation to map - post-normalized string offsets to pre-normalized string offsets. - - See http://unicode.org/reports/tr15/ - - #### Examples: - - >>> # input: <string>[num_strings] - >>> normalize_utf8_with_offsets_map(["株式会社", "KADOKAWA"]) - >>> # output: <string>[num_strings], <variant>[num_strings] - NormalizeUTF8WithOffsetsMap(output=<tf.Tensor: shape=(2,), dtype=string, - numpy= - array([b'\xe6\xa0\xaa\xe5\xbc\x8f\xe4\xbc\x9a\xe7\xa4\xbe', b'KADOKAWA'], - dtype=object)>, offsets_map=<tf.Tensor: shape=(2,), dtype=variant, - numpy=<unprintable>>) - - Args: - input: A `Tensor` or `RaggedTensor` of type string. (Must be UTF-8.) - normalization_form: One of the following string values ('NFC', 'NFKC', - 'NFD', 'NFKD'). Default is 'NFKC'. NOTE: `NFD` and `NFKD` for - `normalize_utf8_with_offsets_map` will not be available until the - tf.text release w/ ICU 69 (scheduled after 4/2021). - name: The name for this op (optional). - - Returns: - A tuple of (results, offsets_map) where: - - results: A `Tensor` or `RaggedTensor` of type string, with normalized - contents. - offsets_map: A `Tensor` or `RaggedTensor` of type `variant`, used to map - the post-normalized string offsets to pre-normalized string offsets. It - has the same shape as the results tensor. offsets_map is an input to - `find_source_offsets` op. - """ - with ops.name_scope(name, "NormalizeUTF8WithOffsets", [input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor( - input, dtype=dtypes.string) - if ragged_tensor.is_ragged(input_tensor): - result, offsets_map = gen_normalize_ops.normalize_utf8_with_offsets_map( - input_tensor.flat_values, normalization_form) - return input_tensor.with_flat_values( - result), input_tensor.with_flat_values(offsets_map) - else: - return gen_normalize_ops.normalize_utf8_with_offsets_map( - input_tensor, normalization_form) - - -# pylint: disable=redefined-builtin) -def find_source_offsets(offsets_map, input_offsets, name=None): - """Maps the input post-normalized string offsets to pre-normalized offsets. - - Returns the source (i.e. pre-normalized) string offsets mapped from the input - post-normalized string offsets using the input offsets_map, which is an output - from the `normalize_utf8_with_offsets_map` op. offsets_map can be indexed or - sliced along with the input_offsets. - - #### Examples: - - >>> # input: <string>[num_strings] - >>> post_normalized_str, offsets_map = normalize_utf8_with_offsets_map( - ... ["株式会社", "KADOKAWA"]) - >>> # input: <variant>[num_strings], <int64>[num_strings, num_offsets] - >>> find_source_offsets(offsets_map, [[0, 1, 2], [0, 1, 2]]) - >>> # output: <int64>[num_strings, num_offsets] - <tf.Tensor: shape=(2, 3), dtype=int64, numpy=array([[0, 1, 2], [0, 3, 6]])> - >>> # Offsets map can be indexed. 
- >>> find_source_offsets(offsets_map[1], [[0, 1, 2]]) - <tf.Tensor: shape=(1, 3), dtype=int64, numpy=array([[0, 3, 6]])> - - Args: - offsets_map: A `Tensor` or `RaggedTensor` of type `variant`, used to map the - post-normalized string offsets to pre-normalized string offsets. - offsets_map is an output from `normalize_utf8_with_offsets_map` function. - input_offsets: A `Tensor` or `RaggedTensor` of type int64 representing the - the post-normalized string offsets, - name: The name for this op (optional). - - Returns: - results: A `Tensor` or `RaggedTensor` of type int64, with pre-normalized - string offsets. - """ - - with ops.name_scope(name, "FindSourceOffsets", [offsets_map, input_offsets]): - offsets_map_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor( - offsets_map, dtype=dtypes.variant) - input_offsets_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor( - input_offsets, dtype=dtypes.int64) - - if ragged_tensor.is_ragged(input_offsets_tensor): - if ragged_tensor.is_ragged(offsets_map_tensor): - offsets_map_values = offsets_map_tensor.flat_values - else: - offsets_map_values = array_ops.reshape(offsets_map_tensor, [-1]) - - output_values = gen_normalize_ops.find_source_offsets( - offsets_map=offsets_map_values, - input_offsets_values=input_offsets_tensor.flat_values, - input_offsets_splits=input_offsets_tensor.nested_row_splits[-1]) - return input_offsets_tensor.with_flat_values(output_values) - else: - if input_offsets_tensor.shape.ndims > 1: - output_offsets = find_source_offsets( - offsets_map, - ragged_conversion_ops.from_tensor( - input_offsets_tensor, - ragged_rank=input_offsets_tensor.shape.ndims - 1)) - return ragged_conversion_ops.to_tensor(output_offsets) - elif input_offsets_tensor.shape.ndims == 0: - output_offsets = find_source_offsets( - offsets_map, array_ops.expand_dims(input_offsets_tensor, 0)) - return output_offsets[0] - else: - output_offsets = find_source_offsets( - offsets_map, array_ops.expand_dims(input_offsets_tensor, 0)) - return array_ops.squeeze(output_offsets, [0])
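The normalization module deleted above pairs `normalize_utf8_with_offsets_map` with `find_source_offsets`, so offsets computed on normalized text can be mapped back to the original bytes. A sketch using the docstring's own example, assuming the ops remain available from the `tensorflow_text` package:

```python
# Sketch only; assumes tensorflow_text exports these normalization ops.
import tensorflow_text as tf_text

texts = ["株式会社", "KADOKAWA"]

# NFKC-normalize and keep a map from normalized offsets back to the
# original byte offsets.
normalized, offsets_map = tf_text.normalize_utf8_with_offsets_map(texts)

# Byte offsets 0, 1, 2 in each normalized string map back to the source;
# the fullwidth "KADOKAWA" is 3 bytes per character before normalization.
src = tf_text.find_source_offsets(offsets_map, [[0, 1, 2], [0, 1, 2]])
# src == [[0, 1, 2], [0, 3, 6]]

# Simple case folding (NFKC is applied implicitly).
tf_text.case_fold_utf8(["The Quick-Brown"])  # -> [b"the quick-brown"]
```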
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/normalize_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/normalize_ops_test.py
deleted file mode 100644
index 954279a1..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/normalize_ops_test.py
+++ /dev/null
@@ -1,360 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# coding=utf-8 -"""Tests for normalization ops in tensorflow_text.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import normalize_ops - - -def _Utf8(char): - return char.encode("utf-8") - - -@test_util.run_all_in_graph_and_eager_modes -class NormalizeOpsTest(test.TestCase): - - def test_lowercase_one_string(self): - txt = [ - " TExt to loWERcase! ", - ] - expected = [ - b" text to lowercase! ", - ] - self.assertAllEqual(expected, normalize_ops.case_fold_utf8(txt)) - - def test_lowercase_text(self): - txt = [ - "Punctuation and digits: -*/+$#%@%$123456789#^$*%&", - "Non-latin UTF8 chars: ΘͽʦȺЩ", - "Accented chars: ĎÔPQRŔSŠoóôpqrŕsštťuúvwxyý", - "Non-UTF8-letters: e.g. ◆, ♥, and the emoji symbol ( ͡° ͜ʖ ͡°)", - "Folded: ßς", "" - ] - expected = [ - _Utf8(u"punctuation and digits: -*/+$#%@%$123456789#^$*%&"), - _Utf8(u"non-latin utf8 chars: θͽʦⱥщ"), - _Utf8(u"accented chars: ďôpqrŕsšoóôpqrŕsštťuúvwxyý"), - _Utf8( - u"non-utf8-letters: e.g. ◆, ♥, and the emoji symbol ( ͡° ͜ʖ ͡°)" - ), - _Utf8(u"folded: ssσ"), b"" - ] - self.assertAllEqual(expected, normalize_ops.case_fold_utf8(txt)) - - def test_lowercase_one_string_ragged(self): - txt = ragged_factory_ops.constant([[" TExt ", "to", " loWERcase! "], - [" TExt to loWERcase! "]]) - expected = [[b" text ", b"to", b" lowercase! "], [b" text to lowercase! 
"]] - self.assertAllEqual(expected, normalize_ops.case_fold_utf8(txt)) - - def test_lowercase_empty_string(self): - txt = [ - "", - ] - expected = [ - b"", - ] - self.assertAllEqual(expected, normalize_ops.case_fold_utf8(txt)) - - def test_normalize_nfkc(self): - txt = [ - u"\u1e9b\u0323", - ] - expected = [ - u"ṩ".encode("utf-8"), - ] - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "NFKC")) - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "nfkc")) - - def test_normalize_nfkc_batch(self): - txt = [ - u"\u1e9b\u0323", - u"\ufb01", - ] - expected = [ - b"\xe1\xb9\xa9", - b"fi", - ] - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, u"NFKC")) - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, u"nfkc")) - - def test_normalize_nfkc_ragged(self): - txt = ragged_factory_ops.constant([[[u"\u1e9b\u0323 \ufb01"], []], - [[u"\u1e9b\u0323", u"\ufb01"]]]) - expected = [[[u"ṩ fi".encode("utf-8")], []], - [[u"ṩ".encode("utf-8"), b"fi"]]] - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "NFKC")) - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "nfkc")) - - def test_normalize_nfc(self): - txt = [ - u"\u1e9b\u0323", - ] - expected = [ - u"\u1e9b\u0323".encode("utf-8"), - ] - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "NFC")) - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "nfc")) - - def test_normalize_nfd(self): - txt = [u"\u1e9b\u0323"] - expected = [ - u"\u017f\u0323\u0307".encode("utf-8"), - ] - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "NFD")) - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "nfd")) - - def test_normalize_nfkd(self): - txt = [ - u"\u1e9b\u0323", - ] - expected = [ - u"\u0073\u0323\u0307".encode("utf-8"), - ] - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "NFKD")) - self.assertAllEqual(expected, normalize_ops.normalize_utf8(txt, "nfkd")) - - def test_unknown_normalization_form(self): - with self.assertRaises(errors.InvalidArgumentError): - bomb = normalize_ops.normalize_utf8(["cant readme", "wont read me"], - "cantfindme") - self.evaluate(bomb) - - -@test_util.run_all_in_graph_and_eager_modes -class NormalizeWithOffsetsMapOpsTest(parameterized.TestCase, test.TestCase): - - def test_normalize_nfkc(self): - txt = [ - u"\u1e9b\u0323", - ] - expected = [ - u"ṩ".encode("utf-8"), - ] - actual, _ = normalize_ops.normalize_utf8_with_offsets_map(txt, "NFKC") - self.assertAllEqual(expected, actual) - actual, _ = normalize_ops.normalize_utf8_with_offsets_map(txt, "nfkc") - self.assertAllEqual(expected, actual) - - def test_normalize_nfc(self): - txt = [ - u"\u1e9b\u0323", - ] - expected = [ - u"\u1e9b\u0323".encode("utf-8"), - ] - actual, _ = normalize_ops.normalize_utf8_with_offsets_map(txt, "NFC") - self.assertAllEqual(expected, actual) - actual, _ = normalize_ops.normalize_utf8_with_offsets_map(txt, "nfc") - self.assertAllEqual(expected, actual) - - def test_normalize_nfkc_batch(self): - txt = [ - u"\u1e9b\u0323", - u"\ufb01", - ] - expected = [ - b"\xe1\xb9\xa9", - b"fi", - ] - actual, _ = normalize_ops.normalize_utf8_with_offsets_map(txt, u"NFKC") - self.assertAllEqual(expected, actual) - actual, _ = normalize_ops.normalize_utf8_with_offsets_map(txt, u"nfkc") - self.assertAllEqual(expected, actual) - - def test_normalize_nfkc_ragged(self): - txt = ragged_factory_ops.constant([[[u"\u1e9b\u0323 \ufb01"], []], - [[u"\u1e9b\u0323", u"\ufb01"]]]) - expected = [[[u"ṩ fi".encode("utf-8")], []], - 
[[u"ṩ".encode("utf-8"), b"fi"]]] - actual, _ = normalize_ops.normalize_utf8_with_offsets_map(txt, "NFKC") - self.assertAllEqual(expected, actual) - - def test_unaccepted_normalization_form(self): - with self.assertRaises(errors.InvalidArgumentError): - bomb = normalize_ops.normalize_utf8_with_offsets_map( - ["cant readme", "wont read me"], "CANTNORMALIZEME") - self.evaluate(bomb) - - -@test_util.run_all_in_graph_and_eager_modes -class FindSourceOffsetsTest(parameterized.TestCase, test.TestCase): - - def _extract_substrs(self, txt_input, start, end): - extracted = [] - start = self.evaluate(start) - end = self.evaluate(end) - txt_input = txt_input.encode("utf-8") - for i in range(start.shape[1]): - pre_norm_start = int(start[0][i]) - pre_norm_end = int(end[0][i]) - extracted.append(txt_input[pre_norm_start:pre_norm_end]) - return extracted - - def test_one_string(self): - txt = [ - u"株式会社KADOKAWA", - ] - _, offsets_map = normalize_ops.normalize_utf8_with_offsets_map(txt, u"NFKC") - - # post_norm_txt = "株式会社KADOKAWA" - post_norm_offsets_starts = [[ - 0, 3, 6, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20 - ]] - post_norm_offsets_ends = [[3, 6, 9, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]] - - pre_norm_offsets_starts = normalize_ops.find_source_offsets( - offsets_map, post_norm_offsets_starts) - pre_norm_offsets_ends = normalize_ops.find_source_offsets( - offsets_map, post_norm_offsets_ends) - expected_pre_norm_characters = [ - u"株", u"式", u"会", u"社", u"K", u"A", u"D", u"O", u"K", - u"A", u"W", u"A", u"" - ] - self.assertAllEqual( - self._extract_substrs(txt[0], pre_norm_offsets_starts, - pre_norm_offsets_ends), - [x.encode("utf-8") for x in expected_pre_norm_characters]) - - @parameterized.parameters([ - # Test one string and rank = 0 offset input - dict( - txt_input=["株式会社KADOKAWA"], - normalization_form="NFKC", - post_norm_offsets=22, - expected=36), - # Test one string and rank = 1 offset input - dict( - txt_input=["株式会社KADOKAWA"], - normalization_form="NFKC", - post_norm_offsets=[0, 1, 2], - expected=[0, 1, 2]), - # Test multiple strings and rank = 2 offset input - dict( - txt_input=[ - "株式会社", - "KADOKAWA", - ], - normalization_form="NFKC", - post_norm_offsets=[[0, 1, 2], [0, 1, 2]], - expected=[[0, 1, 2], [0, 3, 6]]), - # Test multiple strings and rank > 2 offset input - dict( - txt_input=[ - ["株式会社"], - ["KADOKAWA"], - ], - normalization_form="NFKC", - post_norm_offsets=[[[0, 1, 2]], [[0, 1, 2]]], - expected=[[[0, 1, 2]], [[0, 3, 6]]]), - ]) - def test_tensor_input(self, txt_input, normalization_form, post_norm_offsets, - expected): - _, offsets_map = normalize_ops.normalize_utf8_with_offsets_map( - txt_input, normalization_form) - pre_norm_offsets = normalize_ops.find_source_offsets( - offsets_map, post_norm_offsets) - self.assertAllEqual(expected, pre_norm_offsets) - - @parameterized.parameters([ - # Test multiple strings with an empty str - dict( - txt_input=[ - ["株式会社"], - [""], - ["KADOKAWA"], - ], - normalization_form="NFKC", - post_norm_offsets=[[[0, 1, 2]], [[0, 1, 2]], [[0, 1, 2]]], - expected=[[[0, 1, 2]], [[0, 0, 0]], [[0, 3, 6]]]), - # Test multiple strings with an empty element - dict( - txt_input=[ - ["株式会社"], - [], - ["KADOKAWA"], - ], - normalization_form="NFKC", - post_norm_offsets=[[[0, 1, 2]], [[]], [[0, 1, 2]]], - expected=[[[0, 1, 2]], [[]], [[0, 3, 6]]]), - ]) - def test_ragged_tensor_input(self, txt_input, normalization_form, - post_norm_offsets, expected): - txt_input = ragged_factory_ops.constant(txt_input) - post_norm_offsets = ragged_factory_ops.constant( - 
post_norm_offsets, dtype="int64") - _, offsets_map = normalize_ops.normalize_utf8_with_offsets_map( - txt_input, normalization_form) - pre_norm_offsets = normalize_ops.find_source_offsets( - offsets_map, post_norm_offsets) - self.assertAllEqual(expected, pre_norm_offsets) - - def test_string_ragged_dimension_lower_than_offsets_input(self): - txt = ragged_factory_ops.constant([ - ["株式会社"], - [], - ["KADOKAWA"], - ]) - _, offsets_map = normalize_ops.normalize_utf8_with_offsets_map(txt, u"NFKC") - post_norm_offsets = ragged_factory_ops.constant( - [[[0, 1, 2]], [[0, 1, 2]], [[0, 1, 2]]], dtype="int64") - with self.assertRaises(errors.InvalidArgumentError): - bomb = normalize_ops.find_source_offsets(offsets_map, post_norm_offsets) - self.evaluate(bomb) - - def test_string_ragged_dimension_higher_than_offsets_input(self): - txt = ragged_factory_ops.constant([ - ["株式会社"], - [""], - ["KADOKAWA"], - ]) - _, offsets_map = normalize_ops.normalize_utf8_with_offsets_map(txt, u"NFKC") - post_norm_offsets = ragged_factory_ops.constant( - [[[0, 1, 2]], [[]], [[0, 1, 2]]], dtype="int64") - with self.assertRaises(errors.InvalidArgumentError): - bomb = normalize_ops.find_source_offsets(offsets_map, post_norm_offsets) - self.evaluate(bomb) - - def test_sliced_offsets_map_and_input_offset(self): - txt = ragged_factory_ops.constant([ - ["株式会社"], - [""], - ["KADOKAWA"], - ]) - _, offsets_map = normalize_ops.normalize_utf8_with_offsets_map(txt, u"NFKC") - post_norm_offsets = ragged_factory_ops.constant( - [[[0, 1, 2]], [[]], [[0, 1, 2]]], dtype="int64") - - sliced_offsets_map = offsets_map[2] - sliced_post_norm_offsets = post_norm_offsets[2] - sliced_pre_norm_offsets = normalize_ops.find_source_offsets( - sliced_offsets_map, sliced_post_norm_offsets) - expected = [[0, 3, 6]] - self.assertAllEqual(expected, sliced_pre_norm_offsets) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_along_dimension_op.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_along_dimension_op.py
deleted file mode 100644
index 52216c8..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_along_dimension_op.py
+++ /dev/null
@@ -1,179 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Pad-along-dimension op. - -Pads the beginning and end of a given dimension. -""" - -from __future__ import absolute_import -from __future__ import print_function - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_tensor - - -def pad_along_dimension(data, axis=-1, left_pad=None, right_pad=None, - name=None): - """Add padding to the beginning and end of data in a specific dimension. - - Returns a tensor constructed from `data`, where each row in dimension `axis` - is replaced by the concatenation of the left padding followed by the row - followed by the right padding. I.e., if `L=left_pad.shape[0]` and - `R=right_pad.shape[0]`, then: - - ```python - result[i1...iaxis, 0:L] = left_pad - result[i1...iaxis, L:-R] = data[i0...iaxis] - result[i1...iaxis, -R:] = right_pad - ``` - - Args: - data: `<dtype>[O1...ON, A, I1...IM]` A potentially ragged `K` dimensional - tensor with outer dimensions of size `O1...ON`; axis dimension of size - `A`; and inner dimensions of size `I1...IM`. I.e. `K = N + 1 + M`, where - `N>=0` and `M>=0`. - axis: An integer constant specifying the axis along which padding is added. - Negative axis values from `-K` to `-1` are supported. - left_pad: `<dtype>[L, I1...IM]` An `M+1` dimensional tensor that should be - prepended to each row along dimension `axis`; or `None` if no padding - should be added to the left side. - right_pad: `<dtype>[R, I1...IM]` An `M+1` dimensional tensor that should be - appended to each row along dimension `axis`; or `None` if no padding - should be added to the right side. - name: The name of this op (optional). - - Returns: - `<dtype>[O1...ON, L + A + R, I1...IM]` - A potentially ragged `K` dimensional tensor with outer dimensions of size - `O1...ON`; padded axis dimension size `L+A+R`; and inner dimensions of - size `I1...IM`. If `data` is a `RaggedTensor`, then the returned tensor - is a `RaggedTensor` with the same `ragged_rank`. 
- """ - data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name="data") - - if not isinstance(axis, int): - raise TypeError("axis must be an int; got %s" % type(axis).__name__) - - if left_pad is None and right_pad is None: - return data - - with ops.name_scope(name, "PadAlongDimension", [data]): - if data.shape.ndims is not None and (axis < -data.shape.ndims or - axis >= data.shape.ndims): - raise errors.InvalidArgumentError( - None, None, "axis must be between -k <= axis <= -1 OR 0 <= axis < k") - if isinstance(data, ragged_tensor.RaggedTensor): - axis = _get_positive_axis(axis, data.shape.ndims) - - if left_pad is not None: - left_pad = ragged_tensor.convert_to_tensor_or_ragged_tensor( - left_pad, dtype=data.dtype, name="left_pad") - if right_pad is not None: - right_pad = ragged_tensor.convert_to_tensor_or_ragged_tensor( - right_pad, dtype=data.dtype, name="left_pad") - - left_padding = _padding_for_dimension(data, axis, left_pad) - right_padding = _padding_for_dimension(data, axis, right_pad) - - pieces = [left_padding, data, right_padding] - if isinstance(data, ragged_tensor.RaggedTensor): - return array_ops.concat([p for p in pieces if p is not None], axis) - else: - return array_ops.concat([p for p in pieces if p is not None], axis) - - -def _get_positive_axis(axis, ndims): - """Normalize axis` to be positive.""" - if axis >= 0: - return axis - elif ndims is None: - raise ValueError("axis may not be negative if data is ragged and " - "data.ndims is not statically known.") - else: - return axis + ndims - - -def _padding_for_dimension(data, axis, pad_value): - """Tile `pad_value` so it can be used to pad `data` at the given axis. - - Returns a tensor `result` that has the same shape as `data` up to dimension - `axis`, but where each value `data[i0...iaxis]` is replaced by `pad_value`. - I.e., returns `result[i0...iaxis, j0...jN] = pad_value[j0...jN]` - (where `N=rank(pad_value)`). - - Args: - data: The potentially ragged tensor that will be padded. - axis: The axis along which padding will be added. - pad_value: The padding value that should be used, or None if no padding will - be added. `rank(pad_value)` must be `rank(data) - axis`, and - `pad_value.shape[1:]` must be compatible with `data.shape[axis + 1:]`. - - Returns: - A padding tensor with the same rank as `data`, which can be concatenated - to `data` to add padding. - """ - if pad_value is None: - return None - - # Verify shape compatibility. - pad_value.shape[1:].assert_is_compatible_with(data.shape[axis:][1:]) - - if not isinstance(data, ragged_tensor.RaggedTensor): - data_shape = array_ops.shape(data) - pad_shape = array_ops.shape(pad_value) - outer_dimensions = data_shape[:axis] - expanded_pad_shape = array_ops.concat( - [array_ops.ones_like(outer_dimensions), pad_shape], axis=0) - tile_multiples = array_ops.concat( - [outer_dimensions, array_ops.ones_like(pad_shape)], axis=0) - tiled_padding = array_ops.tile( - array_ops.reshape(pad_value, expanded_pad_shape), tile_multiples) - tiled_padding.set_shape(data.shape[:axis].concatenate(pad_value.shape)) - return tiled_padding - - assert axis >= 0 - # Return the padding as-is if we're padding the outermost dimension. 
- if axis == 0: - return pad_value - - elif axis == 1: - if isinstance(pad_value, ragged_tensor.RaggedTensor): - pad_rank = array_ops.rank(pad_value.flat_values) + pad_value.ragged_rank - pad_nrows = pad_value.nrows() - else: - pad_rank = array_ops.rank(pad_value) - pad_nrows = array_ops.shape(pad_value, out_type=dtypes.int64)[0] - - # Return a RaggedTensor that has the same number of rows as `data`, where - # each row contains a single copy of `pad_value`. - data_nrows = data.nrows() - pad_repeats = array_ops.concat( - [[math_ops.cast(data_nrows, dtypes.int32)], - array_ops.ones([pad_rank - 1], dtypes.int32)], - axis=0) - result_values = array_ops.tile(pad_value, pad_repeats) - return ragged_tensor.RaggedTensor.from_row_splits( - result_values, - math_ops.range(0, data_nrows + 1) * pad_nrows) - - else: # Recurse if axis>1. - return ragged_tensor.RaggedTensor.from_row_splits( - _padding_for_dimension(data.values, axis - 1, pad_value), - data.row_splits)
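For reference, a small usage sketch of the `pad_along_dimension` op removed above, assuming the upstream `tensorflow_text` package (imported as `tf_text`); the expected values match the dense cases in the companion test file below:

import tensorflow as tf
import tensorflow_text as tf_text

data = tf.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]])

# Prepend 0 and append 9 to every row along the last axis.
padded = tf_text.pad_along_dimension(data, axis=1, left_pad=[0], right_pad=[9])
print(padded)
# expected: [[0, 1, 1, 1, 9], [0, 2, 2, 1, 9], [0, 3, 3, 1, 9]]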
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_along_dimension_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_along_dimension_op_test.py
deleted file mode 100644
index 9a0c089..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_along_dimension_op_test.py
+++ /dev/null
@@ -1,577 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for pad_along_dimension_op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.eager import context -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import test -from tensorflow_text.python.ops import pad_along_dimension_op - - -@test_util.run_all_in_graph_and_eager_modes -class PadAlongDimensionOpTest(test_util.TensorFlowTestCase, - parameterized.TestCase): - - def test_pads_along_positive_inner_dimension(self): - """Test padding along the inner dimension with a positive axis integer.""" - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]) - axis = 1 - left_pad_value = [0] - right_pad_value = [9] - expected_result = constant_op.constant([[0, 1, 1, 1, 9], [0, 2, 2, 1, 9], - [0, 3, 3, 1, 9]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_pads_along_positive_outer_dimension(self): - """Test padding along the outer dimension with a positive axis integer.""" - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]) - axis = 0 - left_pad_value = [[0, 0, 0]] - right_pad_value = [[9, 9, 9]] - expected_result = constant_op.constant([[0, 0, 0], [1, 1, 1], [2, 2, 1], - [3, 3, 1], [9, 9, 9]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_pads_along_negative_inner_dimension(self): - """Test padding along the inner dimension with a negative axis integer.""" - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]) - axis = -1 - left_pad_value = [0] - right_pad_value = [9] - expected_result = constant_op.constant([[0, 1, 1, 1, 9], [0, 2, 2, 1, 9], - [0, 3, 3, 1, 9]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_pads_along_negative_outer_dimension(self): - """Test padding along the outer dimension with a negative axis integer.""" - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]) - axis = -2 - left_pad_value = [[0, 0, 0]] - right_pad_value = [[9, 9, 9]] - expected_result = constant_op.constant([[0, 0, 0], [1, 1, 1], [2, 2, 1], - [3, 3, 1], [9, 9, 9]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - 
data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_no_left_padding(self): - """Test that not specifying a left pad means no left padding.""" - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]) - axis = 1 - right_pad_value = [9] - expected_result = constant_op.constant([[1, 1, 1, 9], [2, 2, 1, 9], - [3, 3, 1, 9]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, axis=axis, right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_no_right_padding(self): - """Test that not specifying a right pad means no right padding.""" - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]) - axis = 1 - left_pad_value = [0] - expected_result = constant_op.constant([[0, 1, 1, 1], [0, 2, 2, 1], - [0, 3, 3, 1]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, axis=axis, left_pad=left_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_string_padding(self): - """Test padding using string values.""" - data = constant_op.constant([['1', '1', '1'], ['2', '2', '2']]) - axis = 1 - left_pad_value = ['0'] - right_pad_value = ['9'] - expected_result = constant_op.constant([['0', '1', '1', '1', '9'], - ['0', '2', '2', '2', '9']]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_string_partial_no_padding(self): - """Test padding using string values but without one padding value.""" - data = constant_op.constant([['1', '1', '1'], ['2', '2', '2']]) - axis = 1 - left_pad_value = ['0', '0'] - expected_result = constant_op.constant([['0', '0', '1', '1', '1'], - ['0', '0', '2', '2', '2']]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, axis=axis, left_pad=left_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_float_padding(self): - """Test padding using float values.""" - data = constant_op.constant([[1.0, 1.0, 1.0]]) - axis = 1 - left_pad_value = [-3.5] - right_pad_value = [3.5] - expected_result = constant_op.constant([[-3.5, 1.0, 1.0, 1.0, 3.5]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_float_partial_no_padding(self): - """Test padding using float values.""" - data = constant_op.constant([[1.0, 1.0, 1.0]]) - axis = 1 - right_pad_value = [3.5, 3.5, 3.5] - expected_result = constant_op.constant([[1.0, 1.0, 1.0, 3.5, 3.5, 3.5]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, axis=axis, right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - def test_padding_tensor_of_unknown_shape(self): - """Test padding a tensor whose shape is not known at graph building time.""" - data = array_ops.placeholder_with_default( - constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]), shape=None) - axis = 1 - left_pad_value = [0] - right_pad_value = [9] - expected_result = constant_op.constant([[0, 1, 1, 1, 9], [0, 2, 2, 1, 9], - [0, 3, 3, 1, 9]]) - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - self.assertAllEqual(expected_result, padded_result) - - 
def test_no_padding(self): - """Test padding using string values.""" - data = constant_op.constant([['1', '1', '1'], ['2', '2', '2']]) - axis = 1 - expected_result = data - - padded_result = pad_along_dimension_op.pad_along_dimension( - data=data, axis=axis, left_pad=None, right_pad=None) - - self.assertAllEqual(expected_result, padded_result) - - def test_invalid_axis(self): - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1]]) - axis = -4 - left_pad_value = [0, 0] - right_pad_value = [9, 9, 9] - - error_msg = 'axis must be between -k <= axis <= -1 OR 0 <= axis < k' - with self.assertRaisesRegexp(errors.InvalidArgumentError, error_msg): - _ = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=axis, - left_pad=left_pad_value, - right_pad=right_pad_value) - - error_msg = 'axis must be an int' - with self.assertRaisesRegexp(TypeError, error_msg): - _ = pad_along_dimension_op.pad_along_dimension( - data=data, - axis=constant_op.constant(0), - left_pad=left_pad_value, - right_pad=right_pad_value) - - @parameterized.parameters([ - dict( - descr='docstring example', - data=[['a', 'b', 'c'], ['d'], ['e', 'f']], - axis=1, - left_pad=['<'], - right_pad=['>'], - expected=[[b'<', b'a', b'b', b'c', b'>'], [b'<', b'd', b'>'], - [b'<', b'e', b'f', b'>']]), - #========================================================================= - # axis=0 - #========================================================================= - dict( - descr='2D data, axis=0: left padding only', - data=[[1, 2], [3], [4, 5, 6]], - axis=0, - left_pad=[[0]], - expected=[[0], [1, 2], [3], [4, 5, 6]]), - dict( - descr='2D data, axis=0: right padding only', - data=[[1, 2], [3], [4, 5, 6]], - axis=0, - right_pad=[[9, 99], [999]], - expected=[[1, 2], [3], [4, 5, 6], [9, 99], [999]]), - dict( - descr='2D data, axis=0: pad both sides', - data=[[1, 2], [3], [4, 5, 6]], - axis=0, - left_pad=[[0]], - right_pad=[[9, 99], [999]], - expected=[[0], [1, 2], [3], [4, 5, 6], [9, 99], [999]]), - dict( - descr='3D data, axis=0', - data=[[[1, 2], [3]], [[4]]], - axis=0, - right_pad=[[[9], [99, 999]], [[9999]]], - expected=[[[1, 2], [3]], [[4]], [[9], [99, 999]], [[9999]]]), - dict( - descr='4D data, axis=0', - data=[[[[1, 2]]], [[[4]]]], - axis=0, - left_pad=[[[[9, 9], [9]], [[9]]]], - expected=[[[[9, 9], [9]], [[9]]], [[[1, 2]]], [[[4]]]]), - dict( - descr='2D data, axis=-2: pad both sides', - data=[[1, 2], [3], [4, 5, 6]], - axis=-2, - left_pad=[[0]], - right_pad=[[9, 99], [999]], - expected=[[0], [1, 2], [3], [4, 5, 6], [9, 99], [999]]), - dict( - descr='3D data, axis=-3', - data=[[[1, 2], [3]], [[4]]], - axis=-3, - right_pad=[[[9], [99, 999]], [[9999]]], - expected=[[[1, 2], [3]], [[4]], [[9], [99, 999]], [[9999]]]), - dict( - descr='4D data, axis=-4', - data=[[[[1, 2]]], [[[4]]]], - axis=-4, - left_pad=[[[[9, 9], [9]], [[9]]]], - expected=[[[[9, 9], [9]], [[9]]], [[[1, 2]]], [[[4]]]]), - #========================================================================= - # axis=1 - #========================================================================= - dict( - descr='2D data, axis=1: left padding only', - data=[[1, 2], [3]], - axis=1, - left_pad=[0], - expected=[[0, 1, 2], [0, 3]]), - dict( - descr='2D data, axis=1: right padding only', - data=[[1, 2], [3]], - axis=1, - right_pad=[9, 99], - expected=[[1, 2, 9, 99], [3, 9, 99]]), - dict( - descr='2D data, axis=1: pad both sides', - data=[[1, 2], [3]], - axis=1, - left_pad=[0], - right_pad=[9, 99], - expected=[[0, 1, 2, 9, 99], [0, 3, 9, 99]]), - dict( - descr='3D data, 
axis=1', - data=[[[1, 2], [3]], [[4]]], - axis=1, - left_pad=[[0]], - right_pad=[[9], [99, 999]], - expected=[[[0], [1, 2], [3], [9], [99, 999]], - [[0], [4], [9], [99, 999]]]), - dict( - descr='4D data, axis=1', - data=[[[[1, 2]]], [[[4]]]], - axis=1, - left_pad=[[[0]]], - right_pad=[[[9]]], - expected=[[[[0]], [[1, 2]], [[9]]], [[[0]], [[4]], [[9]]]]), - dict( - descr='2D data, axis=-1: pad both sides', - data=[[1, 2], [3]], - axis=-1, - left_pad=[0], - right_pad=[9, 99], - expected=[[0, 1, 2, 9, 99], [0, 3, 9, 99]]), - dict( - descr='3D data, axis=-2', - data=[[[1, 2], [3]], [[4]]], - axis=-2, - left_pad=[[0]], - right_pad=[[9], [99, 999]], - expected=[[[0], [1, 2], [3], [9], [99, 999]], - [[0], [4], [9], [99, 999]]]), - dict( - descr='4D data, axis=-3', - data=[[[[1, 2]]], [[[4]]]], - axis=-3, - left_pad=[[[0]]], - right_pad=[[[9]]], - expected=[[[[0]], [[1, 2]], [[9]]], [[[0]], [[4]], [[9]]]]), - #========================================================================= - # axis=2 - #========================================================================= - dict( - descr='3D data, axis=2', - data=[[[1, 2], [3]], [[4]]], - axis=2, - left_pad=[0], - right_pad=[9], - expected=[[[0, 1, 2, 9], [0, 3, 9]], [[0, 4, 9]]]), - dict( - descr='4D data, axis=2', - data=[[[[1, 2], [3]], [[4]]], [[[5]]]], - axis=2, - left_pad=[[0]], - right_pad=[[9]], - expected=[[[[0], [1, 2], [3], [9]], [[0], [4], [9]]], [[[0], [5], - [9]]]]), - dict( - descr='3D data, axis=-1', - data=[[[1, 2], [3]], [[4]]], - axis=-1, - left_pad=[0], - right_pad=[9], - expected=[[[0, 1, 2, 9], [0, 3, 9]], [[0, 4, 9]]]), - dict( - descr='4D data, axis=-2', - data=[[[[1, 2], [3]], [[4]]], [[[5]]]], - axis=-2, - left_pad=[[0]], - right_pad=[[9]], - expected=[[[[0], [1, 2], [3], [9]], [[0], [4], [9]]], [[[0], [5], - [9]]]]), - #========================================================================= - # axis=3 - #========================================================================= - dict( - descr='4D data, axis=3', - data=[[[[1, 2], [3]], [[4, 5, 6]]], [[[7, 8]]]], - axis=3, - left_pad=[0], - right_pad=[9, 99], - expected=[[[[0, 1, 2, 9, 99], [0, 3, 9, 99]], [[0, 4, 5, 6, 9, 99]]], - [[[0, 7, 8, 9, 99]]]]), - dict( - descr='4D data, axis=-1', - data=[[[[1, 2], [3]], [[4, 5, 6]]], [[[7, 8]]]], - axis=-1, - left_pad=[0], - right_pad=[9, 99], - expected=[[[[0, 1, 2, 9, 99], [0, 3, 9, 99]], [[0, 4, 5, 6, 9, 99]]], - [[[0, 7, 8, 9, 99]]]]), - ]) - def testRaggedPadDimension(self, - descr, - data, - axis, - expected, - left_pad=None, - right_pad=None, - ragged_rank=None): - data = self._convert_ragged(data, ragged_rank) - positive_axis = axis if axis >= 0 else axis + data.shape.ndims - assert positive_axis >= 0 - left_pad = self._convert_ragged(left_pad, data.ragged_rank - positive_axis) - right_pad = self._convert_ragged(right_pad, - data.ragged_rank - positive_axis) - padded = pad_along_dimension_op.pad_along_dimension(data, axis, left_pad, - right_pad) - - self.assertAllEqual(padded, expected) - - def testRaggedPadDimensionErrors(self): - ragged_data = ragged_factory_ops.constant([[1, 2], [3, 4]]) - self.assertRaisesRegexp( - errors.InvalidArgumentError, - 'axis must be between -k <= axis <= -1 OR 0 <= axis < k', - pad_along_dimension_op.pad_along_dimension, - ragged_data, - left_pad=[0], - axis=2) - self.assertRaisesRegexp( - ValueError, - r'Shapes .* are incompatible', - pad_along_dimension_op.pad_along_dimension, - ragged_data, - axis=1, - left_pad=ragged_data) - if not context.executing_eagerly(): - self.assertRaisesRegexp( 
- ValueError, 'axis may not be negative if data is ragged ' - 'and data.ndims is not statically known.', - pad_along_dimension_op.pad_along_dimension, - ragged_tensor.RaggedTensor.from_tensor( - array_ops.placeholder_with_default([[1, 2], [3, 4]], shape=None)), - left_pad=[0], - axis=-1) - - @parameterized.parameters([ - #========================================================================= - # axis=0: pad_value is returned as-is. - #========================================================================= - dict( - descr='2D data, axis=0, len(pad_value)=0', - data=[[1, 2], [3]], - axis=0, - pad_value=[], - expected=[]), - dict( - descr='2D data, axis=0, len(pad_value)=1', - data=[[1, 2], [3]], - axis=0, - pad_value=[[9]], - expected=[[9]]), - dict( - descr='2D data, axis=0, len(pad_value)=2', - data=[[1, 2], [3]], - axis=0, - pad_value=[[9], [99]], - expected=[[9], [99]]), - dict( - descr='3D data, axis=0', - data=[[[1, 2], [3]], [[4]]], - axis=0, - pad_value=[[[9], [99, 999]], [[9999]]], - expected=[[[9], [99, 999]], [[9999]]]), - dict( - descr='4D data, axis=0', - data=[[[[1, 2]]], [[[4]]]], - axis=0, - pad_value=[[[[9, 9], [9]], [[9]]]], - expected=[[[[9, 9], [9]], [[9]]]]), - #========================================================================= - # axis=1: pad_value is repeated for each item in 1st dimension of data - #========================================================================= - dict( - descr='2D data, axis=1, len(pad_value)=1', - data=[[1, 2], [3]], - axis=1, - pad_value=[9], - expected=[[9], [9]]), - dict( - descr='2D data, axis=1, len(pad_value)=2', - data=[[1, 2], [3]], - axis=1, - pad_value=[9, 99], - expected=[[9, 99], [9, 99]]), - dict( - descr='2D data, axis=1, len(pad_value)=0', - data=[[1, 2], [3]], - axis=1, - pad_value=[], - expected=[[], []]), - dict( - descr='3D data, axis=1', - data=[[[1, 2], [3]], [[4]]], - axis=1, - pad_value=[[9], [99, 999]], - expected=[[[9], [99, 999]], [[9], [99, 999]]]), - dict( - descr='4D data, axis=1', - data=[[[[1, 2]]], [[[4]]]], - axis=1, - pad_value=[[[9, 9], [9]]], - expected=[[[[9, 9], [9]]], [[[9, 9], [9]]]]), - #========================================================================= - # axis=2: pad_value is repeated for each item in 2nd dimension of data - #========================================================================= - dict( - descr='3D data, axis=2', - data=[[[1, 2], [3]], [[4]]], - axis=2, - pad_value=[9, 99], - expected=[[[9, 99], [9, 99]], [[9, 99]]]), - dict( - descr='4D data, axis=2', - data=[[[[1, 2], [3]], [[4]]], [[[5]]]], - axis=2, - pad_value=[[9, 99], [999]], - expected=[[[[9, 99], [999]], [[9, 99], [999]]], [[[9, 99], [999]]]]), - #========================================================================= - # axis=3: pad_value is repeated for each item in 3rd dimension of data - #========================================================================= - dict( - descr='4D data, axis=3', - data=[[[[1, 2], [3]], [[4, 5, 6]]], [[[7, 8]]]], - axis=3, - pad_value=[9, 99], - expected=[[[[9, 99], [9, 99]], [[9, 99]]], [[[9, 99]]]]), - ]) - def testPaddingForRaggedDimensionHelper(self, - descr, - data, - axis, - expected, - pad_value=None, - ragged_rank=None): - data = self._convert_ragged(data, ragged_rank) - pad_value = self._convert_ragged(pad_value, data.ragged_rank - axis) - pad = pad_along_dimension_op._padding_for_dimension(data, axis, pad_value) - - self.assertAllEqual(pad, expected) - self.assertEqual(data.shape.ndims, pad.shape.ndims) - - def _convert_ragged(self, value, 
ragged_rank): - if value is None: - return None - if ragged_rank is None or ragged_rank > 0: - return ragged_factory_ops.constant(value, ragged_rank=ragged_rank) - else: - return constant_op.constant(value) - - -if __name__ == '__main__': - test.main()
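The ragged cases behave analogously: `axis=1` pads inside each row, while `axis=0` pads whole rows onto the batch. A hedged sketch mirroring two of the parameterized cases above, again assuming the upstream `tf_text` package:

import tensorflow as tf
import tensorflow_text as tf_text

sentences = tf.ragged.constant([["a", "b", "c"], ["d"], ["e", "f"]])

# axis=1: wrap every (ragged) row in start/end markers.
marked = tf_text.pad_along_dimension(sentences, axis=1, left_pad=["<"], right_pad=[">"])
# expected: [[b'<', b'a', b'b', b'c', b'>'], [b'<', b'd', b'>'], [b'<', b'e', b'f', b'>']]

# axis=0: prepend and append whole rows to the batch instead.
rows = tf.ragged.constant([[1, 2], [3], [4, 5, 6]])
framed = tf_text.pad_along_dimension(
    rows, axis=0,
    left_pad=tf.ragged.constant([[0]]),
    right_pad=tf.ragged.constant([[9, 99], [999]]))
# expected: [[0], [1, 2], [3], [4, 5, 6], [9, 99], [999]]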
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_model_inputs_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_model_inputs_ops.py
deleted file mode 100644
index 2eabe9b..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_model_inputs_ops.py
+++ /dev/null
@@ -1,87 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Library of ops to pack model inputs.""" -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_tensor - - -def pad_model_inputs(input, max_seq_length, pad_value=0): # pylint: disable=redefined-builtin - r"""Pad model input and generate corresponding input masks. - - `pad_model_inputs` performs the final packaging of a model's inputs commonly - found in text models. This includes padding out (or simply truncating) to a - fixed-size, 2-dimensional `Tensor` and generating mask `Tensor`s (of the same - 2D shape) with values of 0 if the corresponding item is a pad value and 1 if - it is part of the original input. - - Note that a simple truncation strategy (drop everything after max sequence - length) is used to force the inputs to the specified shape. This may be - incorrect and users should instead apply a `Trimmer` upstream to safely - truncate large inputs. - - >>> input_data = tf.ragged.constant([ - ... [101, 1, 2, 102, 10, 20, 102], - ... [101, 3, 4, 102, 30, 40, 50, 60, 70, 80], - ... [101, 5, 6, 7, 8, 9, 102, 70], - ... ], np.int32) - >>> data, mask = pad_model_inputs(input=input_data, max_seq_length=9) - >>> print("data: %s, mask: %s" % (data, mask)) - data: tf.Tensor( - [[101 1 2 102 10 20 102 0 0] - [101 3 4 102 30 40 50 60 70] - [101 5 6 7 8 9 102 70 0]], shape=(3, 9), dtype=int32), - mask: tf.Tensor( - [[1 1 1 1 1 1 1 0 0] - [1 1 1 1 1 1 1 1 1] - [1 1 1 1 1 1 1 1 0]], shape=(3, 9), dtype=int32) - - Args: - input: A `RaggedTensor` with rank >= 2. - max_seq_length: An int, or scalar `Tensor`. The "input" `Tensor` will be - flattened down to 2 dimensions and then have its 2nd dimension either - padded out or truncated to this size. - pad_value: An int or scalar `Tensor` specifying the value used for padding. - - Returns: - A tuple of (padded_input, pad_mask) where: - - padded_input: A `Tensor` corresponding to `inputs` that has been - padded/truncated out to a fixed size and flattened to 2 - dimensions. - pad_mask: A `Tensor` corresponding to `padded_input` whose values are - 0 if the corresponding item is a pad value and 1 if it is not. 
- """ - with ops.name_scope("pad_model_inputs"): - # Verify that everything is a RaggedTensor - if not isinstance(input, ragged_tensor.RaggedTensor): - raise TypeError("Expecting a `RaggedTensor`, instead found: " + - str(input)) - - # Flatten down to `merge_axis` - input = input.merge_dims(1, -1) if input.ragged_rank > 1 else input - - # Pad to fixed Tensor - target_shape = math_ops.cast([-1, max_seq_length], dtypes.int64) - padded_input = input.to_tensor(shape=target_shape, default_value=pad_value) - - # Get padded input mask - input_mask = array_ops.ones_like(input) - padded_input_mask = input_mask.to_tensor(shape=target_shape) - - return padded_input, padded_input_mask
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_model_inputs_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_model_inputs_ops_test.py
deleted file mode 100644
index a47921b8..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pad_model_inputs_ops_test.py
+++ /dev/null
@@ -1,121 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for ops to pack model inputs.""" -from absl.testing import parameterized -import numpy as np - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import pad_model_inputs_ops - - -class ModelInputPackerTest(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - # Test out padding out when sequence length < max_seq_length. - dict( - pack_inputs=[ - [101, 1, 2, 102, 10, 20, 102], - [101, 3, 4, 102, 30, 40, 50, 60], - [101, 5, 6, 7, 8, 9, 102, 70], - ], - max_seq_length=10, - expected=[[101, 1, 2, 102, 10, 20, 102, 0, 0, 0], - [101, 3, 4, 102, 30, 40, 50, 60, 0, 0], - [101, 5, 6, 7, 8, 9, 102, 70, 0, 0]], - expected_mask=[[1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 0, 0]], - ), - dict( - pack_inputs=[ - [0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 1], - ], - expected=[ - [0, 0, 0, 0, 1, 1, 1, 0, 0, 0], - [0, 0, 0, 0, 1, 1, 1, 1, 0, 0], - [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], - ], - expected_mask=[ - [1, 1, 1, 1, 1, 1, 1, 0, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - [1, 1, 1, 1, 1, 1, 1, 1, 0, 0], - ], - max_seq_length=10, - ), - # Test out truncation when sequence length > max_seq_length. - dict( - pack_inputs=[ - [0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 0, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 1], - ], - expected=[ - [0, 0, 0, 0, 1], - [0, 0, 0, 0, 1], - [0, 0, 0, 0, 0], - ], - expected_mask=[ - [1, 1, 1, 1, 1], - [1, 1, 1, 1, 1], - [1, 1, 1, 1, 1], - ], - max_seq_length=5, - ), - ]) - def testPadModelInputs(self, - pack_inputs, - expected, - expected_mask, - max_seq_length=10): - # Pack everything as a RaggedTensor. - pack_inputs = ragged_factory_ops.constant(pack_inputs) - - # Pad to max_seq_length and construct input_mask - actual_padded, actual_mask = pad_model_inputs_ops.pad_model_inputs( - pack_inputs, max_seq_length=max_seq_length, pad_value=0) - - # Verify the contents of all the padded (and maybe truncated) values as well - # as the mask. 
- self.assertAllEqual(expected, actual_padded) - self.assertAllEqual(expected_mask, actual_mask) - - @parameterized.named_parameters([ - ("PythonInt", lambda l: l), - ("NpInt32", lambda l: np.array(l, np.int32)), - ("NpInt64", lambda l: np.array(l, np.int64)), - ("TfInt32", lambda l: constant_op.constant(l, dtypes.int32)), - ("TfInt64", lambda l: constant_op.constant(l, dtypes.int64)), - ]) - def testLengthType(self, length_fn): - """Tests types beyond Python int for the max_seq_length argument.""" - pack_inputs = ragged_factory_ops.constant([[1, 2, 3, 4, 5], - [8, 9]], dtypes.int32) - max_seq_length = length_fn(3) - expected_padded = [[1, 2, 3], [8, 9, 0]] - expected_mask = [[1, 1, 1], [1, 1, 0]] - actual_padded, actual_mask = pad_model_inputs_ops.pad_model_inputs( - pack_inputs, max_seq_length=max_seq_length, pad_value=0) - self.assertAllEqual(expected_padded, actual_padded) - self.assertAllEqual(expected_mask, actual_mask) - - -if __name__ == "__main__": - test.main()
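As the `testLengthType` cases above show, `max_seq_length` is not restricted to a Python int, and rows longer than the limit are simply truncated. A minimal sketch under the same `tf_text` assumption:

import tensorflow as tf
import tensorflow_text as tf_text

batch = tf.ragged.constant([[1, 2, 3, 4, 5], [8, 9]], tf.int32)

# max_seq_length may be a Python int, a NumPy integer, or a scalar Tensor.
padded, mask = tf_text.pad_model_inputs(batch, max_seq_length=tf.constant(3))
# padded: [[1, 2, 3], [8, 9, 0]]
# mask:   [[1, 1, 1], [1, 1, 0]]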
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pointer_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/pointer_ops.py
deleted file mode 100644
index a220ca5..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/pointer_ops.py
+++ /dev/null
@@ -1,587 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops that consume or generate index-based pointers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import functools - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_functional_ops -from tensorflow.python.ops.ragged import ragged_gather_ops -from tensorflow.python.ops.ragged import ragged_math_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged import ragged_where_op -from tensorflow.python.ops.ragged import segment_id_ops - - -def gather_with_default(params, indices, default, name=None, axis=0): - """Gather slices with `indices=-1` mapped to `default`. - - This operation is similar to `tf.gather()`, except that any value of `-1` - in `indices` will be mapped to `default`. Example: - - >>> gather_with_default(['a', 'b', 'c', 'd'], [2, 0, -1, 2, -1], '_') - <tf.Tensor: shape=(5,), dtype=string, - numpy=array([b'c', b'a', b'_', b'c', b'_'], dtype=object)> - - Args: - params: The `Tensor` from which to gather values. Must be at least rank - `axis + 1`. - indices: The index `Tensor`. Must have dtype `int32` or `int64`, and values - must be in the range `[-1, params.shape[axis])`. - default: The value to use when `indices` is `-1`. `default.shape` must - be equal to `params.shape[axis + 1:]`. - name: A name for the operation (optional). - axis: The axis in `params` to gather `indices` from. Must be a scalar - `int32` or `int64`. Supports negative indices. - - Returns: - A `Tensor` with the same type as `param`, and with shape - `params.shape[:axis] + indices.shape + params.shape[axis + 1:]`. - """ - # This implementation basically just concatenates the default value and - # the params together, and then uses gather(default_plus_params, indices + 1) - # to get the appropriate values. Most of the complexity below has to do - # with properly handling cases where axis != 0, in which case we need to tile - # the default before concatenating it. - with ops.name_scope(name, 'GatherWithDefault', - [params, indices, default, axis]): - # Convert inputs to tensors. - indices = ops.convert_to_tensor( - indices, name='indices', preferred_dtype=dtypes.int32) - params = ops.convert_to_tensor(params, name='params') - default = ops.convert_to_tensor(default, name='default', dtype=params.dtype) - - if axis == 0: - tiled_default = array_ops.stack([default]) - - else: - # Get ranks & shapes of inputs. - params_rank = array_ops.rank(params) - params_shape = array_ops.shape(params) - default_shape = array_ops.shape(default) - outer_params_shape = params_shape[:axis] - - # This will equal `axis` if axis>=0. 
- outer_params_rank = array_ops.shape(outer_params_shape)[0] - - # Add dimensions (with size=1) to default, so its rank matches params. - new_shape = array_ops.concat([ - array_ops.ones([outer_params_rank + 1], dtypes.int32), default_shape - ], - axis=0) - reshaped_default = array_ops.reshape(default, new_shape) - - # Tile the default for any dimension dim<axis, so its size matches params. - multiples = array_ops.concat([ - outer_params_shape, - array_ops.ones(params_rank - outer_params_rank, dtypes.int32) - ], - axis=0) - tiled_default = array_ops.tile(reshaped_default, multiples) - - # Prepend the default value to params (on the chosen axis). Thus, the - # default value is at index 0, and all other values have their index - # incremented by one. - default_plus_params = array_ops.concat([tiled_default, params], axis=axis) - return array_ops.gather(default_plus_params, indices + 1, axis=axis) - - -def span_overlaps(source_start, - source_limit, - target_start, - target_limit, - contains=False, - contained_by=False, - partial_overlap=False, - name=None): - """Returns a boolean tensor indicating which source and target spans overlap. - - The source and target spans are specified using B+1 dimensional tensors, - with `B>=0` batch dimensions followed by a final dimension that lists the - span offsets for each span in the batch: - - * The `i`th source span in batch `b1...bB` starts at - `source_start[b1...bB, i]` (inclusive), and extends to just before - `source_limit[b1...bB, i]` (exclusive). - * The `j`th target span in batch `b1...bB` starts at - `target_start[b1...bB, j]` (inclusive), and extends to just before - `target_limit[b1...bB, j]` (exclusive). - - `result[b1...bB, i, j]` is true if the `i`th source span overlaps with the - `j`th target span in batch `b1...bB`, where a source span overlaps a target - span if any of the following are true: - - * The spans are identical. - * `contains` is true, and the source span contains the target span. - * `contained_by` is true, and the source span is contained by the target - span. - * `partial_overlap` is true, and there is a non-zero overlap between the - source span and the target span. - - #### Example: - Given the following source and target spans (with no batch dimensions): - - >>> # 0 5 10 15 20 25 30 35 40 - >>> # |====|====|====|====|====|====|====|====| - >>> # Source: [-0-] [-1-] [2] [-3-][-4-][-5-] - >>> # Target: [-0-][-1-] [-2-] [3] [-4-][-5-] - >>> # |====|====|====|====|====|====|====|====| - >>> source_start = [0, 10, 16, 20, 25, 30] - >>> source_limit = [5, 15, 19, 25, 30, 35] - >>> target_start = [0, 5, 15, 21, 27, 31] - >>> target_limit = [5, 10, 20, 24, 32, 37] - - `result[i, j]` will be true at the following locations: - - * `[0, 0]` (always) - * `[2, 2]` (if contained_by=True or partial_overlaps=True) - * `[3, 3]` (if contains=True or partial_overlaps=True) - * `[4, 4]` (if partial_overlaps=True) - * `[5, 4]` (if partial_overlaps=True) - * `[5, 5]` (if partial_overlaps=True) - - Args: - source_start: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, source_size]`: the start offset of each source span. - source_limit: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, source_size]`: the limit offset of each source span. - target_start: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, target_size]`: the start offset of each target span. - target_limit: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, target_size]`: the limit offset of each target span. 
- contains: If true, then a source span is considered to overlap a target span - when the source span contains the target span. - contained_by: If true, then a source span is considered to overlap a target - span when the source span is contained by the target span. - partial_overlap: If true, then a source span is considered to overlap a - target span when the source span partially overlaps the target span. - name: A name for the operation (optional). - - Returns: - A B+2 dimensional potentially ragged boolean tensor with shape - `[D1...DB, source_size, target_size]`. - - Raises: - ValueError: If the span tensors are incompatible. - """ - _check_type(contains, 'contains', bool) - _check_type(contained_by, 'contained_by', bool) - _check_type(partial_overlap, 'partial_overlap', bool) - - scope_tensors = [source_start, source_limit, target_start, target_limit] - with ops.name_scope(name, 'SpanOverlaps', scope_tensors): - # Convert input tensors. - source_start = ragged_tensor.convert_to_tensor_or_ragged_tensor( - source_start, name='source_start') - source_limit = ragged_tensor.convert_to_tensor_or_ragged_tensor( - source_limit, name='source_limit') - target_start = ragged_tensor.convert_to_tensor_or_ragged_tensor( - target_start, name='target_start') - target_limit = ragged_tensor.convert_to_tensor_or_ragged_tensor( - target_limit, name='target_limit') - span_tensors = [source_start, source_limit, target_start, target_limit] - - # Verify input tensor shapes and types. - source_start.shape.assert_is_compatible_with(source_limit.shape) - target_start.shape.assert_is_compatible_with(target_limit.shape) - source_start.shape.assert_same_rank(target_start.shape) - source_start.shape.assert_same_rank(target_limit.shape) - source_limit.shape.assert_same_rank(target_start.shape) - source_limit.shape.assert_same_rank(target_limit.shape) - if not (source_start.dtype == target_start.dtype == source_limit.dtype == - target_limit.dtype): - raise TypeError('source_start, source_limit, target_start, and ' - 'target_limit must all have the same dtype') - ndims = set( - [t.shape.ndims for t in span_tensors if t.shape.ndims is not None]) - assert len(ndims) <= 1 # because of assert_same_rank statements above. - - if all(not isinstance(t, ragged_tensor.RaggedTensor) for t in span_tensors): - return _span_overlaps(source_start, source_limit, target_start, - target_limit, contains, contained_by, - partial_overlap) - - elif all(isinstance(t, ragged_tensor.RaggedTensor) for t in span_tensors): - if not ndims: - raise ValueError('For ragged inputs, the shape.ndims of at least one ' - 'span tensor must be statically known.') - if list(ndims)[0] == 2: - return _span_overlaps(source_start, source_limit, target_start, - target_limit, contains, contained_by, - partial_overlap) - else: - # Handle ragged batch dimension by recursion on values. - row_splits = span_tensors[0].row_splits - shape_checks = [ - check_ops.assert_equal( - t.row_splits, - row_splits, - message='Mismatched ragged shapes for batch dimensions') - for t in span_tensors[1:] - ] - with ops.control_dependencies(shape_checks): - return ragged_tensor.RaggedTensor.from_row_splits( - span_overlaps(source_start.values, source_limit.values, - target_start.values, target_limit.values, contains, - contained_by, partial_overlap), row_splits) - - else: - # Mix of dense and ragged tensors. 
- raise ValueError('Span tensors must all have the same ragged_rank') - - -def _span_overlaps(source_start, source_limit, target_start, target_limit, - contains, contained_by, partial_overlap): - """Implementation of span_overlaps(). - - If the inputs are ragged, then the source tensors must have exactly one - batch dimension. (I.e., `B=1` in the param descriptions below.) - - Args: - source_start: `<int>[D1...DB, source_size]` - source_limit: `<int>[D1...DB, source_size]` - target_start: `<int>[D1...DB, target_size]` - target_limit: `<int>[D1...DB, target_size]` - contains: `bool` - contained_by: `bool` - partial_overlap: `bool` - - Returns: - `<bool>[D1...DB, source_size, target_size]` - """ - if isinstance(source_start, ops.Tensor): - # Reshape the source tensors to [D1...DB, source_size, 1] and the - # target tensors to [D1...DB, 1, target_size], so we can use broadcasting. - # In particular, elementwise_op(source_x, target_x) will have shape - # [D1...DB, source_size, target_size]. - source_start = array_ops.expand_dims(source_start, -1) - source_limit = array_ops.expand_dims(source_limit, -1) - target_start = array_ops.expand_dims(target_start, -2) - target_limit = array_ops.expand_dims(target_limit, -2) - - equal = math_ops.equal - less_equal = math_ops.less_equal - less = math_ops.less - logical_and = math_ops.logical_and - logical_or = math_ops.logical_or - - else: - # Broadcast the source span indices to all have shape - # [batch_size, (source_size), (target_size)]. - (source_start, source_limit) = _broadcast_ragged_sources_for_overlap( - source_start, source_limit, target_start.row_splits) - (target_start, target_limit) = _broadcast_ragged_targets_for_overlap( - target_start, target_limit, source_start.row_splits) - - # Use map_flat_values to perform elementwise operations. - equal = functools.partial(ragged_functional_ops.map_flat_values, - math_ops.equal) - less_equal = functools.partial(ragged_functional_ops.map_flat_values, - math_ops.less_equal) - less = functools.partial(ragged_functional_ops.map_flat_values, - math_ops.less) - logical_and = functools.partial(ragged_functional_ops.map_flat_values, - math_ops.logical_and) - logical_or = functools.partial(ragged_functional_ops.map_flat_values, - math_ops.logical_or) - - if partial_overlap: - return logical_or( - logical_and( - less_equal(source_start, target_start), - less(target_start, source_limit)), - logical_and( - less_equal(target_start, source_start), - less(source_start, target_limit))) - elif contains and contained_by: - return logical_or( - logical_and( - less_equal(source_start, target_start), - less_equal(target_limit, source_limit)), - logical_and( - less_equal(target_start, source_start), - less_equal(source_limit, target_limit))) - elif contains: - return logical_and( - less_equal(source_start, target_start), - less_equal(target_limit, source_limit)) - elif contained_by: - return logical_and( - less_equal(target_start, source_start), - less_equal(source_limit, target_limit)) - else: - return logical_and( - equal(target_start, source_start), equal(source_limit, target_limit)) - - -def _broadcast_ragged_targets_for_overlap(target_start, target_limit, - source_splits): - """Repeats target indices for each source item in the same batch. - - Args: - target_start: `<int>[batch_size, (target_size)]` - target_limit: `<int>[batch_size, (target_size)]` - source_splits: `<int64>[batch_size, (source_size+1)]` - - Returns: - `<int>[batch_size, (source_size), (target_size)]`. 
- A tuple of ragged tensors `(tiled_target_start, tiled_target_limit)` where: - - * `tiled_target_start[b, s, t] = target_start[b, t]` - * `tiled_target_limit[b, s, t] = target_limit[b, t]` - """ - source_batch_ids = segment_id_ops.row_splits_to_segment_ids(source_splits) - - target_start = ragged_tensor.RaggedTensor.from_value_rowids( - ragged_gather_ops.gather(target_start, source_batch_ids), - source_batch_ids) - target_limit = ragged_tensor.RaggedTensor.from_value_rowids( - ragged_gather_ops.gather(target_limit, source_batch_ids), - source_batch_ids) - return (target_start, target_limit) - - -def _broadcast_ragged_sources_for_overlap(source_start, source_limit, - target_splits): - """Repeats source indices for each target item in the same batch. - - Args: - source_start: `<int>[batch_size, (source_size)]` - source_limit: `<int>[batch_size, (source_size)]` - target_splits: `<int64>[batch_size, (target_size+1)]` - - Returns: - `<int>[batch_size, (source_size), (target_size)]`. - A tuple of tensors `(tiled_source_start, tiled_source_limit)` where: - - * `tiled_target_start[b, s, t] = source_start[b, s]` - * `tiled_target_limit[b, s, t] = source_limit[b, s]` - """ - source_splits = source_start.row_splits - target_rowlens = target_splits[1:] - target_splits[:-1] - source_batch_ids = segment_id_ops.row_splits_to_segment_ids(source_splits) - - # <int64>[sum(source_size[b] for b in range(batch_size))] - # source_repeats[i] is the number of target spans in the batch that contains - # source span i. We need to add a new ragged dimension that repeats each - # source span this number of times. - source_repeats = ragged_gather_ops.gather(target_rowlens, source_batch_ids) - - # <int64>[sum(source_size[b] for b in range(batch_size)) + 1] - # The row_splits tensor for the inner ragged dimension of the result tensors. - inner_splits = array_ops.concat([[0], math_ops.cumsum(source_repeats)], - axis=0) - - # <int64>[sum(source_size[b] * target_size[b] for b in range(batch_size))] - # Indices for gathering source indices. - source_indices = segment_id_ops.row_splits_to_segment_ids(inner_splits) - - source_start = ragged_tensor.RaggedTensor.from_nested_row_splits( - array_ops.gather(source_start.values, source_indices), - [source_splits, inner_splits]) - source_limit = ragged_tensor.RaggedTensor.from_nested_row_splits( - array_ops.gather(source_limit.values, source_indices), - [source_splits, inner_splits]) - - return source_start, source_limit - - -def span_alignment(source_start, - source_limit, - target_start, - target_limit, - contains=False, - contained_by=False, - partial_overlap=False, - multivalent_result=False, - name=None): - """Return an alignment from a set of source spans to a set of target spans. - - The source and target spans are specified using B+1 dimensional tensors, - with `B>=0` batch dimensions followed by a final dimension that lists the - span offsets for each span in the batch: - - * The `i`th source span in batch `b1...bB` starts at - `source_start[b1...bB, i]` (inclusive), and extends to just before - `source_limit[b1...bB, i]` (exclusive). - * The `j`th target span in batch `b1...bB` starts at - `target_start[b1...bB, j]` (inclusive), and extends to just before - `target_limit[b1...bB, j]` (exclusive). - - `result[b1...bB, i]` contains the index (or indices) of the target span that - overlaps with the `i`th source span in batch `b1...bB`. 
The - `multivalent_result` parameter indicates whether the result should contain - a single span that aligns with the source span, or all spans that align with - the source span. - - * If `multivalent_result` is false (the default), then `result[b1...bB, i]=j` - indicates that the `j`th target span overlaps with the `i`th source span - in batch `b1...bB`. If no target spans overlap with the `i`th target span, - then `result[b1...bB, i]=-1`. - - * If `multivalent_result` is true, then `result[b1...bB, i, n]=j` indicates - that the `j`th target span is the `n`th span that overlaps with the `i`th - source span in in batch `b1...bB`. - - For a definition of span overlap, see the docstring for `span_overlaps()`. - - #### Examples: - - Given the following source and target spans (with no batch dimensions): - - >>> # 0 5 10 15 20 25 30 35 40 45 50 55 60 - >>> # |====|====|====|====|====|====|====|====|====|====|====|====| - >>> # Source: [-0-] [-1-] [2] [3] [4][-5-][-6-][-7-][-8-][-9-] - >>> # Target: [-0-][-1-] [-2-][-3-][-4-] [5] [6] [7] [-8-][-9-][10] - >>> # |====|====|====|====|====|====|====|====|====|====|====|====| - >>> source_starts = [0, 10, 16, 20, 27, 30, 35, 40, 45, 50] - >>> source_limits = [5, 15, 19, 23, 30, 35, 40, 45, 50, 55] - >>> target_starts = [0, 5, 15, 20, 25, 31, 35, 42, 47, 52, 57] - >>> target_limits = [5, 10, 20, 25, 30, 34, 38, 45, 52, 57, 61] - >>> span_alignment(source_starts, source_limits, target_starts, target_limits) - <tf.Tensor: shape=(10,), dtype=int64, - numpy=array([ 0, -1, -1, -1, -1, -1, -1, -1, -1, -1])> - >>> span_alignment(source_starts, source_limits, target_starts, target_limits, - ... multivalent_result=True) - <tf.RaggedTensor [[0], [], [], [], [], [], [], [], [], []]> - >>> span_alignment(source_starts, source_limits, target_starts, target_limits, - ... contains=True) - <tf.Tensor: shape=(10,), dtype=int64, - numpy=array([ 0, -1, -1, -1, -1, 5, 6, 7, -1, -1])> - >>> span_alignment(source_starts, source_limits, target_starts, target_limits, - ... partial_overlap=True, multivalent_result=True) - <tf.RaggedTensor [[0], [], [2], [3], [4], [5], [6], [7], [8], [8, 9]]> - - Args: - source_start: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, source_size]`: the start offset of each source span. - source_limit: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, source_size]`: the limit offset of each source span. - target_start: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, target_size]`: the start offset of each target span. - target_limit: A B+1 dimensional potentially ragged tensor with shape - `[D1...DB, target_size]`: the limit offset of each target span. - contains: If true, then a source span is considered to overlap a target span - when the source span contains the target span. - contained_by: If true, then a source span is considered to overlap a target - span when the source span is contained by the target span. - partial_overlap: If true, then a source span is considered to overlap a - target span when the source span partially overlaps the target span. - multivalent_result: Whether the result should contain a single target span - index (if `multivalent_result=False`) or a list of target span indices (if - `multivalent_result=True`) for each source span. - name: A name for the operation (optional). - - Returns: - An int64 tensor with values in the range: `-1 <= result < target_size`. 
- If `multivalent_result=False`, then the returned tensor has shape - `[source_size]`, where `source_size` is the length of the `source_start` - and `source_limit` input tensors. If `multivalent_result=True`, then the - returned tensor has shape `[source_size, (num_aligned_target_spans)]. - """ - scope_tensors = [source_start, source_limit, target_start, target_limit] - with ops.name_scope(name, 'SpanAlignment', scope_tensors): - source_start = ragged_tensor.convert_to_tensor_or_ragged_tensor( - source_start, name='source_start') - source_limit = ragged_tensor.convert_to_tensor_or_ragged_tensor( - source_limit, name='source_limit') - target_start = ragged_tensor.convert_to_tensor_or_ragged_tensor( - target_start, name='target_start') - target_limit = ragged_tensor.convert_to_tensor_or_ragged_tensor( - target_limit, name='target_limit') - - # <bool>[D1...DB, source_size, target_size] - # overlaps[b1...bB, i, j] is true if source span i overlaps target span j - # (in batch b1...bB). - overlaps = span_overlaps(source_start, source_limit, target_start, - target_limit, contains, contained_by, - partial_overlap) - - # <int64>[D1...DB, source_size, (num_aligned_spans)] - # alignment[b1...bB, i, n]=j if target span j is the n'th target span - # that aligns with source span i (in batch b1...bB). - alignment = _multivalent_span_alignment(overlaps) - - if not multivalent_result: - # <int64>[D1...DB, source_size] - # alignment[b1...bB, i]=j if target span j is the last target span - # that aligns with source span i, or -1 if no target spans align. - alignment = ragged_functional_ops.map_flat_values( - math_ops.maximum, ragged_math_ops.reduce_max(alignment, axis=-1), -1) - return alignment - - -def _multivalent_span_alignment(overlaps): - """Returns the multivalent span alignment for a given overlaps tensor. - - Args: - overlaps: `<int64>[D1...DB, source_size, target_size]`: `overlaps[b1...bB, - i, j]` is true if source span `i` overlaps target span `j` (in batch - `b1...bB`). - - Returns: - `<int64>[D1...DB, source_size, (num_aligned_spans)]`: - `result[b1...bB, i, n]=j` if target span `j` is the `n`'th target span - that aligns with source span `i` (in batch `b1...bB`). - """ - overlaps_ndims = overlaps.shape.ndims - assert overlaps_ndims is not None # guaranteed/checked by span_overlaps() - assert overlaps_ndims >= 2 - - # If there are multiple batch dimensions, then flatten them and recurse. 
- if overlaps_ndims > 3: - if not isinstance(overlaps, ragged_tensor.RaggedTensor): - overlaps = ragged_tensor.RaggedTensor.from_tensor( - overlaps, ragged_rank=overlaps.shape.ndims - 3) - return overlaps.with_values(_multivalent_span_alignment(overlaps.values)) - - elif overlaps_ndims == 2: # no batch dimension - assert not isinstance(overlaps, ragged_tensor.RaggedTensor) - overlap_positions = array_ops.where(overlaps) - return ragged_tensor.RaggedTensor.from_value_rowids( - values=overlap_positions[:, 1], - value_rowids=overlap_positions[:, 0], - nrows=array_ops.shape(overlaps, out_type=dtypes.int64)[0]) - - else: # batch dimension - if not isinstance(overlaps, ragged_tensor.RaggedTensor): - overlaps = ragged_tensor.RaggedTensor.from_tensor(overlaps, ragged_rank=1) - overlap_positions = ragged_where_op.where(overlaps.values) - if isinstance(overlaps.values, ragged_tensor.RaggedTensor): - overlaps_values_nrows = overlaps.values.nrows() - else: - overlaps_values_nrows = array_ops.shape(overlaps.values, - out_type=dtypes.int64)[0] - return overlaps.with_values( - ragged_tensor.RaggedTensor.from_value_rowids( - values=overlap_positions[:, 1], - value_rowids=overlap_positions[:, 0], - nrows=overlaps_values_nrows)) - - -def _check_type(value, name, expected_type): - """Raises TypeError if not isinstance(value, expected_type).""" - if not isinstance(value, expected_type): - raise TypeError('%s must be %s, not %s' % (name, expected_type.__name__, - type(value).__name__))
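The pointer ops removed above are also exposed by the upstream `tensorflow_text` package; a brief sketch of `gather_with_default` and `span_alignment` based on their docstring examples (assuming the `tf_text` import):

import tensorflow as tf
import tensorflow_text as tf_text

# gather_with_default: like tf.gather, but index -1 maps to an explicit default.
params = tf.constant(["a", "b", "c", "d"])
print(tf_text.gather_with_default(params, [2, 0, -1, 2, -1], "_"))
# expected: [b'c', b'a', b'_', b'c', b'_']

# span_alignment: for each source span, find the target span(s) it overlaps.
source_start = [0, 10, 16, 20, 27, 30, 35, 40, 45, 50]
source_limit = [5, 15, 19, 23, 30, 35, 40, 45, 50, 55]
target_start = [0, 5, 15, 20, 25, 31, 35, 42, 47, 52, 57]
target_limit = [5, 10, 20, 25, 30, 34, 38, 45, 52, 57, 61]

# With the defaults, only exactly matching spans align; -1 means "no match".
print(tf_text.span_alignment(source_start, source_limit, target_start, target_limit))
# expected: [0, -1, -1, -1, -1, -1, -1, -1, -1, -1]

# contains=True also aligns source spans that fully contain a target span.
print(tf_text.span_alignment(source_start, source_limit, target_start, target_limit,
                             contains=True))
# expected: [0, -1, -1, -1, -1, 5, 6, 7, -1, -1]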
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/regex_split_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/regex_split_ops.py
deleted file mode 100644
index 5e21e532..0000000
--- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/regex_split_ops.py
+++ /dev/null
@@ -1,250 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Lint as: python3 -"""This file contains the python libraries for the regex_split op.""" -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_regex_split_ops = load_library.load_op_library(resource_loader.get_path_to_datafile('_regex_split_ops.so')) -from tensorflow_text.python.ops import splitter - - -# pylint: disable= redefined-builtin -def regex_split_with_offsets(input, - delim_regex_pattern, - keep_delim_regex_pattern="", - name=None): - r"""Split `input` by delimiters that match a regex pattern; returns offsets. - - `regex_split_with_offsets` will split `input` using delimiters that match a - regex pattern in `delim_regex_pattern`. It will return three tensors: - one containing the split substrings ('result' in the examples below), one - containing the offsets of the starts of each substring ('begin' in the - examples below), and one containing the offsets of the ends of each substring - ('end' in the examples below). - - Here is an example: - - >>> text_input=["hello there"] - >>> # split by whitespace - >>> result, begin, end = regex_split_with_offsets(input=text_input, - ... delim_regex_pattern="\s") - >>> print("result: %s\nbegin: %s\nend: %s" % (result, begin, end)) - result: <tf.RaggedTensor [[b'hello', b'there']]> - begin: <tf.RaggedTensor [[0, 6]]> - end: <tf.RaggedTensor [[5, 11]]> - - By default, delimiters are not included in the split string results. - Delimiters may be included by specifying a regex pattern - `keep_delim_regex_pattern`. For example: - - >>> text_input=["hello there"] - >>> # split by whitespace - >>> result, begin, end = regex_split_with_offsets(input=text_input, - ... delim_regex_pattern="\s", - ... keep_delim_regex_pattern="\s") - >>> print("result: %s\nbegin: %s\nend: %s" % (result, begin, end)) - result: <tf.RaggedTensor [[b'hello', b' ', b'there']]> - begin: <tf.RaggedTensor [[0, 5, 6]]> - end: <tf.RaggedTensor [[5, 6, 11]]> - - If there are multiple delimiters in a row, there are no empty splits emitted. - For example: - - >>> text_input=["hello there"] # Note the two spaces between the words. - >>> # split by whitespace - >>> result, begin, end = regex_split_with_offsets(input=text_input, - ... delim_regex_pattern="\s") - >>> print("result: %s\nbegin: %s\nend: %s" % (result, begin, end)) - result: <tf.RaggedTensor [[b'hello', b'there']]> - begin: <tf.RaggedTensor [[0, 7]]> - end: <tf.RaggedTensor [[5, 12]]> - - See https://github.com/google/re2/wiki/Syntax for the full list of supported - expressions. - - Args: - input: A Tensor or RaggedTensor of string input. - delim_regex_pattern: A string containing the regex pattern of a delimiter. 
- keep_delim_regex_pattern: (optional) Regex pattern of delimiters that should - be kept in the result. - name: (optional) Name of the op. - - Returns: - A tuple of RaggedTensors containing: - (split_results, begin_offsets, end_offsets) - where tokens is of type string, begin_offsets and end_offsets are of type - int64. - """ - # Convert input to ragged or tensor - input = ragged_tensor.convert_to_tensor_or_ragged_tensor( - input, dtype=dtypes.string) - - # Handle RaggedTensor inputs by recursively processing the `flat_values`. - if ragged_tensor.is_ragged(input): - # Split the `flat_values` of the input. - tokens, begin_offsets, end_offsets = regex_split_with_offsets( - input.flat_values, delim_regex_pattern, keep_delim_regex_pattern, name) - # Copy outer dimenion partitions from `input` to the output tensors. - tokens_rt = input.with_flat_values(tokens) - begin_offsets_rt = input.with_flat_values(begin_offsets) - end_offsets_rt = input.with_flat_values(end_offsets) - return tokens_rt, begin_offsets_rt, end_offsets_rt - - delim_regex_pattern = b"".join( - [b"(", delim_regex_pattern.encode("utf-8"), b")"]) - keep_delim_regex_pattern = b"".join( - [b"(", keep_delim_regex_pattern.encode("utf-8"), b")"]) - - # reshape to a flat Tensor (if not already) - input_shape = math_ops.cast(array_ops.shape(input), dtypes.int64) - input_reshaped = array_ops.reshape(input, [-1]) - - # send flat_values to regex_split op. - tokens, begin_offsets, end_offsets, row_splits = ( - gen_regex_split_ops.regex_split_with_offsets(input_reshaped, - delim_regex_pattern, - keep_delim_regex_pattern)) - # Pack back into ragged tensors - tokens_rt = ragged_tensor.RaggedTensor.from_row_splits( - tokens, row_splits=row_splits) - begin_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits( - begin_offsets, - row_splits=row_splits) - end_offsets_rt = ragged_tensor.RaggedTensor.from_row_splits( - end_offsets, row_splits=row_splits) - - # If the original input was a multi-dimensional Tensor, add back the - # dimensions - static_rank = input.get_shape().ndims - if static_rank is not None and static_rank > 1: - i = array_ops.get_positive_axis(-1, input.get_shape().ndims) - for i in range( - array_ops.get_positive_axis(-1, - input.get_shape().ndims), 0, -1): - tokens_rt = ragged_tensor.RaggedTensor.from_uniform_row_length( - values=tokens_rt, uniform_row_length=input_shape[i]) - begin_offsets_rt = ragged_tensor.RaggedTensor.from_uniform_row_length( - values=begin_offsets_rt, uniform_row_length=input_shape[i]) - end_offsets_rt = ragged_tensor.RaggedTensor.from_uniform_row_length( - values=end_offsets_rt, uniform_row_length=input_shape[i]) - return tokens_rt, begin_offsets_rt, end_offsets_rt - - -# pylint: disable= redefined-builtin -def regex_split(input, - delim_regex_pattern, - keep_delim_regex_pattern="", - name=None): - r"""Split `input` by delimiters that match a regex pattern. - - `regex_split` will split `input` using delimiters that match a - regex pattern in `delim_regex_pattern`. Here is an example: - - >>> text_input=["hello there"] - >>> # split by whitespace - >>> regex_split(input=text_input, - ... delim_regex_pattern="\s") - <tf.RaggedTensor [[b'hello', b'there']]> - - By default, delimiters are not included in the split string results. - Delimiters may be included by specifying a regex pattern - `keep_delim_regex_pattern`. For example: - - >>> text_input=["hello there"] - >>> # split by whitespace - >>> regex_split(input=text_input, - ... delim_regex_pattern="\s", - ... 
keep_delim_regex_pattern="\s") - <tf.RaggedTensor [[b'hello', b' ', b'there']]> - - If there are multiple delimiters in a row, there are no empty splits emitted. - For example: - - >>> text_input=["hello there"] # Note the two spaces between the words. - >>> # split by whitespace - >>> regex_split(input=text_input, - ... delim_regex_pattern="\s") - <tf.RaggedTensor [[b'hello', b'there']]> - - - See https://github.com/google/re2/wiki/Syntax for the full list of supported - expressions. - - Args: - input: A Tensor or RaggedTensor of string input. - delim_regex_pattern: A string containing the regex pattern of a delimiter. - keep_delim_regex_pattern: (optional) Regex pattern of delimiters that should - be kept in the result. - name: (optional) Name of the op. - - Returns: - A RaggedTensors containing of type string containing the split string - pieces. - """ - tokens, _, _ = regex_split_with_offsets(input, delim_regex_pattern, - keep_delim_regex_pattern, name) - return tokens - - -class RegexSplitter(splitter.SplitterWithOffsets): - r"""`RegexSplitter` splits text on the given regular expression. - - The default is a newline character pattern. It can also return the beginning - and ending byte offsets as well. - - By default, this splitter will break on newlines, ignoring any trailing ones. - >>> splitter = RegexSplitter() - >>> text_input=[ - ... b"Hi there.\nWhat time is it?\nIt is gametime.", - ... b"Who let the dogs out?\nWho?\nWho?\nWho?\n\n", - ... ] - >>> splitter.split(text_input) - <tf.RaggedTensor [[b'Hi there.', b'What time is it?', b'It is gametime.'], - [b'Who let the dogs out?', b'Who?', b'Who?', b'Who?']]> - - The splitter can be passed a custom split pattern, as well. The pattern - can be any string, but we're using a single character (tab) in this example. - >>> splitter = RegexSplitter(split_regex='\t') - >>> text_input=[ - ... b"Hi there.\tWhat time is it?\tIt is gametime.", - ... b"Who let the dogs out?\tWho?\tWho?\tWho?\t\t", - ... ] - >>> splitter.split(text_input) - <tf.RaggedTensor [[b'Hi there.', b'What time is it?', b'It is gametime.'], - [b'Who let the dogs out?', b'Who?', b'Who?', b'Who?']]> - - """ - - def __init__(self, split_regex=None): - r"""Creates an instance of `RegexSplitter`. - - Args: - split_regex: (optional) A string containing the regex pattern of a - delimiter to split on. Default is '\r?\n'. - """ - if not split_regex: - split_regex = "\r?\n" - self._split_regex = split_regex - - def split(self, input): # pylint: disable=redefined-builtin - return regex_split(input, self._split_regex) - - def split_with_offsets(self, input): # pylint: disable=redefined-builtin - return regex_split_with_offsets(input, self._split_regex)
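For reference while reviewing this removal, a minimal usage sketch of the public API the deleted regex_split_ops module backed; it assumes an older tensorflow_text build that still ships these ops and mirrors the docstring examples above.

import tensorflow as tf
import tensorflow_text as tf_text

text_input = tf.constant(["hello there"])

# Split on whitespace and keep the delimiters in the output.
tokens, begin, end = tf_text.regex_split_with_offsets(
    input=text_input,
    delim_regex_pattern=r"\s",
    keep_delim_regex_pattern=r"\s")
# tokens -> [[b'hello', b' ', b'there']], begin -> [[0, 5, 6]], end -> [[5, 6, 11]]

# Same split without offsets; RegexSplitter wraps the same op for line breaking.
pieces = tf_text.regex_split(text_input, delim_regex_pattern=r"\s")
lines = tf_text.RegexSplitter().split(["Hi there.\nWhat time is it?"])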
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/regex_split_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/regex_split_ops_test.py deleted file mode 100644 index 6de97a8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/regex_split_ops_test.py +++ /dev/null
@@ -1,280 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# encoding=utf-8 -# Lint as: python3 -"""Tests for regex_split and regex_split_with_offsets ops.""" -from absl.testing import parameterized - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import test -from tensorflow_text.python.ops import regex_split_ops - - -def _utf8(char): - return char.encode("utf-8") - - -# TODO(thuang513): It appears there isn't a Ragged version of substr; consider -# checking this into core TF. -def _ragged_substr(text_input, begin, size): - if not (isinstance(text_input, ragged_tensor.RaggedTensor) or - isinstance(begin, ragged_tensor.RaggedTensor) or - isinstance(size, ragged_tensor.RaggedTensor)): - return string_ops.substr_v2(text_input, begin, size) - - # TODO(edloper) Update this to use ragged_tensor_shape.broadcast_dynamic_shape - # once it's been updated to handle uniform_row_lengths correctly. - if ragged_tensor.is_ragged(text_input): - if text_input.ragged_rank != 1 or text_input.shape.rank != 2: - return None # Test only works for `shape=[N, None]` - text_input_flat = text_input.flat_values - else: - text_input_flat = array_ops.reshape(text_input, [-1]) - broadcasted_text = array_ops.gather_v2(text_input_flat, - begin.nested_value_rowids()[-1]) - new_tokens = string_ops.substr_v2(broadcasted_text, begin.flat_values, - size.flat_values) - return begin.with_flat_values(new_tokens) - - -@test_util.run_all_in_graph_and_eager_modes -class RegexSplitOpsTest(parameterized.TestCase, test.TestCase): - - @parameterized.parameters([ - dict( - descr="Test doc string examples", - text_input=[r"hello there"], - delim_regex_pattern=r"\s", - keep_delim_regex_pattern=r"\s", - expected=[[b"hello", b" ", b"there"]], - ), - dict( - descr="Test simple whitespace", - text_input=[r"hello there"], - delim_regex_pattern=r"\s", - expected=[[b"hello", b"there"]], - ), - dict( - descr="Two delimiters in a row", - text_input=[r"hello there"], - delim_regex_pattern=r"\s", - expected=[[b"hello", b"there"]], - ), - dict( - descr="Test Hiragana", - text_input=[_utf8(u"では4日")], - delim_regex_pattern=r"\p{Hiragana}", - keep_delim_regex_pattern=r"\p{Hiragana}", - expected=[[_utf8(u"で"), _utf8(u"は"), - _utf8(u"4日")]], - ), - dict( - descr="Test symbols and punctuation", - text_input=[r"hello! 
(:$) there"], - delim_regex_pattern=r"[\p{S}|\p{P}]+|\s", - keep_delim_regex_pattern=r"[\p{S}|\p{P}]+", - expected=[[b"hello", b"!", b"(:$)", b"there"]], - ), - dict( - descr="Test numbers", - text_input=[r"hello12345there"], - delim_regex_pattern=r"\p{N}+", - keep_delim_regex_pattern=r"\p{N}+", - expected=[[b"hello", b"12345", b"there"]], - ), - dict( - descr="Test numbers and symbols", - text_input=[r"show me some $100 bills yo!"], - delim_regex_pattern=r"\s|\p{S}", - keep_delim_regex_pattern=r"\p{S}", - expected=[[b"show", b"me", b"some", b"$", b"100", b"bills", b"yo!"]], - ), - dict( - descr="Test input RaggedTensor with ragged_rank=1; " - "shape = [2, (2, 1)]", - text_input=[ - [b"show me some $100 bills yo!", - _utf8(u"では4日")], - [b"hello there"], - ], - delim_regex_pattern=r"\s|\p{S}|\p{Hiragana}", - keep_delim_regex_pattern=r"\p{S}|\p{Hiragana}", - expected=[[[b"show", b"me", b"some", b"$", b"100", b"bills", b"yo!"], - [_utf8(u"で"), _utf8(u"は"), - _utf8(u"4日")]], [[b"hello", b"there"]]], - ), - dict( - descr="Test input 3D RaggedTensor with ragged_rank=2; " - "shape = [1, 2, (2, 1)]", - text_input=[[ - [b"show me some $100 bills yo!", - _utf8(u"では4日")], - [b"hello there"], - ]], - delim_regex_pattern=r"\s|\p{S}|\p{Hiragana}", - keep_delim_regex_pattern=r"\p{S}|\p{Hiragana}", - expected=[[[[b"show", b"me", b"some", b"$", b"100", b"bills", b"yo!"], - [_utf8(u"で"), _utf8(u"は"), _utf8(u"4日")]], - [[b"hello", b"there"]]]], - ), - dict( - descr="Test input 3D RaggedTensor with ragged_rank=1; " - "shape = [2, (1, 2), 2]", - text_input=[ - [[b"a b", b"c"], [b"d", b"e f g"]], - [[b"cat horse cow", b""]]], - ragged_rank=1, - delim_regex_pattern=r"\s", - expected=[ - [[[b"a", b"b"], [b"c"]], [[b"d"], [b"e", b"f", b"g"]]], - [[[b"cat", b"horse", b"cow"], []]]], - ), - # Test inputs that are Tensors. 
- dict( - descr="Test input Tensor with shape = [2], rank = 1", - text_input=[ - r"show me some $100 bills yo!", - r"hello there", - ], - delim_regex_pattern=r"\s|\p{S}", - keep_delim_regex_pattern=r"\p{S}", - expected=[[b"show", b"me", b"some", b"$", b"100", b"bills", b"yo!"], - [b"hello", b"there"]], - input_is_dense=True, - ), - dict( - descr="Test input Tensor with shape = [2, 1], rank = 2", - text_input=[ - [r"show me some $100 bills yo!"], - [r"hello there"], - ], - delim_regex_pattern=r"\s|\p{S}", - keep_delim_regex_pattern=r"\p{S}", - expected=[[[b"show", b"me", b"some", b"$", b"100", b"bills", b"yo!"]], - [[b"hello", b"there"]]], - input_is_dense=True, - ), - dict( - descr="Test input Tensor with multiple ranks; shape = [2, 2]", - input_is_dense=True, - text_input=[ - [b"show me some $100 bills yo!", - _utf8(u"では4日")], - [b"hello there", b"woot woot"], - ], - delim_regex_pattern=r"\s|\p{S}|\p{Hiragana}", - keep_delim_regex_pattern=r"\p{S}|\p{Hiragana}", - expected=[[[b"show", b"me", b"some", b"$", b"100", b"bills", b"yo!"], - [_utf8(u"で"), _utf8(u"は"), - _utf8(u"4日")]], [[b"hello", b"there"], [b"woot", - b"woot"]]], - ), - dict( - descr="Test input Tensor with multiple; shape = [2, 2, 1]", - input_is_dense=True, - text_input=[ - [[b"show me some $100 bills yo!"], [_utf8(u"では4日")]], - [[b"hello there"], [b"woot woot"]], - ], - delim_regex_pattern=r"\s|\p{S}|\p{Hiragana}", - keep_delim_regex_pattern=r"\p{S}|\p{Hiragana}", - # expected shape = [2, 2, 1, ] - expected=[[[[b"show", b"me", b"some", b"$", b"100", b"bills", - b"yo!"]], [[_utf8(u"で"), - _utf8(u"は"), - _utf8(u"4日")]]], - [[[b"hello", b"there"]], [[b"woot", b"woot"]]]], - ), - ]) - def testRegexSplitOp(self, - text_input, - delim_regex_pattern, - expected, - keep_delim_regex_pattern=r"", - descr="", - input_is_dense=False, - ragged_rank=None): - if input_is_dense: - text_input = constant_op.constant(text_input) - else: - text_input = ragged_factory_ops.constant(text_input, - ragged_rank=ragged_rank) - - actual_tokens, start, end = regex_split_ops.regex_split_with_offsets( - input=text_input, - delim_regex_pattern=delim_regex_pattern, - keep_delim_regex_pattern=keep_delim_regex_pattern, - ) - self.assertAllEqual(actual_tokens, expected) - - # Use the offsets to extract substrings and verify that the substrings match - # up with the expected tokens - extracted_tokens = _ragged_substr(array_ops.expand_dims(text_input, -1), - start, end - start) - if extracted_tokens is not None: - self.assertAllEqual(extracted_tokens, expected) - - -@test_util.run_all_in_graph_and_eager_modes -class RegexSplitterTestCases(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - dict( - test_description="Split on new line", - text_input=[ - b"Hi there.\nWhat time is it?\nIt is gametime.", - b"Who let the dogs out?\nWho?\nWho?\nWho?", - ], - expected=[[b"Hi there.", b"What time is it?", b"It is gametime."], - [b"Who let the dogs out?", b"Who?", b"Who?", b"Who?"]], - ), - dict( - test_description="Test trailing \\n.", - text_input=[ - b"Hi there.\nWhat time is it?\nIt is gametime.", - b"Who let the dogs out?\nWho?\nWho?\nWho?\n", - ], - expected=[[b"Hi there.", b"What time is it?", b"It is gametime."], - [b"Who let the dogs out?", b"Who?", b"Who?", b"Who?"]], - ), - dict( - test_description="Custom regex.", - text_input=[ - b"Hi there.\r\nWhat time is it?\r\nIt is gametime.", - b"Who let the dogs out?\r\nWho?\r\nWho?\r\nWho?", - ], - expected=[[b"Hi there.", b"What time is it?", b"It is gametime."], - [b"Who let the dogs out?", 
b"Who?", b"Who?", b"Who?"]], - new_sentence_regex="\r\n", - ), - ]) - def testRegexSplitter(self, - test_description, - text_input, - expected, - new_sentence_regex=None): - text_input = constant_op.constant(text_input) - sentence_breaker = regex_split_ops.RegexSplitter(new_sentence_regex) - actual = sentence_breaker.split(text_input) - self.assertAllEqual(actual, expected) - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/segment_combiner_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/segment_combiner_ops.py deleted file mode 100644 index 3b34130b..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/segment_combiner_ops.py +++ /dev/null
@@ -1,138 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Library of ops for building segments.""" -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops - - -def combine_segments(segments, start_of_sequence_id, end_of_segment_id): - """Combine one or more input segments for a model's input sequence. - - `combine_segments` combines the tokens of one or more input segments to a - single sequence of token values and generates matching segment ids. - `combine_segments` can follow a `Trimmer`, who limit segment lengths and - emit `RaggedTensor` outputs, and can be followed up by `ModelInputPacker`. - - See `Detailed Experimental Setup` in `BERT: Pre-training of Deep Bidirectional - Transformers for Language Understanding` - (https://arxiv.org/pdf/1810.04805.pdf) for more examples of combined - segments. - - - `combine_segments` first flattens and combines a list of one or more - segments - (`RaggedTensor`s of n dimensions) together along the 1st axis, then packages - any special tokens into a final n dimensional `RaggedTensor`. - - And finally `combine_segments` generates another `RaggedTensor` (with the - same rank as the final combined `RaggedTensor`) that contains a distinct int - id for each segment. - - Example usage: - - ``` - segment_a = [[1, 2], - [3, 4,], - [5, 6, 7, 8, 9]] - - segment_b = [[10, 20,], - [30, 40, 50, 60,], - [70, 80]] - expected_combined, expected_ids = combine_segments([segment_a, segment_b]) - - # segment_a and segment_b have been combined w/ special tokens describing - # the beginning of a sequence and end of a sequence inserted. - expected_combined=[ - [101, 1, 2, 102, 10, 20, 102], - [101, 3, 4, 102, 30, 40, 50, 60, 102], - [101, 5, 6, 7, 8, 9, 102, 70, 80, 102], - ] - - # ids describing which items belong to which segment. - expected_ids=[ - [0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 0, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]] - ``` - - Args: - segments: A list of `RaggedTensor`s with the tokens of the input segments. - All elements must have the same dtype (int32 or int64), same rank, and - same dimension 0 (namely batch size). Slice `segments[i][j, ...]` - contains the tokens of the i-th input segment to the j-th example in the - batch. - start_of_sequence_id: a python int or scalar Tensor containing the id used - to denote the start of a sequence (e.g. `[CLS]` token in BERT - terminology). - end_of_segment_id: a python int or scalar Tensor containing the id used to - denote end of a segment (e.g. the `[SEP]` token in BERT terminology). - - Returns: - a tuple of (combined_segments, segment_ids), where: - - combined_segments: A `RaggedTensor` with segments combined and special - tokens inserted. - segment_ids: A `RaggedTensor` w/ the same shape as `combined_segments` - and containing int ids for each item detailing the segment that they - correspond to. 
- """ - - # Create special tokens ([CLS] and [SEP]) that will be combined with the - # segments - if len(segments) <= 0: - raise ValueError("`segments` must be a nonempty list.") - segment_dtype = segments[0].dtype - if segment_dtype not in (dtypes.int32, dtypes.int64): - raise ValueError("`segments` must have elements with dtype of int32 or " + - "int64") - - start_of_sequence_id = ops.convert_to_tensor( - start_of_sequence_id, dtype=segment_dtype) - end_of_segment_id = ops.convert_to_tensor( - end_of_segment_id, dtype=segment_dtype) - - start_sequence_id = math_ops.cast(start_of_sequence_id, segment_dtype) - end_segment_id = math_ops.cast(end_of_segment_id, segment_dtype) - start_seq_tokens = array_ops.tile([start_sequence_id], [segments[0].nrows()]) - end_segment_tokens = array_ops.tile([end_segment_id], [segments[0].nrows()]) - for i in range(segments[0].ragged_rank): - start_seq_tokens = array_ops.expand_dims(start_seq_tokens, 1) - end_segment_tokens = array_ops.expand_dims(end_segment_tokens, 1) - special_token_segment_template = array_ops.ones_like(start_seq_tokens) - - # Combine all segments w/ special tokens - segments_to_combine = [start_seq_tokens] - for seg in segments: - segments_to_combine.append(seg) - segments_to_combine.append(end_segment_tokens) - segments_combined = array_ops.concat(segments_to_combine, 1) - - # Create the segment ids, making sure to account for special tokens. - segment_ids_to_combine = [] - segment_ids_to_combine.append(special_token_segment_template * 0) - for i, item in enumerate(segments): - # Add segment id - segment_id = array_ops.ones_like(item) * i - segment_ids_to_combine.append(segment_id) - - # Add for SEP - special_token_segment_id = special_token_segment_template * i - segment_ids_to_combine.append(special_token_segment_id) - - segment_ids = array_ops.concat(segment_ids_to_combine, 1) - return segments_combined, segment_ids
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/segment_combiner_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/segment_combiner_ops_test.py deleted file mode 100644 index b3dce34..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/segment_combiner_ops_test.py +++ /dev/null
@@ -1,185 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for ops to build and pack segments.""" -from absl.testing import parameterized - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import segment_combiner_ops - - -class SegmentBuilderTest(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - dict( - descr="Test empty", - segments=[ - # first segment - [[], [], []], - ], - expected_combined=[ - [101, 102], - [101, 102], - [101, 102], - ], - expected_segment_ids=[[0, 0], [0, 0], [0, 0]], - ), - dict( - descr="Test custom start and end of sequence ids", - segments=[ - # first segment - [[1, 2], [ - 3, - 4, - ], [5, 6, 7, 8, 9]], - ], - expected_combined=[ - [1001, 1, 2, 1002], - [1001, 3, 4, 1002], - [1001, 5, 6, 7, 8, 9, 1002], - ], - expected_segment_ids=[[0, 0, 0, 0], [0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]], - start_id=1001, - end_id=1002, - ), - dict( - descr="Single segment: test rank 3 input segments", - segments=[ - # first segment - [[[1], [2]], [[3], [4]], [[5], [6], [7], [8], [9]]], - ], - expected_combined=[ - [[101], [1], [2], [102]], - [[101], [3], [4], [102]], - [[101], [5], [6], [7], [8], [9], [102]], - ], - expected_segment_ids=[ - [[0], [0], [0], [0]], - [[0], [0], [0], [0]], - [[0], [0], [0], [0], [0], [0], [0]], - ], - ), - dict( - descr="Test single segment", - segments=[ - # first segment - [[1, 2], [ - 3, - 4, - ], [5, 6, 7, 8, 9]], - ], - expected_combined=[ - [101, 1, 2, 102], - [101, 3, 4, 102], - [101, 5, 6, 7, 8, 9, 102], - ], - expected_segment_ids=[[0, 0, 0, 0], [0, 0, 0, 0], - [0, 0, 0, 0, 0, 0, 0]], - ), - dict( - descr="Test two segments", - segments=[ - # first segment - [[1, 2], [ - 3, - 4, - ], [5, 6, 7, 8, 9]], - # second segment - [[ - 10, - 20, - ], [ - 30, - 40, - 50, - 60, - ], [70, 80]], - ], - expected_combined=[ - [101, 1, 2, 102, 10, 20, 102], - [101, 3, 4, 102, 30, 40, 50, 60, 102], - [101, 5, 6, 7, 8, 9, 102, 70, 80, 102], - ], - expected_segment_ids=[[0, 0, 0, 0, 1, 1, 1], - [0, 0, 0, 0, 1, 1, 1, 1, 1], - [0, 0, 0, 0, 0, 0, 0, 1, 1, 1]], - ), - dict( - descr="Test two rank 3 segments", - segments=[ - # first segment - [[[1], [2]], [[3], [4]], [[5], [6], [7], [8], [9]]], - # second segment - [[[10], [20]], [[30], [40], [50], [60]], [[70], [80]]], - ], - expected_combined=[ - [[101], [1], [2], [102], [10], [20], [102]], - [[101], [3], [4], [102], [30], [40], [50], [60], [102]], - [[101], [5], [6], [7], [8], [9], [102], [70], [80], [102]], - ], - expected_segment_ids=[[[0], [0], [0], [0], [1], [1], [1]], - [[0], [0], [0], [0], [1], [1], [1], [1], [1]], - [[0], [0], [0], [0], [0], [0], [0], [1], [1], - [1]]], - ), - dict( - descr="Test that if we have 3 or more segments in the list, the " + - "segment ids are correct", - segments=[ - # first segment - [[1, 2], [3, 4], [5, 
6, 7, 8, 9]], - # second segment - [[10, 20], [30, 40, 50, 60], [70, 80]], - # third segment - [[100, 200, 300, 400], [ - 500, - 600, - ], [700, 800]], - ], - expected_combined=[[ - 101, 1, 2, 102, 10, 20, 102, 100, 200, 300, 400, 102 - ], [101, 3, 4, 102, 30, 40, 50, 60, 102, 500, 600, - 102], [101, 5, 6, 7, 8, 9, 102, 70, 80, 102, 700, 800, 102]], - expected_segment_ids=[[0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2], - [0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2], - [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2]], - ), - ]) - def testSegmentBuilder(self, - segments, - expected_combined, - expected_segment_ids, - start_id=101, - end_id=102, - descr=None): - for segment_dtype in [dtypes.int32, dtypes.int64]: - segments_as_tensors = [ - ragged_factory_ops.constant(seg, dtype=segment_dtype) - for seg in segments - ] - actual_combined, actual_segment_ids = ( - segment_combiner_ops.combine_segments( - segments_as_tensors, constant_op.constant(start_id, - segment_dtype), end_id)) - self.assertAllEqual(expected_combined, actual_combined) - self.assertAllEqual(expected_segment_ids, actual_segment_ids) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentence_breaking_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentence_breaking_ops.py deleted file mode 100644 index 496fdf6..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentence_breaking_ops.py +++ /dev/null
@@ -1,163 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Break sentence ops.""" -import abc - -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.util import deprecation -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_sentence_breaking_ops = load_library.load_op_library(resource_loader.get_path_to_datafile('_sentence_breaking_ops.so')) - - -class SentenceBreaker(object): - """An abstract base class for sentence breaker implementations.""" - - @abc.abstractmethod - def break_sentences(self, input): # pylint: disable=redefined-builtin - """Splits `input` into sentences. - - Args: - input: A string `Tensor` of shape [batch] with a batch of documents. - - Returns: - A string `RaggedTensor` of shape [batch, (num_sentences)] with each input - broken up into its constituent sentences. - """ - raise NotImplementedError() - - -class SentenceBreakerWithOffsets(SentenceBreaker): - """An abstract base class for sentence breakers that support offsets.""" - - @abc.abstractmethod - def break_sentences_with_offsets(self, input): # pylint: disable=redefined-builtin - """Splits `input` into sentences and returns the starting & ending offsets. - - Args: - input: A string `Tensor` of shape [batch] with a batch of documents. - - Returns: - A tuple of (sentences, begin_offset, end_offset) where: - - sentences: A string `RaggedTensor` of shape [batch, (num_sentences)] with - each input broken up into its constituent sentences. - begin_offset: A int64 `RaggedTensor` of shape [batch, (num_sentences)] - where each entry is the inclusive beginning byte offset of a sentence. - end_offset: A int64 `RaggedTensor` of shape [batch, (num_sentences)] - where each entry is the exclusive ending byte offset of a sentence. - """ - raise NotImplementedError() - - -@deprecation.deprecated(None, - "Deprecated, use 'StateBasedSentenceBreaker' instead.") -def sentence_fragments(token_word, - token_starts, - token_ends, - token_properties, - input_encoding='UTF-8', - errors='replace', - replacement_char=0xFFFD, - replace_control_characters=False): - """Find the sentence fragments in a given text. - - A sentence fragment is a potential next sentence determined using - deterministic heuristics based on punctuation, capitalization, and similar - text attributes. - - NOTE: This op is deprecated. Use `StateBasedSentenceBreaker` instead. - - Args: - token_word: A Tensor (w/ rank=2) or a RaggedTensor (w/ ragged_rank=1) - containing the token strings. - token_starts: A Tensor (w/ rank=2) or a RaggedTensor (w/ ragged_rank=1) - containing offsets where the token starts. - token_ends: A Tensor (w/ rank=2) or a RaggedTensor (w/ ragged_rank=1) - containing offsets where the token ends. - token_properties: A Tensor (w/ rank=2) or a RaggedTensor (w/ ragged_rank=1) - containing a bitmask. 
- - The values of the bitmask are: - - * 0x01 (ILL_FORMED) - Text is ill-formed: typically applies to all - tokens of a paragraph that is too short or lacks terminal punctuation. - * 0x02 (HEADING) - * 0x04 (BOLD) - * 0x10 (UNDERLINED) - * 0x20 (LIST) - * 0x40 (TITLE) - * 0x80 (EMOTICON) - * 0x100 (ACRONYM) - Token was identified as an acronym. Period-, - hyphen-, and space-separated acronyms: "U.S.", "U-S", and "U S". - * 0x200 (HYPERLINK) - Indicates that the token (or part of the token) is - covered by at least one hyperlink. - - input_encoding: String name for the unicode encoding that should be used to - decode each string. - errors: Specifies the response when an input string can't be converted - using the indicated encoding. One of: - - * `'strict'`: Raise an exception for any illegal substrings. - * `'replace'`: Replace illegal substrings with `replacement_char`. - * `'ignore'`: Skip illegal substrings. - replacement_char: The replacement codepoint to be used in place of invalid - substrings in `input` when `errors='replace'`; and in place of C0 control - characters in `input` when `replace_control_characters=True`. - replace_control_characters: Whether to replace the C0 control characters - `(U+0000 - U+001F)` with the `replacement_char`. - Returns: - A RaggedTensor of `fragment_start`, `fragment_end`, `fragment_properties` - and `terminal_punc_token`. - - `fragment_properties` is an int32 bitmask whose values may contain: - - * 1 = fragment ends with terminal punctuation - * 2 = fragment ends with multiple terminal punctuations (e.g. - "She said what?!") - * 3 = Has close parenthesis (e.g. "Mushrooms (they're fungi).") - * 4 = Has sentential close parenthesis (e.g. "(Mushrooms are fungi!)") - - `terminal_punc_token` is a RaggedTensor containing the index of terminal - punctuation token immediately following the last word in the fragment - -- or index of the last word itself, if it's an acronym (since acronyms - include the terminal punctuation). index of the terminal punctuation - token. - """ # pylint: disable=pointless-string-statement - - if not isinstance(token_starts, ragged_tensor.RaggedTensor): - token_starts = ragged_tensor.RaggedTensor.from_tensor(token_starts) - if not isinstance(token_ends, ragged_tensor.RaggedTensor): - token_ends = ragged_tensor.RaggedTensor.from_tensor(token_ends) - if not isinstance(token_word, ragged_tensor.RaggedTensor): - token_word = ragged_tensor.RaggedTensor.from_tensor(token_word) - if not isinstance(token_properties, ragged_tensor.RaggedTensor): - token_properties = ragged_tensor.RaggedTensor.from_tensor(token_properties) - - fragment = gen_sentence_breaking_ops.sentence_fragments( - errors=errors, - replacement_char=replacement_char, - replace_control_characters=replace_control_characters, - input_encoding=input_encoding, - row_lengths=token_starts.row_lengths(), - token_start=token_starts.flat_values, - token_end=token_ends.flat_values, - token_word=token_word.flat_values, - token_properties=token_properties.flat_values) - start, end, properties, terminal_punc_token, row_lengths = fragment - return tuple( - ragged_tensor.RaggedTensor.from_row_lengths(value, row_lengths) - for value in [start, end, properties, terminal_punc_token])
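The deprecated sentence_fragments op above is exercised by the test file that follows; a condensed call sketch using the acronym example from that test, assuming the pre-removal tensorflow_text build (the bitmask value 256 marks "U.S." as an acronym):

import tensorflow as tf
from tensorflow_text.python.ops import sentence_breaking_ops

token_word = tf.ragged.constant(
    [["Welcome", "to", "the", "U.S.", "don't", "be", "surprised", "."]])
token_starts = tf.ragged.constant([[0, 8, 11, 15, 20, 26, 29, 38]], dtype=tf.int64)
token_ends = tf.ragged.constant([[7, 10, 14, 19, 25, 28, 38, 39]], dtype=tf.int64)
token_props = tf.ragged.constant([[0, 0, 0, 256, 0, 0, 0, 0]], dtype=tf.int64)

# Returns per-document fragment start/end token indices, a property bitmask,
# and the index of each fragment's terminal punctuation token.
start, end, props, terminal_punc = sentence_breaking_ops.sentence_fragments(
    token_word, token_starts, token_ends, token_props)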
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentence_breaking_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentence_breaking_ops_test.py deleted file mode 100644 index 5320ace..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentence_breaking_ops_test.py +++ /dev/null
@@ -1,197 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for sentence_breaking_ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_map_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import test -from tensorflow_text.python.ops import sentence_breaking_ops - - -@test_util.run_all_in_graph_and_eager_modes -class SentenceFragmenterTestCasesV1(test.TestCase, parameterized.TestCase): - - def getTokenWord(self, text, token_starts, token_ends): - def _FindSubstr(input_tensor): - text, token_start, token_length = input_tensor - return string_ops.substr(text, token_start, token_length) - - token_lengths = token_ends - token_starts - token_word = ragged_map_ops.map_fn( - _FindSubstr, (text, token_starts, token_lengths), - dtype=ragged_tensor.RaggedTensorType( - dtype=dtypes.string, ragged_rank=1), - infer_shape=False) - return token_word - - def getTokenOffsets(self, token_words): - result_start = [] - result_end = [] - for sentence in token_words: - sentence_string = "" - sentence_start = [] - sentence_end = [] - for word in sentence: - sentence_start.append(len(sentence_string)) - sentence_string = sentence_string.join([word, " "]) - sentence_end.append(len(sentence_string)) - result_start.append(sentence_start) - result_end.append(sentence_end) - return (constant_op.constant(result_start, dtype=dtypes.int64), - constant_op.constant(result_end, dtype=dtypes.int64)) - - @parameterized.parameters([ - dict( - test_description="Test acronyms.", - text=[["Welcome to the U.S. don't be surprised."]], - token_starts=[[0, 8, 11, 15, 20, 26, 29, 38]], - token_ends=[[7, 10, 14, 19, 25, 28, 38, 39]], - token_properties=[[0, 0, 0, 256, 0, 0, 0, 0]], - expected_fragment_start=[[0, 4]], - expected_fragment_end=[[4, 8]], - expected_fragment_properties=[[1, 1]], - expected_terminal_punc=[[3, 7]], - ), - dict( - test_description="Test batch containing acronyms.", - text=[["Welcome to the U.S. don't be surprised."], ["I.B.M. 
yo"]], - token_starts=[[0, 8, 11, 15, 20, 26, 29, 38], [0, 7]], - token_ends=[[7, 10, 14, 19, 25, 28, 38, 39], [6, 9]], - token_properties=[[0, 0, 0, 256, 0, 0, 0, 0], [0, 0]], - expected_fragment_start=[[0, 4], [0]], - expected_fragment_end=[[4, 8], [2]], - expected_fragment_properties=[[1, 1], [0]], - expected_terminal_punc=[[3, 7], [-1]], - ), - dict( - test_description="Test for semicolons.", - text=[["Welcome to the US; don't be surprised."]], - token_starts=[[0, 8, 11, 15, 17, 19, 25, 28, 37]], - token_ends=[[8, 10, 14, 19, 18, 24, 27, 37, 38]], - token_properties=[[0, 0, 0, 0, 0, 0, 0, 0, 0]], - expected_fragment_start=[[0]], - expected_fragment_end=[[9]], - expected_fragment_properties=[[1]], - expected_terminal_punc=[[8]], - ), - ]) - def testSentenceFragmentOp(self, test_description, text, token_starts, - token_ends, token_properties, - expected_fragment_start, expected_fragment_end, - expected_fragment_properties, - expected_terminal_punc): - text = constant_op.constant(text) - token_starts = ragged_factory_ops.constant(token_starts, dtype=dtypes.int64) - token_ends = ragged_factory_ops.constant(token_ends, dtype=dtypes.int64) - token_properties = ragged_factory_ops.constant( - token_properties, dtype=dtypes.int64) - token_word = self.getTokenWord(text, token_starts, token_ends) - - fragments = sentence_breaking_ops.sentence_fragments( - token_word, token_starts, token_ends, token_properties) - - fragment_starts, fragment_ends, fragment_properties, terminal_punc = ( - fragments) - self.assertAllEqual(expected_fragment_start, fragment_starts) - self.assertAllEqual(expected_fragment_end, fragment_ends) - self.assertAllEqual(expected_fragment_properties, fragment_properties) - self.assertAllEqual(expected_terminal_punc, terminal_punc) - - @parameterized.parameters([ - dict( - test_description="Test acronyms.", - token_word=[ - ["Welcome", "to", "the", "U.S.", "!", "Harry"], - ["Wb", "Tang", "Clan", ";", "ain't", "nothing"], - ], - token_properties=[[0, 0, 0, 256, 0, 0], [0, 0, 0, 0, 0, 0]], - expected_fragment_start=[[0, 5], [0]], - expected_fragment_end=[[5, 6], [6]], - expected_fragment_properties=[[3, 0], [0]], - expected_terminal_punc=[[3, -1], [-1]], - ), - ]) - def testDenseInputs(self, test_description, token_word, token_properties, - expected_fragment_start, expected_fragment_end, - expected_fragment_properties, expected_terminal_punc): - token_starts, token_ends = self.getTokenOffsets(token_word) - token_properties = constant_op.constant( - token_properties, dtype=dtypes.int64) - token_word = constant_op.constant(token_word, dtype=dtypes.string) - - fragments = sentence_breaking_ops.sentence_fragments( - token_word, token_starts, token_ends, token_properties) - - fragment_starts, fragment_ends, fragment_properties, terminal_punc = ( - fragments) - self.assertAllEqual(expected_fragment_start, fragment_starts) - self.assertAllEqual(expected_fragment_end, fragment_ends) - self.assertAllEqual(expected_fragment_properties, fragment_properties) - self.assertAllEqual(expected_terminal_punc, terminal_punc) - - @parameterized.parameters([ - dict( - test_description="Too many ragged ranks.", - token_word=[ - ["Welcome", "to", "the", "U.S.", "don't", "be", "surprised"], - ], - token_starts=[[1, 2, 3]], - token_ends=[[[7, 10, 14, 19, 25, 28, 38, 39]]], - token_properties=[[0, 0, 0, 256, 0, 0, 0, 0]], - ), - dict( - test_description="Too many ranks in a dense Tensor.", - token_word=[ - [[["Welcome", "to", "the", "U.S.", "don't", "be", "surprised"]]], - ], - token_starts=[[1, 2, 3]], - 
token_ends=[[7, 10, 14, 19, 25, 28, 38, 39]], - token_properties=[[0, 0, 0, 256, 0, 0, 0, 0]], - is_ragged=False, - ), - ]) - def testBadInputShapes(self, - test_description, - token_word, - token_starts, - token_ends, - token_properties, - is_ragged=True): - constant = ragged_factory_ops.constant if is_ragged else constant_op.constant - token_starts = constant(token_starts, dtype=dtypes.int64) - token_ends = constant(token_ends, dtype=dtypes.int64) - token_properties = ragged_factory_ops.constant( - token_properties, dtype=dtypes.int64) - - with self.assertRaises(errors.InvalidArgumentError): - result = sentence_breaking_ops.sentence_fragments( - token_word, token_starts, token_ends, token_properties) - _ = self.evaluate(result) - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentencepiece_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentencepiece_tokenizer.py deleted file mode 100644 index c35ff03..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentencepiece_tokenizer.py +++ /dev/null
@@ -1,349 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Sentencepiece tokenizer for string tensors.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_conversion_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor -from tensorflow.python.training.tracking import tracking -from tensorflow_text.python.ops.tokenization import Detokenizer -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_sentencepiece_tokenizer = load_library.load_op_library(resource_loader.get_path_to_datafile('_sentencepiece_tokenizer.so')) # pylint: disable=g-bad-import-order - -_tf_text_sentencepiece_tokenizer_op_create_counter = monitoring.Counter( - "/nlx/api/python/sentencepiece_tokenizer_create_counter", - "Counter for number of SentencepieceTokenizers created in Python.") - - -class _SentencepieceModelResource(tracking.TrackableResource): - """Utility to track the model resource tensor (for SavedModel support).""" - - def __init__(self, model, name): - super(_SentencepieceModelResource, self).__init__() - self._model = model - self._name = name - _ = self.resource_handle # Accessing this property creates the resource. - - def _create_resource(self): - model, name = self._model, self._name - with ops.name_scope(name, "SentenceTokenizerInitializer", [model]): - return gen_sentencepiece_tokenizer.sentencepiece_op(model=model) - - -class SentencepieceTokenizer(TokenizerWithOffsets, Detokenizer): - r"""Tokenizes a tensor of UTF-8 strings. - - SentencePiece is an unsupervised text tokenizer and detokenizer. It is used - mainly for Neural Network-based text generation systems where the vocabulary - size is predetermined prior to the neural model training. SentencePiece - implements subword units with the extension of direct training from raw - sentences. - - Before using the tokenizer, you will need to train a vocabulary and build a - model configuration for it. Please visit the [Sentencepiece - repository](https://github.com/google/sentencepiece#train-sentencepiece-model) - for the most up-to-date instructions on this process. - """ - - def __init__(self, - model=None, - out_type=dtypes.int32, - nbest_size=0, - alpha=1.0, - reverse=False, - add_bos=False, - add_eos=False, - return_nbest=False, - name=None): - """Creates & initializes a Sentencepiece processor. - - Args: - model: The sentencepiece model serialized proto. - out_type: output type. 
tf.int32 or tf.string (Default = tf.int32) Setting - tf.int32 directly encodes the string into an id sequence. - nbest_size: A scalar for sampling. - * `nbest_size = {0,1}`: No sampling is performed. (default) - * `nbest_size > 1`: samples from the nbest_size results. - * `nbest_size < 0`: assuming that nbest_size is infinite and samples - from the all hypothesis (lattice) using - forward-filtering-and-backward-sampling algorithm. - alpha: A scalar for a smoothing parameter. Inverse temperature for - probability rescaling. - reverse: Reverses the tokenized sequence (Default = false) - add_bos: Add beginning of sentence token to the result (Default = false) - add_eos: Add end of sentence token to the result (Default = false). When - `reverse=True` beginning/end of sentence tokens are added after - reversing. - return_nbest: If True requires that `nbest_size` is a scalar and `> 1`. - Returns the `nbest_size` best tokenizations for each sentence instead - of a single one. The returned tensor has shape - `[batch * nbest, (tokens)]`. - name: The name argument that is passed to the op function. - - Returns: - pieces: A SentencepieceTokenizer. - """ - super(SentencepieceTokenizer, self).__init__() - _tf_text_sentencepiece_tokenizer_op_create_counter.get_cell().increase_by(1) - self.nbest_size = nbest_size - self.alpha = alpha - self.out_type = out_type - self.reverse = reverse - self.add_bos = add_bos - self.add_eos = add_eos - self.return_nbest = return_nbest - self._model_resource = _SentencepieceModelResource(model, name) - - def tokenize(self, input, name=None): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 strings. - - Args: - input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape. - name: The name argument that is passed to the op function. - - Returns: - A `RaggedTensor` of tokenized text. The returned shape is the shape of the - input tensor with an added ragged dimension for tokens of each string. - """ - with ops.name_scope(name, "SentenceTokenizer", [input, self]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - if input_tensor.shape.ndims is None: - raise ValueError("Rank of input_tensor must be statically known.") - if ragged_tensor.is_ragged(input_tensor): - # Recursively process the values of the ragged tensor. - tokens = self.tokenize(input_tensor.flat_values) - return input_tensor.with_flat_values(tokens) - else: - if input_tensor.shape.ndims > 1: - # Convert the input tensor to ragged and process it. - return self.tokenize(ragged_conversion_ops.from_tensor(input_tensor)) - elif input_tensor.shape.ndims == 0: - tokens = self.tokenize(array_ops.stack([input_tensor])) - return tokens.values - else: - # Our rank 1 tensor is the correct shape, so we can process it as - # normal. - (output_values, row_splits) = ( - gen_sentencepiece_tokenizer.sentencepiece_tokenize_op( - self._model_resource.resource_handle, input_tensor, - self.nbest_size, self.alpha, self.add_bos, self.add_eos, - self.reverse, self.out_type, return_nbest=self.return_nbest)) - tokens = RaggedTensor.from_nested_row_splits( - flat_values=output_values, - nested_row_splits=[row_splits], - validate=False) - return tokens - - def tokenize_with_offsets(self, input, name=None): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 strings. - - This function returns a tuple containing the tokens along with - start and end byte offsets that mark where in the original string each - token was located. 
- - Args: - input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape. - name: The name argument that is passed to the op function. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - tokens: is an N+1-dimensional UTF-8 string or integer `Tensor` or - `RaggedTensor`. - start_offsets: is an N+1-dimensional integer `Tensor` or - `RaggedTensor` containing the starting indices of each token (byte - indices for input strings). - end_offsets: is an N+1-dimensional integer `Tensor` or - `RaggedTensor` containing the exclusive ending indices of each token - (byte indices for input strings). - """ - with ops.name_scope(name, "SentenceTokenizer", [input, self]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - if input_tensor.shape.ndims is None: - raise ValueError("Rank of input_tensor must be statically known.") - if ragged_tensor.is_ragged(input_tensor): - # Recursively process the values of the ragged tensor - (tokens, starts, - ends) = self.tokenize_with_offsets(input_tensor.flat_values) - tokens = input_tensor.with_flat_values(tokens) - starts = input_tensor.with_flat_values(starts) - ends = input_tensor.with_flat_values(ends) - return (tokens, starts, ends) - else: - if input_tensor.shape.ndims > 1: - # Convert the input tensor to ragged and process it. - return self.tokenize_with_offsets( - ragged_conversion_ops.from_tensor(input_tensor)) - elif input_tensor.shape.ndims == 0: - (tokens, starts, ends) = self.tokenize_with_offsets( - array_ops.stack([input_tensor])) - tokens = tokens.values - starts = starts.values - ends = ends.values - return (tokens, starts, ends) - else: - # Our rank 1 tensor is the correct shape, so we can process it as - # normal. - (output_values, output_splits, output_offset_starts, - output_offset_ends) = ( - gen_sentencepiece_tokenizer - .sentencepiece_tokenize_with_offsets_op( - self._model_resource.resource_handle, input_tensor, - self.nbest_size, self.alpha, self.add_bos, self.add_eos, - self.reverse, self.out_type, return_nbest=self.return_nbest)) - tokens = RaggedTensor.from_nested_row_splits( - flat_values=output_values, - nested_row_splits=[output_splits], - validate=False) - starts = RaggedTensor.from_nested_row_splits( - flat_values=output_offset_starts, - nested_row_splits=[output_splits], - validate=False) - ends = RaggedTensor.from_nested_row_splits( - flat_values=output_offset_ends, - nested_row_splits=[output_splits], - validate=False) - return (tokens, starts, ends) - - def detokenize(self, input, name=None): # pylint: disable=redefined-builtin - """Detokenizes tokens into preprocessed text. - - This function accepts tokenized text, and reforms it back into - sentences. - - Args: - input: A `RaggedTensor` or `Tensor` of UTF-8 string tokens with a rank of - at least 1. - name: The name argument that is passed to the op function. - - Returns: - A N-1 dimensional string Tensor or RaggedTensor of the detokenized text. 
- """ - with ops.name_scope(name, "SentenceTokenizer", [input, self]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - if input_tensor.shape.ndims is None: - raise ValueError("Rank of input_tensor must be statically known.") - if input_tensor.shape.ndims == 0: - raise ValueError("Rank of input_tensor must be at least 1.") - if ragged_tensor.is_ragged(input_tensor): - if input_tensor.flat_values.shape.ndims > 1: - # If the flat_values of our ragged tensor is multi-dimensional, we can - # process it separately and our output will have the same nested - # splits as our input. - tokens = self.detokenize(input_tensor.flat_values) - return input_tensor.with_flat_values(tokens) - elif input_tensor.ragged_rank > 1: - # Recursively process the values of the ragged tensor. - tokens = self.detokenize(input_tensor.values) - return input_tensor.with_values(tokens) - else: - return gen_sentencepiece_tokenizer.sentencepiece_detokenize_op( - self._model_resource.resource_handle, input_tensor.flat_values, - input_tensor.row_splits, self.add_bos, self.add_eos, self.reverse) - else: - if input_tensor.shape.ndims > 1: - # Convert the input tensor to ragged and process it. - return self.detokenize( - ragged_conversion_ops.from_tensor(input_tensor)) - else: - tokens = self.detokenize(array_ops.stack([input_tensor])) - return array_ops.reshape(tokens, []) - - def vocab_size(self, name=None): - """Returns the vocabulary size. - - The number of tokens from within the Sentencepiece vocabulary provided at - the time of initialization. - - Args: - name: The name argument that is passed to the op function. - - Returns: - A scalar representing the vocabulary size. - """ - with ops.name_scope(name, "SentencepieceTokenizerVocabSize", [self]): - return gen_sentencepiece_tokenizer.sentencepiece_vocab_size_op( - self._model_resource.resource_handle) - - def id_to_string(self, input, name=None): # pylint: disable=redefined-builtin - """Converts vocabulary id into a token. - - Args: - input: An arbitrary tensor of int32 representing the token IDs. - name: The name argument that is passed to the op function. - - Returns: - A tensor of string with the same shape as input. - """ - with ops.name_scope(name, "SentencepieceTokenizerIdToString", - [self, input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - if input_tensor.shape.ndims is None: - raise ValueError("Rank of input_tensor must be statically known.") - if input_tensor.shape.ndims == 0: - strings = self.id_to_string(array_ops.stack([input_tensor])) - return strings[0] - if ragged_tensor.is_ragged(input_tensor): - strings = self.id_to_string(input_tensor.flat_values) - return input_tensor.with_flat_values(strings) - if input_tensor.shape.ndims > 1: - return array_ops.reshape( - self.id_to_string(array_ops.reshape(input_tensor, [-1])), - array_ops.shape(input_tensor)) - return gen_sentencepiece_tokenizer.sentencepiece_id_to_string_op( - self._model_resource.resource_handle, input) - - def string_to_id(self, input, name=None): # pylint: disable=redefined-builtin - """Converts token into a vocabulary id. - - This function is particularly helpful for determining the IDs for any - special tokens whose ID could not be determined through normal tokenization. - - Args: - input: An arbitrary tensor of string tokens. - name: The name argument that is passed to the op function. - - Returns: - A tensor of int32 representing the IDs with the same shape as input. 
- """ - with ops.name_scope(name, "SentencepieceTokenizerStringToId", - [self, input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - if input_tensor.shape.ndims is None: - raise ValueError("Rank of input_tensor must be statically known.") - if input_tensor.shape.ndims == 0: - strings = self.string_to_id(array_ops.stack([input_tensor])) - return strings[0] - if ragged_tensor.is_ragged(input_tensor): - strings = self.string_to_id(input_tensor.flat_values) - return input_tensor.with_flat_values(strings) - if input_tensor.shape.ndims > 1: - return array_ops.reshape( - self.string_to_id(array_ops.reshape(input_tensor, [-1])), - array_ops.shape(input_tensor)) - return gen_sentencepiece_tokenizer.sentencepiece_string_to_id_op( - self._model_resource.resource_handle, input)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentencepiece_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentencepiece_tokenizer_test.py deleted file mode 100644 index d7cdebc..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sentencepiece_tokenizer_test.py +++ /dev/null
@@ -1,539 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for SentencePieceProcessor Tensorflow op.""" - -import sys -import tempfile - -from absl.testing import parameterized - -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.eager import context -from tensorflow.python.eager import def_function -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import tensor_spec -from tensorflow.python.framework import test_util -from tensorflow.python.lib.io import file_io -from tensorflow.python.module import module -from tensorflow.python.ops import gen_experimental_dataset_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_gather_ops -from tensorflow.python.platform import gfile -from tensorflow.python.platform import test -from tensorflow.python.saved_model import load -from tensorflow.python.saved_model import save -from tensorflow_text.python.ops.sentencepiece_tokenizer import SentencepieceTokenizer - - -def _utf8(tokens): - if sys.version_info[0] == 2: - return tokens - if isinstance(tokens, list): - return [_utf8(t) for t in tokens] - else: - return tokens.encode('utf-8') - - -class TestSavedModelModule(module.Module): - - def __init__(self, tokenizer): - self.tokenizer = tokenizer - - @def_function.function(input_signature=[ - tensor_spec.TensorSpec(shape=[None], dtype=dtypes.string) - ]) - def tokenize(self, inputs): - return self.tokenizer.tokenize(inputs) - - -@test_util.run_all_in_graph_and_eager_modes -class SentencepieceTokenizerOpTest(test_util.TensorFlowTestCase, - parameterized.TestCase): - - def getTokenizerAndSetOptions(self, reverse, add_bos, add_eos, out_type): - self.reverse = reverse - self.add_bos = add_bos - self.add_eos = add_eos - self.out_type = out_type - return SentencepieceTokenizer( - self.model, - reverse=reverse, - add_bos=add_bos, - add_eos=add_eos, - out_type=out_type) - - def transformExpected(self, expected, is_offsets=False): - bos = _utf8('<s>') - eos = _utf8('</s>') - if is_offsets: - bos = 0 - eos = 0 - elif self.out_type == dtypes.int32: - bos = 1 - eos = 2 - if not isinstance(expected[0], list): - if self.add_bos: - expected = [bos] + expected - if self.add_eos: - expected = expected + [eos] - if self.reverse: - expected = [x for x in reversed(expected)] - else: - return [self.transformExpected(x) for x in expected] - return expected - - def setUp(self): - super(SentencepieceTokenizerOpTest, self).setUp() - sentencepiece_model_file = ( - 'tensorflow_text/python/ops/test_data/' - 'test_oss_model.model') - self.model = gfile.GFile(sentencepiece_model_file, 'rb').read() - - def testGetVocabSize(self): - sp = SentencepieceTokenizer(self.model) - self.assertAllEqual(1000, sp.vocab_size()) - - def testIdToStringScalar(self): - sp = SentencepieceTokenizer(self.model) - result = 
sp.id_to_string(125) - self.assertAllEqual('ve', result) - - def testIdToStringVector(self): - sp = SentencepieceTokenizer(self.model) - pieces = _utf8([['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']]) - ids = [[9, 169, 21, 125, 78, 48, 132, 15], [9, 169, 21, 125, 727, 6], - [9, 169, 21, 125, 169, 579, 6]] - result = sp.id_to_string(ragged_factory_ops.constant(ids)) - self.assertAllEqual(pieces, result) - - def testIdToStringRagged(self): - sp = SentencepieceTokenizer(self.model) - pieces = _utf8( - [[['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']], - [['▁', 'N', 'ever', '▁tell', '▁me', '▁the', '▁', 'o', 'd', 'd', 's']]]) - ids = [[[9, 169, 21, 125, 78, 48, 132, 15], [9, 169, 21, 125, 727, 6], - [9, 169, 21, 125, 169, 579, 6]], - [[4, 199, 363, 310, 33, 7, 4, 21, 17, 17, 8]]] - result = sp.id_to_string(ragged_factory_ops.constant(ids, dtypes.int32)) - self.assertAllEqual(pieces, result) - - def testStringToIdScalar(self): - sp = SentencepieceTokenizer(self.model) - result = sp.string_to_id('</s>') - self.assertAllEqual(2, result) - - def testStringToIdVector(self): - sp = SentencepieceTokenizer(self.model) - pieces = _utf8([['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']]) - ids = [[9, 169, 21, 125, 78, 48, 132, 15], [9, 169, 21, 125, 727, 6], - [9, 169, 21, 125, 169, 579, 6]] - result = sp.string_to_id(ragged_factory_ops.constant(pieces)) - self.assertAllEqual(ids, result) - - def testStringToIdRagged(self): - sp = SentencepieceTokenizer(self.model) - pieces = _utf8( - [[['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']], - [['▁', 'N', 'ever', '▁tell', '▁me', '▁the', '▁', 'o', 'd', 'd', 's']]]) - ids = [[[9, 169, 21, 125, 78, 48, 132, 15], [9, 169, 21, 125, 727, 6], - [9, 169, 21, 125, 169, 579, 6]], - [[4, 199, 363, 310, 33, 7, 4, 21, 17, 17, 8]]] - result = sp.string_to_id(ragged_factory_ops.constant(pieces, dtypes.string)) - self.assertAllEqual(ids, result) - - @parameterized.parameters([ - (False, False, False, dtypes.int32), - (False, False, True, dtypes.int32), - (False, True, False, dtypes.int32), - (False, True, True, dtypes.int32), - (True, False, False, dtypes.int32), - (True, False, True, dtypes.int32), - (True, True, False, dtypes.int32), - (True, True, True, dtypes.int32), - (False, False, False, dtypes.string), - (False, False, True, dtypes.string), - (False, True, False, dtypes.string), - (False, True, True, dtypes.string), - (True, False, False, dtypes.string), - (True, False, True, dtypes.string), - (True, True, False, dtypes.string), - (True, True, True, dtypes.string), - ]) - def testTokenizeAndDetokenizeScalar(self, reverse, add_bos, add_eos, - out_type): - sp = self.getTokenizerAndSetOptions(reverse, add_bos, add_eos, out_type) - sentence = 'I love lamp.' 
- expected = [] - if out_type == dtypes.int32: - expected = [9, 169, 21, 125, 169, 579, 6] - else: - expected = _utf8(['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']) - expected = self.transformExpected(expected) - result = sp.tokenize(sentence) - self.assertAllEqual(expected, result) - detokenized = sp.detokenize(result) - self.assertAllEqual(_utf8(sentence), detokenized) - - @parameterized.parameters([ - (False, False, False, dtypes.int32), - (False, False, True, dtypes.int32), - (False, True, False, dtypes.int32), - (False, True, True, dtypes.int32), - (True, False, False, dtypes.int32), - (True, False, True, dtypes.int32), - (True, True, False, dtypes.int32), - (True, True, True, dtypes.int32), - (False, False, False, dtypes.string), - (False, False, True, dtypes.string), - (False, True, False, dtypes.string), - (False, True, True, dtypes.string), - (True, False, False, dtypes.string), - (True, False, True, dtypes.string), - (True, True, False, dtypes.string), - (True, True, True, dtypes.string), - ]) - def testTokenizeAndDetokenizeVec(self, reverse, add_bos, add_eos, out_type): - sp = self.getTokenizerAndSetOptions(reverse, add_bos, add_eos, out_type) - sentences = ['I love carpet', 'I love desk.', 'I love lamp.'] - expected = [] - if out_type == dtypes.int32: - expected = [[9, 169, 21, 125, 78, 48, 132, 15], [9, 169, 21, 125, 727, 6], - [9, 169, 21, 125, 169, 579, 6]] - else: - expected = _utf8([['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']]) - expected = self.transformExpected(expected) - result = sp.tokenize(sentences) - self.assertAllEqual(expected, result) - detokenized = sp.detokenize(result) - self.assertAllEqual(_utf8(sentences), detokenized) - - @parameterized.parameters([ - (False, False, False, dtypes.int32), - (False, False, True, dtypes.int32), - (False, True, False, dtypes.int32), - (False, True, True, dtypes.int32), - (True, False, False, dtypes.int32), - (True, False, True, dtypes.int32), - (True, True, False, dtypes.int32), - (True, True, True, dtypes.int32), - (False, False, False, dtypes.string), - (False, False, True, dtypes.string), - (False, True, False, dtypes.string), - (False, True, True, dtypes.string), - (True, False, False, dtypes.string), - (True, False, True, dtypes.string), - (True, True, False, dtypes.string), - (True, True, True, dtypes.string), - ]) - def testTokenizeAndDetokenizeUniformTensorMatrix(self, reverse, add_bos, - add_eos, out_type): - sp = self.getTokenizerAndSetOptions(reverse, add_bos, add_eos, out_type) - sentences = [['I love carpet', 'I love desk.'], - ['I love lamp.', 'Never tell me the odds']] - expected = [] - if out_type == dtypes.int32: - expected = [[[9, 169, 21, 125, 78, 48, 132, 15], - [9, 169, 21, 125, 727, 6]], - [[9, 169, 21, 125, 169, 579, 6], - [4, 199, 363, 310, 33, 7, 4, 21, 17, 17, 8]]] - else: - expected = _utf8( - [[['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't'], - ['▁I', '▁l', 'o', 've', '▁desk', '.']], - [['▁I', '▁l', 'o', 've', '▁l', 'amp', '.'], - ['▁', 'N', 'ever', '▁tell', '▁me', '▁the', '▁', 'o', 'd', 'd', - 's']]]) - expected = self.transformExpected(expected) - result = sp.tokenize(constant_op.constant(sentences)) - self.assertAllEqual(expected, result) - detokenized = sp.detokenize(result) - self.assertAllEqual(_utf8(sentences), detokenized) - - @parameterized.parameters([ - (False, False, False, dtypes.int32), - (False, False, True, dtypes.int32), - (False, True, False, dtypes.int32), - (False, True, True, dtypes.int32), - 
(True, False, False, dtypes.int32), - (True, False, True, dtypes.int32), - (True, True, False, dtypes.int32), - (True, True, True, dtypes.int32), - (False, False, False, dtypes.string), - (False, False, True, dtypes.string), - (False, True, False, dtypes.string), - (False, True, True, dtypes.string), - (True, False, False, dtypes.string), - (True, False, True, dtypes.string), - (True, True, False, dtypes.string), - (True, True, True, dtypes.string), - ]) - def testTokenizeAndDetokenizeRaggedMatrix(self, reverse, add_bos, add_eos, - out_type): - sp = self.getTokenizerAndSetOptions(reverse, add_bos, add_eos, out_type) - sentences = [['I love carpet', 'I love desk.', 'I love lamp.'], - ['Never tell me the odds']] - expected = [] - if out_type == dtypes.int32: - expected = [[[9, 169, 21, 125, 78, 48, 132, 15], - [9, 169, 21, 125, 727, 6], [9, 169, 21, 125, 169, 579, 6]], - [[4, 199, 363, 310, 33, 7, 4, 21, 17, 17, 8]]] - else: - expected = _utf8( - [[['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']], - [['▁', 'N', 'ever', '▁tell', '▁me', '▁the', '▁', 'o', 'd', 'd', - 's']]]) - expected = self.transformExpected(expected) - result = sp.tokenize(ragged_factory_ops.constant(sentences)) - self.assertAllEqual(expected, result) - detokenized = sp.detokenize(result) - self.assertAllEqual(_utf8(sentences), detokenized) - - @parameterized.parameters([ - (False, False, False, dtypes.int32), - (False, False, True, dtypes.int32), - (False, True, False, dtypes.int32), - (False, True, True, dtypes.int32), - (True, False, False, dtypes.int32), - (True, False, True, dtypes.int32), - (True, True, False, dtypes.int32), - (True, True, True, dtypes.int32), - (False, False, False, dtypes.string), - (False, False, True, dtypes.string), - (False, True, False, dtypes.string), - (False, True, True, dtypes.string), - (True, False, False, dtypes.string), - (True, False, True, dtypes.string), - (True, True, False, dtypes.string), - (True, True, True, dtypes.string), - ]) - def testTokenizeAndDetokenizeWithOffsetsScalar(self, reverse, add_bos, - add_eos, out_type): - sp = self.getTokenizerAndSetOptions(reverse, add_bos, add_eos, out_type) - sentence = 'I love lamp.' 
- expected_tok = [] - expected_starts = [0, 1, 3, 4, 6, 8, 11] - expected_ends = [1, 3, 4, 6, 8, 11, 12] - if out_type == dtypes.int32: - expected_tok = [9, 169, 21, 125, 169, 579, 6] - else: - expected_tok = _utf8(['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']) - expected_tok = self.transformExpected(expected_tok) - expected_starts = self.transformExpected(expected_starts, True) - expected_ends = self.transformExpected(expected_ends, True) - (tokens, starts, - ends) = sp.tokenize_with_offsets(ragged_factory_ops.constant(sentence)) - self.assertAllEqual(expected_tok, tokens) - self.assertAllEqual(expected_starts, starts) - self.assertAllEqual(expected_ends, ends) - detokenized = sp.detokenize(tokens) - self.assertAllEqual(_utf8(sentence), detokenized) - - def testTokenizeAndDetokenizeWithOffsetsSingleElementVector(self): - sp = SentencepieceTokenizer(self.model, out_type=dtypes.string) - sentences = ['I love lamp.'] - expected_tokens = [['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']] - expected_tokens = _utf8(expected_tokens) - expected_starts = [[0, 1, 3, 4, 6, 8, 11]] - expected_ends = [[1, 3, 4, 6, 8, 11, 12]] - (tokens, starts, - ends) = sp.tokenize_with_offsets(ragged_factory_ops.constant(sentences)) - self.assertAllEqual(expected_tokens, tokens) - self.assertAllEqual(expected_starts, starts) - self.assertAllEqual(expected_ends, ends) - detokenized = sp.detokenize(tokens) - self.assertAllEqual(_utf8(sentences), detokenized) - - def testTokenizeAndDetokenizeWithOffsetsVector(self): - sp = SentencepieceTokenizer(self.model, out_type=dtypes.string) - sentences = ['I love carpet.', 'I love desk.', 'I love lamp.'] - expected_tokens = [['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't', '.'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']] - expected_tokens = _utf8(expected_tokens) - expected_starts = [[0, 1, 3, 4, 6, 8, 10, 12, 13], [0, 1, 3, 4, 6, 11], - [0, 1, 3, 4, 6, 8, 11]] - expected_ends = [[1, 3, 4, 6, 8, 10, 12, 13, 14], [1, 3, 4, 6, 11, 12], - [1, 3, 4, 6, 8, 11, 12]] - (tokens, starts, - ends) = sp.tokenize_with_offsets(ragged_factory_ops.constant(sentences)) - self.assertAllEqual(expected_tokens, tokens) - self.assertAllEqual(expected_starts, starts) - self.assertAllEqual(expected_ends, ends) - detokenized = sp.detokenize(tokens) - self.assertAllEqual(_utf8(sentences), detokenized) - - def testTokenizeAndDetokenizeWithOffsetsMatrix(self): - sp = SentencepieceTokenizer(self.model, out_type=dtypes.string) - sentences = [['I love carpet.', 'I love desk.', 'I love lamp.'], - ['Never tell me the odds']] - expected_tokens = [[['▁I', '▁l', 'o', 've', '▁c', 'ar', 'pe', 't', '.'], - ['▁I', '▁l', 'o', 've', '▁desk', '.'], - ['▁I', '▁l', 'o', 've', '▁l', 'amp', '.']], - [[ - '▁', 'N', 'ever', '▁tell', '▁me', '▁the', '▁', 'o', - 'd', 'd', 's' - ]]] - expected_tokens = _utf8(expected_tokens) - expected_starts = [[[0, 1, 3, 4, 6, 8, 10, 12, 13], [0, 1, 3, 4, 6, 11], - [0, 1, 3, 4, 6, 8, 11]], - [[0, 0, 1, 5, 10, 13, 17, 18, 19, 20, 21]]] - expected_ends = [[[1, 3, 4, 6, 8, 10, 12, 13, 14], [1, 3, 4, 6, 11, 12], - [1, 3, 4, 6, 8, 11, 12]], - [[0, 1, 5, 10, 13, 17, 18, 19, 20, 21, 22]]] - (tokens, starts, - ends) = sp.tokenize_with_offsets(ragged_factory_ops.constant(sentences)) - self.assertAllEqual(expected_tokens, tokens) - self.assertAllEqual(expected_starts, starts) - self.assertAllEqual(expected_ends, ends) - detokenized = sp.detokenize(tokens) - self.assertAllEqual(_utf8(sentences), detokenized) - - @parameterized.parameters([ - (-1, 0.1, dtypes.int32), - (64, 
0.1, dtypes.int32), - (0, 0.0, dtypes.int32), - (-1, 0.1, dtypes.string), - (64, 0.1, dtypes.string), - (0, 0.0, dtypes.string), - ]) - def testSampleTokenizeAndDetokenize(self, nbest_size, alpha, out_type): - sp = SentencepieceTokenizer( - self.model, nbest_size=nbest_size, alpha=alpha, out_type=out_type) - sentences = [['I love carpet', 'I love desk.', 'I love lamp.'], - ['Never tell me the odds']] - result = sp.tokenize(ragged_factory_ops.constant(sentences)) - detokenized = sp.detokenize(result) - self.assertAllEqual(_utf8(sentences), detokenized) - - def testReturnNbestAndDetokenize(self): - sp = SentencepieceTokenizer( - self.model, nbest_size=2, out_type=dtypes.int32, return_nbest=True) - sentences = ['I love carpet', 'Never tell me the odds'] - result = sp.tokenize(ragged_factory_ops.constant(sentences)) - detokenized = sp.detokenize(result) - self.assertAllEqual( - _utf8(sentences), ragged_gather_ops.gather(detokenized, [0, 2])) - self.assertAllEqual( - _utf8(sentences), ragged_gather_ops.gather(detokenized, [1, 3])) - - def testReturnNbestAndDetokenizeWithOffsets(self): - sp = SentencepieceTokenizer( - self.model, nbest_size=2, out_type=dtypes.int32, return_nbest=True) - sentences = ['I love carpet', 'Never tell me the odds'] - result, _, _ = sp.tokenize_with_offsets( - ragged_factory_ops.constant(sentences)) - detokenized = sp.detokenize(result) - self.assertAllEqual( - _utf8(sentences), ragged_gather_ops.gather(detokenized, [0, 2])) - self.assertAllEqual( - _utf8(sentences), ragged_gather_ops.gather(detokenized, [1, 3])) - - def testSavedModel(self): - sp = SentencepieceTokenizer(self.model) - test_module = TestSavedModelModule(sp) - inputs = constant_op.constant(['hello world']) - expected_result = test_module.tokenize(inputs) - temp_dir = tempfile.mkdtemp(dir=test.get_temp_dir()) - save.save(test_module, temp_dir) - restored_model = load.load(temp_dir) - self.assertAllEqual(restored_model.tokenize(inputs), expected_result) - file_io.delete_recursively(temp_dir) - - def testBasicPipeline(self): - if not context.executing_eagerly(): - self.skipTest('testBasicPipeline only supported in eager mode.') - - sp = SentencepieceTokenizer(self.model) - - strings = ['hello', 'world'] - dataset = dataset_ops.Dataset.from_tensor_slices(strings) - # Ensure we can map the tokenizer across the dataset. - dataset1 = dataset.map(sp.tokenize) - # Ensure there's no error with a second map call. - dataset2 = dataset.map(sp.tokenize) - - expected = sp.tokenize(strings) - for i, result in enumerate(dataset1): - self.assertAllEqual(result, expected[i]) - for i, result in enumerate(dataset2): - self.assertAllEqual(result, expected[i]) - - def testEmptyModel(self): - with self.cached_session(): - with self.assertRaises(errors.InvalidArgumentError): - sp = SentencepieceTokenizer() - result = sp.tokenize('whatever') - result.eval() - - def testInvalidModel(self): - with self.cached_session(): - with self.assertRaises(errors.InternalError): - sp = SentencepieceTokenizer('invalid model') - result = sp.tokenize('whatever') - result.eval() - - -# Test that datasets depending on a sentencepiece tokenizer resources can be -# serialized without external references. -# This test is separate from `SentencepieceTokenizerOpTest` below because -# context._reset_context() must be called from outside the context created by -# `@test_util.run_all_in_graph_and_eager_modes`. 
-class DatasetSerializationTest(test_util.TensorFlowTestCase): - - def testSerialization(self): - with context.eager_mode(): - sentencepiece_model_file = ( - 'tensorflow_text/python/ops/test_data/' - 'test_oss_model.model') - model = gfile.GFile(sentencepiece_model_file, 'rb').read() - sp = SentencepieceTokenizer(model) - strings = ['hello', 'world'] - dataset = dataset_ops.Dataset.from_tensor_slices(strings) - # Ensure we can map the tokenizer across the dataset. - dataset = dataset.map(sp.tokenize) - graph = dataset._as_serialized_graph() - element_spec = dataset.element_spec - dataset_graph_string = graph.numpy() - expected = sp.tokenize(strings) - - # Reset the eager context to make sure that the serialized dataset graph - # is self-contained. - context._reset_context() - - with context.eager_mode(): - restored = dataset_ops.from_variant( - gen_experimental_dataset_ops.dataset_from_graph(dataset_graph_string), - element_spec) - for i, result in enumerate(restored): - self.assertAllEqual(result, expected[i]) - - -if __name__ == '__main__': - test.main()
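For reference, the file removed above exercises the SentencepieceTokenizer round trip (tokenize, detokenize, tokenize_with_offsets, id_to_string/string_to_id). A minimal sketch of that round trip outside the test harness follows; it assumes tensorflow_text is installed and that a trained SentencePiece model file is available locally (the 'test_oss_model.model' path is purely illustrative, not part of this CL).

# Minimal round-trip sketch of the API the deleted tests cover; assumes
# tensorflow_text is installed and a SentencePiece model file exists locally.
import tensorflow as tf
import tensorflow_text as tf_text

model = tf.io.gfile.GFile('test_oss_model.model', 'rb').read()  # illustrative path
sp = tf_text.SentencepieceTokenizer(model, out_type=tf.string)

sentences = tf.constant(['I love lamp.'])
tokens = sp.tokenize(sentences)      # RaggedTensor of subword pieces
restored = sp.detokenize(tokens)     # back to the original strings
print(tokens.to_list(), restored.numpy())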
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sliding_window_op.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/sliding_window_op.py deleted file mode 100644 index 3025933..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sliding_window_op.py +++ /dev/null
@@ -1,157 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Sliding window op. - -Returns a sliding window of data with a specified width. -""" - -from __future__ import absolute_import -from __future__ import print_function - -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_tensor - - -def sliding_window(data, width, axis=-1, name=None): - """Builds a sliding window for `data` with a specified width. - - Returns a tensor constructed from `data`, where each element in - dimension `axis` is a slice of `data` starting at the corresponding - position, with the given width and step size. I.e.: - - * `result.shape.ndims = data.shape.ndims + 1` - * `result[i1..iaxis, a] = data[i1..iaxis, a:a+width]` - (where `0 <= a < data[i1...iaxis].shape[0] - (width - 1)`). - - Note that each result row (along dimension `axis`) has `width - 1` fewer items - than the corresponding `data` row. If a `data` row has fewer than `width` - items, then the corresponding `result` row will be empty. If you wish for - the `result` rows to be the same size as the `data` rows, you can use - `pad_along_dimension` to add `width - 1` padding elements before calling - this op. - - #### Examples: - - Sliding window (width=3) across a sequence of tokens: - - >>> # input: <string>[sequence_length] - >>> input = tf.constant(["one", "two", "three", "four", "five", "six"]) - >>> # output: <string>[sequence_length-2, 3] - >>> sliding_window(data=input, width=3, axis=0) - <tf.Tensor: shape=(4, 3), dtype=string, numpy= - array([[b'one', b'two', b'three'], - [b'two', b'three', b'four'], - [b'three', b'four', b'five'], - [b'four', b'five', b'six']], dtype=object)> - - Sliding window (width=2) across the inner dimension of a ragged matrix - containing a batch of token sequences: - - >>> # input: <string>[num_sentences, (num_words)] - >>> input = tf.ragged.constant( - ... [['Up', 'high', 'in', 'the', 'air'], - ... ['Down', 'under', 'water'], - ... ['Away', 'to', 'outer', 'space']]) - >>> # output: <string>[num_sentences, (num_word-1), 2] - >>> sliding_window(input, width=2, axis=-1) - <tf.RaggedTensor [[[b'Up', b'high'], [b'high', b'in'], [b'in', b'the'], - [b'the', b'air']], [[b'Down', b'under'], - [b'under', b'water']], - [[b'Away', b'to'], [b'to', b'outer'], - [b'outer', b'space']]]> - - Sliding window across the second dimension of a 3-D tensor containing - batches of sequences of embedding vectors: - - >>> # input: <int32>[num_sequences, sequence_length, embedding_size] - >>> input = tf.constant([ - ... [[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], [5, 5, 1]], - ... 
[[1, 1, 2], [2, 2, 2], [3, 3, 2], [4, 4, 2], [5, 5, 2]]]) - >>> # output: <int32>[num_sequences, sequence_length-1, 2, embedding_size] - >>> sliding_window(data=input, width=2, axis=1) - <tf.Tensor: shape=(2, 4, 2, 3), dtype=int32, numpy= - array([[[[1, 1, 1], - [2, 2, 1]], - [[2, 2, 1], - [3, 3, 1]], - [[3, 3, 1], - [4, 4, 1]], - [[4, 4, 1], - [5, 5, 1]]], - [[[1, 1, 2], - [2, 2, 2]], - [[2, 2, 2], - [3, 3, 2]], - [[3, 3, 2], - [4, 4, 2]], - [[4, 4, 2], - [5, 5, 2]]]], dtype=int32)> - - Args: - data: `<dtype> [O1...ON, A, I1...IM]` - A potentially ragged K-dimensional tensor with outer dimensions of size - `O1...ON`; axis dimension of size `A`; and inner dimensions of size - `I1...IM`. I.e. `K = N + 1 + M`, where `N>=0` and `M>=0`. - - width: An integer constant specifying the width of the window. Must be - greater than zero. - - axis: An integer constant specifying the axis along which sliding window - is computed. Negative axis values from `-K` to `-1` are supported. - - name: The name for this op (optional). - - Returns: - A `K+1` dimensional tensor with the same dtype as `data`, where: - - * `result[i1..iaxis, a]` = `data[i1..iaxis, a:a+width]` - * `result.shape[:axis]` = `data.shape[:axis]` - * `result.shape[axis]` = `data.shape[axis] - (width - 1)` - * `result.shape[axis + 1]` = `width` - * `result.shape[axis + 2:]` = `data.shape[axis + 1:]` - """ - with ops.name_scope(name, "SlidingWindow", [data, axis]): - data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name="data") - - if not isinstance(axis, int): - raise TypeError("axis must be an int") - - if not isinstance(width, int): - raise TypeError("width must be an int") - - if data.shape.ndims is not None and (axis < -data.shape.ndims or - axis >= data.shape.ndims): - raise errors.InvalidArgumentError( - None, None, "axis must be between -k <= axis <= -1 OR 0 <= axis < k") - - if width <= 0: - raise errors.InvalidArgumentError( - None, None, "width must be an integer greater than 0") - - slices = [] - for start in range(width): - stop = None if start - width + 1 == 0 else start - width + 1 - if axis >= 0: - idx = [slice(None)] * axis + [slice(start, stop)] - else: - idx = [Ellipsis, slice(start, stop)] + [slice(None)] * (-axis - 1) - slices.append(data[idx]) - - # Stack the slices. - stack_axis = axis + 1 if axis >= 0 else axis - return array_ops.stack(slices, stack_axis)
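The docstring of the file removed above describes the result as `width` shifted slices of the data stacked along a new axis, and the implementation builds exactly those slices with `stop = start - width + 1`. A back-of-the-envelope plain-Python rendering of that slice-and-stack arithmetic (lists only, no TensorFlow) may make the indexing easier to follow:

# Illustration of the slice-and-stack construction used by the deleted
# sliding_window op: take `width` shifted copies of the data and zip them
# together element-wise. Plain Python lists, not the TF kernel.
def sliding_window_1d(data, width):
    if width <= 0:
        raise ValueError('width must be an integer greater than 0')
    slices = []
    for start in range(width):
        stop = None if start - width + 1 == 0 else start - width + 1
        slices.append(data[start:stop])   # drop `start` items at the front,
                                          # `width - 1 - start` at the back
    # zip(*) plays the role of array_ops.stack(slices, stack_axis)
    return [list(window) for window in zip(*slices)]

assert sliding_window_1d(['one', 'two', 'three', 'four', 'five', 'six'], 3) == [
    ['one', 'two', 'three'], ['two', 'three', 'four'],
    ['three', 'four', 'five'], ['four', 'five', 'six']]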
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sliding_window_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/sliding_window_op_test.py deleted file mode 100644 index b50af4f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/sliding_window_op_test.py +++ /dev/null
@@ -1,351 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for sliding_window_op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import sliding_window_op - - -@test_util.run_all_in_graph_and_eager_modes -class SlidingWindowOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): - - # TODO(b/122967006): Update this to include *all* docstring examples. - def testDocStringExamples(self): - # Sliding window (width=3) across a sequence of tokens - data = constant_op.constant(['one', 'two', 'three', 'four', 'five', 'six']) - output = sliding_window_op.sliding_window(data=data, width=3, axis=0) - self.assertAllEqual( - output, [[b'one', b'two', b'three'], [b'two', b'three', b'four'], - [b'three', b'four', b'five'], [b'four', b'five', b'six']]) - self.assertEqual('Shape: %s -> %s' % (data.shape, output.shape), - 'Shape: (6,) -> (4, 3)') - - # Sliding window (width=2) across the inner dimension of a ragged matrix - # containing a batch of token sequences - data = ragged_factory_ops.constant([['Up', 'high', 'in', 'the', 'air'], - ['Down', 'under', 'water'], - ['Away', 'to', 'outer', 'space']]) - output = sliding_window_op.sliding_window(data, width=2, axis=-1) - self.assertAllEqual(output, [ - [[b'Up', b'high'], [b'high', b'in'], [b'in', b'the'], [b'the', b'air']], - [[b'Down', b'under'], [b'under', b'water']], - [[b'Away', b'to'], [b'to', b'outer'], [b'outer', b'space']] - ]) # pyformat: disable - self.assertEqual( - 'Shape: %s -> %s' % (data.shape.as_list(), output.shape.as_list()), - 'Shape: [3, None] -> [3, None, 2]') - - # Sliding window across the second dimension of a 3-D tensor containing - # batches of sequences of embedding vectors: - data = constant_op.constant([[[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], - [5, 5, 1]], - [[1, 1, 2], [2, 2, 2], [3, 3, 2], [4, 4, 2], - [5, 5, 2]]]) - output = sliding_window_op.sliding_window(data=data, width=2, axis=1) - self.assertAllEqual(output, - [[[[1, 1, 1], [2, 2, 1]], [[2, 2, 1], [3, 3, 1]], - [[3, 3, 1], [4, 4, 1]], [[4, 4, 1], [5, 5, 1]]], - [[[1, 1, 2], [2, 2, 2]], [[2, 2, 2], [3, 3, 2]], - [[3, 3, 2], [4, 4, 2]], [[4, 4, 2], [5, 5, 2]]]]) - self.assertEqual('Shape: %s -> %s' % (data.shape, output.shape), - 'Shape: (2, 5, 3) -> (2, 4, 2, 3)') - - def _test_sliding_window_op(self, expected_result, data, width, axis): - result = sliding_window_op.sliding_window(data, width, axis) - self.assertAllEqual(expected_result, result) - - def test_sliding_window_for_one_dimensional_data(self): - """Test sliding_window when data is a 1 
dimensional tensor. - - data_shape: [5] - total_dimensions: 1 - outer_dimensions: 0 - inner_dimensions: 0 - axis_dimension: 1 - - result_shape: [3, 3] - """ - data = constant_op.constant([1, 2, 3, 4, 5]) - width = 3 - axis = -1 - - expected_result = constant_op.constant([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) - self._test_sliding_window_op(expected_result, data, width, axis) - - def test_sliding_window_for_data_with_outer_dimensions(self): - """Test sliding_window when data has outer dimensions. - - data_shape: [5,3] - axis_dimension: 1 - - result_shape: [4, 2, 3] - """ - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], - [5, 5, 1]]) - - width = 2 - axis = -2 - - expected_result = constant_op.constant([[[1, 1, 1], [2, 2, 1]], - [[2, 2, 1], [3, 3, 1]], - [[3, 3, 1], [4, 4, 1]], - [[4, 4, 1], [5, 5, 1]]]) - self._test_sliding_window_op(expected_result, data, width, axis) - - def test_sliding_window_for_data_with_zero_axis(self): - """Test sliding_window when axis is 0. - - data_shape: [5, 3] - - result_shape: [4, 2, 3] - """ - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], - [5, 5, 1]]) - - width = 2 - axis = 0 - - expected_result = constant_op.constant([[[1, 1, 1], [2, 2, 1]], - [[2, 2, 1], [3, 3, 1]], - [[3, 3, 1], [4, 4, 1]], - [[4, 4, 1], [5, 5, 1]]]) - self._test_sliding_window_op(expected_result, data, width, axis) - - def test_sliding_window_for_multi_dimensional_data(self): - """Test sliding_window when data has both inner and outer dimensions. - - data_shape: [2, 4, 3] - total_dimensions: 3 - outer_dimensions: 1 - inner_dimensions: 1 - axis_dimension: 1 - - result_shape: [2, 3, 2, 3] - """ - data = constant_op.constant([[[1, 1, 1], [2, 2, 2], [3, 3, 3], [4, 4, 4]], - [[5, 5, 5], [6, 6, 6], [7, 7, 7], [8, 8, 8]]]) - width = 2 - axis = -2 - - expected_result = constant_op.constant( - [[[[1, 1, 1], [2, 2, 2]], [[2, 2, 2], [3, 3, 3]], [[3, 3, 3], [4, 4, - 4]]], - [[[5, 5, 5], [6, 6, 6]], [[6, 6, 6], [7, 7, 7]], - [[7, 7, 7], [8, 8, 8]]]]) # pyformat: disable - self._test_sliding_window_op(expected_result, data, width, axis) - - def test_sliding_window_op_invalid_width(self): - data = constant_op.constant([1, 2, 3, 4, 5]) - axis = -1 - error_message = 'width must be an integer greater than 0' - - with self.assertRaisesRegexp(errors.InvalidArgumentError, error_message): - self._test_sliding_window_op(data, data, 0, axis) - with self.assertRaisesRegexp(errors.InvalidArgumentError, error_message): - self._test_sliding_window_op(data, data, -1, axis) - - def test_sliding_window_op_invalid_axis(self): - data = constant_op.constant([1, 2, 3, 4, 5]) - width = 3 - error_message = 'axis must be between -k <= axis <= -1 OR 0 <= axis < k' - - with self.assertRaisesRegexp(errors.InvalidArgumentError, error_message): - self._test_sliding_window_op(data, data, width, -2) - with self.assertRaisesRegexp(errors.InvalidArgumentError, error_message): - self._test_sliding_window_op(data, data, width, 1) - - def test_sliding_window_op_invalid_data_types(self): - data = constant_op.constant([1, 2, 3, 4, 5]) - width = 3 - bad_width = constant_op.constant([width]) - axis = -1 - bad_axis = constant_op.constant([axis]) - - with self.assertRaisesRegexp(TypeError, 'width must be an int'): - self._test_sliding_window_op(data, data, bad_width, axis) - with self.assertRaisesRegexp(TypeError, 'axis must be an int'): - self._test_sliding_window_op(data, data, width, bad_axis) - - def test_docstring_example_1d_tensor(self): - """Test the 1D example in the sliding_window 
docstring.""" - data = constant_op.constant(['one', 'two', 'three', 'four', 'five']) - width = 3 - axis = -1 - - expected_result = constant_op.constant( - [['one', 'two', 'three'], ['two', 'three', 'four'], - ['three', 'four', 'five']]) # pyformat: disable - self._test_sliding_window_op(expected_result, data, width, axis) - - def test_docstring_example_inner_dimension_tensor(self): - """Test the inner-dimension example in the sliding_window docstring.""" - data = constant_op.constant([[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], - [5, 5, 1]]) - width = 2 - axis = -1 - - expected_result = constant_op.constant([[[1, 1], [1, 1]], [[2, 2], [2, 1]], - [[3, 3], [3, 1]], [[4, 4], [4, 1]], - [[5, 5], [5, 1]]]) - self._test_sliding_window_op(expected_result, data, width, axis) - - def test_docstring_example_multi_dimension_tensor(self): - """Test the multi-dimension example in the sliding_window docstring.""" - data = constant_op.constant([[[1, 1, 1], [2, 2, 1], [3, 3, 1], [4, 4, 1], - [5, 5, 1]], - [[1, 1, 2], [2, 2, 2], [3, 3, 2], [4, 4, 2], - [5, 5, 2]]]) - width = 2 - axis = -2 - - expected_result = constant_op.constant([[[[1, 1, 1], [2, 2, 1]], - [[2, 2, 1], [3, 3, 1]], - [[3, 3, 1], [4, 4, 1]], - [[4, 4, 1], [5, 5, 1]]], - [[[1, 1, 2], [2, 2, 2]], - [[2, 2, 2], [3, 3, 2]], - [[3, 3, 2], [4, 4, 2]], - [[4, 4, 2], [5, 5, 2]]]]) - self._test_sliding_window_op(expected_result, data, width, axis) - - def test_with_unknown_shape_tensor(self): - """Vaalidate that the op still works with a tensor of unknown shape.""" - data = array_ops.placeholder_with_default( - constant_op.constant([1, 2, 3, 4, 5]), shape=None) - width = 3 - axis = -1 - - expected_result = constant_op.constant([[1, 2, 3], [2, 3, 4], [3, 4, 5]]) - self._test_sliding_window_op(expected_result, data, width, axis) - - @parameterized.parameters([ - dict( - descr='2-D data, width=1', - data=[['See', 'Spot', 'run'], ['Hello'], [], ['Go', 'Giants'], - ['a', 'b', 'c', 'd', 'e', 'f']], - width=1, - expected=[ - [[b'See'], [b'Spot'], [b'run']], - [[b'Hello']], - [], - [[b'Go'], [b'Giants']], - [[b'a'], [b'b'], [b'c'], [b'd'], [b'e'], [b'f']]]), - dict( - descr='2-D data, width=2', - data=[['See', 'Spot', 'run'], ['Hello'], [], ['Go', 'Giants'], - ['a', 'b', 'c', 'd', 'e', 'f']], - width=2, - expected=[ - [[b'See', b'Spot'], [b'Spot', b'run']], - [], - [], - [[b'Go', b'Giants']], - [[b'a', b'b'], [b'b', b'c'], [b'c', b'd'], [b'd', b'e'], - [b'e', b'f']]]), - dict( - descr='2-D data, width=3', - data=[['See', 'Spot', 'run'], ['Hello'], [], ['Go', 'Giants'], - ['a', 'b', 'c', 'd', 'e']], - width=3, - expected=[ - [[b'See', b'Spot', b'run']], - [], - [], - [], - [[b'a', b'b', b'c'], [b'b', b'c', b'd'], [b'c', b'd', b'e']]]), - - dict( - descr='3-D data, ragged_rank=1, width=2, axis=1', - data=[ - [[0, 0], [0, 1], [0, 2]], - [], - [[1, 0], [1, 1], [1, 2], [1, 3], [1, 4]]], - ragged_rank=1, - width=2, - axis=1, - expected=[ - [[[0, 0], [0, 1]], - [[0, 1], [0, 2]]], - [], - [[[1, 0], [1, 1]], - [[1, 1], [1, 2]], - [[1, 2], [1, 3]], - [[1, 3], [1, 4]]]]), - dict( - descr='3-D data, ragged_rank=2, width=2, axis=2', - data=[[[1, 2, 3, 4], - [5, 6]], - [], - [[7, 8, 9]]], - width=2, - axis=2, - expected=[[[[1, 2], [2, 3], [3, 4]], - [[5, 6]]], - [], - [[[7, 8], [8, 9]]]]), - dict( - descr='2-D data, width=2, axis=0', - data=[[1, 2], [3, 4, 5], [], [6, 7]], - width=2, - axis=0, - expected=[ - [[1, 2], [3, 4, 5]], - [[3, 4, 5], []], - [[], [6, 7]]]), - dict( - descr='3-D data, ragged_rank=1, width=2, axis=2', - data=[[[0, 1, 2, 3], - [4, 5, 6, 7]], - [], - 
[[8, 9, 10, 11]]], - width=2, - axis=2, - expected=[[[[0, 1], [1, 2], [2, 3]], - [[4, 5], [5, 6], [6, 7]]], - [], - [[[8, 9], [9, 10], [10, 11]]]]), - dict( - descr='empty data', - data=[], - ragged_rank=1, - width=2, - expected=[]), - ]) # pyformat: disable - def testRaggedInputs(self, - descr, - data, - width, - expected, - axis=-1, - ragged_rank=None): - data = ragged_factory_ops.constant(data, ragged_rank=ragged_rank) - result = sliding_window_op.sliding_window(data, width, axis) - self.assertAllEqual(result, expected) - - -if __name__ == '__main__': - test.main()
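The ragged-input cases in the test file removed above also pin down the edge case the docstring calls out: a row with fewer than `width` items yields an empty result row. A short usage sketch through the library's public `sliding_window` entry point (assuming tensorflow_text is installed) shows the same behavior:

# Usage sketch, not part of this CL: rows shorter than `width` come back empty.
import tensorflow as tf
import tensorflow_text as tf_text

data = tf.ragged.constant([['See', 'Spot', 'run'], ['Hello'], ['Go', 'Giants']])
print(tf_text.sliding_window(data, width=2, axis=-1).to_list())
# -> [[[b'See', b'Spot'], [b'Spot', b'run']], [], [[b'Go', b'Giants']]]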
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/span_alignment_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/span_alignment_op_test.py deleted file mode 100644 index e2e82a1..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/span_alignment_op_test.py +++ /dev/null
@@ -1,528 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for the pointer_ops.span_overlaps() op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import pointer_ops - - -@test_util.run_all_in_graph_and_eager_modes -class SpanOverlapsOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): - maxDiff = 5000 # Display diffs even if they're long. pylint: disable=invalid-name - - #============================================================================= - # Source & Target Spans: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 60 - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Source[0]: [-0-] [-1-] [2] [3] [4][-5-][-6-][-7-][-8-][-9-] - # Target[0]: [-0-][-1-] [-2-][-3-][-4-] [5] [6] [7] [-8-][-9-] - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Source[1]: [-0-] [-1-] [-2-] [-3-] [-4-] [-5-] - # Target[1]: [2] [-0-] [----1---] [3] [4] - # |====|====|====|====|====|====|====|====|====|====|====|====| - # [----0----] - # Source[2]: [--1--][--3--] - # [--2--] - # - # [--0--] - # Target[2]: [------1------] - # [--2--] [-3-] - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 60 - BATCH_SIZE = 3 - SOURCE_START = [[0, 10, 16, 20, 27, 30, 35, 40, 45, 50], - [0, 10, 20, 30, 40, 50], - [0, 2, 3, 9]] # pyformat: disable - SOURCE_LIMIT = [[5, 15, 19, 23, 30, 35, 40, 45, 50, 55], - [5, 15, 25, 35, 45, 55], - [11, 9, 10, 16]] # pyformat: disable - TARGET_START = [[0, 5, 15, 20, 25, 31, 35, 42, 47, 52], - [10, 18, 1, 30, 42], - [2, 0, 1, 10]] # pyformat: disable - TARGET_LIMIT = [[5, 10, 20, 25, 30, 34, 38, 45, 52, 57], - [15, 28, 4, 33, 45], - [9, 15, 8, 15]] # pyformat: disable - - # Spans encoded using 1D tensors - BATCH_ITEM = [] - for i in range(BATCH_SIZE): - BATCH_ITEM.append( - dict( - source_start=SOURCE_START[i], # <int>[s] - source_limit=SOURCE_LIMIT[i], # <int>[s] - target_start=TARGET_START[i], # <int>[t] - target_limit=TARGET_LIMIT[i], # <int>[t] - )) - - # Spans encoded using 2D ragged tensors - RAGGED_BATCH_2D = dict( - source_start=SOURCE_START, # <int>[b, (s)] - source_limit=SOURCE_LIMIT, # <int>[b, (s)] - target_start=TARGET_START, # <int>[b, (t)] - target_limit=TARGET_LIMIT, # <int>[b, (t)] - ) - - # Spans encoded using 2D uniform tensors - UNIFORM_BATCH_2D = dict( - source_start=[row[:4] for row in SOURCE_START], # <int>[b, s] - source_limit=[row[:4] for row in SOURCE_LIMIT], # <int>[b, s] - target_start=[row[:4] for row in TARGET_START], # <int>[b, t] - target_limit=[row[:4] for row in TARGET_LIMIT], # <int>[b, t] - ) - - # Spans encoded using a 3D ragged tensor with 2 ragged dimensions - # (2 batch dimensions) - 
RAGGED_BATCH_3D = dict( - source_start=[SOURCE_START[:2], SOURCE_START[2:]], # <int>[b1, (b2), (s)] - source_limit=[SOURCE_LIMIT[:2], SOURCE_LIMIT[2:]], # <int>[b1, (b2), (s)] - target_start=[TARGET_START[:2], TARGET_START[2:]], # <int>[b1, (b2), (t)] - target_limit=[TARGET_LIMIT[:2], TARGET_LIMIT[2:]], # <int>[b1, (b2), (t)] - ) - - # Spans encoded using a 3D uniform tensor (2 batch dimensions) - UNIFORM_BATCH_3D = dict( - source_start=[UNIFORM_BATCH_2D['source_start']] * 2, # <int>[b1, b2, s] - source_limit=[UNIFORM_BATCH_2D['source_limit']] * 2, # <int>[b1, b2, s] - target_start=[UNIFORM_BATCH_2D['target_start']] * 2, # <int>[b1, b2, t] - target_limit=[UNIFORM_BATCH_2D['target_limit']] * 2, # <int>[b1, b2, t] - ) - - @parameterized.parameters( - #========================================================================= - # This group of tests use the following source & target spans: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 60 - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Source: [-0-] [-1-] [2] [3] [4][-5-][-6-][-7-][-8-][-9-] - # Target: [-0-][-1-] [-2-][-3-][-4-] [5] [6] [7] [-8-][-9-] - # |====|====|====|====|====|====|====|====|====|====|====|====| - dict( - name='test set 1, with default overlap flags', - expected=[0, -1, -1, -1, -1, -1, -1, -1, -1, -1], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contains=True', - contains=True, - expected=[0, -1, -1, -1, -1, 5, 6, 7, -1, -1], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contained_by=True', - contained_by=True, - expected=[0, -1, 2, 3, 4, -1, -1, -1, -1, -1], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contains=True and contained_by=True', - contains=True, - contained_by=True, - expected=[0, -1, 2, 3, 4, 5, 6, 7, -1, -1], - **BATCH_ITEM[0]), - dict( - name='test set 1, with partial_overlap=True', - partial_overlap=True, - expected=[0, -1, 2, 3, 4, 5, 6, 7, 8, 9], - **BATCH_ITEM[0]), - #========================================================================= - # This group of tests use the following source & target spans: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 - # |====|====|====|====|====|====|====|====|====|====|====| - # Source: [-0-] [-1-] [-2-] [-3-] [-4-] [-5-] - # Target: [2] [-0-] [----1---] [3] [4] - # |====|====|====|====|====|====|====|====|====|====|====| - dict( - name='test set 2, with default overlap flags', - expected=[-1, 0, -1, -1, -1, -1], - **BATCH_ITEM[1]), - dict( - name='test set 2, with contains=True', - contains=True, - expected=[2, 0, -1, 3, 4, -1], - **BATCH_ITEM[1]), - dict( - name='test set 2, with contained_by=True', - contained_by=True, - expected=[-1, 0, 1, -1, -1, -1], - **BATCH_ITEM[1]), - dict( - name='test set 2, with partial_overlap=True', - partial_overlap=True, - expected=[2, 0, 1, 3, 4, -1], - **BATCH_ITEM[1]), - #========================================================================= - # This group of tests use the following source & target spans: - # Offset: 0 5 10 15 20 - # |====|====|====|====| - # [----0----] - # Source: [--1--][--3--] - # [--2--] - # |====|====|====|====| - # [--0--] - # Target: [------1------] - # [--2--] [-3-] - # |====|====|====|====| - dict( - name='test set 3, with default overlap flags', - expected=[-1, 0, -1, -1], - **BATCH_ITEM[2]), - dict( - name='test set 3, with contains=True', - contains=True, - expected=[2, 0, -1, 3], - **BATCH_ITEM[2]), - dict( - name='test set 3, with contained_by=True', - contained_by=True, - expected=[1, 1, 1, -1], - **BATCH_ITEM[2]), - dict( - name='test set 3, with 
contains=True and contained_by=True', - contains=True, - contained_by=True, - expected=[2, 1, 1, 3], - **BATCH_ITEM[2]), - dict( - name='test set 3, with partial_overlap=True', - partial_overlap=True, - expected=[3, 2, 2, 3], - **BATCH_ITEM[2]), - #========================================================================= - # This group of tests use RAGGED_BATCH_2D. - # Inputs have a single batch dimension, with shapes [b, (s)] and [b, (t)]. - dict( - name='default overlap flags', - expected=[ - [0, -1, -1, -1, -1, -1, -1, -1, -1, -1], - [-1, 0, -1, -1, -1, -1], - [-1, 0, -1, -1], - ], - **RAGGED_BATCH_2D), - dict( - name='contains=True', - contains=True, - expected=[ - [0, -1, -1, -1, -1, 5, 6, 7, -1, -1], - [2, 0, -1, 3, 4, -1], - [2, 0, -1, 3], - ], - **RAGGED_BATCH_2D), - #========================================================================= - # This group of tests use UNIFORM_BATCH_2D - # Inputs have a single batch dimension, with shapes [b, s] and [b, t]. - dict( - name='default overlap flags', - expected=[ - [0, -1, -1, -1], - [-1, 0, -1, -1], - [-1, 0, -1, -1], - ], - ragged_rank=0, - **UNIFORM_BATCH_2D), - dict( - name='contains=True', - contains=True, - expected=[ - [0, -1, -1, -1], - [2, 0, -1, 3], - [2, 0, -1, 3], - ], - ragged_rank=0, - **UNIFORM_BATCH_2D), - #========================================================================= - # This group of tests use RAGGED_BATCH_3D. - # Inputs have two batch dimensions, with shapes [b1, (b2), (s)] and - # [b1, (b2), (t)]. - dict( - name='default overlap flags', - expected=[ - [[0, -1, -1, -1, -1, -1, -1, -1, -1, -1], [-1, 0, -1, -1, -1, - -1]], - [[-1, 0, -1, -1]], - ], - **RAGGED_BATCH_3D), - dict( - name='contains=True', - contains=True, - expected=[ - [[0, -1, -1, -1, -1, 5, 6, 7, -1, -1], [2, 0, -1, 3, 4, -1]], - [[2, 0, -1, 3]], - ], - **RAGGED_BATCH_3D), - #========================================================================= - # This group of tests use UNIFORM_BATCH_3D - # Inputs have two batch dimensions, with shapes [b1, b2, s] and - # [b1, b2, t]. 
- dict( - name='default overlap flags', - expected=[[ - [0, -1, -1, -1], - [-1, 0, -1, -1], - [-1, 0, -1, -1], - ]] * 2, - ragged_rank=0, - **UNIFORM_BATCH_3D), - dict( - name='contains=True', - contains=True, - expected=[[ - [0, -1, -1, -1], - [2, 0, -1, 3], - [2, 0, -1, 3], - ]] * 2, - ragged_rank=0, - **UNIFORM_BATCH_3D), - ) # pyformat: disable - def testSpanAlignment(self, - name, - source_start, - source_limit, - target_start, - target_limit, - expected, - contains=False, - contained_by=False, - partial_overlap=False, - ragged_rank=None): - source_start = ragged_factory_ops.constant( - source_start, ragged_rank=ragged_rank) - source_limit = ragged_factory_ops.constant( - source_limit, ragged_rank=ragged_rank) - target_start = ragged_factory_ops.constant( - target_start, ragged_rank=ragged_rank) - target_limit = ragged_factory_ops.constant( - target_limit, ragged_rank=ragged_rank) - multivalent_result = False - alignment = pointer_ops.span_alignment( - source_start, source_limit, target_start, target_limit, contains, - contained_by, partial_overlap, multivalent_result) - self.assertAllEqual(alignment, expected) - - @parameterized.parameters([ - #========================================================================= - # This group of tests use the following source & target spans: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 60 - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Source: [-0-] [-1-] [2] [3] [4][-5-][-6-][-7-][-8-][-9-] - # Target: [-0-][-1-] [-2-][-3-][-4-] [5] [6] [7] [-8-][-9-][10] - # |====|====|====|====|====|====|====|====|====|====|====|====| - dict( - name='test set 1, with default overlap flags', - expected=[[0], [], [], [], [], [], [], [], [], []], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contains=True', - contains=True, - expected=[[0], [], [], [], [], [5], [6], [7], [], []], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contained_by=True', - contained_by=True, - expected=[[0], [], [2], [3], [4], [], [], [], [], []], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contains=True and contained_by=True', - contains=True, - contained_by=True, - expected=[[0], [], [2], [3], [4], [5], [6], [7], [], []], - **BATCH_ITEM[0]), - dict( - name='test set 1, with partial_overlap=True', - partial_overlap=True, - expected=[[0], [], [2], [3], [4], [5], [6], [7], [8], [8, 9]], - **BATCH_ITEM[0]), - #========================================================================= - # This group of tests use the following source & target spans: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 - # |====|====|====|====|====|====|====|====|====|====|====| - # Source: [-0-] [-1-] [-2-] [-3-] [-4-] [-5-] - # Target: [2] [-0-] [----1---] [3] [4] - # |====|====|====|====|====|====|====|====|====|====|====| - dict( - name='test set 2, with default overlap flags', - expected=[[], [0], [], [], [], []], - **BATCH_ITEM[1]), - dict( - name='test set 2, with contains=True', - contains=True, - expected=[[2], [0], [], [3], [4], []], - **BATCH_ITEM[1]), - dict( - name='test set 2, with contained_by=True', - contained_by=True, - expected=[[], [0], [1], [], [], []], - **BATCH_ITEM[1]), - dict( - name='test set 2, with partial_overlap=True', - partial_overlap=True, - expected=[[2], [0], [1], [3], [4], []], - **BATCH_ITEM[1]), - #========================================================================= - # This group of tests use the following source & target spans: - # Offset: 0 5 10 15 20 - # |====|====|====|====| - # [----0----] - # Source: [--1--][--3--] - # 
[--2--] - # |====|====|====|====| - # [--0--] - # Target: [------1------] - # [--2--] [-3-] - # |====|====|====|====| - dict( - name='test set 3, with default overlap flags', - expected=[[], [0], [], []], - **BATCH_ITEM[2]), - dict( - name='test set 3, with contains=True', - contains=True, - expected=[[0, 2], [0], [], [3]], - **BATCH_ITEM[2]), - dict( - name='test set 3, with contained_by=True', - contained_by=True, - expected=[[1], [0, 1], [1], []], - **BATCH_ITEM[2]), - dict( - name='test set 3, with contains=True and contained_by=True', - contains=True, - contained_by=True, - expected=[[0, 1, 2], [0, 1], [1], [3]], - **BATCH_ITEM[2]), - dict( - name='test set 3, with partial_overlap=True', - partial_overlap=True, - expected=[[0, 1, 2, 3], [0, 1, 2], [0, 1, 2], [1, 3]], - **BATCH_ITEM[2]), - #========================================================================= - # This group of tests use RAGGED_BATCH_2D - # Inputs have a single batch dimension, with shapes [b, (s)] and [b, (t)]. - dict( - name='default overlap flags', - expected=[ - [[0], [], [], [], [], [], [], [], [], []], - [[], [0], [], [], [], []], - [[], [0], [], []], - ], - **RAGGED_BATCH_2D), - dict( - name='contains=True', - contains=True, - expected=[ - [[0], [], [], [], [], [5], [6], [7], [], []], - [[2], [0], [], [3], [4], []], - [[0, 2], [0], [], [3]], - ], - **RAGGED_BATCH_2D), - #========================================================================= - # This group of tests use UNIFORM_BATCH_2D - # Inputs have a single batch dimension, with shapes [b, s] and [b, t]. - dict( - name='default overlap flags', - expected=[ - [[0], [], [], []], - [[], [0], [], []], - [[], [0], [], []], - ], - ragged_rank=0, - **UNIFORM_BATCH_2D), - dict( - name='contains=True', - contains=True, - expected=[ - [[0], [], [], []], - [[2], [0], [], [3]], - [[0, 2], [0], [], [3]], - ], - ragged_rank=0, - **UNIFORM_BATCH_2D), - #========================================================================= - # This group of tests use RAGGED_BATCH_3D - # Inputs have two batch dimensions, with shapes [b1, (b2), (s)] and - # [b1, (b2), (t)]. - dict( - name='default overlap flags', - expected=[ - [[[0], [], [], [], [], [], [], [], [], []], - [[], [0], [], [], [], []]], - [[[], [0], [], []]], - ], - **RAGGED_BATCH_3D), - dict( - name='contains=True', - contains=True, - expected=[ - [[[0], [], [], [], [], [5], [6], [7], [], []], - [[2], [0], [], [3], [4], []]], - [[[0, 2], [0], [], [3]]], - ], - **RAGGED_BATCH_3D), - #========================================================================= - # This group of tests use UNIFORM_BATCH_3D - # Inputs have two batch dimensions, with shapes [b1, b2, s] and - # [b1, b2, t]. 
- dict( - name='default overlap flags', - expected=[[ - [[0], [], [], []], - [[], [0], [], []], - [[], [0], [], []], - ]] * 2, - ragged_rank=0, - **UNIFORM_BATCH_3D), - dict( - name='contains=True', - contains=True, - expected=[[ - [[0], [], [], []], - [[2], [0], [], [3]], - [[0, 2], [0], [], [3]], - ]] * 2, - ragged_rank=0, - **UNIFORM_BATCH_3D), - ]) # pyformat: disable - def testSpanMultiAlignment(self, - name, - source_start, - source_limit, - target_start, - target_limit, - expected, - contains=False, - contained_by=False, - partial_overlap=False, - ragged_rank=None): - source_start = ragged_factory_ops.constant( - source_start, ragged_rank=ragged_rank) - source_limit = ragged_factory_ops.constant( - source_limit, ragged_rank=ragged_rank) - target_start = ragged_factory_ops.constant( - target_start, ragged_rank=ragged_rank) - target_limit = ragged_factory_ops.constant( - target_limit, ragged_rank=ragged_rank) - multivalent_result = True - alignment = pointer_ops.span_alignment( - source_start, source_limit, target_start, target_limit, contains, - contained_by, partial_overlap, multivalent_result) - self.assertAllEqual(alignment, expected) - - -if __name__ == '__main__': - test.main()
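The parameterizations in the file removed above pin down the per-pair matching rule behind span_alignment and span_overlaps: by default only identical spans match, `contains` accepts a source span that fully covers the target, `contained_by` accepts the reverse, and `partial_overlap` accepts any non-empty intersection. A plain-Python restatement of that rule for a single pair of half-open spans, written only to make the truth table explicit (it is not the pointer_ops kernel):

# Illustrative per-pair overlap rule matching the deleted test expectations.
# Spans are half-open [start, limit).
def spans_match(s_start, s_limit, t_start, t_limit,
                contains=False, contained_by=False, partial_overlap=False):
    if partial_overlap:                       # any non-empty intersection
        return s_start < t_limit and t_start < s_limit
    if contains and s_start <= t_start and t_limit <= s_limit:
        return True                           # source fully covers target
    if contained_by and t_start <= s_start and s_limit <= t_limit:
        return True                           # target fully covers source
    if not contains and not contained_by:     # default: exact match only
        return (s_start, s_limit) == (t_start, t_limit)
    return False

# Mirrors 'test set 1, with default overlap flags': only span 0 aligns exactly.
assert spans_match(0, 5, 0, 5)
assert not spans_match(10, 15, 5, 10)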
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/span_overlaps_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/span_overlaps_op_test.py deleted file mode 100644 index 45e53cb..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/span_overlaps_op_test.py +++ /dev/null
@@ -1,440 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for the pointer_ops.span_overlaps() op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.eager import context -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import test -from tensorflow_text.python.ops import pointer_ops - - -@test_util.run_all_in_graph_and_eager_modes -class SpanOverlapsOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): - maxDiff = 5000 # Display diffs even if they're long. pylint: disable=invalid-name - - #============================================================================= - # Source & Target Spans: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 60 - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Source[0]: [-0-] [-1-] [2] [3] [4][-5-][-6-][-7-][-8-][-9-] - # Target[0]: [-0-][-1-] [-2-][-3-][-4-] [5] [6] [7] [-8-][-9-] - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Source[1]: [-0-] [-1-] [-2-] [-3-] [-4-] [-5-] - # Target[1]: [2] [-0-] [----1---] [3] [4] - # |====|====|====|====|====|====|====|====|====|====|====|====| - # [----0----] - # Source[2]: [--1--][--3--] - # [--2--] - # - # [--0--] - # Target[2]: [------1------] - # [--2--] [-3-] - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 60 - BATCH_SIZE = 3 - SOURCE_START = [[0, 10, 16, 20, 27, 30, 35, 40, 45, 50], - [0, 10, 20, 30, 40, 50], - [0, 2, 3, 9]] # pyformat: disable - SOURCE_LIMIT = [[5, 15, 19, 23, 30, 35, 40, 45, 50, 55], - [5, 15, 25, 35, 45, 55], - [11, 9, 10, 16]] # pyformat: disable - TARGET_START = [[0, 5, 15, 20, 25, 31, 35, 42, 47, 52], - [10, 18, 1, 30, 42], - [2, 0, 1, 10]] # pyformat: disable - TARGET_LIMIT = [[5, 10, 20, 25, 30, 34, 38, 45, 52, 57], - [15, 28, 4, 33, 45], - [9, 15, 8, 15]] # pyformat: disable - - # Spans encoded using 1D tensors - BATCH_ITEM = [] - for i in range(BATCH_SIZE): - BATCH_ITEM.append( - dict( - source_start=SOURCE_START[i], # <int>[s] - source_limit=SOURCE_LIMIT[i], # <int>[s] - target_start=TARGET_START[i], # <int>[t] - target_limit=TARGET_LIMIT[i], # <int>[t] - )) - - # Spans encoded using 2D ragged tensors - RAGGED_BATCH_2D = dict( - source_start=SOURCE_START, # <int>[b, (s)] - source_limit=SOURCE_LIMIT, # <int>[b, (s)] - target_start=TARGET_START, # <int>[b, (t)] - target_limit=TARGET_LIMIT, # <int>[b, (t)] - ) - - # Spans encoded using 2D uniform tensors - UNIFORM_BATCH_2D = dict( - source_start=[row[:4] for row in SOURCE_START], # <int>[b, s] - source_limit=[row[:4] for row in SOURCE_LIMIT], # 
<int>[b, s] - target_start=[row[:4] for row in TARGET_START], # <int>[b, t] - target_limit=[row[:4] for row in TARGET_LIMIT], # <int>[b, t] - ) - - # Spans encoded using a 3D ragged tensor with 2 ragged dimensions - RAGGED_BATCH_3D = dict( - source_start=[SOURCE_START[:2], SOURCE_START[2:]], # <int>[b1, (b2), (s)] - source_limit=[SOURCE_LIMIT[:2], SOURCE_LIMIT[2:]], # <int>[b1, (b2), (s)] - target_start=[TARGET_START[:2], TARGET_START[2:]], # <int>[b1, (b2), (t)] - target_limit=[TARGET_LIMIT[:2], TARGET_LIMIT[2:]], # <int>[b1, (b2), (t)] - ) - - @parameterized.parameters( - #========================================================================= - # This group of tests use BATCH_ITEM[0]: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 60 - # |====|====|====|====|====|====|====|====|====|====|====|====| - # Source: [-0-] [-1-] [2] [3] [4][-5-][-6-][-7-][-8-][-9-] - # Target: [-0-][-1-] [-2-][-3-][-4-] [5] [6] [7] [-8-][-9-] - # |====|====|====|====|====|====|====|====|====|====|====|====| - dict( - name='test set 1, with default overlap flags', - expected_overlap_pairs=[(0, 0)], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contains=True', - contains=True, - expected_overlap_pairs=[(0, 0), (5, 5), (6, 6), (7, 7)], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contained_by=True', - contained_by=True, - expected_overlap_pairs=[(0, 0), (2, 2), (3, 3), (4, 4)], - **BATCH_ITEM[0]), - dict( - name='test set 1, with contains=True and contained_by=True', - contains=True, - contained_by=True, - expected_overlap_pairs=[(0, 0), (2, 2), (3, 3), (4, 4), (5, 5), (6, - 6), - (7, 7)], - **BATCH_ITEM[0]), - dict( - name='test set 1, with partial_overlap=True', - partial_overlap=True, - expected_overlap_pairs=[(0, 0), (2, 2), (3, 3), (4, 4), (5, 5), (6, - 6), - (7, 7), (8, 8), (9, 8), (9, 9)], - **BATCH_ITEM[0]), - #========================================================================= - # This group of tests use BATCH_ITEM[1]: - # Offset: 0 5 10 15 20 25 30 35 40 45 50 55 - # |====|====|====|====|====|====|====|====|====|====|====| - # Source: [-0-] [-1-] [-2-] [-3-] [-4-] [-5-] - # Target: [2] [-0-] [----1---] [3] [4] - # |====|====|====|====|====|====|====|====|====|====|====| - dict( - name='test set 2, with default overlap flags', - expected_overlap_pairs=[(1, 0)], - **BATCH_ITEM[1]), - dict( - name='test set 2, with contains=True', - contains=True, - expected_overlap_pairs=[(0, 2), (1, 0), (3, 3), (4, 4)], - **BATCH_ITEM[1]), - dict( - name='test set 2, with contained_by=True', - contained_by=True, - expected_overlap_pairs=[(1, 0), (2, 1)], - **BATCH_ITEM[1]), - dict( - name='test set 2, with partial_overlap=True', - partial_overlap=True, - expected_overlap_pairs=[(0, 2), (1, 0), (2, 1), (3, 3), (4, 4)], - **BATCH_ITEM[1]), - #========================================================================= - # This group of tests use BATCH_ITEM[2]: - # Offset: 0 5 10 15 20 - # |====|====|====|====| - # [----0----] - # Source: [--1--][--3--] - # [--2--] - # |====|====|====|====| - # [--0--] - # Target: [------1------] - # [--2--] [-3-] - # |====|====|====|====| - dict( - name='test set 3, with default overlap flags', - expected_overlap_pairs=[(1, 0)], - **BATCH_ITEM[2]), - dict( - name='test set 3, with contains=True', - contains=True, - expected_overlap_pairs=[(0, 0), (0, 2), (1, 0), (3, 3)], - **BATCH_ITEM[2]), - dict( - name='test set 3, with contained_by=True', - contained_by=True, - expected_overlap_pairs=[(0, 1), (1, 0), (1, 1), (2, 1)], - **BATCH_ITEM[2]), - dict( - name='test set 3, 
with contains=True and contained_by=True', - contains=True, - contained_by=True, - expected_overlap_pairs=[(0, 0), (0, 1), (0, 2), (1, 0), (1, 1), (2, - 1), - (3, 3)], - **BATCH_ITEM[2]), - dict( - name='test set 3, with partial_overlap=True', - partial_overlap=True, - expected_overlap_pairs=[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, - 1), - (1, 2), (2, 0), (2, 1), (2, 2), (3, 1), (3, - 3)], - **BATCH_ITEM[2]), - ) - def test1DSpanOverlaps(self, - name, - source_start, - source_limit, - target_start, - target_limit, - expected_overlap_pairs, - contains=False, - contained_by=False, - partial_overlap=False): - # Assemble expected value. (Writing out the complete expected result - # matrix takes up a lot of space, so instead we just list the positions - # in the matrix that should be True.) - # pylint: disable=g-complex-comprehension - expected = [[(s, t) in expected_overlap_pairs - for t in range(len(target_limit))] - for s in range(len(source_limit))] - - overlaps = pointer_ops.span_overlaps(source_start, source_limit, - target_start, target_limit, contains, - contained_by, partial_overlap) - self.assertAllEqual(overlaps, expected) - - @parameterized.parameters([ - #========================================================================= - # This group of tests use RAGGED_BATCH_2D - dict( - name='default overlap flags', - expected_overlap_pairs=[ - (0, 0, 0), # batch 0 - (1, 1, 0), # batch 1 - (2, 1, 0), # batch 2 - ], - **RAGGED_BATCH_2D), - dict( - name='contains=True', - contains=True, - expected_overlap_pairs=[ - (0, 0, 0), (0, 5, 5), (0, 6, 6), (0, 7, 7), # batch 0 - (1, 0, 2), (1, 1, 0), (1, 3, 3), (1, 4, 4), # batch 1 - (2, 0, 0), (2, 0, 2), (2, 1, 0), (2, 3, 3), # batch 2 - ], - **RAGGED_BATCH_2D), - dict( - name='contained_by=True', - contained_by=True, - expected_overlap_pairs=[ - (0, 0, 0), (0, 2, 2), (0, 3, 3), (0, 4, 4), # batch 0 - (1, 1, 0), (1, 2, 1), # batch 1 - (2, 0, 1), (2, 1, 0), (2, 1, 1), (2, 2, 1)], # batch 2 - **RAGGED_BATCH_2D), - dict( - name='contains=True and contained_by=True', - contains=True, - contained_by=True, - expected_overlap_pairs=[ - # Batch 0: - (0, 0, 0), (0, 2, 2), (0, 3, 3), (0, 4, 4), (0, 5, 5), - (0, 6, 6), (0, 7, 7), - # Batch 1: - (1, 0, 2), (1, 1, 0), (1, 2, 1), (1, 3, 3), (1, 4, 4), - # Batch 2: - (2, 0, 0), (2, 0, 1), (2, 0, 2), (2, 1, 0), (2, 1, 1), - (2, 2, 1), (2, 3, 3)], - **RAGGED_BATCH_2D), - dict( - name='partial_overlap=True', - partial_overlap=True, - expected_overlap_pairs=[ - # Batch 0: - (0, 0, 0), (0, 2, 2), (0, 3, 3), (0, 4, 4), (0, 5, 5), - (0, 6, 6), (0, 7, 7), (0, 8, 8), (0, 9, 8), (0, 9, 9), - # Batch 1: - (1, 0, 2), (1, 1, 0), (1, 2, 1), (1, 3, 3), (1, 4, 4), - # Batch 2: - (2, 0, 0), (2, 0, 1), (2, 0, 2), (2, 0, 3), (2, 1, 0), - (2, 1, 1), (2, 1, 2), (2, 2, 0), (2, 2, 1), (2, 2, 2), - (2, 3, 1), (2, 3, 3)], - **RAGGED_BATCH_2D), - #========================================================================= - # This group of tests use UNIFORM_BATCH_2D - dict( - name='default overlap flags', - expected_overlap_pairs=[ - (0, 0, 0), # batch 0 - (1, 1, 0), # batch 1 - (2, 1, 0), # batch 2 - ], - ragged_rank=0, - **UNIFORM_BATCH_2D), - dict( - name='contains=True', - contains=True, - expected_overlap_pairs=[ - (0, 0, 0), # batch 0 - (1, 0, 2), (1, 1, 0), (1, 3, 3), # batch 1 - (2, 0, 0), (2, 0, 2), (2, 1, 0), (2, 3, 3), # batch 2 - ], - ragged_rank=0, - **UNIFORM_BATCH_2D), - ]) # pyformat: disable - def test2DSpanOverlaps(self, - name, - source_start, - source_limit, - target_start, - target_limit, - expected_overlap_pairs, 
- contains=False, - contained_by=False, - partial_overlap=False, - ragged_rank=None): - # Assemble expected value. - # pylint: disable=g-complex-comprehension - expected = [[[(b, s, t) in expected_overlap_pairs - for t in range(len(target_limit[b]))] - for s in range(len(source_limit[b]))] - for b in range(self.BATCH_SIZE)] - - source_start = ragged_factory_ops.constant( - source_start, ragged_rank=ragged_rank) - source_limit = ragged_factory_ops.constant( - source_limit, ragged_rank=ragged_rank) - target_start = ragged_factory_ops.constant( - target_start, ragged_rank=ragged_rank) - target_limit = ragged_factory_ops.constant( - target_limit, ragged_rank=ragged_rank) - overlaps = pointer_ops.span_overlaps(source_start, source_limit, - target_start, target_limit, contains, - contained_by, partial_overlap) - self.assertAllEqual(overlaps, expected) - - @parameterized.parameters([ - #========================================================================= - # This group of tests use RAGGED_BATCH_3D - dict( - name='default overlap flags', - expected_overlap_pairs=[ - (0, 0, 0, 0), # batch [0, 0] - (0, 1, 1, 0), # batch [0, 1] - (1, 0, 1, 0), # batch [1, 0] - ], - **RAGGED_BATCH_3D), - dict( - name='contains=True', - contains=True, - expected_overlap_pairs=[ - (0, 0, 0, 0), (0, 0, 5, 5), (0, 0, 6, 6), (0, 0, 7, 7), # b[0, 0] - (0, 1, 0, 2), (0, 1, 1, 0), (0, 1, 3, 3), (0, 1, 4, 4), # b[0, 1] - (1, 0, 0, 0), (1, 0, 0, 2), (1, 0, 1, 0), (1, 0, 3, 3), # b[1, 0] - ], - **RAGGED_BATCH_3D), - ]) # pyformat: disable - def test3DSpanOverlaps(self, - name, - source_start, - source_limit, - target_start, - target_limit, - expected_overlap_pairs, - contains=False, - contained_by=False, - partial_overlap=False, - ragged_rank=None): - # Assemble expected value. - # pylint: disable=g-complex-comprehension - expected = [[[[(b1, b2, s, t) in expected_overlap_pairs - for t in range(len(target_limit[b1][b2]))] - for s in range(len(source_limit[b1][b2]))] - for b2 in range(len(source_limit[b1]))] - for b1 in range(2)] - - source_start = ragged_factory_ops.constant( - source_start, ragged_rank=ragged_rank) - source_limit = ragged_factory_ops.constant( - source_limit, ragged_rank=ragged_rank) - target_start = ragged_factory_ops.constant( - target_start, ragged_rank=ragged_rank) - target_limit = ragged_factory_ops.constant( - target_limit, ragged_rank=ragged_rank) - overlaps = pointer_ops.span_overlaps(source_start, source_limit, - target_start, target_limit, contains, - contained_by, partial_overlap) - self.assertAllEqual(overlaps, expected) - - def testErrors(self): - t = [10, 20, 30, 40, 50] - - with self.assertRaisesRegexp(TypeError, 'contains must be bool.'): - pointer_ops.span_overlaps(t, t, t, t, contains='x') - with self.assertRaisesRegexp(TypeError, 'contained_by must be bool.'): - pointer_ops.span_overlaps(t, t, t, t, contained_by='x') - with self.assertRaisesRegexp(TypeError, 'partial_overlap must be bool.'): - pointer_ops.span_overlaps(t, t, t, t, partial_overlap='x') - with self.assertRaisesRegexp( - TypeError, 'source_start, source_limit, target_start, and ' - 'target_limit must all have the same dtype'): - pointer_ops.span_overlaps(t, t, t, [1.0, 2.0, 3.0, 4.0, 5.0]) - with self.assertRaisesRegexp(ValueError, - r'Shapes \(5,\) and \(4,\) are incompatible'): - pointer_ops.span_overlaps(t, t[:4], t, t) - with self.assertRaisesRegexp(ValueError, - r'Shapes \(4,\) and \(5,\) are incompatible'): - pointer_ops.span_overlaps(t, t, t[:4], t) - with self.assertRaisesRegexp( - ValueError, r'Shapes \(1, 5\) and 
\(5,\) must have the same rank'): - pointer_ops.span_overlaps([t], [t], t, t) - if not context.executing_eagerly(): - with self.assertRaisesRegexp( - ValueError, 'For ragged inputs, the shape.ndims of at least one ' - 'span tensor must be statically known.'): - x = ragged_tensor.RaggedTensor.from_row_splits( - array_ops.placeholder(dtypes.int32), [0, 3, 8]) - pointer_ops.span_overlaps(x, x, x, x) - with self.assertRaisesRegexp( - ValueError, 'Span tensors must all have the same ragged_rank'): - a = [[10, 20, 30], [40, 50, 60]] - pointer_ops.span_overlaps(a, a, a, ragged_factory_ops.constant(a)) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - 'Mismatched ragged shapes for batch dimensions'): - rt1 = ragged_factory_ops.constant([[[1, 2], [3]], [[4, 5]]]) - rt2 = ragged_factory_ops.constant([[[1, 2], [3]], [[4, 5], [6]]]) - pointer_ops.span_overlaps(rt1, rt1, rt2, rt2) - - -if __name__ == '__main__': - test.main()
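Note on the expected matrices in the deleted test above: they are built from pair lists via a pairwise overlap rule. A minimal pure-Python sketch of that rule is below; it is illustrative only, not the `pointer_ops.span_overlaps` op itself (which works on whole span tensors, batches, and ragged inputs), and the helper name is made up.

```python
# Minimal pure-Python sketch of the pairwise rule the expected matrices above
# are derived from. Illustrative only; not the pointer_ops.span_overlaps op.
def spans_overlap(s_start, s_limit, t_start, t_limit,
                  contains=False, contained_by=False, partial_overlap=False):
  if s_start == t_start and s_limit == t_limit:
    return True                            # identical spans always match
  if contains and s_start <= t_start and t_limit <= s_limit:
    return True                            # source fully contains target
  if contained_by and t_start <= s_start and s_limit <= t_limit:
    return True                            # source lies fully inside target
  if partial_overlap and max(s_start, t_start) < min(s_limit, t_limit):
    return True                            # any non-empty intersection
  return False

# From test set 1: source span 5 = [30, 35) contains target span 5 = [31, 34),
# so the pair (5, 5) is expected only when contains=True.
assert spans_overlap(30, 35, 31, 34, contains=True)
assert not spans_overlap(30, 35, 31, 34)
```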
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_from_logits_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_from_logits_tokenizer.py deleted file mode 100644 index 03c2454c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_from_logits_tokenizer.py +++ /dev/null
@@ -1,273 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops to tokenize words into subwords.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import ops -from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -# pylint: disable=g-bad-import-order -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_split_merge_from_logits_tokenizer = load_library.load_op_library(resource_loader.get_path_to_datafile('_split_merge_from_logits_tokenizer.so')) - - -_tf_text_split_merge_from_logits_tokenizer_op_create_counter = monitoring.Counter( - '/nlx/api/python/split_merge_from_logits_tokenizer_create_counter', - 'Counter for number of SplitMergeFromLogitsTokenizer instances ' - 'created in Python.') - - -class SplitMergeFromLogitsTokenizer(TokenizerWithOffsets): - """Tokenizes a tensor of UTF-8 string into words according to logits.""" - - def __init__(self, force_split_at_break_character=True): - """Initializes a new instance. - - Args: - force_split_at_break_character: a bool that indicates whether to force - start a new word after an ICU-defined whitespace character. Regardless - of this parameter, we never include a whitespace into a token, and we - always ignore the split/merge action for the whitespace character - itself. This parameter indicates what happens after a whitespace. - * if force_split_at_break_character is true, create a new word starting - at the first non-space character, regardless of the 0/1 label for - that character, for instance: - - ```python - s = [2.0, 1.0] # sample pair of logits indicating a split action - m = [1.0, 3.0] # sample pair of logits indicating a merge action - - strings=["New York"] - logits=[[s, m, m, s, m, m, m, m]] - output tokens=[["New", "York"]] - - strings=["New York"] - logits=[[s, m, m, m, m, m, m, m]] - output tokens=[["New", "York"]] - - strings=["New York"], - logits=[[s, m, m, m, s, m, m, m]] - output tokens=[["New", "York"]] - ``` - * otherwise, create a new word / continue the current one depending on - the action for the first non-whitespace character. 
- - ```python - s = [2.0, 1.0] # sample pair of logits indicating a split action - m = [1.0, 3.0] # sample pair of logits indicating a merge action - - strings=["New York"], - logits=[[s, m, m, s, m, m, m, m]] - output tokens=[["NewYork"]] - - strings=["New York"], - logits=[[s, m, m, m, m, m, m, m]] - output tokens=[["NewYork"]] - - strings=["New York"], - logits=[[s, m, m, m, s, m, m, m]] - output tokens=[["New", "York"]] - ``` - """ - super(SplitMergeFromLogitsTokenizer, self).__init__() - self._force_split_at_break_character = force_split_at_break_character - counter = _tf_text_split_merge_from_logits_tokenizer_op_create_counter - counter.get_cell().increase_by(1) - - def tokenize(self, strings, logits): - """Tokenizes a tensor of UTF-8 strings according to logits. - - The logits refer to the split / merge action we should take for each - character. For more info, see the doc for the logits argument below. - - ### Example: - - >>> strings = ['IloveFlume!', 'and tensorflow'] - >>> logits = [ - ... [ - ... # 'I' - ... [5.0, -3.2], # I: split - ... # 'love' - ... [2.2, -1.0], # l: split - ... [0.2, 12.0], # o: merge - ... [0.0, 11.0], # v: merge - ... [-3.0, 3.0], # e: merge - ... # 'Flume' - ... [10.0, 0.0], # F: split - ... [0.0, 11.0], # l: merge - ... [0.0, 11.0], # u: merge - ... [0.0, 12.0], # m: merge - ... [0.0, 12.0], # e: merge - ... # '!' - ... [5.2, -7.0], # !: split - ... # padding: - ... [1.0, 0.0], [1.0, 1.0], [1.0, 0.0], - ... ], [ - ... # 'and' - ... [2.0, 0.7], # a: split - ... [0.2, 1.5], # n: merge - ... [0.5, 2.3], # d: merge - ... # ' ' - ... [1.7, 7.0], # <space>: merge - ... # 'tensorflow' - ... [2.2, 0.1], # t: split - ... [0.2, 3.1], # e: merge - ... [1.1, 2.5], # n: merge - ... [0.7, 0.9], # s: merge - ... [0.6, 1.0], # o: merge - ... [0.3, 1.0], # r: merge - ... [0.2, 2.2], # f: merge - ... [0.7, 3.1], # l: merge - ... [0.4, 5.0], # o: merge - ... [0.8, 6.0], # w: merge - ... ]] - >>> tokenizer = SplitMergeFromLogitsTokenizer() - >>> tokenizer.tokenize(strings, logits) - <tf.RaggedTensor [[b'I', b'love', b'Flume', b'!'], [b'and', b'tensorflow']]> - - Args: - strings: a 1D `Tensor` of UTF-8 strings. - logits: 3D Tensor; logits[i,j,0] is the logit for the split action for - j-th character of strings[i]. logits[i,j,1] is the logit for the merge - action for that same character. For each character, we pick the action - with the greatest logit. Split starts a new word at this character and - merge adds this character to the previous word. The shape of this - tensor should be (n, m, 2) where n is the number of strings, and m is - greater or equal with the number of characters from each strings[i]. As - the elements of the strings tensor may have different lengths (in UTF-8 - chars), padding may be required to get a dense vector; for each row, the - extra (padding) pairs of logits are ignored. - - Returns: - A `RaggedTensor` of strings where `tokens[i, k]` is the string - content of the `k-th` token in `strings[i]` - - Raises: - InvalidArgumentError: if one of the input Tensors has the wrong shape. - E.g., if the logits tensor does not have enough elements for one of the - strings. - """ - subword, _, _ = self.tokenize_with_offsets(strings, logits) - return subword - - def tokenize_with_offsets(self, strings, logits): - """Tokenizes a tensor of UTF-8 strings into tokens with [start,end) offsets. - - ### Example: - - >>> strings = ['IloveFlume!', 'and tensorflow'] - >>> logits = [ - ... [ - ... # 'I' - ... [5.0, -3.2], # I: split - ... # 'love' - ... 
[2.2, -1.0], # l: split - ... [0.2, 12.0], # o: merge - ... [0.0, 11.0], # v: merge - ... [-3.0, 3.0], # e: merge - ... # 'Flume' - ... [10.0, 0.0], # F: split - ... [0.0, 11.0], # l: merge - ... [0.0, 11.0], # u: merge - ... [0.0, 12.0], # m: merge - ... [0.0, 12.0], # e: merge - ... # '!' - ... [5.2, -7.0], # !: split - ... # padding: - ... [1.0, 0.0], [1.0, 1.0], [1.0, 0.0], - ... ], [ - ... # 'and' - ... [2.0, 0.7], # a: split - ... [0.2, 1.5], # n: merge - ... [0.5, 2.3], # d: merge - ... # ' ' - ... [1.7, 7.0], # <space>: merge - ... # 'tensorflow' - ... [2.2, 0.1], # t: split - ... [0.2, 3.1], # e: merge - ... [1.1, 2.5], # n: merge - ... [0.7, 0.9], # s: merge - ... [0.6, 1.0], # o: merge - ... [0.3, 1.0], # r: merge - ... [0.2, 2.2], # f: merge - ... [0.7, 3.1], # l: merge - ... [0.4, 5.0], # o: merge - ... [0.8, 6.0], # w: merge - ... ]] - >>> tokenizer = SplitMergeFromLogitsTokenizer() - >>> tokens, starts, ends = tokenizer.tokenize_with_offsets(strings, logits) - >>> tokens - <tf.RaggedTensor [[b'I', b'love', b'Flume', b'!'], [b'and', b'tensorflow']]> - >>> starts - <tf.RaggedTensor [[0, 1, 5, 10], [0, 4]]> - >>> ends - <tf.RaggedTensor [[1, 5, 10, 11], [3, 14]]> - - Args: - strings: A 1D `Tensor` of UTF-8 strings. - logits: 3D Tensor; logits[i,j,0] is the logit for the split action for - j-th character of strings[i]. logits[i,j,1] is the logit for the merge - action for that same character. For each character, we pick the action - with the greatest logit. Split starts a new word at this character and - merge adds this character to the previous word. The shape of this - tensor should be (n, m, 2) where n is the number of strings, and m is - greater or equal with the number of characters from each strings[i]. As - the elements of the strings tensor may have different lengths (in UTF-8 - chars), padding may be required to get a dense vector; for each row, the - extra (padding) pairs of logits are ignored. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - * `tokens` is a `RaggedTensor` of strings where `tokens[i, k]` is - the string content of the `k-th` token in `strings[i]` - * `start_offsets` is a `RaggedTensor` of int64s where - `start_offsets[i, k]` is the byte offset for the start of the - `k-th` token in `strings[i]`. - * `end_offsets` is a `RaggedTensor` of int64s where - `end_offsets[i, k]` is the byte offset immediately after the - end of the `k-th` token in `strings[i]`. - - Raises: - InvalidArgumentError: if one of the input Tensors has the wrong shape. - E.g., if the tensor logits does not have enough elements for one of the - strings. - """ - name = None - with ops.name_scope(name, 'SplitMergeFromLogitsTokenizer', - [strings, logits]): - # Tokenize the strings into tokens. - force_split = self._force_split_at_break_character - token_values, token_row_splits, start_values, end_values = ( - gen_split_merge_from_logits_tokenizer.tokenizer_from_logits( - strings=strings, - logits=logits, - force_split_at_break_character=force_split)) - - # Put token info into RaggedTensors, as indicated by token_row_splits. - def put_token_info_into_ragged_tensor(token_info_values): - return RaggedTensor.from_row_splits( - token_info_values, token_row_splits, validate=False) - - tokens = put_token_info_into_ragged_tensor(token_values) - start_offsets = put_token_info_into_ragged_tensor(start_values) - end_offsets = put_token_info_into_ragged_tensor(end_values) - return tokens, start_offsets, end_offsets
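The decision rule the deleted docstring describes (each character has a `[split_logit, merge_logit]` pair, the larger logit wins, and `force_split_at_break_character` controls what happens after whitespace) can be sketched in a few lines of plain Python. This is a simplified illustration only: the helper name is invented, `str.isspace()` stands in for ICU break characters, and no offsets are produced.

```python
# Simplified pure-Python sketch of the split/merge-from-logits rule; not the
# TF op. Index 0 of each pair scores "split", index 1 scores "merge".
def split_merge_from_logits(text, logits, force_split_at_break_character=True):
  tokens, current = [], ''
  prev_was_space = True
  for ch, (split_logit, merge_logit) in zip(text, logits):
    if ch.isspace():
      prev_was_space = True
      continue                             # whitespace never becomes part of a token
    split = split_logit > merge_logit      # pick the action with the larger logit
    if prev_was_space and force_split_at_break_character:
      split = True                         # force a new word after a break character
    if split and current:
      tokens.append(current)
      current = ''
    current += ch
    prev_was_space = False
  if current:
    tokens.append(current)
  return tokens

s, m = [2.0, 1.0], [1.0, 3.0]              # split / merge logit pairs, as in the docstring
assert split_merge_from_logits(
    'IloveFlume!', [s, s, m, m, m, s, m, m, m, m, s]) == ['I', 'love', 'Flume', '!']
```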
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_from_logits_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_from_logits_tokenizer_test.py deleted file mode 100644 index 1241bd0..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_from_logits_tokenizer_test.py +++ /dev/null
@@ -1,410 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for split_merge_tokenizer op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import test - -from tensorflow_text.python.ops.split_merge_from_logits_tokenizer import SplitMergeFromLogitsTokenizer # pylint: disable=line-too-long - - -def _Utf8(char): - return char.encode('utf-8') - - -def _RaggedSubstr(text_input, begin, end): - text_input_flat = None - if ragged_tensor.is_ragged(text_input): - text_input_flat = text_input.flat_values - else: - text_input_flat = ops.convert_to_tensor(text_input) - - if ragged_tensor.is_ragged(begin): - broadcasted_text = array_ops.gather_v2(text_input_flat, - begin.nested_value_rowids()[-1]) - - # convert boardcasted_text into a 1D tensor. - broadcasted_text = array_ops.reshape(broadcasted_text, [-1]) - size = math_ops.sub(end.flat_values, begin.flat_values) - new_tokens = string_ops.substr_v2(broadcasted_text, begin.flat_values, size) - return begin.with_flat_values(new_tokens) - else: - assert begin.shape.ndims == 1 - assert text_input_flat.shape.ndims == 0 - size = math_ops.sub(end, begin) - new_tokens = string_ops.substr_v2(text_input_flat, begin, size) - return new_tokens - - -@test_util.run_all_in_graph_and_eager_modes -class SplitMergeFromLogitsTokenizerTest(test.TestCase): - - def setUp(self): - super(SplitMergeFromLogitsTokenizerTest, self).setUp() - self.tokenizer = SplitMergeFromLogitsTokenizer() - self.no_force_split_tokenizer = SplitMergeFromLogitsTokenizer( - force_split_at_break_character=False) - - def testVectorSingleValue(self): - test_strings = constant_op.constant([b'IloveFlume!']) - - # Below, each pair of logits [l1, l2] indicates a "split" action - # if l1 > l2 and a "merge" otherwise. - test_logits = constant_op.constant([ - [ - # I - [2.7, -0.3], # I: split - # love - [4.1, 0.82], # l: split - [-2.3, 4.3], # o: merge - [3.1, 12.2], # v: merge - [-3.0, 4.7], # e: merge - # Flume - [2.7, -0.7], # F: split - [0.7, 15.0], # l: merge - [1.6, 23.0], # u: merge - [2.1, 11.0], # m: merge - [0.0, 20.0], # e: merge - # ! 
- [18.0, 0.7], # !: split - ]]) - expected_tokens = [[b'I', b'love', b'Flume', b'!']] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_strings, test_logits)) - self.assertAllEqual(expected_tokens, tokens) - extracted_tokens = _RaggedSubstr(test_strings, starts, ends) - self.assertAllEqual(expected_tokens, extracted_tokens) - - def testVectorSingleValueTokenAcrossSpace(self): - test_string = b'I love Flume!' - test_strings = constant_op.constant([test_string]) - - # Below, each pair of logits [l1, l2] indicates a "split" action - # if l1 < l2 and a "merge" otherwise. - test_logits = constant_op.constant([ - [ - # I - [2.7, -0.3], # I: split - # ' ' - [-1.5, 2.3], # <space>: merge - # love - [4.1, 0.82], # l: split - [-2.3, 4.3], # o: merge - [3.1, 12.2], # v: merge - [-3.0, 4.7], # e: merge - # ' ' - [2.5, 32.0], # <space>: merge - # Flume - [-2.7, 5.3], # F: merge - [0.7, 15.0], # l: merge - [1.6, 23.0], # u: merge - [2.1, 11.0], # m: merge - [0.0, 20.0], # e: merge - # ! - [18.0, 0.7], # !: split - ]]) - - # By default force_split_at_break_character is set True, so we start new - # tokens after break characters regardless of the SPLIT/MERGE label of the - # break character. - expected_tokens = [[b'I', b'love', b'Flume', b'!']] - expected_start_offsets = [[0, 2, 7, 12]] - expected_end_offsets = [[1, 6, 12, 13]] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_strings, test_logits)) - self.assertAllEqual(expected_tokens, tokens) - self.assertAllEqual(expected_start_offsets, starts) - self.assertAllEqual(expected_end_offsets, ends) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_strings, test_logits) - self.assertAllEqual(expected_tokens, tokens) - - # When force_split_at_break_character set false, we may combine two tokens - # together to form a word according to the label of the first non-space - # character. - expected_tokens = [[b'I', b'loveFlume', b'!']] - expected_start_offsets = [[0, 2, 12]] - expected_end_offsets = [[1, 12, 13]] - # Assertions below clarify what the expected offsets mean: - self.assertEqual(b'I', test_string[0:1]) - - # Notice that the original text between the [start, end) offsets for the - # second token differs from the token text by an extra space: this is - # by design, that space is not copied in the token. - self.assertEqual(b'love Flume', test_string[2:12]) - self.assertEqual(b'!', test_string[12:13]) - - (tokens, starts, ends) = ( - self.no_force_split_tokenizer.tokenize_with_offsets( - test_strings, test_logits)) - self.assertAllEqual(expected_tokens, tokens) - self.assertAllEqual(expected_start_offsets, starts) - self.assertAllEqual(expected_end_offsets, ends) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.no_force_split_tokenizer.tokenize(test_strings, test_logits) - self.assertAllEqual(expected_tokens, tokens) - - def testVectorSingleValueTokenChinese(self): - # TODO(salcianu): clean-up. We used the Unicode string, but Windows may - # have problems with it, so we use the utf-8 bytes instead. - # - # test_strings = constant_op.constant([_Utf8(u'我在谷歌 写代码')]) - test_strings = constant_op.constant([ - b'\xe6\x88\x91\xe5\x9c\xa8\xe8\xb0\xb7\xe6\xad\x8c' - + b'\xe3\x80\x80\xe5\x86\x99\xe4\xbb\xa3\xe7\xa0\x81' - ]) - - # Below, each pair of logits [l1, l2] indicates a "split" action - # if l1 < l2 and a "merge" otherwise. 
- test_logits = constant_op.constant([ - [ - # 我 - [2.0, 0.3], # split - # 在 - [3.5, 2.1], # split - # 谷歌 - [5.0, 1.2], # split - [0.4, 3.0], # merge - # ' ', note this is a full-width space that contains 3 bytes. - [2.8, 0.0], # split - # 写代码 - [6.0, 2.1], # split - [2.6, 5.1], # merge - [1.0, 7.1], # merge - ]]) - - # By default force_split_at_break_character is set True, so we start new - # tokens after break characters regardless of the SPLIT/MERGE label of the - # break character. - expected_tokens = [[ - _Utf8(u'我'), _Utf8(u'在'), _Utf8(u'谷歌'), _Utf8(u'写代码')]] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_strings, test_logits)) - self.assertAllEqual(expected_tokens, tokens) - - # Extract tokens according to the returned starts, ends. - tokens_by_offsets = _RaggedSubstr(test_strings, starts, ends) - self.assertAllEqual(expected_tokens, tokens_by_offsets) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_strings, test_logits) - self.assertAllEqual(expected_tokens, tokens) - - # Although force_split_at_break_character is set false we actually predict a - # SPLIT at '写', so we still start a new token: '写代码'. - (tokens, starts, ends) = ( - self.no_force_split_tokenizer.tokenize_with_offsets( - test_strings, test_logits)) - self.assertAllEqual(expected_tokens, tokens) - - # Extract tokens according to the returned starts, ends. - tokens_by_offsets = _RaggedSubstr(test_strings, starts, ends) - self.assertAllEqual(expected_tokens, tokens_by_offsets) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.no_force_split_tokenizer.tokenize(test_strings, test_logits) - self.assertAllEqual(expected_tokens, tokens) - - def testVectorMultipleValues(self): - test_strings = constant_op.constant([b'IloveFlume!', - b'and tensorflow']) - - # Below, each pair of logits [l1, l2] indicates a "split" action - # if l1 < l2 and a "merge" otherwise. - test_logits = constant_op.constant([ - [ - # "I" - [5.0, -3.2], # I: split - # "love" - [2.2, -1.0], # l: split - [0.2, 12.0], # o: merge - [0.0, 11.0], # v: merge - [-3.0, 3.0], # e: merge - # "Flume" - [10.0, 0.0], # F: split - [0.0, 11.0], # l: merge - [0.0, 11.0], # u: merge - [0.0, 12.0], # m: merge - [0.0, 12.0], # e: merge - # "!" - [5.2, -7.0], # !: split - # padding: - [1.0, 0.0], [1.0, 1.0], [1.0, 0.0], - ], [ - # "and" - [2.0, 0.7], # a: split - [0.2, 1.5], # n: merge - [0.5, 2.3], # d: merge - # " " - [1.7, 7.0], # <space>: merge - # "tensorflow" - [2.2, 0.1], # t: split - [0.2, 3.1], # e: merge - [1.1, 2.5], # n: merge - [0.7, 0.9], # s: merge - [0.6, 1.0], # o: merge - [0.3, 1.0], # r: merge - [0.2, 2.2], # f: merge - [0.7, 3.1], # l: merge - [0.4, 5.0], # o: merge - [0.8, 6.0], # w: merge - ]]) - expected_tokens = [[b'I', b'love', b'Flume', b'!'], - [b'and', b'tensorflow']] - expected_starts = [[0, 1, 5, 10], [0, 4]] - expected_ends = [[1, 5, 10, 11], [3, 14]] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_strings, test_logits)) - self.assertAllEqual(expected_tokens, tokens) - tokens_by_offsets = _RaggedSubstr(test_strings, starts, ends) - self.assertAllEqual(expected_tokens, tokens_by_offsets) - self.assertAllEqual(expected_starts, starts) - self.assertAllEqual(expected_ends, ends) - - # Use the same arguments to test the tokenize() version, without offsets. 
- tokens = self.tokenizer.tokenize(test_strings, test_logits) - self.assertAllEqual(expected_tokens, tokens) - - def testVectorTooFewActions(self): - test_strings = constant_op.constant([b'IloveFlume!', - b'and tensorflow']) - - # Below, each pair of logits [l1, l2] indicates a "split" action - # if l1 < l2 and a "merge" otherwise. - test_logits = constant_op.constant([ - [ - # "I" - [5.0, -3.2], # I: split - # "love" - [2.2, -1.0], # l: split - [0.2, 12.0], # o: merge - [0.0, 11.0], # v: merge - [-3.0, 3.0], # e: merge - # "Flume" - [10.0, 0.0], # F: split - [0.0, 11.0], # l: merge - [0.0, 11.0], # u: merge - [0.0, 12.0], # m: merge - [0.0, 12.0], # e: merge - # "!" - [5.2, -7.0], # !: split - # no padding, instead, we truncated the logits for 2nd string. - ], [ - # "and" - [2.0, 0.7], # a: split - [0.2, 1.5], # n: merge - [0.5, 2.3], # d: merge - # " " - [1.7, 7.0], # <space>: merge - # "tensorf"; no logits for final three chars, "low". - [2.2, 0.1], # t: split - [0.2, 3.1], # e: merge - [1.1, 2.5], # n: merge - [0.7, 0.9], # s: merge - [0.6, 1.0], # o: merge - [0.3, 1.0], # r: merge - [0.2, 2.2], # f: merge - ]]) - with self.assertRaisesRegexp( - errors.InvalidArgumentError, - r'Number of logits, 11, is insufficient for text "and tensorflow"'): - self.evaluate( - self.tokenizer.tokenize_with_offsets(test_strings, test_logits)) - - def testTextWithWhitespaces(self): - # The text from this example contains some whitespaces: we test that we - # don't generate empty tokens, nor tokens that contain whitespaces. - test_strings = constant_op.constant([b'\n Ilove Flume! ', - b'and \t\ntensorflow']) - - # Below, each pair of logits [l1, l2] indicates a "split" action - # if l1 < l2 and a "merge" otherwise. - test_logits = constant_op.constant([ - [ - # "\n" and " " - [12.0, 2.1], # \n: split - [0.3, 17.3], # <space>: merge - # "I" - [5.0, -3.2], # I: split - # "love" - [2.2, -1.0], # l: split - [0.2, 12.0], # o: merge - [0.0, 11.0], # v: merge - [-3.0, 3.0], # e: merge - # " " - [15.4, 0.3], # <space>: split - # "Flume" - [10.0, 0.0], # F: split - [0.0, 11.0], # l: merge - [0.0, 11.0], # u: merge - [0.0, 12.0], # m: merge - [0.0, 12.0], # e: merge - # "!" - [5.2, -7.0], # !: split - # " " - [15.4, 0.3], # <space>: split - # padding - [2.0, 3.0] - ], [ - # "and" - [2.0, 0.7], # a: split - [0.2, 1.5], # n: merge - [0.5, 2.3], # d: merge - # " ", "\t", and "\n" - [1.7, 7.0], # <space>: merge - [8.0, 2.1], # \t: split - [0.3, 7.3], # \n: merge - # "tensorflow" - [2.2, 0.1], # t: split - [0.2, 3.1], # e: merge - [1.1, 2.5], # n: merge - [0.7, 0.9], # s: merge - [0.6, 1.0], # o: merge - [0.3, 1.0], # r: merge - [0.2, 2.2], # f: merge - [0.7, 3.1], # l: merge - [0.4, 5.0], # o: merge - [0.8, 6.0], # w: merge - ]]) - expected_tokens = [[b'I', b'love', b'Flume', b'!'], - [b'and', b'tensorflow']] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_strings, test_logits)) - self.assertAllEqual(expected_tokens, tokens) - tokens_by_offsets = _RaggedSubstr(test_strings, starts, ends) - self.assertAllEqual(expected_tokens, tokens_by_offsets) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_strings, test_logits) - self.assertAllEqual(expected_tokens, tokens) - - -if __name__ == '__main__': - test.main()
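The deleted tests above lean on one convention worth spelling out: the returned `[start, end)` byte offsets always cover the token in the original string, but a token merged across whitespace keeps the skipped space inside its offset span while the token text omits it. A small plain-Python illustration using the values from `testVectorSingleValueTokenAcrossSpace` (not the op itself):

```python
# Offsets from testVectorSingleValueTokenAcrossSpace with
# force_split_at_break_character=False: slicing with [start, end) reproduces
# each token, modulo whitespace skipped inside a merged token.
text = b'I love Flume!'
tokens = [b'I', b'loveFlume', b'!']
starts, ends = [0, 2, 12], [1, 12, 13]
for tok, start, end in zip(tokens, starts, ends):
  assert text[start:end].replace(b' ', b'') == tok
```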
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_tokenizer.py deleted file mode 100644 index a5c6a98..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_tokenizer.py +++ /dev/null
@@ -1,272 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops to tokenize words into subwords.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -# pylint: disable=g-bad-import-order -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_split_merge_tokenizer = load_library.load_op_library(resource_loader.get_path_to_datafile('_split_merge_tokenizer.so')) - -_tf_text_split_merge_tokenizer_op_create_counter = monitoring.Counter( - '/nlx/api/python/split_merge_tokenizer_create_counter', - 'Counter for number of SplitMergeTokenizers created in Python.') - - -class SplitMergeTokenizer(TokenizerWithOffsets): - """Tokenizes a tensor of UTF-8 string into words according to labels.""" - - def __init__(self): - """Initializes a new instance. - """ - super(SplitMergeTokenizer, self).__init__() - _tf_text_split_merge_tokenizer_op_create_counter.get_cell().increase_by(1) - - def tokenize(self, - input, # pylint: disable=redefined-builtin - labels, - force_split_at_break_character=True): - """Tokenizes a tensor of UTF-8 strings according to labels. - - ### Example: - - >>> strings = ["HelloMonday", "DearFriday"] - >>> labels = [[0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], - ... [0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0]] - >>> tokenizer = SplitMergeTokenizer() - >>> tokenizer.tokenize(strings, labels) - <tf.RaggedTensor [[b'Hello', b'Monday'], [b'Dear', b'Friday']]> - - Args: - input: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - labels: An (N+1)-dimensional `Tensor` or `RaggedTensor` of `int32`, with - `labels[i1...iN, j]` being the split(0)/merge(1) label of the j-th - character for `input[i1...iN]`. Here split means create a new word with - this character and merge means adding this character to the previous - word. - force_split_at_break_character: bool indicates whether to force start a - new word after seeing a ICU defined whitespace character. 
When seeing - one or more ICU defined whitespace character: - * if `force_split_at_break_character` is set true, then create a new - word at the first non-space character, regardless of the label of that - character, for instance: - - ```python - input="New York" - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["New", "York"] - ``` - - ```python - input="New York" - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["New", "York"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] - ``` - - * otherwise, whether to create a new word or not for the first non-space - character depends on the label of that character, for instance: - - ```python - input="New York", - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] - ``` - - Returns: - A `RaggedTensor` of strings where `tokens[i1...iN, j]` is the string - content of the `j-th` token in `input[i1...iN]` - """ - subword, _, _ = self.tokenize_with_offsets(input, labels, - force_split_at_break_character) - return subword - - def tokenize_with_offsets(self, - input, # pylint: disable=redefined-builtin - labels, - force_split_at_break_character=True): - """Tokenizes a tensor of UTF-8 strings into tokens with [start,end) offsets. - - ### Example: - - >>> strings = ["HelloMonday", "DearFriday"] - >>> labels = [[0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], - ... [0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0]] - >>> tokenizer = SplitMergeTokenizer() - >>> tokens, starts, ends = tokenizer.tokenize_with_offsets(strings, labels) - >>> tokens - <tf.RaggedTensor [[b'Hello', b'Monday'], [b'Dear', b'Friday']]> - >>> starts - <tf.RaggedTensor [[0, 5], [0, 4]]> - >>> ends - <tf.RaggedTensor [[5, 11], [4, 10]]> - - Args: - input: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - labels: An (N+1)-dimensional `Tensor` or `RaggedTensor` of int32, with - labels[i1...iN, j] being the split(0)/merge(1) label of the j-th - character for input[i1...iN]. Here split means create a new word with - this character and merge means adding this character to the previous - word. - force_split_at_break_character: bool indicates whether to force start a - new word after seeing a ICU defined whitespace character. 
When seeing - one or more ICU defined whitespace character: - * if `force_split_at_break_character` is set true, then create a new - word at the first non-space character, regardless of the label of - that character, for instance: - - ```python - input="New York" - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["New", "York"] - ``` - - ```python - input="New York" - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["New", "York"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] - ``` - - * otherwise, whether to create a new word or not for the first non-space - character depends on the label of that character, for instance: - - ```python - input="New York", - labels=[0, 1, 1, 0, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 1, 1, 1, 1] - output tokens=["NewYork"] - ``` - - ```python - input="New York", - labels=[0, 1, 1, 1, 0, 1, 1, 1] - output tokens=["New", "York"] - ``` - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - tokens: is a `RaggedTensor` of strings where `tokens[i1...iN, j]` is - the string content of the `j-th` token in `input[i1...iN]` - start_offsets: is a `RaggedTensor` of int64s where - `start_offsets[i1...iN, j]` is the byte offset for the start of the - `j-th` token in `input[i1...iN]`. - end_offsets: is a `RaggedTensor` of int64s where - `end_offsets[i1...iN, j]` is the byte offset immediately after the - end of the `j-th` token in `input[i...iN]`. - """ - name = None - with ops.name_scope( - name, 'SplitMergeTokenizeWithOffsets', - [input, labels, force_split_at_break_character]): - # Check that the types are expected and the ragged rank is appropriate. - tokens = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - labels = ragged_tensor.convert_to_tensor_or_ragged_tensor(labels) - rank = tokens.shape.ndims - if rank is None: - raise ValueError('input must have a known rank.') - - if rank == 0: - words, starts, ends = self.tokenize_with_offsets( - array_ops.stack([tokens]), - array_ops.stack([labels]), - force_split_at_break_character) - return words.values, starts.values, ends.values - - elif rank > 1: - if not ragged_tensor.is_ragged(tokens): - tokens = ragged_tensor.RaggedTensor.from_tensor( - tokens, ragged_rank=rank - 1) - - # Convert to a 2D ragged tensor from labels of shape - # [#input_string, (labels per string)] - if not ragged_tensor.is_ragged(labels): - labels2d = array_ops.reshape(labels, [-1, labels.shape[-1]]) - labels_unpack = ragged_tensor.RaggedTensor.from_tensor(labels2d) - else: - labels_unpack = ragged_tensor.RaggedTensor.from_row_splits( - values=labels.flat_values, - row_splits=labels.nested_row_splits[-1]) - words, starts, ends = self.tokenize_with_offsets( - tokens.flat_values, - labels_unpack, - force_split_at_break_character) - words = words.with_row_splits_dtype(tokens.row_splits.dtype) - starts = starts.with_row_splits_dtype(tokens.row_splits.dtype) - ends = ends.with_row_splits_dtype(tokens.row_splits.dtype) - return (tokens.with_flat_values(words), - tokens.with_flat_values(starts), - tokens.with_flat_values(ends)) - - if not ragged_tensor.is_ragged(labels): - ragged_labels = ragged_tensor.RaggedTensor.from_tensor(labels) - else: - ragged_labels = labels - - row_splits = math_ops.cast(ragged_labels.row_splits, dtypes.int32) - - # Tokenize the strings into tokens. 
- values, row_splits, starts, ends = ( - gen_split_merge_tokenizer.split_merge_tokenize_with_offsets( - input_values=tokens, - labels=ragged_labels.flat_values, - row_splits=row_splits, - force_split_at_break_character=force_split_at_break_character)) - - words = RaggedTensor.from_row_splits(values, row_splits, validate=False) - starts = RaggedTensor.from_row_splits(starts, row_splits, validate=False) - ends = RaggedTensor.from_row_splits(ends, row_splits, validate=False) - return words, starts, ends
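The labelling scheme in this deleted tokenizer is the same word-assembly loop as the logits-based tokenizer earlier in this change, except the per-character decision is read directly from a 0/1 label instead of an argmax over two logits. A simplified pure-Python sketch follows; the helper name is invented and `str.isspace()` stands in for ICU break characters.

```python
# Simplified pure-Python sketch of the 0 = split / 1 = merge labelling rule;
# not the TF op.
def split_merge(text, labels, force_split_at_break_character=True):
  tokens, current = [], ''
  prev_was_space = True
  for ch, label in zip(text, labels):
    if ch.isspace():
      prev_was_space = True
      continue                             # whitespace never becomes part of a token
    split = (label == 0)                   # 0 starts a new word, 1 extends it
    if prev_was_space and force_split_at_break_character:
      split = True                         # force a new word after a break character
    if split and current:
      tokens.append(current)
      current = ''
    current += ch
    prev_was_space = False
  if current:
    tokens.append(current)
  return tokens

# The "New York" examples from the docstring above:
assert split_merge('New York', [0, 1, 1, 1, 1, 1, 1, 1]) == ['New', 'York']
assert split_merge('New York', [0, 1, 1, 1, 1, 1, 1, 1],
                   force_split_at_break_character=False) == ['NewYork']
```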
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_tokenizer_test.py deleted file mode 100644 index 7dee246..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/split_merge_tokenizer_test.py +++ /dev/null
@@ -1,407 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for split_merge_tokenizer op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.platform import test -from tensorflow_text.python.ops.split_merge_tokenizer import SplitMergeTokenizer - - -def _Utf8(char): - return char.encode('utf-8') - - -def _RaggedSubstr(text_input, begin, end): - text_input_flat = None - if ragged_tensor.is_ragged(text_input): - text_input_flat = text_input.flat_values - else: - text_input_flat = ops.convert_to_tensor(text_input) - - if ragged_tensor.is_ragged(begin): - broadcasted_text = array_ops.gather_v2(text_input_flat, - begin.nested_value_rowids()[-1]) - - # convert boardcasted_text into a 1D tensor. - broadcasted_text = array_ops.reshape(broadcasted_text, [-1]) - size = math_ops.sub(end.flat_values, begin.flat_values) - new_tokens = string_ops.substr_v2(broadcasted_text, begin.flat_values, size) - return begin.with_flat_values(new_tokens) - else: - assert begin.shape.ndims == 1 - assert text_input_flat.shape.ndims == 0 - size = math_ops.sub(end, begin) - new_tokens = string_ops.substr_v2(text_input_flat, begin, size) - return new_tokens - - -@test_util.run_all_in_graph_and_eager_modes -class SplitMergeTokenizerTest(test.TestCase): - - def setUp(self): - super(SplitMergeTokenizerTest, self).setUp() - self.tokenizer = SplitMergeTokenizer() - - def testScalarValueSplitMerge(self): - test_value = b'IloveFlume!' - test_label = constant_op.constant( - [ - # I - 0, - # love - 0, 1, 1, 1, - # Flume - 0, 1, 1, 1, 1, - # ! - 0 - ]) - expected_tokens = [b'I', b'love', b'Flume', b'!'] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - extracted_tokens = _RaggedSubstr(test_value, starts, ends) - self.assertAllEqual(extracted_tokens, expected_tokens) - - def testVectorSingleValueSplitMerge(self): - test_value = constant_op.constant([b'IloveFlume!']) - test_label = constant_op.constant([ - [ - # I - 0, - # love - 0, 1, 1, 1, - # Flume - 0, 1, 1, 1, 1, - # ! 
- 0 - ]]) - expected_tokens = [[b'I', b'love', b'Flume', b'!']] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - extracted_tokens = _RaggedSubstr(test_value, starts, ends) - self.assertAllEqual(extracted_tokens, expected_tokens) - - def testVectorSingleValueTokenCrossSpace(self): - test_string = b'I love Flume!' - test_value = constant_op.constant([test_string]) - test_label = constant_op.constant([ - [ - # I - 0, - # ' ' - 1, - # love - 0, 1, 1, 1, - # ' ' - 0, - # Flume - 1, 1, 1, 1, 1, - # ! - 0 - ]]) - - # By default force_split_at_break_character is set True, so we start new - # tokens after break characters regardless of the SPLIT/MERGE label of the - # break character. - expected_tokens = [[b'I', b'love', b'Flume', b'!']] - expected_offset_starts = [[0, 2, 7, 12]] - expected_offset_ends = [[1, 6, 12, 13]] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_value, test_label) - self.assertAllEqual(tokens, expected_tokens) - - # When force_split_at_break_character set false, we may combine two tokens - # together to form a word according to the label of the first non-space - # character. - expected_tokens = [[b'I', b'loveFlume', b'!']] - expected_offset_starts = [[0, 2, 12]] - expected_offset_ends = [[1, 12, 13]] - # Assertions below clarify what the expected offsets mean: - self.assertEqual(test_string[0:1], b'I') - - # Notice that the original text between the [start, end) offsets for the - # second token differs from the token text by an extra space: this is - # by design, that space is not copied in the token. - self.assertEqual(test_string[2:12], b'love Flume') - self.assertEqual(test_string[12:13], b'!') - - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets( - test_value, test_label, force_split_at_break_character=False)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize( - test_value, test_label, force_split_at_break_character=False) - self.assertAllEqual(tokens, expected_tokens) - - def testVectorSingleValueTokenChinese(self): - # TODO(salcianu): clean-up. We used the Unicode string, but Windows may - # have problems with it, so we use the utf-8 bytes instead. - # - # test_value = constant_op.constant([_Utf8(u'我在谷歌 写代码')]) - test_value = constant_op.constant([ - b'\xe6\x88\x91\xe5\x9c\xa8\xe8\xb0\xb7\xe6\xad\x8c' - + b'\xe3\x80\x80\xe5\x86\x99\xe4\xbb\xa3\xe7\xa0\x81' - ]) - test_label = constant_op.constant([ - [ - # 我 - 0, - # 在 - 0, - # 谷歌 - 0, 1, - # ' ', note this is a full-width space that contains 3 bytes. - 0, - # 写代码 - 0, 1, 1 - ]]) - - # By default force_split_at_break_character is set True, so we start new - # tokens after break characters regardless of the SPLIT/MERGE label of the - # break character. 
- expected_tokens = [[ - _Utf8(u'我'), _Utf8(u'在'), _Utf8(u'谷歌'), _Utf8(u'写代码')]] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - - # Extract tokens according to the returned starts, ends. - tokens_by_offsets = _RaggedSubstr(test_value, starts, ends) - self.assertAllEqual(expected_tokens, tokens_by_offsets) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_value, test_label) - self.assertAllEqual(tokens, expected_tokens) - - # Although force_split_at_break_character is set false we actually predict a - # SPLIT at '写', so we still start a new token: '写代码'. - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets( - test_value, test_label, force_split_at_break_character=False)) - self.assertAllEqual(tokens, expected_tokens) - - # Extract tokens according to the returned starts, ends. - tokens_by_offsets = _RaggedSubstr(test_value, starts, ends) - self.assertAllEqual(expected_tokens, tokens_by_offsets) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize( - test_value, test_label, force_split_at_break_character=False) - self.assertAllEqual(tokens, expected_tokens) - - def testHigherRank(self): - # [2, 1] - test_value = constant_op.constant([[b'IloveFlume!'], - [b'and tensorflow']]) - test_label = constant_op.constant([ - [[ - # I - 0, - # love - 0, 1, 1, 1, - # Flume - 0, 1, 1, 1, 1, - # ! - 0, - # paddings - 0, 0, 0 - ]], [[ - # and - 0, 1, 1, - # ' ' - 1, - # tensorflow - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 - ]]]) - expected_tokens = [[[b'I', b'love', b'Flume', b'!']], - [[b'and', b'tensorflow']]] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual([[[0, 1, 5, 10]], [[0, 4]]], starts) - self.assertAllEqual([[[1, 5, 10, 11]], [[3, 14]]], ends) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_value, test_label) - self.assertAllEqual(tokens, expected_tokens) - - def testVectorMultipleValue(self): - test_value = constant_op.constant([b'IloveFlume!', - b'and tensorflow']) - test_label = constant_op.constant([ - [ - # I - 0, - # love - 0, 1, 1, 1, - # Flume - 0, 1, 1, 1, 1, - # ! - 0, - # paddings - 0, 0, 0 - ], [ - # and - 0, 1, 1, - # ' ' - 1, - # tensorflow - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 - ]]) - expected_tokens = [[b'I', b'love', b'Flume', b'!'], - [b'and', b'tensorflow']] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - tokens_by_offsets = _RaggedSubstr(test_value, starts, ends) - self.assertAllEqual(tokens_by_offsets, expected_tokens) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_value, test_label) - self.assertAllEqual(tokens, expected_tokens) - - def testRaggedInput(self): - test_value = ragged_factory_ops.constant([ - [b'IloveFlume!', b'and tensorflow'], - [b'go raggedtensor'] - ]) - test_label = ragged_factory_ops.constant([ - [ - [ - # I - 0, - # love - 0, 1, 1, 1, - # Flume - 0, 1, 1, 1, 1, - # ! 
- 0, - # paddings - 0, 0, 0 - ], [ - # and - 0, 1, 1, - # ' ' - 1, - # tensorflow - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 - ] - ], - [ - [ - # go - 0, 1, - # ' ' - 0, - # ragged - 0, 1, 1, 1, 1, 1, - # tensor - 0, 1, 1, 1, 1, 1, - ] - ]]) - expected_tokens = [ - [[b'I', b'love', b'Flume', b'!'], [b'and', b'tensorflow']], - [[b'go', b'ragged', b'tensor']] - ] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - tokens_by_offsets = _RaggedSubstr(test_value, starts, ends) - self.assertAllEqual(tokens_by_offsets, expected_tokens) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_value, test_label) - self.assertAllEqual(tokens, expected_tokens) - - def testRaggedInputHigherRank(self): - test_value = ragged_factory_ops.constant([ - [[b'IloveFlume!', b'and tensorflow']], - [[b'go raggedtensor']] - ]) - test_label = ragged_factory_ops.constant([ - [ - [[ - # I - 0, - # love - 0, 1, 1, 1, - # Flume - 0, 1, 1, 1, 1, - # ! - 0, - # paddings - 0, 0, 0 - ], [ - # and - 0, 1, 1, - # ' ' - 1, - # tensorflow - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1 - ]] - ], - [ - [[ - # go - 0, 1, - # ' ' - 0, - # ragged - 0, 1, 1, 1, 1, 1, - # tensor - 0, 1, 1, 1, 1, 1, - ]] - ]]) - expected_tokens = [ - [[[b'I', b'love', b'Flume', b'!'], [b'and', b'tensorflow']]], - [[[b'go', b'ragged', b'tensor']]] - ] - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value, test_label)) - self.assertAllEqual(tokens, expected_tokens) - tokens_by_offsets = _RaggedSubstr(test_value, starts, ends) - self.assertAllEqual(tokens_by_offsets, expected_tokens) - - # Use the same arguments to test the tokenize() version, without offsets. - tokens = self.tokenizer.tokenize(test_value, test_label) - self.assertAllEqual(tokens, expected_tokens) - - -if __name__ == '__main__': - test.main()
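One detail the Chinese test case above relies on: the returned offsets are byte offsets into the UTF-8 encoding, so each CJK character, and the full-width space, spans three bytes. A quick sanity check with hand-derived offsets (these values are worked out by hand, not taken from the op):

```python
# Byte-offset sanity check for the Chinese example: each CJK character and the
# full-width space (U+3000) occupy 3 bytes in UTF-8.
text = u'我在谷歌\u3000写代码'.encode('utf-8')
tokens = [u'我', u'在', u'谷歌', u'写代码']
starts, ends = [0, 3, 6, 15], [3, 6, 12, 24]
for tok, start, end in zip(tokens, starts, ends):
  assert text[start:end] == tok.encode('utf-8')
```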
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/splitter.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/splitter.py deleted file mode 100644 index 42019a16..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/splitter.py +++ /dev/null
@@ -1,119 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Abstract base classes for all splitters.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc - -from tensorflow.python.module import module - - -class Splitter(module.Module): - """An abstract base class for splitting text. - - A Splitter is a module that splits strings into pieces. Generally, the pieces - returned by a splitter correspond to substrings of the original string, and - can be encoded using either strings or integer ids (where integer ids could be - created by hashing strings or by looking them up in a fixed vocabulary table - that maps strings to ids). - - Each Splitter subclass must implement a `split` method, which subdivides - each string in an input Tensor into pieces. E.g.: - - >>> class SimpleSplitter(tf_text.Splitter): - ... def split(self, input): - ... return tf.strings.split(input) - >>> print(SimpleSplitter().split(["hello world", "this is a test"])) - <tf.RaggedTensor [[b'hello', b'world'], [b'this', b'is', b'a', b'test']]> - """ - - __metaclass__ = abc.ABCMeta - - @abc.abstractmethod - def split(self, input): # pylint: disable=redefined-builtin - """Splits the input tensor into pieces. - - Generally, the pieces returned by a splitter correspond to substrings of the - original string, and can be encoded using either strings or integer ids. - - Example: - - >>> print(tf_text.WhitespaceTokenizer().split("small medium large")) - tf.Tensor([b'small' b'medium' b'large'], shape=(3,), dtype=string) - - Args: - input: An N-dimensional UTF-8 string (or optionally integer) `Tensor` or - `RaggedTensor`. - - Returns: - An N+1-dimensional UTF-8 string or integer `Tensor` or `RaggedTensor`. - For each string from the input tensor, the final, extra dimension contains - the pieces that string was split into. - """ - raise NotImplementedError("Abstract method") - - -class SplitterWithOffsets(Splitter): - r"""An abstract base class for splitters that return offsets. - - Each SplitterWithOffsets subclass must implement the `split_with_offsets` - method, which returns a tuple containing both the pieces and the offsets where - those pieces occurred in the input string. E.g.: - - >>> class CharSplitter(SplitterWithOffsets): - ... def split_with_offsets(self, input): - ... chars, starts = tf.strings.unicode_split_with_offsets(input, 'UTF-8') - ... lengths = tf.expand_dims(tf.strings.length(input), -1) - ... ends = tf.concat([starts[..., 1:], tf.cast(lengths, tf.int64)], -1) - ... return chars, starts, ends - ... def split(self, input): - ... 
return self.split_with_offsets(input)[0] - >>> pieces, starts, ends = CharSplitter().split_with_offsets("a😊c") - >>> print(pieces.numpy(), starts.numpy(), ends.numpy()) - [b'a' b'\xf0\x9f\x98\x8a' b'c'] [0 1 5] [1 5 6] - """ - - @abc.abstractmethod - def split_with_offsets(self, input): # pylint: disable=redefined-builtin - """Splits the input tensor, and returns the resulting pieces with offsets. - - Example: - - >>> splitter = tf_text.WhitespaceTokenizer() - >>> pieces, starts, ends = splitter.split_with_offsets("a bb ccc") - >>> print(pieces.numpy(), starts.numpy(), ends.numpy()) - [b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] - - Args: - input: An N-dimensional UTF-8 string (or optionally integer) `Tensor` or - `RaggedTensor`. - - Returns: - A tuple `(pieces, start_offsets, end_offsets)` where: - - * `pieces` is an N+1-dimensional UTF-8 string or integer `Tensor` or - `RaggedTensor`. - * `start_offsets` is an N+1-dimensional integer `Tensor` or - `RaggedTensor` containing the starting indices of each piece (byte - indices for input strings). - * `end_offsets` is an N+1-dimensional integer `Tensor` or - `RaggedTensor` containing the exclusive ending indices of each piece - (byte indices for input strings). - """ - raise NotImplementedError("Abstract method")
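A minimal sketch of the contract the deleted Splitter / SplitterWithOffsets base classes define, using tf_text.WhitespaceTokenizer (a concrete SplitterWithOffsets, as in the docstrings above) rather than the removed module. Assumes the tensorflow and tensorflow_text packages are installed; the round-trip via tf.strings.substr illustrates what the offsets mean.

import tensorflow as tf
import tensorflow_text as tf_text

doc = tf.constant("small medium large")
splitter = tf_text.WhitespaceTokenizer()

# split() returns only the pieces; split_with_offsets() additionally returns
# the inclusive start and exclusive end byte offsets of each piece.
pieces = splitter.split(doc)
pieces2, starts, ends = splitter.split_with_offsets(doc)

# The offsets index into the original string, so substr() recovers each piece.
recovered = tf.strings.substr(doc, starts, ends - starts)
print(pieces.numpy())     # [b'small' b'medium' b'large']
print(recovered.numpy())  # [b'small' b'medium' b'large']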
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/state_based_sentence_breaker_op.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/state_based_sentence_breaker_op.py deleted file mode 100644 index 15bace0d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/state_based_sentence_breaker_op.py +++ /dev/null
@@ -1,145 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Break sentence ops.""" - -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import map_fn -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_state_based_sentence_breaker_op = load_library.load_op_library(resource_loader.get_path_to_datafile('_state_based_sentence_breaker_op.so')) -from tensorflow_text.python.ops import sentence_breaking_ops - - -class StateBasedSentenceBreaker(sentence_breaking_ops.SentenceBreakerWithOffsets - ): - """A `Splitter` that uses a state machine to determine sentence breaks. - - `StateBasedSentenceBreaker` splits text into sentences by using a state - machine to determine when a sequence of characters indicates a potential - sentence break. - - The state machine consists of an `initial state`, then transitions to a - `collecting terminal punctuation state` once an acronym, an emoticon, or - terminal punctuation (ellipsis, question mark, exclamation point, etc.), is - encountered. - - It transitions to the `collecting close punctuation state` when a close - punctuation (close bracket, end quote, etc.) is found. - - If non-punctuation is encountered in the collecting terminal punctuation or - collecting close punctuation states, then the state machine exits, returning - false, indicating it has moved past the end of a potential sentence fragment. - """ - - def break_sentences(self, doc): - """Splits `doc` into sentence fragments and returns the fragments' text. - - Args: - doc: A string `Tensor` of shape [batch] with a batch of documents. - - Returns: - results: A string `RaggedTensor` of shape [batch, (num_sentences)] - with each input broken up into its constituent sentence fragments. - - """ - results, _, _ = self.break_sentences_with_offsets(doc) - return results - - def break_sentences_with_offsets(self, doc): - """Splits `doc` into sentence fragments, returns text, start & end offsets. - - Example: - - ``` - 1 1 2 3 - 012345678901234 01234567890123456789012345678901234567 - doc: 'Hello...foo bar', 'Welcome to the U.S. don't be surprised' - - fragment_text: [ - ['Hello...', 'foo bar'], - ['Welcome to the U.S.' , 'don't be surprised'] - ] - start: [[0, 8],[0, 20]] - end: [[8, 15],[19, 38]] - ``` - - Args: - doc: A string `Tensor` of shape `[batch]` or `[batch, 1]`. - - Returns: - A tuple of `(fragment_text, start, end)` where: - - fragment_text: A string `RaggedTensor` of shape [batch, (num_sentences)] - with each input broken up into its constituent sentence fragments. - start: A int64 `RaggedTensor` of shape [batch, (num_sentences)] - where each entry is the inclusive beginning byte offset of a sentence. 
- end: A int64 `RaggedTensor` of shape [batch, (num_sentences)] - where each entry is the exclusive ending byte offset of a sentence. - """ - doc = ragged_tensor.convert_to_tensor_or_ragged_tensor(doc) - - if doc.shape.ndims > 1: - if not ragged_tensor.is_ragged(doc): - doc = ragged_tensor.RaggedTensor.from_tensor(doc) - doc_flat = doc.flat_values - else: - doc_flat = doc - - # Run sentence fragmenter op v2 - fragment = gen_state_based_sentence_breaker_op.sentence_fragments_v2( - doc_flat) - start, end, properties, terminal_punc_token, row_lengths = fragment - - # Pack and create `RaggedTensor`s - start, end, properties, terminal_punc_token = tuple( - ragged_tensor.RaggedTensor.from_row_lengths(value, row_lengths) - for value in [start, end, properties, terminal_punc_token]) - - # Helper for use within map_fn (function must only take in one argument) - def _substring(x): - s, pos, length = x - return string_ops.substr(s, pos, length) - - # Extract fragment text using offsets - fragment_text = map_fn.map_fn( - _substring, (doc_flat, start, math_ops.subtract(end, start)), - fn_output_signature=ragged_tensor.RaggedTensorSpec( - shape=[None], dtype=dtypes.string), - infer_shape=False, - dtype=dtypes.string) - - # Repack back into original shape (if necessary) - if doc.shape.ndims == 1: - return fragment_text, start, end - - if ragged_tensor.is_ragged(doc): - fragment_text = ragged_tensor.RaggedTensor.from_row_lengths( - fragment_text, doc.row_lengths()) - start = ragged_tensor.RaggedTensor.from_row_lengths( - start, doc.row_lengths()) - end = ragged_tensor.RaggedTensor.from_row_lengths(end, doc.row_lengths()) - return fragment_text, start, end - else: - fragment_text = ragged_tensor.RaggedTensor.from_uniform_row_length( - fragment_text, doc.shape[-1]) - start = ragged_tensor.RaggedTensor.from_uniform_row_length( - start, doc.shape[-1]) - end = ragged_tensor.RaggedTensor.from_uniform_row_length( - end, doc.shape[-1]) - return fragment_text, start, end
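A brief usage sketch of break_sentences_with_offsets() as documented above, assuming StateBasedSentenceBreaker is still exported from the tensorflow_text package. The returned start/end values are byte offsets into each input document, so tf.strings.substr reconstructs the fragment text row by row.

import tensorflow as tf
import tensorflow_text as tf_text

docs = tf.constant(
    ["Hello...foo bar", "Welcome to the U.S. don't be surprised"])
breaker = tf_text.StateBasedSentenceBreaker()

fragments, starts, ends = breaker.break_sentences_with_offsets(docs)
print(fragments)  # [[b'Hello...', b'foo bar'],
                  #  [b'Welcome to the U.S.', b"don't be surprised"]]

# Reconstruct each fragment from its offsets (per row, since the result is ragged).
for i in range(docs.shape[0]):
    row_starts = starts[i]
    row_ends = ends[i]
    print(tf.strings.substr(docs[i], row_starts, row_ends - row_starts).numpy())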
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/state_based_sentence_breaker_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/state_based_sentence_breaker_op_test.py deleted file mode 100644 index bea7e6cf..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/state_based_sentence_breaker_op_test.py +++ /dev/null
@@ -1,145 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for sentence_breaking_ops.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized - -from tensorflow.python.framework import constant_op -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import state_based_sentence_breaker_op - - -class SentenceFragmenterTestCasesV2(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - # pyformat: disable - dict( - test_description="Test acronyms", - doc=["Welcome to the U.S. don't be surprised."], - expected_fragment_text=[ - [b"Welcome to the U.S.", b"don't be surprised."] - ], - ), - dict( - test_description="Test batch containing acronyms", - doc=["Welcome to the U.S. don't be surprised.", "I.B.M. yo"], - expected_fragment_text=[ - [b"Welcome to the U.S.", b"don't be surprised."], - [b"I.B.M.", b"yo"] - ], - ), - dict( - test_description="Test when rank > 1.", - doc=[["Welcome to the U.S. don't be surprised."], ["I.B.M. yo"]], - expected_fragment_text=[ - [[b"Welcome to the U.S.", b"don't be surprised."]], - [[b"I.B.M.", b"yo"]] - ], - ), - dict( - test_description="Test semicolons", - doc=["Welcome to the US; don't be surprised."], - expected_fragment_text=[[b"Welcome to the US; don't be surprised."]], - ), - dict( - test_description="Basic test", - doc=["Hello. Foo bar!"], - expected_fragment_text=[[b"Hello.", b"Foo bar!"]], - ), - dict( - test_description="Basic ellipsis test", - doc=["Hello...foo bar"], - expected_fragment_text=[[b"Hello...", b"foo bar"]], - ), - dict( - test_description="Parentheses and ellipsis test", - doc=["Hello (who are you...) foo bar"], - expected_fragment_text=[[b"Hello (who are you...)", b"foo bar"]], - ), - dict( - test_description="Punctuation after parentheses test", - doc=["Hello (who are you)? Foo bar!"], - expected_fragment_text=[[b"Hello (who are you)?", b"Foo bar!"]], - ), - dict( - test_description="MidFragment Parentheses test", - doc=["Hello (who are you) world? Foo bar"], - expected_fragment_text=[[b"Hello (who are you) world?", b"Foo bar"]], - ), - dict( - test_description="Many final punctuation test", - doc=["Hello!!!!! Who are you??"], - expected_fragment_text=[[b"Hello!!!!!", b"Who are you??"]], - ), - dict( - test_description="Test emoticons within text", - doc=["Hello world :) Oh, hi :-O"], - expected_fragment_text=[[b"Hello world :)", b"Oh, hi :-O"]], - ), - dict( - test_description="Test emoticons with punctuation following", - doc=["Hello world :)! 
Hi."], - expected_fragment_text=[[b"Hello world :)!", b"Hi."]], - ), - dict( - test_description="Test emoticon list", - doc=[b":) :-\\ (=^..^=) |-O"], - expected_fragment_text=[[b":)", b":-\\", b"(=^..^=)", b"|-O"]], - ), - dict( - test_description="Test emoticon batch", - doc=[":)", ":-\\", "(=^..^=)", "|-O"], - expected_fragment_text=[[b":)"], [b":-\\"], [b"(=^..^=)"], [b"|-O"]], - ), - dict( - test_description="Test tensor inputs w/ shape [2, 1]", - doc=[["Welcome to the U.S. don't be surprised. We like it here."], - ["I.B.M. yo"]], - expected_fragment_text=[ - [[b"Welcome to the U.S.", b"don't be surprised.", - b"We like it here."]], - [[b"I.B.M.", b"yo"]] - ], - ), - # pyformat: enable - ]) - def testStateBasedSentenceBreaker(self, test_description, doc, - expected_fragment_text): - input = constant_op.constant(doc) # pylint: disable=redefined-builtin - sentence_breaker = ( - state_based_sentence_breaker_op.StateBasedSentenceBreaker()) - fragment_text, fragment_starts, fragment_ends = ( - sentence_breaker.break_sentences_with_offsets(input)) - - texts, starts, ends = self.evaluate( - (fragment_text, fragment_starts, fragment_ends)) - self.assertAllEqual(expected_fragment_text, fragment_text) - for d, text, start, end in zip(doc, texts.to_list(), starts.to_list(), - ends.to_list()): - # broadcast d to match start/end's shape - start = constant_op.constant(start) - end = constant_op.constant(end) - d = array_ops.broadcast_to(d, start.shape) - self.assertAllEqual(string_ops.substr(d, start, end - start), text) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/string_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/string_ops.py deleted file mode 100644 index 82278b7..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/string_ops.py +++ /dev/null
@@ -1,71 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tensorflow operations for UTF8 strings.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.ops import string_ops - - -def _unichr(codepoint): - try: - return unichr(codepoint) - except NameError: - return chr(codepoint) - - -# pylint: disable=redefined-builtin -def coerce_to_structurally_valid_utf8(input, - replacement_char=_unichr(65533), - name=None): - r"""Coerce UTF-8 input strings to structurally valid UTF-8. - - Any bytes which cause the input string to be invalid UTF-8 are substituted - with the provided replacement character codepoint (default 65533). If you plan - on overriding the default, use a single byte replacement character codepoint - to preserve alignment to the source input string. - - In this example, the character \xDEB2 is an invalid UTF-8 bit sequence; the - call to `coerce_to_structurally_valid_utf8` replaces it with \xef\xbf\xbd, - which is the default replacement character encoding. - >>> input_data = ["A", b"\xDEB2", "C"] - >>> coerce_to_structurally_valid_utf8(input_data) - <tf.Tensor: shape=(3,), dtype=string, - numpy=array([b'A', b'\xef\xbf\xbdB2', b'C'], dtype=object)> - - Args: - input: UTF-8 string tensor to coerce to valid UTF-8. - replacement_char: The replacement character to be used in place of any - invalid byte in the input. Any valid Unicode character may be used. The - default value is the default Unicode replacement character which is - 0xFFFD (or U+65533). Note that passing a replacement character - expressible in 1 byte, such as ' ' or '?', will preserve string - alignment to the source since individual invalid bytes will be replaced - with a 1-byte replacement. (optional) - name: A name for the operation (optional). - - Returns: - A tensor of type string with the same shape as the input. - """ - return string_ops.unicode_transcode( - input, - input_encoding='UTF-8', - output_encoding='UTF-8', - errors='replace', - replacement_char=ord(replacement_char), - name=name)
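A small sketch of the equivalent public TensorFlow call: the deleted helper is a thin wrapper around tf.strings.unicode_transcode with errors='replace'. The second call shows the point made in the docstring above, namely that a 1-byte replacement character (here '?', an illustrative choice) keeps the coerced string byte-aligned with the source.

import tensorflow as tf

input_data = tf.constant([b"A", b"\xDEB2", b"C"])

# Default-style replacement (U+FFFD, which encodes to 3 bytes in UTF-8):
print(tf.strings.unicode_transcode(
    input_data, input_encoding="UTF-8", output_encoding="UTF-8",
    errors="replace", replacement_char=0xFFFD).numpy())
# [b'A' b'\xef\xbf\xbdB2' b'C']

# A 1-byte replacement preserves alignment to the source bytes:
print(tf.strings.unicode_transcode(
    input_data, input_encoding="UTF-8", output_encoding="UTF-8",
    errors="replace", replacement_char=ord("?")).numpy())
# [b'A' b'?B2' b'C']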
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/fast_wordpiece_README.google.txt b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/fast_wordpiece_README.google.txt deleted file mode 100644 index 5773147..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/fast_wordpiece_README.google.txt +++ /dev/null
@@ -1,28 +0,0 @@ -Below are the steps to generate the 'fast_wordpiece_tokenizer_model.fb' file -that is used in -third_party/tensorflow_text/google/core/kernels/fast_wordpiece_tokenizer_test.cc: - -(1) Create a vocab file '/tmp/test_vocab.txt' with the following content: -```a -abc -abcdefghi -##de -##defgxy -##deh -##f -##ghz -<unk>``` - -(2) Run the following command: -``` -blaze run \ - third_party/tensorflow_text/google/tools:build_fast_wordpiece_model \ - -- --vocab_file=/tmp/test_vocab.txt --max_bytes_per_token 100 \ - --suffix_indicator="##" --unk_token="<unk>" \ - --output_model_file=/tmp/fast_wordpiece_tokenizer_model.fb -``` - -(3) Copy /tmp/fast_wordpiece_tokenizer_model.fb to -third_party/tensorflow_text/google/core/kernels/testdata/. - -
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb deleted file mode 100644 index 079f6cb..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/fast_wordpiece_tokenizer_model.fb +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/saved_model.pb b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/saved_model.pb deleted file mode 100644 index 625cd08..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/saved_model.pb +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/tfhub_module.pb b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/tfhub_module.pb deleted file mode 100644 index d65dd8f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/tfhub_module.pb +++ /dev/null
@@ -1 +0,0 @@ - \ No newline at end of file
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/variables/variables.data-00000-of-00001 b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/variables/variables.data-00000-of-00001 deleted file mode 100644 index 105c506..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/variables/variables.data-00000-of-00001 +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/variables/variables.index b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/variables/variables.index deleted file mode 100644 index 29282b8..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/segmenter_hub_module/variables/variables.index +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/test_oss_model.model b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/test_oss_model.model deleted file mode 100644 index 20c1a06..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/test_oss_model.model +++ /dev/null Binary files differ
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/test_wp_en_vocab.txt b/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/test_wp_en_vocab.txt deleted file mode 100644 index 8421b27..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/test_data/test_wp_en_vocab.txt +++ /dev/null
@@ -1,7010 +0,0 @@ -[PAD] -[UNK] -[START] -[END] -! -# -$ -% -& -' -( -) -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -? -@ -[ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -ย -ร -อ -– -— -’ -♪ -♫ -the -and -to -of -that -it -in -we -you -is -this -so -they -was -for -are -but -##s -have -what -do -on -with -can -about -there -be -not -my -as -all -at -one -people -re -like -if -our -from -now -an -just -or -me -he -these -when -by -how -because -more -out -very -them -see -had -would -their -were -up -going -know -think -us -your -who -here -##ing -time -really -get -ve -world -has -could -then -some -which -did -actually -where -way -will -no -other -into -well -want -##ed -years -make -been -those -go -two -also -things -first -right -much -than -even -something -new -she -look -laughter -##d -only -many -need -little -life -take -his -let -over -##ly -applause -most -got -why -back -thing -work -does -said -every -lot -different -around -thank -say -day -good -her -through -today -same -down -come -use -year -percent -kind -ll -ca -three -called -made -after -being -change -tell -any -human -##er -find -talk -##e -own -started -doing -should -still -idea -fact -together -put -##y -better -might -never -before -another -each -its -great -problem -last -example -went -system -course -big -##al -part -##r -give -start -next -too -him -able -few -off -brain -story -##n -000 -##t -important -again -long -thought -used -school -place -women -found -understand -##es -show -between -maybe -mean -data -ago -came -technology -point -question -bit -old -children -information -real -always -everything -help -live -love -##a -end -means -call -wanted -10 -feel -ever -away -country -number -person -home -space -done -water -power -looking -believe -social -future -may -million -times -imagine -using -create -four -small -five -without -become -second -best -less -money -left -am -working -science -##ers -comes -lives -whole -city -learn -thinking -trying -talking -ask -energy -kids -making -took -across -high -days -food -getting -such -told -hard -try -body -family -moment -health -man -music -okay -happen -interesting -light -##o -global -happened -young -makes -enough -pretty -video -almost -case -hand -side -##i -sort -building -ways -##l -simple -half -countries -yet -build -often -project -saw -sense -myself -care -inside -quite -later -matter -##able -asked -room -once -friends -probably -else -open -while -experience -already -happens -remember -both -far -self -public -computer -having -move -wrong -##ness -answer -goes -internet -living -words -car -education -coming -reason -until -anything -stop -community -dollars -level -looked -##ation -20 -age -design -set -states -amazing -men -billion -face -nothing -business -picture -##h -read -someone -earth -exactly -seen -possible -under -society -keep -learned -works -bad -process -art -students -became -everybody -whether -history -true -yes -stuff -within -##ic -##k -##on -mind -questions -sometimes -language -single -stories -control -sure -black -job -knew -though -africa -completely -cells -instead -others -since -##m -built -everyone -learning -months -must -child -says -six -united -changed -heard -war -hope -group -mother -run -form -large -share -state -oh -book -past -play -universe -##in -places -sound -##ment -basically -decided -free -government -order -research -##ity -looks -night -news -taking -reality -turns -bring -company -30 -turn -100 -against -saying -top -##ive 
-itself -easy -gets -middle -natural -couple -lots -takes -##rs -companies -model -nature -ourselves -50 -early -name -problems -woman -study -yeah -cars -century -cities -kinds -planet -species -word -disease -cancer -dna -hear -##th -based -head -huge -powerful -size -themselves -thousands -beautiful -created -difficult -felt -line -piece -##p -air -hours -##le -finally -image -trust -##an -ideas -##ry -##ting -happening -voice -worked -ones -leave -lost -eight -fear -somebody -american -cost -perhaps -behind -environment -guy -##ted -gave -needs -particular -##us -america -outside -value -##ion -full -rather -economic -per -scale -third -longer -low -needed -parents -violence -beginning -eyes -heart -physical -##g -##ized -difference -father -complex -entire -week -least -machine -friend -challenge -economy -numbers -test -blood -china -given -known -met -along -during -happy -pay -simply -york -15 -##nt -front -key -seven -step -technologies -yourself -##b -##ies -population -##or -cell -media -##ate -culture -field -green -realized -anyone -humans -minutes -seeing -knowledge -network -systems -alone -taken -##ally -##less -animals -began -figure -god -speak -tools -write -behavior -born -clear -close -house -incredible -phone -teachers -##ian -images -individual -amount -deal -normal -absolutely -books -hundreds -political -turned -watch -ted -walk -local -grow -opportunity -road -tried -white -feeling -street -##et -access -either -poor -wall -##ism -##ist -carbon -hands -lived -nuclear -online -team -##ter -##tion -industry -law -death -ground -growth -oil -personal -red -rest -certain -dark -europe -millions -rate -stay -##st -common -digital -eat -girl -type -view -whatever -risk -scientists -terms -weeks -realize -support -##c -act -climate -growing -neurons -patients -seems -teach -##u -audience -india -sounds -structure -telling -tiny -wonderful -##ts -changes -developed -present -recently -solution -stand -allow -fast -gone -guys -parts -similar -starting -dream -short -changing -class -interested -kid -map -quickly -understanding -##ling -brought -drive -english -google -hold -modern -moving -spend -sun -##ble -indeed -message -paper -##en -##est -##ize -##ous -40 -animal -code -color -everywhere -lab -national -explain -girls -ok -schools -solve -university -##ful -##ia -ability -area -land -mental -result -save -shows -theory -truth -web -##ure -average -biggest -buy -journey -areas -cool -crazy -eventually -forward -giving -hit -impact -incredibly -literally -material -response -sex -asking -available -computers -development -hundred -security -##it -issue -revolution -societies -stage -wrote -##ar -##ary -center -meet -miles -obviously -south -term -writing -##ial -choice -creating -discovered -sitting -##ine -cause -deep -groups -nobody -ready -several -situation -starts -beyond -buildings -cut -democracy -especially -plant -running -worth -action -baby -higher -hour -likely -major -nine -perfect -role -sea -add -begin -continue -die -finding -patient -reasons -send -source -spent -##man -attention -bottom -developing -device -seem -showed -bigger -box -dead -experiment -market -nice -object -pain -product -##re -##se -##ty -chinese -connected -loved -mom -networks -produce -solar -surface -teacher -wo -25 -eye -institutions -special -##ant -60 -becomes -east -effect -further -genes -guess -movement -suddenly -blue -fun -gives -knows -older -rules -son -choose -consider -extraordinary -focus -generation -killed -patterns -putting -scientific -wait -##0 -##ated -##ce 
-##te -families -freedom -innovation -math -medical -soon -program -safe -shared -traditional -wife -##age -##ge -##um -crisis -gas -medicine -notice -pictures -showing -college -died -force -largest -leaders -models -perspective -pick -training -truly -##4 -among -approach -blind -electricity -faster -grew -meant -park -potential -resources -results -sit -wish -##is -##ping -##ve -##z -anybody -anymore -communities -decision -exciting -favorite -fly -led -mass -measure -moved -smart -student -teaching -totally -towards -written -chance -creative -designed -evidence -fight -however -larger -late -minute -plan -politics -quality -speed -spread -thousand -##ent -##land -break -examples -follow -lead -listen -materials -please -relationships -somewhere -strong -success -##as -brains -certainly -doctor -effective -game -general -hospital -individuals -month -morning -reach -talked -thanks -unique -##7 -##king -african -conversation -governments -issues -malaria -ocean -particularly -poverty -rights -successful -taught -visual -wonder -biology -forest -physics -provide -relationship -##ped -##red -allowed -architecture -drug -film -mobile -near -shape -speaking -worse -11 -12 -driving -fire -humanity -military -museum -page -pieces -wants -##6 -##8 -##ns -basic -exist -fall -office -police -subject -travel -##ch -##el -##ors -200 -90 -actual -boy -clearly -cold -costs -expect -facebook -feet -main -non -organization -reading -##ating -##ding -##f -##ging -##ll -becoming -capital -dance -diseases -increase -policy -song -standing -supposed -walking -##1 -##ists -18 -allows -attack -camera -compassion -complicated -develop -direction -expensive -lines -listening -onto -path -photos -range -##2 -##3 -##at -##na -##nce -##ual -alive -cases -connection -decades -door -drugs -lower -peace -practice -recognize -usually -##ke -##ne -##ring -80 -above -double -essentially -experiences -mine -remarkable -sent -smaller -sorts -tool -understood -west -17 -bodies -degrees -devices -improve -involved -list -points -protect -site -##led -##line -artist -brand -fish -following -fundamental -helped -international -kept -matters -record -shown -tells -trees -village -vision -##ance -##ck -##il -##ization -ancient -anyway -clean -colleagues -gps -himself -hole -including -insects -interest -jobs -lose -screen -skin -surgery -tend -treatment -wind -##ged -##ra -central -challenges -closer -critical -dangerous -familiar -female -genome -glass -highly -nearly -north -pattern -position -private -products -robot -zero -##ious -##w -500 -bank -complete -concept -doctors -evolution -financial -impossible -income -mr -somehow -specific -watching -workers -bed -bees -block -context -function -greatest -invisible -magic -massive -passwords -price -production -religion -silk -speech -square -table -text -trade -victims -##ks -##ma -##tor -coal -fuel -goal -hey -knowing -memory -paid -post -saving -sharing -unfortunately -##9 -##ving -campaign -citizens -daughter -emotional -era -fix -held -passion -photo -raise -recent -serious -slow -software -streets -terrible -waiting -##izing -##x -13 -70 -board -bringing -dad -engage -excited -fine -hot -immediately -letter -molecules -objects -projects -ran -secret -useful -weight -##ct -##ible -##ile -##ton -brings -communicate -farmers -genetic -hair -meaning -members -moon -obvious -painting -prime -quantum -return -service -services -shift -tv -##nd -##ration -afghanistan -ahead -beings -conditions -earlier -epidemic -floor -forms -gene -greater -investment -king -noise -president 
-pressure -region -star -stars -stopped -apply -bacteria -chris -classroom -communication -cultural -decade -easier -failure -focused -forget -ice -kill -markets -otherwise -rain -rise -solutions -straight -strange -survive -traffic -urban -##ie -##les -##per -300 -degree -experiments -extremely -mechanical -noticed -paint -playing -rich -sat -search -seemed -sky -therefore -worry -##id -answers -bill -connect -effects -fashion -healthy -series -sorry -surprising -sustainable -throughout -win -##os -address -cheap -check -civil -complexity -conflict -engine -equal -event -flow -infrastructure -laws -lesson -mathematics -particles -tissue -touch -although -broken -creativity -dimensional -directly -european -except -extreme -fully -husband -lie -machines -positive -risks -scientist -tree -visit -##cy -##ds -##ities -##ning -##ro -##ue -24 -anywhere -blocks -brother -bunch -designers -despite -easily -email -feed -feels -fit -higgs -legs -loss -multiple -pass -period -pull -push -stress -##ence -##me -##ments -##ps -##up -##way -apart -aware -beauty -begins -bread -bridge -career -decide -differently -draw -driver -edge -famous -heat -holding -iran -protection -sign -skills -sleep -slightly -spider -studies -target -total -town -train -trial -western -##hip -##ls -##ny -##ters -afford -afraid -artists -believed -below -birds -birth -capacity -content -elements -feelings -forever -helping -holes -illness -importantly -industrial -john -justice -kilometers -meters -mostly -named -original -plants -popular -responsibility -river -shot -spaces -types -upon -waste -##ans -##de -affect -affected -army -ball -bang -click -depression -ended -everyday -invented -necessary -nervous -possibly -press -prison -related -seconds -sight -store -surprise -turning -uses -won -##cing -##om -##ur -##zing -basis -current -decisions -dots -engineering -evolved -feedback -forth -instance -israel -languages -levels -method -microbes -none -opposite -performance -robots -sand -studied -temperature -twitter -values -version -virtual -walked -whose -##bs -##sing -##tic -california -cards -chemistry -choices -consciousness -core -count -creates -drop -eating -electric -final -happiness -intelligence -joy -leaves -letters -random -walls -willing -##ative -##ets -##ey -##ned -##ries -##ular -16 -accept -agree -arab -benefits -british -cross -daily -desire -discover -dog -dollar -efficient -enormous -gender -hearing -inspired -lack -leading -london -majority -messages -moral -nation -oxytocin -professor -raised -respect -san -sentence -sister -structures -track -transform -ultimately -vote -wide -worst -##ish -##istic -##sion -##uring -brazil -caught -collective -connections -curious -differences -distance -due -economics -essential -extra -fantastic -harder -imagination -lying -minds -organizations -password -progress -prove -received -religious -saved -sell -slowly -soil -stuck -summer -twice -whom -##5 -##ins -##ium -##rate -##ut -americans -applications -avoid -carry -chain -cognitive -describe -described -effectively -einstein -em -error -etc -facts -fail -fat -forces -gap -graph -landscape -manage -managed -motion -passed -purpose -quick -radio -report -respond -sequence -spending -standard -super -tough -treat -trillion -various -voices -watched -website -window -##ens -##ier -##sh -##tes -14 -3d -active -arm -babies -cheaper -compared -details -dry -dung -experienced -explore -fascinating -foundation -ignorance -join -moments -operating -played -predict -privacy -secure -silence -suffering -task -tons -trained 
-valuable -vast -##ction -##lin -20th -accident -according -aging -apple -chemical -citizen -corner -cure -damage -deliver -documents -effort -failed -fairly -foreign -identity -influence -lights -neighborhood -normally -race -release -rock -rule -safety -sick -sides -signal -spirit -underneath -vehicle -##ants -##ations -##ite -##ler -##ner -##tle -2008 -awesome -behaviors -billions -biological -bits -bubble -calling -carefully -causes -charge -compare -copyright -definition -discovery -economies -entropy -faces -factors -faith -fake -gotten -grade -gut -heads -immune -iraq -male -marketing -meat -mountain -narrative -opportunities -peer -phenomenon -phones -plus -produced -rural -savings -sexual -stick -suicide -terrorism -vaccines -virus -wealth -##ad -##nts -##out -##rating -##ulate -##ves -activity -arms -atmosphere -belief -canada -civilization -cloud -condition -currently -factory -flu -followed -france -galaxies -giant -gold -hate -institution -japan -kenya -launched -lucky -mars -mathematical -molecule -mosquitos -photograph -poetry -principles -published -roll -struggle -teeth -##ably -##ked -##ot -##pe -##rt -##son -aid -balance -bought -brown -calls -cat -characters -childhood -chip -colors -date -deeply -enjoy -false -french -hiv -limited -match -missing -mission -moves -net -novel -opened -pages -papers -party -patent -politicians -programs -religions -reputation -scratch -sites -strangers -surprised -weather -##ified -##med -##tive -2010 -asia -bear -blah -britain -caused -character -circle -collect -depends -destruction -direct -dr -egypt -election -entirely -events -fair -fewer -forced -gdp -germany -gift -homes -layer -leaving -mark -meeting -mouse -movie -paris -possibilities -possibility -prize -radiation -roads -scared -september -shame -signals -slide -threat -toilet -tumor -unit -videos -waves -##da -##ical -##ology -account -artificial -background -beat -brilliant -closed -conference -contact -cooking -creation -cyrus -dimensions -disability -domestic -dot -dying -elephant -employees -empty -hospitals -illegal -invest -lies -nations -passionate -platform -pop -protein -recorded -separate -songs -studying -survey -testing -trip -universal -violent -worldwide -##by -##dy -##ics -##ms -##one -##que -##ric -##tions -##to -##ver -advanced -amazon -applied -axis -boys -broke -centers -considered -conversations -correct -de -dealing -dinner -dreams -driven -elections -facing -fighting -funny -grandfather -grandmother -helps -hidden -honest -hum -identical -keeps -labor -mexico -names -offer -ordinary -outcomes -planets -professional -profound -proteins -reaction -remain -significant -smell -sold -stood -suffer -supply -switch -typical -unless -versus -younger -##am -##ft -##ram -##sed -##tors -##v -actions -argue -australia -capable -capture -constantly -cover -efficiency -emotions -ends -engineers -environmental -fell -filled -fundamentally -generations -hopefully -increasingly -insight -interact -notion -partner -previous -proud -shoes -soft -survival -talks -tomorrow -transportation -viruses -wearing -youtube -##ak -##ay -##des -##place -##sts -##time -##unk -150 -400 -acting -advice -awareness -bright -card -concrete -convinced -crispr -debate -deeper -doubt -ears -expected -explained -expression -farm -foot -grown -hi -hotel -leader -mentioned -mouth -mystery -rare -repeat -requires -responsible -rooms -scene -seriously -sets -ship -signs -silent -statistics -therapy -thinks -tracking -treated -variation -wow -yellow -##ber -##cent -##cted -##ians -##ions -##ose -##pt 
-##ta -##tain -##ud -##ulated -##ward -##ze -2009 -21st -accurate -added -adults -advantage -anger -application -argument -battle -benefit -catch -comfortable -constant -court -curve -danger -demand -designing -disorders -diversity -experts -exploration -fellow -fossil -fourth -garden -goods -haiti -honor -laboratory -leads -legal -loud -manufacturing -maps -member -mile -organized -perfectly -personally -practical -promise -relevant -repair -represent -require -serve -steps -strength -struck -sudden -suggest -sum -ten -tens -trouble -vulnerable -windows -##ability -##ir -##its -##low -##ri -##ten -##ures -2011 -75 -abuse -achieve -agriculture -atoms -bomb -bone -businesses -closely -consequences -deaths -democratic -disappear -disaster -educational -emissions -fibers -gain -generally -gray -hell -interests -jump -killing -march -mistake -mistakes -naturally -necessarily -oceans -pig -pleasure -policies -properties -pulled -quiet -ride -sanitation -sending -shapes -soul -statement -station -volume -wave -welcome -wondering -##ard -##bility -##car -##ery -##go -##hed -##ip -##ished -##lo -##ons -##rable -##rd -##ses -##sive -##sm -##ute -coffee -connectome -cortex -desert -ear -element -empathy -equality -equivalent -features -fields -figured -forests -frame -hall -hide -highest -increased -inner -jealousy -latin -lay -losing -magazine -management -marriage -married -meaningful -medium -mice -options -organ -principle -property -providing -remind -remote -sample -section -sees -sentences -solving -southern -specifically -stayed -stem -stronger -technical -techniques -transformation -transition -unlike -washington -wear -worried -##ah -##ama -##bly -##board -##ify -##ings -##ka -##light -##my -##ural -adding -aids -alzheimer -associated -base -beach -bet -bg -bitcoin -church -conscious -construction -consumers -contrast -curiosity -detect -distributed -drawing -ecosystem -embrace -equally -exact -existence -faced -generate -inspiration -introduce -memories -molecular -mortality -needle -neighbors -obsessed -opening -operate -particle -physically -potentially -processing -reward -richard -roughly -searching -shadow -shipping -solved -stone -strategy -thoughts -toward -traveling -trend -user -visible -voting -weapon -##als -##bed -##bia -##bit -##bling -##der -##ess -##her -##inal -##ncy -##ol -##ron -##val -affordable -amongst -app -challenging -channel -chose -classrooms -collection -consumer -consumption -copy -council -covered -department -drove -emerging -engaged -entrepreneurs -exchange -explanation -fiction -finished -formed -fought -grateful -gravity -humanitarian -hypothesis -indian -interactive -invited -lessons -los -microscope -organs -ours -perception -processes -puts -puzzle -reduce -resource -row -sad -score -shaped -spring -television -topic -tradition -ultimate -vaccine -variety -wanting -weapons -wild -wk -##ee -##ky -##la -##ming -##unt -addition -admit -ages -algorithm -alternative -analysis -anderson -anti -attacks -babbage -beetle -border -boxes -cable -chart -circumstances -claim -clinical -combine -commercial -cook -credit -crying -cutting -detail -dialogue -distribution -earthquake -elderly -electronics -exercise -finish -fund -galaxy -games -handle -heroes -innovative -interaction -interview -january -joined -link -mit -negative -opinion -pakistan -picked -planning -pounds -print -prostate -rates -replace -responses -round -samples -seat -skeleton -sources -staff -standards -status -surely -surgeon -technique -tied -tom -tonight -traveled -users -wake -wheelchair 
-zone -##ade -##ded -##ering -##ient -##ily -##im -##ise -##ivity -##ld -##men -##uck -##ulation -##va -19th -accessible -afternoon -angeles -anonymous -arrived -bird -breakfast -commitment -conflicts -crime -crucial -customers -deception -define -dense -disorder -dynamic -expand -flying -fuels -functions -gates -graduate -grows -gun -harvard -hits -implications -importance -infinite -input -interactions -internal -island -jail -james -lady -launch -lecture -lifetime -milk -nobel -overall -partners -personality -phrase -poorest -prosperity -radical -regular -released -required -returned -runs -scan -selling -sir -spiders -spreading -taste -technological -tissues -truck -worker -##ars -##ast -##fe -##gs -##ire -##ney -##rated -##rative -##sis -2000 -absolute -aim -badly -barely -bee -birthday -bones -borders -busy -camp -coast -collected -confident -continent -contract -convince -counter -creatures -cylinder -destroyed -detection -discussion -document -dropped -essence -exists -francisco -george -globe -host -hurt -ii -include -industries -iphone -judge -leather -library -maintain -master -mechanism -nerves -nor -note -nowhere -offered -optimistic -option -oxygen -painful -paintings -pair -participate -peers -perform -plans -pm -practices -presented -printing -quote -remains -researchers -review -rid -saudi -scary -sector -signed -spectrum -ss -succeed -suggests -tested -transparency -tremendous -trick -uncle -units -usual -victim -wars -zoom -##ator -##ca -##day -##ell -##ger -##hood -##ition -##ners -##oid -##ow -##pers -##sa -##ulating -21 -22 -27 -academy -accepted -agreed -ai -al -algorithms -aspects -bag -careful -colleague -committed -competition -concerned -courage -cultures -drinking -drives -ecosystems -emergency -engaging -england -enter -equipment -evening -expansion -fabric -figures -friday -gather -german -ghana -historical -hoping -houses -illusion -improvement -intelligent -june -keeping -leadership -location -loop -measured -missed -mothers -neuron -observe -occur -operation -paying -placed -pocket -pre -rapidly -recover -relatively -rely -represents -rice -setting -severe -soup -stands -stored -sub -sweet -symbol -symptoms -throw -transport -union -vehicles -views -weird -wondered -##ality -##aries -##ators -##ched -##end -##ibility -##ically -##ign -##io -##nation -##ological -##ologist -##ored -##ory -##ray -##ria -##rn -##tation -##tter -##uate -##ung -##ured -##ved -##zy -2012 -45 -additional -adult -aspect -battery -bicycle -bind -blockchain -capitalism -carried -clarity -classic -clever -combination -comet -computing -confidence -contribute -crops -cycle -dancing -determine -diagnosed -dignity -dozen -drawn -environments -expertise -express -fixed -fortunately -grand -heavy -holds -horse -hungry -identify -ignore -inevitable -infection -injustice -inspire -intellectual -interacting -introduced -largely -layers -limits -literature -mention -metaphor -micro -movies -neighborhoods -occasionally -odd -offices -organic -percentage -phase -poem -populations -posted -precisely -primary -productivity -provided -quarter -rats -sophisticated -speakers -steel -style -taliban -teenagers -thomas -treatments -uncomfortable -wisdom -yours -##ages -##atory -##fi -##ges -##let -##ography -##ome -##rine -##rts -##ss -##ture -##ug -##ule -##use -2007 -35 -allowing -angry -apartment -assume -awful -boat -burning -button -centuries -clock -co2 -coach -coke -collapse -colony -contrary -crowd -david -delivered -deserve -designer -director -eastern -electronic -exposed -falling -falls 
-figuring -flexible -flowers -fold -fresh -globalization -globally -goals -granted -increasing -initial -instant -insurance -interface -islands -latest -leg -leonardo -linked -literacy -mad -methods -miss -mustache -nodes -organize -plenty -polio -precision -prevent -prisoners -raw -remove -removed -reverse -reviews -ring -rocket -roof -secondly -seeds -served -spinal -staying -swimming -telescope -universes -warming -wherever -writer -##ain -##atic -##etic -##ff -##ha -##ice -##iest -##ig -##iles -##lands -##load -##logy -##nia -##no -##ond -##oo -##point -##posing -##py -##rging -##room -##shed -##side -##stic -##ties -##uation -##uing -##uous -##ways -##wn -19 -2001 -2050 -appreciate -aside -audio -cameras -capita -category -celebrate -cents -clip -clothes -collaborative -controlled -convey -courses -cup -cybercriminals -debt -definitely -drill -duck -edi -enable -encourage -enemy -existing -explaining -exploring -factor -farming -fascinated -flower -frankly -frozen -grab -grave -harm -height -instructions -investors -involves -joke -journalist -leap -length -liked -liquid -logic -manhattan -measuring -mid -movember -musical -native -neither -newspaper -notes -output -pairs -plastic -prepare -prepared -raising -rarely -reconstruct -restaurant -retina -retirement -rising -seek -shelter -spot -string -subjects -tap -theater -trials -tricks -trucks -uncertainty -van -whenever -whereas -##aging -##aking -##ale -##ana -##ase -##bi -##co -##een -##els -##ency -##fully -##fy -##house -##ide -##imize -##ina -##ined -##ites -##ives -##ley -##od -##ously -##rant -##try -##ution -##vation -##zed -##zi -250 -800 -amounts -ancestors -apparently -attempt -autism -bell -bow -breath -burden -cambridge -chicago -club -concepts -conclusion -congestion -crash -currency -damaged -deadly -delivery -diverse -dramatic -drivers -ecology -efforts -embedded -engineer -equation -existed -expanding -expert -fifth -fill -flight -floating -focusing -folks -frank -functional -funding -generated -genius -healthcare -horrible -increases -instantly -institute -jeopardy -ladies -lens -loves -metal -meter -mixed -motor -mt -nanopatch -neural -oyster -parties -penis -permission -physician -pink -plays -poet -recognition -refugees -relate -reminded -renewable -reveal -roots -sales -season -seeking -singing -stanford -static -struggling -stupid -subway -suggesting -sustainability -swim -ta -tape -tech -tesla -theme -transfer -translate -trends -underwater -universities -voters -walks -warm -youth -##acy -##are -##ates -##ave -##bing -##cus -##dle -##ered -##ility -##ively -##nate -##ntial -##ox -##pel -##rey -##ris -##tte -##uit -##work -2005 -23 -600 -achieved -activist -activities -agenda -airplane -anatomy -angle -arts -association -bar -baseball -bats -beetles -behave -believing -blow -breaking -broad -bus -bush -calculating -carrying -cash -chaos -chips -climb -co -com -compete -concern -coral -corruption -cry -cute -defense -destroy -divided -doodling -drink -emotion -empowering -epidemics -errors -expressions -extract -fallen -farmer -fiber -flat -flip -forgotten -former -framework -generous -genomes -graduated -grid -guide -harbor -helpful -immediate -immigration -independent -influenced -invested -journalists -license -likes -lord -marry -mess -minister -mirror -monitor -multiverse -obesity -objective -origin -outcome -pace -parking -paul -pause -personalized -picking -pigs -pool -prototype -quit -rat -rebuild -recognized -recovery -reduced -represented -richer -schizophrenia -sciences -scores -sculpture -shit 
-shooting -similarly -split -spoke -stability -stations -surrounded -surrounding -targets -teenage -tony -tragedy -typically -ugly -ultrasound -unexpected -vital -wider -winter -wired -witness -worlds -yesterday -##ach -##ads -##amine -##bo -##ching -##cies -##eer -##ened -##fit -##hand -##ike -##iled -##itive -##lation -##mer -##mon -##olve -##otic -##race -##rian -##row -##rted -##ually -##ult -##un -##vable -##via -##we -2004 -2015 -90s -abstract -acts -adoption -affects -ah -analyze -announced -artwork -assembly -bathroom -beautifully -branches -breathe -brothers -buses -candidate -causing -ceo -collagen -collecting -comedy -comments -component -conclude -consent -convert -cooperate -description -diet -discuss -distant -divide -donor -doors -elected -ending -entrepreneur -established -facial -filter -fusion -grain -guilty -headed -hearts -herself -imaging -infected -ink -instruments -integrated -intervention -israeli -japanese -knock -la -lake -lets -lighting -luxury -mainly -makers -mathematician -methane -michael -microbial -morality -movements -naked -nasa -navigate -node -northern -nose -nutrition -observation -official -overcome -paradox -parent -participation -passive -peak -photographs -plane -prices -professionals -profit -properly -punch -reached -resistance -resolution -robotics -russia -sadly -safer -satellite -sensor -separation -shake -shock -shop -shopping -shoulder -sisters -sits -solid -speaker -stock -stream -stroke -studio -surgeons -surveillance -sweden -syria -tackle -tension -tests -thick -thin -threats -thrown -translation -unknown -unusual -versions -via -ward -wheel -##ail -##ari -##ational -##atts -##ba -##berg -##bert -##bits -##book -##cence -##dies -##eers -##ef -##ew -##form -##gen -##ica -##ification -##iling -##ils -##lan -##lap -##lic -##lities -##lock -##ncing -##nes -##off -##orn -##rises -##rp -##ti -##ucked -##ul -##ump -##unting -26 -65 -achievement -actor -advance -agencies -altogether -anxiety -approaches -archimedes -balls -band -begun -besides -bias -blindness -breathing -budget -cage -calculate -captured -christmas -classes -clue -components -concerns -contain -contains -corals -depending -diarrhea -dogs -dozens -dramatically -earn -economists -educated -egyptian -eh -electrical -engagement -ethiopia -experiencing -extension -extinct -fabulous -factories -flag -footprint -foxo -glue -golden -ha -hanging -homework -incentives -inequality -instrument -intact -jewish -jews -kingdom -laid -landing -laugh -letting -lifespan -located -lovely -mail -manipulate -measures -medication -mexican -mix -mode -multi -muslim -muslims -neighbor -nelson -nerve -occurred -oldest -organism -organisms -ought -outer -owned -owner -panel -parkinson -parks -performing -pet -physicist -pole -positions -printed -produces -proof -propose -pure -react -receiving -reference -relative -rwanda -safely -saharan -screens -sebastian -selection -sequences -shifting -shocked -shy -silicon -situations -smile -smoke -spiritual -spoken -sports -stays -stranger -suggested -suit -tasks -telephone -terrorist -terrorists -texts -title -toy -traditions -ubiquitous -veterans -viral -warning -websites -yield -##af -##ash -##ax -##box -##cle -##gan -##gate -##gation -##head -##ill -##inate -##inated -##inating -##ior -##ips -##ired -##lize -##lls -##mi -##nse -##ole -##omy -##ony -##ope -##ore -##ota -##plied -##posed -##pression -##ral -##rap -##rates -##ridge -##right -##sions -##tal -##tric -##ua -##ught -##uity -##unted -##ura -##wing -##ying -1970s -99 -academic -adapt -africans 
-arabic -article -asset -assets -astronomers -award -barrier -behavioral -biases -boom -bottle -buying -chair -chances -chapter -chemicals -collaboration -combined -comment -compelling -condoms -confused -congress -contemporary -continued -crack -darkness -december -defined -designs -dig -dioxide -directions -dirty -disabilities -disappeared -dominant -drew -dust -earned -edges -educate -empire -entering -escape -evil -evolutionary -excuse -extent -failing -fears -fingers -flew -forgot -frequency -gathering -gentlemen -gradually -grandchildren -grasp -grass -guns -happier -hardly -hello -highway -horizon -hormone -ideal -imagined -inclusive -interestingly -invention -lawyer -legacy -legislation -leverage -linear -mammals -mandela -mate -mathematicians -minority -monkey -motivated -neanderthals -ngos -obese -opens -originally -owners -ownership -painted -philosophy -plot -predicted -pretend -primarily -privilege -procedure -producing -programming -proportion -ptsd -pushing -quarters -quest -reaching -reactions -recording -reflection -remarkably -rent -representation -resilience -revolutions -rolling -satellites -scenario -secrets -selves -senses -sensors -slowing -smarter -smoking -someday -spanish -stepping -strategies -subtle -suitcase -suppose -survived -survivors -symbols -talent -tank -tax -tie -ties -transformed -trauma -trustworthy -tuberculosis -villages -volunteers -whatsoever -writers -##30 -##ap -##cks -##cular -##den -##em -##hs -##ick -##ight -##ila -##ining -##kes -##ki -##late -##lies -##logist -##matic -##nating -##nded -##nding -##oked -##oration -##plication -##rous -##sia -##sist -##uated -##ub -##ubs -##uts -##ux -##vating -##vity -##zation -700 -70s -72 -80s -administration -adventure -advocate -airbnb -album -alien -andrew -applying -approved -archie -arctic -armed -array -artistic -asks -assignment -assumptions -attitude -author -boson -browser -buried -challenged -charges -charles -circles -clicks -coding -connects -construct -continuing -conventional -costa -darwin -decrease -demands -depend -depressed -detailed -diagnosis -downtown -dutch -economist -emerges -enables -eric -estimate -estimates -evolve -excellent -exhibition -expectations -facility -favorites -feature -fed -football -freespeech -fruit -gabby -gallery -generosity -greek -hang -hero -hormones -huh -hydrogen -innovations -insights -invent -isolated -joel -lego -liberal -limit -luck -lung -mary -meal -miracle -muscle -musicians -nets -neutral -nights -officer -operations -pasted -patience -petition -philosopher -photographer -photography -plate -player -pointed -pollen -pollution -poop -poster -pregnant -primate -primates -priority -profits -programmable -promised -proposed -protected -pushed -receive -reflect -reliable -request -resist -saturn -script -shouting -sing -solly -somewhat -speaks -spectacular -stack -steve -storm -storytelling -surprisingly -tall -tangible -tears -texas -thirds -threatened -throwing -tip -tired -transactions -transparent -trustworthiness -uber -underlying -vessels -visualize -wedding -wires -wolves -wood -##70 -##ades -##ancy -##ang -##ats -##be -##che -##cho -##code -##ctive -##ection -##eries -##esis -##fuse -##han -##iac -##ilia -##ken -##key -##lated -##len -##lessly -##lessness -##mal -##mark -##mise -##mo -##mons -##more -##nated -##ncies -##olation -##ong -##ories -##pa -##pathy -##pic -##places -##ple -##pped -##rade -##ream -##rence -##ressive -##ried -##rily -##rove -##set -##sity -##tate -##ude -##une -##vasive -##vision -##water -2006 -32 -95 -accent -agency 
-agricultural -airport -ambitious -animation -anniversary -antarctica -appeared -appropriate -architects -assistance -attacked -august -bat -beer -beliefs -belong -blame -bold -borrow -boston -bother -branch -bricks -briefly -calculations -cares -cast -cave -chest -cleaning -clinic -closing -clouds -cluster -colored -competitive -congo -connecting -conservation -constraints -constructed -consume -cope -corporate -corporations -counting -cow -creature -crop -crossing -crushed -curriculum -cuts -daf -dependent -differ -disabled -display -dose -ebola -ecological -electron -embracing -emerged -empowerment -ensure -equations -estimated -exception -explains -facilities -fate -fibonacci -finance -finds -flies -fluid -fraction -freely -frustrated -garage -garbage -gathered -gay -gg -glacier -grades -halls -hardware -healing -honestly -hurricane -impression -ingredients -insect -inspiring -installed -interior -invite -islamic -jesus -kansas -kick -kilograms -labs -laughing -learns -lied -lit -locked -lunch -males -managing -manuscript -mapped -mapping -mechanics -mechanisms -memorial -mk -mobility -mosquito -mountains -ms -narrow -nearby -neck -nest -netherlands -nigeria -nowadays -nurses -observations -occupy -officers -openness -opinions -paradigm -paralyzed -pen -perceptions -perspectives -pete -pilot -posters -powers -preparing -productive -profile -proved -pump -randomly -ray -reasoning -reduction -reflected -refused -relations -remained -replaced -retired -robotic -rocks -route -secular -selfish -significantly -singapore -sons -sounded -sperm -spots -stages -sticks -stolen -stones -stops -strongly -successfully -syringe -tattoos -tedtalk -thirty -tragic -translator -treating -understands -uniform -upper -utility -valley -venice -virtue -weak -whoa -widely -widespread -##ab -##ane -##arily -##berry -##cal -##ception -##cination -##city -##clusion -##ctic -##ders -##eat -##ect -##elling -##ement -##ening -##ep -##ern -##ert -##ever -##fted -##fying -##gain -##herent -##ibly -##ied -##ited -##j -##lish -##lity -##lt -##nal -##nder -##ni -##nic -##nter -##ones -##orous -##over -##ploy -##ployed -##pose -##position -##rain -##rawl -##raying -##ress -##rial -##rm -##runk -##rus -##rush -##tend -##tended -##tling -##tory -##ttered -##tude -##ubling -##ups -##urse -##ury -##vated -##ven -##verse -##ville -##vis -##vised -##vy -120 -160 -1998 -2014 -28 -36 -404 -accountable -accuracy -activists -adopted -alternatives -appeal -appear -arrested -atomic -attractive -audiences -australian -authorities -backwards -balloon -ban -banks -barrels -barriers -bhutan -biodiversity -bite -bla -blank -blast -bob -bombs -boss -breaks -brian -brooklyn -cables -calculation -categories -cats -chosen -civic -clues -coca -cola -consequence -controls -county -couples -creator -crew -dancers -dates -dating -decline -dedicated -defend -deforestation -demonstration -density -difficulty -dimension -disgust -division -drops -eaten -egyptians -enabling -establish -exploitation -exponential -extend -fan -fancy -february -feeding -females -films -firing -flash -flowing -flows -foie -fortune -founded -friendly -fueled -gaining -gore -gras -guinea -habitat -handed -highlight -homeless -housing -hunger -impacts -improving -included -independence -infections -infectious -informed -injury -innovate -insecure -instruction -interventions -intimate -islam -killer -kitchen -korea -lawyers -lifted -limestone -listened -mainstream -marine -marks -martin -meetings -merely -millimeter -ministry -narrator -novelty -november -nutrients -obama 
-olds -origins -owns -permanent -pervasive -plain -polar -powered -presidential -prey -profession -projection -projections -proper -prosthetic -psychology -rainforests -rape -rapid -recognizing -register -regularly -relation -remembered -renewables -replicate -reporting -reports -revenue -ringing -rivers -rna -root -rose -routine -rush -sale -sensitive -shadows -shall -shine -shops -signature -simulation -slides -slums -soldier -soldiers -soviet -specialized -stomach -stretch -substance -suffered -sunday -supermarket -suspect -tea -terrified -terror -texting -theoretical -theories -titan -traditionally -traits -transit -transmit -traumatic -tunneling -twenty -unbelievable -unprecedented -vertical -visiting -visitors -visualization -vocal -watson -wealthy -wikipedia -william -winner -wire -wireless -workforce -worm -##12 -##ack -##ag -##alin -##ason -##au -##away -##back -##beration -##bic -##bra -##ci -##cope -##dication -##duced -##ents -##ere -##ex -##gh -##gram -##gy -##hi -##hing -##icides -##iction -##ifying -##ild -##ines -##iring -##irs -##ison -##ith -##izes -##jo -##las -##lating -##lets -##lie -##lm -##mit -##mmer -##mond -##nald -##nces -##oe -##olate -##ool -##op -##pad -##power -##ptic -##races -##rams -##rect -##res -##rge -##ribute -##rious -##rise -##sisted -##sty -##tail -##take -##ths -##trating -##tt -##tting -##ued -##unction -##und -##unding -##urn -##usive -##uting -##vise -##wed -##za -abandoned -abilities -accomplished -accounts -advertising -alarm -alongside -als -amazed -analogy -arabia -architect -articles -astonishing -atlantic -attached -augmented -babylon -barbershop -basement -basics -bears -bike -biologists -blog -blowing -boring -breakthrough -breast -broader -cairo -calculus -canvas -capabilities -ceiling -chairs -charged -cheese -chef -clay -comfort -communicating -communications -constitution -controversial -cooked -cooperation -cord -countless -criminal -crystal -damn -deck -delight -depth -desk -despair -diagnostics -diagram -diffusion -dirt -distinction -dragline -elsewhere -emotionally -employment -engines -evaluation -evan -exploit -explosion -extended -external -farms -farther -fence -fits -flourish -fool -forming -forum -gained -givers -graffiti -gravitational -guests -historically -hitting -horror -hurts -immigrants -indigenous -insane -integration -iranian -iron -jean -jh -jihad -kit -korean -labeled -landed -laptop -lasted -lectures -libya -lift -losses -magical -magnificent -magnitude -mario -mastery -matrix -meanwhile -medications -metronome -monkeys -motivation -murder -na -obstacles -odysseus -orders -organizing -package -packed -palestine -panels -panic -parliament -partnership -passenger -paste -paths -peaceful -perceive -ph -phenomena -philosophers -physicists -pockets -pointing -practically -predictions -presence -previously -pride -printer -proposition -proust -provides -psychological -pyramid -queen -rating -realization -reef -regulation -replacing -reported -restore -revealed -ridiculous -risky -robust -saves -separated -shell -ships -significance -silks -silver -sink -skill -socially -spends -sport -staggering -sticky -stores -sunlight -supported -synapses -syndrome -tbp -teenager -terrifying -theirs -thrive -tide -tiger -till -toll -tomato -tongue -tour -transducer -trapped -trash -trips -truths -tube -tumors -twin -twins -unlock -upside -utterly -variables -youngest -##aa -##aded -##aks -##ards -##art -##ature -##aze -##bin -##bration -##break -##cast -##ches -##do -##door -##dus -##east -##eds -##eeks -##eering -##eral -##eration 
-##ession -##ette -##fied -##fire -##fish -##front -##ga -##hips -##iary -##iate -##iated -##icious -##icity -##ination -##inning -##ishes -##iss -##itions -##ius -##ival -##kers -##lay -##lia -##lines -##mes -##mission -##nap -##net -##ng -##nge -##nny -##og -##olis -##onia -##pes -##pet -##pies -##port -##pressed -##quity -##ren -##rged -##rians -##rified -##rify -##rim -##rk -##rming -##thy -##uating -##ucks -##ucky -##uctor -##ugh -##ules -##using -##vin -##zer -1945 -1999 -38 -86 -abortion -absence -abundance -accelerating -accidents -adam -adapted -affairs -affecting -altruism -ambulance -appears -approaching -arabs -arrow -assemble -associate -assumption -astronauts -atom -attempts -authentic -authority -automated -awake -bacterial -bamboo -beam -beating -behalf -bend -brands -bridges -budgets -calculated -canadian -candidates -cartoon -cathedral -charity -chile -chimpanzee -chronic -circuit -circuits -cochrane -collaborate -combat -configuration -confined -continues -controlling -correctly -covering -cube -curves -customer -daisy -dan -dare -dear -decent -decides -deepest -default -delicious -dental -describing -desperately -devastating -disagree -discoveries -discussions -dominated -dress -ed -eggs -electrons -emma -empowered -enabled -encouraging -endless -engineered -enhance -exercises -exhibit -exponentially -extroverts -fastest -feminine -fifty -file -findings -fires -fishing -fitness -formula -foster -geographic -germ -girlfriend -grandparents -greece -guarantee -handful -happily -harmony -healthier -heforshe -hemisphere -henry -herd -heritage -hierarchy -hire -histories -hometown -honey -hopes -hotels -household -hp -identified -implement -improved -inch -incident -informal -injured -investigate -jane -jaw -jennifer -jerusalem -judgment -july -jumped -jungle -kicked -kidding -knees -lifelong -limitations -locations -loyalty -luckily -lymph -malarious -mall -marked -masters -matches -mere -mic -microphone -migration -minus -missouri -momentum -mouths -muhammad -mutations -myth -neuroscience -nike -norway -observed -obsolete -october -offers -offs -onstage -optimism -oxford -pack -palestinian -passing -passions -pasting -payment -peoples -pharmaceutical -philadelphia -phrases -pity -planted -politician -poo -port -portraits -precise -prefer -prince -prior -programmer -programmers -propaganda -protecting -proven -pulling -pursue -rainforest -rational -rb -reader -reasonable -receptor -redemption -regardless -regime -rescue -responded -revenues -riding -russian -sacred -sandy -sarah -saturday -scales -scholars -seats -seemingly -sexting -sexually -sexy -shelf -shirt -shoot -shorter -shortly -silly -simultaneously -skeletons -smells -smith -spain -spheres -stable -staring -stealing -straightforward -strip -structural -sugar -supports -sustain -synthetic -tables -tale -tendency -tends -terribly -textbooks -timing -toilets -ton -transforming -transplant -trap -tuition -tunisia -tunnel -twist -uganda -upset -urge -ushahidi -variable -variations -velocity -vibrating -virtually -vivid -waited -weigh -whale -winning -woods -##20 -##ague -##ams -##aring -##arity -##asis -##boards -##cation -##cting -##ctions -##dam -##dled -##ections -##eologists -##erate -##even -##fies -##fold -##fuge -##fulness -##gar -##ginable -##hade -##ham -##have -##he -##ho -##hop -##hore -##ially -##iance -##idal -##igating -##ima -##inable -##inary -##inted -##isms -##itch -##ivating -##iving -##laws -##lers -##li -##licate -##list -##mic -##min -##mm -##nds -##nied -##night -##non -##nster -##ntities 
-##ntity -##oids -##ois -##ok -##ologists -##oms -##ord -##oring -##ort -##pack -##piece -##play -##plify -##ptive -##rch -##rds -##reed -##ribed -##rifying -##rill -##robable -##roid -##ruce -##scopic -##sign -##sters -##table -##tee -##tel -##tly -##tone -##tra -##uba -##uct -##uilt -##uins -##ull -##unce -##uns -##urge -##urs -##ursor -##usion -##ust -##uted -##vel -##view -##ware -##wood -1984 -2003 -2016 -54 -absorb -absurd -accelerate -actively -advances -advent -afterwards -agent -agents -aggregate -alcohol -amazingly -apartments -applies -approximately -april -arranged -ashamed -assignments -attending -authenticity -automobile -autos -awkward -basketball -beloved -belt -bicycles -boundaries -boyfriend -bruno -brush -bubbles -bump -burn -burned -burst -calories -capability -carnegie -catastrophic -celebrity -channels -charter -chemist -chimpanzees -chocolate -citizenship -cleaner -columns -communal -comparison -composition -conor -consistent -consists -consuming -contribution -convincing -copying -cosmic -cosmos -counts -cracks -critically -crossed -daniel -deaf -demographic -demonstrate -demonstrated -determined -determines -devil -dh -diameter -dictionary -diego -diplomacy -discipline -disconnect -disconnected -discrimination -distinct -district -disturbing -domain -doodle -dopamine -download -drawings -earliest -emerge -encoder -encountered -encouraged -enormously -episode -faint -faiza -fans -fathers -federal -feminist -finger -foods -footage -friction -frightening -fuzzy -gallon -gear -generic -glad -gods -gon -goodness -grammar -grant -graphic -guitar -habitats -hamburger -hannah -hat -headlines -hebrew -heroic -hired -historic -hollywood -holy -honesty -honeybees -ill -illnesses -imagining -immense -immigrant -implemented -impressive -improvements -includes -incomes -index -indonesia -initially -intangible -intense -intriguing -intuition -ironic -italy -jam -johnny -joshua -journalism -jupiter -kidney -kilos -knife -label -laboratories -lander -lane -lanes -liberia -liberty -links -littlebits -lowest -lungs -magnetic -maker -manuscripts -microsoft -min -mining -mohammed -mood -motors -mum -museums -naive -neanderthal -negotiate -newly -nl -nurse -oecd -offering -ongoing -opposed -orbit -overnight -pacific -parasite -participants -passes -pathway -pathways -payments -peculiar -periods -persuade -philosophical -pigeon -placing -planting -plug -pollination -popcorn -portion -positioning -pound -practicing -precious -predictable -pregnancy -presentation -prevented -principal -prisons -privileged -probability -promote -protests -psychiatric -psychologist -pursuit -qualities -ratings -ratio -realities -recall -recommend -reconciliation -refuse -reinvent -relatives -relief -reporter -reporters -retire -rezero -romantic -rubble -salary -sarajevo -secretary -seduction -sends -shaking -shut -sighted -singers -skull -skype -slavery -slice -smooth -socioeconomic -somalia -sooner -sp -spacecraft -squares -st -statistical -steal -steam -stimulation -stopping -storycorps -striking -strings -succeeded -sufficient -supreme -switched -sydney -tablet -tag -teams -teen -temple -tenth -territory -textbook -threshold -threw -thrilled -thumb -thus -tim -tone -trains -translated -tries -trigger -tsunami -tweet -tweets -ultra -underground -unhappy -unintended -urbanization -urgent -useless -victorian -vietnam -virgin -vivian -weighs -whales -wheels -whoever -width -wings -wins -wise -witnessed -wives -worms -wrist -yard -##60 -##79 -##ac -##ading -##ains -##aked -##akers -##amination -##arl -##asant 
-##bc -##berate -##brate -##care -##cha -##cial -##cid -##cipe -##cities -##cles -##clusive -##dance -##date -##dden -##dder -##dor -##duce -##ductive -##ec -##elor -##ends -##eology -##eric -##fusion -##gers -##graded -##gue -##hew -##hy -##iation -##icial -##igorous -##imate -##imeter -##istrust -##istry -##itimate -##iting -##itors -##ker -##legic -##lete -##listic -##lix -##lon -##los -##lum -##makers -##marks -##mates -##mented -##metric -##metry -##minate -##mine -##nial -##obe -##ods -##oke -##ola -##omical -##onal -##oning -##ots -##oyal -##pass -##ph -##pia -##print -##ption -##pus -##q -##rass -##rassed -##ribe -##rip -##rist -##rit -##rize -##rosion -##round -##scent -##sta -##state -##sy -##test -##tification -##tles -##uard -##uded -##uel -##ues -##ui -##uistic -##ulous -##urable -##uration -##urfing -##urity -##uum -##yan -##yl -##zar -18th -1960s -1980s -1989 -1990 -55 -85 -abundant -accenture -acceptable -accountability -accounting -acids -activism -actors -adopt -ag -aged -alex -alexander -allies -amen -analog -angles -antibiotic -antibiotics -arguments -arrive -asian -asphalt -assigned -ate -atlanta -attacker -attracted -aunt -backed -bacterium -bake -barrel -bars -batteries -bay -bc -beaten -believes -belongs -beneath -bites -blown -boil -boltzmann -bound -brave -brief -bronx -bug -builds -campaigns -campus -cared -carol -catholic -celebrated -cerebral -certificate -chains -characteristics -chasing -checked -circuitry -claimed -classmates -cleaned -clients -closest -cloth -coined -collaborator -commit -compassionate -conditioning -connectomes -conservative -conspiracy -contracts -convenience -counted -cousins -criteria -cruel -cues -curator -dangerously -daughters -deer -denial -deserts -desperate -destination -determination -devastated -difficulties -digging -directed -displaced -dive -documented -documenting -donna -dropping -ease -educators -egg -el -emails -encoded -encounter -enforcement -entered -enters -enzymes -equilibrium -ethical -excitement -exhausted -exile -exit -expecting -expressed -extinction -famously -favor -fixing -fluctuations -fluent -folded -foundations -fragile -friendship -fulfill -functioning -funds -gandhi -gaps -genocide -glaciers -governance -gowanus -guards -guilt -hackers -halfway -harry -harvest -heaven -highways -hip -holmes -honduras -hop -hunter -iceberg -idiot -illustrate -imagery -indicate -inherently -initiative -injuries -innovators -inorganic -install -instinct -institutional -intention -intentions -interpret -interviews -intrigued -inventing -inventor -investigation -investing -invisibility -irrigation -journal -jr -keys -lamb -laser -lee -lhc -limbs -livestock -loans -logical -lolcats -looting -louder -manner -marduk -maria -marriages -marrow -meerkats -membrane -messy -metric -millennia -mimic -mirrors -mm -mo -monitoring -moreover -newspapers -noises -nonetheless -obtained -officials -operated -orb -orbiting -originals -outward -pan -parallel -participating -partly -pee -phenomenal -phoenix -plasma -players -polarization -politically -prayer -prediction -princess -procedures -professors -programmed -promising -prosthetics -protest -psychotic -punishment -purely -purple -radically -reaches -reactor -realistic -rebuilding -recession -recovered -reduces -refer -refugee -reinventing -reject -reminds -repeated -researcher -reserve -resilient -responds -reuse -revolutionary -rises -roger -rough -rubber -sake -salt -scaling -scholar -scream -screening -select -selected -senior -server -session -settled -settlement -sheets -shifted 
-shifts -shining -shower -sierra -simpler -sized -skeptical -slower -smartphone -sms -snow -spark -specimens -squared -stake -stigma -stockholm -storage -stressed -stretched -struggled -stuxnet -supporting -surgeries -surgical -surplus -surveys -surviving -systematically -tagged -talented -tanks -tehran -temporary -tenderness -threatening -thriving -toes -tolerance -touches -touching -towns -toxic -trafficking -transferred -tribal -tribe -trillions -trivial -typing -un -uploaded -ups -vacation -verbal -viewed -viewing -volunteer -voyager -wasted -wine -wing -wound -wrapped -yale -yr -zimbabwe -##13 -##40 -##85 -##aid -##ainable -##alized -##ament -##ao -##arse -##ases -##ashed -##atch -##att -##avor -##aw -##azz -##bbie -##bian -##bled -##bm -##born -##bow -##cidence -##cinations -##cist -##claim -##coming -##con -##crew -##cs -##ctuation -##cture -##dar -##dest -##dic -##diction -##die -##dified -##doors -##down -##ees -##efficient -##eg -##elves -##ensation -##ensible -##eological -##eous -##eping -##ese -##estimate -##eze -##fa -##faction -##fall -##fes -##ffle -##fic -##field -##force -##gic -##give -##giving -##gle -##graphy -##ground -##harks -##heads -##heat -##here -##hold -##hou -##ials -##iaries -##iating -##ich -##icted -##ides -##ilation -##ilized -##imal -##iness -##ingly -##ini -##iny -##iously -##iped -##ipes -##isa -##istress -##itative -##itched -##iture -##ivable -##ivate -##ivor -##ix -##lain -##lash -##lave -##lee -##lizing -##lly -##mart -##mas -##mate -##meter -##mill -##moving -##nging -##nity -##nna -##not -##ntly -##oc -##ographic -##ologically -##olving -##omic -##ood -##ops -##orate -##oric -##oss -##ought -##ows -##passed -##phone -##plete -##plicitly -##plicity -##pling -##ploded -##ply -##posal -##production -##quez -##rading -##rals -##rands -##ras -##rations -##read -##rection -##requent -##rest -##ret -##rick -##ride -##riving -##rture -##rude -##ship -##sional -##stitute -##structive -##style -##sume -##suming -##tach -##tar -##tarian -##tch -##tent -##think -##tical -##tie -##uana -##ubble -##ucted -##uding -##uiz -##ulsion -##ulus -##umble -##umbling -##umen -##ums -##unes -##unts -##uo -##urbs -##usable -##vans -##vator -##venient -##version -##vine -##vio -##vitation -##wind -##ws -##xed -##xious -##ym -##zone -##zzing -1900 -1930s -1950 -1980 -1991 -1995 -2002 -48 -60s -abused -accurately -acknowledge -ad -advantages -adventures -afghan -aiming -ak -alter -ambition -amino -angels -animated -annoying -answering -appreciation -argentina -assembled -assembling -associations -asylum -attract -attributes -automatic -backyard -bali -battles -bedroom -ben -bimetal -biologist -bizarre -blew -blocking -boiling -bonds -bonica -boundary -brick -bucks -butler -buttons -calculator -camps -captain -chad -checks -chernobyl -choosing -christian -cinema -clothing -clusters -collaborators -collapsed -collision -colorful -coma -comparing -competing -complained -conductive -confession -confronted -consciously -container -continents -continuous -copied -corrupt -couch -coworkers -cream -creatively -crimes -cultivate -da -dances -database -daycare -defending -delighted -describes -deserves -destroying -diabetes -diagnostic -dinosaur -disrupt -distances -domingos -draws -dude -dvd -dynamics -eager -editing -editor -editors -elementary -embraced -emergence -empower -enemies -enjoying -entrepreneurial -ethic -evaluating -evolving -ex -exhausting -expanded -experimental -expressing -facilitate -fails -fantasy -fashioned -filming -filmmaker -fingerprint -fingertips -firm -fled 
-flexibility -florence -fluorescence -fog -fooled -format -formation -fracking -frames -fraud -fright -ft -futureless -gasoline -genetically -geometry -gigantic -glimpse -golf -grading -grains -grandma -graphics -gratitude -greenhouse -gross -guided -gulf -hacking -hal -hardest -harmful -heavens -heel -hence -hill -hoped -hug -hybrid -ian -icons -identities -imperative -implanted -implication -impose -impressed -inadequate -inches -indoors -infect -inform -integrity -intensive -intricate -intrinsic -introverts -irony -italian -jb -jealous -jeff -jet -joining -josh -kepler -kills -kiss -koran -lastly -laughed -le -leaning -lethal -limitation -lion -liver -loan -lobby -logistics -longing -magnets -mankind -manual -margins -maslow -maximum -measurement -measurements -meets -messaging -metabolic -milky -millennium -minded -miners -modest -mud -multiply -muscles -mutation -mysterious -nanotechnology -newton -ngo -nickname -nightmare -noisy -noticing -numeracy -nutritious -occurring -olympics -opec -opera -operator -org -origami -overseas -packaged -palm -pants -paralysis -parenting -passively -perceived -phd -physicians -pie -pill -pipe -piracy -pizza -planes -platforms -polls -pops -portrait -portuguese -posed -poses -pot -potter -pour -pouring -prioritize -problematic -processed -profoundly -prone -protocol -publish -publishing -pulse -punched -raises -reacting -realizing -records -recruit -rectangle -reducing -reefs -regions -regulations -reinforced -relentless -remaining -renaissance -replacement -requirement -requirements -researching -resistant -responsibilities -restaurants -restricted -revenge -rides -robert -rockets -roles -rolled -roofs -rooted -rosetta -scares -scotland -seas -sections -sectors -sensing -sensory -serving -shares -shocking -shoe -shots -shout -shrink -sidewalk -slope -smallest -soap -spare -specialist -speeding -spill -spiral -spoofer -spreads -stakes -stephen -stepped -strike -strive -suite -supplies -sustained -symbolic -tanzania -targeted -taskrabbit -taxi -temples -temporal -terrific -theft -thermal -thermo -throat -tick -tight -tk -toast -topics -touched -trailer -transfers -transformative -transmission -transported -tricky -troll -tropical -ubuntu -uk -ukraine -undergraduate -unpredictable -vantage -vary -veteran -vice -virginia -virtues -visited -vocabulary -volumes -volunteered -voted -vr -wash -weaving -wheat -woke -wooden -zealand -##! -### -##$ -##% -##& -##' -##( -##) -##+ -##, -##- -##. -##/ -##: -##; -##= -##? -##@ -##[ -##] -##^ -##_ -##` -##ย -##ร -##อ -##– -##— -##’ -##♪ -##♫
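The deleted vocabulary entries above follow the BERT-style wordpiece convention: a leading "##" marks a piece that continues the preceding piece instead of starting a new word. A minimal plain-Python sketch of that joining rule (no TensorFlow needed; the helper name join_wordpieces and the sample pieces are invented for illustration):

def join_wordpieces(pieces):
    """Joins wordpiece tokens; a leading '##' means 'continue the previous word'."""
    words = []
    for piece in pieces:
        if piece.startswith("##") and words:
            words[-1] += piece[2:]  # continuation piece: append without the '##' marker
        else:
            words.append(piece)     # piece that starts a new word
    return words

print(join_wordpieces(["token", "##ization"]))  # ['tokenization']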
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/tokenization.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/tokenization.py deleted file mode 100644 index aa02862..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/tokenization.py +++ /dev/null
@@ -1,192 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Abstract base classes for all tokenizers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import abc - -from tensorflow.python.module import module -from tensorflow_text.python.ops.splitter import Splitter -from tensorflow_text.python.ops.splitter import SplitterWithOffsets - - -class Tokenizer(Splitter): - """Base class for tokenizer implementations. - - A Tokenizer is a `text.Splitter` that splits strings into *tokens*. Tokens - generally correspond to short substrings of the source string. Tokens can be - encoded using either strings or integer ids (where integer ids could be - created by hashing strings or by looking them up in a fixed vocabulary table - that maps strings to ids). - - Each Tokenizer subclass must implement a `tokenize` method, which splits each - string in a Tensor into tokens. E.g.: - - >>> class SimpleTokenizer(tf_text.Tokenizer): - ... def tokenize(self, input): - ... return tf.strings.split(input) - >>> print(SimpleTokenizer().tokenize(["hello world", "this is a test"])) - <tf.RaggedTensor [[b'hello', b'world'], [b'this', b'is', b'a', b'test']]> - - By default, the `split` method simply delegates to `tokenize`. - """ - - @abc.abstractmethod - def tokenize(self, input): # pylint: disable=redefined-builtin - """Tokenizes the input tensor. - - Splits each string in the input tensor into a sequence of tokens. Tokens - generally correspond to short substrings of the source string. Tokens can - be encoded using either strings or integer ids. - - Example: - - >>> print(tf_text.WhitespaceTokenizer().tokenize("small medium large")) - tf.Tensor([b'small' b'medium' b'large'], shape=(3,), dtype=string) - - Args: - input: An N-dimensional UTF-8 string (or optionally integer) `Tensor` or - `RaggedTensor`. - - Returns: - An N+1-dimensional UTF-8 string or integer `Tensor` or `RaggedTensor`. - For each string from the input tensor, the final, extra dimension contains - the tokens that string was split into. - """ - raise NotImplementedError("Abstract method") - - def split(self, input): # pylint: disable=redefined-builtin - """Alias for `Tokenizer.tokenize`.""" - return self.tokenize(input) - - -class TokenizerWithOffsets(Tokenizer, SplitterWithOffsets): - r"""Base class for tokenizer implementations that return offsets. - - The offsets indicate which substring from the input string was used to - generate each token. E.g., if `input` is a single string, then each token - `token[i]` was generated from the substring `input[starts[i]:ends[i]]`. - - Each TokenizerWithOffsets subclass must implement the `tokenize_with_offsets` - method, which returns a tuple containing both the pieces and the start and - end offsets where those pieces occurred in the input string. 
I.e., if - `tokens, starts, ends = tokenize_with_offsets(s)`, then each token `token[i]` - corresponds with `tf.strings.substr(s, starts[i], ends[i] - starts[i])`. - - If the tokenizer encodes tokens as strings (and not token ids), then it will - usually be the case that these corresponding strings are equal; but that is - not technically required. For example, a tokenizer might choose to downcase - strings - - Example: - - >>> class CharTokenizer(TokenizerWithOffsets): - ... def tokenize_with_offsets(self, input): - ... chars, starts = tf.strings.unicode_split_with_offsets(input, 'UTF-8') - ... lengths = tf.expand_dims(tf.strings.length(input), -1) - ... ends = tf.concat([starts[..., 1:], tf.cast(lengths, tf.int64)], -1) - ... return chars, starts, ends - ... def tokenize(self, input): - ... return self.tokenize_with_offsets(input)[0] - >>> pieces, starts, ends = CharTokenizer().split_with_offsets("a😊c") - >>> print(pieces.numpy(), starts.numpy(), ends.numpy()) - [b'a' b'\xf0\x9f\x98\x8a' b'c'] [0 1 5] [1 5 6] - - """ - - @abc.abstractmethod - def tokenize_with_offsets(self, input): # pylint: disable=redefined-builtin - """Tokenizes the input tensor and returns the result with offsets. - - The offsets indicate which substring from the input string was used to - generate each token. E.g., if `input` is a single string, then each token - `token[i]` was generated from the substring `input[starts[i]:ends[i]]`. - - Example: - - >>> splitter = tf_text.WhitespaceTokenizer() - >>> pieces, starts, ends = splitter.tokenize_with_offsets("a bb ccc") - >>> print(pieces.numpy(), starts.numpy(), ends.numpy()) - [b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] - >>> print(tf.strings.substr("a bb ccc", starts, ends-starts)) - tf.Tensor([b'a' b'bb' b'ccc'], shape=(3,), dtype=string) - - Args: - input: An N-dimensional UTF-8 string (or optionally integer) `Tensor` or - `RaggedTensor`. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - * `tokens` is an N+1-dimensional UTF-8 string or integer `Tensor` or - `RaggedTensor`. - * `start_offsets` is an N+1-dimensional integer `Tensor` or - `RaggedTensor` containing the starting indices of each token (byte - indices for input strings). - * `end_offsets` is an N+1-dimensional integer `Tensor` or - `RaggedTensor` containing the exclusive ending indices of each token - (byte indices for input strings). - """ - raise NotImplementedError("Abstract method") - - def split_with_offsets(self, input): # pylint: disable=redefined-builtin - """Alias for `TokenizerWithOffsets.tokenize_with_offsets`.""" - return self.tokenize_with_offsets(input) - - -class Detokenizer(module.Module): - """Base class for detokenizer implementations. - - A Detokenizer is a module that combines tokens to form strings. Generally, - subclasses of `Detokenizer` will also be subclasses of `Tokenizer`; and the - `detokenize` method will be the inverse of the `tokenize` method. I.e., - `tokenizer.detokenize(tokenizer.tokenize(s)) == s`. - - Each Detokenizer subclass must implement a `detokenize` method, which combines - tokens together to form strings. E.g.: - - >>> class SimpleDetokenizer(tf_text.Detokenizer): - ... def detokenize(self, input): - ... 
return tf.strings.reduce_join(input, axis=-1, separator=" ") - >>> text = tf.ragged.constant([["hello", "world"], ["a", "b", "c"]]) - >>> print(SimpleDetokenizer().detokenize(text)) - tf.Tensor([b'hello world' b'a b c'], shape=(2,), dtype=string) - """ - - __metaclass__ = abc.ABCMeta - - @abc.abstractmethod - def detokenize(self, input): # pylint: disable=redefined-builtin - """Assembles the tokens in the input tensor into a string. - - Generally, `detokenize` is the inverse of the `tokenize` method, and can - be used to reconstruct a string from a set of tokens. This is especially - helpful in cases where the tokens are integer ids, such as indexes into a - vocabulary table -- in that case, the tokenized encoding is not very - human-readable (since it's just a list of integers), so the `detokenize` - method can be used to turn it back into something that's more readable. - - Args: - input: An N-dimensional UTF-8 string or integer `Tensor` or - `RaggedTensor`. - - Returns: - An (N-1)-dimensional UTF-8 string `Tensor` or `RaggedTensor`. - """ - raise NotImplementedError("Abstract method")
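The deleted tokenization.py above only defines the abstract Tokenizer, TokenizerWithOffsets and Detokenizer interfaces; concrete tokenizers supply tokenize (and, where round-tripping makes sense, detokenize). A minimal sketch of a subclass wiring the two halves together, assuming a TensorFlow environment where the upstream tensorflow_text package is still importable (the class name WhitespaceRoundTrip is invented for illustration):

import tensorflow as tf
import tensorflow_text as tf_text

class WhitespaceRoundTrip(tf_text.Tokenizer, tf_text.Detokenizer):
    """Toy tokenizer that splits on whitespace and rejoins with single spaces."""

    def tokenize(self, input):  # pylint: disable=redefined-builtin
        # Adds one ragged dimension holding the tokens of each input string.
        return tf.strings.split(input)

    def detokenize(self, input):  # pylint: disable=redefined-builtin
        # Inverse of `tokenize`, up to whitespace normalization.
        return tf.strings.reduce_join(input, axis=-1, separator=" ")

t = WhitespaceRoundTrip()
print(t.detokenize(t.tokenize(["hello world", "a b c"])))
# tf.Tensor([b'hello world' b'a b c'], shape=(2,), dtype=string)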
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/trimmer_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/trimmer_ops.py deleted file mode 100644 index 355ee4a..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/trimmer_ops.py +++ /dev/null
@@ -1,269 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Library of ops to truncate segments.""" -import abc - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_array_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow_text.python.ops import item_selector_ops - - -class Trimmer(metaclass=abc.ABCMeta): - """Truncates a list of segments using a pre-determined truncation strategy. - """ - - def trim(self, segments): - """Truncate the list of `segments`. - - Truncate the list of `segments` using the truncation strategy defined by - `generate_mask`. - - Args: - segments: A list of `RaggedTensor`s w/ shape [num_batch, (num_items)]. - - Returns: - a list of `RaggedTensor`s with len(segments) number of items and where - each item has the same shape as its counterpart in `segments` and - with unwanted values dropped. The values are dropped according to the - `TruncationStrategy` defined. - """ - with ops.name_scope("Trimmer/Trim"): - segments = [ragged_tensor.convert_to_tensor_or_ragged_tensor(s) - for s in segments] - truncate_masks = self.generate_mask(segments) - truncated_segments = [ - ragged_array_ops.boolean_mask( - seg, mask.with_row_splits_dtype(seg.row_splits.dtype)) - for seg, mask in zip(segments, truncate_masks) - ] - return truncated_segments - - @abc.abstractmethod - def generate_mask(self, segments): - """Generates a boolean mask specifying which portions of `segments` to drop. - - Users should be able to use the results of generate_mask() to drop items - in segments using `tf.ragged.boolean_mask(seg, mask)`. - - Args: - segments: A list of `RaggedTensor` each w/ a shape of [num_batch, - (num_items)]. - - Returns: - a list with len(segments) number of items and where each item is a - `RaggedTensor` with the same shape as its counterpart in `segments` and - with a boolean dtype where each value is True if the corresponding - value in `segments` should be kept and False if it should be dropped - instead. - """ - raise NotImplementedError() - - -def _get_row_lengths(segments, axis=-1): - axis = array_ops.get_positive_axis(axis, segments.shape.ndims) - 1 - foo = ragged_tensor.RaggedTensor.from_nested_row_lengths( - segments.nested_row_lengths()[axis], - segments.nested_row_lengths()[:axis]) - for _ in range(axis): - foo = math_ops.reduce_sum(foo, -1) - return foo - - -class WaterfallTrimmer(Trimmer): - """A `Trimmer` that allocates a length budget to segments in order. - - A `Trimmer` that allocates a length budget to segments in order. It selects - elements to drop, according to a max sequence length budget, and then applies - this mask to actually drop the elements. See `generate_mask()` for more - details. 
- - Example: - - >>> a = tf.ragged.constant([['a', 'b', 'c'], [], ['d']]) - >>> b = tf.ragged.constant([['1', '2', '3'], [], ['4', '5', '6', '7']]) - >>> trimmer = tf_text.WaterfallTrimmer(4) - >>> trimmer.trim([a, b]) - [<tf.RaggedTensor [[b'a', b'b', b'c'], [], [b'd']]>, - <tf.RaggedTensor [[b'1'], [], [b'4', b'5', b'6']]>] - - Here, for the first pair of elements, `['a', 'b', 'c']` and `['1', '2', '3']`, - the `'2'` and `'3'` are dropped to fit the sequence within the max sequence - length budget. - """ - - def __init__(self, max_seq_length, axis=-1): - """Creates an instance of `WaterfallTrimmer`. - - Args: - max_seq_length: a scalar `Tensor` or a 1D `Tensor` of type int32 that - describes the max number of elements allowed in a batch. If a - scalar is provided, the value is broadcasted and applied to all values - across the batch. - axis: Axis to apply trimming on. - """ - self._max_seq_length = max_seq_length - self._axis = axis - - def generate_mask(self, segments): - """Calculates a truncation mask given a per-batch budget. - - Calculates a truncation mask given a budget for the max number of items in - each batch row. The allocation of the budget is done using a - 'waterfall' algorithm. This algorithm allocates quota in a left-to-right - manner and fills up the buckets until the budget runs out. - - For example, if the budget is [5] and the segments have sizes - [3, 4, 2], the budget will be allocated as [3, 2, 0]. - - The budget can be a scalar, in which case the same budget is broadcasted - and applied to all batch rows. It can also be a 1D `Tensor` of size - `batch_size`, in which case each batch row i will have a budget of - `max_seq_length[i]`. - - Example: - - >>> a = tf.ragged.constant([['a', 'b', 'c'], [], ['d']]) - >>> b = tf.ragged.constant([['1', '2', '3'], [], ['4', '5', '6', '7']]) - >>> trimmer = tf_text.WaterfallTrimmer(4) - >>> trimmer.generate_mask([a, b]) - [<tf.RaggedTensor [[True, True, True], [], [True]]>, - <tf.RaggedTensor [[True, False, False], [], [True, True, True, False]]>] - - Args: - segments: A list of `RaggedTensor` each w/ a shape of [num_batch, - (num_items)]. - Returns: - a list with len(segments) of `RaggedTensor`s, see superclass for details.
- """ - with ops.name_scope("WaterfallTrimmer/generate_mask"): - segment_row_lengths = [_get_row_lengths(s, self._axis) for s in segments] - segment_row_lengths = array_ops.stack(segment_row_lengths, axis=-1) - - # Broadcast budget to match the rank of segments[0] - budget = ops.convert_to_tensor(self._max_seq_length) - for _ in range(segments[0].shape.ndims - budget.shape.ndims): - budget = array_ops.expand_dims(budget, -1) - - # Compute the allocation for each segment using a `waterfall` algorithm - segment_lengths = math_ops.cast(segment_row_lengths, dtypes.int32) - budget = math_ops.cast(budget, dtypes.int32) - leftover_budget = math_ops.cumsum( - -1 * segment_lengths, exclusive=False, axis=-1) + budget - leftover_budget = segment_lengths + math_ops.minimum(leftover_budget, 0) - results = math_ops.maximum(leftover_budget, 0) - - # Translate the results into boolean masks that match the shape of each - # segment - results = array_ops.unstack(results, axis=-1) - item_selectors = [ - item_selector_ops.FirstNItemSelector(i) for i in results - ] - return [ - i.get_selectable(s, self._axis) - for s, i in zip(segments, item_selectors) - ] - - -def _round_robin_allocation(row_lengths, max_seq_length): - """Allocating quota via round robin algorithm.""" - distribution = array_ops.zeros_like(row_lengths) - i = constant_op.constant(0) - batch_size = array_ops.shape(row_lengths)[0] - num_segments = array_ops.shape(row_lengths)[1] - quota_used = array_ops.zeros([batch_size], dtypes.int32) - max_seq_length_bc = max_seq_length + 0 * quota_used - - def _cond(i, dist, quota_used): - del i - have_quota = quota_used < max_seq_length_bc - have_space = math_ops.reduce_any(dist < row_lengths, 1) - return math_ops.reduce_any(math_ops.logical_and(have_quota, have_space)) - - def _body(i, dist, quota_used): - index = math_ops.mod(i, num_segments) - updates = array_ops.where(dist[..., index] < row_lengths[..., index], - array_ops.ones_like(dist[..., index]), - array_ops.zeros_like(dist[..., index])) - scatter_index = array_ops.tile([index], [batch_size]) - scatter_index = array_ops.expand_dims(scatter_index, -1) - batch_dim = array_ops.reshape(math_ops.range(batch_size), [batch_size, 1]) - scatter_index_2d = array_ops.concat([batch_dim, scatter_index], -1) - new_dist = array_ops.tensor_scatter_add(dist, scatter_index_2d, updates) - return i + 1, new_dist, quota_used + updates - - _, results, _ = control_flow_ops.while_loop(_cond, _body, - (i, distribution, quota_used)) - return results - - -class RoundRobinTrimmer(Trimmer): - """A `Trimmer` that allocates a length budget to segments via round robin. - - A `Trimmer` that allocates a length budget to segments using a round robin - strategy, then drops elements outside of the segment's allocated budget. - See `generate_mask()` for more details. - """ - - def __init__(self, max_seq_length, axis=-1): - """Creates an instance of `RoundRobinTrimmer`. - - Args: - max_seq_length: a scalar `Tensor` int32 that describes the number max - number of elements allowed in a batch. - axis: Axis to apply trimming on. - """ - self._max_seq_length = max_seq_length - self._axis = axis - - def generate_mask(self, segments): - """Calculates a truncation mask given a per-batch budget. - - Calculate a truncation mask given a budget of the max number of items for - each or all batch row. The allocation of the budget is done using a - 'round robin' algorithm. This algorithm allocates quota in each bucket, - left-to-right repeatedly until all the buckets are filled. 
- - For example, if the budget is [5] and the segments have sizes - [3, 4, 2], the budget will be allocated as [2, 2, 1]. - - Args: - segments: A list of `RaggedTensor` each w/ a shape of [num_batch, - (num_items)]. - - Returns: - a list with len(segments) of `RaggedTensor`s, see superclass for details. - """ - with ops.name_scope("RoundRobinTrimmer/generate_mask"): - segment_row_lengths = [_get_row_lengths(s, self._axis) for s in segments] - segment_row_lengths = array_ops.stack(segment_row_lengths, axis=-1) - segment_row_lengths = math_ops.cast(segment_row_lengths, dtypes.int32) - budget = ops.convert_to_tensor(self._max_seq_length) - results = _round_robin_allocation(segment_row_lengths, budget) - - results = array_ops.unstack(results, axis=-1) - item_selectors = [ - item_selector_ops.FirstNItemSelector(i) for i in results - ] - return [ - i.get_selectable(s, self._axis) - for s, i in zip(segments, item_selectors) - ]
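The two trimmers deleted above differ only in how the shared length budget is split across segments: WaterfallTrimmer spends the budget on segments left to right, while RoundRobinTrimmer hands out one element per segment per pass. A plain-Python sketch of the two allocation rules (no TensorFlow; the function names are invented for illustration), reproducing the [3, 2, 0] and [2, 2, 1] splits the docstrings describe for a budget of 5 and segment lengths [3, 4, 2]:

def waterfall_allocation(lengths, budget):
    """Gives each segment as much of the remaining budget as it can use, left to right."""
    allocation = []
    for length in lengths:
        take = min(length, budget)
        allocation.append(take)
        budget -= take
    return allocation

def round_robin_allocation(lengths, budget):
    """Hands out one element per segment per pass until the budget or the segments run out."""
    allocation = [0] * len(lengths)
    while budget > 0 and any(a < l for a, l in zip(allocation, lengths)):
        for i, length in enumerate(lengths):
            if budget == 0:
                break
            if allocation[i] < length:
                allocation[i] += 1
                budget -= 1
    return allocation

print(waterfall_allocation([3, 4, 2], 5))    # [3, 2, 0]
print(round_robin_allocation([3, 4, 2], 5))  # [2, 2, 1]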
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/trimmer_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/trimmer_ops_test.py deleted file mode 100644 index 18a14b13..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/trimmer_ops_test.py +++ /dev/null
@@ -1,516 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for ops to trim segments.""" -from absl.testing import parameterized - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops import trimmer_ops - - -@test_util.run_all_in_graph_and_eager_modes -class WaterfallTrimmerOpsTest(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - # pyformat: disable - dict( - segments=[ - # segment 1 - [[1, 2, 3], [4, 5], [6]], - # segment 2 - [[10], [20], [30, 40, 50]] - ], - expected=[ - # segment 1 - [[True, True, False], [True, False], [True]], - # Segment 2 - [[False], [False], [True, True, False]] - ], - max_seq_length=[[2], [1], [3]], - ), - dict( - segments=[ - # segment 1 - [[1, 2, 3], [4, 5], [6]], - # segment 2 - [[10], [20], [30, 40, 50]] - ], - expected=[ - # segment 1 - [[True, True, False], [True, False], [True]], - # Segment 2 - [[False], [False], [True, True, False]] - ], - max_seq_length=[2, 1, 3], - ), - dict( - segments=[ - # first segment - [[b"hello"], [b"name", b"is"], - [b"what", b"time", b"is", b"it", b"?"]], - # second segment - [[b"whodis", b"?"], [b"bond", b",", b"james", b"bond"], - [b"5:30", b"AM"]], - ], - max_seq_length=2, - expected=[ - # first segment - [[True], [True, True], [True, True, False, False, False]], - # second segment - [[True, False], [False, False, False, False], [False, False]], - ], - ), - dict( - descr="Test when segments are rank 3 RaggedTensors", - segments=[ - # first segment - [[[b"hello"], [b"there"]], [[b"name", b"is"]], - [[b"what", b"time"], [b"is"], [b"it"], [b"?"]]], - # second segment - [[[b"whodis"], [b"?"]], [[b"bond"], [b","], [b"james"], - [b"bond"]], [[b"5:30"], [b"AM"]]], - ], - max_seq_length=2, - expected=[[[[True], [True]], [[True, True]], - [[True, True], [False], [False], [False]]], - [[[False], [False]], [[False], [False], [False], [False]], - [[False], [False]]]], - ), - dict( - descr="Test when segments are rank 3 RaggedTensors and axis = 1", - segments=[ - # first segment - [[[b"hello"], [b"there"]], [[b"name", b"is"]], - [[b"what", b"time"], [b"is"], [b"it"], [b"?"]]], - # second segment - [[[b"whodis"], [b"?"]], [[b"bond"], [b","], [b"james"], - [b"bond"]], [[b"5:30"], [b"AM"]]], - ], - axis=1, - max_seq_length=2, - expected=[ - # 1st segment - [[True, True], [True], [True, True, False, False]], - # 2nd segment - [[False, False], [True, False, False, False], [False, False]], - ], - ), - # pyformat: enable - ]) - def testGenerateMask(self, - segments, - max_seq_length, - expected, - axis=-1, - descr=None): - max_seq_length = constant_op.constant(max_seq_length) - segments = [ragged_factory_ops.constant(i) for i in segments] - expected = [ragged_factory_ops.constant(i) for i in expected] - trimmer = trimmer_ops.WaterfallTrimmer(max_seq_length, axis=axis) - 
actual = trimmer.generate_mask(segments) - for expected_mask, actual_mask in zip(expected, actual): - self.assertAllEqual(actual_mask, expected_mask) - - @parameterized.parameters([ - dict( - segments=[ - # first segment - [[b"hello", b"there"], [b"name", b"is"], - [b"what", b"time", b"is", b"it", b"?"]], - # second segment - [[b"whodis", b"?"], [b"bond", b",", b"james", b"bond"], - [b"5:30", b"AM"]], - ], - max_seq_length=[1, 3, 4], - expected=[ - # Expected first segment has shape [3, (1, 2, 4)] - [[b"hello"], [b"name", b"is"], [b"what", b"time", b"is", b"it"]], - # Expected second segment has shape [3, (0, 1, 0)] - [[], [b"bond"], []], - ]), - dict( - descr="Test max sequence length across the batch", - segments=[ - # first segment - [[b"hello", b"there"], [b"name", b"is"], - [b"what", b"time", b"is", b"it", b"?"]], - # second segment - [[b"whodis", b"?"], [b"bond", b",", b"james", b"bond"], - [b"5:30", b"AM"]], - ], - max_seq_length=2, - expected=[ - # Expected first segment has shape [3, (2, 2, 2)] - [[b"hello", b"there"], [b"name", b"is"], [b"what", b"time"]], - # Expected second segment has shape [3, (0, 0, 0)] - [[], [], []], - ], - ), - dict( - descr="Test when segments are rank 3 RaggedTensors", - segments=[ - # first segment - [[[b"hello"], [b"there"]], [[b"name", b"is"]], - [[b"what", b"time"], [b"is"], [b"it"], [b"?"]]], - # second segment - [[[b"whodis"], [b"?"]], [[b"bond"], [b","], [b"james"], - [b"bond"]], [[b"5:30"], [b"AM"]]], - ], - max_seq_length=2, - expected=[ - # Expected first segment has shape [3, (2, 2, 2)] - [[[b"hello"], [b"there"]], [[b"name", b"is"]], - [[b"what", b"time"], [], [], []]], - # Expected second segment has shape [3, (0, 0, 0)] - [[[], []], [[], [], [], []], [[], []]] - ], - ), - dict( - descr="Test when segments are rank 3 RaggedTensors and axis = 1", - segments=[ - # first segment - [[[b"hello"], [b"there"]], [[b"name", b"is"]], - [[b"what", b"time"], [b"is"], [b"it"], [b"?"]]], - # second segment - [[[b"whodis"], [b"?"]], [[b"bond"], [b","], [b"james"], - [b"bond"]], [[b"5:30"], [b"AM"]]], - ], - axis=1, - max_seq_length=2, - expected=[ - [[[b"hello"], [b"there"]], [[b"name", b"is"]], - [[b"what", b"time"], [b"is"]]], - [[], [[b"bond"]], []], - ], - ), - ]) - def testPerBatchBudgetTrimmer(self, - max_seq_length, - segments, - expected, - axis=-1, - descr=None): - max_seq_length = constant_op.constant(max_seq_length) - trimmer = trimmer_ops.WaterfallTrimmer(max_seq_length, axis=axis) - segments = [ragged_factory_ops.constant(seg) for seg in segments] - expected = [ragged_factory_ops.constant(exp) for exp in expected] - actual = trimmer.trim(segments) - for expected_seg, actual_seg in zip(expected, actual): - self.assertAllEqual(expected_seg, actual_seg) - - -@test_util.run_all_in_graph_and_eager_modes -class RoundRobinTrimmerOpsTest(test.TestCase, parameterized.TestCase): - - @parameterized.parameters([ - # pyformat: disable - dict( - descr="Basic test on rank 2 RTs", - segments=[ - # segment 1 - [[1, 2, 3], [4, 5], [6]], - # segment 2 - [[10], [20], [30, 40, 50]] - ], - expected=[ - # segment 1 - [[True, False, False], [True, False], [True]], - # Segment 2 - [[True], [True], [True, False, False]] - ], - max_seq_length=2, - ), - dict( - descr="Test where no truncation is needed", - segments=[ - # segment 1 - [[1, 2, 3], [4, 5], [6]], - # segment 2 - [[10], [20], [30, 40, 50]] - ], - expected=[ - # segment 1 - [[True, True, True], [True, True], [True]], - # Segment 2 - [[True], [True], [True, True, True]] - ], - max_seq_length=100, - ), - dict( - 
descr="Basic test w/ segments of rank 3 on axis=-1", - segments=[ - # first segment - # [batch, num_tokens, num_wordpieces] - [[[b"hello", b"123"], [b"there"]]], - # second segment - [[[b"whodis", b"233"], [b"?"]]], - ], - max_seq_length=2, - axis=-1, - expected=[ - # segment 1 - [[[True, False], [False]]], - # Segment 2 - [[[True, False], [False]]] - ], - ), - dict( - descr="Test 4 segments", - segments=[ - # first segment - [[b"a", b"b"]], - # second segment - [[b"one", b"two"]], - # third segment - [[b"un", b"deux", b"trois", b"quatre", b"cinque"]], - # fourth segment - [[b"unos", b"dos", b"tres", b"quatro", b"cincos"]], - ], - max_seq_length=10, - expected=[ - # first segment - [[True, True]], - # second segment - [[True, True]], - # third segment - [[True, True, True, False, False]], - # fourth segment - [[True, True, True, False, False]], - ], - ), - dict( - descr="Test rank 3 RTs, single batch", - segments=[ - [[[3897], [4702]]], - [[[[4248], [2829], [4419]]]], - ], - max_seq_length=7, - expected=[ - [[[True], [True]]], - [[[[True], [True], [True]]]], - ], - ), - dict( - descr="Test rank 2; test when one batch has " - "elements < max_seq_length", - segments=[[[11, 12, 13], - [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]], - [[11, 12, 13], - [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]]], - max_seq_length=7, - axis=-1, - expected=[ - [[True, True, True], - [True, True, True, True, - False, False, False, False, False, False]], - [[True, True, True], - [True, True, True, False, - False, False, False, False, False, False]] - ], - ), - # pyformat: enable - ]) - def testGenerateMask(self, - segments, - max_seq_length, - expected, - axis=-1, - descr=None): - max_seq_length = constant_op.constant(max_seq_length) - segments = [ragged_factory_ops.constant(i) for i in segments] - expected = [ragged_factory_ops.constant(i) for i in expected] - trimmer = trimmer_ops.RoundRobinTrimmer(max_seq_length, axis=axis) - actual = trimmer.generate_mask(segments) - for expected_mask, actual_mask in zip(expected, actual): - self.assertAllEqual(actual_mask, expected_mask) - - @parameterized.parameters([ - # pyformat: disable - dict( - descr="Test w/ segments of rank 3 on axis=-1", - segments=[ - # first segment - [[[b"hello", b"123"], [b"there"]]], - # second segment - [[[b"whodis", b"233"], [b"?"]]], - ], - max_seq_length=2, - axis=-1, - expected=[ - [[[b"hello"], []]], - [[[b"whodis"], []]], - ]), - dict( - descr="Test rank 2; test when one batch has " - "elements < max_seq_length", - segments=[[[11, 12, 13], - [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]], - [[11, 12, 13], - [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]]], - max_seq_length=7, - axis=-1, - expected=[ - [[11, 12, 13], - [21, 22, 23, 24]], - [[11, 12, 13], - [21, 22, 23]]], - ), - dict( - descr="Test wordpiece trimming across 2 segments", - segments=[ - # first segment - [[[b"hello", b"123"], [b"there"]]], - # second segment - [[[b"whodis", b"233"], [b"?"]]], - ], - max_seq_length=3, - axis=-1, - expected=[ - [[[b"hello", b"123"], []]], - [[[b"whodis"], []]], - ]), - dict( - descr="Test whole word trimming across 2 segments", - segments=[ - # first segment - [[[b"hello", b"123"], [b"there"]]], - # second segment - [[[b"whodis", b"233"], [b"?"]]], - ], - max_seq_length=3, - axis=-2, - expected=[ - [[[b"hello", b"123"], [b"there"]]], - [[[b"whodis", b"233"]]], - ]), - dict( - descr="Basic test w/ segments of rank 2", - segments=[ - # first segment - [[b"hello", b"there"], [b"name", b"is"], - [b"what", b"time", b"is", b"it", b"?"]], - # second segment - 
[[b"whodis", b"?"], [b"bond", b",", b"james", b"bond"], - [b"5:30", b"AM"]], - ], - max_seq_length=2, - expected=[ - # Expected first segment has shape [3, (1, 2, 4)] - [[b"hello"], [b"name"], [b"what"]], - # Expected second segment has shape [3, (0, 1, 0)] - [[b"whodis"], [b"bond"], [b"5:30"]], - ]), - dict( - descr="Basic test w/ segments of rank 3", - segments=[ - # first segment - [[[b"hello"], [b"there"]], [[b"name"], [b"is"]], - [[b"what", b"time"], [b"is"], [b"it", b"?"]]], - # second segment - [[[b"whodis"], [b"?"]], [[b"bond"], [b","], [b"james"], - [b"bond"]], [[b"5:30"], [b"AM"]]], - ], - max_seq_length=2, - expected=[ - # Expected first segment has shape [3, (1, 2, 4), 1] - [[[b"hello"], []], [[b"name"], []], [[b"what"], [], []]], - # Expected second segment has shape [3, (0, 1, 0)] - [[[b"whodis"], []], [[b"bond"], [], [], []], [[b"5:30"], []]], - ]), - dict( - descr="Basic test w/ segments of rank 3 on axis=-2", - segments=[ - # first segment - [[[b"hello"], [b"there"]], [[b"name"], [b"is"]], - [[b"what", b"time"], [b"is"], [b"it", b"?"]]], - # second segment - [[[b"whodis"], [b"?"]], [[b"bond"], [b","], [b"james"], - [b"bond"]], [[b"5:30"], [b"AM"]]], - ], - max_seq_length=2, - axis=-2, - expected=[ - # Expected first segment has shape [3, (1, 2, 4), 1] - [[[b"hello"]], [[b"name"]], [[b"what", b"time"]]], - # Expected second segment has shape [3, (0, 1, 0)] - [[[b"whodis"]], [[b"bond"]], [[b"5:30"]]], - ]), - dict( - descr="Test 4 segments", - segments=[ - # first segment - [[b"a", b"b"]], - # second segment - [[b"one", b"two"]], - # third segment - [[b"un", b"deux", b"trois", b"quatre", b"cinque"]], - # fourth segment - [[b"unos", b"dos", b"tres", b"quatro", b"cincos"]], - ], - max_seq_length=10, - expected=[ - [[b"a", b"b"]], - [[b"one", b"two"]], - [[b"un", b"deux", b"trois"]], - [[b"unos", b"dos", b"tres"]], - ], - ), - dict( - descr="Test 4 segments of rank 3 on axis=-1", - segments=[ - # first segment - [[[b"a", b"b"], [b"c", b"d"]]], - # second segment - [[[b"one", b"two"], [b"three", b"four"]]], - # third segment - [[[b"un", b"deux", b"trois", b"quatre", b"cinque"], [b"six"]]], - # fourth segment - [[[b"unos", b"dos", b"tres", b"quatro", b"cincos"], [b"seis"]]], - ], - max_seq_length=10, - axis=-1, - expected=[ - # first segment - [[[b"a", b"b"], [b"c"]]], - # second segment - [[[b"one", b"two"], [b"three"]]], - # third segment - [[[b"un", b"deux"], []]], - # fourth segment - [[[b"unos", b"dos"], []]], - ], - ), - # pyformat: enable - ]) - def testPerBatchBudgetTrimmer(self, - max_seq_length, - segments, - expected, - axis=-1, - descr=None): - max_seq_length = constant_op.constant(max_seq_length) - trimmer = trimmer_ops.RoundRobinTrimmer(max_seq_length, axis=axis) - segments = [ragged_factory_ops.constant(seg) for seg in segments] - expected = [ragged_factory_ops.constant(exp) for exp in expected] - actual = trimmer.trim(segments) - for expected_seg, actual_seg in zip(expected, actual): - self.assertAllEqual(expected_seg, actual_seg) - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_char_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_char_tokenizer.py deleted file mode 100644 index 36695a5d9..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_char_tokenizer.py +++ /dev/null
@@ -1,169 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tokenizer implementation for character-based models.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_array_ops -from tensorflow.python.ops.ragged import ragged_string_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow_text.python.ops.tokenization import Detokenizer -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - - -_tf_text_unicode_char_tokenizer_create_counter = monitoring.Counter( - "/nlx/api/python/unicode_char_tokenizer_create_counter", - "Counter for number of UnicodeCharTokenizers created in Python.") - - -class UnicodeCharTokenizer(TokenizerWithOffsets, Detokenizer): - """Tokenizes a tensor of UTF-8 strings on Unicode character boundaries. - - - Resulting tokens are integers (unicode codepoints). Scalar input will - produce a `Tensor` output containing the codepoints. Tensor inputs will - produce `RaggedTensor` outputs. - - Example: - - >>> tokenizer = tf_text.UnicodeCharTokenizer() - >>> tokens = tokenizer.tokenize("abc") - >>> print(tokens) - tf.Tensor([97 98 99], shape=(3,), dtype=int32) - - >>> tokens = tokenizer.tokenize(["abc", "de"]) - >>> print(tokens) - <tf.RaggedTensor [[97, 98, 99], [100, 101]]> - - Note: any remaining illegal and special UTF-8 characters (like BOM - characters) in the input string will not be treated specially by the tokenizer - and show up in the output tokens. These should be normalized out before - or after tokenization if they are unwanted in the application. - - >>> t = ["abc" + chr(0xfffe) + chr(0x1fffe) ] - >>> tokens = tokenizer.tokenize(t) - >>> print(tokens.to_list()) - [[97, 98, 99, 65534, 131070]] - - Passing malformed UTF-8 will result in unpredictable behavior. Make sure - inputs conform to UTF-8. - """ - - def __init__(self): - """Initializes a new instance.""" - super(UnicodeCharTokenizer, self).__init__() - _tf_text_unicode_char_tokenizer_create_counter.get_cell().increase_by(1) - - def tokenize(self, input): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 strings on Unicode character boundaries. - - Input strings are split on character boundaries using - unicode_decode_with_offsets. - - Args: - input: A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. - - Returns: - A `RaggedTensor` of tokenized text. The returned shape is the shape of the - input tensor with an added ragged dimension for tokens (characters) of - each string. 
- """ - (tokens, _, _) = self.tokenize_with_offsets(input) - return tokens - - def tokenize_with_offsets(self, input): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 strings to Unicode characters. - - Example: - - >>> tokenizer = tf_text.UnicodeCharTokenizer() - >>> tokens = tokenizer.tokenize_with_offsets("a"+chr(8364)+chr(10340)) - >>> print(tokens[0]) - tf.Tensor([ 97 8364 10340], shape=(3,), dtype=int32) - >>> print(tokens[1]) - tf.Tensor([0 1 4], shape=(3,), dtype=int64) - >>> print(tokens[2]) - tf.Tensor([1 4 7], shape=(3,), dtype=int64) - - The `start_offsets` and `end_offsets` are in byte indices of the original - string. When calling with multiple string inputs, the offset indices will - be relative to the individual source strings: - - >>> toks = tokenizer.tokenize_with_offsets(["a"+chr(8364), "b"+chr(10300) ]) - >>> print(toks[0]) - <tf.RaggedTensor [[97, 8364], [98, 10300]]> - >>> print(toks[1]) - <tf.RaggedTensor [[0, 1], [0, 1]]> - >>> print(toks[2]) - <tf.RaggedTensor [[1, 4], [1, 4]]> - - Args: - input: A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - * `tokens`: A `RaggedTensor` of code points (integer type). - * `start_offsets`: A `RaggedTensor` of the tokens' starting byte offset. - * `end_offsets`: A `RaggedTensor` of the tokens' ending byte offset. - """ - name = None - with ops.name_scope(name, "UnicodeCharTokenize", [input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - (codepoints, byte_start_offsets) = ( - ragged_string_ops.unicode_decode_with_offsets(input_tensor, "UTF-8")) - strlens = math_ops.cast( - array_ops.expand_dims(string_ops.string_length(input_tensor), -1), - dtypes.int64) - # Adjust strlens to set 0-length strings to empty array (there will be no - # tokens in this case). - final_ends = ragged_array_ops.boolean_mask(strlens, strlens > 0) - byte_end_offsets = array_ops.concat( - [byte_start_offsets[..., 1:], final_ends], -1) - return codepoints, byte_start_offsets, byte_end_offsets - - def detokenize(self, input, name=None): # pylint: disable=redefined-builtin - """Detokenizes input codepoints (integers) to UTF-8 strings. - - Example: - - >>> tokenizer = tf_text.UnicodeCharTokenizer() - >>> tokens = tokenizer.tokenize(["abc", "de"]) - >>> s = tokenizer.detokenize(tokens) - >>> print(s) - tf.Tensor([b'abc' b'de'], shape=(2,), dtype=string) - - Args: - input: A `RaggedTensor` or `Tensor` of codepoints (ints) with a rank of at - least 1. - name: The name argument that is passed to the op function. - - Returns: - A N-1 dimensional string tensor of the text corresponding to the UTF-8 - codepoints in the input. - """ - name = None - with ops.name_scope(name, "UnicodeCharTokenize", [input, self]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - return ragged_string_ops.unicode_encode(input_tensor, "UTF-8")
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_char_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_char_tokenizer_test.py deleted file mode 100644 index 1246d4b..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_char_tokenizer_test.py +++ /dev/null
@@ -1,352 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for unicode_char_tokenizer.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops.unicode_char_tokenizer import UnicodeCharTokenizer # pylint: disable=line-too-long - - -@test_util.run_all_in_graph_and_eager_modes -class UnicodeCharTokenizerOpTest(test.TestCase): - - def setUp(self): - super(UnicodeCharTokenizerOpTest, self).setUp() - self.tokenizer = UnicodeCharTokenizer() - - def testRequireParams(self): - with self.cached_session(): - with self.assertRaises(TypeError): - self.tokenizer.tokenize() - - def testScalar(self): - test_value = constant_op.constant(b'I love Flume!') - expected_tokens = [ - ord('I'), - ord(' '), - ord('l'), - ord('o'), - ord('v'), - ord('e'), - ord(' '), - ord('F'), - ord('l'), - ord('u'), - ord('m'), - ord('e'), - ord('!') - ] - expected_offset_starts = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - expected_offset_ends = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testVectorSingleValue(self): - test_value = constant_op.constant([b'I lov']) - expected_tokens = [[ord('I'), ord(' '), ord('l'), ord('o'), ord('v')]] - expected_offset_starts = [[0, 1, 2, 3, 4]] - expected_offset_ends = [[1, 2, 3, 4, 5]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testVector(self): - test_value = constant_op.constant([b'123', b'456']) - expected_tokens = [[ord('1'), ord('2'), ord('3')], - [ord('4'), ord('5'), ord('6')]] - expected_offset_starts = [[0, 1, 2], [0, 1, 2]] - expected_offset_ends = [[1, 2, 3], [1, 2, 3]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, 
expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testMatrix(self): - test_value = constant_op.constant([[b'ab', b'cde'], [b'12', b'34']]) - expected_tokens = [[[ord('a'), ord('b')], [ord('c'), - ord('d'), - ord('e')]], - [[ord('1'), ord('2')], [ord('3'), ord('4')]]] - expected_offset_starts = [[[0, 1], [0, 1, 2]], [[0, 1], [0, 1]]] - expected_offset_ends = [[[1, 2], [1, 2, 3]], [[1, 2], [1, 2]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual( - self.evaluate(detokenized).to_list(), - self.evaluate(test_value).tolist()) - - def testMatrixRagged(self): - test_value = ragged_factory_ops.constant([[u'I love ∰'], [b'a', b'bc']]) - expected_tokens = [[[ - ord('I'), - ord(' '), - ord('l'), - ord('o'), - ord('v'), - ord('e'), - ord(' '), - ord(u'∰') - ]], [[ord('a')], [ord('b'), ord('c')]]] - expected_offset_starts = [[[0, 1, 2, 3, 4, 5, 6, 7]], [[0], [0, 1]]] - expected_offset_ends = [[[1, 2, 3, 4, 5, 6, 7, 10]], [[1], [1, 2]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def test3DimMatrix(self): - test_value = constant_op.constant([[[b'!!', b'Good'], [b'ab', b'cd']], - [[b'12', b'3'], [b'scrub', b'a guy']]]) - expected_tokens = [[[[ord('!'), ord('!')], - [ord('G'), ord('o'), - ord('o'), ord('d')]], - [[ord('a'), ord('b')], [ord('c'), ord('d')]]], - [[[ord('1'), ord('2')], [ord('3')]], - [[ord('s'), - ord('c'), - ord('r'), - ord('u'), - ord('b')], - [ord('a'), - ord(' '), - ord('g'), - ord('u'), - ord('y')]]]] - expected_offset_starts = [[[[0, 1], [0, 1, 2, 3]], [[0, 1], [0, 1]]], - [[[0, 1], [0]], [[0, 1, 2, 3, 4], [0, 1, 2, 3, - 4]]]] - expected_offset_ends = [[[[1, 2], [1, 2, 3, 4]], [[1, 2], [1, 2]]], - [[[1, 2], [1]], [[1, 2, 3, 4, 5], [1, 2, 3, 4, - 5]]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - print(detokenized) - self.assertAllEqual( - self.evaluate(detokenized).to_list(), - self.evaluate(test_value).tolist()) - - def test3DimMatrixRagged(self): - test_value = ragged_factory_ops.constant([[[b'11'], [b'12t', b'13']], - [[b'21', b'22!']]]) - expected_tokens = [[[[ord('1'), ord('1')]], - [[ord('1'), ord('2'), ord('t')], [ord('1'), - ord('3')]]], - [[[ord('2'), ord('1')], [ord('2'), - ord('2'), - ord('!')]]]] - expected_offset_starts = [[[[0, 1]], [[0, 1, 2], [0, 1]]], - [[[0, 1], [0, 1, 2]]]] - expected_offset_ends = [[[[1, 2]], [[1, 2, 3], [1, 2]]], - [[[1, 2], [1, 2, 3]]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, 
expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testInternationalization(self): - test_value = constant_op.constant( - [u"J'adore la灯".encode('utf8'), u'¡Escríbeme!'.encode('utf8')]) - expected_tokens = [[ - ord('J'), - ord("'"), - ord('a'), - ord('d'), - ord('o'), - ord('r'), - ord('e'), - ord(' '), - ord('l'), - ord('a'), - ord(u'灯') - ], - [ - ord(u'¡'), - ord('E'), - ord('s'), - ord('c'), - ord('r'), - ord(u'í'), - ord('b'), - ord('e'), - ord('m'), - ord('e'), - ord('!') - ]] - expected_offset_starts = [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [0, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12]] - expected_offset_ends = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13], - [2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testOnlySpaces(self): - test_value = constant_op.constant([b' ', b' ']) - expected_tokens = [[ord(' ')], [ord(' '), ord(' '), ord(' ')]] - expected_offset_starts = [[0], [0, 1, 2]] - expected_offset_ends = [[1], [1, 2, 3]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testWhitespaceCharacters(self): - test_value = constant_op.constant([b't\tc\rd\nl']) - expected_tokens = [[ - ord('t'), - ord('\t'), - ord('c'), - ord('\r'), - ord('d'), - ord('\n'), - ord('l') - ]] - expected_offset_starts = [[0, 1, 2, 3, 4, 5, 6]] - expected_offset_ends = [[1, 2, 3, 4, 5, 6, 7]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testEmptyStringSingle(self): - test_value = constant_op.constant([b'']) - expected_tokens = [[]] - expected_offset_starts = [[]] - expected_offset_ends = [[]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testEmptyString(self): - test_value = constant_op.constant([b'', b'I', b'', b'Oh', b'']) - expected_tokens = [[], [ord('I')], [], [ord('O'), 
ord('h')], []] - expected_offset_starts = [[], [0], [], [0, 1], []] - expected_offset_ends = [[], [1], [], [1, 2], []] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - def testEmptyDimensions(self): - test_value = ragged_factory_ops.constant([[[b'F.', b'.'], []], [], - [[b'Zk', b'k'], [b'A', b'a']]]) - expected_tokens = [[[[ord('F'), ord('.')], [ord('.')]], []], [], - [[[ord('Z'), ord('k')], [ord('k')]], - [[ord('A')], [ord('a')]]]] - expected_offset_starts = [[[[0, 1], [0]], []], [], - [[[0, 1], [0]], [[0], [0]]]] - expected_offset_ends = [[[[1, 2], [1]], []], [], - [[[1, 2], [1]], [[1], [1]]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - detokenized = self.tokenizer.detokenize(tokens) - self.assertAllEqual(detokenized, test_value) - - -if __name__ == '__main__': - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_script_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_script_tokenizer.py deleted file mode 100644 index 2eccca8d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_script_tokenizer.py +++ /dev/null
@@ -1,239 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tokenizer for strings based on change in unicode script codes.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_conversion_ops -from tensorflow.python.ops.ragged import ragged_string_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -# pylint: disable=g-bad-import-order -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_unicode_script_tokenizer = load_library.load_op_library(resource_loader.get_path_to_datafile('_unicode_script_tokenizer.so')) - -_tf_text_unicode_script_tokenizer_create_counter = monitoring.Counter( - "/nlx/api/python/unicode_script_tokenizer_create_counter", - "Counter for number of UnicodeScriptTokenizers created in Python.") - - -class UnicodeScriptTokenizer(TokenizerWithOffsets): - r"""Tokenizes UTF-8 by splitting when there is a change in Unicode script. - - By default, this tokenizer leaves out scripts matching the whitespace unicode - property (use the `keep_whitespace` argument to keep it), so in this case the - results are similar to the `WhitespaceTokenizer`. Any punctuation - will get its own token (since it is in a different script), and any script - change in the input string will be the location of a split. - - Example: - >>> tokenizer = tf_text.UnicodeScriptTokenizer() - >>> tokens = tokenizer.tokenize(["xy.,z de", "fg?h", "abαβ"]) - >>> print(tokens.to_list()) - [[b'xy', b'.,', b'z', b'de'], [b'fg', b'?', b'h'], - [b'ab', b'\xce\xb1\xce\xb2']] - - >>> tokens = tokenizer.tokenize(u"累計7239人") - >>> print(tokens) - tf.Tensor([b'\xe7\xb4\xaf\xe8\xa8\x88' b'7239' b'\xe4\xba\xba'], shape=(3,), - dtype=string) - - Both the punctuation and the whitespace in the first string have been split, - but the punctuation run is present as a token while the whitespace isn't - emitted (by default). The third example shows the case of a script change - without any whitespace. This results in a split at that boundary point. - """ - - def __init__(self, keep_whitespace=False): - """Initializes a new instance. - - Args: - keep_whitespace: A boolean that specifices whether to emit whitespace - tokens (default `False`). 
- """ - super(UnicodeScriptTokenizer, self).__init__() - _tf_text_unicode_script_tokenizer_create_counter.get_cell().increase_by(1) - self._keep_whitespace = keep_whitespace - - def tokenize(self, input): # pylint: disable=redefined-builtin - """Tokenizes UTF-8 by splitting when there is a change in Unicode script. - - The strings are split when successive tokens change their Unicode script - or change being whitespace or not. The script codes used correspond to - International Components for Unicode (ICU) UScriptCode values. See: - http://icu-project.org/apiref/icu4c/uscript_8h.html - - ICU-defined whitespace characters are dropped, unless the `keep_whitespace` - option was specified at construction time. - - Args: - input: A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. - - Returns: - A `RaggedTensor` of tokenized text. The returned shape is the shape of the - input tensor with an added ragged dimension for tokens of each string. - """ - (tokens, _, _) = self.tokenize_with_offsets(input) - return tokens - - def tokenize_with_offsets(self, input): # pylint: disable=redefined-builtin - r"""Tokenizes UTF-8 by splitting when there is a change in Unicode script. - - The strings are split when a change in the Unicode script is detected - between sequential tokens. The script codes used correspond to International - Components for Unicode (ICU) UScriptCode values. See: - http://icu-project.org/apiref/icu4c/uscript_8h.html - - ICU defined whitespace characters are dropped, unless the keep_whitespace - option was specified at construction time. - - Example: - >>> tokenizer = tf_text.UnicodeScriptTokenizer() - >>> tokens = tokenizer.tokenize_with_offsets(["xy.,z de", "abαβ"]) - >>> print(tokens[0].to_list()) - [[b'xy', b'.,', b'z', b'de'], [b'ab', b'\xce\xb1\xce\xb2']] - >>> print(tokens[1].to_list()) - [[0, 2, 4, 6], [0, 2]] - >>> print(tokens[2].to_list()) - [[2, 4, 5, 8], [2, 6]] - - >>> tokens = tokenizer.tokenize_with_offsets(u"累計7239人") - >>> print(tokens[0]) - tf.Tensor([b'\xe7\xb4\xaf\xe8\xa8\x88' b'7239' b'\xe4\xba\xba'], - shape=(3,), dtype=string) - >>> print(tokens[1]) - tf.Tensor([ 0 6 10], shape=(3,), dtype=int64) - >>> print(tokens[2]) - tf.Tensor([ 6 10 13], shape=(3,), dtype=int64) - - The start_offsets and end_offsets are in byte indices of the original - string. When calling with multiple string inputs, the offset indices will - be relative to the individual source strings. - - Args: - input: A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - * `tokens`: A `RaggedTensor` of tokenized text. - * `start_offsets`: A `RaggedTensor` of the tokens' starting byte offset. - * `end_offsets`: A `RaggedTensor` of the tokens' ending byte offset. - """ - name = None - with ops.name_scope(name, "UnicodeScriptTokenize", [input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - if input_tensor.shape.ndims is None: - raise ValueError("Rank of input_tensor must be statically known.") - if ragged_tensor.is_ragged(input_tensor): - if input_tensor.flat_values.shape.ndims > 1: - # If the flat_values of our ragged tensor is multi-dimensional, we can - # process it separately and our output will have the same nested - # splits as our input. 
- (tokens, starts, ends) = self.tokenize_with_offsets( - input_tensor.flat_values) - return (input_tensor.with_flat_values(tokens), - input_tensor.with_flat_values(starts), - input_tensor.with_flat_values(ends)) - else: - # Recursively process the values of the ragged tensor. - (tokens, starts, ends) = self.tokenize_with_offsets( - input_tensor.values) - return (input_tensor.with_values(tokens), - input_tensor.with_values(starts), - input_tensor.with_values(ends)) - else: - if input_tensor.shape.ndims > 1: - # Convert the input tensor to ragged and process it. - return self.tokenize_with_offsets( - ragged_conversion_ops.from_tensor(input_tensor)) - elif input_tensor.shape.ndims == 0: - (tokens, starts, ends) = self.tokenize_with_offsets( - array_ops.stack([input_tensor])) - return tokens.values, starts.values, ends.values - else: - # Our rank 1 tensor is the correct shape, so we can process it as - # normal - return self._tokenize_with_offsets_encode_decode_wrapper(input_tensor) - - def _tokenize_with_offsets_encode_decode_wrapper(self, input_tensor): - """Tokenizes a tensor of UTF-8 strings with rank of 1. - - Args: - input_tensor: The single dimensional Tensor to tokenize. - - Returns: - Tuple of RaggedTensors of tokenized text and byte offsets, with shapes - [num_strings, (num_tokens or num_offsets)]. - """ - # Decode the strings and get byte offsets - (codepoints, byte_start_offsets) = ( - ragged_string_ops.unicode_decode_with_offsets(input_tensor, "UTF-8")) - byte_end_offsets = array_ops.concat([ - byte_start_offsets[:, 1:], - math_ops.cast( - array_ops.expand_dims(string_ops.string_length(input_tensor), 1), - dtypes.int64) - ], 1) - - # Tokenize - (codepoint_tokens, codepoint_start_offsets, codepoint_end_offsets) = ( - self._tokenize_codepoints_with_offsets(codepoints)) - - # Encode the codepoints and translate the codepoint offsets to byte offsets. - return (ragged_string_ops.unicode_encode(codepoint_tokens, "UTF-8"), - array_ops.batch_gather(byte_start_offsets, codepoint_start_offsets), - array_ops.batch_gather( - byte_end_offsets, - math_ops.subtract(codepoint_end_offsets, [1]))) - - def _tokenize_codepoints_with_offsets(self, codepoints_tensor): - """Tokenizes a tensor of codepoints with rank of 1. - - Args: - codepoints_tensor: Single-dimension Tensor of codepoints to tokenize. - - Returns: - Tuple of tokenized codepoints with offsets relative to the codepoints have - a shape of [num_strings, (num_tokens or num_offsets)]. - """ - (output_values, output_values_inner_splits, output_offset_starts, - output_offset_ends, output_outer_splits) = ( - gen_unicode_script_tokenizer.unicode_script_tokenize_with_offsets( - input_values=codepoints_tensor.flat_values, - input_splits=codepoints_tensor.row_splits, - keep_whitespace=self._keep_whitespace)) - codepoint_tokens = RaggedTensor.from_nested_row_splits( - flat_values=output_values, - nested_row_splits=[output_outer_splits, output_values_inner_splits]) - codepoint_offset_starts = RaggedTensor.from_nested_row_splits( - flat_values=output_offset_starts, - nested_row_splits=[output_outer_splits]) - codepoint_offset_ends = RaggedTensor.from_nested_row_splits( - flat_values=output_offset_ends, - nested_row_splits=[output_outer_splits]) - return (codepoint_tokens, codepoint_offset_starts, codepoint_offset_ends)
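Note (reviewer reference, not part of this CL): a minimal sketch of the UnicodeScriptTokenizer being deleted above, adapted from its docstring. It splits UTF-8 text wherever the Unicode script changes, dropping whitespace runs unless `keep_whitespace=True` is passed. Assumes `tensorflow_text` is importable as `tf_text`.

# Reference sketch only; mirrors the docstring of the removed module.
import tensorflow_text as tf_text

tokenizer = tf_text.UnicodeScriptTokenizer()
tokens, starts, ends = tokenizer.tokenize_with_offsets(["xy.,z de", "abαβ"])
print(tokens.to_list())  # [[b'xy', b'.,', b'z', b'de'], [b'ab', b'\xce\xb1\xce\xb2']]
print(starts.to_list())  # [[0, 2, 4, 6], [0, 2]]
print(ends.to_list())    # [[2, 4, 5, 8], [2, 6]]

# With keep_whitespace=True, whitespace runs are emitted as their own tokens
# instead of being dropped.
ws_tokenizer = tf_text.UnicodeScriptTokenizer(keep_whitespace=True)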
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_script_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_script_tokenizer_test.py deleted file mode 100644 index eb4bbbfa..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/unicode_script_tokenizer_test.py +++ /dev/null
@@ -1,304 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for unicode_script_tokenizer_op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops.unicode_script_tokenizer import UnicodeScriptTokenizer - - -@test_util.run_all_in_graph_and_eager_modes -class UnicodeScriptTokenizerOpTest(test_util.TensorFlowTestCase): - - def setUp(self): - super(UnicodeScriptTokenizerOpTest, self).setUp() - self.tokenizer = UnicodeScriptTokenizer() - - def testRequireParams(self): - with self.cached_session(): - with self.assertRaises(TypeError): - self.tokenizer.tokenize() - - def testScalar(self): - test_value = constant_op.constant(b'I love Flume!') - expected_tokens = [b'I', b'love', b'Flume', b'!'] - expected_offset_starts = [0, 2, 7, 12] - expected_offset_ends = [1, 6, 12, 13] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testVectorSingleValue(self): - test_value = constant_op.constant([b'I love Flume!']) - expected_tokens = [[b'I', b'love', b'Flume', b'!']] - expected_offset_starts = [[0, 2, 7, 12]] - expected_offset_ends = [[1, 6, 12, 13]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testVector(self): - test_value = constant_op.constant([b'I love Flume!', b'Good day']) - expected_tokens = [[b'I', b'love', b'Flume', b'!'], [b'Good', b'day']] - expected_offset_starts = [[0, 2, 7, 12], [0, 5]] - expected_offset_ends = [[1, 6, 12, 13], [4, 8]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testMatrix(self): - test_value = constant_op.constant([[b'I love Flume!', b'Good day'], - [b'I don\'t want', b'no scrubs']]) - expected_tokens = [[[b'I', b'love', b'Flume', b'!'], [b'Good', b'day']], - [[b'I', b'don', b'\'', b't', b'want'], - [b'no', b'scrubs']]] - expected_offset_starts = [[[0, 2, 7, 12], [0, 5]], - [[0, 2, 5, 6, 8], [0, 3]]] - expected_offset_ends = [[[1, 6, 12, 
13], [4, 8]], - [[1, 5, 6, 7, 12], [2, 9]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testMatrixRagged(self): - test_value = ragged_factory_ops.constant([[b'I love Flume!'], - [b'I don\'t want', b'no scrubs']]) - expected_tokens = [[[b'I', b'love', b'Flume', b'!']], - [[b'I', b'don', b'\'', b't', b'want'], - [b'no', b'scrubs']]] - expected_offset_starts = [[[0, 2, 7, 12]], - [[0, 2, 5, 6, 8], [0, 3]]] - expected_offset_ends = [[[1, 6, 12, 13]], - [[1, 5, 6, 7, 12], [2, 9]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def test3DimMatrix(self): - test_value = constant_op.constant([[[b'I love Flume!', b'Good day'], - [b'I don\'t want', b'no scrubs']], - [[b'I love Zhu!', b'Good night'], - [b'A scrub is', b'a guy']]]) - expected_tokens = [[[[b'I', b'love', b'Flume', b'!'], [b'Good', b'day']], - [[b'I', b'don', b'\'', b't', b'want'], - [b'no', b'scrubs']]], - [[[b'I', b'love', b'Zhu', b'!'], [b'Good', b'night']], - [[b'A', b'scrub', b'is'], [b'a', b'guy']]]] - expected_offset_starts = [[[[0, 2, 7, 12], [0, 5]], - [[0, 2, 5, 6, 8], [0, 3]]], - [[[0, 2, 7, 10], [0, 5]], - [[0, 2, 8], [0, 2]]]] - expected_offset_ends = [[[[1, 6, 12, 13], [4, 8]], - [[1, 5, 6, 7, 12], [2, 9]]], - [[[1, 6, 10, 11], [4, 10]], - [[1, 7, 10], [1, 5]]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def test3DimMatrixRagged(self): - test_value = ragged_factory_ops.constant([[[b'I love Flume!'], - [b'I don\'t want', - b'no scrubs']], - [[b'I love Zhu!', - b'Good night']]]) - expected_tokens = [[[[b'I', b'love', b'Flume', b'!']], - [[b'I', b'don', b'\'', b't', b'want'], - [b'no', b'scrubs']]], - [[[b'I', b'love', b'Zhu', b'!'], [b'Good', b'night']]]] - expected_offset_starts = [[[[0, 2, 7, 12]], - [[0, 2, 5, 6, 8], [0, 3]]], - [[[0, 2, 7, 10], [0, 5]]]] - expected_offset_ends = [[[[1, 6, 12, 13]], - [[1, 5, 6, 7, 12], [2, 9]]], - [[[1, 6, 10, 11], [4, 10]]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testInternationalization(self): - test_value = constant_op.constant([u"J'adore la灯".encode('utf8'), - u'¡Escríbeme!'.encode('utf8')]) - expected_tokens = [[b'J', b"'", b'adore', b'la', u'灯'.encode('utf8')], - [u'¡'.encode('utf8'), u'Escríbeme'.encode('utf8'), b'!']] - expected_offset_starts = [[0, 1, 2, 8, 10], [0, 2, 12]] - expected_offset_ends = [[1, 2, 7, 10, 13], [2, 12, 13]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) 
= ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testSpaceBoundaries(self): - test_value = constant_op.constant([b' Hook em! ', b' .Ok. Go ']) - expected_tokens = [[b'Hook', b'em', b'!'], [b'.', b'Ok', b'.', b'Go']] - expected_offset_starts = [[1, 6, 8], [1, 2, 4, 8]] - expected_offset_ends = [[5, 8, 9], [2, 4, 5, 10]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testKeepWhitespace(self): - test_value = constant_op.constant([ - b'\'Black Panther,\' \t \xe2\x80\x98A Star Is Born\xe2\x80\x98 among AFI Awards honorees', - b' .Ok. Go ' - ]) - expected_tokens = [[ - b'\'', b'Black', b' ', b'Panther', b',\'', b' \t ', b'\xe2\x80\x98', - b'A', b' ', b'Star', b' ', b'Is', b' ', b'Born', b'\xe2\x80\x98', b' ', - b'among', b' ', b'AFI', b' ', b'Awards', b' ', b'honorees' - ], [b' ', b'.', b'Ok', b'.', b' ', b'Go', b' ']] - expected_offset_starts = [ - [0, 1, 6, 7, 14, 16, 19, 22, 23, 24, 28, 29, 31, 32, 36, 39, 40, - 45, 46, 49, 50, 56, 57], - [0, 1, 2, 4, 5, 8, 10]] - expected_offset_ends = [ - [1, 6, 7, 14, 16, 19, 22, 23, 24, 28, 29, 31, 32, 36, 39, 40, - 45, 46, 49, 50, 56, 57, 65], - [1, 2, 4, 5, 8, 10, 12]] - self.tokenizer = UnicodeScriptTokenizer(keep_whitespace=True) - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testOnlySpaces(self): - test_value = constant_op.constant([b' ', b' ']) - expected_tokens = [[], []] - expected_offset_starts = [[], []] - expected_offset_ends = [[], []] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testWhitespaceCharacters(self): - test_value = constant_op.constant([b'things:\tcarpet\rdesk\nlamp']) - expected_tokens = [[b'things', b':', b'carpet', b'desk', b'lamp']] - expected_offset_starts = [[0, 6, 8, 15, 20]] - expected_offset_ends = [[6, 7, 14, 19, 24]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testEmptyStringSingle(self): - test_value = constant_op.constant([b'']) - expected_tokens = [[]] - expected_offset_starts = [[]] - expected_offset_ends = [[]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, 
expected_offset_ends) - - def testEmptyString(self): - test_value = constant_op.constant( - [b'', b'I love Flume!', b'', b'O hai', b'']) - expected_tokens = [[], [b'I', b'love', b'Flume', b'!'], [], [b'O', b'hai'], - []] - expected_offset_starts = [[], [0, 2, 7, 12], [], [0, 2], []] - expected_offset_ends = [[], [1, 6, 12, 13], [], [1, 5], []] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testEmptyDimensions(self): - test_value = ragged_factory_ops.constant( - [[[b'I love Flume!', b'Good day. . .'], []], [], - [[b'I love Zhu!', b'Good night'], [b'A scrub is', b'a guy']]]) - expected_tokens = [[[[b'I', b'love', b'Flume', b'!'], - [b'Good', b'day', b'...']], []], [], - [[[b'I', b'love', b'Zhu', b'!'], [b'Good', b'night']], - [[b'A', b'scrub', b'is'], [b'a', b'guy']]]] - expected_offset_starts = [[[[0, 2, 7, 12], [0, 5, 8]], - []], - [], - [[[0, 2, 7, 10], [0, 5]], - [[0, 2, 8], [0, 2]]]] - expected_offset_ends = [[[[1, 6, 12, 13], [4, 8, 13]], - []], - [], - [[[1, 6, 10, 11], [4, 10]], - [[1, 7, 10], [1, 5]]]] - tokens = self.tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - -if __name__ == '__main__': - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/viterbi_constrained_sequence_op.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/viterbi_constrained_sequence_op.py deleted file mode 100644 index b9c6485..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/viterbi_constrained_sequence_op.py +++ /dev/null
@@ -1,193 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Bulk Viterbi Constrained Sequence. - -Constrains a set of predictions based on a set of legal transitions and/or a -set of transition weights, returning the legal sequence that maximizes the -product of the state scores and the transition weights according to the Viterbi -algorithm. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_tensor - -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_constrained_sequence_op = load_library.load_op_library(resource_loader.get_path_to_datafile('_constrained_sequence_op.so')) - - -def viterbi_constrained_sequence(scores, - sequence_length=None, - allowed_transitions=None, - transition_weights=None, - use_log_space=False, - use_start_and_end_states=True, - name=None): - """Performs greedy constrained sequence on a batch of examples. - - Constrains a set of predictions based on a set of legal transitions - and/or a set of transition weights, returning the legal sequence that - maximizes the product of the state scores and the transition weights - according to the Viterbi algorithm. If `use_log_space` is True, the Viterbi - calculation will be performed in log space (with sums); if it is False, - the Viterbi calculation will be performed in exp space (with normalized - products). - - This op also takes a parameter `use_start_and_end_states`, which when true - will add an implicit start and end state to each sequence. These implicit - states allow the user to specify additional weights and permitted transitions - to start and end a sequence (so, for instance, if you wanted to forbid your - output from ending in a certain set of states you could do so). - - Inputs to this op can take one of three forms: a single TensorFlow tensor - of scores with no sequence lengths, a TensorFlow tensor of scores along - with a TensorFlow tensor of sequence lengths, or a RaggedTensor. If only the - scores tensor is passed, this op will assume that the sequence lengths are - equal to the size of the tensor (and so use all the data provided). If a - scores tensor and sequence_lengths tensor is provided, the op will only - use the data in the scores tensor as specified by the sequence_lengths tensor. - Finally, if a RaggedTensor is provided, the sequence_lengths will be ignored - and the variable length sequences in the RaggedTensor will be used. - - >>> scores = np.array([[10.0, 12.0, 6.0, 4.0], - ... [13.0, 12.0, 11.0, 10.0]], dtype=np.float32) - >>> sequence_length = np.array([2]) - >>> transition_weights = np.array([[ .1, .2, .3, .4], - ... [ .5, .6, .7, .8], - ... [ .9, .1, .15, .2], - ... 
[.25, .35, .45, .55]], dtype=np.float32) - >>> allowed_transitions = np.array([[True, True, True, True], - ... [True, True, True, True], - ... [True, False, True, False], - ... [True, True, True, True]]) - >>> viterbi_constrained_sequence( - ... scores=scores, - ... sequence_length=sequence_length, - ... allowed_transitions=allowed_transitions, - ... transition_weights=transition_weights, - ... use_log_space=False, - ... use_start_and_end_states=False) - <tf.RaggedTensor [[1, 3]]> - - Args: - scores: `<float32> [batch_size, num_steps, |num_states|]` - A tensor of scores, where `scores[b, t, s]` is the predicted score for - transitioning to state `s` at step `t` for batch `b`. The |num_states| - dimension must correspond to the num_states attribute for this op. This - input may be ragged; if it is ragged, the ragged tensor should have the - same structure [b, t, s] and only axis 1 should be ragged. - - sequence_length: `<{int32, int64}>[batch_size]` - A rank-1 tensor representing the length of the output sequence. If None, - and the 'scores' input is not ragged, sequence lengths will be assumed - to be the length of the score tensor. - - allowed_transitions: - if use_start_and_end_states is TRUE: - `<bool>[num_states+1, num_states+1]` - if use_start_and_end_states is FALSE: - `<bool>[num_states, num_states]` - A rank-2 tensor representing allowed transitions. - - allowed_transitions[i][j] is true if the transition from state i to - state j is allowed for i and j in 0...(num_states). - - allowed_transitions[num_states][num_states] is ignored. - If use_start_and_end_states is TRUE: - - allowed_transitions[num_states][j] is true if the sequence is allowed - to start from state j. - - allowed_transitions[i][num_states] is true if the sequence is allowed - to end on state i. - Default - An empty tensor. This allows all sequence states to transition - to all other sequence states. - - transition_weights: - if use_start_and_end_states is TRUE: - `<float32>[num_states+1, num_states+1]` - if use_start_and_end_states is FALSE: - `<float32>[num_states, num_states]` - A rank-2 tensor representing transition weights. - - transition_weights[i][j] is the coefficient that a candidate transition - score will be multiplied by if that transition is from state i to - state j. - - transition_weights[num_states][num_states] is ignored. - If use_start_and_end_states is TRUE: - - transition_weights[num_states][j] is the coefficient that will be used - if the transition starts with state j. - - transition_weights[i][num_states] is the coefficient that will be used - if the final state in the sequence is state i. - Default - An empty tensor. This assigns a wieght of 1.0 all transitions - - use_log_space: Whether to use log space for the calculation. If false, - calculations will be done in exp-space. - - use_start_and_end_states: If True, sequences will have an implicit start - and end state added. - - name: The name scope within which this op should be constructed. - - Returns: - An <int32>[batch_size, (num_steps)] ragged tensor containing the appropriate - sequence of transitions. If a sequence is impossible, the value of the - RaggedTensor for that and all following transitions in that sequence shall - be '-1'. 
- """ - with ops.name_scope( - name, "BulkViterbiConstrainedSequence", - [scores, sequence_length, allowed_transitions, transition_weights]): - if allowed_transitions is None: - allowed_transitions = [] - - if transition_weights is None: - transition_weights = [] - - score_data = ragged_tensor.convert_to_tensor_or_ragged_tensor( - scores, name="score_data") - - if isinstance(score_data, ragged_tensor.RaggedTensor): - # TODO(momernick): Extend the generated op to support ragged tensors. - dense_scores = score_data.to_tensor(default_value=0) - sequence_lengths = score_data.row_lengths(axis=1) - else: - dense_scores = score_data - # In this case, the core input was a dense tensor. - if sequence_length is not None: - sequence_lengths = ops.convert_to_tensor(sequence_length) - else: - batch_size = array_ops.shape(dense_scores)[0] - dense_length = array_ops.shape(dense_scores)[-2] - sequence_lengths = array_ops.ones([batch_size], - dtype=dtypes.int32) * dense_length - - transition_weights = ops.convert_to_tensor(transition_weights) - allowed_transitions = ops.convert_to_tensor( - allowed_transitions, dtype=dtypes.bool) - - output, output_splits = gen_constrained_sequence_op.constrained_sequence( - scores=dense_scores, - sequence_lengths=sequence_lengths, - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_viterbi=True, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - return ragged_tensor.RaggedTensor.from_row_splits( - values=output, row_splits=output_splits)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/viterbi_constrained_sequence_op_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/viterbi_constrained_sequence_op_test.py deleted file mode 100644 index 22015d7..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/viterbi_constrained_sequence_op_test.py +++ /dev/null
@@ -1,599 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensorflow_text.python.ops.viterbi_constrained_sequence_op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.numpy import viterbi_decode -from tensorflow_text.python.ops import viterbi_constrained_sequence_op as sequence_op - - -# TODO(b/122968457): Refactor this test logic. -@test_util.run_all_in_graph_and_eager_modes -class ViterbiConstrainedSequenceOpTest(test_util.TensorFlowTestCase): - - def test_sequence_in_exp_space_with_start_end_states_single_input(self): - use_log_space = False - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4, .1], - [ .5, .6, .7, .8, .1], - [ .9, 1, .15, 1, .1], - [.25, .35, .45, .55, .5], - [ .1, .5, .1, .1, 1]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True, True], - [True, True, True, True, True], - [True, False, True, False, True], - [True, True, True, True, True], - [True, True, True, True, False]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_in_exp_space_with_start_end_states_multi_input(self): - use_log_space = False - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4, .1], - [ .5, .6, .7, .8, .1], - [ .9, 1, .15, 1, .1], - [.25, .35, .45, .55, .5], - [ .1, .5, .1, .1, 1]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True, True], - [True, True, True, True, True], - [True, False, True, False, True], - [True, True, True, True, True], - [True, True, True, True, False]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_in_exp_space_without_start_end_states_single_input(self): - use_log_space = False - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4], - [ .5, .6, .7, .8], - [ .9, .1, .15, .2], - [.25, .35, .45, .55]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True], - [True, True, True, True], - [True, False, True, False], - [True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. 
- single_input = np.array([scores], dtype=np.float32) - single_sequence_op = sequence_op.viterbi_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_in_exp_space_without_start_end_states_multi_input(self): - use_log_space = False - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[ .1, .2, .3, .4], - [ .5, .6, .7, .8], - [ .9, .1, .15, .2], - [.25, .35, .45, .55]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True], - [True, True, True, True], - [True, False, True, False], - [True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_in_log_space_with_start_end_states_single_input(self): - use_log_space = True - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 7.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0, 0.0], - [ 3.0, -3.0, 4.0, -4.0, 0.0], - [ 5.0, 1.0, 10.0, 1.0, 1.0], - [-7.0, 7.0, -8.0, 8.0, 0.0], - [ 0.0, 1.0, 2.0, 3.0, 0.0]], - dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True, True], - [True, True, True, True, True], - [True, False, True, False, False], - [True, True, True, True, True], - [True, False, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. 
- single_input = np.array([scores], dtype=np.float32) - single_sequence_op = sequence_op.viterbi_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_in_log_space_with_start_end_states_multi_input(self): - use_log_space = True - use_start_and_end_states = True - scores = np.array([[10.0, 12.0, 7.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0, 0.0], - [ 3.0, -3.0, 4.0, -4.0, 0.0], - [ 5.0, 1.0, 10.0, 1.0, 1.0], - [-7.0, 7.0, -8.0, 8.0, 0.0], - [ 0.0, 1.0, 2.0, 3.0, 0.0]], - dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True, True], - [True, True, True, True, True], - [True, False, True, False, False], - [True, True, True, True, True], - [True, False, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_in_log_space_without_start_end_states_single_input(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True], - [True, True, True, True], - [True, False, True, False], - [True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. 
- single_input = np.array([scores], dtype=np.float32) - single_sequence_op = sequence_op.viterbi_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_in_log_space_without_start_end_states_multi_input(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True], - [True, True, True, True], - [True, False, True, False], - [True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_with_none_weights_single_input(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True], - [True, True, True, True], - [True, False, True, False], - [True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. 
- single_input = np.array([scores], dtype=np.float32) - single_sequence_op = sequence_op.viterbi_constrained_sequence( - single_input, [2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_with_none_weights_multi_input(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - allowed_transitions = np.array([[True, True, True, True], - [True, True, True, True], - [True, False, True, False], - [True, True, True, True]]) - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - allowed_transitions, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, [2, 2, 2], - allowed_transitions=allowed_transitions, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_sequence_with_none_permissions_single_input(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a single-item batch. 
- single_input = np.array([scores], dtype=np.float32) - single_sequence_op = sequence_op.viterbi_constrained_sequence( - single_input, [2], - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_result, [sequence]) - - def test_sequence_with_none_permissions_multi_input(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, [2, 2, 2], - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_multi_input_sequence_with_implicit_lengths(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. - multiple_input = np.array([scores, scores, scores], dtype=np.float32) - - multiple_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - multiple_sequence_result = self.evaluate(multiple_sequence_op) - self.assertAllEqual(multiple_sequence_result, - [sequence, sequence, sequence]) - - def test_single_input_sequence_with_implicit_lengths(self): - use_log_space = True - use_start_and_end_states = False - scores = np.array([[10.0, 13.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0], - [13.0, 12.0, 11.0, 10.0]]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence, _ = viterbi_decode.decode( - scores, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - - # Test a multi-item batch. 
- multiple_input = np.array([scores], dtype=np.float32) - - single_sequence_op = sequence_op.viterbi_constrained_sequence( - multiple_input, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_sequence_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_sequence_result, [sequence]) - - def test_ragged_input_sequence(self): - use_log_space = True - use_start_and_end_states = False - input_1 = np.array([[10.0, 13.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0], - [13.0, 12.0, 11.0, 10.0]]) - input_2 = np.array([[10.0, 12.0, 6.0, 4.0], [13.0, 12.0, 11.0, 10.0]]) - # TODO(b/122968457): Extend RT support to lists-of-ndarrays. - scores = ragged_factory_ops.constant([input_1.tolist(), input_2.tolist()]) - # pyformat: disable - # pylint: disable=bad-whitespace - # pylint: disable=bad-continuation - transition_weights = np.array([[-1.0, 1.0, -2.0, 2.0], - [ 3.0, -3.0, 4.0, -4.0], - [ 5.0, 1.0, 10.0, 1.0], - [-7.0, 7.0, -8.0, 8.0]], dtype=np.float32) - - # pyformat: enable - # pylint: enable=bad-whitespace - # pylint: enable=bad-continuation - sequence_1, _ = viterbi_decode.decode( - input_1, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - sequence_2, _ = viterbi_decode.decode( - input_2, - transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - expected_sequence = ragged_factory_ops.constant([sequence_1, sequence_2]) - - # Test a ragged batch - single_sequence_op = sequence_op.viterbi_constrained_sequence( - scores, - transition_weights=transition_weights, - use_log_space=use_log_space, - use_start_and_end_states=use_start_and_end_states) - single_sequence_result = self.evaluate(single_sequence_op) - self.assertAllEqual(single_sequence_result, expected_sequence) - - -if __name__ == '__main__': - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/whitespace_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/whitespace_tokenizer.py deleted file mode 100644 index 1ec322c5c..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/whitespace_tokenizer.py +++ /dev/null
@@ -1,158 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Whitespace tokenizer for string tensors.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops.ragged import ragged_conversion_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor -from tensorflow_text.core.pybinds import pywrap_whitespace_tokenizer_config_builder -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -# pylint: disable=g-bad-import-order,unused-import -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_whitespace_tokenizer = load_library.load_op_library(resource_loader.get_path_to_datafile('_whitespace_tokenizer.so')) -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_whitespace_tokenizer_v2 = load_library.load_op_library(resource_loader.get_path_to_datafile('_whitespace_tokenizer_v2.so')) - -_tf_text_whitespace_tokenizer_op_create_counter = monitoring.Counter( - "/nlx/api/python/whitespace_tokenizer_create_counter", - "Counter for number of WhitespaceTokenizers created in Python.") - - -class WhitespaceTokenizer(TokenizerWithOffsets): - """Tokenizes a tensor of UTF-8 strings on whitespaces.""" - - def __init__(self): - """Initializes the WhitespaceTokenizer. - """ - super(WhitespaceTokenizer, self).__init__() - self._config = (pywrap_whitespace_tokenizer_config_builder. - build_whitespace_tokenizer_config()) - _tf_text_whitespace_tokenizer_op_create_counter.get_cell().increase_by(1) - - def tokenize(self, input): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 strings on whitespaces. - - The strings are split on ICU defined whitespace characters. These - whitespace characters are dropped. - - Example: - - >>> WhitespaceTokenizer().tokenize("small medium large") - <tf.Tensor: shape=(3,), dtype=string, numpy=array([b'small', b'medium', - b'large'], dtype=object)> - - Args: - input: A `RaggedTensor` or `Tensor` of UTF-8 strings with any shape. - - Returns: - A `RaggedTensor` of tokenized text. The returned shape is the shape of the - input tensor with an added ragged dimension for tokens of each string. - """ - (tokens, _, _) = self.tokenize_with_offsets(input) - return tokens - - def tokenize_with_offsets(self, input): # pylint: disable=redefined-builtin - """Tokenizes a tensor of UTF-8 strings on whitespaces. - - The strings are split on ICU defined whitespace characters. These - whitespace characters are dropped. 
- - Example: - - >>> splitter = WhitespaceTokenizer() - >>> pieces, starts, ends = splitter.tokenize_with_offsets("a bb ccc") - >>> print(pieces.numpy(), starts.numpy(), ends.numpy()) - [b'a' b'bb' b'ccc'] [0 2 5] [1 4 8] - - Args: - input: A `RaggedTensor`or `Tensor` of UTF-8 strings with any shape. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - * `tokens`: A `RaggedTensor` of tokenized text. - * `start_offsets`: A `RaggedTensor` of the tokens' starting byte offset. - * `end_offsets`: A `RaggedTensor` of the tokens' ending byte offset. - """ - name = None - with ops.name_scope(name, "WhitespaceTokenize", [input]): - input_tensor = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - if input_tensor.shape.ndims is None: - raise ValueError("Rank of input_tensor must be statically known.") - if ragged_tensor.is_ragged(input_tensor): - if input_tensor.flat_values.shape.ndims > 1: - # If the flat_values of our ragged tensor is multi-dimensional, we can - # process it separately and our output will have the same nested - # splits as our input. - (tokens, starts, - ends) = self.tokenize_with_offsets(input_tensor.flat_values) - return (input_tensor.with_flat_values(tokens), - input_tensor.with_flat_values(starts), - input_tensor.with_flat_values(ends)) - else: - # Recursively process the values of the ragged tensor. - (tokens, starts, - ends) = self.tokenize_with_offsets(input_tensor.values) - return (input_tensor.with_values(tokens), - input_tensor.with_values(starts), - input_tensor.with_values(ends)) - else: - if input_tensor.shape.ndims > 1: - # Convert the input tensor to ragged and process it. - return self.tokenize_with_offsets( - ragged_conversion_ops.from_tensor(input_tensor)) - elif input_tensor.shape.ndims == 0: - (tokens, starts, ends) = self.tokenize_with_offsets( - array_ops.stack([input_tensor])) - return tokens.values, starts.values, ends.values - else: - # Our rank 1 tensor is the correct shape, so we can process it as - # normal. - return self._whitespace_tokenize_with_offsets(input_tensor) - - def _whitespace_tokenize_with_offsets(self, input_tensor): - """Tokenizes a tensor of codepoints with rank of 1. - - Args: - input_tensor: Single-dimension Tensor of strings to tokenize. - - Returns: - Tuple of tokenized codepoints with offsets relative to the codepoints have - a shape of [num_strings, (num_tokens or num_offsets)]. - """ - (values, row_splits, start_offsets, end_offsets) = ( - gen_whitespace_tokenizer_v2.tf_text_whitespace_tokenize_with_offsets_v2( - input_values=input_tensor, input_config=self._config)) - values = RaggedTensor.from_nested_row_splits( - flat_values=values, - nested_row_splits=[row_splits]) - start_offsets = RaggedTensor.from_nested_row_splits( - flat_values=start_offsets, - nested_row_splits=[row_splits]) - end_offsets = RaggedTensor.from_nested_row_splits( - flat_values=end_offsets, - nested_row_splits=[row_splits]) - return (values, start_offsets, end_offsets)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/whitespace_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/whitespace_tokenizer_test.py deleted file mode 100644 index 8eefc941..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/whitespace_tokenizer_test.py +++ /dev/null
@@ -1,312 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for whitespace_tokenizer_op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -import tensorflow_text as tf_text - -from tensorflow.lite.python import interpreter -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import test_util -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops.whitespace_tokenizer import WhitespaceTokenizer - - -@test_util.run_all_in_graph_and_eager_modes -class WhitespaceTokenizerOpTest(test_util.TensorFlowTestCase): - - def setUp(self): - super(WhitespaceTokenizerOpTest, self).setUp() - self.whitespace_tokenizer = WhitespaceTokenizer() - - def testScalar(self): - test_value = constant_op.constant(b'I love Flume!') - expected_tokens = [b'I', b'love', b'Flume!'] - expected_offset_starts = [0, 2, 7] - expected_offset_ends = [1, 6, 13] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testScalarWithSplit(self): - # Similar to testScalar, but using split() calls (instead of tokenize()). - # Should produce the same results as before. This tests that a - # WhitespaceTokenizer is a valid Splitter. 
- test_value = constant_op.constant(b'I love Flume!') - expected_tokens = [b'I', b'love', b'Flume!'] - expected_offset_starts = [0, 2, 7] - expected_offset_ends = [1, 6, 13] - tokens = self.whitespace_tokenizer.split(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.split_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testVectorSingleValue(self): - test_value = constant_op.constant([b'I love Flume!']) - expected_tokens = [[b'I', b'love', b'Flume!']] - expected_offset_starts = [[0, 2, 7]] - expected_offset_ends = [[1, 6, 13]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testVector(self): - test_value = constant_op.constant([b'I love Flume!', b'Good day']) - expected_tokens = [[b'I', b'love', b'Flume!'], [b'Good', b'day']] - expected_offset_starts = [[0, 2, 7], [0, 5]] - expected_offset_ends = [[1, 6, 13], [4, 8]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testMatrix(self): - test_value = constant_op.constant([[b'I love Flume!', b'Good day'], - [b'I don\'t want', b'no scrubs']]) - expected_tokens = [[[b'I', b'love', b'Flume!'], [b'Good', b'day']], - [[b'I', b'don\'t', b'want'], [b'no', b'scrubs']]] - expected_offset_starts = [[[0, 2, 7], [0, 5]], [[0, 2, 8], [0, 3]]] - expected_offset_ends = [[[1, 6, 13], [4, 8]], [[1, 7, 12], [2, 9]]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testMatrixRagged(self): - test_value = ragged_factory_ops.constant([[b'I love Flume!'], - [b'I don\'t want', b'no scrubs']]) - expected_tokens = [[[b'I', b'love', b'Flume!']], - [[b'I', b'don\'t', b'want'], [b'no', b'scrubs']]] - expected_offset_starts = [[[0, 2, 7]], [[0, 2, 8], [0, 3]]] - expected_offset_ends = [[[1, 6, 13]], [[1, 7, 12], [2, 9]]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def test3DimMatrix(self): - test_value = constant_op.constant([[[b'I love Flume!', b'Good day'], - [b'I don\'t want', b'no scrubs']], - [[b'I love Zhu!', b'Good night'], - [b'A scrub is', b'a guy']]]) - expected_tokens = [[[[b'I', b'love', b'Flume!'], [b'Good', b'day']], - [[b'I', b'don\'t', b'want'], [b'no', b'scrubs']]], - [[[b'I', b'love', b'Zhu!'], [b'Good', b'night']], - [[b'A', b'scrub', b'is'], [b'a', 
b'guy']]]] - expected_offset_starts = [[[[0, 2, 7], [0, 5]], [[0, 2, 8], [0, 3]]], - [[[0, 2, 7], [0, 5]], [[0, 2, 8], [0, 2]]]] - expected_offset_ends = [[[[1, 6, 13], [4, 8]], [[1, 7, 12], [2, 9]]], - [[[1, 6, 11], [4, 10]], [[1, 7, 10], [1, 5]]]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def test3DimMatrixRagged(self): - test_value = ragged_factory_ops.constant([[[b'I love Flume!'], - [b'I don\'t want', - b'no scrubs']], - [[b'I love Zhu!', - b'Good night']]]) - expected_tokens = [[[[b'I', b'love', b'Flume!']], - [[b'I', b'don\'t', b'want'], [b'no', b'scrubs']]], - [[[b'I', b'love', b'Zhu!'], [b'Good', b'night']]]] - expected_offset_starts = [[[[0, 2, 7]], [[0, 2, 8], [0, 3]]], - [[[0, 2, 7], [0, 5]]]] - expected_offset_ends = [[[[1, 6, 13]], [[1, 7, 12], [2, 9]]], - [[[1, 6, 11], [4, 10]]]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testInternationalization(self): - test_value = constant_op.constant( - [u"J'adore la灯".encode('utf8'), u'¡Escríbeme!'.encode('utf8')]) - expected_tokens = [[b'J\'adore', u'la灯'.encode('utf8')], - [u'¡Escríbeme!'.encode('utf8')]] - expected_offset_starts = [[0, 8], [0]] - expected_offset_ends = [[7, 13], [13]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testSpaceBoundaries(self): - test_value = constant_op.constant([b' Hook em! ', b' .Ok. 
Go ']) - expected_tokens = [[b'Hook', b'em!'], [b'.Ok.', b'Go']] - expected_offset_starts = [[1, 6], [1, 8]] - expected_offset_ends = [[5, 9], [5, 10]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testOnlySpaces(self): - test_value = constant_op.constant([b' ', b' ', b' \t\r\n']) - expected_tokens = [[], [], []] - expected_offset_starts = [[], [], []] - expected_offset_ends = [[], [], []] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testWhitespaceCharacters(self): - test_value = constant_op.constant([b'things:\tcarpet\rdesk\nlamp\r\nlove']) - expected_tokens = [[b'things:', b'carpet', b'desk', b'lamp', b'love']] - expected_offset_starts = [[0, 8, 15, 20, 26]] - expected_offset_ends = [[7, 14, 19, 24, 30]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testEmptyStringSingle(self): - test_value = constant_op.constant([b'']) - expected_tokens = [[]] - expected_offset_starts = [[]] - expected_offset_ends = [[]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testEmptyString(self): - test_value = constant_op.constant( - [b'', b'I love Flume!', b'', b'O hai', b'']) - expected_tokens = [[], [b'I', b'love', b'Flume!'], [], [b'O', b'hai'], []] - expected_offset_starts = [[], [0, 2, 7], [], [0, 2], []] - expected_offset_ends = [[], [1, 6, 13], [], [1, 5], []] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testEmptyDimensions(self): - test_value = ragged_factory_ops.constant( - [[[b'I love Flume!', b'Good day. . 
.'], []], [], - [[b'I love Zhu!', b'Good night'], [b'A scrub is', b'a guy']]]) - expected_tokens = [[[[b'I', b'love', b'Flume!'], - [b'Good', b'day.', b'.', b'.']], []], [], - [[[b'I', b'love', b'Zhu!'], [b'Good', b'night']], - [[b'A', b'scrub', b'is'], [b'a', b'guy']]]] - expected_offset_starts = [[[[0, 2, 7], [0, 5, 10, 12]], []], [], - [[[0, 2, 7], [0, 5]], [[0, 2, 8], [0, 2]]]] - expected_offset_ends = [[[[1, 6, 13], [4, 9, 11, 13]], []], [], - [[[1, 6, 11], [4, 10]], [[1, 7, 10], [1, 5]]]] - tokens = self.whitespace_tokenizer.tokenize(test_value) - self.assertAllEqual(tokens, expected_tokens) - (tokens, starts, ends) = ( - self.whitespace_tokenizer.tokenize_with_offsets(test_value)) - self.assertAllEqual(tokens, expected_tokens) - self.assertAllEqual(starts, expected_offset_starts) - self.assertAllEqual(ends, expected_offset_ends) - - def testTfLite(self): - """Checks TFLite conversion and inference.""" - - class TokenizerModel(tf.keras.Model): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.tokenizer = WhitespaceTokenizer() - - def call(self, input_tensor, **kwargs): - return self.tokenizer.tokenize(input_tensor).flat_values - - # Test input data. - input_data = np.array(['Some minds are better kept apart']) - - # Define a Keras model. - model = TokenizerModel() - # Do TF.Text inference. - tf_result = model(tf.constant(input_data)) - - # Convert to TFLite. - converter = tf.lite.TFLiteConverter.from_keras_model(model) - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] - converter.allow_custom_ops = True - tflite_model = converter.convert() - - # Do TFLite inference. - interp = interpreter.InterpreterWithCustomOps( - model_content=tflite_model, - custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS) - interp.allocate_tensors() - input_details = interp.get_input_details() - interp.set_tensor(input_details[0]['index'], input_data) - interp.invoke() - output_details = interp.get_output_details() - tflite_result = interp.get_tensor(output_details[0]['index']) - - # Assert the results are identical. - self.assertAllEqual(tflite_result, tf_result) - - -if __name__ == '__main__': - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordpiece_tokenizer.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordpiece_tokenizer.py deleted file mode 100644 index de08f422..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordpiece_tokenizer.py +++ /dev/null
@@ -1,398 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Ops to tokenize words into subwords.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re - -from tensorflow.python.compat import compat -from tensorflow.python.eager import monitoring -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import sort_ops -from tensorflow.python.ops import string_ops -from tensorflow.python.ops.ragged import ragged_string_ops -from tensorflow.python.ops.ragged import ragged_tensor -from tensorflow.python.ops.ragged.ragged_tensor import RaggedTensor -from tensorflow_text.python.ops.tokenization import Detokenizer -from tensorflow_text.python.ops.tokenization import TokenizerWithOffsets - -# pylint: disable=g-bad-import-order -from tensorflow.python.framework import load_library -from tensorflow.python.platform import resource_loader -gen_wordpiece_tokenizer = load_library.load_op_library(resource_loader.get_path_to_datafile('_wordpiece_tokenizer.so')) - -_tf_text_wordpiece_tokenizer_op_create_counter = monitoring.Counter( - '/nlx/api/python/wordpiece_tokenizer_create_counter', - 'Counter for number of WordpieceTokenizers created in Python.') - - -class WordpieceTokenizer(TokenizerWithOffsets, Detokenizer): - r"""Tokenizes a tensor of UTF-8 string tokens into subword pieces. - - Each UTF-8 string token in the input is split into its corresponding - wordpieces, drawing from the list in the file `vocab_lookup_table`. - - Algorithm summary: For each token, the longest token prefix that is in the - vocabulary is split off. Any part of the token that remains is prefixed using - the `suffix_indicator`, and the process of removing the longest token prefix - continues. The `unknown_token` (UNK) is used when what remains of the token is - not in the vocabulary, or if the token is too long. - - When `token_out_type` is tf.string, the output tensor contains strings - in the vocabulary (or UNK). When it is an integer type, the output tensor - contains indices into the vocabulary list (with UNK being after the last - entry). - - Example: - >>> import pathlib - >>> pathlib.Path('/tmp/tok_vocab.txt').write_text( - ... "they ##' ##re the great ##est".replace(' ', '\n')) - >>> tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', - ... token_out_type=tf.string) - - >>> tokenizer.tokenize(["they're", "the", "greatest"]) - <tf.RaggedTensor [[b'they', b"##'", b'##re'], [b'the'], [b'great', b'##est']]> - - >>> tokenizer.tokenize(["they", "are", "great"]) - <tf.RaggedTensor [[b'they'], [b'[UNK]'], [b'great']]> - - >>> int_tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', - ... 
token_out_type=tf.int32) - - >>> int_tokenizer.tokenize(["the", "greatest"]) - <tf.RaggedTensor [[3], [4, 5]]> - - >>> int_tokenizer.tokenize(["really", "the", "greatest"]) - <tf.RaggedTensor [[6], [3], [4, 5]]> - - Tensor or ragged tensor inputs result in ragged tensor outputs. Scalar - inputs (which are just a single token) result in tensor outputs. - - >>> tokenizer.tokenize("they're") - <tf.Tensor: shape=(3,), dtype=string, numpy=array([b'they', b"##'", b'##re'], - dtype=object)> - >>> tokenizer.tokenize(["they're"]) - <tf.RaggedTensor [[b'they', b"##'", b'##re']]> - >>> tokenizer.tokenize(tf.ragged.constant([["they're"]])) - <tf.RaggedTensor [[[b'they', b"##'", b'##re']]]> - - Empty strings are tokenized into empty (ragged) tensors. - - >>> tokenizer.tokenize([""]) - <tf.RaggedTensor []> - """ - - def __init__(self, - vocab_lookup_table, - suffix_indicator='##', - max_bytes_per_word=100, - max_chars_per_token=None, - token_out_type=dtypes.int64, - unknown_token='[UNK]', - split_unknown_characters=False): - """Initializes the WordpieceTokenizer. - - Args: - vocab_lookup_table: A lookup table implementing the LookupInterface - containing the vocabulary of subwords or a string which is the file path - to the vocab.txt file. - suffix_indicator: (optional) The characters prepended to a wordpiece to - indicate that it is a suffix to another subword. Default is '##'. - max_bytes_per_word: (optional) Max size of input token. Default is 100. - max_chars_per_token: (optional) Max size of subwords, excluding suffix - indicator. If known, providing this improves the efficiency of decoding - long words. - token_out_type: (optional) The type of the token to return. This can be - `tf.int64` or `tf.int32` IDs, or `tf.string` subwords. The default is - `tf.int64`. - unknown_token: (optional) The string value to substitute for an unknown - token. Default is "[UNK]". If set to `None`, no substitution occurs. - If `token_out_type` is `tf.int32`/`tf.int64`, the `vocab_lookup_table` - is used (after substitution) to convert the unknown token to an integer. - split_unknown_characters: (optional) Whether to split out single unknown - characters as subtokens. If False (default), words containing unknown - characters will be treated as single unknown tokens. 
- """ - super(WordpieceTokenizer, self).__init__() - _tf_text_wordpiece_tokenizer_op_create_counter.get_cell().increase_by(1) - - if isinstance(vocab_lookup_table, str) or ( - isinstance(vocab_lookup_table, ops.Tensor) and - vocab_lookup_table.dtype == dtypes.string): - init = lookup_ops.TextFileIdTableInitializer(vocab_lookup_table) - vocab_lookup_table = lookup_ops.StaticVocabularyTableV1( - init, num_oov_buckets=1, lookup_key_dtype=dtypes.string) - - if not isinstance(vocab_lookup_table, lookup_ops.LookupInterface): - raise TypeError( - 'Unable to build a lookup table from {}'.format(vocab_lookup_table)) - - self._vocab_lookup_table = vocab_lookup_table - self._suffix_indicator = suffix_indicator - self._max_bytes_per_word = max_bytes_per_word - self._max_chars_per_token = ( - 0 if max_chars_per_token is None - else max_chars_per_token) - self._token_out_type = token_out_type - self._unknown_token = unknown_token if unknown_token else '[UNK]' - self._use_unknown_token = True if unknown_token else False - self._split_unknown_characters = split_unknown_characters - - def _get_vocab_and_ids(self): - export = getattr(self._vocab_lookup_table, 'export', None) - if export is None: - table = getattr(self._vocab_lookup_table, '_table') - export = table.export - - vocab, ids = export() # pylint: disable=protected-access - - # `.export` doesn't set the shapes. - vocab = check_ops.ensure_shape(vocab, [ - None, - ]) - ids = check_ops.ensure_shape(ids, [ - None, - ]) - - order = sort_ops.argsort(ids) - - ids = array_ops.gather(ids, order) - vocab = array_ops.gather(vocab, order) - - return vocab, ids - - def vocab_size(self, name=None): - """Returns the vocabulary size. - - Args: - name: The name argument that is passed to the op function. - - Returns: - A scalar representing the vocabulary size. - """ - with ops.name_scope(name, 'WordpieceTokenizerVocabSize', [self]): - return self._vocab_lookup_table.size() - - def tokenize(self, input): # pylint: disable=redefined-builtin - r"""Tokenizes a tensor of UTF-8 string tokens further into subword tokens. - - ### Example: - - >>> import pathlib - >>> pathlib.Path('/tmp/tok_vocab.txt').write_text( - ... "they ##' ##re the great ##est".replace(' ', '\n')) - >>> tokens = [["they're", 'the', 'greatest']] - >>> tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', - ... token_out_type=tf.string) - >>> tokenizer.tokenize(tokens) - <tf.RaggedTensor [[[b'they', b"##'", b'##re'], [b'the'], - [b'great', b'##est']]]> - - Args: - input: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A `RaggedTensor` of tokens where `tokens[i1...iN, j]` is the string - contents (or ID in the vocab_lookup_table representing that string) - of the `jth` token in `input[i1...iN]` - """ - subword, _, _ = self.tokenize_with_offsets(input) - return subword - - def tokenize_with_offsets(self, input): # pylint: disable=redefined-builtin - r"""Tokenizes a tensor of UTF-8 string tokens further into subword tokens. - - ### Example: - - >>> import pathlib - >>> pathlib.Path('/tmp/tok_vocab.txt').write_text( - ... "they ##' ##re the great ##est".replace(' ', '\n')) - >>> tokens = [["they're", 'the', 'greatest']] - >>> tokenizer = WordpieceTokenizer('/tmp/tok_vocab.txt', - ... 
token_out_type=tf.string) - >>> subtokens, starts, ends = tokenizer.tokenize_with_offsets(tokens) - >>> subtokens - <tf.RaggedTensor [[[b'they', b"##'", b'##re'], [b'the'], - [b'great', b'##est']]]> - >>> starts - <tf.RaggedTensor [[[0, 4, 5], [0], [0, 5]]]> - >>> ends - <tf.RaggedTensor [[[4, 5, 7], [3], [5, 8]]]> - - Args: - input: An N-dimensional `Tensor` or `RaggedTensor` of UTF-8 strings. - - Returns: - A tuple `(tokens, start_offsets, end_offsets)` where: - - tokens[i1...iN, j]: is a `RaggedTensor` of the string contents (or ID - in the vocab_lookup_table representing that string) of the `jth` token - in `input[i1...iN]`. - start_offsets[i1...iN, j]: is a `RaggedTensor` of the byte offsets - for the inclusive start of the `jth` token in `input[i1...iN]`. - end_offsets[i1...iN, j]: is a `RaggedTensor` of the byte offsets for - the exclusive end of the `jth` token in `input[i`...iN]` (exclusive, - i.e., first byte after the end of the token). - """ - name = None - if not isinstance(self._vocab_lookup_table, lookup_ops.LookupInterface): - raise TypeError('vocab_lookup_table must be a LookupInterface') - with ops.name_scope( - name, 'WordpieceTokenizeWithOffsets', - [input, self._vocab_lookup_table, self._suffix_indicator]): - # Check that the types are expected and the ragged rank is appropriate. - tokens = ragged_tensor.convert_to_tensor_or_ragged_tensor(input) - rank = tokens.shape.ndims - if rank is None: - raise ValueError('input must have a known rank.') - - if rank == 0: - wordpieces, starts, ends = self.tokenize_with_offsets( - array_ops.stack([tokens])) - return wordpieces.values, starts.values, ends.values - - elif rank > 1: - if not ragged_tensor.is_ragged(tokens): - tokens = ragged_tensor.RaggedTensor.from_tensor( - tokens, ragged_rank=rank - 1) - wordpieces, starts, ends = self.tokenize_with_offsets( - tokens.flat_values) - wordpieces = wordpieces.with_row_splits_dtype(tokens.row_splits.dtype) - starts = starts.with_row_splits_dtype(tokens.row_splits.dtype) - ends = ends.with_row_splits_dtype(tokens.row_splits.dtype) - return (tokens.with_flat_values(wordpieces), - tokens.with_flat_values(starts), - tokens.with_flat_values(ends)) - - if compat.forward_compatible(2019, 8, 25): - kwargs = dict(output_row_partition_type='row_splits') - from_row_partition = RaggedTensor.from_row_splits - else: - kwargs = {} - from_row_partition = RaggedTensor.from_row_lengths - - # Tokenize the tokens into subwords - values, row_splits, starts, ends = ( - gen_wordpiece_tokenizer.wordpiece_tokenize_with_offsets( - input_values=tokens, - vocab_lookup_table=self._vocab_lookup_table.resource_handle, - suffix_indicator=self._suffix_indicator, - use_unknown_token=self._use_unknown_token, - max_bytes_per_word=self._max_bytes_per_word, - max_chars_per_token=self._max_chars_per_token, - unknown_token=self._unknown_token, - split_unknown_characters=self._split_unknown_characters, - **kwargs)) - - # If ids are desired, look them up in the vocab table. Otherwise just - # return the string values. 
- if self._token_out_type == dtypes.int64: - values = math_ops.cast( - self._vocab_lookup_table.lookup(values), dtypes.int64) - - if self._token_out_type == dtypes.int32: - values = math_ops.cast( - self._vocab_lookup_table.lookup(values), dtypes.int32) - - wordpieces = from_row_partition(values, row_splits, validate=False) - starts = from_row_partition(starts, row_splits, validate=False) - ends = from_row_partition(ends, row_splits, validate=False) - - return wordpieces, starts, ends - - def detokenize(self, token_ids): - r"""Convert a `Tensor` or `RaggedTensor` of wordpiece IDs to string-words. - - >>> import pathlib - >>> pathlib.Path('/tmp/detok_vocab.txt').write_text( - ... 'a b c ##a ##b ##c'.replace(' ', '\n')) - >>> wordpiece = WordpieceTokenizer('/tmp/detok_vocab.txt') - >>> token_ids = [[0, 4, 5, 2, 5, 5, 5]] - >>> wordpiece.detokenize(token_ids) - <tf.RaggedTensor [[b'abc', b'cccc']]> - - The word pieces are joined along the innermost axis to make words. So the - result has the same rank as the input, but the innermost axis of the result - indexes words instead of word pieces. - - The shape transformation is: `[..., wordpieces] => [..., words]` - - When the input shape is `[..., words, wordpieces]` (like the output of - `WordpieceTokenizer.tokenize`) the result's shape is `[..., words, 1]`. - The additional ragged axis can be removed using `words.merge_dims(-2, -1)`. - - Note: This method assumes wordpiece IDs are dense on the interval - `[0, vocab_size)`. - - Args: - token_ids: A `RaggedTensor` or `Tensor` with an int dtype. Must have - `ndims >= 2` - - Returns: - A `RaggedTensor` with dtype `string` and the rank as the input - `token_ids`. - """ - # If there are performance issues with this method or problems with lookup - # tables using sparse IDs see the notes in b/177610044. - vocab, ids = self._get_vocab_and_ids() - token_ids = ragged_tensor.convert_to_tensor_or_ragged_tensor(token_ids) - - first_is_zero = math_ops.equal(ids[0], 0) - steps = ids[1:] - ids[:-1] - all_one_step = math_ops.reduce_all(math_ops.equal(steps, 1)) - - check = control_flow_ops.Assert( - first_is_zero & all_one_step, - data=[('`detokenize` only works with vocabulary tables where the ' - 'indices are dense on the interval `[0, vocab_size)`')]) - with ops.control_dependencies([check]): - token_ids = math_ops.minimum( - token_ids, - # Limit the OOV buckets to a single index. - math_ops.cast(array_ops.size(vocab), token_ids.dtype)) - - # Add the unknown token at that index. - vocab = array_ops.concat([vocab, [self._unknown_token]], axis=0) - - # Lookup the text tokens and join them along the innermost axis. - txt_tokens = array_ops.gather(vocab, token_ids) - - # Ensure the input is Ragged. - if not isinstance(txt_tokens, RaggedTensor): - txt_tokens = RaggedTensor.from_tensor(txt_tokens) - - # Join the tokens along the last axis. - words = string_ops.reduce_join_v2(txt_tokens, axis=-1, separator=' ') - - # Collapse " ##" in all strings to make words. - words = string_ops.regex_replace( - words, ' ' + re.escape(self._suffix_indicator), '') - - # Strip leading and trailing spaces. - words = string_ops.regex_replace(words, '^ +| +$', '') - - # Split on spaces so the last axis is "words". - words = ragged_string_ops.string_split_v2(words, sep=' ') - return words
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordpiece_tokenizer_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordpiece_tokenizer_test.py deleted file mode 100644 index c5694d5..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordpiece_tokenizer_test.py +++ /dev/null
@@ -1,613 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# encoding=utf-8 -"""Tests for wordpiece_tokenized op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from absl.testing import parameterized -from tensorflow.python.compat import compat - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors_impl -from tensorflow.python.framework import test_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import lookup_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.ragged import ragged_factory_ops -from tensorflow.python.platform import test -from tensorflow_text.python.ops.wordpiece_tokenizer import WordpieceTokenizer - - -def _Utf8(char): - return char.encode("utf-8") - - -def _CreateTable(vocab, num_oov=1): - size = array_ops.size(vocab, out_type=dtypes.int64) - init = lookup_ops.KeyValueTensorInitializer( - vocab, - math_ops.range(size, dtype=dtypes.int64), - key_dtype=dtypes.string, - value_dtype=dtypes.int64) - return lookup_ops.StaticVocabularyTableV1( - init, num_oov, lookup_key_dtype=dtypes.string) - - -_ENGLISH_VOCAB = [ - b"don", - b"##'", - b"##t", - b"tread", - b"##ness", - b"hel", - b"##lo", - b"there", - b"my", - b"na", - b"##me", - b"is", - b"ter", - b"##ry", - b"what", - b"##cha", - b"##ma", - b"##call", - b"##it?", - b"you", - b"said", -] - -_CHINESE_VOCAB = [ - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者"), - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福"), -] - -_MIXED_LANG_VOCAB = [ - b"don", - b"##'", - b"##t", - b"tread", - b"##ness", - b"hel", - b"##lo", - b"there", - b"my", - b"na", - b"##me", - b"is", - b"ter", - b"##ry", - b"what", - b"##cha", - b"##ma", - b"##call", - b"##it?", - b"you", - b"said", - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者"), - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福"), -] - -_RUSSIAN_VOCAB = [ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), -] - -_DEATH_VOCAB = [ - _Utf8(u"क"), - _Utf8(u"##र"), - _Utf8(u"##े"), - _Utf8(u"##ं"), - b"##*", - _Utf8(u"##👇"), -] - - -def _GetTokensFromWordpieceOffsets(tokens, begin_indices, end_indices): - begin_indices = begin_indices.to_list() - end_indices = end_indices.to_list() - result = [] - for docs_idx in range(0, len(tokens)): - tokens_in_doc = [] - for tokens_idx in range(0, len(tokens[docs_idx])): - token = bytes(tokens[docs_idx][tokens_idx]) - begin_offsets = begin_indices[docs_idx][tokens_idx] - end_offsets = end_indices[docs_idx][tokens_idx] - tokens_in_doc.append(b"".join( - [token[begin:end] for begin, end in zip(begin_offsets, end_offsets)])) - result.append(tokens_in_doc) - return result - - -class 
WordpieceOpTest(test_util.TensorFlowTestCase, parameterized.TestCase): - _FORWARD_COMPATIBILITY_HORIZONS = [ - (2019, 7, 1), - (2019, 10, 10), - (2525, 1, 1), # future behavior - ] - - @parameterized.parameters([ - # Basic case - dict( - tokens=[[_Utf8(u"купиха")]], - expected_subwords=[[[ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), - ]]], - vocab=_RUSSIAN_VOCAB, - ), - dict( - tokens=[[b"don't", b"treadness"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread", b"##ness"]]], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=[[b"hello", b"there", b"my", b"name", b"is", b"terry"], - [b"whatchamacallit?", b"you", b"said"]], - expected_subwords=[[[b"hel", b"##lo"], [b"there"], [b"my"], - [b"na", b"##me"], [b"is"], [b"ter", b"##ry"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"], - [b"you"], [b"said"]]], - vocab=_ENGLISH_VOCAB, - ), - # Basic case w/ unknown token - dict( - tokens=[[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread"], [b"[UNK]"], - [b"[UNK]"]]], - vocab=_ENGLISH_VOCAB, - ), - # Basic case w/o unknown token - dict( - tokens=[[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread"], - [b"cantfindme"], [b"treadcantfindme"]]], - unknown_token=None, - vocab=_ENGLISH_VOCAB, - ), - # Basic case w/ int id lookup - dict( - tokens=[[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]], - token_out_type=dtypes.int64, - expected_subwords=[[[0, 1, 2], [3], [21], [21]]], - vocab=_ENGLISH_VOCAB, - ), - # Chinese test case - dict( - tokens=[[ - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者") - ], - [ - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福") - ]], - expected_subwords=[[[_Utf8(u"貿")], [_Utf8(u"易")], [_Utf8(u"戰")], - [_Utf8(u"最")], [_Utf8(u"大")], [_Utf8(u"受")], - [_Utf8(u"益")], [_Utf8(u"者")]], - [[_Utf8(u"越")], [_Utf8(u"南")], [_Utf8(u"總")], - [_Utf8(u"理")], [_Utf8(u"阮")], [_Utf8(u"春")], - [_Utf8(u"福")]]], - vocab=_CHINESE_VOCAB, - ), - # Mixed lang test cases - dict( - tokens=[ - [ - _Utf8(u"貿"), - _Utf8(u"易"), - _Utf8(u"戰"), - _Utf8(u"最"), - _Utf8(u"大"), - _Utf8(u"受"), - _Utf8(u"益"), - _Utf8(u"者") - ], - [ - _Utf8(u"越"), - _Utf8(u"南"), - _Utf8(u"總"), - _Utf8(u"理"), - _Utf8(u"阮"), - _Utf8(u"春"), - _Utf8(u"福") - ], - [b"don't", b"treadness"], - ], - expected_subwords=[ - [[_Utf8(u"貿")], [_Utf8(u"易")], [_Utf8(u"戰")], - [_Utf8(u"最")], [_Utf8(u"大")], [_Utf8(u"受")], - [_Utf8(u"益")], [_Utf8(u"者")]], - [[_Utf8(u"越")], [_Utf8(u"南")], [_Utf8(u"總")], - [_Utf8(u"理")], [_Utf8(u"阮")], [_Utf8(u"春")], - [_Utf8(u"福")]], - [[b"don", b"##'", b"##t"], [b"tread", b"##ness"]], - ], - vocab=_MIXED_LANG_VOCAB, - ), - # Test token whose size is > max_bytes_per_word - dict( - tokens=[[b"don't", b"treadness"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"[UNK]"]]], - vocab=_ENGLISH_VOCAB, - max_bytes_per_word=5, - # Explicitly specify the offsets here because the current way of - # testing offsets would require '[UNK]' to be part of tokens. - expected_start=[[[0, 3, 4], [0]]], - expected_end=[[[3, 4, 5], [5]]], - ), - # Test the token of death usecase. 
- dict( - tokens=[[_Utf8(u"करें*👇👇")]], - token_out_type=dtypes.string, - expected_subwords=[[[ - _Utf8(u"क"), - _Utf8(u"##र"), - _Utf8(u"##े"), - _Utf8(u"##ं"), b"##*", - _Utf8(u"##👇"), - _Utf8(u"##👇") - ]]], - vocab=_DEATH_VOCAB, - max_bytes_per_word=40, - ), - # Test not splitting out unknown characters. - # (p and ! are unknown) - dict( - tokens=[[b"nap", b"hello!me"]], - expected_subwords=[[[b"[UNK]"], [b"[UNK]"]]], - unknown_token="[UNK]", - vocab=_ENGLISH_VOCAB, - ), - # Test splitting out unknown characters. - dict( - tokens=[[b"nap", b"hello!me"]], - expected_subwords=[ - [[b"na", b"##[UNK]"], [b"hel", b"##lo", b"##[UNK]", b"##me"]]], - unknown_token="[UNK]", - vocab=_ENGLISH_VOCAB, - split_unknown_characters=True, - ), - # Test splitting out unknown characters, with unknown_token set to None. - dict( - tokens=[[b"nap", b"hello!me"]], - expected_subwords=[ - [[b"na", b"##p"], [b"hel", b"##lo", b"##!", b"##me"]]], - unknown_token=None, - vocab=_ENGLISH_VOCAB, - split_unknown_characters=True, - ), - ]) - def testWordPieceOpAndVerifyOffsets(self, - tokens, - expected_subwords, - vocab, - expected_start=None, - expected_end=None, - use_unknown_token=True, - unknown_token="[UNK]", - token_out_type=dtypes.string, - max_bytes_per_word=100, - split_unknown_characters=False): - for horizon in self._FORWARD_COMPATIBILITY_HORIZONS: - with compat.forward_compatibility_horizon(*horizon): - tokens_t = ragged_factory_ops.constant(tokens) - vocab_table = _CreateTable(vocab) - self.evaluate(vocab_table.initializer) - tokenizer = WordpieceTokenizer( - vocab_table, - unknown_token=unknown_token, - token_out_type=token_out_type, - max_bytes_per_word=max_bytes_per_word, - split_unknown_characters=split_unknown_characters, - ) - subwords_t, begin_t, end_t = tokenizer.tokenize_with_offsets(tokens_t) - self.assertAllEqual(subwords_t, expected_subwords) - - # Verify the indices by performing the following: - # - Extract subwords and join them together to form the original tokens. - # - Then compare the extracted tokens and original tokens. - begin, end = (self.evaluate((begin_t, end_t))) - - # If expected start/end offsets were provided, check them explicitly. - # Otherwise test the offsets by extracting subwords using token offsets - # from the original 'tokens' input. 
- if expected_start is None or expected_end is None: - extracted_tokens = _GetTokensFromWordpieceOffsets(tokens, begin, end) - self.assertAllEqual(extracted_tokens, tokens) - else: - self.assertAllEqual(begin, expected_start) - self.assertAllEqual(end, expected_end) - - @parameterized.parameters([ - dict( - tokens=[[[b"don't"], [b"treadness"], - [b"whatchamacallit?", b"you", b"hello"]], [[b"treadness"]]], - expected_subwords=[ - [[[b"don", b"##'", b"##t"]], [[b"tread", b"##ness"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"], [b"you"], - [b"hel", b"##lo"]]], [[[b"tread", b"##ness"]]] - ], - vocab=_ENGLISH_VOCAB, - ), - ]) - def testWordPieceOpWithMultipleRaggedRank(self, - tokens, - expected_subwords, - vocab, - expected_start=None, - expected_end=None, - use_unknown_token=True, - token_out_type=dtypes.string): - for row_splits_dtype in (dtypes.int32, dtypes.int64): - ragged_tokens = ragged_factory_ops.constant( - tokens, row_splits_dtype=row_splits_dtype) - vocab_table = _CreateTable(vocab) - self.evaluate(vocab_table.initializer) - tokenizer = WordpieceTokenizer(vocab_table, token_out_type=token_out_type) - subwords = tokenizer.tokenize(ragged_tokens) - self.assertAllEqual(subwords, expected_subwords) - - def testWordPieceOpWithIdReturned(self): - """Let the table determine how to do a lookup on unknown tokens.""" - tokens = ragged_factory_ops.constant( - [[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]]) - vocab_table = _CreateTable( - _ENGLISH_VOCAB, - 100 # OOV values - ) - self.evaluate(vocab_table.initializer) - tokenizer = WordpieceTokenizer( - vocab_table, unknown_token=None, token_out_type=dtypes.int64) - subwords, _, _ = tokenizer.tokenize_with_offsets(tokens) - - self.assertAllEqual(subwords, [[[0, 1, 2], [3], [96], [46]]]) - self.assertEqual(subwords.dtype, dtypes.int64) - - def testWordPieceOpWithInt32IdReturned(self): - """Let the table determine how to do a lookup on unknown tokens.""" - tokens = ragged_factory_ops.constant( - [[b"don't", b"tread", b"cantfindme", b"treadcantfindme"]]) - vocab_table = _CreateTable( - _ENGLISH_VOCAB, - 100 # OOV values - ) - self.evaluate(vocab_table.initializer) - tokenizer = WordpieceTokenizer( - vocab_table, unknown_token=None, token_out_type=dtypes.int32) - subwords, _, _ = tokenizer.tokenize_with_offsets(tokens) - - self.assertAllEqual(subwords, [[[0, 1, 2], [3], [96], [46]]]) - self.assertEqual(subwords.dtype, dtypes.int32) - - @parameterized.parameters([ - dict( - tokens=[[b"don't", b"treadness", b"whatchamacallit?"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread", b"##ness"], - [b"what", b"##cha", b"##ma", b"##call", - b"##it?"]]], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=[[[b"don't"], [b"treadness"], [b"whatchamacallit?"]]], - expected_subwords=[ - [[[b"don", b"##'", b"##t"]], [[b"tread", b"##ness"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"]]] - ], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=[[[b"don't", _Utf8(u"貿")], - [b"treadness", _Utf8(u"大")], - [b"whatchamacallit?", _Utf8(u"福")]]], - expected_subwords=[[[[b"don", b"##'", b"##t"], [_Utf8(u"貿")]], - [[b"tread", b"##ness"], [_Utf8(u"大")]], - [[ - b"what", b"##cha", b"##ma", b"##call", - b"##it?" 
- ], [_Utf8(u"福")]]]], - vocab=_MIXED_LANG_VOCAB, - ), - # Vector input - dict( - tokens=[_Utf8(u"купиха")], - expected_subwords=[[ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), - ]], - vocab=_RUSSIAN_VOCAB, - ), - # Scalar input - dict( - tokens=_Utf8(u"купиха"), - expected_subwords=[ - _Utf8(u"к"), - _Utf8(u"##уп"), - _Utf8(u"##иха"), - ], - vocab=_RUSSIAN_VOCAB, - ), - # 3D input with 1 ragged dimension. - dict( - tokens=[[b"don't", b"treadness", b"whatchamacallit?"]], - expected_subwords=[[[b"don", b"##'", b"##t"], [b"tread", b"##ness"], - [b"what", b"##cha", b"##ma", b"##call", - b"##it?"]]], - vocab=_ENGLISH_VOCAB, - ), - dict( - tokens=ragged_factory_ops.constant_value( - [[[b"don't"], [b"treadness"], [b"whatchamacallit?"]]], - ragged_rank=1), - expected_subwords=[ - [[[b"don", b"##'", b"##t"]], [[b"tread", b"##ness"]], - [[b"what", b"##cha", b"##ma", b"##call", b"##it?"]]] - ], - vocab=_ENGLISH_VOCAB, - ), - # Specifying max_chars_per_token. - dict( - tokens=[[b"don't", b"treadness"]], - max_chars_per_token=5, - expected_subwords=[ - [[b"don", b"##'", b"##t"], [b"tread", b"##ness"]]], - vocab=_ENGLISH_VOCAB + [b"trea", b"##d"], - ), - # Specifying max_chars_per_token to 4, so that "tread" is not found, and - # is split into "trea", "##d". - dict( - tokens=[[b"don't", b"treadness"]], - max_chars_per_token=4, - expected_subwords=[ - [[b"don", b"##'", b"##t"], [b"trea", b"##d", b"##ness"]]], - vocab=_ENGLISH_VOCAB + [b"trea", b"##d"], - ), - # Specifying max_chars_per_token where characters are multiple bytes. - dict( - tokens=[[_Utf8(u"大"), _Utf8(u"易")]], - max_chars_per_token=1, - expected_subwords=[[[_Utf8(u"大")], [_Utf8(u"易")]]], - vocab=_CHINESE_VOCAB, - ), - ]) - def testTensors(self, - tokens, - expected_subwords, - vocab, - max_chars_per_token=None, - expected_start=None, - expected_end=None, - use_unknown_token=True, - token_out_type=dtypes.string): - vocab_table = _CreateTable(vocab) - self.evaluate(vocab_table.initializer) - tokenizer = WordpieceTokenizer( - vocab_table, token_out_type=token_out_type, - max_chars_per_token=max_chars_per_token, - ) - subwords = tokenizer.tokenize(tokens) - self.assertAllEqual(subwords, expected_subwords) - - @test_util.run_in_graph_and_eager_modes - def testDetokenizeIsReversable(self): - - table = _CreateTable(_MIXED_LANG_VOCAB + [b""], 2) - self.evaluate(table.initializer) - - tokenizer = WordpieceTokenizer(table) - - word_lists = [ - [b"hello", b"there", b"my", b"name", b"is", b"treadness"], - [b"whatchamacallit?", b"you", b"said"], - [_Utf8(u"大"), _Utf8(u"易")], - ] - words = ragged_factory_ops.constant(word_lists) - - subwords_ids = tokenizer.tokenize(words) - - # detokeinze input shape is (batch, ragged-words, ragged-wordpieces) - words_output = tokenizer.detokenize(subwords_ids) - words_output = array_ops.squeeze(words_output, axis=-1) - - self.assertAllEqual(words_output, words) - - # detokeinze input shape is (batch, ragged-wordpieces) - subwords_id_seqs = subwords_ids.merge_dims(-2, -1) - words_output = tokenizer.detokenize(subwords_id_seqs) - self.assertAllEqual(words_output, words) - - # detokeinze input shape is a dense (batch, padded-wordpieces) - words_output = tokenizer.detokenize( - subwords_ids.merge_dims(-2, -1) - # len(_MIXED_LANG_VOCAB) is "" - .to_tensor(default_value=len(_MIXED_LANG_VOCAB))) - - self.assertAllEqual(words_output, words) - - @test_util.run_in_graph_and_eager_modes - def testDetokenizeFailsForSparseVocab(self): - vocab = ["a", "##b", "##c"] - ids = [0, 10, 20] - init = 
lookup_ops.KeyValueTensorInitializer( - vocab, ids, key_dtype=dtypes.string, value_dtype=dtypes.int64) - table = lookup_ops.StaticVocabularyTableV1( - init, num_oov_buckets=1, lookup_key_dtype=dtypes.string) - self.evaluate(table.initializer) - - tokenizer = WordpieceTokenizer(table) - words = ragged_factory_ops.constant([["abb", "abc"], ["abcbc"]]) - subwords_ids = tokenizer.tokenize(words) - - with self.assertRaisesRegex(errors_impl.InvalidArgumentError, - "detokenize.*?dense on the interval"): - result = tokenizer.detokenize(subwords_ids) - self.evaluate(result) - - def testVocabSize(self): - vocab_table = _CreateTable( - _ENGLISH_VOCAB, - 100 # OOV values - ) - self.evaluate(vocab_table.initializer) - tokenizer = WordpieceTokenizer( - vocab_table, unknown_token=None) - self.assertEqual(self.evaluate(tokenizer.vocab_size()), 121) - -if __name__ == "__main__": - test.main()
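For reference, a minimal usage sketch of the WordpieceTokenizer API exercised by the deleted tests above (not part of this patch; the vocabulary and inputs are illustrative assumptions):

import tensorflow as tf
import tensorflow_text as tf_text

# Hypothetical wordpiece vocabulary; real vocabularies are learned from a corpus.
vocab = ["[UNK]", "don", "##'", "##t", "tread", "##ness"]
lookup = tf.lookup.StaticVocabularyTable(
    tf.lookup.KeyValueTensorInitializer(
        keys=vocab, values=tf.range(len(vocab), dtype=tf.int64)),
    num_oov_buckets=1)

tokenizer = tf_text.WordpieceTokenizer(lookup, token_out_type=tf.string)
subwords = tokenizer.tokenize([["don't", "treadness"]])
# subwords is a RaggedTensor of subword pieces, e.g.
# [[[b"don", b"##'", b"##t"], [b"tread", b"##ness"]]]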
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordshape_ops.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordshape_ops.py deleted file mode 100644 index 921c2a8d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordshape_ops.py +++ /dev/null
@@ -1,409 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Text shape ops. - -A variety of useful regex helper functions using the RE2 library -(string_ops.regex_full_match) for matching various relevant patterns within -input text. - -Naming convention: - is_$PROPERTY: the entire string is composed of $PROPERTY - has_$PROPERTY: the string contains at least one $PROPERTY. - has_no_$PROPERTY: the string does not contain any $PROPERTY. - begins_with_$PROPERTY: the string begins with $PROPERTY characters. - ends_with_$PROPERTY: the string ends with $PROPERTY characters. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re -import enum - -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import string_ops - -#=============================================================================== -# Implementation: Regular Expressions for WordShapes -#=============================================================================== - - -def _emoticon_regex(): - """Regexp to detect emoticons.""" - emoticons = [ - ":-)", ":)", ":o)", ":]", ":3", ":>", "=]", "=)", ":}", ":^)", ":-D", - ":-))", ":-)))", ":-))))", ":-)))))", ">:[", ":-(", ":(", ":-c", ":c", - ":-<", ":<", ":-[", ":[", ":{", ";(", ":-||", ":@", ">:(", ":'-(", ":'(", - ":'-)", ":')", "D:<", ">:O", ":-O", ":-o", ":*", ":-*", ":^*", ";-)", - ";)", "*-)", "*)", ";-]", ";]", ";^)", ":-,", ">:P", ":-P", ":p", "=p", - ":-p", "=p", ":P", "=P", ";p", ";-p", ";P", ";-P" - ">:\\", ">:/", ":-/", ":-.", ":/", ":\\", "=/", "=\\", ":|", ":-|", ":$", - ":-#", ":#", "O:-)", "0:-)", "0:)", "0;^)", ">:)", ">;)", ">:-)", "}:-)", - "}:)", "3:-)", ">_>^", "^<_<", "|;-)", "|-O", ":-J", ":-&", ":&", "#-)", - "%-)", "%)", "<:-|", "~:-\\", "*<|:-)", "=:o]", ",:-)", "7:^]", "</3", - "<3", "8-)", "^_^", ":D", ":-D", "=D", "^_^;;", "O=)", "}=)", "B)", "B-)", - "=|", "-_-", "o_o;", "u_u", ":-\\", ":s", ":S", ":-s", ":-S", ";*", ";-*" - ":(", "=(", ">.<", ">:-(", ">:(", ">=(", ";_;", "T_T", "='(", ">_<", "D:", - ":o", ":-o", "=o", "o.o", ":O", ":-O", "=O", "O.O", "x_x", "X-(", "X(", - "X-o", "X-O", ":X)", "(=^.^=)", "(=^..^=)", "=^_^=", "-<@%", ":(|)", - ":(:)", "(]:{", "<\\3", "~@~", "8'(", "XD", "DX" - ] - # Note: unicode-containing emojis are added manually-escaped here. - return "|".join(map(re.escape, emoticons)) + "|".join( - [u"\\:\u3063\\)", u"\\:\u3063C", u"\u0ca0\\_\u0ca0"]) - - -def _emoji_regex(): - """Returns regexp to detect emoji characters. - - Generated from https://unicode.org/emoji/charts/full-emoji-list.html, - https://unicode.org/Public/emoji/13.0/emoji-sequences.txt. 
- """ - char_class = "".join([ - "[", - u"\u203c", u"\u2049", u"\u2139", - u"\u2194", "-", u"\u2199", - u"\u21a9", u"\u21aa", - u"\u231a", u"\u231b", - u"\u2328", u"\u23cf", - u"\u23e9", "-", u"\u23f3", - u"\u23f8", "-", u"\u23fa", - u"\u24c2", u"\u25aa", u"\u25ab" - u"\u25b6", u"\u25c0", - u"\u25fb", "-", u"\u25fe", - u"\u2600", "-", u"\u26ff", - u"\u2702", u"\u2705" - u"\u2708", "-", u"\u270d", u"\u270f", - u"\u2712", u"\u2714", u"\u2716", u"\u271d", - u"\u2721", u"\u2728", u"\u2733", u"\u2734", - u"\u2744", u"\u2747", u"\u274c", u"\u274e", - u"\u2753", "-", u"\u2755", u"\u2757", - u"\u2763", u"\u2764", - u"\u2795", "-", u"\u2797", - u"\u2934", u"\u2935", - u"\u2b05", "-", u"\u2b07", - u"\u2b1b", u"\u2b1c", u"\u2b50", u"\u2b55", - u"\u3030", u"\u303d", u"\u3297", u"\u3299", - u"\U0001f004", u"\U0001f0cf", - u"\U0001f170", u"\U0001f171", u"\U0001f17e", u"\U0001f17f", - u"\U0001f18e", - u"\U0001f191", "-", u"\U0001f19a", - u"\U0001f1e6", "-", u"\U0001f1ff", - u"\U0001f201", u"\U0001f202", - u"\U0001f21a", u"\U0001f22f", - u"\U0001f232", "-", u"\U0001f23a", - u"\U0001f250", u"\U0001f251", - u"\U0001f300", "-", u"\U0001f6ff", - u"\U0001f900", "-", u"\U0001f9ff", - u"\U0001fa70", "-", u"\U0001fa74", - u"\U0001fa78", "-", u"\U0001fa7a", - u"\U0001fa80", "-", u"\U0001fa86", - u"\U0001fa90", "-", u"\U0001faa8", - u"\U0001fab0", "-", u"\U0001fab6", - u"\U0001fac0", "-", u"\U0001fac2", - u"\U0001fad0", "-", u"\U0001fad6", - "]" - ]) # pyformat:disable - return ".*" + char_class + ".*" - - -def _begins_with_open_quote_regex(): - # Note: RE2 syntax doesn't support char class intersection. - char_class = "".join([ - "\"", "'", "`", u"\uff07", u"\uff02", u"\u2018", u"\u201a", u"\u201b", - u"\u201c", u"\u00ab", u"\u201e", u"\u201f" + u"\u2039", u"\u300c", - u"\u300e", u"\u301d", u"\u2e42" + u"\uff62", u"\ufe41", u"\ufe43" - ]) - return "``.*|[" + char_class + "][^" + char_class + "]*" - - -def _ends_with_close_quote_regex(): - char_class = "".join([ - "\"", "'", "`", u"\uff07", u"\uff02", u"\u00bb", u"\u2019", u"\u201d", - u"\u203a", u"\u300d", u"\u300f", u"\u301e" + u"\u301f", u"\ufe42", - u"\ufe44", u"\uff63" - ]) - - return ".*''|[^" + char_class + "]*[" + char_class + "]" - - -class WordShape(enum.Enum): - """Values for the 'pattern' arg of the wordshape op. - - The supported wordshape identifiers are: - %(identifier_list)s - """ - HAS_PUNCTUATION_DASH = r".*\p{Pd}+.*" - HAS_NO_DIGITS = r"\P{Nd}*" - HAS_SOME_DIGITS = r".*\P{Nd}\p{Nd}.*|.*\p{Nd}\P{Nd}.*" - HAS_ONLY_DIGITS = r"\p{Nd}+" - IS_NUMERIC_VALUE = r"([+-]?((\p{Nd}+\.?\p{Nd}*)|(\.\p{Nd}+)))([eE]-?\p{Nd}+)?" 
- # IS_WHITESPACE = r"\p{Whitespace}+" - HAS_NO_PUNCT_OR_SYMBOL = r"[^\p{P}\p{S}]*" - HAS_SOME_PUNCT_OR_SYMBOL = r".*[^\p{P}\p{S}][\p{P}\p{S}].*|.*[\p{P}\p{S}][^\p{P}\p{S}].*" # pylint: disable=line-too-long - IS_PUNCT_OR_SYMBOL = r"[\p{P}|\p{S}]+" - BEGINS_WITH_PUNCT_OR_SYMBOL = r"[\p{P}\p{S}].*" - ENDS_WITH_PUNCT_OR_SYMBOL = r".*[\p{P}\p{S}]" - # ENDS_WITH_SENTENCE_TERMINAL = r".*[\p{Sentence_Terminal}]" - # ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL = r".*[\p{Sentence_Terminal}]{2}" - # ENDS_WITH_TERMINAL_PUNCT = r".*[\p{Terminal_Punctuation}]" - # ENDS_WITH_MULTIPLE_TERMINAL_PUNCT = r".*[\p{Terminal_Punctuation}]{2}" - ENDS_WITH_ELLIPSIS = r".*(\.{3}|[" + u"\u2026" + u"\u22ef" + "])" - IS_EMOTICON = _emoticon_regex() - ENDS_WITH_EMOTICON = r".*(" + _emoticon_regex() + r")$" - HAS_EMOJI = r".*(" + _emoji_regex() + r")$" - IS_ACRONYM_WITH_PERIODS = r"(\p{Lu}\.)+" - IS_UPPERCASE = r"\p{Lu}+" - IS_LOWERCASE = r"\p{Ll}+" - HAS_MIXED_CASE = r".*\p{Lu}.*\p{Ll}.*|.*\p{Ll}.*\p{Lu}.*" - IS_MIXED_CASE_LETTERS = r"\p{L}*\p{Lu}\p{L}*\p{Ll}\p{L}*|\p{L}*\p{Ll}\p{L}*\p{Lu}\p{L}*" # pylint: disable=line-too-long - # Is a single capital letter alone a title case? - HAS_TITLE_CASE = r"\P{L}*[\p{Lu}\p{Lt}]\p{Ll}+.*" - # HAS_NO_QUOTES = "[^\"'`\\p{Quotation_Mark}]*" - BEGINS_WITH_OPEN_QUOTE = _begins_with_open_quote_regex() - ENDS_WITH_CLOSE_QUOTE = _ends_with_close_quote_regex() - # HAS_QUOTE = r"^[`\p{Quotation_Mark}].*|.*[`\p{Quotation_Mark}]$" - HAS_MATH_SYMBOL = r".*\p{Sm}.*" - HAS_CURRENCY_SYMBOL = r".*\p{Sc}.*" - HAS_NON_LETTER = r".*\P{L}.*" - - -# Note that the entries in _wordshape_doc must be indented 10 spaces to display -# correctly in the docstring. -_wordshape_doc = { - WordShape.HAS_PUNCTUATION_DASH: - """ - The input contains at least one unicode dash character. - - Note that this uses the Pd (Dash) unicode property. This property will - not match to soft-hyphens and katakana middle dot characters. - """, - WordShape.HAS_NO_DIGITS: - """ - The input contains no digit characters. - """, - WordShape.HAS_SOME_DIGITS: - """ - The input contains a mix of digit characters and non-digit - characters. - """, - WordShape.HAS_ONLY_DIGITS: - """ - The input consists entirely of unicode digit characters. - """, - WordShape.IS_NUMERIC_VALUE: - """ - The input is parseable as a numeric value. This will match a - fairly broad set of floating point and integer representations (but - not Nan or Inf). - """, - # IS_WHITESPACE docs - WordShape.HAS_NO_PUNCT_OR_SYMBOL: - """ - The input contains no unicode punctuation or symbol characters. - """, - WordShape.HAS_SOME_PUNCT_OR_SYMBOL: - """ - The input contains a mix of punctuation or symbol characters, - and non-punctuation non-symbol characters. - """, - WordShape.IS_PUNCT_OR_SYMBOL: - """ - The input contains only punctuation and symbol characters. - """, - WordShape.BEGINS_WITH_PUNCT_OR_SYMBOL: - """ - The input starts with a punctuation or symbol character. - """, - WordShape.ENDS_WITH_PUNCT_OR_SYMBOL: - """ - The input ends with a punctuation or symbol character. - """, - # ENDS_WITH_SENTENCE_TERMINAL docs - # ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL docs - # ENDS_WITH_TERMINAL_PUNCT docs - # ENDS_WITH_MULTIPLE_TERMINAL_PUNCT docs - WordShape.ENDS_WITH_ELLIPSIS: - """ - The input ends with an ellipsis (i.e. with three or more - periods or a unicode ellipsis character).""", - WordShape.IS_EMOTICON: - """ - The input is a single emoticon. - """, - WordShape.ENDS_WITH_EMOTICON: - """ - The input ends with an emoticon. 
- """, - WordShape.HAS_EMOJI: - """ - The input contains an emoji character. - - See http://www.unicode.org/Public/emoji/1.0//emoji-data.txt. - Emojis are in unicode ranges `2600-26FF`, `1F300-1F6FF`, and - `1F900-1F9FF`. - """, - WordShape.IS_ACRONYM_WITH_PERIODS: - """ - The input is a period-separated acronym. - This matches for strings of the form "I.B.M." but not "IBM". - """, - WordShape.IS_UPPERCASE: - """ - The input contains only uppercase letterforms. - """, - WordShape.IS_LOWERCASE: - """ - The input contains only lowercase letterforms. - """, - WordShape.HAS_MIXED_CASE: - """ - The input contains both uppercase and lowercase letterforms. - """, - WordShape.IS_MIXED_CASE_LETTERS: - """ - The input contains only uppercase and lowercase letterforms. - """, - WordShape.HAS_TITLE_CASE: - """ - The input has title case (i.e. the first character is upper or title - case, and the remaining characters are lowercase). - """, - # HAS_NO_QUOTES docs - WordShape.BEGINS_WITH_OPEN_QUOTE: - r""" - The input begins with an open quote. - - The following strings are considered open quotes: - - ``` - " QUOTATION MARK - ' APOSTROPHE - ` GRAVE ACCENT - `` Pair of GRAVE ACCENTs - \uFF02 FULLWIDTH QUOTATION MARK - \uFF07 FULLWIDTH APOSTROPHE - \u00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - \u2018 LEFT SINGLE QUOTATION MARK - \u201A SINGLE LOW-9 QUOTATION MARK - \u201B SINGLE HIGH-REVERSED-9 QUOTATION MARK - \u201C LEFT DOUBLE QUOTATION MARK - \u201E DOUBLE LOW-9 QUOTATION MARK - \u201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK - \u2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - \u300C LEFT CORNER BRACKET - \u300E LEFT WHITE CORNER BRACKET - \u301D REVERSED DOUBLE PRIME QUOTATION MARK - \u2E42 DOUBLE LOW-REVERSED-9 QUOTATION MARK - \uFF62 HALFWIDTH LEFT CORNER BRACKET - \uFE41 PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET - \uFE43 PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET - ``` - - Note: U+B4 (acute accent) not included. - """, - WordShape.ENDS_WITH_CLOSE_QUOTE: - r""" - The input ends witha closing quote character. - - The following strings are considered close quotes: - - ``` - " QUOTATION MARK - ' APOSTROPHE - ` GRAVE ACCENT - '' Pair of APOSTROPHEs - \uFF02 FULLWIDTH QUOTATION MARK - \uFF07 FULLWIDTH APOSTROPHE - \u00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - \u2019 RIGHT SINGLE QUOTATION MARK - \u201D RIGHT DOUBLE QUOTATION MARK - \u203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - \u300D RIGHT CORNER BRACKET - \u300F RIGHT WHITE CORNER BRACKET - \u301E DOUBLE PRIME QUOTATION MARK - \u301F LOW DOUBLE PRIME QUOTATION MARK - \uFE42 PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET - \uFE44 PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET - \uFF63 HALFWIDTH RIGHT CORNER BRACKET - ``` - - Note: U+B4 (ACUTE ACCENT) is not included. - """, - # HAS_QUOTE docs - WordShape.HAS_MATH_SYMBOL: - """ - The input contains a mathematical symbol. - """, - WordShape.HAS_CURRENCY_SYMBOL: - """ - The input contains a currency symbol. - """, - WordShape.HAS_NON_LETTER: - """ - The input contains a non-letter character. - """, -} - - -def _add_identifier_list_to_docstring(func): - items = [("WordShape." + ws.name, doc) for ws, doc in _wordshape_doc.items()] - identifier_list = "".join( - "\n * `%s`:%s\n" % (name, doc) for (name, doc) in sorted(items)) - func.__doc__ = func.__doc__ % dict(identifier_list=identifier_list) - - -# Use the wordshape docstring we created above. 
-_add_identifier_list_to_docstring(WordShape) - - -def wordshape(input_tensor, pattern, name=None): - r"""Determine wordshape features for each input string. - - In this example, we test for title case (the first character is upper or - title case, and the remaining characters are lowercase). - >>> input = [ - ... u"abc", u"ABc", u"ABC", u"Abc", u"aBcd", u"\u01c8bc".encode("utf-8") - ... ] - >>> wordshape(input, WordShape.HAS_TITLE_CASE) - <tf.Tensor: shape=(6,), dtype=bool, - numpy=array([False, False, False, True, False, True])> - - Args: - input_tensor: string `Tensor` with any shape. - pattern: A `tftext.WordShape` or a list of WordShapes. - name: A name for the operation (optional). - - Returns: - `<bool>[input_tensor.shape + pattern.shape]`: A tensor where - `result[i1...iN, j]` is true if `input_tensor[i1...iN]` has the wordshape - specified by `pattern[j]`. - - Raises: - ValueError: If `pattern` contains an unknown identifier. - """ - if isinstance(pattern, WordShape): - return string_ops.regex_full_match(input_tensor, pattern.value, name) - elif (isinstance(pattern, (list, tuple)) and - all(isinstance(s, WordShape) for s in pattern)): - with ops.name_scope(name, "Wordshape", input_tensor): - return array_ops.stack([wordshape(input_tensor, s) for s in pattern], - axis=-1) - else: - raise TypeError( - "Expected 'pattern' to be a single WordShape or a list of WordShapes.")
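A minimal sketch of how the wordshape op documented above is typically called with several patterns at once (not part of this patch; the inputs are illustrative assumptions):

import tensorflow as tf
import tensorflow_text as tf_text

tokens = tf.constant(["Hello", "WORLD", "42", ":-)"])
shapes = tf_text.wordshape(
    tokens,
    [tf_text.WordShape.HAS_TITLE_CASE,
     tf_text.WordShape.IS_UPPERCASE,
     tf_text.WordShape.HAS_ONLY_DIGITS])
# shapes is a <bool>[4, 3] tensor; for example the first row ("Hello")
# is expected to be [True, False, False].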
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordshape_ops_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordshape_ops_test.py deleted file mode 100644 index dc35609..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ops/wordshape_ops_test.py +++ /dev/null
@@ -1,575 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for wordshape ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import test_util -from tensorflow.python.platform import test -from tensorflow_text.python.ops import wordshape_ops - - -@test_util.run_all_in_graph_and_eager_modes -class Utf8CharsOpTest(test.TestCase): - - def testDashShape(self): - test_string = [ - u"a-b", u"a\u2010b".encode("utf-8"), u"a\u2013b".encode("utf-8"), - u"a\u2e3ab".encode("utf-8"), u"abc".encode("utf-8") - ] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.HAS_PUNCTUATION_DASH) - self.assertAllEqual(shapes, [True, True, True, True, False]) - - def testNoDigits(self): - test_string = [u"abc", u"a\u06f3m".encode("utf-8")] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_NO_DIGITS) - self.assertAllEqual(shapes, [True, False]) - - def testSomeDigits(self): - test_string = [ - u"abc", u"a\u06f3m".encode("utf-8"), u"90\u06f3".encode("utf-8"), - u"a9b8c7", u"9ab87c", u"\u06f3m\u06f3" - ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_SOME_DIGITS) - self.assertAllEqual(shapes, [False, True, False, True, True, True]) - - def testSomeDigitAndCurrency(self): - test_string = [ - u"abc", u"a\u06f3m".encode("utf-8"), u"90\u06f3".encode("utf-8"), - u"a9b8c7", u"$9ab87c$", u"\u06f3m\u06f3" - ] - pattern_list = [ - wordshape_ops.WordShape.HAS_SOME_DIGITS, - wordshape_ops.WordShape.HAS_CURRENCY_SYMBOL - ] - shapes = wordshape_ops.wordshape(test_string, pattern=pattern_list) - self.assertAllEqual(shapes, [[False, False], [True, False], [False, False], - [True, False], [True, True], [True, False]]) - - def testOnlyDigits(self): - test_string = [u"abc", u"a9b".encode("utf-8"), u"90\u06f3".encode("utf-8")] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_ONLY_DIGITS) - self.assertAllEqual(shapes, [False, False, True]) - - def testNumericValue(self): - test_string = [u"98.6", u"-0.3", u"2.783E4", u"e4", u"1e10"] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.IS_NUMERIC_VALUE) - self.assertAllEqual(shapes, [True, True, True, False, True]) - - def SKIP_testWhitespace(self): - test_string = [ - u" ", u"\v", u"\r\n", u"\u3000".encode("utf-8"), u" a", u"abc", u"a\nb", - u"\u3000 \n".encode("utf-8") - ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.IS_WHITESPACE) - self.assertAllEqual(shapes, - [True, True, True, True, False, False, False, True]) - - def testNoPunct(self): - test_string = [u"abc", u"a;m".encode("utf-8")] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.HAS_NO_PUNCT_OR_SYMBOL) - self.assertAllEqual(shapes, [True, False]) - - def testSomePunct(self): - test_string = [ - u"abc", u"a;m".encode("utf-8"), u".,!".encode("utf-8"), 
u"a@b.c,", - u".ab8;c", u"\u0f08m\u0f08" - ] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.HAS_SOME_PUNCT_OR_SYMBOL) - self.assertAllEqual(shapes, [False, True, False, True, True, True]) - - def testAllPunct(self): - test_string = [u"abc", u"a;b".encode("utf-8"), u";,\u0f08".encode("utf-8")] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.IS_PUNCT_OR_SYMBOL) - self.assertAllEqual(shapes, [False, False, True]) - - def testLeadingPunct(self): - test_string = [u"abc", u";b", u"b;", u";,\u0f08".encode("utf-8")] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.BEGINS_WITH_PUNCT_OR_SYMBOL) - self.assertAllEqual(shapes, [False, True, False, True]) - - def testTrailingPunct(self): - test_string = [u"abc", u";b", u"b;", u";,\u0f08".encode("utf-8")] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.ENDS_WITH_PUNCT_OR_SYMBOL) - self.assertAllEqual(shapes, [False, False, True, True]) - - def SKIP_testSentenceTerminal(self): - test_string = [u"abc", u".b", u"b.", u"b,", u"b!!!", u"abc?!"] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.ENDS_WITH_SENTENCE_TERMINAL) - self.assertAllEqual(shapes, [False, False, True, False, True, True]) - - def SKIP_testMultipleSentenceTerminal(self): - test_string = [u"abc", u".b", u"b.", u"b,", u"b!!!", u"abc?!"] - shapes = wordshape_ops.wordshape( - test_string, - wordshape_ops.WordShape.ENDS_WITH_MULTIPLE_SENTENCE_TERMINAL) - self.assertAllEqual(shapes, [False, False, False, False, True, True]) - - def SKIP_testTerminalPunct(self): - test_string = [u"abc", u".b", u"b.", u"b,", u"b!!!", u"abc?!"] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.ENDS_WITH_TERMINAL_PUNCT) - self.assertAllEqual(shapes, [False, False, True, True, True, True]) - - def SKIP_testMultipleTerminalPunct(self): - test_string = [u"abc", u".b", u"b.", u"b,,", u"b!!!", u"abc?!"] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT) - self.assertAllEqual(shapes, [False, False, False, True, True, True]) - - def testEllipsis(self): - test_string = [u"abc", u"abc...", u"...abc", u"abc\u2026".encode("utf-8")] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.ENDS_WITH_ELLIPSIS) - self.assertAllEqual(shapes, [False, True, False, True]) - - def testEndsWithEmoticon(self): - test_string = [u"abc", u":-)", u"O:)", u"8)x", u":\u3063C", u"abc:-)"] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.ENDS_WITH_EMOTICON) - self.assertAllEqual(shapes, [False, True, True, False, True, True]) - - def testIsEmoticon(self): - test_string = [u"abc", u":-)", u"O:)", u"8)x", u":\u3063C", u"abc:-)"] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.IS_EMOTICON) - self.assertAllEqual(shapes, [False, True, False, False, True, False]) - - def testEmoji(self): - test_string = [ - u"\U0001f604m".encode("utf-8"), u"m\u2605m".encode("utf-8"), u"O:)", - u"m\U0001f604".encode("utf-8"), u"\u2105k".encode("utf-8") - ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_EMOJI) - self.assertAllEqual(shapes, [True, True, False, True, False]) - - # This is by no means exhaustive, but it's a broad and diverse sample - # to more throroughly test the emoji regex. 
- def testExtendedEmojis(self): - test_string = [ - "‼", - "⁉", - "ℹ", - "↘", - "↩", - "⌚", - "⌛", - "⏳", - "⌨", - "⏏", - "⏩", - "⏺", - "⏰", - "⏱", - "⏲", - "🕰", - "Ⓜ", - "▪", - "⬛", - "⬜", - "✂", - "✅", - "✈", - "✉", - "✊", - "✊🏿", - "✋", - "✌", - "🤘🏾", - "🤞🏿", - "✍", - "✏", - "✒", - "✔", - "✝", - "✡", - "✨", - "✳", - "✴", - "❄", - "❇", - "❌", - "❎", - "❓", - "❔", - "❗", - "❕", - "❣", - "❤", - "➕", - "➖", - "➗", - "⤴", - "⤵", - "⬅", - "⭐", - "⭕", - "〰", - "〽", - "㊗", - "🀄", - "🃏", - "🅰", - "🅱", - "🅾", - "🅿", - "🆎", - "🆑", - "🆒", - "🆔", - "🆗", - "🆘", - "🆙", - "🆚", - "🈁", - "🈂", - "🈚", - "🈯", - "🈴", - "🈳", - "🈺", - "🉐", - "🉑", - "🌍", - "🏔", - "🍾", - "🐯", - "🐆", - "🦇", - "🦅", - "🐝", - "🦖", - "🐉", - "🦠", - "🔎", - "⚗", - "🕯", - "💡", - "📽", - "📡", - "🧮", - "🔋", - "📲", - "☎", - "🥁", - "🎧", - "🎼", - "🔊", - "💍", - "👗", - "🕶", - "🎭", - "🔮", - "🧬", - "🔬", - "🤹", - "🚵", - "🧗", - "🧗🏼♀️", - "🧗🏿♂️", - "🥋", - "🎳", - "🏈", - "🏅", - "🎑", - "🎉", - "🎄", - "🌊", - "⚡", - "🌖", - "🚀", - "🚠", - "🛩", - "🛴", - "🏎", - "🚅", - "🌆", - "🕌", - "🕍", - "⛪", - "🗽", - "🏘", - "🍵", - "🍫", - "🦑", - "🍱", - "🥦", - "🥑", - "🌴", - "🌼", - "🦂", - "🐬", - "🥀", - "🧖🏾", - "🧕🏿", - "🧔🏼", - "🧒🏾", - "🧛", - "🧝🏻", - "🧞", - "🧟", - "🧙🏾", - "🧚🏻", - "💃🏽", - "👯", - "🧘", - "🦱", - "👪", - "👩👩👧👦", - "👨🏿🤝👨🏻", - "🕵️♀️", - "🧑🚀", - "👩✈️", - "🧑🏿⚕️", - "🧑🏾⚖️", - "🧠", - "👁️🗨️", - "🙉", - "🤗", - "👏", - "💏", - "🧯", - "🛒", - "🧺", - "🧷", - "💊", - "🧲", - "⛓", - "⚖", - "🛡", - "🏹", - "🎣", - "⚔", - "🔨", - "📌", - "📊", - "📈", - "💹", - "💸", - "💵", - "📜", - "📚", - "📆", - "💼", - "📝", - "📬", - "🔏", - "🔓", - "🔑", - "🗃", - "🚿", - "🛏", - "🗿", - "🏧", - "🚮", - "🚰", - "♿", - "🚻", - "🚾", - "🛄", - "⚠", - "🚸", - "⛔", - "🚭", - "☣", - "🔃", - "🔚", - "🔚", - "⚛", - "♈", - "🔆", - "🎦", - "⚕", - "♻", - "⚜", - "💠", - "🏁", - "🚩", - "🎌", - "🏴☠️", - "🇺🇸", - "🇨🇭", - "🇺🇦", - "🇿🇼", - "🇦🇴", - "🇦🇨", - "🇦🇶", - "🇺🇳", - "🇪🇺", - "🇧🇿", - "🇵🇲", - "🇮🇴", - "🇻🇮", - "🇨🇽", - "🏴", - "🇧🇱", - u"\U0001fa70".encode("utf-8"), # ballet shoes. - u"\U0001fa7a".encode("utf-8"), # stethoscope. - u"\U0001fa80".encode("utf-8"), # yo-yo. - u"\U0001fa82".encode("utf-8"), # parachute. - u"\U0001fa86".encode("utf-8"), # nesting dolls. - u"\U0001fa90".encode("utf-8"), # ringed planet. - u"\U0001fa97".encode("utf-8"), # accordion. - u"\U0001fa99".encode("utf-8"), # coin. - u"\U0001fa9c".encode("utf-8"), # ladder. - u"\U0001fa9f".encode("utf-8"), # window. - u"\U0001faa1".encode("utf-8"), # sewing needle. - u"\U0001faa8".encode("utf-8"), # rock. - u"\U0001fab0".encode("utf-8"), # fly. - u"\U0001fab4".encode("utf-8"), # potted plant. - u"\U0001fab6".encode("utf-8"), # feather. - u"\U0001fac0".encode("utf-8"), # anatomical heart. - u"\U0001fac2".encode("utf-8"), # people hugging. - u"\U0001fad0".encode("utf-8"), # blueberries. - u"\U0001fad2".encode("utf-8"), # olive. - u"\U0001fad6".encode("utf-8"), # teapot. 
- ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_EMOJI) - self.assertAllEqual(shapes, [True] * len(test_string)) - - def testAcronym(self): - test_string = [u"abc", u"A.B.", u"A.B.C.)", u"ABC"] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.IS_ACRONYM_WITH_PERIODS) - self.assertAllEqual(shapes, [False, True, False, False]) - - def testAllUppercase(self): - test_string = [u"abc", u"ABc", u"ABC"] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.IS_UPPERCASE) - self.assertAllEqual(shapes, [False, False, True]) - - def testAllLowercase(self): - test_string = [u"abc", u"ABc", u"ABC"] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.IS_LOWERCASE) - self.assertAllEqual(shapes, [True, False, False]) - - def testMixedCase(self): - test_string = [u"abc", u"ABc", u"ABC", u"abC"] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_MIXED_CASE) - self.assertAllEqual(shapes, [False, True, False, True]) - - def testMixedCaseLetters(self): - test_string = [u"abc", u"ABc", u"ABC", u"abC", u"abC."] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.IS_MIXED_CASE_LETTERS) - self.assertAllEqual(shapes, [False, True, False, True, False]) - - def testTitleCase(self): - test_string = [ - u"abc", u"ABc", u"ABC", u"Abc", u"aBcd", u"\u01c8bc".encode("utf-8") - ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_TITLE_CASE) - self.assertAllEqual(shapes, [False, False, False, True, False, True]) - - def SKIP_testNoQuotes(self): - test_string = [ - u"abc", u"\"ABc", u"ABC'", u"Abc\u201c".encode("utf-8"), u"aBcd" - ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_NO_QUOTES) - self.assertAllEqual(shapes, [True, False, False, False, True]) - - def testOpenQuote(self): - test_string = [ - u"''", u"ABc\"", u"\uff07".encode("utf-8"), u"\u2018".encode("utf-8"), - u"aBcd", u"``" - ] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.BEGINS_WITH_OPEN_QUOTE) - self.assertAllEqual(shapes, [False, False, True, True, False, True]) - - def testCloseQuote(self): - test_string = [ - u"''", u"ABc\"", u"\u300f".encode("utf-8"), u"\u2018".encode("utf-8"), - u"aBcd", u"``" - ] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.ENDS_WITH_CLOSE_QUOTE) - self.assertAllEqual(shapes, [True, True, True, False, False, False]) - - def SKIP_testQuote(self): - test_string = [ - u"''", u"ABc\"", u"\uff07".encode("utf-8"), u"\u2018".encode("utf-8"), - u"aBcd", u"``", u"\u300d".encode("utf-8") - ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_QUOTE) - self.assertAllEqual(shapes, [True, True, True, True, False, True, True]) - - def testMathSymbol(self): - test_string = [u"''", u"\u003c", u"\uff07".encode("utf-8")] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_MATH_SYMBOL) - self.assertAllEqual(shapes, [False, True, False]) - - def testCurrencySymbol(self): - test_string = [u"''", u"ABc$", u"$\uff07".encode("utf-8")] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.HAS_CURRENCY_SYMBOL) - self.assertAllEqual(shapes, [False, True, True]) - - def testCurrencySymbolAtBeginning(self): - test_string = [u"''", u"ABc$", u"$ABc", u"A$Bc"] - shapes = wordshape_ops.wordshape( - test_string, wordshape_ops.WordShape.HAS_CURRENCY_SYMBOL) - self.assertAllEqual(shapes, [False, True, True, True]) - - def 
testNonLetters(self): - test_string = [ - u"''", u"ABc", u"\uff07".encode("utf-8"), u"\u2018".encode("utf-8"), - u"aBcd", u"`#ab", u"\u300d".encode("utf-8") - ] - shapes = wordshape_ops.wordshape(test_string, - wordshape_ops.WordShape.HAS_NON_LETTER) - self.assertAllEqual(shapes, [True, False, True, True, False, True, True]) - - def testMultipleShapes(self): - test_string = [u"abc", u"ABc", u"ABC"] - shapes = wordshape_ops.wordshape(test_string, [ - wordshape_ops.WordShape.IS_UPPERCASE, - wordshape_ops.WordShape.IS_LOWERCASE - ]) - self.assertAllEqual(shapes, [[False, True], [False, False], [True, False]]) - - def testNonShapePassedToShapeArg(self): - test_string = [u"abc", u"ABc", u"ABC"] - with self.assertRaises(TypeError): - wordshape_ops.wordshape(test_string, "This is not a Shape") - - -if __name__ == "__main__": - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/python/ragged/ragged_tensor_to_tensor_test.py b/third_party/tensorflow-text/src/tensorflow_text/python/ragged/ragged_tensor_to_tensor_test.py deleted file mode 100644 index 75e622a..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/python/ragged/ragged_tensor_to_tensor_test.py +++ /dev/null
@@ -1,81 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# -*- coding: utf-8 -*- -"""Tests for ragged_tensor_to_tensor op.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import tensorflow as tf -import tensorflow_text as tf_text - -from tensorflow.lite.python import interpreter -from tensorflow.python.framework import test_util -from tensorflow.python.platform import test - - -@test_util.run_all_in_graph_and_eager_modes -class RaggedTensorToTensorTest(test_util.TensorFlowTestCase): - - def testTfLite(self): - """Checks TFLite conversion and inference.""" - - class TokenizerModel(tf.keras.Model): - - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.fwp = tf_text.FastWordpieceTokenizer(['minds', 'apart', '[UNK]']) - - @tf.function(input_signature=[ - tf.TensorSpec(shape=[None], dtype=tf.string, name='input') - ]) - def call(self, input_tensor): - return {'tokens': self.fwp.tokenize(input_tensor).to_tensor()} - - # Test input data. - input_data = np.array(['Some minds are better kept apart']) - - # Define a model. - model = TokenizerModel() - # Do TF inference. - tf_result = model(tf.constant(input_data))['tokens'] - - # Convert to TFLite. - converter = tf.lite.TFLiteConverter.from_keras_model(model) - converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS] - converter.allow_custom_ops = True - tflite_model = converter.convert() - - # Do TFLite inference. - interp = interpreter.InterpreterWithCustomOps( - model_content=tflite_model, - custom_op_registerers=tf_text.tflite_registrar.SELECT_TFTEXT_OPS) - print(interp.get_signature_list()) - tokenize = interp.get_signature_runner('serving_default') - output = tokenize(input=input_data) - if tf.executing_eagerly(): - tflite_result = output['tokens'] - else: - tflite_result = output['output_1'] - - # Assert the results are identical. - self.assertAllEqual(tflite_result, tf_result) - - -if __name__ == '__main__': - test.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tftext.bzl b/third_party/tensorflow-text/src/tensorflow_text/tftext.bzl deleted file mode 100644 index ff40480..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tftext.bzl +++ /dev/null
@@ -1,138 +0,0 @@ -""" -Build rule for open source tf.text libraries. -""" - -def py_tf_text_library( - name, - srcs = [], - deps = [], - visibility = None, - cc_op_defs = [], - cc_op_kernels = []): - """Creates build rules for TF.Text ops as shared libraries. - - Defines three targets: - - <name> - Python library that exposes all ops defined in `cc_op_defs` and `py_srcs`. - <name>_cc - C++ library that registers any c++ ops in `cc_op_defs`, and includes the - kernels from `cc_op_kernels`. - python/ops/_<name>.so - Shared library exposing the <name>_cc library. - - Args: - name: The name for the python library target build by this rule. - srcs: Python source files for the Python library. - deps: Dependencies for the Python library. - visibility: Visibility for the Python library. - cc_op_defs: A list of c++ src files containing REGISTER_OP definitions. - cc_op_kernels: A list of c++ targets containing kernels that are used - by the Python library. - """ - binary_path = "python/ops" - if srcs: - binary_path_end_pos = srcs[0].rfind("/") - binary_path = srcs[0][0:binary_path_end_pos] - binary_name = binary_path + "/_" + cc_op_kernels[0][1:] + ".so" - if cc_op_defs: - binary_name = binary_path + "/_" + name + ".so" - library_name = name + "_cc" - native.cc_library( - name = library_name, - srcs = cc_op_defs, - copts = select({ - # Android supports pthread natively, -pthread is not needed. - "@org_tensorflow//tensorflow:mobile": [], - "//conditions:default": ["-pthread"], - }), - alwayslink = 1, - deps = cc_op_kernels + select({ - "@org_tensorflow//tensorflow:mobile": [ - "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", - ], - "//conditions:default": [], - }), - ) - - native.cc_binary( - name = binary_name, - copts = select({ - "@org_tensorflow//tensorflow:mobile": [], - "//conditions:default": ["-pthread"], - }), - linkshared = 1, - deps = [ - ":" + library_name, - ] + select({ - "@org_tensorflow//tensorflow:mobile": [ - "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", - ], - "//conditions:default": [], - }), - ) - - native.py_library( - name = name, - srcs = srcs, - srcs_version = "PY2AND3", - visibility = visibility, - data = [":" + binary_name], - deps = deps, - ) - - -def tf_cc_library( - name, - srcs = [], - hdrs = [], - deps = [], - tf_deps = [], - copts = [], - compatible_with = None, - testonly = 0, - alwayslink = 0): - """ A rule to build a TensorFlow library or OpKernel. - - Just like cc_library, but: - * Adds alwayslink=1 for kernels (name has kernel in it) - * Separates out TF deps for when building for Android. 
- - Args: - name: Name of library - srcs: Source files - hdrs: Headers files - deps: All non-TF dependencies - tf_deps: All TF depenedencies - copts: C options - compatible_with: List of environments target can be built for - testonly: If library is only for testing - alwayslink: If symbols should be exported - """ - if "kernel" in name: - alwayslink = 1 - # These are "random" deps likely needed by each library (http://b/142433427) - oss_deps = [ - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/strings:cord", - "@com_google_absl//absl/time", - "@com_google_absl//absl/types:variant", - ] - deps += select({ - "@org_tensorflow//tensorflow:mobile": [ - "@org_tensorflow//tensorflow/core:portable_tensorflow_lib_lite", - ], - "//conditions:default": [ - "@local_config_tf//:libtensorflow_framework", - "@local_config_tf//:tf_header_lib", - ] + tf_deps + oss_deps, - }) - native.cc_library( - name = name, - srcs = srcs, - hdrs = hdrs, - deps = deps, - copts = copts, - compatible_with = compatible_with, - testonly = testonly, - alwayslink = alwayslink)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/tools/__init__.py deleted file mode 100644 index bcd0e267..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/__init__.py +++ /dev/null
@@ -1,15 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/BUILD b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/BUILD deleted file mode 100644 index bdee3d81..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/BUILD +++ /dev/null
@@ -1,63 +0,0 @@ -licenses(["notice"]) - -package( - default_visibility = [ - "//tensorflow_text:__subpackages__", - ], -) - -py_library( - name = "wordpiece_vocab", - srcs = ["__init__.py"], - srcs_version = "PY3", - deps = [ - ":bert_vocab_from_dataset", - ":wordpiece_tokenizer_learner_lib", - ], -) - -py_library( - name = "wordpiece_tokenizer_learner_lib", - srcs = ["wordpiece_tokenizer_learner_lib.py"], - srcs_version = "PY3", - deps = [ - # numpy dep, - ], -) - -py_test( - name = "wordpiece_tokenizer_learner_test", - srcs = ["wordpiece_tokenizer_learner_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":wordpiece_tokenizer_learner_lib", - # numpy dep, - # python/data/ops:dataset_ops tensorflow dep, - "//tensorflow_text:ops", - ], -) - -py_library( - name = "bert_vocab_from_dataset", - srcs = ["bert_vocab_from_dataset.py"], - srcs_version = "PY3", - deps = [ - ":wordpiece_tokenizer_learner_lib", - "//tensorflow_text:bert_tokenizer", - ], -) - -py_test( - name = "bert_vocab_from_dataset_test", - srcs = ["bert_vocab_from_dataset_test.py"], - python_version = "PY3", - srcs_version = "PY3", - deps = [ - ":bert_vocab_from_dataset", - ":wordpiece_tokenizer_learner_lib", - "@absl_py//absl/testing:absltest", - # numpy dep, - # tensorflow package dep, - ], -)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/__init__.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/__init__.py deleted file mode 100644 index bcd0e267..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/__init__.py +++ /dev/null
@@ -1,15 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/bert_vocab_from_dataset.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/bert_vocab_from_dataset.py deleted file mode 100644 index d5e8b85..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/bert_vocab_from_dataset.py +++ /dev/null
@@ -1,96 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Build a bert vocab from a `tf.data.Dataset`.""" -from typing import List - -from tensorflow_text.python.ops import bert_tokenizer -from tensorflow_text.tools.wordpiece_vocab import wordpiece_tokenizer_learner_lib as learner - - -def bert_vocab_from_dataset(dataset, - vocab_size: int, - reserved_tokens: List[str], - bert_tokenizer_params=None, - learn_params=None) -> List[str]: - """Generate a Bert wordpiece vocabulary from a `tf.data.Dataset` of texts. - - ``` - import tensorflow_text as text - - vocab = bert_vocab_from_dataset(dataset, vocab_size, reserved_tokens, - bert_tokenizer_params, learn_params) - bert_tokenizer = text.BertTokenizer(vocab, **bert_tokenizer_params) - token_ids = bert_tokenizer.tokenize(text) - ``` - - This uses Bert's splitting algorithm to split the text into words before - generating the subword vocabulary from the resulting words. - - The resulting vocabulary _can_ be used directly with a - `text.WordpieceTokenizer`, but note that the vocabulary will be sub-optimal or - **broken** if you split the text into words a different way. - - ``` - wordpiece_tokenizer = text.WordpieceTokenizer(vocab, ...) - words = split(text) - token_ids = wordpiece_tokenizer.tokenize(words) - ``` - - Args: - dataset: `A tf.data.Dataset` containing string-tensor elements. - vocab_size: The target vocabulary size. This is the maximum size. - reserved_tokens: A list of tokens that must be included in the vocabulary. - bert_tokenizer_params: The `text.BertTokenizer` arguments relavant for to - vocabulary-generation: - * `lower_case` - * `keep_whitespace` - * `normalization_form` - * `preserve_unused_token` - - See `BertTokenizer` for details. You should use the same values for - these to both generate the vocabulary and build the `BertTokenizer`. - learn_params: A dict of additional key word arguments for the the vocabulary - learning function. See `wordpiece_tokenizer_learner_lib.learn` for - details. - - Returns: - A list strings containing the vocabulary. - - Raises: - TypeError: If the dataset contains structured elements instead of single - tensors. - """ - if bert_tokenizer_params is None: - bert_tokenizer_params = {} - - if learn_params is None: - learn_params = {} - - element_spec = dataset.element_spec - - try: - element_spec.shape - except AttributeError: - raise TypeError("The dataset should contain single-tensor elements.") - - tokenizer = bert_tokenizer.BasicTokenizer(**bert_tokenizer_params) - words_dataset = dataset.map(tokenizer.tokenize) - word_counts = learner.count_words(words_dataset) - - vocab = learner.learn(word_counts, vocab_size, reserved_tokens, - **learn_params) - - return vocab
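A minimal sketch of the bert_vocab_from_dataset entry point deleted above, following the signature given in its docstring (not part of this patch; the corpus and parameter values are illustrative assumptions):

import tensorflow as tf
from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset as bert_vocab

# Tiny illustrative corpus; real usage streams a large batched text dataset.
ds = tf.data.Dataset.from_tensor_slices(
    ["the quick brown fox", "jumps over the lazy dog"]).batch(2)

vocab = bert_vocab.bert_vocab_from_dataset(
    ds,
    vocab_size=100,
    reserved_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]"],
    bert_tokenizer_params={"lower_case": True})
# `vocab` is a list of strings that can be written to a vocab file and used
# with text.BertTokenizer or text.WordpieceTokenizer.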
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/bert_vocab_from_dataset_test.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/bert_vocab_from_dataset_test.py deleted file mode 100644 index c726d21..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/bert_vocab_from_dataset_test.py +++ /dev/null
@@ -1,51 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensorflow_text.python.tools.wordpiece_vocab.wordpiece_tokenizer_learner_lib.""" - -from absl.testing import absltest -import tensorflow as tf - -from tensorflow_text.tools.wordpiece_vocab import bert_vocab_from_dataset - - -class BertVocabFromDatasetTest(absltest.TestCase): - - def test_smoke(self): - ds = tf.data.Dataset.from_tensor_slices([ - 'Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do', - 'eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut', - 'enim ad minim veniam, quis nostrud exercitation ullamco laboris', - 'nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in', - 'reprehenderit in voluptate velit esse cillum dolore eu fugiat', - 'nulla pariatur. Excepteur sint occaecat cupidatat non proident,', - 'sunt in culpa qui officia deserunt mollit anim id est laborum.', - ]).repeat(10).batch(3) - - reserved = ['<START>', '<END>'] - vocab = bert_vocab_from_dataset.bert_vocab_from_dataset( - ds, - vocab_size=65, - reserved_tokens=reserved, - bert_tokenizer_params={'lower_case': True}) - - self.assertContainsSubset(reserved, vocab) - self.assertContainsSubset(['a', 'b', 'c'], vocab) - self.assertContainsSubset(['dolore', 'dolor'], vocab) - self.assertContainsSubset(['##q', '##r', '##s'], vocab) - - -if __name__ == '__main__': - absltest.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/generate_vocab.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/generate_vocab.py deleted file mode 100644 index 41b10de2..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/generate_vocab.py +++ /dev/null
@@ -1,231 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Generate wordpiece vocab and compute metrics over dataset of tf.Examples.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import tempfile -from absl import app -from absl import flags -import apache_beam as beam -import tensorflow.compat.v1 as tf -import tensorflow_transform as tft -import tensorflow_transform.beam as tft_beam -from tensorflow_transform.tf_metadata import dataset_metadata -from tensorflow_transform.tf_metadata import schema_utils -from tensorflow_text.tools.wordpiece_vocab import utils -from tensorflow_text.tools.wordpiece_vocab import wordpiece_tokenizer_learner_lib as learner - - -FLAGS = flags.FLAGS - -flags.DEFINE_string('data_file', None, 'The input data file path.') -flags.DEFINE_string('vocab_file', None, 'The output vocab file path.') -flags.DEFINE_string('metrics_file', None, 'The output metrics file path.') -flags.DEFINE_string( - 'lang_set', 'en,es,ru,ar,de,fr,it,pt,ja,pl,fa,zh', - 'Set of languages used to build wordpiece model, ' - 'given as a comma-separated list.') -flags.DEFINE_string('text_key', 'text', 'Text feature key in input examples.') -flags.DEFINE_string( - 'language_code_key', 'language_code', 'Language code feature key.') -flags.DEFINE_float( - 'smoothing_exponent', 0.5, - 'Exponent used in calculating exponential smoothing coefficients.') -flags.DEFINE_integer('max_word_length', 50, - 'Discard words of length greater than max_word_length.') -flags.DEFINE_integer('upper_thresh', 10000000, - 'Upper threshold for binary search.') -flags.DEFINE_integer('lower_thresh', 10, 'Lower threshold for binary search.') -flags.DEFINE_integer('num_iterations', 4, - 'Number of iterations in wordpiece learning algorithm.') -flags.DEFINE_integer('num_pad_tokens', 100, 'Number of padding tokens to ' - 'include in vocab.') -flags.DEFINE_integer('max_input_tokens', 5000000, - 'Maximum number of input tokens, where -1 means no max.') -flags.DEFINE_integer('max_token_length', 50, 'Maximum length of a token.') -flags.DEFINE_integer('max_unique_chars', 1000, - 'Maximum number of unique characters as tokens.') -flags.DEFINE_integer('vocab_size', 110000, 'Target size of generated vocab, ' - 'where vocab_size is an upper bound and the size of vocab ' - 'can be within slack_ratio less than the vocab_size.') -flags.DEFINE_float('slack_ratio', 0.05, - 'Difference permitted between target and actual vocab size.') -flags.DEFINE_bool('include_joiner_token', True, - 'Whether to include joiner token in word suffixes.') -flags.DEFINE_string('joiner', '##', 'Joiner token in word suffixes.') -flags.DEFINE_list('reserved_tokens', - ['<unk>', '<s>', '</s>', '<mask>', - '<cls>', '<sep>', '<S>', '<T>'], - 'Reserved tokens to be included in vocab.') - - -def generate_vocab(data_file, vocab_file, metrics_file, raw_metadata, params, - min_token_frequency=2): - """Returns a pipeline generating a vocab and 
writing the output. - - Args: - data_file: recordio file to read - vocab_file: path in which to write the vocab - metrics_file: path in which to write the metrics - raw_metadata: schema for dataset - params: parameters for wordpiece vocab learning algorithm - min_token_frequency: the min frequency for a token to be included - """ - - lang_set = set(FLAGS.lang_set.split(',')) - - # Schema to format metrics as CSV. - csv_schema = schema_utils.schema_from_feature_spec({ - 'lang': tf.FixedLenFeature([], tf.string), - 'sample_count': tf.FixedLenFeature([], tf.int64), - 'micro_drop_char_percent': tf.FixedLenFeature([], tf.string), - 'macro_drop_char_percent': tf.FixedLenFeature([], tf.string), - 'micro_compress_ratio': tf.FixedLenFeature([], tf.string), - 'macro_compress_ratio': tf.FixedLenFeature([], tf.string), - 'unweighted_en_wp_overlap_percent': tf.FixedLenFeature([], tf.string), - 'weighted_en_wp_overlap_percent': tf.FixedLenFeature([], tf.string), - }) - - columns = ['lang', - 'sample_count', - 'micro_drop_char_percent', - 'macro_drop_char_percent', - 'micro_compress_ratio', - 'macro_compress_ratio', - 'unweighted_en_wp_overlap_percent', - 'weighted_en_wp_overlap_percent'] - - example_converter = tft.coders.ExampleProtoCoder(raw_metadata.schema, - serialized=False) - - def run_vocab(): - """Creates a pipeline to generate wordpiece vocab over a corpus.""" - - vocab_pipeline = beam.Pipeline() - - with tft_beam.Context(temp_dir=tempfile.mkdtemp()): - # Read raw data and convert to TF Transform encoded dict. - raw_data = ( - vocab_pipeline - | 'ReadInputData' >> beam.io.tfrecordio.ReadFromTFRecord( - data_file, coder=beam.coders.ProtoCoder(tf.train.Example)) - | 'DecodeInputData' >> beam.Map(example_converter.decode)) - - # Apply TF Transform. - (transformed_data, _), _ = ( - (raw_data, raw_metadata) - | 'FilterLangAndExtractToken' >> tft_beam.AnalyzeAndTransformDataset( - utils.count_preprocessing_fn(FLAGS.text_key, - FLAGS.language_code_key))) - - # Filter by languages. - tokens = ( - transformed_data - | 'FilterByLang' >> beam.ParDo(utils.FilterTokensByLang(lang_set))) - - # Calculate smoothing coefficients. - coeffs = ( - tokens - | 'CalculateSmoothingCoefficients' >> beam.CombineGlobally( - utils.CalculateCoefficients(FLAGS.smoothing_exponent))) - - # Apply smoothing, aggregate counts, and sort words by count. - _ = ( - tokens - | 'ApplyExponentialSmoothing' >> beam.ParDo( - utils.ExponentialSmoothing(), beam.pvalue.AsSingleton(coeffs)) - | 'SumCounts' >> beam.CombinePerKey(sum) - | 'FilterLowCounts' >> beam.ParDo(utils.FilterByCount( - FLAGS.max_word_length, min_token_frequency)) - | 'MergeAndSortCounts' >> beam.CombineGlobally(utils.SortByCount()) - | 'LearnVocab' >> beam.ParDo(utils.LearnVocab(params)) - | 'Flatten' >> beam.FlatMap(lambda x: x + '\n') - | 'WriteVocab' >> beam.io.WriteToText(vocab_file, - shard_name_template='', - append_trailing_newlines=False)) - return vocab_pipeline - - def run_metrics(): - """Creates a pipeline to measure wordpiece vocab metrics over a corpus.""" - - metrics_pipeline = beam.Pipeline() - - with tft_beam.Context(temp_dir=tempfile.mkdtemp()): - # Read raw data and convert to TF Transform encoded dict. - raw_data = ( - metrics_pipeline - | 'ReadInputData' >> beam.io.tfrecordio.ReadFromTFRecord( - data_file, coder=beam.coders.ProtoCoder(tf.train.Example)) - | 'DecodeInputData' >> beam.Map(example_converter.decode)) - - # Apply transform to wordpiece-tokenize input. 
- (metrics_transformed_data, _), _ = ( - (raw_data, raw_metadata) - | 'WordpieceTokenizeInput' >> tft_beam.AnalyzeAndTransformDataset( - utils.metrics_preprocessing_fn(FLAGS.vocab_file, - FLAGS.text_key, - FLAGS.language_code_key))) - - # Initialize CSV coder. Aggregate values for each lang, calculate metrics, - # and write to output to a CSV file. - csv_converter = tft.coders.CsvCoder(columns, csv_schema) - _ = ( - metrics_transformed_data - | 'CompileTokenInfo' >> beam.ParDo(utils.CompileTokenizationInfo()) - | 'CombineStatsForLang' >> beam.CombineGlobally(utils.AggregateLang()) - | 'CalculateMetrics' >> beam.ParDo(utils.CalculateMetrics()) - | 'EncodeMetrics' >> beam.Map(csv_converter.encode) - | 'WriteMetrics' >> beam.io.WriteToText( - metrics_file, shard_name_template='', header=','.join(columns))) - return metrics_pipeline - - vocab_pipeline = run_vocab() - vocab_pipeline.run().wait_until_finish() - - metrics_pipeline = run_metrics() - metrics_pipeline.run().wait_until_finish() - - -def main(_): - # Define schema. - raw_metadata = dataset_metadata.DatasetMetadata( - schema_utils.schema_from_feature_spec({ - 'text': tf.FixedLenFeature([], tf.string), - 'language_code': tf.FixedLenFeature([], tf.string), - })) - - # Add in padding tokens. - reserved_tokens = FLAGS.reserved_tokens - if FLAGS.num_pad_tokens: - padded_tokens = ['<pad>'] - padded_tokens += ['<pad%d>' % i for i in range(1, FLAGS.num_pad_tokens)] - reserved_tokens = padded_tokens + reserved_tokens - - params = learner.Params(FLAGS.upper_thresh, FLAGS.lower_thresh, - FLAGS.num_iterations, FLAGS.max_input_tokens, - FLAGS.max_token_length, FLAGS.max_unique_chars, - FLAGS.vocab_size, FLAGS.slack_ratio, - FLAGS.include_joiner_token, FLAGS.joiner, - reserved_tokens) - - generate_vocab(FLAGS.data_file, FLAGS.vocab_file, FLAGS.metrics_file, - raw_metadata, params) - - -if __name__ == '__main__': - app.run(main)
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/generate_word_counts.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/generate_word_counts.py deleted file mode 100644 index 4d164c0b..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/generate_word_counts.py +++ /dev/null
@@ -1,124 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Read text from RecordIO of tf.Examples and generate sorted word counts.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import tempfile -from absl import app -from absl import flags -import apache_beam as beam -import tensorflow.compat.v1 as tf -import tensorflow_transform as tft -import tensorflow_transform.beam as tft_beam -from tensorflow_transform.tf_metadata import dataset_metadata -from tensorflow_transform.tf_metadata import schema_utils -from tensorflow_text.tools.wordpiece_vocab import utils - -FLAGS = flags.FLAGS - -flags.DEFINE_string('input_path', None, 'The input file path.') -flags.DEFINE_string('output_path', None, 'The output file path.') -flags.DEFINE_string( - 'lang_set', 'en,es,ru,ar,de,fr,it,pt,ja,pl,fa,zh', - 'Set of languages used to build wordpiece model, ' - 'given as a comma-separated list.') -flags.DEFINE_string('text_key', 'text', 'Text feature key in input examples.') -flags.DEFINE_string( - 'language_code_key', 'language_code', 'Language code feature key.') -flags.DEFINE_float( - 'smoothing_exponent', 0.5, - 'Exponent used in calculating exponential smoothing coefficients.') -flags.DEFINE_integer('max_word_length', 50, - 'Discard words of length greater than max_word_length.') - - -def word_count(input_path, output_path, raw_metadata, min_token_frequency=2): - """Returns a pipeline counting words and writing the output. - - Args: - input_path: recordio file to read - output_path: path in which to write the output - raw_metadata: metadata of input tf.Examples - min_token_frequency: the min frequency for a token to be included - """ - - lang_set = set(FLAGS.lang_set.split(',')) - - # Create pipeline. - pipeline = beam.Pipeline() - - with tft_beam.Context(temp_dir=tempfile.mkdtemp()): - converter = tft.coders.ExampleProtoCoder( - raw_metadata.schema, serialized=False) - - # Read raw data and convert to TF Transform encoded dict. - raw_data = ( - pipeline - | 'ReadInputData' >> beam.io.tfrecordio.ReadFromTFRecord( - input_path, coder=beam.coders.ProtoCoder(tf.train.Example)) - | 'DecodeInputData' >> beam.Map(converter.decode)) - - # Apply TF Transform. - (transformed_data, _), _ = ( - (raw_data, raw_metadata) - | 'FilterLangAndExtractToken' >> tft_beam.AnalyzeAndTransformDataset( - utils.count_preprocessing_fn(FLAGS.text_key, - FLAGS.language_code_key))) - - # Filter by languages. - tokens = ( - transformed_data - | 'FilterByLang' >> beam.ParDo(utils.FilterTokensByLang(lang_set))) - - # Calculate smoothing coefficients. - coeffs = ( - tokens - | 'CalculateSmoothingCoefficients' >> beam.CombineGlobally( - utils.CalculateCoefficients(FLAGS.smoothing_exponent))) - - # Apply smoothing, aggregate counts, and sort words by count. 
- _ = ( - tokens - | 'ApplyExponentialSmoothing' >> beam.ParDo( - utils.ExponentialSmoothing(), beam.pvalue.AsSingleton(coeffs)) - | 'SumCounts' >> beam.CombinePerKey(sum) - | 'FilterLowCounts' >> beam.ParDo(utils.FilterByCount( - FLAGS.max_word_length, min_token_frequency)) - | 'MergeAndSortCounts' >> beam.CombineGlobally(utils.SortByCount()) - | 'Flatten' >> beam.FlatMap(lambda x: x) - | 'FormatCounts' >> beam.Map(lambda tc: '%s\t%s' % (tc[0], tc[1])) - | 'WriteSortedCount' >> beam.io.WriteToText( - output_path, shard_name_template='')) - - return pipeline - - -def main(_): - # Generate schema of input data. - raw_metadata = dataset_metadata.DatasetMetadata( - schema_utils.schema_from_feature_spec({ - 'text': tf.FixedLenFeature([], tf.string), - 'language_code': tf.FixedLenFeature([], tf.string), - })) - - pipeline = word_count(FLAGS.input_path, FLAGS.output_path, raw_metadata) - pipeline.run().wait_until_finish() - - -if __name__ == '__main__': - app.run(main)
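The deleted generate_word_counts.py wires tokenization, per-language filtering, exponential smoothing, and a final sum/filter/sort/write into one Beam pipeline. For reference, a minimal sketch of that count-filter-sort-write shape in plain Beam, leaving out the TF Transform and language-smoothing stages; the in-memory sentences, output path, and threshold are illustrative only.

```python
# Minimal sketch of the count/filter/sort/write shape used above, without the
# TF Transform or language-smoothing stages. Input data and paths are made up.
import apache_beam as beam


def run(output_path='word_counts.txt', min_token_frequency=1):
  sentences = [
      ['I', 'like', 'pie', '.'],
      ['I', 'really', 'like', 'pie', '.'],
  ]
  with beam.Pipeline() as pipeline:
    _ = (
        pipeline
        | 'CreateTokens' >> beam.Create(sentences)
        | 'Flatten' >> beam.FlatMap(lambda tokens: tokens)
        | 'CountPerToken' >> beam.combiners.Count.PerElement()
        # FilterByCount in utils.py keeps counts strictly above the threshold.
        | 'FilterLowCounts' >> beam.Filter(
            lambda wc: wc[1] > min_token_frequency)
        | 'GatherAll' >> beam.combiners.ToList()
        | 'SortByCount' >> beam.FlatMap(
            lambda wcs: sorted(wcs, key=lambda wc: wc[1], reverse=True))
        | 'FormatCounts' >> beam.Map(lambda wc: '%s\t%d' % (wc[0], wc[1]))
        | 'WriteSortedCounts' >> beam.io.WriteToText(
            output_path, shard_name_template=''))


if __name__ == '__main__':
  run()
```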
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/measure_wordpiece_stats.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/measure_wordpiece_stats.py deleted file mode 100644 index 55ea836..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/measure_wordpiece_stats.py +++ /dev/null
@@ -1,137 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Measure wordpiece model stats on a corpus sample. - -Stats: -1) Dropped character percent - How many non-control, non-whitespace characters are getting dropping during - wordpiece tokenization? - - To reduce the number of characters getting dropped, you will need to - increase --max_uniques_chars value in learn_wordpiece_tokenizer_main.cc. - -2) Compression ratio - Number of characters / Number of wordpieces. - - To increase compression ratio for a particular language, you will either - need to increase the overall vocab size (--vocab_size in - learn_wordpiece_tokenizer_main.c) or oversample that language. - -3) Wordpiece overlap with English: - (Wordpieces present in both en and xx samples / - Wordpieces present in xx samples) -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import tempfile -from absl import app -from absl import flags -import apache_beam as beam -import tensorflow.compat.v1 as tf -import tensorflow_transform as tft -import tensorflow_transform.beam as tft_beam -from tensorflow_transform.tf_metadata import dataset_metadata -from tensorflow_transform.tf_metadata import schema_utils -from tensorflow_text.tools.wordpiece_vocab import utils - -FLAGS = flags.FLAGS - -flags.DEFINE_string('input_file', None, 'Input RecordIO file.') -flags.DEFINE_string('output_file', None, - 'File in which to store calculated statistics.') -flags.DEFINE_string('text_key', 'text', 'Text feature key in input examples.') -flags.DEFINE_string( - 'language_code_key', 'language_code', 'Language code feature key.') -flags.DEFINE_string('vocab_file', None, 'Wordpiece vocab file.') - - -def calculate_metrics(): - """Returns a pipeline to compute wordpiece model stats given a vocab and corpus.""" - - # Schema of input dataset. - raw_metadata = dataset_metadata.DatasetMetadata( - schema_utils.schema_from_feature_spec({ - 'text': tf.FixedLenFeature([], tf.string), - 'language_code': tf.FixedLenFeature([], tf.string), - })) - - # Schema to format metrics as CSV. - csv_schema = schema_utils.schema_from_feature_spec({ - 'lang': tf.FixedLenFeature([], tf.string), - 'sample_count': tf.FixedLenFeature([], tf.int64), - 'micro_drop_char_percent': tf.FixedLenFeature([], tf.string), - 'macro_drop_char_percent': tf.FixedLenFeature([], tf.string), - 'micro_compress_ratio': tf.FixedLenFeature([], tf.string), - 'macro_compress_ratio': tf.FixedLenFeature([], tf.string), - 'unweighted_en_wp_overlap_percent': tf.FixedLenFeature([], tf.string), - 'weighted_en_wp_overlap_percent': tf.FixedLenFeature([], tf.string), - }) - - columns = ['lang', - 'sample_count', - 'micro_drop_char_percent', - 'macro_drop_char_percent', - 'micro_compress_ratio', - 'macro_compress_ratio', - 'unweighted_en_wp_overlap_percent', - 'weighted_en_wp_overlap_percent'] - - # Create pipeline. 
- pipeline = beam.Pipeline() - - with tft_beam.Context(temp_dir=tempfile.mkdtemp()): - example_converter = tft.coders.ExampleProtoCoder( - raw_metadata.schema, serialized=False) - csv_converter = tft.coders.CsvCoder(columns, csv_schema) - - # Read raw data and convert to TF Transform encoded dict. - raw_data = ( - pipeline - | 'ReadInputData' >> beam.io.tfrecordio.ReadFromTFRecord( - FLAGS.input_file, coder=beam.coders.ProtoCoder(tf.train.Example)) - | 'DecodeInputData' >> beam.Map(example_converter.decode)) - - # Apply transform to wordpiece-tokenize input. - (transformed_data, _), _ = ( - (raw_data, raw_metadata) - | 'WordpieceTokenizeInput' >> tft_beam.AnalyzeAndTransformDataset( - utils.metrics_preprocessing_fn(FLAGS.vocab_file, - FLAGS.text_key, - FLAGS.language_code_key))) - - # Aggregate values for each lang, calculate metrics, and write to output. - _ = ( - transformed_data - | 'CompileTokenInfo' >> beam.ParDo(utils.CompileTokenizationInfo()) - | 'CombineStatsForLang' >> beam.CombineGlobally(utils.AggregateLang()) - | 'CalculateMetrics' >> beam.ParDo(utils.CalculateMetrics()) - | 'EncodeMetrics' >> beam.Map(csv_converter.encode) - | 'WriteMetrics' >> beam.io.WriteToText(FLAGS.output_file, - shard_name_template='', - header=','.join(columns))) - - return pipeline - - -def main(_): - pipeline = calculate_metrics() - pipeline.run().wait_until_finish() - - -if __name__ == '__main__': - app.run(main)
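The three statistics described in the module docstring reduce to simple arithmetic over the per-language aggregates the pipeline collects. Below is a plain-Python restatement of the formulas from utils.CalculateMetrics; the example counters and character counts are copied from the CalculateMetricsTest fixture in utils_test.py further down, not measured on a real corpus.

```python
# Plain-Python restatement of the metric formulas in utils.CalculateMetrics.
# The example counters/counts come from the CalculateMetricsTest fixture below.
import collections


def dropped_char_percent(num_dropped, num_preserved):
  total = num_dropped + num_preserved
  return 100.0 * num_dropped / total if total else None


def compression_ratio(num_preserved_chars, num_non_unk_wordpieces):
  # Characters per non-[UNK] wordpiece; higher means fewer pieces per word.
  if not num_non_unk_wordpieces:
    return None
  return num_preserved_chars / num_non_unk_wordpieces


def wordpiece_overlap_percent(xx_counter, en_counter, weighted=False):
  numerator = denominator = 0
  for wordpiece, count in xx_counter.items():
    if not weighted:
      count = 1
    denominator += count
    if wordpiece in en_counter:
      numerator += count
  return 100.0 * numerator / denominator if denominator else None


fr = collections.Counter({'bon': 2, 'le': 2, 'jour': 1, 'soir': 1, 'homme': 1})
en = collections.Counter({'the': 2, 'le': 1, '##sson': 1, 'plan': 1, '##s': 1})
print(dropped_char_percent(2, 24))               # ~7.69
print(compression_ratio(24, 8))                  # 3.0
print(wordpiece_overlap_percent(fr, en))         # 20.0 (1 of 5 unique pieces shared)
print(wordpiece_overlap_percent(fr, en, True))   # ~28.57 (2 of 7 occurrences shared)
```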
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/utils.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/utils.py deleted file mode 100644 index 39f6a1f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/utils.py +++ /dev/null
@@ -1,384 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""PTransforms used for wordpiece vocabulary generation pipeline.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import collections -import apache_beam as beam -import tensorflow.compat.v1 as tf -from tensorflow_text.python.ops.bert_tokenizer import BertTokenizer -from tensorflow_text.python.ops.wordpiece_tokenizer import WordpieceTokenizer -from tensorflow_text.tools.wordpiece_vocab import wordpiece_tokenizer_learner_lib as learner - - -class FilterTokensByLang(beam.DoFn): - """Filters out languages if necessary and yields each (token, lang) pair.""" - - def __init__(self, lang_set, include_other_languages=False): - self._lang_set = lang_set - self._include_other_languages = include_other_languages - - def process(self, element): - lang = element['lang'] - - if lang in self._lang_set or self._include_other_languages: - returned_lang = lang if lang in self._lang_set else 'other' - - for token in element['tokens']: - yield token, returned_lang - - -class CalculateCoefficients(beam.CombineFn): - """Calculates smoothing coefficient for each language.""" - - def __init__(self, smoothing_exponent): - self._smoothing_exponent = smoothing_exponent - - def create_accumulator(self): - return {'total_count': 0, 'lang_count': collections.Counter()} - - def add_input(self, accumulator, element): - _, lang = element - accumulator['total_count'] += 1 - accumulator['lang_count'].update([lang]) - return accumulator - - def merge_accumulators(self, accumulators): - merged = self.create_accumulator() - for acc in accumulators: - for key in merged: - merged[key] += acc[key] - return merged - - def extract_output(self, accumulator): - lang_count = accumulator['lang_count'] - total = accumulator['total_count'] - probs, exp = {}, {} - for lang in lang_count: - probs[lang] = lang_count[lang] / total - exp[lang] = pow(probs[lang], self._smoothing_exponent) - total_weight = sum(exp.values()) - for lang in exp: - exp[lang] = exp[lang] / (total_weight * probs[lang]) - return exp - - -class ExponentialSmoothing(beam.DoFn): - """Applies exponential smoothing coefficients to the counts.""" - - def __init__(self, corpus_multiplier=1): - self._corpus_multiplier = corpus_multiplier - - def process(self, word_and_lang, coeffs): - word, lang = word_and_lang - count = coeffs[lang] * self._corpus_multiplier - yield word, count - - -class FilterByCount(beam.DoFn): - """Filters words with counts below some threshold.""" - - def __init__(self, max_word_length, min_token_frequency=2): - self._min_token_frequency = int(min_token_frequency) - self._max_word_length = max_word_length - - def process(self, word_and_count): - word, count = word_and_count - if count > self._min_token_frequency and len(word) <= self._max_word_length: - yield word, int(round(count)) - - -class SortByCount(beam.CombineFn): - """Sorts words by count.""" - - def 
create_accumulator(self): - return [] - - def add_input(self, accumulator, element): - if not accumulator: - accumulator = self.create_accumulator() - - word, count = element - accumulator.append((word, int(count))) - return accumulator - - def merge_accumulators(self, accumulators): - merged = self.create_accumulator() - for accumulator in accumulators: - if accumulator: - merged.extend(accumulator) - return merged - - def extract_output(self, accumulator): - return sorted(sorted(accumulator, key=lambda x: x[0]), key=lambda x: x[1], - reverse=True) - - -class CompileTokenizationInfo(beam.DoFn): - """Expands list of tokens and computes intermediate metrics.""" - - def process(self, record): - wordpiece_counter = collections.Counter(record['wordpieces']) - del wordpiece_counter['[UNK]'] - dropped = record['num_dropped_chars'] - preserved = record['num_preserved_chars'] - non_unk = record['num_non_unk_wordpieces'] - preserved_ratio = [preserved / non_unk] if non_unk else [] - dropped_ratio = [dropped / (dropped + preserved)] if (dropped + - preserved) else [] - tokenization_info = { - 'lang': record['lang'], - 'count': 1, - 'num_preserved_chars': preserved, - 'num_dropped_chars': dropped, - 'num_non_unk_wordpieces': non_unk, - 'preserved_ratio': preserved_ratio, - 'dropped_ratio': dropped_ratio, - 'wordpieces': wordpiece_counter - } - yield tokenization_info - - -def default(): - return { - 'count': 0, - 'num_preserved_chars': 0, - 'num_dropped_chars': 0, - 'num_non_unk_wordpieces': 0, - 'preserved_ratio': [], - 'dropped_ratio': [], - 'wordpieces': collections.Counter() - } - - -class AggregateLang(beam.CombineFn): - """Aggregates intermediate metrics for each language.""" - - def create_accumulator(self): - return collections.defaultdict(default) - - def add_input(self, accumulator, element): - lang = element['lang'] - for key in accumulator[lang].keys(): - accumulator[lang][key] += element[key] - return accumulator - - def merge_accumulators(self, accumulators): - merged = self.create_accumulator() - for acc in accumulators: - for lang in acc.keys(): - for key in acc[lang].keys(): - merged[lang][key] += acc[lang][key] - return merged - - def extract_output(self, accumulator): - return accumulator - - -class LearnVocab(beam.DoFn): - - def __init__(self, params): - self._params = params - - def process(self, wordcounts): - return learner.learn(wordcounts, self._params) - - -class CalculateMetrics(beam.DoFn): - """Calculates metrics for each language given tokenization info.""" - - def process(self, info_dict): - for lang in info_dict.keys(): - infos = info_dict[lang] - yield { - 'lang': - lang, - 'sample_count': - infos['count'], - 'micro_drop_char_percent': - self._format_float_or_none( - self._get_micro_dropped_char_percent(infos)), - 'macro_drop_char_percent': - self._format_float_or_none( - self._get_macro_dropped_char_percent(infos)), - 'micro_compress_ratio': - self._format_float_or_none( - self._get_micro_compression_ratio(infos)), - 'macro_compress_ratio': - self._format_float_or_none( - self._get_macro_compression_ratio(infos)), - 'unweighted_en_wp_overlap_percent': - self._format_float_or_none( - self._get_wordpiece_overlap_percent( - infos['wordpieces'], - info_dict['en']['wordpieces'], - weighted=False)), - 'weighted_en_wp_overlap_percent': - self._format_float_or_none( - self._get_wordpiece_overlap_percent( - infos['wordpieces'], - info_dict['en']['wordpieces'], - weighted=True)) - } - - def _get_list_mean(self, l): - return sum(l) / len(l) if l else None - - def 
_get_micro_compression_ratio(self, infos): - if infos['num_non_unk_wordpieces']: - return infos['num_preserved_chars'] / infos['num_non_unk_wordpieces'] - else: - return None - - def _get_macro_compression_ratio(self, infos): - return self._get_list_mean(infos['preserved_ratio']) - - def _get_micro_dropped_char_percent(self, infos): - if infos['num_preserved_chars'] + infos['num_dropped_chars']: - return 100.0 * infos['num_dropped_chars'] / ( - infos['num_preserved_chars'] + infos['num_dropped_chars']) - else: - return None - - def _get_macro_dropped_char_percent(self, infos): - return 100.0 * self._get_list_mean(infos['dropped_ratio']) - - def _get_wordpiece_overlap_percent(self, - xx_wordpiece_counter, - en_wordpiece_counter, - weighted=False): - numerator = 0 - denominator = 0 - for wordpiece, count in xx_wordpiece_counter.iteritems(): - if not weighted: - count = 1 - denominator += count - if wordpiece in en_wordpiece_counter: - numerator += count - - if denominator: - return 100.0 * numerator / denominator - else: - return None - - def _format_float_or_none(self, value): - if isinstance(value, float): - return '{:.3f}'.format(value) - else: - return None - - -def count_preprocessing_fn(text_key, language_code_key): - """Generates a preprocessing function to be used in generating word counts. - - Args: - text_key: feature key in tf.Example for text - language_code_key: feature key in tf.Example for language_code - - Returns: - a preprocessing function - """ - - def preprocessing_fn(inputs): - """Function used to transform dataset using TF transform. - - Tokenizes input and detects language if there is no associated - language_code. - - Args: - inputs: dataset of tf.Examples containing text samples - - Returns: - transformed outputs - """ - - outputs = {} - - tokenizer = BertTokenizer() - tokens = tokenizer.tokenize(inputs[text_key]) - outputs['tokens'] = tokens.to_sparse() - outputs['lang'] = tf.convert_to_tensor(inputs[language_code_key]) - - return outputs - - return preprocessing_fn - - -def metrics_preprocessing_fn(vocab_file, text_key, language_code_key): - """Generates a preprocessing function to be used in generating word counts. - - Args: - vocab_file: path to file containing wordpiece vocabulary - text_key: feature key in tf.Example for text - language_code_key: feature key in tf.Example for language_code - - Returns: - a preprocessing function - """ - - def preprocessing_fn(inputs): - """Preprocessing function used in TF Transform. 
- - Args: - inputs: the input dataset of tf.Examples - - Returns: - preprocessed outputs - """ - vocab_table = tf.lookup.StaticHashTable(tf.lookup.TextFileInitializer( - vocab_file, tf.string, tf.lookup.TextFileIndex.WHOLE_LINE, tf.int64, - tf.lookup.TextFileIndex.LINE_NUMBER), -1) - - tokenizer = BertTokenizer() - tokens = tokenizer.tokenize(inputs[text_key]) - wordpiece_tokenizer = WordpieceTokenizer(vocab_table, - token_out_type=tf.string) - wordpieces = wordpiece_tokenizer.tokenize(tokens) - wordpieces_flat = wordpieces.flat_values - wordpieces_flat.set_shape([None]) - wordpieces = tf.RaggedTensor.from_nested_row_splits( - wordpieces_flat, wordpieces.nested_row_splits) - - known_mask = tf.cast(tf.not_equal(wordpieces, '[UNK]'), tf.int32) - num_non_unk_wordpieces = tf.reduce_sum(known_mask, axis=[1, 2]) - - wordpiece_is_unknown = tf.equal(wordpieces, '[UNK]') - token_has_unknown = tf.reduce_any(wordpiece_is_unknown, axis=-1) - unknown_tokens = tf.ragged.boolean_mask(tokens, token_has_unknown) - unknown_lengths = tf.strings.length(unknown_tokens) - num_dropped_chars = tf.math.reduce_sum(unknown_lengths, axis=1) - - token_lengths = tf.strings.length(tokens) - total_chars = tf.reduce_sum(token_lengths, axis=-1) - num_preserved_chars = total_chars - num_dropped_chars - - flattened = tf.RaggedTensor.from_row_splits( - wordpieces.flat_values, tf.gather(wordpieces.values.row_splits, - wordpieces.row_splits)) - - outputs = {} - outputs['num_non_unk_wordpieces'] = tf.cast(num_non_unk_wordpieces, - tf.int64) - outputs['num_dropped_chars'] = tf.cast(num_dropped_chars, tf.int64) - outputs['num_preserved_chars'] = tf.cast(num_preserved_chars, tf.int64) - outputs['wordpieces'] = flattened.to_sparse() - outputs['lang'] = tf.convert_to_tensor(inputs[language_code_key]) - - return outputs - - return preprocessing_fn
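CalculateCoefficients in the deleted utils.py computes, for each language, coeff[lang] = p[lang]**s / (sum over languages of p**s, times p[lang]), where p is the language's share of tokens and s is the smoothing exponent. Under-represented languages get coefficients above 1 and over-represented ones below 1, which rebalances the word counts before vocabulary learning. A stand-alone sketch of that arithmetic follows; the token tallies mirror the unit tests further down.

```python
# Stand-alone version of the coefficient math in utils.CalculateCoefficients.
import collections


def smoothing_coefficients(lang_counts, smoothing_exponent=0.5):
  total = sum(lang_counts.values())
  probs = {lang: count / total for lang, count in lang_counts.items()}
  exp = {lang: prob ** smoothing_exponent for lang, prob in probs.items()}
  total_weight = sum(exp.values())
  return {lang: exp[lang] / (total_weight * probs[lang]) for lang in exp}


# Equal token counts give every language a coefficient of 1.0, as in
# CalculateCoefficientsTest.testEqual below.
print(smoothing_coefficients(collections.Counter({'en': 5, 'fr': 5})))
# {'en': 1.0, 'fr': 1.0}

# With 8 'en' tokens and 5 'fr' tokens (the testNotEqual data), the rarer
# language ends up with the larger coefficient, so its counts are up-weighted.
print(smoothing_coefficients(collections.Counter({'en': 8, 'fr': 5})))
# en ~0.91, fr ~1.15
```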
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/utils_test.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/utils_test.py deleted file mode 100644 index b88edc2..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/utils_test.py +++ /dev/null
@@ -1,414 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensorflow_text.python.tools.wordpiece_vocab.utils.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -import logging -import tempfile - -from absl.testing import absltest -import apache_beam as beam -from apache_beam.testing.test_pipeline import TestPipeline -from apache_beam.testing.util import assert_that -from apache_beam.testing.util import equal_to -import tensorflow.compat.v1 as tf -from tensorflow_text.tools.wordpiece_vocab import utils - - -class FilterTokensByLangTest(absltest.TestCase): - - def setUp(self): - super(FilterTokensByLangTest, self).setUp() - self.sample_input = [{'lang': 'en', - 'tokens': ['I', 'like', 'pie', '.']}] - - def testLangInLangSet(self): - with TestPipeline() as p: - tokens = p | beam.Create(self.sample_input) - result = tokens | beam.ParDo(utils.FilterTokensByLang({'en'})) - assert_that(result, equal_to([('I', 'en'), - ('like', 'en'), - ('pie', 'en'), - ('.', 'en')])) - - def testLangNotInLangSet(self): - with TestPipeline() as p: - tokens = p | beam.Create(self.sample_input) - result = tokens | beam.ParDo(utils.FilterTokensByLang({'fr'})) - assert_that(result, equal_to([])) - - def testLangNotInLangSetIncludeOthers(self): - with TestPipeline() as p: - tokens = p | beam.Create(self.sample_input) - result = tokens | beam.ParDo(utils.FilterTokensByLang({'fr'}, True)) - assert_that(result, equal_to([('I', 'other'), - ('like', 'other'), - ('pie', 'other'), - ('.', 'other')])) - - -class CompareValues(beam.DoFn): - - def process(self, element): - return [element['en'] < element['fr']] - - -class CalculateCoefficientsTest(absltest.TestCase): - - def setUp(self): - super(CalculateCoefficientsTest, self).setUp() - self.sample_input = [('I', 'en'), ('really', 'en'), - ('like', 'en'), ('pie', 'en'), - ('.', 'en'), ('Je', 'fr'), - ('suis', 'fr'), ('une', 'fr'), - ('fille', 'fr'), ('.', 'fr')] - - def testEqual(self): - with TestPipeline() as p: - tokens = p | beam.Create(self.sample_input) - result = tokens | beam.CombineGlobally(utils.CalculateCoefficients(0.5)) - assert_that(result, equal_to([{'en': 1.0, 'fr': 1.0}])) - - def testNotEqual(self): - with TestPipeline() as p: - sample_input = [('I', 'en'), ('kind', 'en'), ('of', 'en'), ('like', 'en'), - ('to', 'en'), ('eat', 'en'), ('pie', 'en'), ('!', 'en'), - ('Je', 'fr'), ('suis', 'fr'), ('une', 'fr'), - ('fille', 'fr'), ('.', 'fr')] - tokens = p | beam.Create(sample_input) - result = (tokens - | beam.CombineGlobally(utils.CalculateCoefficients(0.5)) - | beam.ParDo(CompareValues())) - assert_that(result, equal_to([True])) - - -class ExponentialSmoothingTest(absltest.TestCase): - - def setUp(self): - super(ExponentialSmoothingTest, self).setUp() - self.sample_input = [('Hello', 'en'), (',', 'en'), - ('world', 'en'), ('!', 'en'), - ('Bonjour', 'fr'), ('.', 'fr')] - self.coeffs = [{'en': 0.75, 'fr': 
1.5}] - - def testBasic(self): - with TestPipeline() as p: - tokens = p | 'CreateInput' >> beam.Create(self.sample_input) - coeffs = p | 'CreateCoeffs' >> beam.Create(self.coeffs) - result = tokens | beam.ParDo( - utils.ExponentialSmoothing(), beam.pvalue.AsSingleton(coeffs)) - assert_that(result, equal_to([('Hello', 0.75), (',', 0.75), - ('world', 0.75), ('!', 0.75), - ('Bonjour', 1.5), ('.', 1.5)])) - - -class FilterByCountTest(absltest.TestCase): - - def setUp(self): - super(FilterByCountTest, self).setUp() - self.sample_input = [('one', 1), ('two', 2), ('three', 3), ('four', 4)] - self.max_token_length = 50 - - def testBelowThreshold(self): - with TestPipeline() as p: - tokens = p | 'CreateInput' >> beam.Create(self.sample_input) - result = tokens | beam.ParDo(utils.FilterByCount(self.max_token_length, - min_token_frequency=2)) - assert_that(result, equal_to([('three', 3), ('four', 4)])) - - def testTokenTooLong(self): - sample_input = [('one', 1), ('two', 2), ('three', 3), ('four', 4), - ('qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnm', 5), - ('blah', 20)] - - with TestPipeline() as p: - tokens = p | 'CreateInput' >> beam.Create(sample_input) - result = tokens | beam.ParDo(utils.FilterByCount(self.max_token_length, - min_token_frequency=2)) - assert_that(result, equal_to([('three', 3), ('four', 4), ('blah', 20)])) - - -class SortByCountTest(absltest.TestCase): - - def setUp(self): - super(SortByCountTest, self).setUp() - self.sample_input = [('a', 5), ('b', 2), ('c', 9), ('d', 4)] - - def testUnsorted(self): - with TestPipeline() as p: - tokens = p | 'CreateInput' >> beam.Create(self.sample_input) - result = tokens | beam.CombineGlobally(utils.SortByCount()) - assert_that(result, equal_to([[('c', 9), ('a', 5), ('d', 4), ('b', 2)]])) - - -class CompileTokenizationInfoTest(absltest.TestCase): - - def setUp(self): - super(CompileTokenizationInfoTest, self).setUp() - self.sample_input = [{'lang': 'en', - 'num_non_unk_wordpieces': 4, - 'num_dropped_chars': 2, - 'num_preserved_chars': 13, - 'wordpieces': ['the', 'app', '##le', - 'sauce', '[UNK]']}, - {'lang': 'fr', - 'num_non_unk_wordpieces': 5, - 'num_dropped_chars': 0, - 'num_preserved_chars': 14, - 'wordpieces': ['bon', '##jour', 'bon', '##soir']}] - - def testTwoLangs(self): - with TestPipeline() as p: - tokens = p | 'CreateInput' >> beam.Create(self.sample_input) - result = tokens | beam.ParDo(utils.CompileTokenizationInfo()) - assert_that(result, equal_to([{ - 'lang': 'en', - 'count': 1, - 'num_preserved_chars': 13, - 'num_dropped_chars': 2, - 'num_non_unk_wordpieces': 4, - 'preserved_ratio': [13/4], - 'dropped_ratio': [2/15], - 'wordpieces': collections.Counter(['the', 'app', '##le', 'sauce']) - }, { - 'lang': 'fr', - 'count': 1, - 'num_preserved_chars': 14, - 'num_dropped_chars': 0, - 'num_non_unk_wordpieces': 5, - 'preserved_ratio': [14/5], - 'dropped_ratio': [0], - 'wordpieces': collections.Counter(['bon', '##jour', 'bon', '##soir']) - }])) - - -class AggregateLangTest(absltest.TestCase): - - def setUp(self): - super(AggregateLangTest, self).setUp() - self.aggregator = utils.AggregateLang() - self.sample_input = [{ - 'lang': 'en', - 'count': 1, - 'num_preserved_chars': 13, - 'num_dropped_chars': 2, - 'num_non_unk_wordpieces': 4, - 'preserved_ratio': [13/4], - 'dropped_ratio': [2/15], - 'wordpieces': collections.Counter(['the', 'app', '##le', 'sauce']) - }, { - 'lang': 'en', - 'count': 1, - 'num_preserved_chars': 11, - 'num_dropped_chars': 0, - 'num_non_unk_wordpieces': 4, - 'preserved_ratio': [11/4], - 'dropped_ratio': [0], 
- 'wordpieces': collections.Counter(['the', 'app', 'st', '##ore']) - }] - - def testMultiEntryOneLang(self): - expected_output = self.aggregator.create_accumulator() - expected_output['en'] = { - 'count': 2, - 'num_preserved_chars': 24, - 'num_dropped_chars': 2, - 'num_non_unk_wordpieces': 8, - 'preserved_ratio': [13/4, 11/4], - 'dropped_ratio': [2/15, 0], - 'wordpieces': collections.Counter({'the': 2, 'app': 2, '##le': 1, - 'sauce': 1, 'st': 1, '##ore': 1})} - # Test create_accumulator. - accumulator = self.aggregator.create_accumulator() - # Test add_input. - for element in self.sample_input: - accumulator = self.aggregator.add_input(accumulator, element) - # Test merge_accumulators. - merged = self.aggregator.merge_accumulators([ - accumulator, self.aggregator.create_accumulator()]) - # Test extract_output. - output = self.aggregator.extract_output(merged) - self.assertEqual(output, expected_output) - - -class CalculateMetricsTest(absltest.TestCase): - - def setUp(self): - super(CalculateMetricsTest, self).setUp() - self.info_dict = { - 'en': { - 'count': 2, - 'num_preserved_chars': 24, - 'num_dropped_chars': 2, - 'num_non_unk_wordpieces': 8, - 'preserved_ratio': [2, 3], - 'dropped_ratio': [0.5, 0], - 'wordpieces': collections.Counter({'the': 2, 'le': 1, '##sson': 1, - 'plan': 1, '##s': 1})}, - 'fr': { - 'count': 2, - 'num_preserved_chars': 24, - 'num_dropped_chars': 2, - 'num_non_unk_wordpieces': 8, - 'preserved_ratio': [5, 7], - 'dropped_ratio': [0.4, 0.6], - 'wordpieces': collections.Counter({'bon': 2, 'le': 2, 'jour': 1, - 'soir': 1, 'homme': 1})}} - self.metrics = utils.CalculateMetrics() - - def testListMean(self): - test_list = [1, 2, 3, 4, 5] - mean = self.metrics._get_list_mean(test_list) - self.assertEqual(mean, 3) - - def testMicroCompressionRatio(self): - fr_micro_compression = self.metrics._get_micro_compression_ratio( - self.info_dict['fr']) - self.assertEqual(fr_micro_compression, 3) - - def testMacroCompressionRatio(self): - en_macro_compression = self.metrics._get_macro_compression_ratio( - self.info_dict['en']) - self.assertEqual(en_macro_compression, 2.5) - - def testMicroDroppedCharPercent(self): - en_micro_dropped_char = self.metrics._get_micro_dropped_char_percent( - self.info_dict['en']) - self.assertEqual(en_micro_dropped_char, 100/13) - - def testMacroDroppedCharPercent(self): - fr_macro_dropped_char = self.metrics._get_macro_dropped_char_percent( - self.info_dict['fr']) - self.assertEqual(fr_macro_dropped_char, 50.0) - - def testWordpieceOverlapFrench(self): - fr_wp_overlap = self.metrics._get_wordpiece_overlap_percent( - self.info_dict['fr']['wordpieces'], self.info_dict['en']['wordpieces']) - self.assertEqual(fr_wp_overlap, 20.0) - - def testWordpieceOverlapFrenchWeighted(self): - fr_wp_overlap = self.metrics._get_wordpiece_overlap_percent( - self.info_dict['fr']['wordpieces'], self.info_dict['en']['wordpieces'], - weighted=True) - self.assertEqual(fr_wp_overlap, 200/7) - - def testWordpieceOverlapEnglish(self): - en_wp_overlap = self.metrics._get_wordpiece_overlap_percent( - self.info_dict['en']['wordpieces'], self.info_dict['en']['wordpieces']) - self.assertEqual(en_wp_overlap, 100.0) - - def testFormatFloatOrNone(self): - extra_digits = 0.12345 - self.assertEqual(self.metrics._format_float_or_none(extra_digits), '0.123') - fewer_digits = 0.1 - self.assertEqual(self.metrics._format_float_or_none(fewer_digits), '0.100') - non_float = '' - self.assertEqual(self.metrics._format_float_or_none(non_float), None) - - -def _bytes_feature(value): - """Wrapper for 
inserting bytes features into Example proto.""" - return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) - - -class CountPreprocessingFnTest(absltest.TestCase): - - def setUp(self): - super(CountPreprocessingFnTest, self).setUp() - self.raw_data = { - 'text': ['Let\'s make this Chinese even though it\'s English.'], - 'language_code': ['zh'], - } - - def testUseGivenLang(self): - preprocessing_fn = utils.count_preprocessing_fn('text', 'language_code') - with tf.Session() as sess: - expected_tokens = ['Let', '\'', 's', 'make', 'this', 'Chinese', 'even', - 'though', 'it', '\'', 's', 'English', '.'] - - outputs = preprocessing_fn(self.raw_data) - outputs = sess.run(outputs) - self.assertEqual(outputs['lang'], 'zh') - self.assertSequenceAlmostEqual(outputs['tokens'].values, expected_tokens) - - -class MetricsPreprocessingFnTest(absltest.TestCase): - - def setUp(self): - super(MetricsPreprocessingFnTest, self).setUp() - self.raw_data = { - 'label': ['1'], - 'text_a': ['The boy jumped into the air.'], - 'lang': ['en'], - } - self.vocab = ['The', 'jump', '##ed', 'in', '##to', 'the', 'air', '.', 'bo', - 'jumped', 'to', 'cat', 'sat', 'on', 'a', 'h', '##at', 'c'] - self.expected_wordpieces = ['The', '[UNK]', 'jumped', 'in', '##to', 'the', - 'air', '.'] - - def testSingleElement(self): - with tf.Session() as sess: - with tempfile.NamedTemporaryFile(mode='w+t', delete=False) as vocab: - vocab.writelines([word + '\n' for word in self.vocab]) - vocab.flush() - preprocessing_fn = utils.metrics_preprocessing_fn( - vocab.name, 'text_a', 'lang') - outputs = preprocessing_fn(self.raw_data) - tf.tables_initializer().run() - outputs = sess.run(outputs) - - self.assertEqual(outputs['lang'], 'en') - self.assertEqual(outputs['num_non_unk_wordpieces'], 7) - self.assertEqual(outputs['num_preserved_chars'], 20) - self.assertEqual(outputs['num_dropped_chars'], 3) - self.assertSequenceAlmostEqual(outputs['wordpieces'].values, - self.expected_wordpieces) - - def testLargerBatchSize(self): - with tf.Session() as sess: - with tempfile.NamedTemporaryFile(mode='w+t', delete=False) as vocab: - raw_data = { - 'label': ['1', '2'], - 'text_a': ['The boy jumped into the air.', 'The cat sat on a hat.'], - 'lang': ['en', 'en'], - } - expected_wordpieces = ['The', '[UNK]', 'jumped', 'in', '##to', 'the', - 'air', '.', 'The', 'cat', 'sat', 'on', 'a', 'h', - '##at', '.'] - vocab.writelines([word + '\n' for word in self.vocab]) - vocab.flush() - preprocessing_fn = utils.metrics_preprocessing_fn( - vocab.name, 'text_a', 'lang') - outputs = preprocessing_fn(raw_data) - tf.tables_initializer().run() - outputs = sess.run(outputs) - - self.assertSequenceAlmostEqual(outputs['lang'], ['en', 'en']) - self.assertSequenceAlmostEqual(outputs['num_preserved_chars'], [20, 16]) - self.assertSequenceAlmostEqual(outputs['num_dropped_chars'], [3, 0]) - self.assertSequenceAlmostEqual(outputs['wordpieces'].values, - expected_wordpieces) - self.assertSequenceAlmostEqual(outputs['num_non_unk_wordpieces'], - [7, 8]) - - -if __name__ == '__main__': - logging.getLogger().setLevel(logging.INFO) - absltest.main()
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner.py deleted file mode 100644 index 90876f9..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner.py +++ /dev/null
@@ -1,93 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Binary for learning wordpiece vocabulary.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from absl import app -from absl import flags -from tensorflow_text.tools.wordpiece_vocab import wordpiece_tokenizer_learner_lib as learner - - -FLAGS = flags.FLAGS - -flags.DEFINE_string('input_path', None, 'Path to wordcount file.') -flags.mark_flag_as_required('input_path', FLAGS) -flags.DEFINE_string('output_path', None, 'Path to vocab file.') -flags.mark_flag_as_required('output_path', FLAGS) - -flags.DEFINE_integer('upper_thresh', 10000000, - 'Upper threshold for binary search.') -flags.DEFINE_integer('lower_thresh', 10, 'Lower threshold for binary search.') -flags.DEFINE_integer('num_iterations', 4, - 'Number of iterations in wordpiece learning algorithm.') -flags.DEFINE_integer('num_pad_tokens', 100, 'Number of padding tokens to ' - 'include in vocab.') -flags.DEFINE_integer('max_input_tokens', 5000000, - 'Maximum number of input tokens, where -1 means no max.') -flags.DEFINE_integer('max_token_length', 50, 'Maximum length of a token.') -flags.DEFINE_integer('max_unique_chars', 1000, - 'Maximum number of unique characters as tokens.') -flags.DEFINE_integer('vocab_size', 110000, 'Target size of generated vocab, ' - 'where vocab_size is an upper bound and the size of vocab ' - 'can be within slack_ratio less than the vocab_size.') -flags.DEFINE_float('slack_ratio', 0.05, - 'Difference permitted between target and actual vocab size.') -flags.DEFINE_bool('include_joiner_token', True, - 'Whether to include joiner token in word suffixes.') -flags.DEFINE_string('joiner', '##', 'Joiner token in word suffixes.') -flags.DEFINE_list('reserved_tokens', - ['<unk>', '<s>', '</s>', '<mask>', - '<cls>', '<sep>', '<S>', '<T>'], - 'Reserved tokens to be included in vocab.') - - -def main(_): - # Read in wordcount file. - with open(FLAGS.input_path) as wordcount_file: - word_counts = [(line.split()[0], int(line.split()[1])) - for line in wordcount_file] - - # Add in padding tokens. - reserved_tokens = FLAGS.reserved_tokens - if FLAGS.num_pad_tokens: - padded_tokens = ['<pad>'] - padded_tokens += ['<pad%d>' % i for i in range(1, FLAGS.num_pad_tokens)] - reserved_tokens = padded_tokens + reserved_tokens - - vocab = learner.learn( - word_counts, - vocab_size=FLAGS.vocab_size, - reserved_tokens=reserved_tokens, - upper_thresh=FLAGS.upper_thresh, - lower_thresh=FLAGS.lower_thresh, - num_iterations=FLAGS.num_iterations, - max_input_tokens=FLAGS.max_input_tokens, - max_token_length=FLAGS.max_token_length, - max_unique_chars=FLAGS.max_unique_chars, - slack_ratio=FLAGS.slack_ratio, - include_joiner_token=FLAGS.include_joiner_token, - joiner=FLAGS.joiner) - vocab = ''.join([line + '\n' for line in vocab]) - - # Write vocab to file. 
- with open(FLAGS.output_path, 'w') as vocab_file: - vocab_file.write(vocab) - - -if __name__ == '__main__': - app.run(main)
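The binary above is a thin wrapper around learner.learn(): it reads a whitespace-separated "word count" file, prepends the <pad> tokens to the reserved-token list, and writes one vocabulary entry per line. Restated as a library call for reference; the file paths are placeholders, and the defaults are copied from the flag definitions above.

```python
# Sketch of the deleted binary's flow as a plain function. 'counts.txt' and
# 'vocab.txt' are placeholder paths; defaults mirror the FLAGS above.
from tensorflow_text.tools.wordpiece_vocab import wordpiece_tokenizer_learner_lib as learner


def build_vocab(input_path='counts.txt', output_path='vocab.txt',
                vocab_size=110000, num_pad_tokens=100):
  # Each input line is "<word> <count>", e.g. "apple 17".
  with open(input_path) as f:
    word_counts = [(line.split()[0], int(line.split()[1])) for line in f]

  reserved = ['<unk>', '<s>', '</s>', '<mask>', '<cls>', '<sep>', '<S>', '<T>']
  if num_pad_tokens:
    reserved = (['<pad>'] +
                ['<pad%d>' % i for i in range(1, num_pad_tokens)] + reserved)

  vocab = learner.learn(word_counts, vocab_size=vocab_size,
                        reserved_tokens=reserved)
  with open(output_path, 'w') as f:
    f.write(''.join(token + '\n' for token in vocab))
```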
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner_lib.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner_lib.py deleted file mode 100644 index b789c61f..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner_lib.py +++ /dev/null
@@ -1,468 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Algorithm for learning wordpiece vocabulary.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import collections -from typing import List, Optional - -import numpy as np - -Params = collections.namedtuple('Params', [ - 'upper_thresh', 'lower_thresh', 'num_iterations', 'max_input_tokens', - 'max_token_length', 'max_unique_chars', 'vocab_size', 'slack_ratio', - 'include_joiner_token', 'joiner', 'reserved_tokens' -]) - - -def extract_char_tokens(word_counts): - """Extracts all single-character tokens from word_counts. - - Args: - word_counts: list of (string, int) tuples - - Returns: - set of single-character strings contained within word_counts - """ - - seen_chars = set() - for word, _ in word_counts: - for char in word: - seen_chars.add(char) - return seen_chars - - -def ensure_all_tokens_exist(input_tokens, output_tokens, include_joiner_token, - joiner): - """Adds all tokens in input_tokens to output_tokens if not already present. - - Args: - input_tokens: set of strings (tokens) we want to include - output_tokens: string to int dictionary mapping token to count - include_joiner_token: bool whether to include joiner token - joiner: string used to indicate suffixes - - Returns: - string to int dictionary with all tokens in input_tokens included - """ - - for token in input_tokens: - if token not in output_tokens: - output_tokens[token] = 1 - - if include_joiner_token: - joined_token = joiner + token - if joined_token not in output_tokens: - output_tokens[joined_token] = 1 - - return output_tokens - - -def get_split_indices(word, curr_tokens, include_joiner_token, joiner): - """Gets indices for valid substrings of word, for iterations > 0. - - For iterations > 0, rather than considering every possible substring, we only - want to consider starting points corresponding to the start of wordpieces in - the current vocabulary. - - Args: - word: string we want to split into substrings - curr_tokens: string to int dict of tokens in vocab (from previous iteration) - include_joiner_token: bool whether to include joiner token - joiner: string used to indicate suffixes - - Returns: - list of ints containing valid starting indices for word - """ - - indices = [] - start = 0 - while start < len(word): - end = len(word) - while end > start: - subtoken = word[start:end] - # Subtoken includes the joiner token. - if include_joiner_token and start > 0: - subtoken = joiner + subtoken - # If subtoken is part of vocab, 'end' is a valid start index. - if subtoken in curr_tokens: - indices.append(end) - break - end -= 1 - - if end == start: - return None - start = end - - return indices - - -def get_search_threshs(word_counts, upper_thresh, lower_thresh): - """Clips the thresholds for binary search based on current word counts. 
- - The upper threshold parameter typically has a large default value that can - result in many iterations of unnecessary search. Thus we clip the upper and - lower bounds of search to the maximum and the minimum wordcount values. - - Args: - word_counts: list of (string, int) tuples - upper_thresh: int, upper threshold for binary search - lower_thresh: int, lower threshold for binary search - - Returns: - upper_search: int, clipped upper threshold for binary search - lower_search: int, clipped lower threshold for binary search - """ - - counts = [count for _, count in word_counts] - max_count = max(counts) - min_count = min(counts) - - if upper_thresh is None: - upper_search = max_count - else: - upper_search = max_count if max_count < upper_thresh else upper_thresh - - if lower_thresh is None: - lower_search = min_count - else: - lower_search = min_count if min_count > lower_thresh else lower_thresh - - return upper_search, lower_search - - -def get_input_words(word_counts, reserved_tokens, max_token_length): - """Filters out words that are longer than max_token_length or are reserved. - - Args: - word_counts: list of (string, int) tuples - reserved_tokens: list of strings - max_token_length: int, maximum length of a token - - Returns: - list of (string, int) tuples of filtered wordcounts - """ - - all_counts = [] - - for word, count in word_counts: - if len(word) > max_token_length or word in reserved_tokens: - continue - all_counts.append((word, count)) - - return all_counts - - -def get_allowed_chars(all_counts, max_unique_chars): - """Get the top max_unique_chars characters within our wordcounts. - - We want each character to be in the vocabulary so that we can keep splitting - down to the character level if necessary. However, in order not to inflate - our vocabulary with rare characters, we only keep the top max_unique_chars - characters. - - Args: - all_counts: list of (string, int) tuples - max_unique_chars: int, maximum number of unique single-character tokens - - Returns: - set of strings containing top max_unique_chars characters in all_counts - """ - - char_counts = collections.defaultdict(int) - - for word, count in all_counts: - for char in word: - char_counts[char] += count - - # Sort by count, then alphabetically. - sorted_counts = sorted(sorted(char_counts.items(), key=lambda x: x[0]), - key=lambda x: x[1], reverse=True) - - allowed_chars = set() - for i in range(min(len(sorted_counts), max_unique_chars)): - allowed_chars.add(sorted_counts[i][0]) - return allowed_chars - - -def filter_input_words(all_counts, allowed_chars, max_input_tokens): - """Filters out words with unallowed chars and limits words to max_input_tokens. - - Args: - all_counts: list of (string, int) tuples - allowed_chars: list of single-character strings - max_input_tokens: int, maximum number of tokens accepted as input - - Returns: - list of (string, int) tuples of filtered wordcounts - """ - # Ensure that the input is sorted so that if `max_input_tokens` is reached - # the least common tokens are dropped. 
- all_counts = sorted( - all_counts, key=lambda word_and_count: word_and_count[1], reverse=True) - filtered_counts = [] - for word, count in all_counts: - if (max_input_tokens != -1 and - len(filtered_counts) >= max_input_tokens): - break - has_unallowed_chars = False - for char in word: - if char not in allowed_chars: - has_unallowed_chars = True - break - if has_unallowed_chars: - continue - filtered_counts.append((word, count)) - - return filtered_counts - - -def generate_final_vocabulary(reserved_tokens, char_tokens, curr_tokens): - """Generates final vocab given reserved, single-character, and current tokens. - - Args: - reserved_tokens: list of strings (tokens) that must be included in vocab - char_tokens: set of single-character strings - curr_tokens: string to int dict mapping token to count - - Returns: - list of strings representing final vocabulary - """ - - sorted_char_tokens = sorted(list(char_tokens)) - vocab_char_arrays = [] - vocab_char_arrays.extend(reserved_tokens) - vocab_char_arrays.extend(sorted_char_tokens) - - # Sort by count, then alphabetically. - sorted_tokens = sorted(sorted(curr_tokens.items(), key=lambda x: x[0]), - key=lambda x: x[1], reverse=True) - for token, _ in sorted_tokens: - vocab_char_arrays.append(token) - - seen_tokens = set() - # Adding unique tokens to list to maintain sorted order. - vocab_words = [] - for word in vocab_char_arrays: - if word in seen_tokens: - continue - seen_tokens.add(word) - vocab_words.append(word) - - return vocab_words - - -def learn_with_thresh(word_counts, thresh, params): - """Wordpiece learning algorithm to produce a vocab given frequency threshold. - - Args: - word_counts: list of (string, int) tuples - thresh: int, frequency threshold for a token to be included in the vocab - params: Params namedtuple, parameters for learning - - Returns: - list of strings, vocabulary generated for the given thresh - """ - - # Set of single-character tokens. - char_tokens = extract_char_tokens(word_counts) - curr_tokens = ensure_all_tokens_exist(char_tokens, {}, - params.include_joiner_token, - params.joiner) - - for iteration in range(params.num_iterations): - subtokens = [dict() for _ in range(params.max_token_length + 1)] - # Populate array with counts of each subtoken. - for word, count in word_counts: - if iteration == 0: - split_indices = range(1, len(word) + 1) - else: - split_indices = get_split_indices(word, curr_tokens, - params.include_joiner_token, - params.joiner) - if not split_indices: - continue - - start = 0 - for index in split_indices: - for end in range(start + 1, len(word) + 1): - subtoken = word[start:end] - length = len(subtoken) - if params.include_joiner_token and start > 0: - subtoken = params.joiner + subtoken - if subtoken in subtokens[length]: - # Subtoken exists, increment count. - subtokens[length][subtoken] += count - else: - # New subtoken, add to dict. - subtokens[length][subtoken] = count - start = index - - next_tokens = {} - # Get all tokens that have a count above the threshold. - for length in range(params.max_token_length, 0, -1): - for token, count in subtokens[length].items(): - if count >= thresh: - next_tokens[token] = count - # Decrement the count of all prefixes. - if len(token) > length: # This token includes the joiner. 
- joiner_len = len(params.joiner) - for i in range(1 + joiner_len, length + joiner_len): - prefix = token[0:i] - if prefix in subtokens[i - joiner_len]: - subtokens[i - joiner_len][prefix] -= count - else: - for i in range(1, length): - prefix = token[0:i] - if prefix in subtokens[i]: - subtokens[i][prefix] -= count - - # Add back single-character tokens. - curr_tokens = ensure_all_tokens_exist(char_tokens, next_tokens, - params.include_joiner_token, - params.joiner) - - vocab_words = generate_final_vocabulary(params.reserved_tokens, char_tokens, - curr_tokens) - - return vocab_words - - -def learn_binary_search(word_counts, lower, upper, params): - """Performs binary search to find wordcount frequency threshold. - - Given upper and lower bounds and a list of (word, count) tuples, performs - binary search to find the threshold closest to producing a vocabulary - of size vocab_size. - - Args: - word_counts: list of (string, int) tuples - lower: int, lower bound for binary search - upper: int, upper bound for binary search - params: Params namedtuple, parameters for learning - - Returns: - list of strings, vocab that is closest to target vocab_size - """ - thresh = (upper + lower) // 2 - current_vocab = learn_with_thresh(word_counts, thresh, params) - current_vocab_size = len(current_vocab) - - # Allow count to be within k% of the target count, where k is slack ratio. - slack_count = params.slack_ratio * params.vocab_size - if slack_count < 0: - slack_count = 0 - - is_within_slack = (current_vocab_size <= params.vocab_size) and ( - params.vocab_size - current_vocab_size <= slack_count) - - # We've created a vocab within our goal range (or, ran out of search space). - if is_within_slack or lower >= upper or thresh <= 1: - return current_vocab - - current_vocab = None - - if current_vocab_size > params.vocab_size: - return learn_binary_search(word_counts, thresh + 1, upper, params) - - else: - return learn_binary_search(word_counts, lower, thresh - 1, params) - - -def count_words(iterable) -> collections.Counter: - """Converts a iterable of arrays of words into a `Counter` of word counts.""" - counts = collections.Counter() - for words in iterable: - # Convert a RaggedTensor to a flat/dense Tensor. - words = getattr(words, 'flat_values', words) - # Flatten any dense tensor - words = np.reshape(words, [-1]) - counts.update(words) - - # Decode the words if necessary. - example_word = next(iter(counts.keys())) - if isinstance(example_word, bytes): - counts = collections.Counter( - {word.decode('utf-8'): count for word, count in counts.items()}) - - return counts - - -def learn(word_counts, - vocab_size: int, - reserved_tokens: List[str], - upper_thresh: Optional[int] = int(1e7), - lower_thresh: Optional[int] = 10, - num_iterations: int = 4, - max_input_tokens: Optional[int] = int(5e6), - max_token_length: int = 50, - max_unique_chars: int = 1000, - slack_ratio: float = 0.05, - include_joiner_token: bool = True, - joiner: str = '##') -> List[str]: - """Takes in wordcounts and returns wordpiece vocabulary. - - Args: - word_counts: (word, count) pairs as a dictionary, or list of tuples. - vocab_size: The target vocabulary size. This is the maximum size. - reserved_tokens: A list of tokens that must be included in the vocabulary. - upper_thresh: Initial upper bound on the token frequency threshold. - lower_thresh: Initial lower bound on the token frequency threchold. - num_iterations: Number of iterations to run. - max_input_tokens: The maximum number of words in the initial vocabulary. 
The - words with the lowest counts are discarded. Use `None` or `-1` for "no - maximum". - max_token_length: The maximum token length. Counts for longer words are - discarded. - max_unique_chars: The maximum alphabet size. This prevents rare characters - from inflating the vocabulary. Counts for words containing characters - ouside of the selected alphabet are discarded. - slack_ratio: The maximum deviation acceptable from `vocab_size` for an - acceptable vocabulary. The acceptable range of vocabulary sizes is from - `vocab_size*(1-slack_ratio)` to `vocab_size`. - include_joiner_token: If true, include the `joiner` token in the output - vocabulary. - joiner: The prefix to include on suffix tokens in the output vocabulary. - Usually "##". For example 'places' could be tokenized as `['place', - '##s']`. - - Returns: - string, final vocabulary with each word separated by newline - """ - if isinstance(word_counts, dict): - word_counts = word_counts.items() - - params = Params(upper_thresh, lower_thresh, num_iterations, max_input_tokens, - max_token_length, max_unique_chars, vocab_size, slack_ratio, - include_joiner_token, joiner, reserved_tokens) - - upper_search, lower_search = get_search_threshs(word_counts, - params.upper_thresh, - params.lower_thresh) - all_counts = get_input_words(word_counts, params.reserved_tokens, - params.max_token_length) - allowed_chars = get_allowed_chars(all_counts, params.max_unique_chars) - - filtered_counts = filter_input_words(all_counts, allowed_chars, - params.max_input_tokens) - - vocab = learn_binary_search(filtered_counts, lower_search, upper_search, - params) - - return vocab
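Because learn() also accepts a dict (it calls .items() on dict inputs), it composes directly with count_words(), so a vocabulary can be learned from already-tokenized text without the Beam pipeline. A small usage sketch follows; the sentences and vocab_size are illustrative, and the learned pieces depend on the thresholds.

```python
# Usage sketch combining count_words() and learn() from the library above.
# The sentences and vocab_size are illustrative, not from a real corpus.
from tensorflow_text.tools.wordpiece_vocab import wordpiece_tokenizer_learner_lib as learner

sentences = [
    ['the', 'apple', 'sauce'],
    ['the', 'apple', 'store'],
    ['the', 'apples'],
]
counts = learner.count_words(sentences)
# counts: 'the' -> 3, 'apple' -> 2, 'apples'/'sauce'/'store' -> 1

vocab = learner.learn(
    counts,                                   # learn() accepts a dict/Counter
    vocab_size=30,
    reserved_tokens=['<unk>', '<s>', '</s>'],
    lower_thresh=1)
print(vocab[:3])  # ['<unk>', '<s>', '</s>'] -- reserved tokens always come first
```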
diff --git a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner_test.py b/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner_test.py deleted file mode 100644 index 25a4e6d..0000000 --- a/third_party/tensorflow-text/src/tensorflow_text/tools/wordpiece_vocab/wordpiece_tokenizer_learner_test.py +++ /dev/null
@@ -1,273 +0,0 @@ -# coding=utf-8 -# Copyright 2021 TF.Text Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Tests for tensorflow_text.python.tools.wordpiece_vocab.wordpiece_tokenizer_learner_lib.""" - -import collections -import logging - -from absl.testing import absltest -from absl.testing import parameterized - -import numpy as np - -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.ops.ragged import ragged_string_ops -from tensorflow_text.tools.wordpiece_vocab import wordpiece_tokenizer_learner_lib as learner - - -class ExtractCharTokensTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'UniqueChars', - 'word_counts': [('abc', 1), ('def', 1), ('ghi', 1)], - 'expected': {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'}}, - {'testcase_name': 'RepeatedChars', - 'word_counts': [('hello', 1), ('world!', 1)], - 'expected': {'h', 'e', 'l', 'o', 'w', 'r', 'd', '!'}}) - - def testExtractCharTokens(self, word_counts, expected): - actual = learner.extract_char_tokens(word_counts) - self.assertEqual(expected, actual) - - -class EnsureAllTokensExistTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'EmptyDict', 'input_tokens': {'a', 'b', 'c'}, - 'output_tokens': {}, 'expected_tokens': {'a': 1, 'b': 1, 'c': 1}, - 'include_joiner': False}, - {'testcase_name': 'SomeTokensExist', 'input_tokens': {'a', 'b', 'c'}, - 'output_tokens': {'a': 2, 'd': 3, 'e': 1}, - 'expected_tokens': {'a': 2, 'b': 1, 'c': 1, 'd': 3, 'e': 1}, - 'include_joiner': False}, - {'testcase_name': 'SomeTokensExistWithJoiner', - 'input_tokens': {'a', 'b', 'c'}, - 'output_tokens': {'a': 2, 'd': 3, 'e': 1}, - 'expected_tokens': {'a': 2, 'b': 1, 'c': 1, 'd': 3, 'e': 1, '##a': 1, - '##b': 1, '##c': 1}, 'include_joiner': True}) - - def testEnsureAllTokensExist(self, input_tokens, output_tokens, - expected_tokens, include_joiner): - joiner = '##' - new_tokens = learner.ensure_all_tokens_exist(input_tokens, output_tokens, - include_joiner, joiner) - self.assertEqual(new_tokens, expected_tokens) - - -class GetSplitIndicesTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'ValidWordNoJoiner', 'word': 'hello', - 'expected_indices': [2, 5], 'include_joiner': False}, - {'testcase_name': 'ValidWordWithJoiner', 'word': 'hello', - 'expected_indices': [2, 3, 5], 'include_joiner': True}, - {'testcase_name': 'InvalidSplitIndices', 'word': 'world', - 'expected_indices': None, 'include_joiner': False}) - - def testGetSplitIndices(self, word, expected_indices, include_joiner): - joiner = '##' - curr_tokens = {'he': 1, 'llo': 1, '##l': 1, '##lo': 1, '!': 1} - indices = learner.get_split_indices(word, curr_tokens, include_joiner, - joiner) - self.assertEqual(indices, expected_indices) - - -class GetSearchThreshsTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'ThreshsWithinMinMax', 'upper_thresh': 200, - 'lower_thresh': 5, 'expected_upper': 200, 'expected_lower': 5}, - {'testcase_name': 
'ThreshsOutsideMinMax', 'upper_thresh': 10000, - 'lower_thresh': 2, 'expected_upper': 292, 'expected_lower': 3}) - - def testGetSearchThreshs(self, upper_thresh, lower_thresh, expected_upper, - expected_lower): - word_counts = [('apple', 3), ('banana', 292), ('cucumber', 5)] - upper, lower = learner.get_search_threshs(word_counts, upper_thresh, - lower_thresh) - self.assertEqual(upper, expected_upper) - self.assertEqual(lower, expected_lower) - - -class GetInputWordsTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'TokenTooLong', - 'word_counts': [('blah', 1), ('blehhhhhhhh', 2)], - 'expected_counts': [('blah', 1)]}, - {'testcase_name': 'TokenInReserved', - 'word_counts': [('q', 1), ('r', 2), ('<s>', 35), ('t', 3), ('u', 4)], - 'expected_counts': [('q', 1), ('r', 2), ('t', 3), ('u', 4)]}) - - def testGetInputWords(self, word_counts, expected_counts): - max_token_length = 10 - reserved_tokens = ['<unk>', '<s>', '</s>'] - new_counts = learner.get_input_words(word_counts, reserved_tokens, - max_token_length) - - self.assertEqual(new_counts, expected_counts) - - -class GetAllowedCharsTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'MoreCharsThanMax', - 'word_counts': [('hello', 1), ('world', 1)], - 'expected_chars': {'l', 'o', 'd', 'e', 'h'}}, - {'testcase_name': 'DifferentFrequency', - 'word_counts': [('hello', 1), ('world', 2)], - 'expected_chars': {'l', 'o', 'd', 'r', 'w'}}) - - def testGetAllowedChars(self, word_counts, expected_chars): - max_unique_chars = 5 - chars = learner.get_allowed_chars(word_counts, max_unique_chars) - self.assertEqual(chars, expected_chars) - - -class FilterInputWordsTest(parameterized.TestCase): - @parameterized.named_parameters( - { - 'testcase_name': 'TokenHasUnallowedChars', - 'word_counts': [('bad', 1), ('had', 2), ('bag', 1), ('cat', 5)], - 'expected_counts': [('had', 2), ('bad', 1), ('bag', 1)] - }, { - 'testcase_name': 'TooManyInputTokens', - 'word_counts': [('bad', 1), ('had', 2), ('bag', 1), ('bed', 5), - ('head', 7)], - 'expected_counts': [('head', 7), ('bed', 5), ('had', 2), ('bad', 1)] - }) - - def testFilterInputWords(self, word_counts, expected_counts): - allowed_chars = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} - max_input_tokens = 4 - filtered_counts = learner.filter_input_words(word_counts, allowed_chars, - max_input_tokens) - self.assertEqual(filtered_counts, expected_counts) - - -class GenerateFinalVocabularyTest(absltest.TestCase): - - def setUp(self): - super(GenerateFinalVocabularyTest, self).setUp() - self.reserved_tokens = ['<unk>', '<s>', '</s>'] - self.char_tokens = ['c', 'a', 'b'] - self.curr_tokens = {'my': 2, 'na': 5, '##me': 1, 'is': 2} - self.vocab_array = ['<unk>', '<s>', '</s>', 'a', 'b', 'c', 'na', 'is', 'my', - '##me'] - - def testGenerateFinalVocab(self): - final_vocab = learner.generate_final_vocabulary(self.reserved_tokens, - self.char_tokens, - self.curr_tokens) - self.assertEqual(final_vocab, self.vocab_array) - - -class LearnWithThreshTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'LearnWithOneIteration', - 'word_counts': [('apple', 1), ('app', 1)], - 'thresh': 1, - 'expected_vocab': ['a', 'e', 'l', 'p', 'app', 'apple', 'le', 'ple', 'pp', - 'pple'], - 'params': learner.Params(upper_thresh=4, lower_thresh=1, - num_iterations=1, max_input_tokens=1000, - max_token_length=50, max_unique_chars=5, - vocab_size=10, slack_ratio=0, - include_joiner_token=False, joiner='##', - reserved_tokens=[])}, - 
{'testcase_name': 'LearnWithTwoIterations', - 'word_counts': [('apple', 1), ('app', 1)], - 'thresh': 1, - 'expected_vocab': ['a', 'e', 'l', 'p', 'app', 'apple'], - 'params': learner.Params(upper_thresh=4, lower_thresh=1, - num_iterations=2, max_input_tokens=1000, - max_token_length=50, max_unique_chars=5, - vocab_size=10, slack_ratio=0, - include_joiner_token=False, joiner='##', - reserved_tokens=[])}, - {'testcase_name': 'LearnWithHigherThresh', - 'word_counts': [('apple', 1), ('app', 2)], - 'thresh': 2, - 'expected_vocab': ['a', 'e', 'l', 'p', 'app', 'pp'], - 'params': learner.Params(upper_thresh=4, lower_thresh=1, - num_iterations=1, max_input_tokens=1000, - max_token_length=50, max_unique_chars=5, - vocab_size=10, slack_ratio=0, - include_joiner_token=False, joiner='##', - reserved_tokens=[])}) - - def testLearnWithThresh(self, word_counts, thresh, expected_vocab, params): - vocab = learner.learn_with_thresh(word_counts, thresh, params) - self.assertEqual(vocab, expected_vocab) - - -class LearnBinarySearchTest(parameterized.TestCase): - @parameterized.named_parameters( - {'testcase_name': 'ReachesVocabSize', - 'word_counts': [('apple', 2), ('peach', 1), ('pear', 1)], - 'lower': 1, 'upper': 10, 'delta': 0, - 'expected_vocab': ['a', 'c', 'e', 'h', 'l', 'p', 'r', 'apple', 'peach', - 'pear'], - 'params': learner.Params(upper_thresh=4, lower_thresh=1, - num_iterations=4, max_input_tokens=1000, - max_token_length=50, max_unique_chars=50, - vocab_size=10, slack_ratio=0, - include_joiner_token=False, joiner='##', - reserved_tokens=[])}, - {'testcase_name': 'VocabSizeWithinSlack', - 'word_counts': [('apple', 2), ('peach', 1), ('pear', 1), ('app', 2)], - 'lower': 1, 'upper': 10, 'delta': 6, - 'expected_vocab': ['a', 'c', 'e', 'h', 'l', 'p', 'r'], - 'params': learner.Params(upper_thresh=4, lower_thresh=1, - num_iterations=4, max_input_tokens=1000, - max_token_length=50, max_unique_chars=50, - vocab_size=12, slack_ratio=0.5, - include_joiner_token=False, joiner='##', - reserved_tokens=[])}) - - def testBinarySearch(self, word_counts, lower, upper, delta, expected_vocab, - params): - vocab = learner.learn_binary_search(word_counts, lower, upper, params) - self.assertAlmostEqual(len(vocab), params.vocab_size, delta=delta) - self.assertLessEqual(len(vocab), params.vocab_size) - self.assertEqual(vocab, expected_vocab) - - -class CountWordsTest(parameterized.TestCase): - - def test_count_lists(self): - data = [['aaa', 'bb', 'c'], ['aaa', 'aaa'], ['c']] - counts = learner.count_words(data) - - self.assertEqual(counts, collections.Counter({'aaa': 3, 'bb': 1, 'c': 2})) - - def test_count_numpy_gen(self): - - def get_words(): - yield np.array(['aaa', 'bb', 'c']) - yield np.array(['aaa', 'aaa']) - yield np.array(['c']) - - counts = learner.count_words(get_words()) - - self.assertEqual(counts, collections.Counter({'aaa': 3, 'bb': 1, 'c': 2})) - - def test_count_ragged_dataset(self): - ds = dataset_ops.DatasetV2.from_tensor_slices(['aaa bb c', 'aaa aaa', 'c']) - ds = ds.map(ragged_string_ops.string_split_v2) - - counts = learner.count_words(ds) - - self.assertEqual(counts, collections.Counter({'aaa': 3, 'bb': 1, 'c': 2})) - - -if __name__ == '__main__': - logging.getLogger().setLevel(logging.INFO) - absltest.main()
diff --git a/third_party/tensorflow-text/src/third_party/darts_clone/BUILD b/third_party/tensorflow-text/src/third_party/darts_clone/BUILD deleted file mode 100644 index 82bab3f..0000000 --- a/third_party/tensorflow-text/src/third_party/darts_clone/BUILD +++ /dev/null
@@ -1 +0,0 @@ -# This empty BUILD file is required to make Bazel treat this directory as a package.
diff --git a/third_party/tensorflow-text/src/third_party/darts_clone/BUILD.bzl b/third_party/tensorflow-text/src/third_party/darts_clone/BUILD.bzl deleted file mode 100644 index de6a309..0000000 --- a/third_party/tensorflow-text/src/third_party/darts_clone/BUILD.bzl +++ /dev/null
@@ -1,14 +0,0 @@ -"""Darts-clone is a clone of Darts (Double-ARray Trie System).""" - -package(default_visibility = ["//visibility:public"]) - -licenses(["notice"]) - -exports_files(["LICENSE"]) - -cc_library( - name = "darts_clone", - hdrs = [ - "include/darts.h", - ], -)
diff --git a/third_party/tensorflow-text/src/third_party/icu/BUILD b/third_party/tensorflow-text/src/third_party/icu/BUILD deleted file mode 100644 index 82bab3f..0000000 --- a/third_party/tensorflow-text/src/third_party/icu/BUILD +++ /dev/null
@@ -1 +0,0 @@ -# This empty BUILD file is required to make Bazel treat this directory as a package.
diff --git a/third_party/tensorflow-text/src/third_party/icu/BUILD.bzl b/third_party/tensorflow-text/src/third_party/icu/BUILD.bzl deleted file mode 100644 index 85eb556c..0000000 --- a/third_party/tensorflow-text/src/third_party/icu/BUILD.bzl +++ /dev/null
@@ -1,107 +0,0 @@ -"""Builds ICU library.""" - -package( - default_visibility = ["//visibility:public"], -) - -licenses(["notice"]) # Apache 2.0 - -exports_files([ - "icu4c/LICENSE", - "icu4j/main/shared/licenses/LICENSE", -]) - -cc_library( - name = "headers", - hdrs = glob(["icu4c/source/common/unicode/*.h"]), - includes = [ - "icu4c/source/common", - ], - deps = [ - ], -) - -cc_library( - name = "common", - hdrs = glob(["icu4c/source/common/unicode/*.h"]), - includes = [ - "icu4c/source/common", - ], - deps = [ - ":icuuc", - ], -) - -alias( - name = "nfkc", - actual = ":common", -) - -alias( - name = "nfkc_cf", - actual = ":common", -) - -cc_library( - name = "icuuc", - srcs = glob( - [ - "icu4c/source/common/*.c", - "icu4c/source/common/*.cpp", - "icu4c/source/stubdata/*.cpp", - ], - ), - hdrs = glob([ - "icu4c/source/common/*.h", - ]), - copts = [ - "-DU_COMMON_IMPLEMENTATION", - ] + select({ - ":android": [ - "-fdata-sections", - "-DU_HAVE_NL_LANGINFO_CODESET=0", - "-Wno-deprecated-declarations", - ], - ":apple": [ - "-Wno-shorten-64-to-32", - "-Wno-unused-variable", - ], - ":windows": [ - "/utf-8", - "/DLOCALE_ALLOW_NEUTRAL_NAMES=0", - ], - "//conditions:default": [], - }), - tags = ["requires-rtti"], - visibility = [ - "//visibility:private", - ], - deps = [ - ":headers", - ], -) - -cc_library( - name = "windows_static_link_data", - # Dynamic libraries currently not supported on Windows. - defines = ["U_STATIC_IMPLEMENTATION"], - linkopts = ["advapi32.lib"], - deps = [ - "@org_tensorflow_text//third_party/icu/data:icu_normalization_data", - ], -) - -config_setting( - name = "android", - values = {"crosstool_top": "//external:android/crosstool"}, -) - -config_setting( - name = "apple", - values = {"cpu": "darwin"}, -) - -config_setting( - name = "windows", - values = {"cpu": "x64_windows"}, -)
diff --git a/third_party/tensorflow-text/src/third_party/icu/data/BUILD b/third_party/tensorflow-text/src/third_party/icu/data/BUILD deleted file mode 100644 index ceaf4e7..0000000 --- a/third_party/tensorflow-text/src/third_party/icu/data/BUILD +++ /dev/null
@@ -1,33 +0,0 @@ -"""ICU data build.""" - -licenses(["notice"]) # Apache 2.0 - -package( - default_visibility = ["//visibility:public"], -) - -exports_files(["LICENSE"]) - -# To replicate this pre-processing (if you want additional ICU data), do the -# following: -# -# Note: I had to build from master, but I'm sure any version after 64.2 -# will work. -# -# For guidance, see: go/icu-data & -# http://google3/third_party/icu/g3data/icu_data.bzl?l=47&rcl=252943749 -# -# First, download, build, and install ICU. This installs tools such as makeconv. -# Then, run the following from your icu4c/source directory: -# $ ICU_DATA_FILTER_FILE=filters.json ./runConfigureICU Linux -# $ make clean && make -# $ cd data/out/tmp -# $ genccode icudt64l.dat -# $ echo 'U_CAPI const void * U_EXPORT2 uprv_getICUData_other() { return icudt64l_dat.bytes; }' >> icudt64l_dat.c -# This creates icudt64l_dat.c which you can rename and move in here. -cc_library( - name = "icu_normalization_data", - srcs = ["normalization_data.c"], - deps = ["@icu//:headers"], - alwayslink = 1, -)
diff --git a/third_party/tensorflow-text/src/third_party/icu/data/filters.json b/third_party/tensorflow-text/src/third_party/icu/data/filters.json deleted file mode 100644 index d3999a8..0000000 --- a/third_party/tensorflow-text/src/third_party/icu/data/filters.json +++ /dev/null
@@ -1,6 +0,0 @@ -{ - "strategy": "additive", - "featureFilters": { - "normalization": "include" - } -}
diff --git a/third_party/tensorflow-text/src/third_party/icu/data/normalization_data.c b/third_party/tensorflow-text/src/third_party/icu/data/normalization_data.c deleted file mode 100644 index d868de87..0000000 --- a/third_party/tensorflow-text/src/third_party/icu/data/normalization_data.c +++ /dev/null
@@ -1,10249 +0,0 @@ -#ifndef IN_GENERATED_CCODE -#define IN_GENERATED_CCODE -#define U_DISABLE_RENAMING 1 -#include "unicode/umachine.h" -#endif -U_CDECL_BEGIN -const struct { - double bogus; - uint8_t bytes[163760]; -} icudt64l_norm_dat={ 0.0, { -144,0,218,39,20,0,0,0,0,0,2,0,67,109,110,68, -1,0,0,0,3,0,0,0,32,67,111,112,121,114,105,103, -104,116,32,40,67,41,32,50,48,49,54,32,97,110,100,32, -108,97,116,101,114,58,32,85,110,105,99,111,100,101,44,32, -73,110,99,46,32,97,110,100,32,111,116,104,101,114,115,46, -32,76,105,99,101,110,115,101,32,38,32,116,101,114,109,115, -32,111,102,32,117,115,101,58,32,104,116,116,112,58,47,47, -119,119,119,46,117,110,105,99,111,100,101,46,111,114,103,47, -99,111,112,121,114,105,103,104,116,46,104,116,109,108,32,0, -3,0,0,0,28,0,0,0,96,0,0,0,46,0,0,0, -192,210,0,0,67,0,0,0,240,154,1,0,105,99,117,100, -116,54,52,108,47,110,102,107,99,46,110,114,109,0,105,99, -117,100,116,54,52,108,47,110,102,107,99,95,99,102,46,110, -114,109,0,105,99,117,100,116,54,52,108,47,117,116,115,52, -54,46,110,114,109,0,170,170,170,170,170,170,170,170,170,170, -32,0,218,39,20,0,0,0,0,0,2,0,78,114,109,50, -4,0,0,0,12,1,0,0,0,0,0,0,0,0,0,0, -80,0,0,0,172,105,0,0,60,209,0,0,60,210,0,0, -60,210,0,0,60,210,0,0,60,210,0,0,60,210,0,0, -160,0,0,0,160,0,0,0,172,10,0,0,88,41,0,0, -142,103,0,0,0,252,0,0,82,18,0,0,12,96,0,0, -232,101,0,0,142,103,0,0,0,3,0,0,0,0,0,0, -51,105,114,84,0,0,69,7,97,45,208,0,43,2,126,1, -0,0,64,0,123,0,187,0,251,0,59,1,123,1,183,1, -247,1,43,2,89,2,146,2,210,2,18,3,81,3,145,3, -209,3,16,4,77,4,140,4,43,2,43,2,198,4,6,5, -54,5,110,5,43,2,174,5,221,5,28,6,43,2,49,6, -111,6,157,6,43,2,202,6,10,7,71,7,103,7,166,7, -229,7,34,8,65,8,126,8,103,7,183,8,229,8,36,9, -43,2,94,9,117,9,181,9,204,9,11,10,43,2,65,10, -97,10,156,10,169,10,228,10,24,11,85,11,149,11,207,11, -234,11,43,2,37,12,51,12,115,12,146,12,200,12,5,13, -43,2,43,2,43,2,43,2,43,2,40,13,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,84,13,43,2,43,2,137,13, -43,2,43,2,167,13,43,2,209,13,43,2,43,2,43,2, -13,14,45,14,109,14,43,2,171,14,235,14,31,15,75,15, -70,8,43,2,43,2,127,15,185,15,249,15,50,16,114,16, -178,16,242,16,50,17,114,17,178,17,242,17,50,18,114,18, -178,18,241,18,49,19,97,19,161,19,225,19,33,20,84,20, -145,20,208,20,16,21,70,21,116,21,43,2,43,2,43,2, -43,2,159,21,223,21,31,22,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -83,22,96,22,43,2,151,22,43,2,43,2,43,2,43,2, -43,2,180,22,43,2,22,13,43,2,242,22,43,2,50,23, -43,2,43,2,114,23,146,23,210,23,18,24,82,24,146,24, -210,24,13,25,75,25,139,25,203,25,11,26,75,26,43,2, -139,26,203,26,11,27,75,27,139,27,203,27,11,28,75,28, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, -43,2,43,2,43,2,43,2,43,2,43,2,43,2,43,2, 
- ... (remaining bytes of the 163,760-byte icudt64l_norm_dat ICU normalization data array omitted) ...
-165,66,169,66,173,66,177,66,181,66,185,66,189,66,193,66, -197,66,201,66,205,66,209,66,213,66,217,66,221,66,225,66, -229,66,233,66,237,66,241,66,245,66,249,66,229,65,253,66, -1,67,5,67,9,67,13,67,17,67,21,67,25,67,29,67, -33,67,37,67,41,67,45,67,49,67,53,67,57,67,61,67, -65,67,69,67,73,67,197,52,77,67,81,67,85,67,89,67, -93,67,97,67,101,67,105,67,109,67,113,67,117,67,121,67, -125,67,129,67,133,67,217,50,137,67,141,67,145,67,149,67, -153,67,157,67,161,67,165,67,141,50,169,67,173,67,177,67, -181,67,185,67,189,67,193,67,197,67,201,67,205,67,209,67, -213,67,217,67,221,67,225,67,229,67,233,67,237,67,241,67, -245,67,249,67,253,67,69,67,1,68,5,68,9,68,13,68, -17,68,21,68,25,68,29,68,5,67,33,68,37,68,41,68, -45,68,49,68,53,68,57,68,61,68,65,68,69,68,73,68, -77,68,81,68,85,68,89,68,93,68,97,68,101,68,105,68, -109,68,229,65,113,68,117,68,121,68,125,68,145,53,129,68, -133,68,137,68,141,68,145,68,149,68,153,68,157,68,161,68, -165,68,169,68,173,68,231,56,177,68,181,68,185,68,189,68, -193,68,197,68,201,68,205,68,209,68,13,67,213,68,217,68, -221,68,225,68,229,68,233,68,237,68,241,68,245,68,249,68, -253,68,1,69,5,69,217,52,9,69,13,69,17,69,21,69, -25,69,29,69,33,69,37,69,41,69,45,69,49,69,53,69, -57,69,21,52,61,69,65,69,69,69,73,69,77,69,81,69, -85,69,89,69,93,69,97,69,101,69,105,69,109,69,113,69, -117,69,121,69,129,52,125,69,141,52,129,69,133,69,137,69, -1,0,1,0,141,69,1,0,145,69,1,0,1,0,149,69, -153,69,157,69,161,69,165,69,169,69,173,69,177,69,181,69, -49,52,1,0,185,69,1,0,189,69,1,0,1,0,193,69, -197,69,1,0,1,0,1,0,201,69,205,69,209,69,213,69, -217,69,221,69,225,69,229,69,233,69,237,69,241,69,245,69, -249,69,253,69,1,70,5,70,9,70,13,70,245,50,17,70, -21,70,25,70,29,70,33,70,37,70,41,70,45,70,49,70, -53,70,57,70,61,70,65,70,69,70,73,70,251,56,77,70, -81,70,85,70,89,70,11,57,93,70,97,70,101,70,105,70, -109,70,213,67,113,70,117,70,121,70,125,70,129,70,133,70, -133,70,137,70,141,70,145,70,149,70,153,70,157,70,161,70, -165,70,193,69,169,70,173,70,177,70,181,70,185,70,191,70, -1,0,1,0,195,70,199,70,203,70,207,70,211,70,215,70, -219,70,223,70,249,69,227,70,231,70,235,70,141,69,239,70, -243,70,247,70,251,70,255,70,3,71,7,71,11,71,15,71, -19,71,23,71,27,71,25,70,31,71,29,70,35,71,39,71, -43,71,47,71,51,71,145,69,57,66,55,71,59,71,121,51, -73,67,149,68,63,71,67,71,57,70,71,71,61,70,75,71, -79,71,83,71,153,69,87,71,91,71,95,71,99,71,103,71, -157,69,107,71,111,71,115,71,119,71,123,71,127,71,109,70, -131,71,135,71,213,67,139,71,125,70,143,71,147,71,151,71, -155,71,159,71,145,70,163,71,189,69,167,71,149,70,253,66, -171,71,153,70,175,71,161,70,179,71,183,71,187,71,191,71, -195,71,169,70,177,69,199,71,173,70,203,71,177,70,207,71, -149,53,211,71,217,71,223,71,229,71,233,71,237,71,241,71, -247,71,253,71,3,72,7,72,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,10,72,16,72,22,72,28,72,36,72,50,72, -56,72,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,63,72,69,72,75,72, -81,72,87,72,1,0,1,0,1,0,1,0,1,0,92,72, -52,254,98,72,105,72,89,46,101,46,109,72,113,72,117,72, -121,72,125,72,129,72,217,45,132,72,138,72,146,72,156,72, -164,72,170,72,176,72,182,72,188,72,194,72,200,72,206,72, -212,72,1,0,218,72,224,72,230,72,236,72,242,72,1,0, -248,72,1,0,254,72,4,73,1,0,10,73,16,73,1,0, -22,73,28,73,34,73,40,73,46,73,52,73,58,73,64,73, -70,73,77,73,83,73,83,73,87,73,87,73,87,73,87,73, -91,73,91,73,91,73,91,73,95,73,95,73,95,73,95,73, -99,73,99,73,99,73,99,73,103,73,103,73,103,73,103,73, 
-107,73,107,73,107,73,107,73,111,73,111,73,111,73,111,73, -115,73,115,73,115,73,115,73,119,73,119,73,119,73,119,73, -123,73,123,73,123,73,123,73,127,73,127,73,127,73,127,73, -131,73,131,73,135,73,135,73,139,73,139,73,143,73,143,73, -147,73,147,73,151,73,151,73,155,73,155,73,159,73,159,73, -159,73,159,73,163,73,163,73,163,73,163,73,167,73,167,73, -167,73,167,73,171,73,171,73,171,73,171,73,175,73,175,73, -179,73,179,73,179,73,179,73,214,100,214,100,182,73,182,73, -182,73,182,73,187,73,187,73,187,73,187,73,190,73,190,73, -222,100,222,100,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,195,73,195,73,195,73, -195,73,199,73,199,73,203,73,203,73,207,73,207,73,213,73, -219,73,219,73,223,73,223,73,227,73,227,73,231,73,231,73, -231,73,231,73,235,73,235,73,230,100,230,100,240,100,240,100, -250,100,250,100,5,101,5,101,15,101,15,101,25,101,25,101, -35,101,35,101,35,101,45,101,45,101,45,101,239,73,239,73, -239,73,239,73,55,101,65,101,75,101,45,101,84,101,243,73, -249,73,255,73,5,74,11,74,16,74,23,74,29,74,35,74, -41,74,47,74,52,74,59,74,65,74,71,74,76,74,83,74, -89,74,95,74,101,74,107,74,113,74,119,74,125,74,131,74, -137,74,143,74,149,74,155,74,161,74,167,74,173,74,179,74, -185,74,191,74,197,74,203,74,209,74,215,74,221,74,227,74, -233,74,239,74,245,74,251,74,0,75,7,75,13,75,19,75, -24,75,30,75,37,75,43,75,49,75,55,75,61,75,67,75, -72,75,79,75,85,75,91,75,97,75,103,75,108,75,115,75, -121,75,127,75,133,75,139,75,144,75,151,75,157,75,163,75, -169,75,175,75,180,75,187,75,193,75,199,75,204,75,211,75, -217,75,223,75,229,75,235,75,240,75,246,75,252,75,2,76, -8,76,16,76,24,76,32,76,40,76,48,76,95,101,105,101, -75,101,115,101,45,101,84,101,57,76,63,76,5,74,69,76, -11,74,16,74,75,76,81,76,41,74,87,76,47,74,52,74, -93,76,99,76,65,74,105,76,71,74,76,74,251,74,0,75, -19,75,24,75,30,75,55,75,61,75,67,75,72,75,97,75, -103,75,108,75,110,76,133,75,117,76,123,76,169,75,129,76, -175,75,180,75,2,76,135,76,141,76,229,75,147,76,235,75, -240,75,55,101,65,101,125,101,75,101,135,101,243,73,249,73, -255,73,5,74,153,76,23,74,29,74,35,74,41,74,159,76, -65,74,83,74,89,74,95,74,101,74,107,74,119,74,125,74, -131,74,137,74,143,74,149,74,165,76,155,74,161,74,167,74, -173,74,179,74,185,74,197,74,203,74,209,74,215,74,221,74, -227,74,233,74,239,74,245,74,7,75,13,75,37,75,43,75, -49,75,55,75,61,75,79,75,85,75,91,75,97,75,171,76, -115,75,121,75,127,75,133,75,151,75,157,75,163,75,169,75, -177,76,187,75,193,75,182,76,211,75,217,75,223,75,229,75, -189,76,75,101,135,101,5,74,153,76,41,74,159,76,65,74, -195,76,143,74,201,76,207,76,213,76,55,75,61,75,97,75, -169,75,177,76,229,75,189,76,218,76,226,76,234,76,243,76, -248,76,255,76,4,77,11,77,16,77,23,77,28,77,35,77, -40,77,47,77,52,77,59,77,64,77,71,77,76,77,83,77, -88,77,95,77,100,77,107,77,113,77,119,77,207,76,125,77, -131,77,137,77,143,77,243,76,248,76,255,76,4,77,11,77, -16,77,23,77,28,77,35,77,40,77,47,77,52,77,59,77, -64,77,71,77,76,77,83,77,88,77,95,77,100,77,107,77, -113,77,119,77,207,76,125,77,131,77,137,77,143,77,107,77, -113,77,119,77,207,76,201,76,213,76,191,74,125,74,131,74, -137,74,107,77,113,77,119,77,191,74,197,74,148,77,148,77, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -155,77,163,77,163,77,171,77,179,77,187,77,195,77,203,77, -211,77,211,77,218,77,227,77,235,77,243,77,251,77,3,78, -3,78,11,78,19,78,19,78,27,78,27,78,35,78,43,78, -43,78,50,78,59,78,59,78,67,78,67,78,75,78,83,78, -83,78,91,78,91,78,99,78,106,78,115,78,123,78,123,78, -131,78,139,78,146,78,155,78,163,78,163,78,171,78,179,78, 
-187,78,194,78,203,78,211,78,211,78,219,78,219,78,227,78, -227,78,235,78,243,78,250,78,3,79,11,79,19,79,27,79, -1,0,1,0,35,79,43,79,51,79,59,79,67,79,75,79, -75,79,83,79,90,79,99,79,107,79,107,79,114,79,122,79, -131,79,138,79,147,79,154,79,163,79,170,79,179,79,187,79, -195,79,202,79,210,79,218,79,226,79,234,79,242,79,250,79, -2,80,10,80,18,80,26,80,171,78,187,78,34,80,42,80, -51,80,58,80,67,80,75,80,67,80,51,80,82,80,90,80, -98,80,106,80,114,80,75,80,115,78,35,78,122,80,130,80, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -138,80,146,80,155,80,165,80,175,80,185,80,195,80,205,80, -215,80,225,80,233,80,15,81,33,81,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,43,81,47,81,51,81, -55,81,83,42,59,81,63,81,67,81,71,81,79,81,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,184,255,184,255,184,255,184,255, -184,255,184,255,184,255,204,255,204,255,89,81,95,81,99,81, -103,81,103,81,229,45,233,45,107,81,111,81,115,81,119,81, -123,81,127,81,131,81,135,81,173,47,177,47,139,81,143,81, -147,81,151,81,1,0,1,0,155,81,159,81,164,81,164,81, -164,81,164,81,103,81,103,81,103,81,43,81,47,81,103,45, -1,0,83,42,55,81,63,81,59,81,95,81,229,45,233,45, -107,81,111,81,115,81,119,81,171,81,175,81,179,81,217,45, -183,81,186,81,190,81,224,45,1,0,195,81,199,81,203,81, -207,81,1,0,1,0,1,0,1,0,210,81,216,81,222,81, -1,0,228,81,1,0,234,81,240,81,246,81,252,81,2,82, -8,82,14,82,20,82,26,82,32,82,39,82,144,101,144,101, -152,101,152,101,160,101,160,101,168,101,168,101,176,101,176,101, -176,101,176,101,42,82,42,82,47,82,47,82,47,82,47,82, -51,82,51,82,55,82,55,82,55,82,55,82,59,82,59,82, -59,82,59,82,63,82,63,82,63,82,63,82,67,82,67,82, -67,82,67,82,71,82,71,82,71,82,71,82,75,82,75,82, -79,82,79,82,83,82,83,82,87,82,87,82,91,82,91,82, -91,82,91,82,95,82,95,82,95,82,95,82,99,82,99,82, -99,82,99,82,103,82,103,82,103,82,107,82,107,82,107,82, -107,82,111,82,111,82,111,82,111,82,115,82,115,82,115,82, -115,82,119,82,119,82,119,82,119,82,123,82,123,82,123,82, -123,82,127,82,127,82,127,82,127,82,131,82,131,82,131,82, -131,82,135,82,135,82,135,82,135,82,139,82,139,82,139,82, -139,82,143,82,143,82,143,82,143,82,147,82,147,82,147,82, -147,82,150,82,150,82,235,73,235,73,154,82,154,82,154,82, -154,82,188,101,188,101,202,101,202,101,216,101,216,101,158,82, -158,82,1,0,1,0,1,0,59,81,165,82,171,81,199,81, -203,81,175,81,169,82,229,45,233,45,179,81,217,45,43,81, -183,81,103,45,173,82,189,45,133,41,109,41,113,41,193,45, -197,45,201,45,205,45,209,45,213,45,55,81,83,42,186,81, -224,45,190,81,63,81,207,81,132,43,140,43,6,46,144,43, -148,43,84,46,156,43,160,43,164,43,168,43,172,43,176,43, -180,43,184,43,188,43,196,43,53,46,200,43,10,50,204,43, -208,43,20,47,212,43,54,47,14,50,76,46,155,81,195,81, -159,81,177,82,103,81,63,45,98,41,228,43,72,44,232,43, -236,43,84,44,252,43,252,41,56,44,4,42,0,44,60,42, -4,44,236,45,136,41,16,44,19,50,8,42,194,41,20,44, -24,44,32,44,12,42,64,42,16,42,184,44,107,81,181,82, -111,81,185,82,189,82,193,82,51,81,139,81,143,81,47,81, -197,82,222,58,201,82,205,82,209,82,213,82,217,82,221,82, -225,82,229,82,233,82,237,82,39,58,43,58,46,58,51,58, -55,58,58,58,62,58,66,58,70,58,74,58,78,58,82,58, -86,58,90,58,94,58,98,58,102,58,106,58,110,58,114,58, -119,58,123,58,127,58,131,58,135,58,138,58,142,58,146,58, -150,58,154,58,159,58,163,58,167,58,171,58,175,58,179,58, -183,58,187,58,191,58,195,58,199,58,203,58,207,58,210,58, 
-241,82,148,102,154,102,249,82,0,83,8,83,162,102,16,83, -170,102,178,102,24,83,32,83,40,83,186,102,194,102,202,102, -210,102,218,102,226,102,49,83,56,83,64,83,72,83,81,83, -88,83,96,83,104,83,112,83,120,83,128,83,136,83,144,83, -152,83,160,83,1,0,1,0,234,102,242,102,250,102,2,103, -10,103,18,103,1,0,1,0,26,103,34,103,42,103,50,103, -58,103,66,103,1,0,1,0,74,103,82,103,90,103,98,103, -106,103,114,103,1,0,1,0,122,103,130,103,138,103,1,0, -1,0,1,0,165,83,169,83,173,83,178,83,185,83,189,83, -193,83,1,0,197,83,200,83,205,83,208,83,213,83,217,83, -221,83,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,184,255,1,0,204,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,2,254,184,255, -1,0,1,0,1,0,1,0,18,254,1,0,1,0,1,0, -1,0,204,255,204,255,204,255,204,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,184,255,184,255,204,255, -204,255,204,255,184,255,204,255,184,255,184,255,184,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -100,10,225,40,106,10,235,40,1,0,1,0,1,0,1,0, -1,0,112,10,1,0,1,0,1,0,1,0,1,0,245,40, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,18,254,14,252,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,0,252,1,0,1,0,1,0,1,0,1,0, -1,0,255,40,9,41,1,0,118,10,124,10,18,254,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,14,254,1,0,1,0, -1,0,1,0,1,0,18,254,14,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,14,254,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,14,254,14,254,1,0,0,252,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,130,10,1,0, -1,0,1,0,19,41,29,41,18,254,1,0,1,0,1,0, -1,0,1,0,1,0,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,1,0,1,0,1,0,18,254,1,0,1,0, -1,0,14,254,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,0,252,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,142,10,0,252,39,41,49,41, -0,252,59,41,1,0,1,0,18,254,14,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,160,10,166,10,69,41,79,41,1,0,1,0, -1,0,18,254,14,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,18,254,14,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,2,254,2,254,2,254,2,254, -2,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,2,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,224,83,234,83, -254,83,22,84,46,84,70,84,94,84,176,255,176,255,2,254, -2,254,2,254,1,0,1,0,1,0,196,255,176,255,176,255, -176,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,184,255,184,255,184,255,184,255,184,255,1,0,1,0, -204,255,204,255,204,255,204,255,204,255,184,255,184,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,204,255,204,255,204,255,204,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -108,84,118,84,138,84,162,84,186,84,210,84,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,12,42,64,42,16,42, -184,44,132,43,140,43,6,46,144,43,148,43,84,46,156,43, -160,43,164,43,168,43,172,43,176,43,72,44,232,43,236,43, -84,44,252,43,1,0,56,44,4,42,0,44,60,42,4,44, -236,45,136,41,16,44,19,50,8,42,194,41,20,44,24,44, -32,44,12,42,64,42,16,42,184,44,132,43,140,43,6,46, -144,43,148,43,84,46,156,43,160,43,136,41,16,44,19,50, -8,42,194,41,20,44,24,44,32,44,12,42,64,42,16,42, -184,44,132,43,1,0,6,46,144,43,1,0,1,0,156,43, -1,0,1,0,168,43,172,43,1,0,1,0,184,43,188,43, -196,43,53,46,1,0,10,50,204,43,208,43,20,47,212,43, -54,47,14,50,76,46,98,41,228,43,72,44,232,43,1,0, -84,44,1,0,252,41,56,44,4,42,0,44,60,42,4,44, -236,45,1,0,16,44,19,50,8,42,194,41,20,44,24,44, -32,44,12,42,64,42,16,42,184,44,132,43,140,43,1,0, -144,43,148,43,84,46,156,43,1,0,1,0,168,43,172,43, -176,43,180,43,184,43,188,43,196,43,53,46,1,0,10,50, -204,43,208,43,20,47,212,43,54,47,14,50,1,0,98,41, 
-228,43,164,43,168,43,172,43,176,43,180,43,1,0,188,43, -1,0,1,0,1,0,10,50,204,43,208,43,20,47,212,43, -54,47,14,50,1,0,98,41,228,43,72,44,232,43,236,43, -84,44,252,43,252,41,56,44,4,42,0,44,60,42,4,44, -236,45,12,42,64,42,16,42,184,44,225,84,229,84,1,0, -1,0,232,84,237,84,117,46,241,84,244,84,249,84,252,84, -101,42,0,85,5,85,9,85,13,85,17,85,21,85,24,85, -121,46,28,85,37,85,105,42,41,85,44,85,49,85,53,85, -57,85,80,46,61,85,64,85,37,44,41,44,45,44,68,85, -73,85,76,85,201,44,216,44,81,85,85,85,123,41,89,85, -93,85,96,85,113,46,60,44,101,85,105,85,109,85,112,85, -49,44,53,44,117,85,120,85,125,85,132,85,141,85,149,85, -157,85,164,85,173,85,232,84,237,84,117,46,241,84,244,84, -249,84,252,84,101,42,0,85,5,85,9,85,13,85,17,85, -21,85,53,44,117,85,120,85,125,85,132,85,141,85,149,85, -157,85,164,85,173,85,177,85,181,85,1,0,1,0,189,45, -133,41,109,41,113,41,193,45,197,45,201,45,205,45,209,45, -213,45,189,45,133,41,109,41,113,41,193,45,197,45,201,45, -205,45,209,45,213,45,189,45,133,41,109,41,113,41,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,1,0,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -1,0,1,0,204,255,204,255,204,255,204,255,204,255,1,0, -204,255,204,255,1,0,204,255,204,255,204,255,204,255,204,255, -1,0,1,0,1,0,1,0,1,0,184,255,184,255,184,255, -184,255,184,255,184,255,184,255,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,204,255,204,255, -204,255,204,255,204,255,14,254,1,0,1,0,1,0,1,0, -1,0,42,82,47,82,63,82,75,82,1,0,150,82,87,82, -67,82,107,82,154,82,131,82,135,82,139,82,143,82,91,82, -115,82,123,82,99,82,127,82,83,82,95,82,55,82,59,82, -71,82,79,82,103,82,111,82,119,82,185,85,175,73,189,85, -193,85,1,0,47,82,63,82,1,0,147,82,1,0,1,0, -67,82,1,0,154,82,131,82,135,82,139,82,143,82,91,82, -115,82,123,82,99,82,127,82,1,0,95,82,55,82,59,82, -71,82,1,0,103,82,1,0,119,82,1,0,1,0,1,0, -1,0,63,82,1,0,1,0,1,0,1,0,67,82,1,0, -154,82,1,0,135,82,1,0,143,82,91,82,115,82,1,0, -99,82,127,82,1,0,95,82,1,0,1,0,71,82,1,0, -103,82,1,0,119,82,1,0,175,73,1,0,193,85,1,0, -47,82,63,82,1,0,147,82,1,0,1,0,67,82,107,82, -154,82,131,82,1,0,139,82,143,82,91,82,115,82,123,82, -99,82,127,82,1,0,95,82,55,82,59,82,71,82,1,0, -103,82,111,82,119,82,185,85,1,0,189,85,1,0,42,82, -47,82,63,82,75,82,147,82,150,82,87,82,67,82,107,82, -154,82,1,0,135,82,139,82,143,82,91,82,115,82,123,82, -99,82,127,82,83,82,95,82,55,82,59,82,71,82,79,82, -103,82,111,82,119,82,1,0,1,0,1,0,1,0,47,82, -63,82,75,82,1,0,150,82,87,82,67,82,107,82,154,82, -1,0,135,82,139,82,143,82,91,82,115,82,197,85,203,85, -209,85,215,85,221,85,227,85,233,85,239,85,245,85,251,85, -1,86,1,0,1,0,1,0,1,0,1,0,7,86,15,86, -23,86,31,86,39,86,47,86,55,86,63,86,71,86,79,86, -87,86,95,86,103,86,111,86,119,86,127,86,135,86,143,86, -151,86,159,86,167,86,175,86,183,86,191,86,199,86,207,86, -215,86,6,46,200,43,222,86,228,86,1,0,53,46,200,43, -10,50,204,43,208,43,20,47,212,43,54,47,14,50,76,46, -234,86,154,63,240,86,246,86,252,86,4,87,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -10,87,16,87,22,87,1,0,1,0,1,0,28,87,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,34,87,40,87, -78,58,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,65,51,47,87, -51,87,226,101,93,50,55,87,59,87,221,54,63,87,67,87, -71,87,109,68,75,87,79,87,83,87,87,87,91,87,95,87, -209,51,99,87,103,87,107,87,111,87,115,87,119,87,69,50, -189,54,123,87,59,57,201,54,63,57,127,87,177,52,131,87, -135,87,139,87,143,87,147,87,247,56,105,51,151,87,155,87, -159,87,163,87,1,0,1,0,1,0,1,0,167,87,175,87, -183,87,191,87,199,87,207,87,215,87,223,87,231,87,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,239,87,243,87, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,247,87,251,87, -255,87,3,88,9,88,225,69,13,88,17,88,21,88,25,88, -229,69,29,88,33,88,37,88,233,69,43,88,47,88,51,88, -55,88,61,88,65,88,83,87,69,88,75,88,79,88,83,88, -87,88,199,70,91,88,133,50,97,88,101,88,105,88,109,88, -155,87,113,88,117,88,219,70,237,69,241,69,223,70,121,88, -125,88,21,67,129,88,245,69,133,88,137,88,141,88,145,88, -145,88,145,88,149,88,155,88,159,88,163,88,167,88,173,88, -177,88,181,88,185,88,189,88,193,88,197,88,201,88,205,88, -209,88,213,88,217,88,221,88,221,88,231,70,225,88,229,88, -233,88,237,88,253,69,241,88,245,88,249,88,93,69,253,88, -1,89,5,89,9,89,13,89,17,89,21,89,25,89,29,89, -35,89,39,89,43,89,55,87,47,89,51,89,55,89,61,89, -67,89,71,89,75,89,79,89,83,89,87,89,91,89,95,89, -99,89,99,89,103,89,109,89,113,89,5,67,117,89,121,89, -127,89,131,89,135,89,237,50,139,89,143,89,245,50,147,89, -151,89,155,89,161,89,165,89,171,89,175,89,179,89,183,89, -187,89,191,89,195,89,199,89,203,89,207,89,211,89,215,89, -221,89,225,89,229,89,233,89,53,66,237,89,29,51,243,89, -243,89,249,89,253,89,253,89,1,90,5,90,11,90,17,90, -21,90,25,90,29,90,33,90,37,90,41,90,45,90,49,90, -53,90,17,70,57,90,63,90,67,90,71,90,23,71,71,90, -75,90,25,70,79,90,83,90,87,90,91,90,29,70,201,65, -95,90,99,90,103,90,107,90,111,90,115,90,119,90,125,90, -129,90,133,90,137,90,141,90,145,90,151,90,155,90,159,90, -163,90,167,90,171,90,175,90,179,90,183,90,33,70,187,90, -191,90,197,90,201,90,205,90,209,90,41,70,213,90,217,90, -221,90,225,90,229,90,233,90,237,90,241,90,57,66,55,71, -245,90,249,90,253,90,1,91,7,91,11,91,15,91,19,91, -45,70,23,91,29,91,33,91,37,91,229,71,41,91,45,91, -49,91,53,91,57,91,63,91,67,91,71,91,75,91,81,91, -85,91,89,91,93,91,73,67,97,91,101,91,107,91,113,91, -119,91,123,91,129,91,133,91,137,91,141,91,145,91,49,70, -149,68,149,91,153,91,157,91,161,91,167,91,171,91,175,91, -179,91,67,71,183,91,187,91,193,91,197,91,201,91,207,91, -213,91,217,91,71,71,221,91,225,91,229,91,233,91,237,91, -241,91,245,91,251,91,255,91,5,92,9,92,15,92,79,71, -19,92,23,92,29,92,33,92,37,92,43,92,49,92,53,92, -57,92,61,92,65,92,65,92,69,92,73,92,87,71,77,92, -81,92,85,92,89,92,93,92,99,92,103,92,17,67,109,92, -115,92,119,92,125,92,131,92,137,92,141,92,111,71,145,92, -151,92,157,92,163,92,169,92,173,92,173,92,115,71,237,71, -177,92,181,92,185,92,189,92,195,92,125,66,123,71,199,92, -203,92,89,70,209,92,215,92,173,69,221,92,225,92,101,70, -229,92,233,92,237,92,243,92,243,92,249,92,253,92,1,93, -7,93,11,93,15,93,19,93,25,93,29,93,33,93,37,93, -41,93,45,93,51,93,55,93,59,93,63,93,67,93,71,93, -75,93,81,93,87,93,91,93,97,93,101,93,107,93,111,93, -125,70,115,93,121,93,127,93,131,93,137,93,141,93,147,93, -151,93,155,93,159,93,163,93,167,93,171,93,177,93,183,93, -189,93,249,89,195,93,199,93,203,93,207,93,211,93,215,93, -219,93,223,93,227,93,231,93,235,93,239,93,85,67,245,93, -249,93,253,93,1,94,5,94,9,94,137,70,13,94,17,94, -21,94,25,94,29,94,35,94,41,94,47,94,51,94,55,94, -59,94,63,94,69,94,73,94,79,94,83,94,87,94,93,94, -99,94,103,94,105,66,107,94,111,94,115,94,119,94,123,94, -127,94,151,71,131,94,135,94,139,94,143,94,147,94,151,94, -155,94,159,94,133,52,163,94,169,94,173,94,177,94,181,94, -185,94,189,94,195,94,201,94,205,94,209,94,171,71,175,71, -161,52,213,94,219,94,223,94,227,94,231,94,235,94,241,94, -247,94,251,94,255,94,3,95,9,95,179,71,13,95,19,95, -25,95,29,95,33,95,37,95,43,95,47,95,51,95,55,95, -59,95,63,95,67,95,71,95,77,95,81,95,85,95,89,95, -95,95,99,95,103,95,107,95,111,95,117,95,123,95,127,95, 
-131,95,135,95,141,95,145,95,203,71,203,71,151,95,155,95, -161,95,165,95,169,95,173,95,177,95,181,95,185,95,189,95, -207,71,195,95,199,95,203,95,207,95,211,95,215,95,221,95, -225,95,231,95,237,95,97,53,243,95,113,53,247,95,251,95, -255,95,3,96,133,53,7,96,1,0,1,0,255,255,255,255, -112,134,220,68,112,134,192,68,112,134,222,68,0,6,128,1, -2,6,130,1,4,6,133,1,6,6,134,1,8,6,0,2, -12,6,5,2,14,6,77,4,16,6,137,1,18,6,68,61, -20,6,139,1,24,6,154,3,30,6,0,4,34,6,4,4, -70,6,65,61,74,6,0,60,80,134,8,2,14,6,4,60, -70,6,8,60,98,134,12,60,2,6,12,2,4,6,16,2, -14,6,20,2,24,6,24,2,78,134,143,1,14,6,20,60, -24,6,28,2,70,6,24,60,78,6,32,60,90,6,36,60, -98,134,28,60,0,6,144,1,2,6,146,1,4,6,149,1, -6,6,120,61,8,6,37,2,12,6,40,2,14,6,44,2, -16,6,150,1,18,6,116,61,24,6,52,2,30,6,8,4, -34,6,12,4,70,6,113,61,78,6,81,4,80,6,48,2, -90,6,48,60,96,134,52,60,14,134,60,60,2,6,232,3, -4,6,56,2,8,6,64,60,12,6,60,2,14,6,64,2, -24,6,204,3,78,134,68,2,4,6,72,2,14,6,68,60, -16,6,76,60,24,6,60,4,70,6,72,60,78,6,80,60, -92,134,84,60,0,6,152,1,2,6,154,1,4,6,156,1, -6,6,80,2,8,6,84,2,12,6,88,2,14,6,96,2, -16,6,159,1,18,6,144,61,24,6,158,3,30,6,16,4, -34,6,20,4,70,6,148,61,80,6,92,2,96,134,88,60, -4,134,104,2,2,6,96,60,24,6,208,3,70,6,100,60, -78,6,108,2,98,134,104,60,2,6,114,2,24,6,122,2, -70,6,109,60,78,6,118,2,90,6,120,60,98,134,116,60, -2,6,124,60,14,6,128,60,70,134,132,60,0,6,240,3, -2,6,134,2,6,6,162,1,14,6,136,60,24,6,142,2, -70,6,140,60,78,6,138,2,90,6,148,60,98,134,144,60, -0,6,164,1,2,6,166,1,4,6,169,1,6,6,171,1, -8,6,153,2,12,6,156,2,14,6,93,4,16,6,173,1, -18,6,156,61,22,6,160,2,24,6,162,3,30,6,24,4, -34,6,28,4,54,6,65,3,70,6,153,61,80,134,213,3, -2,6,168,60,14,134,172,60,2,6,168,2,14,6,176,60, -24,6,176,2,30,6,32,4,34,6,36,4,70,6,181,60, -78,6,172,2,98,134,188,60,2,6,181,2,4,6,184,2, -14,6,192,60,24,6,193,2,70,6,197,60,76,6,48,4, -78,134,188,2,14,6,212,60,24,6,200,2,70,6,216,60, -76,6,52,4,78,6,196,2,90,6,224,60,98,134,220,60, -0,6,178,1,2,6,180,1,4,6,182,1,6,6,209,2, -8,6,213,2,12,6,216,2,16,6,185,1,18,6,204,61, -20,6,220,2,22,6,224,2,24,6,166,3,30,6,40,4, -34,6,44,4,54,6,95,3,70,6,200,61,72,6,228,60, -80,6,228,2,90,6,236,60,96,134,232,60,6,6,248,60, -70,134,252,60,0,6,0,61,2,6,4,61,4,6,232,2, -14,6,12,61,16,6,8,61,70,134,16,61,14,6,20,61, -16,134,24,61,0,6,228,61,2,6,186,1,4,6,236,2, -6,6,240,61,8,6,100,4,14,6,28,61,16,6,240,2, -18,6,236,61,70,134,232,61,2,6,242,2,4,6,32,61, -14,6,246,2,24,6,250,2,70,6,36,61,98,134,40,61, -0,6,192,1,2,6,194,1,4,6,197,1,6,6,198,1, -8,6,2,2,12,6,7,2,14,6,79,4,16,6,201,1, -18,6,70,61,20,6,203,1,24,6,156,3,30,6,2,4, -34,6,6,4,70,6,67,61,74,6,2,60,80,134,10,2, -14,6,6,60,70,6,10,60,98,134,14,60,2,6,14,2, -4,6,18,2,14,6,22,2,24,6,26,2,78,134,207,1, -14,6,22,60,24,6,30,2,70,6,26,60,78,6,34,60, -90,6,38,60,98,134,30,60,0,6,208,1,2,6,210,1, -4,6,213,1,6,6,122,61,8,6,39,2,12,6,42,2, -14,6,46,2,16,6,214,1,18,6,118,61,24,6,54,2, -30,6,10,4,34,6,14,4,70,6,115,61,78,6,83,4, -80,6,50,2,90,6,50,60,96,134,54,60,14,134,62,60, -2,6,234,3,4,6,58,2,8,6,66,60,12,6,62,2, -14,6,66,2,24,6,206,3,78,134,70,2,4,6,74,2, -14,6,70,60,16,6,78,60,24,6,62,4,70,6,74,60, -78,6,82,60,92,6,86,60,98,134,44,61,0,6,216,1, -2,6,218,1,4,6,220,1,6,6,82,2,8,6,86,2, -12,6,90,2,16,6,223,1,18,6,146,61,24,6,160,3, -30,6,18,4,34,6,22,4,70,6,150,61,80,6,94,2, -96,134,90,60,4,6,106,2,24,134,224,3,2,6,98,60, -24,6,210,3,70,6,102,60,78,6,110,2,98,134,106,60, -2,6,116,2,24,6,124,2,70,6,111,60,78,6,120,2, -90,6,122,60,98,134,118,60,2,6,126,60,14,6,130,60, -70,134,134,60,0,6,242,3,2,6,136,2,6,6,226,1, 
-14,6,138,60,24,6,144,2,70,6,142,60,78,6,140,2, -90,6,150,60,98,134,146,60,0,6,228,1,2,6,230,1, -4,6,233,1,6,6,235,1,8,6,155,2,12,6,158,2, -14,6,95,4,16,6,237,1,18,6,158,61,22,6,162,2, -24,6,164,3,30,6,26,4,34,6,30,4,54,6,67,3, -70,6,155,61,80,134,215,3,2,6,170,60,14,134,174,60, -2,6,170,2,14,6,178,60,24,6,178,2,30,6,34,4, -34,6,38,4,70,6,183,60,78,6,174,2,98,134,190,60, -2,6,183,2,4,6,186,2,14,6,194,60,24,6,195,2, -70,6,199,60,76,6,50,4,78,134,190,2,14,6,214,60, -16,6,46,61,24,6,202,2,70,6,218,60,76,6,54,4, -78,6,198,2,90,6,226,60,98,134,222,60,0,6,242,1, -2,6,244,1,4,6,246,1,6,6,211,2,8,6,215,2, -12,6,218,2,16,6,249,1,18,6,206,61,20,6,222,2, -22,6,226,2,24,6,168,3,30,6,42,4,34,6,46,4, -54,6,97,3,70,6,202,61,72,6,230,60,80,6,230,2, -90,6,238,60,96,134,234,60,6,6,250,60,70,134,254,60, -0,6,2,61,2,6,6,61,4,6,234,2,14,6,14,61, -16,6,10,61,20,6,48,61,70,134,18,61,14,6,22,61, -16,134,26,61,0,6,230,61,2,6,250,1,4,6,238,2, -6,6,242,61,8,6,102,4,14,6,30,61,16,6,254,1, -18,6,238,61,20,6,50,61,70,134,234,61,2,6,244,2, -4,6,34,61,14,6,248,2,24,6,252,2,70,6,38,61, -98,134,42,61,2,6,248,3,8,134,196,3,2,134,252,3, -2,6,250,3,8,134,198,3,2,134,254,3,24,134,220,3, -24,134,222,3,0,6,116,63,2,6,12,7,8,6,114,63, -12,6,112,63,38,6,17,62,40,6,19,62,138,134,120,63, -0,6,144,63,2,6,16,7,38,6,49,62,40,134,51,62, -0,6,148,63,2,6,18,7,38,6,81,62,40,6,83,62, -138,134,152,63,0,6,180,63,2,6,20,7,8,6,178,63, -12,6,176,63,16,6,84,7,38,6,113,62,40,134,115,62, -0,6,240,63,2,6,24,7,38,6,145,62,40,134,147,62, -40,134,216,63,0,6,212,63,2,6,28,7,8,6,210,63, -12,6,208,63,16,6,86,7,40,134,179,62,0,6,244,63, -2,6,30,7,38,6,209,62,40,6,211,62,138,134,248,63, -0,6,225,62,2,6,89,7,8,6,98,63,12,6,96,63, -38,6,1,62,40,6,3,62,132,6,109,63,138,134,102,63, -0,6,228,62,2,6,90,7,38,6,33,62,40,134,35,62, -0,6,233,62,2,6,93,7,38,6,65,62,40,6,67,62, -132,6,141,63,138,134,134,63,0,6,236,62,2,6,94,7, -8,6,162,63,12,6,160,63,16,6,149,7,38,6,97,62, -40,6,99,62,132,134,172,63,0,6,240,62,2,6,152,7, -38,6,129,62,40,134,131,62,38,6,200,63,40,134,202,63, -0,6,244,62,2,6,154,7,8,6,194,63,12,6,192,63, -16,6,151,7,38,6,161,62,40,6,163,62,132,134,204,63, -0,6,249,62,2,6,157,7,38,6,193,62,40,6,195,62, -132,6,237,63,138,134,230,63,16,134,14,8,12,6,160,9, -16,134,164,9,2,134,6,8,0,6,0,8,12,6,172,9, -16,134,2,8,12,6,130,9,16,134,184,9,16,134,188,9, -0,6,26,8,8,6,196,9,12,6,50,8,16,134,200,9, -2,134,24,8,16,134,204,9,8,6,220,9,12,6,28,8, -16,6,224,9,22,134,228,9,16,134,232,9,16,134,240,9, -16,134,216,9,12,6,162,9,16,134,166,9,2,134,166,8, -0,6,160,8,12,6,174,9,16,134,162,8,12,6,132,9, -16,134,186,9,16,134,190,9,0,6,186,8,8,6,198,9, -12,6,114,8,16,134,202,9,2,134,184,8,16,134,206,9, -8,6,222,9,12,6,188,8,16,6,226,9,22,134,230,9, -16,134,234,9,16,134,242,9,16,134,218,9,16,134,174,8, -30,134,236,8,30,134,238,8,16,134,180,9,16,134,182,9, -16,134,212,9,16,134,214,9,166,12,68,12,168,12,70,12, -170,140,74,12,168,140,72,12,168,140,76,12,168,140,132,13, -168,140,166,13,168,140,128,13,120,146,82,18,120,146,98,18, -120,146,104,18,124,19,150,19,174,147,152,19,124,22,150,22, -172,22,144,22,174,150,152,22,174,151,40,23,124,23,148,23, -174,151,152,23,124,151,150,23,172,152,144,24,170,153,128,25, -132,25,149,25,170,25,142,25,172,153,144,25,124,26,148,26, -174,154,152,26,124,154,150,26,148,27,180,27,158,27,185,27, -190,155,188,27,92,160,76,32,106,182,12,54,106,182,16,54, -106,182,20,54,106,182,24,54,106,182,28,54,106,182,36,54, -106,182,118,54,106,182,122,54,106,182,128,54,106,182,130,54, -106,182,134,54,112,134,52,67,112,134,54,67,112,134,92,67, 
-112,134,154,67,112,134,158,67,112,134,156,67,112,134,8,68, -112,134,18,68,112,134,24,68,112,134,72,68,112,134,76,68, -112,134,130,68,112,134,136,68,112,134,142,68,112,134,146,68, -112,134,218,68,112,134,196,68,112,134,224,68,112,134,226,68, -112,134,232,68,112,134,234,68,112,134,240,68,112,134,242,68, -112,134,0,69,112,134,2,69,112,134,192,69,112,134,194,69, -112,134,8,69,112,134,10,69,112,134,16,69,112,134,18,69, -112,134,196,69,112,134,198,69,112,134,88,69,112,134,90,69, -112,134,92,69,112,134,94,69,112,134,212,69,112,134,214,69, -112,134,216,69,112,134,218,69,50,225,40,97,50,225,152,96, -50,225,156,96,50,225,160,96,50,225,164,96,50,225,168,96, -50,225,172,96,50,225,176,96,50,225,180,96,50,225,184,96, -50,225,188,96,50,225,192,96,50,225,196,96,50,225,202,96, -50,225,206,96,50,225,210,96,50,97,224,96,52,225,226,96, -50,97,230,96,52,225,232,96,50,97,236,96,52,225,238,96, -50,97,242,96,52,225,244,96,50,97,248,96,52,225,250,96, -50,225,60,97,50,225,232,97,50,225,88,97,50,225,92,97, -50,225,96,97,50,225,100,97,50,225,104,97,50,225,108,97, -50,225,112,97,50,225,116,97,50,225,120,97,50,225,124,97, -50,225,128,97,50,225,132,97,50,225,138,97,50,225,142,97, -50,225,146,97,50,97,160,97,52,225,162,97,50,97,166,97, -52,225,168,97,50,97,172,97,52,225,174,97,50,97,178,97, -52,225,180,97,50,97,184,97,52,225,186,97,50,225,238,97, -50,225,240,97,50,225,242,97,50,225,244,97,50,225,252,97, -137,180,130,46,52,33,137,180,130,46,56,33,137,180,130,46, -86,33,137,180,194,73,92,34,137,180,194,73,94,34,137,52, -130,207,150,38,137,180,194,213,152,38,139,52,2,44,120,41, -139,52,130,46,118,41,139,180,66,47,124,41,139,180,194,107, -116,43,139,180,194,107,118,43,2,0,2,230,65,0,2,3, -0,6,76,61,2,6,72,61,6,6,84,61,18,134,80,61, -2,230,65,0,8,3,8,134,188,3,2,230,65,0,10,3, -2,134,244,3,2,202,67,0,39,3,2,134,16,60,2,230, -69,0,2,3,0,6,128,61,2,6,124,61,6,6,136,61, -18,134,132,61,2,230,73,0,8,3,2,134,92,60,2,230, -79,0,2,3,0,6,164,61,2,6,160,61,6,6,172,61, -18,134,168,61,2,230,79,0,3,3,2,6,152,60,8,6, -88,4,16,134,156,60,2,230,79,0,8,3,8,134,84,4, -2,230,85,0,8,3,0,6,182,3,2,6,174,3,8,6, -170,3,24,134,178,3,2,230,97,0,2,3,0,6,78,61, -2,6,74,61,6,6,86,61,18,134,82,61,2,230,97,0, -8,3,8,134,190,3,2,230,97,0,10,3,2,134,246,3, -2,202,99,0,39,3,2,134,18,60,2,230,101,0,2,3, -0,6,130,61,2,6,126,61,6,6,138,61,18,134,134,61, -2,230,105,0,8,3,2,134,94,60,2,230,111,0,2,3, -0,6,166,61,2,6,162,61,6,6,174,61,18,134,170,61, -2,230,111,0,3,3,2,6,154,60,8,6,90,4,16,134, -158,60,2,230,111,0,8,3,8,134,86,4,2,230,117,0, -8,3,0,6,184,3,2,6,176,3,8,6,172,3,24,134, -180,3,2,230,65,0,6,3,0,6,96,61,2,6,92,61, -6,6,104,61,18,134,100,61,2,230,97,0,6,3,0,6, -98,61,2,6,94,61,6,6,106,61,18,134,102,61,2,230, -69,0,4,3,0,6,40,60,2,134,44,60,2,230,101,0, -4,3,0,6,42,60,2,134,46,60,2,230,79,0,4,3, -0,6,160,60,2,134,164,60,2,230,111,0,4,3,0,6, -162,60,2,134,166,60,2,230,83,0,1,3,14,134,200,60, -2,230,115,0,1,3,14,134,202,60,2,230,83,0,12,3, -14,134,204,60,2,230,115,0,12,3,14,134,206,60,2,230, -85,0,3,3,2,134,240,60,2,230,117,0,3,3,2,134, -242,60,2,230,85,0,4,3,16,134,244,60,2,230,117,0, -4,3,16,134,246,60,2,216,79,0,27,3,0,6,184,61, -2,6,180,61,6,6,192,61,18,6,188,61,70,134,196,61, -2,216,111,0,27,3,0,6,186,61,2,6,182,61,6,6, -194,61,18,6,190,61,70,134,198,61,2,216,85,0,27,3, -0,6,212,61,2,6,208,61,6,6,220,61,18,6,216,61, -70,134,224,61,2,216,117,0,27,3,0,6,214,61,2,6, -210,61,6,6,222,61,18,6,218,61,70,134,226,61,2,202, -79,0,40,3,8,134,216,3,2,202,111,0,40,3,8,134, -218,3,2,230,65,0,7,3,8,134,192,3,2,230,97,0, -7,3,8,134,194,3,2,202,69,0,39,3,12,134,56,60, 
-2,202,101,0,39,3,12,134,58,60,2,230,79,0,7,3, -8,134,96,4,2,230,111,0,7,3,8,134,98,4,2,230, -177,3,1,3,138,134,104,63,2,230,183,3,1,3,138,134, -136,63,2,230,185,3,8,3,0,6,164,63,2,6,32,7, -132,134,174,63,2,230,197,3,8,3,0,6,196,63,2,6, -96,7,132,134,206,63,2,230,201,3,1,3,138,134,232,63, -2,0,198,12,194,12,170,153,150,25,2,0,217,13,207,13, -148,155,186,27,2,220,76,0,35,3,8,134,112,60,2,220, -108,0,35,3,8,134,114,60,2,220,82,0,35,3,8,134, -184,60,2,220,114,0,35,3,8,134,186,60,2,220,83,0, -35,3,14,134,208,60,2,220,115,0,35,3,14,134,210,60, -2,220,65,0,35,3,4,6,88,61,12,134,108,61,2,220, -97,0,35,3,4,6,90,61,12,134,110,61,2,220,69,0, -35,3,4,134,140,61,2,220,101,0,35,3,4,134,142,61, -2,220,79,0,35,3,4,134,176,61,2,220,111,0,35,3, -4,134,178,61,2,230,177,3,19,3,0,6,5,62,2,6, -9,62,132,6,13,62,138,134,0,63,2,230,177,3,20,3, -0,6,7,62,2,6,11,62,132,6,15,62,138,134,2,63, -0,31,67,230,177,3,19,3,0,3,138,134,4,63,1,31, -67,230,177,3,20,3,0,3,138,134,6,63,0,31,67,230, -177,3,19,3,1,3,138,134,8,63,1,31,67,230,177,3, -20,3,1,3,138,134,10,63,0,31,67,230,177,3,19,3, -66,3,138,134,12,63,1,31,67,230,177,3,20,3,66,3, -138,134,14,63,2,230,145,3,19,3,0,6,21,62,2,6, -25,62,132,6,29,62,138,134,16,63,2,230,145,3,20,3, -0,6,23,62,2,6,27,62,132,6,31,62,138,134,18,63, -8,31,67,230,145,3,19,3,0,3,138,134,20,63,9,31, -67,230,145,3,20,3,0,3,138,134,22,63,8,31,67,230, -145,3,19,3,1,3,138,134,24,63,9,31,67,230,145,3, -20,3,1,3,138,134,26,63,8,31,67,230,145,3,19,3, -66,3,138,134,28,63,9,31,67,230,145,3,20,3,66,3, -138,134,30,63,2,230,181,3,19,3,0,6,36,62,2,134, -40,62,2,230,181,3,20,3,0,6,38,62,2,134,42,62, -2,230,149,3,19,3,0,6,52,62,2,134,56,62,2,230, -149,3,20,3,0,6,54,62,2,134,58,62,2,230,183,3, -19,3,0,6,69,62,2,6,73,62,132,6,77,62,138,134, -32,63,2,230,183,3,20,3,0,6,71,62,2,6,75,62, -132,6,79,62,138,134,34,63,32,31,67,230,183,3,19,3, -0,3,138,134,36,63,33,31,67,230,183,3,20,3,0,3, -138,134,38,63,32,31,67,230,183,3,19,3,1,3,138,134, -40,63,33,31,67,230,183,3,20,3,1,3,138,134,42,63, -32,31,67,230,183,3,19,3,66,3,138,134,44,63,33,31, -67,230,183,3,20,3,66,3,138,134,46,63,2,230,151,3, -19,3,0,6,85,62,2,6,89,62,132,6,93,62,138,134, -48,63,2,230,151,3,20,3,0,6,87,62,2,6,91,62, -132,6,95,62,138,134,50,63,40,31,67,230,151,3,19,3, -0,3,138,134,52,63,41,31,67,230,151,3,20,3,0,3, -138,134,54,63,40,31,67,230,151,3,19,3,1,3,138,134, -56,63,41,31,67,230,151,3,20,3,1,3,138,134,58,63, -40,31,67,230,151,3,19,3,66,3,138,134,60,63,41,31, -67,230,151,3,20,3,66,3,138,134,62,63,2,230,185,3, -19,3,0,6,100,62,2,6,104,62,132,134,108,62,2,230, -185,3,20,3,0,6,102,62,2,6,106,62,132,134,110,62, -2,230,153,3,19,3,0,6,116,62,2,6,120,62,132,134, -124,62,2,230,153,3,20,3,0,6,118,62,2,6,122,62, -132,134,126,62,2,230,191,3,19,3,0,6,132,62,2,134, -136,62,2,230,191,3,20,3,0,6,134,62,2,134,138,62, -2,230,159,3,19,3,0,6,148,62,2,134,152,62,2,230, -159,3,20,3,0,6,150,62,2,134,154,62,2,230,197,3, -19,3,0,6,164,62,2,6,168,62,132,134,172,62,2,230, -197,3,20,3,0,6,166,62,2,6,170,62,132,134,174,62, -2,230,165,3,20,3,0,6,182,62,2,6,186,62,132,134, -190,62,2,230,201,3,19,3,0,6,197,62,2,6,201,62, -132,6,205,62,138,134,64,63,2,230,201,3,20,3,0,6, -199,62,2,6,203,62,132,6,207,62,138,134,66,63,96,31, -67,230,201,3,19,3,0,3,138,134,68,63,97,31,67,230, -201,3,20,3,0,3,138,134,70,63,96,31,67,230,201,3, -19,3,1,3,138,134,72,63,97,31,67,230,201,3,20,3, -1,3,138,134,74,63,96,31,67,230,201,3,19,3,66,3, -138,134,76,63,97,31,67,230,201,3,20,3,66,3,138,134, -78,63,2,230,169,3,19,3,0,6,213,62,2,6,217,62, -132,6,221,62,138,134,80,63,2,230,169,3,20,3,0,6, 
-215,62,2,6,219,62,132,6,223,62,138,134,82,63,104,31, -67,230,169,3,19,3,0,3,138,134,84,63,105,31,67,230, -169,3,20,3,0,3,138,134,86,63,104,31,67,230,169,3, -19,3,1,3,138,134,88,63,105,31,67,230,169,3,20,3, -1,3,138,134,90,63,104,31,67,230,169,3,19,3,66,3, -138,134,92,63,105,31,67,230,169,3,20,3,66,3,138,134, -94,63,2,230,177,3,0,3,138,134,100,63,2,230,183,3, -0,3,138,134,132,63,2,230,201,3,0,3,138,134,228,63, -2,230,177,3,66,3,138,134,110,63,2,230,183,3,66,3, -138,134,142,63,2,230,201,3,66,3,138,134,238,63,3,0, -2,230,65,0,0,3,2,230,65,0,1,3,2,230,65,0, -3,3,2,230,69,0,0,3,2,230,69,0,1,3,2,230, -69,0,8,3,2,230,73,0,0,3,2,230,73,0,1,3, -2,230,73,0,2,3,2,230,78,0,3,3,2,230,79,0, -0,3,2,230,79,0,1,3,2,230,85,0,0,3,2,230, -85,0,1,3,2,230,85,0,2,3,2,230,89,0,1,3, -2,230,97,0,0,3,2,230,97,0,1,3,2,230,97,0, -3,3,2,230,101,0,0,3,2,230,101,0,1,3,2,230, -101,0,8,3,2,230,105,0,0,3,2,230,105,0,1,3, -2,230,105,0,2,3,2,230,110,0,3,3,2,230,111,0, -0,3,2,230,111,0,1,3,2,230,117,0,0,3,2,230, -117,0,1,3,2,230,117,0,2,3,2,230,121,0,1,3, -2,230,121,0,8,3,2,230,65,0,4,3,2,230,97,0, -4,3,2,202,65,0,40,3,2,202,97,0,40,3,2,230, -67,0,1,3,2,230,99,0,1,3,2,230,67,0,2,3, -2,230,99,0,2,3,2,230,67,0,7,3,2,230,99,0, -7,3,2,230,67,0,12,3,2,230,99,0,12,3,2,230, -68,0,12,3,2,230,100,0,12,3,2,230,69,0,6,3, -2,230,101,0,6,3,2,230,69,0,7,3,2,230,101,0, -7,3,2,202,69,0,40,3,2,202,101,0,40,3,2,230, -69,0,12,3,2,230,101,0,12,3,2,230,71,0,2,3, -2,230,103,0,2,3,2,230,71,0,6,3,2,230,103,0, -6,3,2,230,71,0,7,3,2,230,103,0,7,3,2,202, -71,0,39,3,2,202,103,0,39,3,2,230,72,0,2,3, -2,230,104,0,2,3,2,230,73,0,3,3,2,230,105,0, -3,3,2,230,73,0,4,3,2,230,105,0,4,3,2,230, -73,0,6,3,2,230,105,0,6,3,2,202,73,0,40,3, -2,202,105,0,40,3,2,230,73,0,7,3,2,230,74,0, -2,3,2,230,106,0,2,3,2,202,75,0,39,3,2,202, -107,0,39,3,2,230,76,0,1,3,2,230,108,0,1,3, -2,202,76,0,39,3,2,202,108,0,39,3,2,230,76,0, -12,3,2,230,108,0,12,3,2,230,78,0,1,3,2,230, -110,0,1,3,2,202,78,0,39,3,2,202,110,0,39,3, -2,230,78,0,12,3,2,230,110,0,12,3,2,230,79,0, -6,3,2,230,111,0,6,3,2,230,79,0,11,3,2,230, -111,0,11,3,2,230,82,0,1,3,2,230,114,0,1,3, -2,202,82,0,39,3,2,202,114,0,39,3,2,230,82,0, -12,3,2,230,114,0,12,3,2,230,83,0,2,3,2,230, -115,0,2,3,2,202,83,0,39,3,2,202,115,0,39,3, -2,202,84,0,39,3,2,202,116,0,39,3,2,230,84,0, -12,3,2,230,116,0,12,3,2,230,85,0,6,3,2,230, -117,0,6,3,2,230,85,0,10,3,2,230,117,0,10,3, -2,230,85,0,11,3,2,230,117,0,11,3,2,202,85,0, -40,3,2,202,117,0,40,3,2,230,87,0,2,3,2,230, -119,0,2,3,2,230,89,0,2,3,2,230,121,0,2,3, -2,230,89,0,8,3,2,230,90,0,1,3,2,230,122,0, -1,3,2,230,90,0,7,3,2,230,122,0,7,3,2,230, -90,0,12,3,2,230,122,0,12,3,2,230,65,0,12,3, -2,230,97,0,12,3,2,230,73,0,12,3,2,230,105,0, -12,3,2,230,79,0,12,3,2,230,111,0,12,3,2,230, -85,0,12,3,2,230,117,0,12,3,220,0,67,230,85,0, -8,3,4,3,252,0,67,230,117,0,8,3,4,3,220,0, -67,230,85,0,8,3,1,3,252,0,67,230,117,0,8,3, -1,3,220,0,67,230,85,0,8,3,12,3,252,0,67,230, -117,0,8,3,12,3,220,0,67,230,85,0,8,3,0,3, -252,0,67,230,117,0,8,3,0,3,196,0,67,230,65,0, -8,3,4,3,228,0,67,230,97,0,8,3,4,3,38,2, -67,230,65,0,7,3,4,3,39,2,67,230,97,0,7,3, -4,3,2,230,198,0,4,3,2,230,230,0,4,3,2,230, -71,0,12,3,2,230,103,0,12,3,2,230,75,0,12,3, -2,230,107,0,12,3,234,1,67,230,79,0,40,3,4,3, -235,1,67,230,111,0,40,3,4,3,2,230,183,1,12,3, -2,230,146,2,12,3,2,230,106,0,12,3,2,230,71,0, -1,3,2,230,103,0,1,3,2,230,78,0,0,3,2,230, -110,0,0,3,197,0,67,230,65,0,10,3,1,3,229,0, -67,230,97,0,10,3,1,3,2,230,198,0,1,3,2,230, -230,0,1,3,2,230,216,0,1,3,2,230,248,0,1,3, -2,230,65,0,15,3,2,230,97,0,15,3,2,230,65,0, 
-17,3,2,230,97,0,17,3,2,230,69,0,15,3,2,230, -101,0,15,3,2,230,69,0,17,3,2,230,101,0,17,3, -2,230,73,0,15,3,2,230,105,0,15,3,2,230,73,0, -17,3,2,230,105,0,17,3,2,230,79,0,15,3,2,230, -111,0,15,3,2,230,79,0,17,3,2,230,111,0,17,3, -2,230,82,0,15,3,2,230,114,0,15,3,2,230,82,0, -17,3,2,230,114,0,17,3,2,230,85,0,15,3,2,230, -117,0,15,3,2,230,85,0,17,3,2,230,117,0,17,3, -2,220,83,0,38,3,2,220,115,0,38,3,2,220,84,0, -38,3,2,220,116,0,38,3,2,230,72,0,12,3,2,230, -104,0,12,3,214,0,67,230,79,0,8,3,4,3,246,0, -67,230,111,0,8,3,4,3,213,0,67,230,79,0,3,3, -4,3,245,0,67,230,111,0,3,3,4,3,46,2,67,230, -79,0,7,3,4,3,47,2,67,230,111,0,7,3,4,3, -2,230,89,0,4,3,2,230,121,0,4,3,2,230,145,3, -1,3,2,230,149,3,1,3,2,230,151,3,1,3,2,230, -153,3,1,3,2,230,159,3,1,3,2,230,165,3,1,3, -2,230,169,3,1,3,202,3,67,230,185,3,8,3,1,3, -2,230,153,3,8,3,2,230,165,3,8,3,2,230,181,3, -1,3,2,230,185,3,1,3,203,3,67,230,197,3,8,3, -1,3,2,230,191,3,1,3,2,230,197,3,1,3,2,230, -21,4,0,3,2,230,21,4,8,3,2,230,19,4,1,3, -2,230,6,4,8,3,2,230,26,4,1,3,2,230,24,4, -0,3,2,230,35,4,6,3,2,230,24,4,6,3,2,230, -56,4,6,3,2,230,53,4,0,3,2,230,53,4,8,3, -2,230,51,4,1,3,2,230,86,4,8,3,2,230,58,4, -1,3,2,230,56,4,0,3,2,230,67,4,6,3,2,230, -116,4,15,3,2,230,117,4,15,3,2,230,22,4,6,3, -2,230,54,4,6,3,2,230,16,4,6,3,2,230,48,4, -6,3,2,230,16,4,8,3,2,230,48,4,8,3,2,230, -21,4,6,3,2,230,53,4,6,3,2,230,216,4,8,3, -2,230,217,4,8,3,2,230,22,4,8,3,2,230,54,4, -8,3,2,230,23,4,8,3,2,230,55,4,8,3,2,230, -24,4,4,3,2,230,56,4,4,3,2,230,24,4,8,3, -2,230,56,4,8,3,2,230,30,4,8,3,2,230,62,4, -8,3,2,230,232,4,8,3,2,230,233,4,8,3,2,230, -45,4,8,3,2,230,77,4,8,3,2,230,35,4,4,3, -2,230,67,4,4,3,2,230,35,4,8,3,2,230,67,4, -8,3,2,230,35,4,11,3,2,230,67,4,11,3,2,230, -39,4,8,3,2,230,71,4,8,3,2,230,43,4,8,3, -2,230,75,4,8,3,2,230,39,6,83,6,2,230,39,6, -84,6,2,230,72,6,84,6,2,220,39,6,85,6,2,230, -74,6,84,6,2,230,213,6,84,6,2,230,193,6,84,6, -2,230,210,6,84,6,2,7,40,9,60,9,2,7,48,9, -60,9,2,7,51,9,60,9,2,0,199,9,190,9,2,0, -199,9,215,9,2,0,71,11,86,11,2,0,71,11,62,11, -2,0,71,11,87,11,2,0,146,11,215,11,2,0,198,11, -190,11,2,0,199,11,190,11,2,0,198,11,215,11,2,91, -70,12,86,12,2,0,191,12,213,12,2,0,198,12,213,12, -2,0,198,12,214,12,202,12,67,0,198,12,194,12,213,12, -2,0,70,13,62,13,2,0,71,13,62,13,2,0,70,13, -87,13,2,9,217,13,202,13,220,13,67,9,217,13,207,13, -202,13,2,0,217,13,223,13,2,0,37,16,46,16,2,0, -5,27,53,27,2,0,7,27,53,27,2,0,9,27,53,27, -2,0,11,27,53,27,2,0,13,27,53,27,2,0,17,27, -53,27,2,0,58,27,53,27,2,0,60,27,53,27,2,0, -62,27,53,27,2,0,63,27,53,27,2,0,66,27,53,27, -2,220,65,0,37,3,2,220,97,0,37,3,2,230,66,0, -7,3,2,230,98,0,7,3,2,220,66,0,35,3,2,220, -98,0,35,3,2,220,66,0,49,3,2,220,98,0,49,3, -199,0,67,230,67,0,39,3,1,3,231,0,67,230,99,0, -39,3,1,3,2,230,68,0,7,3,2,230,100,0,7,3, -2,220,68,0,35,3,2,220,100,0,35,3,2,220,68,0, -49,3,2,220,100,0,49,3,2,202,68,0,39,3,2,202, -100,0,39,3,2,220,68,0,45,3,2,220,100,0,45,3, -18,1,67,230,69,0,4,3,0,3,19,1,67,230,101,0, -4,3,0,3,18,1,67,230,69,0,4,3,1,3,19,1, -67,230,101,0,4,3,1,3,2,220,69,0,45,3,2,220, -101,0,45,3,2,220,69,0,48,3,2,220,101,0,48,3, -40,2,67,230,69,0,39,3,6,3,41,2,67,230,101,0, -39,3,6,3,2,230,70,0,7,3,2,230,102,0,7,3, -2,230,71,0,4,3,2,230,103,0,4,3,2,230,72,0, -7,3,2,230,104,0,7,3,2,220,72,0,35,3,2,220, -104,0,35,3,2,230,72,0,8,3,2,230,104,0,8,3, -2,202,72,0,39,3,2,202,104,0,39,3,2,220,72,0, -46,3,2,220,104,0,46,3,2,220,73,0,48,3,2,220, -105,0,48,3,207,0,67,230,73,0,8,3,1,3,239,0, -67,230,105,0,8,3,1,3,2,230,75,0,1,3,2,230, -107,0,1,3,2,220,75,0,35,3,2,220,107,0,35,3, 
-2,220,75,0,49,3,2,220,107,0,49,3,54,30,67,230, -76,0,35,3,4,3,55,30,67,230,108,0,35,3,4,3, -2,220,76,0,49,3,2,220,108,0,49,3,2,220,76,0, -45,3,2,220,108,0,45,3,2,230,77,0,1,3,2,230, -109,0,1,3,2,230,77,0,7,3,2,230,109,0,7,3, -2,220,77,0,35,3,2,220,109,0,35,3,2,230,78,0, -7,3,2,230,110,0,7,3,2,220,78,0,35,3,2,220, -110,0,35,3,2,220,78,0,49,3,2,220,110,0,49,3, -2,220,78,0,45,3,2,220,110,0,45,3,213,0,67,230, -79,0,3,3,1,3,245,0,67,230,111,0,3,3,1,3, -213,0,67,230,79,0,3,3,8,3,245,0,67,230,111,0, -3,3,8,3,76,1,67,230,79,0,4,3,0,3,77,1, -67,230,111,0,4,3,0,3,76,1,67,230,79,0,4,3, -1,3,77,1,67,230,111,0,4,3,1,3,2,230,80,0, -1,3,2,230,112,0,1,3,2,230,80,0,7,3,2,230, -112,0,7,3,2,230,82,0,7,3,2,230,114,0,7,3, -90,30,67,230,82,0,35,3,4,3,91,30,67,230,114,0, -35,3,4,3,2,220,82,0,49,3,2,220,114,0,49,3, -2,230,83,0,7,3,2,230,115,0,7,3,90,1,67,230, -83,0,1,3,7,3,91,1,67,230,115,0,1,3,7,3, -96,1,67,230,83,0,12,3,7,3,97,1,67,230,115,0, -12,3,7,3,98,30,67,230,83,0,35,3,7,3,99,30, -67,230,115,0,35,3,7,3,2,230,84,0,7,3,2,230, -116,0,7,3,2,220,84,0,35,3,2,220,116,0,35,3, -2,220,84,0,49,3,2,220,116,0,49,3,2,220,84,0, -45,3,2,220,116,0,45,3,2,220,85,0,36,3,2,220, -117,0,36,3,2,220,85,0,48,3,2,220,117,0,48,3, -2,220,85,0,45,3,2,220,117,0,45,3,104,1,67,230, -85,0,3,3,1,3,105,1,67,230,117,0,3,3,1,3, -106,1,67,230,85,0,4,3,8,3,107,1,67,230,117,0, -4,3,8,3,2,230,86,0,3,3,2,230,118,0,3,3, -2,220,86,0,35,3,2,220,118,0,35,3,2,230,87,0, -0,3,2,230,119,0,0,3,2,230,87,0,1,3,2,230, -119,0,1,3,2,230,87,0,8,3,2,230,119,0,8,3, -2,230,87,0,7,3,2,230,119,0,7,3,2,220,87,0, -35,3,2,220,119,0,35,3,2,230,88,0,7,3,2,230, -120,0,7,3,2,230,88,0,8,3,2,230,120,0,8,3, -2,230,89,0,7,3,2,230,121,0,7,3,2,230,90,0, -2,3,2,230,122,0,2,3,2,220,90,0,35,3,2,220, -122,0,35,3,2,220,90,0,49,3,2,220,122,0,49,3, -2,220,104,0,49,3,2,230,116,0,8,3,2,230,119,0, -10,3,2,230,121,0,10,3,2,230,65,0,9,3,2,230, -97,0,9,3,194,0,67,230,65,0,2,3,1,3,226,0, -67,230,97,0,2,3,1,3,194,0,67,230,65,0,2,3, -0,3,226,0,67,230,97,0,2,3,0,3,194,0,67,230, -65,0,2,3,9,3,226,0,67,230,97,0,2,3,9,3, -194,0,67,230,65,0,2,3,3,3,226,0,67,230,97,0, -2,3,3,3,160,30,67,230,65,0,35,3,2,3,161,30, -67,230,97,0,35,3,2,3,2,1,67,230,65,0,6,3, -1,3,3,1,67,230,97,0,6,3,1,3,2,1,67,230, -65,0,6,3,0,3,3,1,67,230,97,0,6,3,0,3, -2,1,67,230,65,0,6,3,9,3,3,1,67,230,97,0, -6,3,9,3,2,1,67,230,65,0,6,3,3,3,3,1, -67,230,97,0,6,3,3,3,160,30,67,230,65,0,35,3, -6,3,161,30,67,230,97,0,35,3,6,3,2,230,69,0, -9,3,2,230,101,0,9,3,2,230,69,0,3,3,2,230, -101,0,3,3,202,0,67,230,69,0,2,3,1,3,234,0, -67,230,101,0,2,3,1,3,202,0,67,230,69,0,2,3, -0,3,234,0,67,230,101,0,2,3,0,3,202,0,67,230, -69,0,2,3,9,3,234,0,67,230,101,0,2,3,9,3, -202,0,67,230,69,0,2,3,3,3,234,0,67,230,101,0, -2,3,3,3,184,30,67,230,69,0,35,3,2,3,185,30, -67,230,101,0,35,3,2,3,2,230,73,0,9,3,2,230, -105,0,9,3,2,220,73,0,35,3,2,220,105,0,35,3, -2,230,79,0,9,3,2,230,111,0,9,3,212,0,67,230, -79,0,2,3,1,3,244,0,67,230,111,0,2,3,1,3, -212,0,67,230,79,0,2,3,0,3,244,0,67,230,111,0, -2,3,0,3,212,0,67,230,79,0,2,3,9,3,244,0, -67,230,111,0,2,3,9,3,212,0,67,230,79,0,2,3, -3,3,244,0,67,230,111,0,2,3,3,3,204,30,67,230, -79,0,35,3,2,3,205,30,67,230,111,0,35,3,2,3, -160,1,67,230,79,0,27,3,1,3,161,1,67,230,111,0, -27,3,1,3,160,1,67,230,79,0,27,3,0,3,161,1, -67,230,111,0,27,3,0,3,160,1,67,230,79,0,27,3, -9,3,161,1,67,230,111,0,27,3,9,3,160,1,67,230, -79,0,27,3,3,3,161,1,67,230,111,0,27,3,3,3, -160,1,67,220,79,0,27,3,35,3,161,1,67,220,111,0, -27,3,35,3,2,220,85,0,35,3,2,220,117,0,35,3, -2,230,85,0,9,3,2,230,117,0,9,3,175,1,67,230, 
-85,0,27,3,1,3,176,1,67,230,117,0,27,3,1,3, -175,1,67,230,85,0,27,3,0,3,176,1,67,230,117,0, -27,3,0,3,175,1,67,230,85,0,27,3,9,3,176,1, -67,230,117,0,27,3,9,3,175,1,67,230,85,0,27,3, -3,3,176,1,67,230,117,0,27,3,3,3,175,1,67,220, -85,0,27,3,35,3,176,1,67,220,117,0,27,3,35,3, -2,230,89,0,0,3,2,230,121,0,0,3,2,220,89,0, -35,3,2,220,121,0,35,3,2,230,89,0,9,3,2,230, -121,0,9,3,2,230,89,0,3,3,2,230,121,0,3,3, -16,31,67,230,181,3,19,3,0,3,17,31,67,230,181,3, -20,3,0,3,16,31,67,230,181,3,19,3,1,3,17,31, -67,230,181,3,20,3,1,3,24,31,67,230,149,3,19,3, -0,3,25,31,67,230,149,3,20,3,0,3,24,31,67,230, -149,3,19,3,1,3,25,31,67,230,149,3,20,3,1,3, -48,31,67,230,185,3,19,3,0,3,49,31,67,230,185,3, -20,3,0,3,48,31,67,230,185,3,19,3,1,3,49,31, -67,230,185,3,20,3,1,3,48,31,67,230,185,3,19,3, -66,3,49,31,67,230,185,3,20,3,66,3,56,31,67,230, -153,3,19,3,0,3,57,31,67,230,153,3,20,3,0,3, -56,31,67,230,153,3,19,3,1,3,57,31,67,230,153,3, -20,3,1,3,56,31,67,230,153,3,19,3,66,3,57,31, -67,230,153,3,20,3,66,3,64,31,67,230,191,3,19,3, -0,3,65,31,67,230,191,3,20,3,0,3,64,31,67,230, -191,3,19,3,1,3,65,31,67,230,191,3,20,3,1,3, -72,31,67,230,159,3,19,3,0,3,73,31,67,230,159,3, -20,3,0,3,72,31,67,230,159,3,19,3,1,3,73,31, -67,230,159,3,20,3,1,3,80,31,67,230,197,3,19,3, -0,3,81,31,67,230,197,3,20,3,0,3,80,31,67,230, -197,3,19,3,1,3,81,31,67,230,197,3,20,3,1,3, -80,31,67,230,197,3,19,3,66,3,81,31,67,230,197,3, -20,3,66,3,89,31,67,230,165,3,20,3,0,3,89,31, -67,230,165,3,20,3,1,3,89,31,67,230,165,3,20,3, -66,3,2,230,181,3,0,3,2,230,185,3,0,3,2,230, -191,3,0,3,2,230,197,3,0,3,0,31,67,240,177,3, -19,3,69,3,1,31,67,240,177,3,20,3,69,3,2,31, -69,3,2,0,68,240,177,3,19,3,0,3,69,3,3,31, -69,3,2,0,68,240,177,3,20,3,0,3,69,3,4,31, -69,3,2,0,68,240,177,3,19,3,1,3,69,3,5,31, -69,3,2,0,68,240,177,3,20,3,1,3,69,3,6,31, -69,3,2,0,68,240,177,3,19,3,66,3,69,3,7,31, -69,3,2,0,68,240,177,3,20,3,66,3,69,3,8,31, -67,240,145,3,19,3,69,3,9,31,67,240,145,3,20,3, -69,3,10,31,69,3,2,0,68,240,145,3,19,3,0,3, -69,3,11,31,69,3,2,0,68,240,145,3,20,3,0,3, -69,3,12,31,69,3,2,0,68,240,145,3,19,3,1,3, -69,3,13,31,69,3,2,0,68,240,145,3,20,3,1,3, -69,3,14,31,69,3,2,0,68,240,145,3,19,3,66,3, -69,3,15,31,69,3,2,0,68,240,145,3,20,3,66,3, -69,3,32,31,67,240,183,3,19,3,69,3,33,31,67,240, -183,3,20,3,69,3,34,31,69,3,2,0,68,240,183,3, -19,3,0,3,69,3,35,31,69,3,2,0,68,240,183,3, -20,3,0,3,69,3,36,31,69,3,2,0,68,240,183,3, -19,3,1,3,69,3,37,31,69,3,2,0,68,240,183,3, -20,3,1,3,69,3,38,31,69,3,2,0,68,240,183,3, -19,3,66,3,69,3,39,31,69,3,2,0,68,240,183,3, -20,3,66,3,69,3,40,31,67,240,151,3,19,3,69,3, -41,31,67,240,151,3,20,3,69,3,42,31,69,3,2,0, -68,240,151,3,19,3,0,3,69,3,43,31,69,3,2,0, -68,240,151,3,20,3,0,3,69,3,44,31,69,3,2,0, -68,240,151,3,19,3,1,3,69,3,45,31,69,3,2,0, -68,240,151,3,20,3,1,3,69,3,46,31,69,3,2,0, -68,240,151,3,19,3,66,3,69,3,47,31,69,3,2,0, -68,240,151,3,20,3,66,3,69,3,96,31,67,240,201,3, -19,3,69,3,97,31,67,240,201,3,20,3,69,3,98,31, -69,3,2,0,68,240,201,3,19,3,0,3,69,3,99,31, -69,3,2,0,68,240,201,3,20,3,0,3,69,3,100,31, -69,3,2,0,68,240,201,3,19,3,1,3,69,3,101,31, -69,3,2,0,68,240,201,3,20,3,1,3,69,3,102,31, -69,3,2,0,68,240,201,3,19,3,66,3,69,3,103,31, -69,3,2,0,68,240,201,3,20,3,66,3,69,3,104,31, -67,240,169,3,19,3,69,3,105,31,67,240,169,3,20,3, -69,3,106,31,69,3,2,0,68,240,169,3,19,3,0,3, -69,3,107,31,69,3,2,0,68,240,169,3,20,3,0,3, -69,3,108,31,69,3,2,0,68,240,169,3,19,3,1,3, -69,3,109,31,69,3,2,0,68,240,169,3,20,3,1,3, -69,3,110,31,69,3,2,0,68,240,169,3,19,3,66,3, -69,3,111,31,69,3,2,0,68,240,169,3,20,3,66,3, 
-69,3,2,230,177,3,6,3,2,230,177,3,4,3,112,31, -67,240,177,3,0,3,69,3,2,240,177,3,69,3,172,3, -67,240,177,3,1,3,69,3,182,31,67,240,177,3,66,3, -69,3,2,230,145,3,6,3,2,230,145,3,4,3,2,230, -145,3,0,3,2,240,145,3,69,3,116,31,67,240,183,3, -0,3,69,3,2,240,183,3,69,3,174,3,67,240,183,3, -1,3,69,3,198,31,67,240,183,3,66,3,69,3,2,230, -149,3,0,3,2,230,151,3,0,3,2,240,151,3,69,3, -2,230,185,3,6,3,2,230,185,3,4,3,202,3,67,230, -185,3,8,3,0,3,2,230,185,3,66,3,202,3,67,230, -185,3,8,3,66,3,2,230,153,3,6,3,2,230,153,3, -4,3,2,230,153,3,0,3,2,230,197,3,6,3,2,230, -197,3,4,3,203,3,67,230,197,3,8,3,0,3,2,230, -193,3,19,3,2,230,193,3,20,3,2,230,197,3,66,3, -203,3,67,230,197,3,8,3,66,3,2,230,165,3,6,3, -2,230,165,3,4,3,2,230,165,3,0,3,2,230,161,3, -20,3,124,31,67,240,201,3,0,3,69,3,2,240,201,3, -69,3,206,3,67,240,201,3,1,3,69,3,246,31,67,240, -201,3,66,3,69,3,2,230,159,3,0,3,2,230,169,3, -0,3,2,240,169,3,69,3,2,1,144,33,56,3,2,1, -146,33,56,3,2,1,148,33,56,3,2,1,208,33,56,3, -2,1,212,33,56,3,2,1,210,33,56,3,2,1,3,34, -56,3,2,1,8,34,56,3,2,1,11,34,56,3,2,1, -35,34,56,3,2,1,37,34,56,3,2,1,60,34,56,3, -2,1,67,34,56,3,2,1,69,34,56,3,2,1,72,34, -56,3,2,1,61,0,56,3,2,1,97,34,56,3,2,1, -77,34,56,3,2,1,60,0,56,3,2,1,62,0,56,3, -2,1,100,34,56,3,2,1,101,34,56,3,2,1,114,34, -56,3,2,1,115,34,56,3,2,1,118,34,56,3,2,1, -119,34,56,3,2,1,122,34,56,3,2,1,123,34,56,3, -2,1,130,34,56,3,2,1,131,34,56,3,2,1,134,34, -56,3,2,1,135,34,56,3,2,1,162,34,56,3,2,1, -168,34,56,3,2,1,169,34,56,3,2,1,171,34,56,3, -2,1,124,34,56,3,2,1,125,34,56,3,2,1,145,34, -56,3,2,1,146,34,56,3,2,1,178,34,56,3,2,1, -179,34,56,3,2,1,180,34,56,3,2,1,181,34,56,3, -2,8,75,48,153,48,2,8,77,48,153,48,2,8,79,48, -153,48,2,8,81,48,153,48,2,8,83,48,153,48,2,8, -85,48,153,48,2,8,87,48,153,48,2,8,89,48,153,48, -2,8,91,48,153,48,2,8,93,48,153,48,2,8,95,48, -153,48,2,8,97,48,153,48,2,8,100,48,153,48,2,8, -102,48,153,48,2,8,104,48,153,48,2,8,111,48,153,48, -2,8,111,48,154,48,2,8,114,48,153,48,2,8,114,48, -154,48,2,8,117,48,153,48,2,8,117,48,154,48,2,8, -120,48,153,48,2,8,120,48,154,48,2,8,123,48,153,48, -2,8,123,48,154,48,2,8,70,48,153,48,2,8,157,48, -153,48,2,8,171,48,153,48,2,8,173,48,153,48,2,8, -175,48,153,48,2,8,177,48,153,48,2,8,179,48,153,48, -2,8,181,48,153,48,2,8,183,48,153,48,2,8,185,48, -153,48,2,8,187,48,153,48,2,8,189,48,153,48,2,8, -191,48,153,48,2,8,193,48,153,48,2,8,196,48,153,48, -2,8,198,48,153,48,2,8,200,48,153,48,2,8,207,48, -153,48,2,8,207,48,154,48,2,8,210,48,153,48,2,8, -210,48,154,48,2,8,213,48,153,48,2,8,213,48,154,48, -2,8,216,48,153,48,2,8,216,48,154,48,2,8,219,48, -153,48,2,8,219,48,154,48,2,8,166,48,153,48,2,8, -239,48,153,48,2,8,240,48,153,48,2,8,241,48,153,48, -2,8,242,48,153,48,2,8,253,48,153,48,4,7,4,216, -153,220,4,216,186,220,4,7,4,216,155,220,4,216,186,220, -4,7,4,216,165,220,4,216,186,220,4,0,4,216,49,221, -4,216,39,221,4,0,4,216,50,221,4,216,39,221,4,0, -4,216,71,223,4,216,62,223,4,0,4,216,71,223,4,216, -87,223,4,0,5,216,185,220,5,216,186,220,4,0,5,216, -185,220,5,216,176,220,4,0,5,216,185,220,5,216,189,220, -4,0,5,216,184,221,5,216,175,221,4,0,5,216,185,221, -5,216,175,221,1,0,32,0,2,230,32,0,8,3,1,0, -97,0,2,230,32,0,4,3,1,0,50,0,1,0,51,0, -2,230,32,0,1,3,1,0,188,3,2,202,32,0,39,3, -1,0,49,0,1,0,111,0,3,0,49,0,68,32,52,0, -3,0,49,0,68,32,50,0,3,0,51,0,68,32,52,0, -2,0,73,0,74,0,2,0,105,0,106,0,2,0,76,0, -183,0,2,0,108,0,183,0,2,0,188,2,110,0,1,0, -115,0,2,0,76,0,74,0,2,0,76,0,106,0,2,0, -108,0,106,0,2,0,78,0,74,0,2,0,78,0,106,0, -2,0,110,0,106,0,2,0,68,0,90,0,2,0,68,0, -122,0,2,0,100,0,122,0,1,0,104,0,1,0,102,2, 
-1,0,106,0,1,0,114,0,1,0,119,0,1,0,121,0, -2,230,32,0,6,3,2,230,32,0,7,3,2,230,32,0, -10,3,2,202,32,0,40,3,2,230,32,0,3,3,2,230, -32,0,11,3,1,0,99,2,1,0,108,0,1,0,120,0, -1,0,149,2,1,0,185,2,2,240,32,0,69,3,1,0, -59,0,168,0,67,230,32,0,8,3,1,3,1,0,183,0, -1,0,152,3,1,0,163,3,2,0,101,5,130,5,2,0, -39,6,116,6,2,0,72,6,116,6,2,0,199,6,116,6, -2,0,74,6,116,6,2,7,21,9,60,9,2,7,22,9, -60,9,2,7,23,9,60,9,2,7,28,9,60,9,2,7, -33,9,60,9,2,7,34,9,60,9,2,7,43,9,60,9, -2,7,47,9,60,9,2,7,161,9,188,9,2,7,162,9, -188,9,2,7,175,9,188,9,2,7,50,10,60,10,2,7, -56,10,60,10,2,7,22,10,60,10,2,7,23,10,60,10, -2,7,28,10,60,10,2,7,43,10,60,10,2,7,33,11, -60,11,2,7,34,11,60,11,2,0,77,14,50,14,2,0, -205,14,178,14,2,0,171,14,153,14,2,0,171,14,161,14, -2,0,66,15,183,15,2,0,76,15,183,15,2,0,81,15, -183,15,2,0,86,15,183,15,2,0,91,15,183,15,2,0, -64,15,181,15,2,130,178,15,128,15,178,15,129,15,2,0, -67,130,178,15,113,15,128,15,2,130,179,15,128,15,179,15, -129,15,2,0,67,130,179,15,113,15,128,15,2,0,146,15, -183,15,2,0,156,15,183,15,2,0,161,15,183,15,2,0, -166,15,183,15,2,0,171,15,183,15,2,0,144,15,181,15, -1,0,65,0,1,0,198,0,1,0,66,0,1,0,68,0, -1,0,69,0,1,0,142,1,1,0,71,0,1,0,72,0, -1,0,73,0,1,0,74,0,1,0,75,0,1,0,76,0, -1,0,77,0,1,0,78,0,1,0,79,0,1,0,34,2, -1,0,80,0,1,0,82,0,1,0,84,0,1,0,85,0, -1,0,87,0,1,0,80,2,1,0,81,2,1,0,2,29, -1,0,98,0,1,0,100,0,1,0,101,0,1,0,89,2, -1,0,91,2,1,0,92,2,1,0,103,0,1,0,107,0, -1,0,109,0,1,0,75,1,1,0,84,2,1,0,112,0, -1,0,116,0,1,0,117,0,1,0,111,2,1,0,118,0, -1,0,178,3,1,0,179,3,1,0,180,3,1,0,198,3, -1,0,199,3,1,0,105,0,1,0,193,3,1,0,61,4, -1,0,82,2,1,0,99,0,1,0,85,2,1,0,240,0, -1,0,102,0,1,0,95,2,1,0,97,2,1,0,101,2, -1,0,104,2,1,0,105,2,1,0,106,2,1,0,157,2, -1,0,109,2,1,0,159,2,1,0,113,2,1,0,112,2, -1,0,114,2,1,0,115,2,1,0,116,2,1,0,117,2, -1,0,120,2,1,0,130,2,1,0,131,2,1,0,171,1, -1,0,137,2,1,0,138,2,1,0,28,29,1,0,139,2, -1,0,140,2,1,0,122,0,1,0,144,2,1,0,145,2, -1,0,146,2,1,0,184,3,2,0,97,0,190,2,2,230, -32,0,19,3,1,0,185,3,2,230,32,0,66,3,168,0, -67,230,32,0,8,3,66,3,191,31,67,230,32,0,19,3, -0,3,191,31,67,230,32,0,19,3,1,3,191,31,67,230, -32,0,19,3,66,3,254,31,67,230,32,0,20,3,0,3, -254,31,67,230,32,0,20,3,1,3,254,31,67,230,32,0, -20,3,66,3,168,0,67,230,32,0,8,3,0,3,133,3, -1,0,67,230,32,0,8,3,1,3,1,0,96,0,180,0, -66,230,32,0,1,3,2,230,32,0,20,3,2,32,1,0, -65,0,32,0,3,32,1,0,65,0,32,0,2,220,32,0, -51,3,1,0,46,0,2,0,46,0,46,0,3,0,46,0, -46,0,46,0,2,0,50,32,50,32,3,0,50,32,50,32, -50,32,2,0,53,32,53,32,3,0,53,32,53,32,53,32, -2,0,33,0,33,0,2,230,32,0,5,3,2,0,63,0, -63,0,2,0,63,0,33,0,2,0,33,0,63,0,4,0, -50,32,50,32,50,32,50,32,1,0,48,0,1,0,52,0, -1,0,53,0,1,0,54,0,1,0,55,0,1,0,56,0, -1,0,57,0,1,0,43,0,1,0,18,34,1,0,61,0, -1,0,40,0,1,0,41,0,1,0,110,0,2,0,82,0, -115,0,3,0,97,0,47,0,99,0,3,0,97,0,47,0, -115,0,1,0,67,0,2,0,176,0,67,0,3,0,99,0, -47,0,111,0,3,0,99,0,47,0,117,0,1,0,144,1, -2,0,176,0,70,0,1,0,39,1,2,0,78,0,111,0, -1,0,81,0,2,0,83,0,77,0,3,0,84,0,69,0, -76,0,2,0,84,0,77,0,1,0,90,0,1,0,169,3, -1,0,70,0,1,0,208,5,1,0,209,5,1,0,210,5, -1,0,211,5,3,0,70,0,65,0,88,0,1,0,192,3, -1,0,147,3,1,0,160,3,1,0,17,34,3,0,49,0, -68,32,55,0,3,0,49,0,68,32,57,0,4,0,49,0, -68,32,49,0,48,0,3,0,49,0,68,32,51,0,3,0, -50,0,68,32,51,0,3,0,49,0,68,32,53,0,3,0, -50,0,68,32,53,0,3,0,51,0,68,32,53,0,3,0, -52,0,68,32,53,0,3,0,49,0,68,32,54,0,3,0, -53,0,68,32,54,0,3,0,49,0,68,32,56,0,3,0, -51,0,68,32,56,0,3,0,53,0,68,32,56,0,3,0, -55,0,68,32,56,0,2,0,49,0,68,32,2,0,73,0, -73,0,3,0,73,0,73,0,73,0,2,0,73,0,86,0, -1,0,86,0,2,0,86,0,73,0,3,0,86,0,73,0, -73,0,4,0,86,0,73,0,73,0,73,0,2,0,73,0, 
-88,0,1,0,88,0,2,0,88,0,73,0,3,0,88,0, -73,0,73,0,2,0,105,0,105,0,3,0,105,0,105,0, -105,0,2,0,105,0,118,0,2,0,118,0,105,0,3,0, -118,0,105,0,105,0,4,0,118,0,105,0,105,0,105,0, -2,0,105,0,120,0,2,0,120,0,105,0,3,0,120,0, -105,0,105,0,3,0,48,0,68,32,51,0,2,0,43,34, -43,34,3,0,43,34,43,34,43,34,2,0,46,34,46,34, -3,0,46,34,46,34,46,34,1,0,8,48,1,0,9,48, -2,0,49,0,48,0,2,0,49,0,49,0,2,0,49,0, -50,0,2,0,49,0,51,0,2,0,49,0,52,0,2,0, -49,0,53,0,2,0,49,0,54,0,2,0,49,0,55,0, -2,0,49,0,56,0,2,0,49,0,57,0,2,0,50,0, -48,0,3,0,40,0,49,0,41,0,3,0,40,0,50,0, -41,0,3,0,40,0,51,0,41,0,3,0,40,0,52,0, -41,0,3,0,40,0,53,0,41,0,3,0,40,0,54,0, -41,0,3,0,40,0,55,0,41,0,3,0,40,0,56,0, -41,0,3,0,40,0,57,0,41,0,4,0,40,0,49,0, -48,0,41,0,4,0,40,0,49,0,49,0,41,0,4,0, -40,0,49,0,50,0,41,0,4,0,40,0,49,0,51,0, -41,0,4,0,40,0,49,0,52,0,41,0,4,0,40,0, -49,0,53,0,41,0,4,0,40,0,49,0,54,0,41,0, -4,0,40,0,49,0,55,0,41,0,4,0,40,0,49,0, -56,0,41,0,4,0,40,0,49,0,57,0,41,0,4,0, -40,0,50,0,48,0,41,0,2,0,49,0,46,0,2,0, -50,0,46,0,2,0,51,0,46,0,2,0,52,0,46,0, -2,0,53,0,46,0,2,0,54,0,46,0,2,0,55,0, -46,0,2,0,56,0,46,0,2,0,57,0,46,0,3,0, -49,0,48,0,46,0,3,0,49,0,49,0,46,0,3,0, -49,0,50,0,46,0,3,0,49,0,51,0,46,0,3,0, -49,0,52,0,46,0,3,0,49,0,53,0,46,0,3,0, -49,0,54,0,46,0,3,0,49,0,55,0,46,0,3,0, -49,0,56,0,46,0,3,0,49,0,57,0,46,0,3,0, -50,0,48,0,46,0,3,0,40,0,97,0,41,0,3,0, -40,0,98,0,41,0,3,0,40,0,99,0,41,0,3,0, -40,0,100,0,41,0,3,0,40,0,101,0,41,0,3,0, -40,0,102,0,41,0,3,0,40,0,103,0,41,0,3,0, -40,0,104,0,41,0,3,0,40,0,105,0,41,0,3,0, -40,0,106,0,41,0,3,0,40,0,107,0,41,0,3,0, -40,0,108,0,41,0,3,0,40,0,109,0,41,0,3,0, -40,0,110,0,41,0,3,0,40,0,111,0,41,0,3,0, -40,0,112,0,41,0,3,0,40,0,113,0,41,0,3,0, -40,0,114,0,41,0,3,0,40,0,115,0,41,0,3,0, -40,0,116,0,41,0,3,0,40,0,117,0,41,0,3,0, -40,0,118,0,41,0,3,0,40,0,119,0,41,0,3,0, -40,0,120,0,41,0,3,0,40,0,121,0,41,0,3,0, -40,0,122,0,41,0,1,0,83,0,1,0,89,0,1,0, -113,0,4,0,43,34,43,34,43,34,43,34,3,0,58,0, -58,0,61,0,2,0,61,0,61,0,3,0,61,0,61,0, -61,0,2,1,221,42,56,3,1,0,205,107,1,0,159,159, -1,0,0,78,1,0,40,78,1,0,54,78,1,0,63,78, -1,0,89,78,1,0,133,78,1,0,140,78,1,0,160,78, -1,0,186,78,1,0,63,81,1,0,101,81,1,0,107,81, -1,0,130,81,1,0,150,81,1,0,171,81,1,0,224,81, -1,0,245,81,1,0,0,82,1,0,155,82,1,0,249,82, -1,0,21,83,1,0,26,83,1,0,56,83,1,0,65,83, -1,0,92,83,1,0,105,83,1,0,130,83,1,0,182,83, -1,0,200,83,1,0,227,83,1,0,215,86,1,0,31,87, -1,0,235,88,1,0,2,89,1,0,10,89,1,0,21,89, -1,0,39,89,1,0,115,89,1,0,80,91,1,0,128,91, -1,0,248,91,1,0,15,92,1,0,34,92,1,0,56,92, -1,0,110,92,1,0,113,92,1,0,219,93,1,0,229,93, -1,0,241,93,1,0,254,93,1,0,114,94,1,0,122,94, -1,0,127,94,1,0,244,94,1,0,254,94,1,0,11,95, -1,0,19,95,1,0,80,95,1,0,97,95,1,0,115,95, -1,0,195,95,1,0,8,98,1,0,54,98,1,0,75,98, -1,0,47,101,1,0,52,101,1,0,135,101,1,0,151,101, -1,0,164,101,1,0,185,101,1,0,224,101,1,0,229,101, -1,0,240,102,1,0,8,103,1,0,40,103,1,0,32,107, -1,0,98,107,1,0,121,107,1,0,179,107,1,0,203,107, -1,0,212,107,1,0,219,107,1,0,15,108,1,0,20,108, -1,0,52,108,1,0,107,112,1,0,42,114,1,0,54,114, -1,0,59,114,1,0,63,114,1,0,71,114,1,0,89,114, -1,0,91,114,1,0,172,114,1,0,132,115,1,0,137,115, -1,0,220,116,1,0,230,116,1,0,24,117,1,0,31,117, -1,0,40,117,1,0,48,117,1,0,139,117,1,0,146,117, -1,0,118,118,1,0,125,118,1,0,174,118,1,0,191,118, -1,0,238,118,1,0,219,119,1,0,226,119,1,0,243,119, -1,0,58,121,1,0,184,121,1,0,190,121,1,0,116,122, -1,0,203,122,1,0,249,122,1,0,115,124,1,0,248,124, -1,0,54,127,1,0,81,127,1,0,138,127,1,0,189,127, -1,0,1,128,1,0,12,128,1,0,18,128,1,0,51,128, -1,0,127,128,1,0,137,128,1,0,227,129,1,0,234,129, 
-1,0,243,129,1,0,252,129,1,0,12,130,1,0,27,130, -1,0,31,130,1,0,110,130,1,0,114,130,1,0,120,130, -1,0,77,134,1,0,107,134,1,0,64,136,1,0,76,136, -1,0,99,136,1,0,126,137,1,0,139,137,1,0,210,137, -1,0,0,138,1,0,55,140,1,0,70,140,1,0,85,140, -1,0,120,140,1,0,157,140,1,0,100,141,1,0,112,141, -1,0,179,141,1,0,171,142,1,0,202,142,1,0,155,143, -1,0,176,143,1,0,181,143,1,0,145,144,1,0,73,145, -1,0,198,145,1,0,204,145,1,0,209,145,1,0,119,149, -1,0,128,149,1,0,28,150,1,0,182,150,1,0,185,150, -1,0,232,150,1,0,81,151,1,0,94,151,1,0,98,151, -1,0,105,151,1,0,203,151,1,0,237,151,1,0,243,151, -1,0,1,152,1,0,168,152,1,0,219,152,1,0,223,152, -1,0,150,153,1,0,153,153,1,0,172,153,1,0,168,154, -1,0,216,154,1,0,223,154,1,0,37,155,1,0,47,155, -1,0,50,155,1,0,60,155,1,0,90,155,1,0,229,156, -1,0,117,158,1,0,127,158,1,0,165,158,1,0,187,158, -1,0,195,158,1,0,205,158,1,0,209,158,1,0,249,158, -1,0,253,158,1,0,14,159,1,0,19,159,1,0,32,159, -1,0,59,159,1,0,74,159,1,0,82,159,1,0,141,159, -1,0,156,159,1,0,160,159,1,0,68,83,1,0,69,83, -2,8,32,0,153,48,2,8,32,0,154,48,2,0,136,48, -138,48,2,0,179,48,200,48,1,0,0,17,1,0,1,17, -1,0,2,17,1,0,3,17,1,0,4,17,1,0,5,17, -1,0,26,17,1,0,6,17,1,0,7,17,1,0,8,17, -1,0,33,17,1,0,9,17,1,0,10,17,1,0,11,17, -1,0,12,17,1,0,13,17,1,0,14,17,1,0,15,17, -1,0,16,17,1,0,17,17,1,0,18,17,1,0,96,17, -1,0,20,17,1,0,21,17,1,0,199,17,1,0,200,17, -1,0,204,17,1,0,206,17,1,0,211,17,1,0,215,17, -1,0,217,17,1,0,28,17,1,0,221,17,1,0,223,17, -1,0,29,17,1,0,30,17,1,0,32,17,1,0,34,17, -1,0,35,17,1,0,39,17,1,0,41,17,1,0,43,17, -1,0,44,17,1,0,45,17,1,0,46,17,1,0,47,17, -1,0,50,17,1,0,54,17,1,0,64,17,1,0,71,17, -1,0,76,17,1,0,241,17,1,0,242,17,1,0,87,17, -1,0,88,17,1,0,89,17,1,0,132,17,1,0,133,17, -1,0,136,17,1,0,145,17,1,0,146,17,1,0,148,17, -1,0,158,17,1,0,161,17,1,0,9,78,1,0,219,86, -1,0,10,78,1,0,45,78,1,0,11,78,1,0,50,117, -1,0,25,78,1,0,1,78,1,0,41,89,1,0,48,87, -3,0,40,0,0,17,41,0,3,0,40,0,2,17,41,0, -3,0,40,0,3,17,41,0,3,0,40,0,5,17,41,0, -3,0,40,0,6,17,41,0,3,0,40,0,7,17,41,0, -3,0,40,0,9,17,41,0,3,0,40,0,11,17,41,0, -3,0,40,0,12,17,41,0,3,0,40,0,14,17,41,0, -3,0,40,0,15,17,41,0,3,0,40,0,16,17,41,0, -3,0,40,0,17,17,41,0,3,0,40,0,18,17,41,0, -3,0,40,0,0,78,41,0,3,0,40,0,140,78,41,0, -3,0,40,0,9,78,41,0,3,0,40,0,219,86,41,0, -3,0,40,0,148,78,41,0,3,0,40,0,109,81,41,0, -3,0,40,0,3,78,41,0,3,0,40,0,107,81,41,0, -3,0,40,0,93,78,41,0,3,0,40,0,65,83,41,0, -3,0,40,0,8,103,41,0,3,0,40,0,107,112,41,0, -3,0,40,0,52,108,41,0,3,0,40,0,40,103,41,0, -3,0,40,0,209,145,41,0,3,0,40,0,31,87,41,0, -3,0,40,0,229,101,41,0,3,0,40,0,42,104,41,0, -3,0,40,0,9,103,41,0,3,0,40,0,62,121,41,0, -3,0,40,0,13,84,41,0,3,0,40,0,121,114,41,0, -3,0,40,0,161,140,41,0,3,0,40,0,93,121,41,0, -3,0,40,0,180,82,41,0,3,0,40,0,227,78,41,0, -3,0,40,0,124,84,41,0,3,0,40,0,102,91,41,0, -3,0,40,0,227,118,41,0,3,0,40,0,1,79,41,0, -3,0,40,0,199,140,41,0,3,0,40,0,84,83,41,0, -3,0,40,0,109,121,41,0,3,0,40,0,17,79,41,0, -3,0,40,0,234,129,41,0,3,0,40,0,243,129,41,0, -1,0,79,85,1,0,124,94,1,0,143,123,3,0,80,0, -84,0,69,0,2,0,50,0,49,0,2,0,50,0,50,0, -2,0,50,0,51,0,2,0,50,0,52,0,2,0,50,0, -53,0,2,0,50,0,54,0,2,0,50,0,55,0,2,0, -50,0,56,0,2,0,50,0,57,0,2,0,51,0,48,0, -2,0,51,0,49,0,2,0,51,0,50,0,2,0,51,0, -51,0,2,0,51,0,52,0,2,0,51,0,53,0,1,0, -148,78,1,0,109,81,1,0,3,78,1,0,93,78,1,0, -42,104,1,0,9,103,1,0,62,121,1,0,13,84,1,0, -121,114,1,0,161,140,1,0,93,121,1,0,180,82,1,0, -216,121,1,0,55,117,1,0,105,144,1,0,42,81,1,0, -112,83,1,0,232,108,1,0,5,152,1,0,17,79,1,0, -153,81,1,0,99,107,1,0,230,93,1,0,243,83,1,0, -59,83,1,0,151,91,1,0,102,91,1,0,227,118,1,0, 
-1,79,1,0,199,140,1,0,84,83,1,0,28,89,2,0, -51,0,54,0,2,0,51,0,55,0,2,0,51,0,56,0, -2,0,51,0,57,0,2,0,52,0,48,0,2,0,52,0, -49,0,2,0,52,0,50,0,2,0,52,0,51,0,2,0, -52,0,52,0,2,0,52,0,53,0,2,0,52,0,54,0, -2,0,52,0,55,0,2,0,52,0,56,0,2,0,52,0, -57,0,2,0,53,0,48,0,2,0,49,0,8,103,2,0, -50,0,8,103,2,0,51,0,8,103,2,0,52,0,8,103, -2,0,53,0,8,103,2,0,54,0,8,103,2,0,55,0, -8,103,2,0,56,0,8,103,2,0,57,0,8,103,3,0, -49,0,48,0,8,103,3,0,49,0,49,0,8,103,3,0, -49,0,50,0,8,103,2,0,72,0,103,0,3,0,101,0, -114,0,103,0,2,0,101,0,86,0,3,0,76,0,84,0, -68,0,1,0,162,48,1,0,164,48,1,0,166,48,1,0, -168,48,1,0,170,48,1,0,171,48,1,0,173,48,1,0, -175,48,1,0,177,48,1,0,179,48,1,0,181,48,1,0, -183,48,1,0,185,48,1,0,187,48,1,0,189,48,1,0, -191,48,1,0,193,48,1,0,196,48,1,0,198,48,1,0, -200,48,1,0,202,48,1,0,203,48,1,0,204,48,1,0, -205,48,1,0,206,48,1,0,207,48,1,0,210,48,1,0, -213,48,1,0,216,48,1,0,219,48,1,0,222,48,1,0, -223,48,1,0,224,48,1,0,225,48,1,0,226,48,1,0, -228,48,1,0,230,48,1,0,232,48,1,0,233,48,1,0, -234,48,1,0,235,48,1,0,236,48,1,0,237,48,1,0, -239,48,1,0,240,48,1,0,241,48,1,0,242,48,2,0, -228,78,140,84,4,0,162,48,235,48,213,48,161,48,3,0, -162,48,252,48,235,48,3,0,164,48,243,48,193,48,3,0, -166,48,169,48,243,48,4,0,168,48,252,48,171,48,252,48, -3,0,170,48,243,48,185,48,3,0,170,48,252,48,224,48, -3,0,171,48,164,48,234,48,4,0,171,48,233,48,195,48, -200,48,4,0,171,48,237,48,234,48,252,48,4,0,173,48, -229,48,234,48,252,48,2,0,173,48,237,48,6,0,173,48, -237,48,225,48,252,48,200,48,235,48,5,0,173,48,237,48, -239,48,195,48,200,48,4,0,175,48,237,48,252,48,205,48, -3,0,177,48,252,48,185,48,3,0,179,48,235,48,202,48, -4,0,181,48,164,48,175,48,235,48,5,0,181,48,243,48, -193,48,252,48,224,48,3,0,187,48,243,48,193,48,3,0, -187,48,243,48,200,48,2,0,200,48,243,48,2,0,202,48, -206,48,3,0,206,48,195,48,200,48,3,0,207,48,164,48, -196,48,4,0,213,48,163,48,252,48,200,48,3,0,213,48, -233,48,243,48,5,0,216,48,175,48,191,48,252,48,235,48, -3,0,216,48,235,48,196,48,2,0,219,48,243,48,3,0, -219,48,252,48,235,48,3,0,219,48,252,48,243,48,4,0, -222,48,164,48,175,48,237,48,3,0,222,48,164,48,235,48, -3,0,222,48,195,48,207,48,3,0,222,48,235,48,175,48, -5,0,222,48,243,48,183,48,231,48,243,48,4,0,223,48, -175,48,237,48,243,48,2,0,223,48,234,48,4,0,225,48, -252,48,200,48,235,48,3,0,228,48,252,48,235,48,3,0, -230,48,162,48,243,48,4,0,234,48,195,48,200,48,235,48, -2,0,234,48,233,48,2,0,236,48,224,48,3,0,239,48, -195,48,200,48,2,0,48,0,185,112,2,0,49,0,185,112, -2,0,50,0,185,112,2,0,51,0,185,112,2,0,52,0, -185,112,2,0,53,0,185,112,2,0,54,0,185,112,2,0, -55,0,185,112,2,0,56,0,185,112,2,0,57,0,185,112, -3,0,49,0,48,0,185,112,3,0,49,0,49,0,185,112, -3,0,49,0,50,0,185,112,3,0,49,0,51,0,185,112, -3,0,49,0,52,0,185,112,3,0,49,0,53,0,185,112, -3,0,49,0,54,0,185,112,3,0,49,0,55,0,185,112, -3,0,49,0,56,0,185,112,3,0,49,0,57,0,185,112, -3,0,50,0,48,0,185,112,3,0,50,0,49,0,185,112, -3,0,50,0,50,0,185,112,3,0,50,0,51,0,185,112, -3,0,50,0,52,0,185,112,3,0,104,0,80,0,97,0, -2,0,100,0,97,0,2,0,65,0,85,0,3,0,98,0, -97,0,114,0,2,0,111,0,86,0,2,0,112,0,99,0, -2,0,100,0,109,0,100,0,109,0,178,0,3,0,67,0, -100,0,109,0,50,0,100,0,109,0,179,0,3,0,67,0, -100,0,109,0,51,0,2,0,73,0,85,0,2,0,115,94, -16,98,2,0,45,102,140,84,2,0,39,89,99,107,2,0, -14,102,187,108,4,0,42,104,15,95,26,79,62,121,2,0, -112,0,65,0,2,0,110,0,65,0,2,0,188,3,65,0, -2,0,109,0,65,0,2,0,107,0,65,0,2,0,75,0, -66,0,2,0,77,0,66,0,2,0,71,0,66,0,3,0, -99,0,97,0,108,0,4,0,107,0,99,0,97,0,108,0, -2,0,112,0,70,0,2,0,110,0,70,0,2,0,188,3, -70,0,2,0,188,3,103,0,2,0,109,0,103,0,2,0, 
-107,0,103,0,2,0,72,0,122,0,3,0,107,0,72,0, -122,0,3,0,77,0,72,0,122,0,3,0,71,0,72,0, -122,0,3,0,84,0,72,0,122,0,188,3,19,33,2,0, -66,0,188,3,108,0,109,0,19,33,2,0,66,0,109,0, -108,0,100,0,19,33,2,0,66,0,100,0,108,0,107,0, -19,33,2,0,66,0,107,0,108,0,2,0,102,0,109,0, -2,0,110,0,109,0,2,0,188,3,109,0,2,0,109,0, -109,0,2,0,99,0,109,0,2,0,107,0,109,0,109,0, -109,0,178,0,3,0,67,0,109,0,109,0,50,0,99,0, -109,0,178,0,3,0,67,0,99,0,109,0,50,0,109,0, -178,0,2,0,66,0,109,0,50,0,107,0,109,0,178,0, -3,0,67,0,107,0,109,0,50,0,109,0,109,0,179,0, -3,0,67,0,109,0,109,0,51,0,99,0,109,0,179,0, -3,0,67,0,99,0,109,0,51,0,109,0,179,0,2,0, -66,0,109,0,51,0,107,0,109,0,179,0,3,0,67,0, -107,0,109,0,51,0,3,0,109,0,21,34,115,0,109,0, -21,34,115,0,178,0,4,0,68,0,109,0,21,34,115,0, -50,0,2,0,80,0,97,0,3,0,107,0,80,0,97,0, -3,0,77,0,80,0,97,0,3,0,71,0,80,0,97,0, -3,0,114,0,97,0,100,0,5,0,114,0,97,0,100,0, -21,34,115,0,114,0,97,0,100,0,21,34,115,0,178,0, -6,0,70,0,114,0,97,0,100,0,21,34,115,0,50,0, -2,0,112,0,115,0,2,0,110,0,115,0,2,0,188,3, -115,0,2,0,109,0,115,0,2,0,112,0,86,0,2,0, -110,0,86,0,2,0,188,3,86,0,2,0,109,0,86,0, -2,0,107,0,86,0,2,0,77,0,86,0,2,0,112,0, -87,0,2,0,110,0,87,0,2,0,188,3,87,0,2,0, -109,0,87,0,2,0,107,0,87,0,2,0,77,0,87,0, -2,0,107,0,169,3,2,0,77,0,169,3,4,0,97,0, -46,0,109,0,46,0,2,0,66,0,113,0,2,0,99,0, -99,0,2,0,99,0,100,0,4,0,67,0,21,34,107,0, -103,0,3,0,67,0,111,0,46,0,2,0,100,0,66,0, -2,0,71,0,121,0,2,0,104,0,97,0,2,0,72,0, -80,0,2,0,105,0,110,0,2,0,75,0,75,0,2,0, -75,0,77,0,2,0,107,0,116,0,2,0,108,0,109,0, -2,0,108,0,110,0,3,0,108,0,111,0,103,0,2,0, -108,0,120,0,2,0,109,0,98,0,3,0,109,0,105,0, -108,0,3,0,109,0,111,0,108,0,2,0,80,0,72,0, -4,0,112,0,46,0,109,0,46,0,3,0,80,0,80,0, -77,0,2,0,80,0,82,0,2,0,115,0,114,0,2,0, -83,0,118,0,2,0,87,0,98,0,3,0,86,0,21,34, -109,0,3,0,65,0,21,34,109,0,2,0,49,0,229,101, -2,0,50,0,229,101,2,0,51,0,229,101,2,0,52,0, -229,101,2,0,53,0,229,101,2,0,54,0,229,101,2,0, -55,0,229,101,2,0,56,0,229,101,2,0,57,0,229,101, -3,0,49,0,48,0,229,101,3,0,49,0,49,0,229,101, -3,0,49,0,50,0,229,101,3,0,49,0,51,0,229,101, -3,0,49,0,52,0,229,101,3,0,49,0,53,0,229,101, -3,0,49,0,54,0,229,101,3,0,49,0,55,0,229,101, -3,0,49,0,56,0,229,101,3,0,49,0,57,0,229,101, -3,0,50,0,48,0,229,101,3,0,50,0,49,0,229,101, -3,0,50,0,50,0,229,101,3,0,50,0,51,0,229,101, -3,0,50,0,52,0,229,101,3,0,50,0,53,0,229,101, -3,0,50,0,54,0,229,101,3,0,50,0,55,0,229,101, -3,0,50,0,56,0,229,101,3,0,50,0,57,0,229,101, -3,0,51,0,48,0,229,101,3,0,51,0,49,0,229,101, -3,0,103,0,97,0,108,0,1,0,74,4,1,0,76,4, -1,0,38,1,1,0,83,1,1,0,39,167,1,0,107,2, -1,0,72,140,1,0,244,102,1,0,200,140,1,0,209,110, -1,0,50,78,1,0,229,83,1,0,81,89,1,0,135,85, -1,0,72,89,1,0,246,97,1,0,105,118,1,0,133,127, -1,0,63,134,1,0,186,135,1,0,248,136,1,0,143,144, -1,0,2,106,1,0,27,109,1,0,217,112,1,0,222,115, -1,0,61,132,1,0,106,145,1,0,241,153,1,0,130,78, -1,0,117,83,1,0,4,107,1,0,27,114,1,0,45,134, -1,0,30,158,1,0,80,93,1,0,235,111,1,0,205,133, -1,0,100,137,1,0,201,98,1,0,216,129,1,0,31,136, -1,0,202,94,1,0,23,103,1,0,106,109,1,0,252,114, -1,0,206,144,1,0,134,79,1,0,183,81,1,0,222,82, -1,0,196,100,1,0,211,106,1,0,16,114,1,0,231,118, -1,0,6,134,1,0,92,134,1,0,239,141,1,0,50,151, -1,0,111,155,1,0,250,157,1,0,140,120,1,0,127,121, -1,0,160,125,1,0,201,131,1,0,4,147,1,0,214,138, -1,0,223,88,1,0,4,95,1,0,96,124,1,0,126,128, -1,0,98,114,1,0,202,120,1,0,194,140,1,0,247,150, -1,0,216,88,1,0,98,92,1,0,19,106,1,0,218,109, -1,0,15,111,1,0,47,125,1,0,55,126,1,0,75,150, -1,0,210,82,1,0,139,128,1,0,220,81,1,0,204,81, 
-1,0,28,122,1,0,190,125,1,0,241,131,1,0,117,150, -1,0,128,139,1,0,207,98,1,0,254,138,1,0,57,78, -1,0,231,91,1,0,18,96,1,0,135,115,1,0,112,117, -1,0,23,83,1,0,251,120,1,0,191,79,1,0,169,95, -1,0,13,78,1,0,204,108,1,0,120,101,1,0,34,125, -1,0,195,83,1,0,94,88,1,0,1,119,1,0,73,132, -1,0,170,138,1,0,186,107,1,0,136,108,1,0,254,98, -1,0,229,130,1,0,160,99,1,0,101,117,1,0,174,78, -1,0,105,81,1,0,201,81,1,0,129,104,1,0,231,124, -1,0,111,130,1,0,210,138,1,0,207,145,1,0,245,82, -1,0,66,84,1,0,236,94,1,0,197,101,1,0,254,111, -1,0,42,121,1,0,173,149,1,0,106,154,1,0,151,158, -1,0,206,158,1,0,198,102,1,0,119,107,1,0,98,143, -1,0,116,94,1,0,144,97,1,0,0,98,1,0,154,100, -1,0,35,111,1,0,73,113,1,0,137,116,1,0,202,121, -1,0,244,125,1,0,111,128,1,0,38,143,1,0,238,132, -1,0,35,144,1,0,74,147,1,0,23,82,1,0,163,82, -1,0,189,84,1,0,200,112,1,0,194,136,1,0,201,94, -1,0,245,95,1,0,123,99,1,0,174,107,1,0,62,124, -1,0,117,115,1,0,228,78,1,0,249,86,1,0,186,93, -1,0,28,96,1,0,178,115,1,0,105,116,1,0,154,127, -1,0,70,128,1,0,52,146,1,0,246,150,1,0,72,151, -1,0,24,152,1,0,139,79,1,0,174,121,1,0,180,145, -1,0,184,150,1,0,225,96,1,0,134,78,1,0,218,80, -1,0,238,91,1,0,63,92,1,0,153,101,1,0,206,113, -1,0,66,118,1,0,252,132,1,0,124,144,1,0,136,102, -1,0,46,150,1,0,137,82,1,0,123,103,1,0,243,103, -1,0,65,109,1,0,156,110,1,0,9,116,1,0,89,117, -1,0,107,120,1,0,16,125,1,0,94,152,1,0,46,98, -1,0,120,150,1,0,43,80,1,0,25,93,1,0,234,109, -1,0,42,143,1,0,139,95,1,0,68,97,1,0,23,104, -1,0,134,150,1,0,41,82,1,0,15,84,1,0,101,92, -1,0,19,102,1,0,78,103,1,0,168,104,1,0,229,108, -1,0,6,116,1,0,226,117,1,0,121,127,1,0,207,136, -1,0,225,136,1,0,226,150,1,0,63,83,1,0,186,110, -1,0,29,84,1,0,208,113,1,0,152,116,1,0,250,133, -1,0,163,150,1,0,87,156,1,0,159,158,1,0,151,103, -1,0,203,109,1,0,232,129,1,0,32,123,1,0,146,124, -1,0,192,114,1,0,153,112,1,0,88,139,1,0,192,78, -1,0,54,131,1,0,58,82,1,0,7,82,1,0,166,94, -1,0,211,98,1,0,214,124,1,0,133,91,1,0,30,109, -1,0,180,102,1,0,59,143,1,0,77,150,1,0,211,94, -1,0,64,81,1,0,192,85,1,0,90,88,1,0,116,102, -1,0,222,81,1,0,42,115,1,0,202,118,1,0,60,121, -1,0,94,121,1,0,101,121,1,0,143,121,1,0,86,151, -1,0,190,124,1,0,18,134,1,0,248,138,1,0,56,144, -1,0,253,144,1,0,239,152,1,0,252,152,1,0,40,153, -1,0,180,157,1,0,222,144,1,0,183,150,1,0,174,79, -1,0,231,80,1,0,77,81,1,0,201,82,1,0,228,82, -1,0,81,83,1,0,157,85,1,0,6,86,1,0,104,86, -1,0,64,88,1,0,168,88,1,0,100,92,1,0,148,96, -1,0,104,97,1,0,142,97,1,0,242,97,1,0,79,101, -1,0,226,101,1,0,145,102,1,0,133,104,1,0,119,109, -1,0,26,110,1,0,34,111,1,0,110,113,1,0,43,114, -1,0,34,116,1,0,145,120,1,0,73,121,1,0,72,121, -1,0,80,121,1,0,86,121,1,0,141,121,1,0,142,121, -1,0,64,122,1,0,129,122,1,0,192,123,1,0,9,126, -1,0,65,126,1,0,114,127,1,0,5,128,1,0,237,129, -1,0,121,130,1,0,87,132,1,0,16,137,1,0,150,137, -1,0,1,139,1,0,57,139,1,0,211,140,1,0,8,141, -1,0,182,143,1,0,227,150,1,0,255,151,1,0,59,152, -1,0,117,96,2,0,80,216,238,222,1,0,24,130,1,0, -38,78,1,0,181,81,1,0,104,81,1,0,128,79,1,0, -69,81,1,0,128,81,1,0,199,82,1,0,250,82,1,0, -85,85,1,0,153,85,1,0,226,85,1,0,179,88,1,0, -68,89,1,0,84,89,1,0,98,90,1,0,40,91,1,0, -210,94,1,0,217,94,1,0,105,95,1,0,173,95,1,0, -216,96,1,0,78,97,1,0,8,97,1,0,96,97,1,0, -52,98,1,0,196,99,1,0,28,100,1,0,82,100,1,0, -86,101,1,0,27,103,1,0,86,103,1,0,219,110,1,0, -203,110,1,0,30,112,1,0,167,119,1,0,53,114,1,0, -175,114,1,0,113,116,1,0,6,117,1,0,59,117,1,0, -29,118,1,0,31,118,1,0,219,118,1,0,244,118,1,0, -74,119,1,0,64,119,1,0,204,120,1,0,177,122,1,0, -123,124,1,0,91,125,1,0,62,127,1,0,82,131,1,0, 
-239,131,1,0,121,135,1,0,65,137,1,0,134,137,1,0, -191,138,1,0,203,138,1,0,237,138,1,0,138,139,1,0, -56,143,1,0,114,144,1,0,153,145,1,0,118,146,1,0, -124,150,1,0,219,151,1,0,11,152,1,0,18,155,2,0, -74,216,74,220,2,0,74,216,68,220,2,0,76,216,213,223, -1,0,157,59,1,0,24,64,1,0,57,64,2,0,84,216, -73,222,2,0,87,216,208,220,2,0,95,216,211,222,1,0, -67,159,1,0,142,159,2,0,102,0,102,0,2,0,102,0, -105,0,2,0,102,0,108,0,3,0,102,0,102,0,105,0, -3,0,102,0,102,0,108,0,127,1,116,0,2,0,66,0, -115,0,116,0,2,0,115,0,116,0,2,0,116,5,118,5, -2,0,116,5,101,5,2,0,116,5,107,5,2,0,126,5, -118,5,2,0,116,5,109,5,2,14,217,5,180,5,2,17, -242,5,183,5,1,0,226,5,1,0,212,5,1,0,219,5, -1,0,220,5,1,0,221,5,1,0,232,5,1,0,234,5, -2,24,233,5,193,5,2,25,233,5,194,5,73,251,67,24, -233,5,188,5,193,5,73,251,67,25,233,5,188,5,194,5, -2,17,208,5,183,5,2,18,208,5,184,5,2,21,208,5, -188,5,2,21,209,5,188,5,2,21,210,5,188,5,2,21, -211,5,188,5,2,21,212,5,188,5,2,21,213,5,188,5, -2,21,214,5,188,5,2,21,216,5,188,5,2,21,217,5, -188,5,2,21,218,5,188,5,2,21,219,5,188,5,2,21, -220,5,188,5,2,21,222,5,188,5,2,21,224,5,188,5, -2,21,225,5,188,5,2,21,227,5,188,5,2,21,228,5, -188,5,2,21,230,5,188,5,2,21,231,5,188,5,2,21, -232,5,188,5,2,21,233,5,188,5,2,21,234,5,188,5, -2,19,213,5,185,5,2,23,209,5,191,5,2,23,219,5, -191,5,2,23,228,5,191,5,2,0,208,5,220,5,1,0, -113,6,1,0,123,6,1,0,126,6,1,0,128,6,1,0, -122,6,1,0,127,6,1,0,121,6,1,0,164,6,1,0, -166,6,1,0,132,6,1,0,131,6,1,0,134,6,1,0, -135,6,1,0,141,6,1,0,140,6,1,0,142,6,1,0, -136,6,1,0,152,6,1,0,145,6,1,0,169,6,1,0, -175,6,1,0,179,6,1,0,177,6,1,0,186,6,1,0, -187,6,1,0,193,6,1,0,190,6,1,0,210,6,1,0, -173,6,1,0,199,6,1,0,198,6,1,0,200,6,119,6, -66,0,199,6,116,6,1,0,203,6,1,0,197,6,1,0, -201,6,1,0,208,6,1,0,73,6,1,0,204,6,2,0, -40,6,44,6,2,0,40,6,45,6,2,0,40,6,46,6, -2,0,40,6,69,6,2,0,40,6,73,6,2,0,40,6, -74,6,2,0,42,6,44,6,2,0,42,6,45,6,2,0, -42,6,46,6,2,0,42,6,69,6,2,0,42,6,73,6, -2,0,42,6,74,6,2,0,43,6,44,6,2,0,43,6, -69,6,2,0,43,6,73,6,2,0,43,6,74,6,2,0, -44,6,45,6,2,0,44,6,69,6,2,0,45,6,44,6, -2,0,45,6,69,6,2,0,46,6,44,6,2,0,46,6, -45,6,2,0,46,6,69,6,2,0,51,6,44,6,2,0, -51,6,45,6,2,0,51,6,46,6,2,0,51,6,69,6, -2,0,53,6,45,6,2,0,53,6,69,6,2,0,54,6, -44,6,2,0,54,6,45,6,2,0,54,6,46,6,2,0, -54,6,69,6,2,0,55,6,45,6,2,0,55,6,69,6, -2,0,56,6,69,6,2,0,57,6,44,6,2,0,57,6, -69,6,2,0,58,6,44,6,2,0,58,6,69,6,2,0, -65,6,44,6,2,0,65,6,45,6,2,0,65,6,46,6, -2,0,65,6,69,6,2,0,65,6,73,6,2,0,65,6, -74,6,2,0,66,6,45,6,2,0,66,6,69,6,2,0, -66,6,73,6,2,0,66,6,74,6,2,0,67,6,39,6, -2,0,67,6,44,6,2,0,67,6,45,6,2,0,67,6, -46,6,2,0,67,6,68,6,2,0,67,6,69,6,2,0, -67,6,73,6,2,0,67,6,74,6,2,0,68,6,44,6, -2,0,68,6,45,6,2,0,68,6,46,6,2,0,68,6, -69,6,2,0,68,6,73,6,2,0,68,6,74,6,2,0, -69,6,44,6,2,0,69,6,45,6,2,0,69,6,46,6, -2,0,69,6,69,6,2,0,69,6,73,6,2,0,69,6, -74,6,2,0,70,6,44,6,2,0,70,6,45,6,2,0, -70,6,46,6,2,0,70,6,69,6,2,0,70,6,73,6, -2,0,70,6,74,6,2,0,71,6,44,6,2,0,71,6, -69,6,2,0,71,6,73,6,2,0,71,6,74,6,2,0, -74,6,44,6,2,0,74,6,45,6,2,0,74,6,46,6, -2,0,74,6,69,6,2,0,74,6,73,6,2,0,74,6, -74,6,2,35,48,6,112,6,2,35,49,6,112,6,2,35, -73,6,112,6,3,33,32,0,76,6,81,6,3,33,32,0, -77,6,81,6,3,33,32,0,78,6,81,6,3,33,32,0, -79,6,81,6,3,33,32,0,80,6,81,6,3,35,32,0, -81,6,112,6,2,0,40,6,49,6,2,0,40,6,50,6, -2,0,40,6,70,6,2,0,42,6,49,6,2,0,42,6, -50,6,2,0,42,6,70,6,2,0,43,6,49,6,2,0, -43,6,50,6,2,0,43,6,70,6,2,0,69,6,39,6, -2,0,70,6,49,6,2,0,70,6,50,6,2,0,70,6, -70,6,2,0,74,6,49,6,2,0,74,6,50,6,2,0, -74,6,70,6,2,0,40,6,71,6,2,0,42,6,71,6, -2,0,53,6,46,6,2,0,68,6,71,6,2,0,70,6, -71,6,2,35,71,6,112,6,2,0,74,6,71,6,2,0, 
-43,6,71,6,2,0,51,6,71,6,2,0,52,6,69,6, -2,0,52,6,71,6,3,33,64,6,78,6,81,6,3,33, -64,6,79,6,81,6,3,33,64,6,80,6,81,6,2,0, -55,6,73,6,2,0,55,6,74,6,2,0,57,6,73,6, -2,0,57,6,74,6,2,0,58,6,73,6,2,0,58,6, -74,6,2,0,51,6,73,6,2,0,51,6,74,6,2,0, -52,6,73,6,2,0,52,6,74,6,2,0,45,6,73,6, -2,0,45,6,74,6,2,0,44,6,73,6,2,0,44,6, -74,6,2,0,46,6,73,6,2,0,46,6,74,6,2,0, -53,6,73,6,2,0,53,6,74,6,2,0,54,6,73,6, -2,0,54,6,74,6,2,0,52,6,44,6,2,0,52,6, -45,6,2,0,52,6,46,6,2,0,52,6,49,6,2,0, -51,6,49,6,2,0,53,6,49,6,2,0,54,6,49,6, -2,27,39,6,75,6,3,0,42,6,44,6,69,6,3,0, -42,6,45,6,44,6,3,0,42,6,45,6,69,6,3,0, -42,6,46,6,69,6,3,0,42,6,69,6,44,6,3,0, -42,6,69,6,45,6,3,0,42,6,69,6,46,6,3,0, -44,6,69,6,45,6,3,0,45,6,69,6,74,6,3,0, -45,6,69,6,73,6,3,0,51,6,45,6,44,6,3,0, -51,6,44,6,45,6,3,0,51,6,44,6,73,6,3,0, -51,6,69,6,45,6,3,0,51,6,69,6,44,6,3,0, -51,6,69,6,69,6,3,0,53,6,45,6,45,6,3,0, -53,6,69,6,69,6,3,0,52,6,45,6,69,6,3,0, -52,6,44,6,74,6,3,0,52,6,69,6,46,6,3,0, -52,6,69,6,69,6,3,0,54,6,45,6,73,6,3,0, -54,6,46,6,69,6,3,0,55,6,69,6,45,6,3,0, -55,6,69,6,69,6,3,0,55,6,69,6,74,6,3,0, -57,6,44,6,69,6,3,0,57,6,69,6,69,6,3,0, -57,6,69,6,73,6,3,0,58,6,69,6,69,6,3,0, -58,6,69,6,74,6,3,0,58,6,69,6,73,6,3,0, -65,6,46,6,69,6,3,0,66,6,69,6,45,6,3,0, -66,6,69,6,69,6,3,0,68,6,45,6,69,6,3,0, -68,6,45,6,74,6,3,0,68,6,45,6,73,6,3,0, -68,6,44,6,44,6,3,0,68,6,46,6,69,6,3,0, -68,6,69,6,45,6,3,0,69,6,45,6,44,6,3,0, -69,6,45,6,69,6,3,0,69,6,45,6,74,6,3,0, -69,6,44,6,45,6,3,0,69,6,44,6,69,6,3,0, -69,6,46,6,44,6,3,0,69,6,46,6,69,6,3,0, -69,6,44,6,46,6,3,0,71,6,69,6,44,6,3,0, -71,6,69,6,69,6,3,0,70,6,45,6,69,6,3,0, -70,6,45,6,73,6,3,0,70,6,44,6,69,6,3,0, -70,6,44,6,73,6,3,0,70,6,69,6,74,6,3,0, -70,6,69,6,73,6,3,0,74,6,69,6,69,6,3,0, -40,6,46,6,74,6,3,0,42,6,44,6,74,6,3,0, -42,6,44,6,73,6,3,0,42,6,46,6,74,6,3,0, -42,6,46,6,73,6,3,0,42,6,69,6,74,6,3,0, -42,6,69,6,73,6,3,0,44,6,69,6,74,6,3,0, -44,6,45,6,73,6,3,0,44,6,69,6,73,6,3,0, -51,6,46,6,73,6,3,0,53,6,45,6,74,6,3,0, -52,6,45,6,74,6,3,0,54,6,45,6,74,6,3,0, -68,6,44,6,74,6,3,0,68,6,69,6,74,6,3,0, -74,6,45,6,74,6,3,0,74,6,44,6,74,6,3,0, -74,6,69,6,74,6,3,0,69,6,69,6,74,6,3,0, -66,6,69,6,74,6,3,0,70,6,45,6,74,6,3,0, -57,6,69,6,74,6,3,0,67,6,69,6,74,6,3,0, -70,6,44,6,45,6,3,0,69,6,46,6,74,6,3,0, -68,6,44,6,69,6,3,0,67,6,69,6,69,6,3,0, -44,6,45,6,74,6,3,0,45,6,44,6,74,6,3,0, -69,6,44,6,74,6,3,0,65,6,69,6,74,6,3,0, -40,6,45,6,74,6,3,0,51,6,46,6,74,6,3,0, -70,6,44,6,74,6,3,0,53,6,68,6,210,6,3,0, -66,6,68,6,210,6,4,0,39,6,68,6,68,6,71,6, -4,0,39,6,67,6,40,6,49,6,4,0,69,6,45,6, -69,6,47,6,4,0,53,6,68,6,57,6,69,6,4,0, -49,6,51,6,72,6,68,6,4,0,57,6,68,6,74,6, -71,6,4,0,72,6,51,6,68,6,69,6,3,0,53,6, -68,6,73,6,18,0,53,6,68,6,73,6,32,0,39,6, -68,6,68,6,71,6,32,0,57,6,68,6,74,6,71,6, -32,0,72,6,51,6,68,6,69,6,8,0,44,6,68,6, -32,0,44,6,68,6,39,6,68,6,71,6,4,0,49,6, -204,6,39,6,68,6,1,0,44,0,1,0,1,48,1,0, -2,48,1,0,58,0,1,0,33,0,1,0,63,0,1,0, -22,48,1,0,23,48,38,32,1,0,67,0,46,0,46,0, -46,0,37,32,66,0,46,0,46,0,1,0,20,32,1,0, -19,32,1,0,95,0,1,0,123,0,1,0,125,0,1,0, -20,48,1,0,21,48,1,0,16,48,1,0,17,48,1,0, -10,48,1,0,11,48,1,0,12,48,1,0,13,48,1,0, -14,48,1,0,15,48,1,0,91,0,1,0,93,0,62,32, -66,230,32,0,5,3,1,0,35,0,1,0,38,0,1,0, -42,0,1,0,45,0,1,0,60,0,1,0,62,0,1,0, -92,0,1,0,36,0,1,0,37,0,1,0,64,0,2,27, -32,0,75,6,2,27,64,6,75,6,2,28,32,0,76,6, -2,29,32,0,77,6,2,30,32,0,78,6,2,30,64,6, -78,6,2,31,32,0,79,6,2,31,64,6,79,6,2,32, -32,0,80,6,2,32,64,6,80,6,2,33,32,0,81,6, -2,33,64,6,81,6,2,34,32,0,82,6,2,34,64,6, -82,6,1,0,33,6,1,0,39,6,1,0,40,6,1,0, -41,6,1,0,42,6,1,0,43,6,1,0,44,6,1,0, 
-45,6,1,0,46,6,1,0,47,6,1,0,48,6,1,0, -49,6,1,0,50,6,1,0,51,6,1,0,52,6,1,0, -53,6,1,0,54,6,1,0,55,6,1,0,56,6,1,0, -57,6,1,0,58,6,1,0,65,6,1,0,66,6,1,0, -67,6,1,0,68,6,1,0,69,6,1,0,70,6,1,0, -71,6,1,0,72,6,1,0,74,6,2,0,68,6,39,6, -1,0,34,0,1,0,39,0,1,0,47,0,1,0,94,0, -1,0,124,0,1,0,126,0,1,0,133,41,1,0,134,41, -1,0,251,48,1,0,161,48,1,0,163,48,1,0,165,48, -1,0,167,48,1,0,169,48,1,0,227,48,1,0,229,48, -1,0,231,48,1,0,195,48,1,0,252,48,1,0,243,48, -100,49,1,0,65,0,96,17,49,49,1,0,65,0,0,17, -50,49,1,0,65,0,1,17,52,49,1,0,65,0,2,17, -55,49,1,0,65,0,3,17,56,49,1,0,65,0,4,17, -57,49,1,0,65,0,5,17,64,49,1,0,65,0,26,17, -65,49,1,0,65,0,6,17,66,49,1,0,65,0,7,17, -67,49,1,0,65,0,8,17,68,49,1,0,65,0,33,17, -69,49,1,0,65,0,9,17,70,49,1,0,65,0,10,17, -71,49,1,0,65,0,11,17,72,49,1,0,65,0,12,17, -73,49,1,0,65,0,13,17,74,49,1,0,65,0,14,17, -75,49,1,0,65,0,15,17,76,49,1,0,65,0,16,17, -77,49,1,0,65,0,17,17,78,49,1,0,65,0,18,17, -1,0,162,0,1,0,163,0,1,0,172,0,175,0,66,230, -32,0,4,3,1,0,166,0,1,0,165,0,1,0,169,32, -1,0,2,37,1,0,144,33,1,0,145,33,1,0,146,33, -1,0,147,33,1,0,160,37,1,0,203,37,4,216,52,216, -87,221,52,216,101,221,4,216,52,216,88,221,52,216,101,221, -52,216,95,221,52,216,110,221,4,0,70,216,52,216,88,221, -52,216,101,221,52,216,110,221,52,216,95,221,52,216,111,221, -4,0,70,216,52,216,88,221,52,216,101,221,52,216,111,221, -52,216,95,221,52,216,112,221,4,0,70,216,52,216,88,221, -52,216,101,221,52,216,112,221,52,216,95,221,52,216,113,221, -4,0,70,216,52,216,88,221,52,216,101,221,52,216,113,221, -52,216,95,221,52,216,114,221,4,0,70,216,52,216,88,221, -52,216,101,221,52,216,114,221,4,216,52,216,185,221,52,216, -101,221,4,216,52,216,186,221,52,216,101,221,52,216,187,221, -52,216,110,221,4,0,70,216,52,216,185,221,52,216,101,221, -52,216,110,221,52,216,188,221,52,216,110,221,4,0,70,216, -52,216,186,221,52,216,101,221,52,216,110,221,52,216,187,221, -52,216,111,221,4,0,70,216,52,216,185,221,52,216,101,221, -52,216,111,221,52,216,188,221,52,216,111,221,4,0,70,216, -52,216,186,221,52,216,101,221,52,216,111,221,1,0,49,1, -1,0,55,2,1,0,145,3,1,0,146,3,1,0,148,3, -1,0,149,3,1,0,150,3,1,0,151,3,1,0,153,3, -1,0,154,3,1,0,155,3,1,0,156,3,1,0,157,3, -1,0,158,3,1,0,159,3,1,0,161,3,244,3,1,0, -65,0,152,3,1,0,164,3,1,0,165,3,1,0,166,3, -1,0,167,3,1,0,168,3,1,0,7,34,1,0,177,3, -1,0,181,3,1,0,182,3,1,0,183,3,1,0,186,3, -1,0,187,3,1,0,189,3,1,0,190,3,1,0,191,3, -1,0,194,3,1,0,195,3,1,0,196,3,1,0,197,3, -1,0,200,3,1,0,201,3,1,0,2,34,245,3,1,0, -65,0,181,3,209,3,1,0,65,0,184,3,240,3,1,0, -65,0,186,3,213,3,1,0,65,0,198,3,241,3,1,0, -65,0,193,3,214,3,1,0,65,0,192,3,1,0,220,3, -1,0,221,3,1,0,110,6,1,0,161,6,1,0,111,6, -2,0,48,0,46,0,2,0,48,0,44,0,2,0,49,0, -44,0,2,0,50,0,44,0,2,0,51,0,44,0,2,0, -52,0,44,0,2,0,53,0,44,0,2,0,54,0,44,0, -2,0,55,0,44,0,2,0,56,0,44,0,2,0,57,0, -44,0,3,0,40,0,65,0,41,0,3,0,40,0,66,0, -41,0,3,0,40,0,67,0,41,0,3,0,40,0,68,0, -41,0,3,0,40,0,69,0,41,0,3,0,40,0,70,0, -41,0,3,0,40,0,71,0,41,0,3,0,40,0,72,0, -41,0,3,0,40,0,73,0,41,0,3,0,40,0,74,0, -41,0,3,0,40,0,75,0,41,0,3,0,40,0,76,0, -41,0,3,0,40,0,77,0,41,0,3,0,40,0,78,0, -41,0,3,0,40,0,79,0,41,0,3,0,40,0,80,0, -41,0,3,0,40,0,81,0,41,0,3,0,40,0,82,0, -41,0,3,0,40,0,83,0,41,0,3,0,40,0,84,0, -41,0,3,0,40,0,85,0,41,0,3,0,40,0,86,0, -41,0,3,0,40,0,87,0,41,0,3,0,40,0,88,0, -41,0,3,0,40,0,89,0,41,0,3,0,40,0,90,0, -41,0,3,0,20,48,83,0,21,48,2,0,67,0,68,0, -2,0,87,0,90,0,2,0,72,0,86,0,2,0,83,0, -68,0,2,0,83,0,83,0,3,0,80,0,80,0,86,0, -2,0,87,0,67,0,2,0,77,0,67,0,2,0,77,0, -68,0,2,0,77,0,82,0,2,0,68,0,74,0,2,0, -123,48,75,48,2,0,179,48,179,48,1,0,87,91,1,0, 
-204,83,1,0,26,89,1,0,227,137,1,0,164,78,1,0, -32,102,1,0,33,113,1,0,77,82,1,0,140,95,1,0, -141,81,1,0,176,101,1,0,29,82,1,0,66,125,1,0, -169,140,1,0,240,88,1,0,57,84,1,0,20,111,1,0, -149,98,1,0,85,99,1,0,74,144,1,0,7,99,1,0, -83,98,1,0,129,121,1,0,122,122,1,0,8,84,1,0, -128,110,1,0,51,117,1,0,114,82,1,0,182,85,1,0, -77,145,3,0,20,48,44,103,21,48,3,0,20,48,9,78, -21,48,3,0,20,48,140,78,21,48,3,0,20,48,137,91, -21,48,3,0,20,48,185,112,21,48,3,0,20,48,83,98, -21,48,3,0,20,48,215,118,21,48,3,0,20,48,221,82, -21,48,3,0,20,48,87,101,21,48,1,0,151,95,1,0, -239,83,1,0,61,78,1,0,56,78,1,0,65,78,2,0, -64,216,34,221,1,0,96,79,1,0,187,79,1,0,2,80, -1,0,122,80,1,0,153,80,1,0,207,80,1,0,158,52, -2,0,65,216,58,222,1,0,84,81,1,0,100,81,1,0, -119,81,2,0,65,216,28,221,1,0,185,52,1,0,103,81, -2,0,65,216,75,221,1,0,151,81,1,0,164,81,1,0, -204,78,1,0,172,81,2,0,100,216,223,221,1,0,3,82, -1,0,223,52,1,0,59,82,1,0,70,82,1,0,119,82, -1,0,21,53,1,0,5,83,1,0,6,83,1,0,73,83, -1,0,90,83,1,0,115,83,1,0,125,83,1,0,127,83, -2,0,66,216,44,222,1,0,112,112,1,0,202,83,1,0, -223,83,2,0,66,216,99,223,1,0,235,83,1,0,241,83, -1,0,6,84,1,0,158,84,1,0,56,84,1,0,72,84, -1,0,104,84,1,0,162,84,1,0,246,84,1,0,16,85, -1,0,83,85,1,0,99,85,1,0,132,85,1,0,171,85, -1,0,179,85,1,0,194,85,1,0,22,87,1,0,23,87, -1,0,81,86,1,0,116,86,1,0,238,88,1,0,206,87, -1,0,244,87,1,0,13,88,1,0,139,87,1,0,50,88, -1,0,49,88,1,0,172,88,2,0,69,216,228,220,1,0, -242,88,1,0,247,88,1,0,6,89,1,0,34,89,1,0, -98,89,2,0,69,216,168,222,2,0,69,216,234,222,1,0, -236,89,1,0,27,90,1,0,39,90,1,0,216,89,1,0, -102,90,1,0,238,54,1,0,252,54,1,0,8,91,1,0, -62,91,2,0,70,216,200,221,1,0,195,91,1,0,216,91, -1,0,243,91,2,0,70,216,24,223,1,0,255,91,1,0, -6,92,1,0,83,95,1,0,129,55,1,0,96,92,1,0, -192,92,1,0,141,92,2,0,71,216,228,221,1,0,67,93, -2,0,71,216,230,221,1,0,110,93,1,0,107,93,1,0, -124,93,1,0,225,93,1,0,226,93,1,0,47,56,1,0, -253,93,1,0,40,94,1,0,61,94,1,0,105,94,1,0, -98,56,2,0,72,216,131,221,1,0,124,56,1,0,176,94, -1,0,179,94,1,0,182,94,2,0,104,216,146,223,2,0, -72,216,49,223,1,0,1,130,1,0,34,95,1,0,199,56, -2,0,76,216,184,222,2,0,88,216,218,221,1,0,98,95, -1,0,107,95,1,0,227,56,1,0,154,95,1,0,205,95, -1,0,215,95,1,0,249,95,1,0,129,96,1,0,58,57, -1,0,28,57,2,0,73,216,212,222,1,0,199,96,1,0, -72,97,1,0,76,97,1,0,122,97,1,0,178,97,1,0, -164,97,1,0,175,97,1,0,222,97,1,0,16,98,1,0, -27,98,1,0,93,98,1,0,177,98,1,0,212,98,1,0, -80,99,2,0,74,216,12,223,1,0,61,99,1,0,252,98, -1,0,104,99,1,0,131,99,1,0,228,99,2,0,74,216, -241,223,1,0,34,100,1,0,197,99,1,0,169,99,1,0, -46,58,1,0,105,100,1,0,126,100,1,0,157,100,1,0, -119,100,1,0,108,58,1,0,108,101,2,0,76,216,10,220, -1,0,227,101,1,0,248,102,1,0,73,102,1,0,25,59, -1,0,8,59,1,0,228,58,1,0,146,81,1,0,149,81, -1,0,0,103,1,0,156,102,1,0,173,128,1,0,217,67, -1,0,33,103,1,0,94,103,1,0,83,103,2,0,76,216, -195,223,1,0,73,59,1,0,250,103,1,0,133,103,1,0, -82,104,2,0,77,216,109,220,1,0,142,104,1,0,31,104, -1,0,20,105,1,0,66,105,1,0,163,105,1,0,234,105, -1,0,168,106,2,0,77,216,163,222,1,0,219,106,1,0, -24,60,1,0,33,107,2,0,78,216,167,220,1,0,84,107, -1,0,78,60,1,0,114,107,1,0,159,107,1,0,187,107, -2,0,78,216,141,222,2,0,71,216,11,221,2,0,78,216, -250,222,1,0,78,108,2,0,79,216,188,220,1,0,191,108, -1,0,205,108,1,0,103,108,1,0,22,109,1,0,62,109, -1,0,105,109,1,0,120,109,1,0,133,109,2,0,79,216, -30,221,1,0,52,109,1,0,47,110,1,0,110,110,1,0, -51,61,1,0,199,110,2,0,79,216,209,222,1,0,249,109, -1,0,110,111,2,0,79,216,94,223,2,0,79,216,142,223, -1,0,198,111,1,0,57,112,1,0,27,112,1,0,150,61, -1,0,74,112,1,0,125,112,1,0,119,112,1,0,173,112, 
-2,0,65,216,37,221,1,0,69,113,2,0,80,216,99,222, -1,0,156,113,2,0,80,216,171,223,1,0,40,114,1,0, -80,114,2,0,81,216,8,222,1,0,128,114,1,0,149,114, -2,0,81,216,53,223,2,0,82,216,20,220,1,0,122,115, -1,0,139,115,1,0,172,62,1,0,165,115,1,0,184,62, -1,0,71,116,1,0,92,116,1,0,133,116,1,0,202,116, -1,0,27,63,1,0,36,117,2,0,83,216,54,220,1,0, -62,117,2,0,83,216,146,220,2,0,72,216,159,221,1,0, -16,118,2,0,83,216,161,223,2,0,83,216,184,223,2,0, -84,216,68,220,1,0,252,63,1,0,8,64,2,0,84,216, -243,220,2,0,84,216,242,220,2,0,84,216,25,221,2,0, -84,216,51,221,1,0,30,119,1,0,31,119,1,0,139,119, -1,0,70,64,1,0,150,64,2,0,85,216,29,220,1,0, -78,120,1,0,227,64,2,0,85,216,38,222,2,0,85,216, -154,222,2,0,85,216,197,222,1,0,235,121,1,0,47,65, -1,0,74,122,1,0,79,122,2,0,86,216,124,221,2,0, -86,216,167,222,1,0,238,122,1,0,2,66,2,0,86,216, -171,223,1,0,198,123,1,0,201,123,1,0,39,66,2,0, -87,216,128,220,1,0,210,124,1,0,160,66,1,0,232,124, -1,0,227,124,1,0,0,125,2,0,87,216,134,223,1,0, -99,125,1,0,1,67,1,0,199,125,1,0,2,126,1,0, -69,126,1,0,52,67,2,0,88,216,40,222,2,0,88,216, -71,222,1,0,89,67,2,0,88,216,217,222,1,0,122,127, -2,0,88,216,62,223,1,0,149,127,1,0,250,127,2,0, -89,216,218,220,2,0,89,216,35,221,1,0,96,128,2,0, -89,216,168,221,1,0,112,128,2,0,76,216,95,223,1,0, -213,67,1,0,178,128,1,0,3,129,1,0,11,68,1,0, -62,129,1,0,181,90,2,0,89,216,167,223,2,0,89,216, -181,223,2,0,76,216,147,223,2,0,76,216,156,223,1,0, -4,130,1,0,158,143,1,0,107,68,1,0,145,130,1,0, -139,130,1,0,157,130,1,0,179,82,1,0,177,130,1,0, -179,130,1,0,189,130,1,0,230,130,2,0,90,216,60,223, -1,0,29,131,1,0,99,131,1,0,173,131,1,0,35,131, -1,0,189,131,1,0,231,131,1,0,83,131,1,0,202,131, -1,0,204,131,1,0,220,131,2,0,91,216,54,220,2,0, -91,216,107,221,2,0,91,216,213,220,1,0,43,69,1,0, -241,132,1,0,243,132,1,0,22,133,2,0,92,216,202,223, -1,0,100,133,2,0,91,216,44,223,1,0,93,69,1,0, -97,69,2,0,91,216,177,223,2,0,92,216,210,220,1,0, -107,69,1,0,80,134,1,0,103,134,1,0,105,134,1,0, -169,134,1,0,136,134,1,0,14,135,1,0,226,134,1,0, -40,135,1,0,107,135,1,0,134,135,1,0,215,69,1,0, -225,135,1,0,1,136,1,0,249,69,1,0,96,136,2,0, -93,216,103,222,1,0,215,136,1,0,222,136,1,0,53,70, -1,0,250,136,1,0,187,52,2,0,94,216,174,220,2,0, -94,216,102,221,1,0,190,70,1,0,199,70,1,0,160,138, -2,0,95,216,168,220,1,0,171,140,1,0,193,140,1,0, -27,141,1,0,119,141,2,0,95,216,47,223,2,0,66,216, -4,220,1,0,203,141,1,0,188,141,1,0,240,141,2,0, -66,216,222,220,1,0,212,142,2,0,97,216,210,221,2,0, -97,216,237,221,1,0,148,144,1,0,241,144,1,0,17,145, -2,0,97,216,46,223,1,0,27,145,1,0,56,146,1,0, -215,146,1,0,216,146,1,0,124,146,1,0,249,147,1,0, -21,148,2,0,98,216,250,223,1,0,139,149,1,0,149,73, -1,0,183,149,2,0,99,216,119,221,1,0,230,73,1,0, -195,150,1,0,178,93,1,0,35,151,2,0,100,216,69,221, -2,0,100,216,26,222,1,0,110,74,1,0,118,74,1,0, -224,151,2,0,101,216,10,220,1,0,178,74,2,0,101,216, -150,220,1,0,41,152,2,0,101,216,182,221,1,0,226,152, -1,0,51,75,1,0,41,153,1,0,167,153,1,0,194,153, -1,0,254,153,1,0,206,75,2,0,102,216,48,223,1,0, -64,156,1,0,253,156,1,0,206,76,1,0,237,76,1,0, -103,157,2,0,104,216,206,220,1,0,248,76,2,0,104,216, -5,221,2,0,104,216,14,222,2,0,104,216,145,222,1,0, -86,77,1,0,254,158,1,0,5,159,1,0,15,159,1,0, -22,159,2,0,105,216,0,222,68,0,125,1,2,0,67,230, -68,0,90,0,12,3,68,0,126,1,2,0,67,230,68,0, -122,0,12,3,100,0,126,1,2,0,67,230,100,0,122,0, -12,3,210,3,1,3,2,0,66,230,165,3,1,3,210,3, -8,3,2,0,66,230,165,3,8,3,127,1,7,3,2,0, -66,230,115,0,7,3,172,3,66,230,177,3,1,3,173,3, -66,230,181,3,1,3,174,3,66,230,183,3,1,3,175,3, -66,230,185,3,1,3,204,3,66,230,191,3,1,3,205,3, 
-66,230,197,3,1,3,206,3,66,230,201,3,1,3,134,3, -66,230,145,3,1,3,136,3,66,230,149,3,1,3,137,3, -66,230,151,3,1,3,144,3,1,0,67,230,185,3,8,3, -1,3,138,3,66,230,153,3,1,3,176,3,1,0,67,230, -197,3,8,3,1,3,142,3,66,230,165,3,1,3,140,3, -66,230,159,3,1,3,143,3,66,230,169,3,1,3,197,0, -66,230,65,0,10,3,4,0,40,0,0,17,97,17,41,0, -4,0,40,0,2,17,97,17,41,0,4,0,40,0,3,17, -97,17,41,0,4,0,40,0,5,17,97,17,41,0,4,0, -40,0,6,17,97,17,41,0,4,0,40,0,7,17,97,17, -41,0,4,0,40,0,9,17,97,17,41,0,4,0,40,0, -11,17,97,17,41,0,4,0,40,0,12,17,97,17,41,0, -4,0,40,0,14,17,97,17,41,0,4,0,40,0,15,17, -97,17,41,0,4,0,40,0,16,17,97,17,41,0,4,0, -40,0,17,17,97,17,41,0,4,0,40,0,18,17,97,17, -41,0,4,0,40,0,12,17,110,17,41,0,7,0,40,0, -11,17,105,17,12,17,101,17,171,17,41,0,6,0,40,0, -11,17,105,17,18,17,110,17,41,0,2,0,0,17,97,17, -2,0,2,17,97,17,2,0,3,17,97,17,2,0,5,17, -97,17,2,0,6,17,97,17,2,0,7,17,97,17,2,0, -9,17,97,17,2,0,11,17,97,17,2,0,12,17,97,17, -2,0,14,17,97,17,2,0,15,17,97,17,2,0,16,17, -97,17,2,0,17,17,97,17,2,0,18,17,97,17,5,0, -14,17,97,17,183,17,0,17,105,17,4,0,12,17,110,17, -11,17,116,17,2,0,11,17,110,17,162,48,209,48,252,48, -200,48,4,0,69,0,162,48,207,48,154,48,252,48,200,48, -162,48,243,48,218,48,162,48,4,0,69,0,162,48,243,48, -216,48,154,48,162,48,164,48,203,48,243,48,176,48,4,0, -69,8,164,48,203,48,243,48,175,48,153,48,168,48,185,48, -175,48,252,48,201,48,5,0,70,8,168,48,185,48,175,48, -252,48,200,48,153,48,172,48,68,0,171,48,153,48,237,48, -243,48,172,48,68,0,171,48,153,48,243,48,222,48,174,48, -172,48,2,0,68,8,173,48,153,48,171,48,153,48,174,48, -68,0,173,48,153,48,203,48,252,48,174,48,235,48,192,48, -252,48,4,0,70,0,173,48,153,48,235,48,191,48,153,48, -252,48,173,48,237,48,176,48,233,48,224,48,5,0,70,0, -173,48,237,48,175,48,153,48,233,48,224,48,176,48,68,0, -175,48,153,48,233,48,224,48,176,48,70,0,175,48,153,48, -233,48,224,48,200,48,243,48,175,48,235,48,188,48,164,48, -237,48,5,0,70,0,175,48,235,48,187,48,153,48,164,48, -237,48,179,48,252,48,221,48,3,0,68,8,179,48,252,48, -219,48,154,48,183,48,234,48,243,48,176,48,4,0,69,8, -183,48,234,48,243,48,175,48,153,48,192,48,68,0,191,48, -153,48,252,48,185,48,199,48,67,0,198,48,153,48,183,48, -201,48,67,0,200,48,153,48,235,48,209,48,70,0,207,48, -154,48,252,48,187,48,243,48,200,48,209,48,68,0,207,48, -154,48,252,48,196,48,208,48,69,0,207,48,153,48,252,48, -236,48,235,48,212,48,70,0,210,48,154,48,162,48,185,48, -200,48,235,48,212,48,68,0,210,48,154,48,175,48,235,48, -212,48,67,0,210,48,154,48,179,48,211,48,67,0,210,48, -153,48,235,48,213,48,161,48,233,48,195,48,201,48,5,0, -70,8,213,48,161,48,233,48,195,48,200,48,153,48,214,48, -70,0,213,48,153,48,195,48,183,48,167,48,235,48,218,48, -67,0,216,48,154,48,189,48,218,48,68,0,216,48,154,48, -203,48,210,48,218,48,68,0,216,48,154,48,243,48,185,48, -218,48,252,48,184,48,3,0,69,8,216,48,154,48,252,48, -183,48,153,48,217,48,68,0,216,48,153,48,252,48,191,48, -221,48,69,0,219,48,154,48,164,48,243,48,200,48,220,48, -68,0,219,48,153,48,235,48,200,48,221,48,243,48,201,48, -3,0,69,8,219,48,154,48,243,48,200,48,153,48,223,48, -234,48,208,48,252,48,235,48,5,0,70,0,223,48,234,48, -207,48,153,48,252,48,235,48,225,48,172,48,2,0,67,8, -225,48,171,48,153,48,225,48,172,48,200,48,243,48,4,0, -69,0,225,48,171,48,153,48,200,48,243,48,228,48,252,48, -201,48,3,0,68,8,228,48,252,48,200,48,153,48,235,48, -212,48,252,48,3,0,68,0,235,48,210,48,154,48,252,48, -235,48,252,48,214,48,235,48,4,0,69,0,235,48,252,48, -213,48,153,48,235,48,236,48,243,48,200,48,178,48,243,48, -5,0,70,0,236,48,243,48,200,48,177,48,153,48,243,48, 
-192,6,66,230,213,6,84,6,211,6,66,230,210,6,84,6, -38,6,67,0,74,6,84,6,39,6,38,6,67,0,74,6, -84,6,213,6,38,6,67,0,74,6,84,6,72,6,38,6, -67,0,74,6,84,6,199,6,38,6,67,0,74,6,84,6, -198,6,38,6,67,0,74,6,84,6,200,6,38,6,67,0, -74,6,84,6,208,6,38,6,67,0,74,6,84,6,73,6, -38,6,67,0,74,6,84,6,44,6,38,6,67,0,74,6, -84,6,45,6,38,6,67,0,74,6,84,6,69,6,38,6, -67,0,74,6,84,6,74,6,38,6,67,0,74,6,84,6, -49,6,38,6,67,0,74,6,84,6,50,6,38,6,67,0, -74,6,84,6,70,6,38,6,67,0,74,6,84,6,46,6, -38,6,67,0,74,6,84,6,71,6,34,6,66,230,39,6, -83,6,35,6,66,230,39,6,84,6,36,6,66,230,72,6, -84,6,37,6,66,220,39,6,85,6,38,6,66,230,74,6, -84,6,68,6,34,6,2,0,67,230,68,6,39,6,83,6, -68,6,35,6,2,0,67,230,68,6,39,6,84,6,68,6, -37,6,2,0,67,220,68,6,39,6,85,6,199,48,66,8, -198,48,153,48,230,230,129,230,0,3,230,230,129,230,1,3, -230,230,129,230,19,3,230,230,130,230,8,3,1,3,0,129, -130,130,113,15,114,15,0,129,130,132,113,15,116,15,0,129, -130,130,113,15,128,15,1,0,170,17,1,0,172,17,1,0, -173,17,1,0,176,17,1,0,177,17,1,0,178,17,1,0, -179,17,1,0,180,17,1,0,181,17,1,0,97,17,1,0, -98,17,1,0,99,17,1,0,100,17,1,0,101,17,1,0, -102,17,1,0,103,17,1,0,104,17,1,0,105,17,1,0, -106,17,1,0,107,17,1,0,108,17,1,0,109,17,1,0, -110,17,1,0,111,17,1,0,112,17,1,0,113,17,1,0, -114,17,1,0,115,17,1,0,116,17,1,0,117,17,0,8, -129,8,153,48,0,8,129,8,154,48,51,49,1,0,65,0, -170,17,53,49,1,0,65,0,172,17,54,49,1,0,65,0, -173,17,58,49,1,0,65,0,176,17,59,49,1,0,65,0, -177,17,60,49,1,0,65,0,178,17,61,49,1,0,65,0, -179,17,62,49,1,0,65,0,180,17,63,49,1,0,65,0, -181,17,79,49,1,0,65,0,97,17,80,49,1,0,65,0, -98,17,81,49,1,0,65,0,99,17,82,49,1,0,65,0, -100,17,83,49,1,0,65,0,101,17,84,49,1,0,65,0, -102,17,85,49,1,0,65,0,103,17,86,49,1,0,65,0, -104,17,87,49,1,0,65,0,105,17,88,49,1,0,65,0, -106,17,89,49,1,0,65,0,107,17,90,49,1,0,65,0, -108,17,91,49,1,0,65,0,109,17,92,49,1,0,65,0, -110,17,93,49,1,0,65,0,111,17,94,49,1,0,65,0, -112,17,95,49,1,0,65,0,113,17,96,49,1,0,65,0, -114,17,97,49,1,0,65,0,115,17,98,49,1,0,65,0, -116,17,99,49,1,0,65,0,117,17,0,0,224,239,67,127, -223,112,207,135,199,230,102,70,100,70,102,91,18,0,0,4, -0,0,0,67,32,2,41,174,194,192,255,255,195,114,191,0, -0,0,0,0,0,0,64,0,128,136,0,0,254,0,0,7, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,152,0,193,102,224,128,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,39,220,2,158,144,170,170,170,170, -32,0,218,39,20,0,0,0,0,0,2,0,78,114,109,50, -4,0,0,0,12,1,0,0,0,0,0,0,0,0,0,0, -80,0,0,0,28,114,0,0,4,199,0,0,4,200,0,0, -4,200,0,0,4,200,0,0,4,200,0,0,4,200,0,0, -65,0,0,0,65,0,0,0,168,6,0,0,80,22,0,0, -232,84,0,0,0,252,0,0,96,9,0,0,230,75,0,0, -46,84,0,0,228,84,0,0,173,0,0,0,0,0,0,0, -51,105,114,84,0,0,227,7,251,48,208,0,31,6,8,7, -0,0,64,0,123,0,187,0,251,0,58,1,122,1,183,1, -247,1,54,2,100,2,157,2,221,2,29,3,93,3,157,3, -221,3,28,4,90,4,154,4,212,4,5,5,62,5,126,5, -174,5,230,5,31,6,95,6,142,6,205,6,31,6,226,6, -32,7,78,7,31,6,123,7,187,7,248,7,24,8,87,8, -150,8,211,8,242,8,47,9,24,8,104,9,150,9,213,9, -31,6,15,10,38,10,102,10,125,10,188,10,31,6,242,10, -18,11,77,11,90,11,149,11,201,11,6,12,70,12,128,12, -155,12,31,6,214,12,22,13,86,13,119,13,173,13,234,13, -31,6,31,6,31,6,31,6,31,6,13,14,31,6,45,14, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,107,14,31,6,160,14,214,14, 
-11,15,31,6,34,15,31,6,76,15,31,6,31,6,31,6, -136,15,168,15,232,15,31,6,38,16,102,16,154,16,198,16, -247,8,31,6,6,17,70,17,128,17,192,17,249,17,57,18, -121,18,185,18,249,18,57,19,121,19,185,19,249,19,57,20, -121,20,184,20,248,20,40,21,104,21,168,21,232,21,27,22, -88,22,151,22,215,22,13,23,59,23,31,6,31,6,31,6, -31,6,102,23,166,23,230,23,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -26,24,39,24,31,6,94,24,31,6,31,6,31,6,31,6, -158,24,205,24,13,25,41,25,31,6,92,25,31,6,156,25, -31,6,31,6,220,25,252,25,60,26,124,26,188,26,252,26, -60,27,119,27,181,27,245,27,53,28,117,28,181,28,31,6, -245,28,53,29,117,29,181,29,245,29,53,30,117,30,181,30, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,245,30,53,31,85,31,135,31,185,31,247,31,54,32, -112,32,31,6,31,6,172,32,222,32,140,14,251,8,30,33, -31,6,31,6,46,33,110,33,31,6,165,33,229,33,131,32, -37,34,45,34,53,34,61,34,41,34,49,34,57,34,37,34, -45,34,53,34,61,34,41,34,49,34,57,34,37,34,45,34, -53,34,61,34,41,34,49,34,57,34,37,34,45,34,53,34, -61,34,41,34,49,34,57,34,37,34,45,34,53,34,61,34, -41,34,49,34,57,34,37,34,45,34,53,34,61,34,41,34, -49,34,57,34,37,34,45,34,53,34,61,34,41,34,49,34, 
-57,34,37,34,45,34,53,34,61,34,41,34,49,34,57,34, -37,34,45,34,53,34,61,34,41,34,49,34,57,34,37,34, -45,34,53,34,61,34,41,34,49,34,57,34,37,34,45,34, -53,34,61,34,41,34,49,34,57,34,37,34,45,34,53,34, -61,34,41,34,49,34,57,34,37,34,45,34,53,34,61,34, -41,34,49,34,57,34,37,34,45,34,53,34,61,34,41,34, -49,34,57,34,37,34,45,34,53,34,61,34,41,34,49,34, -57,34,37,34,45,34,53,34,61,34,41,34,49,34,57,34, -37,34,45,34,53,34,61,34,41,34,49,34,57,34,37,34, -45,34,53,34,61,34,41,34,49,34,57,34,37,34,45,34, -53,34,61,34,41,34,49,34,57,34,37,34,45,34,53,34, -61,34,41,34,49,34,57,34,37,34,45,34,53,34,61,34, -41,34,49,34,57,34,37,34,45,34,53,34,61,34,41,34, -49,34,57,34,37,34,45,34,53,34,61,34,41,34,49,34, -57,34,37,34,45,34,53,34,61,34,41,34,49,34,57,34, -37,34,45,34,53,34,61,34,41,34,49,34,113,34,31,6, -177,34,238,34,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,46,35,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,110,35,174,35,238,35,46,36, -110,36,174,36,238,36,46,37,110,37,174,37,236,37,30,38, -94,38,158,38,222,38,30,39,94,39,156,39,220,39,28,40, -92,40,156,40,220,40,27,41,91,41,155,41,219,41,26,42, -87,7,102,7,126,7,157,7,183,7,183,7,183,7,187,7, -183,7,183,7,183,7,183,7,183,7,183,7,183,7,183,7, -183,7,183,7,183,7,183,7,183,7,183,7,183,7,183,7, -183,7,183,7,183,7,183,7,183,7,183,7,183,7,183,7, -183,7,183,7,183,7,183,7,183,7,183,7,183,7,183,7, -183,7,183,7,183,7,183,7,183,7,183,7,183,7,183,7, -183,7,183,7,183,7,183,7,219,7,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,214,12,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,5,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,84,42,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,100,42,100,42,108,42, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -100,42,100,42,112,42,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,116,42,31,6,31,6, -132,42,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,154,15,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,148,42,148,42,148,42, -161,42,31,6,31,6,31,6,31,6,31,6,31,6,173,42, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,183,42,5,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -218,8,31,6,31,6,169,10,31,6,199,42,212,42,224,42, -31,6,31,6,31,6,31,6,75,7,31,6,235,42,251,42, -31,6,31,6,31,6,207,8,31,6,31,6,31,6,31,6, -11,43,31,6,31,6,31,6,22,43,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,29,43, -31,6,31,6,31,6,31,6,40,43,55,43,229,9,69,43, -73,7,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,83,43,135,8,31,6,31,6,31,6,31,6,31,6, -99,43,114,43,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,197,9, -122,43,138,43,31,6,31,6,31,6,169,10,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,148,43,31,6,31,6, -31,6,31,6,31,6,31,6,213,8,31,6,31,6,31,6, 
-31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,145,43,31,6,31,6,31,6,31,6,31,6, -31,6,164,43,164,43,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,60,9,31,6,31,6, -31,6,220,8,217,8,31,6,31,6,31,6,31,6,215,8, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,169,10,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,208,12, -31,6,31,6,31,6,31,6,217,8,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,180,43,31,6,31,6,31,6, -147,16,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,164,43,164,43,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,185,43,22,15,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -200,43,216,43,230,43,243,43,31,6,167,42,255,43,15,44, -31,6,31,6,31,6,31,6,40,14,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -220,23,236,23,226,23,242,23,232,23,31,44,238,23,228,23, -244,23,43,44,59,44,75,44,91,44,220,23,236,23,226,23, -103,44,119,44,222,23,99,44,135,44,151,44,234,23,224,23, -240,23,230,23,220,23,236,23,226,23,242,23,232,23,222,23, -238,23,228,23,244,23,234,23,224,23,240,23,230,23,220,23, -236,23,226,23,167,44,183,44,199,44,215,44,231,44,189,44, -205,44,221,44,179,44,195,44,211,44,227,44,185,44,175,44, -217,44,175,44,191,44,207,44,247,44,7,45,13,45,9,45, -29,45,37,45,51,45,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,147,16,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,178,5,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,67,45,31,6,31,6,83,45, -83,45,97,45,31,6,109,45,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,125,45, -141,45,157,45,173,45,187,45,203,45,219,45,235,45,251,45, -11,46,26,46,11,46,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,42,46, -194,23,58,46,220,23,74,46,31,6,90,46,31,6,31,6, -106,46,31,6,31,6,31,6,31,6,31,6,31,6,122,46, -138,46,154,46,170,46,186,46,202,46,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,218,46, -234,46,250,46,10,47,26,47,42,47,58,47,74,47,90,47, -106,47,122,47,138,47,154,47,170,47,186,47,202,47,218,47, -234,47,250,47,10,48,26,48,42,48,58,48,74,48,90,48, -106,48,122,48,138,48,154,48,170,48,186,48,202,48,218,48, -234,48,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,31,6, -31,6,31,6,31,6,31,6,31,6,31,6,31,6,216,20, -216,20,216,20,216,20,216,20,216,20,216,20,216,20,216,20, -216,20,216,20,216,20,216,20,216,20,216,20,216,20,216,20, -216,20,216,20,216,20,216,20,216,20,216,20,216,20,216,20, -216,20,216,20,216,20,216,20,216,20,216,20,216,20,53,4, -85,4,117,4,208,0,208,0,149,4,173,4,192,4,220,4, -249,4,21,5,50,5,79,5,110,5,139,5,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,165,5,208,0,193,5,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,216,5,208,0,208,0,208,0, 
-208,0,208,0,208,0,208,0,208,0,227,5,0,6,32,6, -64,6,208,0,208,0,208,0,208,0,96,6,116,6,208,0, -208,0,135,6,208,0,208,0,167,6,183,6,215,6,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,247,6, -23,7,208,0,208,0,55,7,55,7,55,7,55,7,55,7, -55,7,55,7,55,7,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,4,0, -8,0,12,0,1,0,1,0,248,250,248,250,248,250,248,250, -248,250,248,250,248,250,248,250,248,250,248,250,248,250,248,250, -248,250,248,250,248,250,248,250,249,250,248,250,248,250,248,250, -248,250,248,250,248,250,248,250,248,250,248,250,1,0,1,0, -1,0,1,0,1,0,1,0,16,0,80,0,92,0,112,0, -136,0,204,0,208,0,236,0,12,1,68,1,76,1,96,1, -120,1,132,1,168,1,232,1,1,0,240,1,16,2,44,2, -76,2,152,2,160,2,188,2,196,2,236,2,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,84,22,1,0, -90,22,1,0,1,0,230,84,1,0,94,22,1,0,1,0, -101,22,105,22,108,22,115,22,1,0,1,0,118,22,125,22, -128,22,1,0,133,22,141,22,149,22,1,0,252,250,252,250, -252,250,252,250,252,250,252,250,248,250,252,250,252,250,252,250, -252,250,252,250,252,250,252,250,252,250,252,250,249,250,252,250, -252,250,252,250,252,250,252,250,252,250,1,0,248,250,252,250, -252,250,252,250,252,250,252,250,249,250,156,22,98,9,104,9, -170,6,110,9,192,6,202,6,4,3,212,6,116,9,122,9, -222,6,128,9,134,9,140,9,146,9,244,6,1,0,152,9, -158,9,164,9,254,6,20,7,38,7,1,0,12,3,170,9, -176,9,182,9,48,7,188,9,1,0,194,9,4,250,200,9, -4,250,70,7,4,250,207,9,4,250,212,9,4,250,218,9, -4,250,224,9,4,250,230,9,4,250,236,9,1,250,1,0, -4,250,92,7,4,250,242,9,4,250,248,9,4,250,255,9, -4,250,4,10,4,250,10,10,4,250,16,10,4,250,22,10, -4,250,29,10,4,250,34,10,1,250,1,0,4,250,40,10, -4,250,46,10,4,250,52,10,4,250,59,10,162,22,1,0, -168,22,168,22,4,250,65,10,4,250,71,10,1,0,4,250, -76,10,4,250,83,10,4,250,88,10,175,22,1,250,1,0, -4,250,94,10,4,250,101,10,4,250,106,10,180,22,1,250, -1,0,4,250,106,7,4,250,112,10,4,250,118,10,1,250, -1,0,4,250,124,10,4,250,131,10,4,250,136,10,4,250, -120,7,4,250,142,10,4,250,149,10,4,250,130,7,4,250, -155,10,4,250,160,10,1,250,1,0,4,250,140,7,4,250, -150,7,4,250,166,10,4,250,172,10,4,250,178,10,4,250, -185,10,4,250,190,10,4,250,196,10,232,75,4,250,202,10, -4,250,208,10,4,250,214,10,186,22,1,0,191,22,1,250, -1,0,1,250,1,0,195,22,1,250,1,0,199,22,203,22, -1,250,1,0,1,0,207,22,211,22,215,22,1,250,1,0, -219,22,223,22,1,0,227,22,231,22,1,250,1,0,1,0, -1,0,235,22,239,22,1,0,243,22,4,250,160,7,1,250, -1,0,1,250,1,0,247,22,1,250,1,0,251,22,1,0, -1,0,1,250,1,0,255,22,4,250,186,7,3,23,7,23, -1,250,1,0,1,250,1,0,10,23,1,250,1,0,1,0, -1,0,1,250,1,0,1,0,1,0,1,0,244,75,244,75, -244,75,14,23,14,23,14,23,20,23,20,23,20,23,4,250, -220,10,4,250,226,10,4,250,232,10,4,250,238,10,4,250, -246,10,4,250,0,11,4,250,10,11,4,250,20,11,1,0, -4,250,30,11,4,250,40,11,4,250,49,11,1,250,1,0, -4,250,54,11,4,250,60,11,4,250,212,7,4,250,69,11, -4,250,77,11,83,11,26,23,26,23,26,23,4,250,88,11, -33,23,57,248,4,250,94,11,4,250,102,11,4,250,111,11, -4,250,117,11,4,250,122,11,4,250,128,11,4,250,134,11, -4,250,140,11,4,250,146,11,4,250,152,11,4,250,158,11, -4,250,164,11,4,250,170,11,4,250,176,11,4,250,182,11, 
-4,250,188,11,4,250,194,11,4,250,200,11,1,250,1,0, -4,250,206,11,37,23,1,0,1,250,1,0,1,250,1,0, -4,250,222,7,4,250,232,7,4,250,214,11,4,250,224,11, -4,250,242,7,4,250,234,11,4,250,242,11,1,0,1,0, -1,0,1,0,1,0,1,0,41,23,1,250,1,0,45,23, -49,23,1,0,1,250,1,0,53,23,57,23,61,23,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,16,3,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,64,23, -69,23,72,23,76,23,33,248,41,248,81,248,80,23,84,23, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -88,23,94,23,100,23,106,23,112,23,118,23,1,0,1,0, -223,22,124,23,186,22,128,23,133,23,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,253,204,253,204,253,204,253,204,253,204,255,204,253,204,253, -204,253,204,253,204,253,204,253,204,253,204,255,204,255,204,253, -204,255,204,253,204,255,204,253,204,253,208,255,184,255,184,255, -184,255,184,255,208,255,176,253,184,255,184,255,184,255,184,255, -184,255,148,255,148,255,184,253,184,253,184,253,184,253,148,253, -148,253,184,255,184,255,184,255,184,255,184,253,184,253,184,255, -184,253,184,253,184,255,184,255,2,254,2,254,2,254,2,254, -2,252,184,255,184,255,184,255,184,255,204,255,204,255,204,255, -48,84,54,84,204,253,60,84,66,84,138,23,204,255,184,255, -184,255,184,255,204,255,204,255,204,255,184,255,184,255,230,84, -204,255,204,255,204,255,184,255,184,255,184,255,184,255,204,255, -208,255,184,255,184,255,204,255,210,255,212,255,212,255,210,255, -212,255,212,255,210,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -1,250,1,0,1,250,1,0,143,23,1,0,1,250,1,0, -1,0,1,0,146,23,1,0,1,0,1,0,153,23,157,23, -1,0,1,0,1,0,1,0,108,22,160,23,44,251,169,23, -36,251,36,251,36,251,1,0,252,251,1,0,244,251,244,251, -251,11,248,250,249,250,249,250,249,250,248,250,249,250,248,250, -249,250,248,250,249,250,249,250,249,250,249,250,249,250,248,250, -249,250,248,250,1,0,249,250,249,250,248,250,249,250,249,250, -249,250,248,250,252,250,252,250,3,12,9,12,15,12,21,12, -29,12,20,3,1,0,1,0,1,0,48,3,1,0,64,3, -1,0,84,3,1,0,1,0,1,0,1,0,1,0,116,3, -1,0,132,3,1,250,1,0,1,0,140,3,1,0,1,0, -1,0,172,3,252,7,14,8,37,12,43,12,49,12,57,250, -9,249,49,249,144,249,204,249,180,249,129,249,73,249,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -73,248,120,248,129,248,1,0,25,248,248,247,1,0,1,250, -1,0,73,248,1,250,1,0,1,0,173,23,177,23,181,23, -254,75,6,76,185,23,14,76,189,23,193,23,196,23,22,76, -201,23,205,23,209,23,213,23,30,76,38,76,46,76,217,23, -248,250,249,250,249,250,248,250,249,250,248,250,248,250,248,250, -248,250,252,250,248,250,249,250,249,250,249,250,248,250,249,250, -249,250,249,250,249,250,248,250,249,250,249,250,249,250,248,250, -249,250,249,250,249,250,248,250,249,250,248,250,249,250,249,250, -192,3,1,0,1,0,200,3,1,0,204,3,216,3,224,3, -228,3,55,12,244,3,1,0,1,0,1,0,248,3,1,0, -1,0,1,0,252,3,1,0,1,0,1,0,12,4,1,0, -1,0,1,0,16,4,1,0,20,4,1,0,1,0,61,12, -67,12,1,0,73,12,1,0,1,0,24,4,79,12,1,0, -1,0,1,0,1,0,85,12,91,12,97,12,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, 
-1,0,1,250,1,0,0,250,28,4,4,250,103,12,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,0, -204,255,204,255,204,255,204,255,204,255,1,0,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,113,250,4,250,109,12, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,0,4,250,115,12,4,250, -121,12,1,250,1,0,4,250,127,12,0,250,32,4,4,250, -133,12,4,250,139,12,4,250,145,12,1,250,1,0,4,250, -151,12,4,250,157,12,4,250,163,12,0,250,36,4,4,250, -169,12,4,250,175,12,4,250,181,12,4,250,187,12,4,250, -193,12,4,250,199,12,1,250,1,0,4,250,205,12,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,0, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -221,23,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,184,255,204,255,204,255,204,255,204,255,184,255, -204,255,204,255,204,255,188,255,184,255,204,255,204,255,204,255, -204,255,204,255,204,255,184,255,184,255,184,255,184,255,184,255, -184,255,204,255,204,255,184,255,204,255,204,255,188,255,200,255, -204,255,20,254,22,254,24,254,26,254,28,254,30,254,32,254, -34,254,36,254,38,254,38,254,40,254,42,254,44,254,1,0, -46,254,1,0,48,254,50,254,1,0,204,255,184,255,1,0, -36,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,60,254,62,254,64,254,1,0,230,84,1,0,1,0, -1,0,1,0,1,0,210,12,216,12,223,12,229,12,235,12, -40,4,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,52,4,1,0,56,4,54,254,56,254,58,254,60,254, -62,254,64,254,66,254,68,254,204,253,204,253,184,253,184,255, -204,255,204,255,204,255,204,255,204,255,184,255,204,255,204,255, -184,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,70,254,1,0,1,0,1,0,1,0,227,23,233,23, -239,23,245,23,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,241,12,60,4,247,12,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,64,4,253,12,1,0,68,4, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,1,0, -1,0,204,255,204,255,204,255,204,255,184,255,204,255,1,0, -1,0,204,255,204,255,1,0,184,255,204,255,204,255,184,255, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,72,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,204,255,184,255,204,255,204,255,184,255,204,255,204,255, 
-184,255,184,255,184,255,204,255,184,255,184,255,204,255,184,255, -204,255,204,255,184,255,204,255,184,255,204,255,184,255,204,255, -184,255,204,255,204,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,184,255, -204,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,184,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,204,255,204,255,204,255,204,255,1,0,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,1,0, -204,255,204,255,204,255,1,0,204,255,204,255,204,255,204,255, -204,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,184,255,184,255,184,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,184,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -1,0,184,255,204,255,204,255,184,255,204,255,204,255,184,255, -204,255,204,255,204,255,184,255,184,255,184,255,54,254,56,254, -58,254,204,255,204,255,204,255,184,255,204,255,204,255,184,255, -184,255,204,255,204,255,204,255,204,255,204,255,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,72,4,3,13, -1,0,1,0,1,0,1,0,1,0,1,0,76,4,9,13, -1,0,80,4,15,13,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,14,252,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -18,254,1,0,1,0,1,0,204,255,184,255,204,255,204,255, -1,0,1,0,1,0,250,23,0,24,6,24,12,24,18,24, -24,24,30,24,36,24,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,14,254, -1,0,0,252,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,84,4,1,0,1,0,1,0,21,13,27,13,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,0,252,1,0,1,0,1,0,1,0,42,24,48,24, -1,0,54,24,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,60,24,1,0,1,0,66,24, -1,0,1,0,1,0,1,0,1,0,14,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,72,24, -78,24,84,24,1,0,1,0,90,24,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,14,254,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,92,4,33,13,1,0,1,0,39,13,45,13, -18,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,0,252,0,252,1,0,1,0,1,0,1,0,96,24, -102,24,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,104,4,1,0,51,13,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,252, -1,0,1,0,1,0,1,0,1,0,1,0,108,4,116,4, -1,0,1,0,57,13,63,13,69,13,18,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,252, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -120,4,1,0,75,13,1,0,1,0,1,0,1,0,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,168,254, -182,252,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,14,254,1,0,1,0, -124,4,81,13,1,0,0,252,1,0,1,0,1,0,128,4, -87,13,93,13,1,0,32,8,101,13,1,0,18,254,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,0,252,0,252, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,18,254,1,0,0,252,1,0, -1,0,1,0,1,0,1,0,1,0,140,4,148,4,1,0, -1,0,109,13,115,13,121,13,18,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,0,252,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,18,252, -1,0,1,0,1,0,1,0,0,252,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,152,4,127,13, -1,0,42,8,135,13,143,13,0,252,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -109,24,1,0,1,0,1,0,1,0,206,254,206,254,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -214,254,214,254,214,254,214,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -115,24,1,0,1,0,1,0,1,0,236,254,236,254,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -244,254,244,254,244,254,244,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,121,24,127,24,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -241,249,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,184,255,184,255,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,184,255,1,0,184,255,1,0,176,255,1,0,1,0, -1,0,1,0,1,0,1,0,133,24,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,139,24,1,0, -1,0,1,0,1,0,145,24,1,0,1,0,1,0,1,0, -151,24,1,0,1,0,1,0,1,0,157,24,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,163,24,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,2,255,4,255,74,84,8,255,82,84,168,24, -174,24,182,24,188,24,4,255,4,255,4,255,4,255,1,0, -1,0,4,255,90,84,204,255,204,255,18,254,1,0,204,255, -204,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,197,24,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,203,24,1,0, -1,0,1,0,1,0,209,24,1,0,1,0,1,0,1,0, -215,24,1,0,1,0,1,0,1,0,221,24,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,227,24,1,0,1,0,1,0,1,0,1,0, -1,0,184,255,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,164,4,149,13,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,0,252,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,14,254,1,0,18,254, -18,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,184,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,233,24,237,24,241,24,245,24,249,24,253,24,1,25, -5,25,9,25,13,25,17,25,21,25,25,25,29,25,33,25, -37,25,41,25,45,25,49,25,53,25,57,25,61,25,65,25, -69,25,73,25,77,25,81,25,85,25,89,25,93,25,97,25, -101,25,105,25,109,25,113,25,117,25,121,25,125,25,1,0, -129,25,1,0,1,0,1,0,1,0,1,0,133,25,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,249,248,1,0,1,0, -1,0,2,0,2,0,2,0,2,0,2,0,2,0,2,0, -2,0,2,0,2,0,2,0,2,0,2,0,2,0,2,0, -2,0,2,0,2,0,2,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,230,84,230,84,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,204,255,204,255, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -185,249,185,249,185,249,185,249,185,249,185,249,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,18,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,18,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,230,84, -230,84,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,204,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,230,84,230,84,230,84,230,84,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,200,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 
-188,255,204,255,184,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,204,255,184,255,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -1,0,1,0,184,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,204,255,204,255,204,255,204,255,204,255, -184,255,184,255,184,255,184,255,184,255,184,255,204,255,204,255, -184,255,1,0,1,0,1,0,1,0,1,0,168,4,155,13, -172,4,161,13,176,4,167,13,180,4,173,13,184,4,179,13, -1,0,1,0,188,4,185,13,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,14,254,0,252,1,0, -1,0,1,0,1,0,192,4,191,13,196,4,197,13,200,4, -204,4,203,13,209,13,208,4,215,13,18,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,204,255,184,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,18,254, -18,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,14,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,18,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,137,25,141,25,144,25,149,25,153,25,153,25,157,25, -161,25,165,25,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,169,25,173,25,177,25,181,25,185,25,189,25,193,25, -197,25,201,25,205,25,209,25,213,25,217,25,221,25,225,25, -229,25,233,25,237,25,241,25,245,25,249,25,253,25,1,26, -5,26,9,26,13,26,17,26,21,26,25,26,29,26,33,26, -37,26,41,26,45,26,49,26,53,26,57,26,61,26,65,26, -69,26,73,26,77,26,81,26,1,0,1,0,85,26,89,26, -93,26,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,204,255,204,255,204,255,1,0,2,254,184,255,184,255, -184,255,184,255,184,255,204,255,204,255,184,255,184,255,184,255, -184,255,204,255,1,0,2,254,2,254,2,254,2,254,2,254, -2,254,2,254,1,0,1,0,1,0,1,0,184,255,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,1,0,1,0, -1,0,204,255,204,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,90,22, -96,26,100,26,1,0,104,26,108,26,207,22,112,26,64,23, -116,26,72,23,120,26,124,23,124,26,128,26,1,0,128,22, -133,26,136,26,76,23,140,26,144,26,80,23,90,22,149,26, -153,26,157,26,100,26,104,26,108,26,211,22,215,22,161,26, -112,26,1,0,120,26,124,26,165,26,128,22,195,22,9,248, -9,248,136,26,140,26,144,26,25,248,235,22,168,26,65,248, -173,26,177,26,181,26,185,26,189,26,116,26,76,23,144,26, -168,26,173,26,177,26,192,26,185,26,189,26,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,197,26,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,201,26, -204,26,209,26,213,26,161,26,216,26,221,26,225,26,229,26, -231,22,227,22,233,26,153,248,237,26,241,26,209,248,245,26, 
-249,26,253,26,239,22,1,27,5,27,243,22,9,27,13,27, -251,22,17,27,57,23,3,23,21,27,7,23,61,23,24,27, -29,27,33,27,10,23,37,27,204,255,204,255,184,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,184,255,204,255, -204,255,212,255,172,255,184,255,148,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,208,255,200,255,200,255,184,255,1,0,204,255, -210,255,184,255,204,255,184,255,4,250,220,13,4,250,226,13, -4,250,233,13,4,250,239,13,4,250,247,13,4,250,254,13, -4,250,4,14,4,250,10,14,4,250,17,14,4,250,22,14, -4,250,30,14,4,250,40,14,4,250,48,14,4,250,54,14, -4,250,63,14,4,250,71,14,4,250,76,14,4,250,82,14, -4,250,88,14,4,250,94,14,4,250,101,14,4,250,106,14, -4,250,112,14,4,250,120,14,4,250,128,14,4,250,134,14, -4,250,140,14,4,250,52,8,4,250,148,14,4,250,156,14, -4,250,162,14,4,250,168,14,4,250,174,14,4,250,181,14, -4,250,186,14,4,250,192,14,4,250,198,14,4,250,204,14, -4,250,212,14,4,250,222,14,4,250,232,14,4,250,242,14, -4,250,251,14,4,250,1,15,4,250,6,15,4,250,62,8, -4,250,14,15,4,250,22,15,4,250,28,15,4,250,72,8, -4,250,36,15,4,250,46,15,4,250,56,15,4,250,64,15, -4,250,70,15,4,250,76,15,4,250,82,15,4,250,88,15, -4,250,94,15,4,250,100,15,4,250,108,15,4,250,118,15, -4,250,126,15,4,250,133,15,4,250,138,15,4,250,144,15, -4,250,150,15,4,250,156,15,4,250,163,15,4,250,169,15, -4,250,175,15,4,250,180,15,4,250,186,15,4,250,193,15, -4,250,199,15,204,15,210,15,216,15,222,15,41,27,44,248, -1,0,1,0,156,22,1,0,4,250,82,8,4,250,228,15, -4,250,236,15,4,250,246,15,4,250,0,16,4,250,10,16, -4,250,20,16,4,250,30,16,4,250,40,16,4,250,50,16, -4,250,60,16,4,250,70,16,4,250,96,8,4,250,78,16, -4,250,84,16,4,250,92,16,4,250,102,16,4,250,112,16, -4,250,122,16,4,250,132,16,4,250,140,16,4,250,146,16, -4,250,106,8,4,250,152,16,4,250,160,16,4,250,170,16, -4,250,180,16,4,250,190,16,4,250,200,16,4,250,210,16, -4,250,220,16,4,250,230,16,4,250,240,16,4,250,250,16, -4,250,2,17,4,250,8,17,4,250,16,17,4,250,26,17, -4,250,36,17,4,250,46,17,4,250,56,17,4,250,64,17, -4,250,71,17,4,250,76,17,4,250,82,17,1,250,1,0, -1,250,1,0,1,250,1,0,116,8,134,8,91,17,101,17, -111,17,121,17,131,17,141,17,188,249,188,249,188,249,188,249, -188,249,188,249,188,249,188,249,152,8,166,8,151,17,161,17, -171,17,181,17,1,0,1,0,188,249,188,249,188,249,188,249, -188,249,188,249,1,0,1,0,180,8,198,8,191,17,201,17, -211,17,221,17,231,17,241,17,188,249,188,249,188,249,188,249, -188,249,188,249,188,249,188,249,216,8,234,8,251,17,5,18, -15,18,25,18,35,18,45,18,188,249,188,249,188,249,188,249, -188,249,188,249,188,249,188,249,252,8,10,9,55,18,65,18, -75,18,85,18,1,0,1,0,188,249,188,249,188,249,188,249, -188,249,188,249,1,0,1,0,24,9,42,9,95,18,105,18, -115,18,125,18,135,18,145,18,1,0,188,249,1,0,188,249, -1,0,188,249,1,0,188,249,60,9,78,9,155,18,165,18, -175,18,185,18,195,18,205,18,188,249,188,249,188,249,188,249, -188,249,188,249,188,249,188,249,213,18,54,76,219,18,62,76, -225,18,70,76,231,18,78,76,237,18,86,76,243,18,94,76, -249,18,102,76,1,0,1,0,110,76,120,76,134,76,150,76, -166,76,182,76,198,76,214,76,110,76,120,76,134,76,150,76, -166,76,182,76,198,76,214,76,226,76,236,76,250,76,10,77, -26,77,42,77,58,77,74,77,226,76,236,76,250,76,10,77, -26,77,42,77,58,77,74,77,86,77,96,77,110,77,126,77, -142,77,158,77,174,77,190,77,86,77,96,77,110,77,126,77, -142,77,158,77,174,77,190,77,255,18,5,19,202,77,46,27, 
-212,77,1,0,11,19,222,77,188,249,188,249,232,77,54,76, -46,27,52,27,58,27,52,27,62,27,68,27,240,77,76,27, -250,77,1,0,17,19,4,78,14,78,62,76,22,78,70,76, -76,27,82,27,90,27,98,27,23,19,29,19,37,19,32,78, -1,0,1,0,45,19,53,19,188,249,188,249,42,78,78,76, -1,0,106,27,114,27,122,27,61,19,67,19,75,19,52,78, -83,19,89,19,95,19,103,19,188,249,188,249,62,78,94,76, -196,249,130,27,160,23,139,27,1,0,1,0,70,78,142,27, -80,78,1,0,111,19,90,78,100,78,86,76,108,78,102,76, -142,27,108,22,148,27,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,230,84, -230,84,230,84,230,84,230,84,1,0,241,249,1,0,1,0, -1,0,1,0,1,0,154,27,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -161,27,165,27,171,27,1,0,1,0,1,0,230,84,230,84, -230,84,230,84,230,84,81,22,1,0,1,0,1,0,179,27, -185,27,1,0,193,27,199,27,1,0,1,0,1,0,1,0, -207,27,1,0,212,27,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,219,27,225,27,231,27,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,237,27,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,230,84,230,84,230,84,230,84,230,84, -230,84,230,84,230,84,230,84,230,84,230,84,230,84,230,84, -230,84,230,84,230,84,247,27,116,26,1,0,1,0,251,27, -255,27,3,28,7,28,11,28,15,28,19,28,23,28,26,28, -31,28,35,28,128,26,247,27,125,22,101,22,105,22,251,27, -255,27,3,28,7,28,11,28,15,28,19,28,23,28,26,28, -31,28,35,28,1,0,90,22,108,26,128,22,128,23,211,22, -64,23,120,26,124,23,124,26,128,26,136,26,186,22,140,26, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,38,28,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,204,255,204,255,2,254,2,254,204,255, -204,255,204,255,204,255,2,254,2,254,2,254,204,255,204,255, -1,0,1,0,1,0,1,0,204,255,1,0,1,0,1,0, -2,254,2,254,204,255,184,255,204,255,2,254,2,254,184,255, -184,255,184,255,184,255,204,255,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,44,28,52,28,204,26,60,28,1,0, -66,28,74,28,215,22,1,0,82,28,112,26,64,23,64,23, -64,23,64,23,89,28,116,26,116,26,124,23,124,23,1,0, -128,26,92,28,1,0,1,0,136,26,99,28,76,23,76,23, -76,23,1,0,1,0,102,28,108,28,116,28,1,0,24,27, -1,0,122,28,1,0,24,27,1,0,120,26,116,78,100,26, -204,26,1,0,108,26,108,26,216,26,217,250,124,26,128,22, -127,28,131,28,135,28,139,28,116,26,1,0,142,28,151,28, -177,26,177,26,151,28,155,28,1,0,1,0,1,0,1,0, -104,26,104,26,108,26,116,26,72,23,1,0,1,0,1,0, -1,0,1,0,1,0,159,28,167,28,175,28,185,28,193,28, -201,28,209,28,217,28,225,28,233,28,241,28,249,28,1,29, -9,29,17,29,25,29,116,26,30,29,36,29,44,29,168,26, -50,29,56,29,64,29,74,29,128,23,80,29,86,29,124,23, -204,26,104,26,124,26,116,26,30,29,36,29,44,29,168,26, -50,29,56,29,64,29,74,29,128,23,80,29,86,29,124,23, -204,26,104,26,124,26,1,0,1,0,1,0,1,250,1,0, -1,0,1,0,1,0,1,0,95,29,1,0,1,0,1,0, -1,0,1,0,1,0,212,4,1,0,216,4,1,0,220,4, -1,0,1,0,1,0,1,0,1,0,117,19,123,19,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,129,19,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,135,19,141,19,147,19,224,4,1,0, -228,4,1,0,232,4,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,236,4,153,19, -1,0,1,0,1,0,240,4,159,19,1,0,244,4,165,19, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,248,4,171,19, -252,4,177,19,1,0,1,0,1,0,1,0,1,0,103,29, -109,29,1,0,117,29,123,29,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,0,5, -1,0,1,0,1,0,183,19,1,0,4,5,189,19,8,5, -1,0,195,19,12,5,201,19,1,0,1,0,1,0,16,5, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,207,19,20,5,213,19,1,0,24,5,28,5, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,219,19, -225,19,231,19,237,19,243,19,32,5,36,5,249,19,255,19, -40,5,44,5,5,20,11,20,48,5,52,5,56,5,60,5, -1,0,1,0,17,20,23,20,64,5,68,5,29,20,35,20, -72,5,76,5,41,20,47,20,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,80,5,84,5,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,88,5,1,0,1,0,1,0, -1,0,1,0,92,5,96,5,1,0,100,5,53,20,59,20, -65,20,71,20,1,0,1,0,104,5,108,5,112,5,116,5, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -77,20,83,20,89,20,95,20,1,0,1,0,1,0,1,0, -1,0,1,0,101,20,107,20,113,20,119,20,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,131,29, -135,29,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,125,22,101,22,105,22,251,27,255,27,3,28,7,28, -11,28,15,28,139,29,145,29,151,29,157,29,163,29,169,29, -175,29,181,29,187,29,193,29,199,29,205,29,213,29,221,29, -229,29,237,29,245,29,253,29,5,30,13,30,21,30,31,30, -41,30,51,30,61,30,71,30,81,30,91,30,101,30,111,30, -121,30,131,30,137,30,143,30,149,30,155,30,161,30,167,30, -173,30,179,30,185,30,193,30,201,30,209,30,217,30,225,30, -233,30,241,30,249,30,1,31,9,31,17,31,25,31,33,31, -41,31,49,31,57,31,65,31,73,31,81,31,89,31,97,31, -105,31,113,31,121,31,129,31,137,31,145,31,153,31,161,31, -169,31,177,31,185,31,193,31,201,31,209,31,217,31,90,22, -100,26,204,26,104,26,108,26,216,26,112,26,64,23,116,26, -72,23,120,26,124,23,124,26,128,26,128,22,136,26,99,28, -76,23,186,22,140,26,144,26,168,26,80,23,128,23,84,23, -24,27,90,22,100,26,204,26,104,26,108,26,216,26,112,26, -64,23,116,26,72,23,120,26,124,23,124,26,128,26,128,22, -136,26,99,28,76,23,186,22,140,26,144,26,168,26,80,23, -128,23,84,23,24,27,247,27,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,225,31,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,234,31,242,31, -248,31,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,32,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,250,1,0,7,32,11,32,15,32,1,0,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,153,26,249,26,149,26, -201,26,1,0,1,250,1,0,1,0,1,250,1,0,1,0, -1,0,1,0,1,0,1,0,72,23,168,26,19,32,23,32, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, 
-1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,250, -1,0,1,250,1,0,204,255,204,255,204,255,1,250,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,137,249,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,18,254,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,27,32,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,31,32,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,35,32, -39,32,43,32,47,32,51,32,55,32,59,32,63,32,67,32, -71,32,75,32,79,32,83,32,87,32,91,32,95,32,99,32, -103,32,107,32,111,32,115,32,119,32,123,32,127,32,131,32, -135,32,139,32,143,32,147,32,151,32,155,32,159,32,163,32, -167,32,171,32,175,32,179,32,183,32,187,32,191,32,195,32, -199,32,203,32,207,32,211,32,215,32,219,32,223,32,227,32, -231,32,235,32,239,32,243,32,247,32,251,32,255,32,3,33, -7,33,11,33,15,33,19,33,23,33,27,33,31,33,35,33, -39,33,43,33,47,33,51,33,55,33,59,33,63,33,67,33, -71,33,75,33,79,33,83,33,87,33,91,33,95,33,99,33, -103,33,107,33,111,33,115,33,119,33,123,33,127,33,131,33, -135,33,139,33,143,33,147,33,151,33,155,33,159,33,163,33, -167,33,171,33,175,33,179,33,183,33,187,33,191,33,195,33, -199,33,203,33,207,33,211,33,215,33,219,33,223,33,227,33, -231,33,235,33,239,33,243,33,247,33,251,33,255,33,3,34, -7,34,11,34,15,34,19,34,23,34,27,34,31,34,35,34, -39,34,43,34,47,34,51,34,55,34,59,34,63,34,67,34, -71,34,75,34,79,34,83,34,87,34,91,34,95,34,99,34, -103,34,107,34,111,34,115,34,119,34,123,34,127,34,131,34, -135,34,139,34,143,34,147,34,151,34,155,34,159,34,163,34, -167,34,171,34,175,34,179,34,183,34,187,34,191,34,195,34, -199,34,203,34,207,34,211,34,215,34,219,34,223,34,227,34, -231,34,235,34,239,34,243,34,247,34,251,34,255,34,3,35, -7,35,11,35,15,35,19,35,23,35,27,35,31,35,35,35, -39,35,43,35,47,35,51,35,55,35,59,35,63,35,67,35, -71,35,75,35,79,35,83,35,87,35,91,35,95,35,99,35, -103,35,107,35,111,35,115,35,119,35,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,180,255,200,255,208,255,188,255,192,255,192,255,1,0, -1,0,1,0,1,0,1,0,1,0,217,248,1,0,127,32, -123,35,127,35,1,0,1,0,1,0,1,0,1,0,1,0, -120,5,1,0,1,0,1,0,1,0,124,5,125,20,128,5, -131,20,132,5,137,20,136,5,143,20,140,5,149,20,144,5, -155,20,148,5,161,20,152,5,167,20,156,5,173,20,160,5, -179,20,164,5,185,20,168,5,191,20,1,0,172,5,197,20, -176,5,203,20,180,5,209,20,1,0,1,0,1,0,1,0, -1,0,184,5,215,20,221,20,192,5,227,20,233,20,200,5, 
-239,20,245,20,208,5,251,20,1,21,216,5,7,21,13,21, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,19,21,1,0,1,0,1,0, -1,0,16,252,16,252,130,35,136,35,224,5,25,21,143,35, -1,0,1,0,1,0,1,0,1,0,1,0,228,5,1,0, -1,0,1,0,1,0,232,5,31,21,236,5,37,21,240,5, -43,21,244,5,49,21,248,5,55,21,252,5,61,21,0,6, -67,21,4,6,73,21,8,6,79,21,12,6,85,21,16,6, -91,21,20,6,97,21,1,0,24,6,103,21,28,6,109,21, -32,6,115,21,1,0,1,0,1,0,1,0,1,0,36,6, -121,21,127,21,44,6,133,21,139,21,52,6,145,21,151,21, -60,6,157,21,163,21,68,6,169,21,175,21,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,76,6, -80,6,84,6,88,6,1,0,181,21,1,0,1,0,187,21, -193,21,199,21,205,21,1,0,1,0,92,6,211,21,148,35, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,154,35,158,35,96,84,162,35,100,84,104,84,166,35, -170,35,174,35,108,84,112,84,116,84,120,84,124,84,128,84, -179,35,182,35,186,35,190,35,195,35,198,35,202,35,206,35, -210,35,214,35,218,35,222,35,226,35,230,35,234,35,132,84, -136,84,140,84,144,84,148,84,152,84,156,84,160,84,164,84, -168,84,172,84,176,84,180,84,184,84,188,84,192,84,196,84, -200,84,204,84,208,84,212,84,230,84,239,35,243,35,247,35, -251,35,255,35,3,36,7,36,11,36,15,36,19,36,23,36, -27,36,31,36,35,36,39,36,43,36,47,36,51,36,55,36, -59,36,63,36,67,36,71,36,75,36,79,36,83,36,87,36, -91,36,95,36,99,36,103,36,107,36,111,36,115,36,119,36, -123,36,127,36,131,36,135,36,139,36,143,36,147,36,1,0, -1,0,1,0,35,32,59,32,151,36,155,36,159,36,163,36, -167,36,171,36,51,32,175,36,179,36,183,36,187,36,67,32, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -191,36,199,36,207,36,215,36,223,36,231,36,239,36,247,36, -255,36,7,37,15,37,23,37,31,37,39,37,131,78,149,78, -167,78,185,78,203,78,221,78,239,78,1,79,19,79,37,79, -55,79,73,79,91,79,109,79,127,79,147,79,173,79,1,0, -47,37,55,37,63,37,71,37,79,37,87,37,95,37,103,37, -111,37,119,37,127,37,135,37,143,37,151,37,159,37,167,37, -175,37,183,37,191,37,199,37,207,37,215,37,223,37,231,37, -239,37,247,37,255,37,7,38,15,38,23,38,31,38,39,38, -47,38,55,38,63,38,71,38,79,38,83,38,43,33,87,38, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -90,38,99,38,105,38,111,38,117,38,123,38,129,38,135,38, -141,38,147,38,153,38,159,38,165,38,171,38,177,38,183,38, -154,35,162,35,166,35,174,35,182,35,186,35,198,35,206,35, -210,35,218,35,222,35,226,35,230,35,234,35,188,79,196,79, -204,79,212,79,220,79,228,79,236,79,244,79,252,79,4,80, -12,80,20,80,28,80,36,80,48,80,66,80,78,80,1,0, -35,32,59,32,151,36,155,36,189,38,193,38,197,38,79,32, -201,38,127,32,71,33,119,33,115,33,75,33,187,34,159,32, -63,33,205,38,209,38,213,38,217,38,221,38,225,38,229,38, -233,38,237,38,241,38,183,32,245,38,249,38,253,38,1,39, -5,39,9,39,13,39,17,39,159,36,163,36,167,36,21,39, -25,39,29,39,33,39,37,39,41,39,45,39,49,39,53,39, -57,39,61,39,67,39,73,39,79,39,85,39,91,39,97,39, -103,39,109,39,115,39,121,39,127,39,133,39,139,39,145,39, -151,39,157,39,163,39,169,39,175,39,181,39,187,39,193,39, -199,39,205,39,213,39,221,39,228,39,234,39,242,39,248,39, -1,40,5,40,8,40,13,40,17,40,20,40,24,40,28,40, -32,40,36,40,40,40,44,40,48,40,52,40,56,40,60,40, -64,40,68,40,72,40,76,40,81,40,85,40,89,40,93,40, -97,40,100,40,104,40,108,40,112,40,116,40,121,40,125,40, -129,40,133,40,137,40,141,40,145,40,149,40,153,40,157,40, 
-161,40,165,40,169,40,172,40,176,40,180,40,184,40,189,40, -94,80,195,40,117,80,205,40,138,80,212,40,221,40,162,80, -229,40,238,40,247,40,255,40,6,41,17,41,179,80,191,80, -206,80,219,80,27,41,239,80,37,41,9,81,43,41,56,41, -25,81,37,81,63,81,69,41,78,41,87,41,84,81,95,41, -105,41,104,81,116,41,124,41,118,81,130,81,141,81,133,41, -139,41,144,41,152,41,150,81,166,81,179,81,193,81,209,81, -220,81,231,81,250,81,160,41,11,82,171,41,179,41,26,82, -36,82,190,41,48,82,66,82,80,82,92,82,106,82,199,41, -124,82,205,41,213,41,221,41,231,41,238,41,246,41,255,41, -11,42,21,42,149,82,168,82,187,82,27,42,206,82,37,42, -45,42,53,42,63,42,225,82,245,82,69,42,13,83,74,42, -83,42,89,42,95,42,101,42,107,42,113,42,119,42,125,42, -131,42,137,42,143,42,151,42,159,42,167,42,175,42,183,42, -191,42,199,42,207,42,215,42,223,42,231,42,239,42,247,42, -255,42,6,43,14,43,20,43,26,43,34,43,40,43,46,43, -53,43,61,43,68,43,75,43,81,43,87,43,93,43,99,43, -108,43,114,43,120,43,126,43,132,43,138,43,144,43,150,43, -156,43,164,43,174,43,180,43,186,43,192,43,198,43,204,43, -210,43,216,43,224,43,232,43,240,43,248,43,254,43,4,44, -10,44,16,44,22,44,28,44,34,44,40,44,46,44,53,44, -61,44,69,44,75,44,83,44,91,44,99,44,105,44,112,44, -121,44,108,43,130,44,138,44,146,44,154,44,162,44,175,44, -188,44,194,44,200,44,206,44,212,44,218,44,224,44,230,44, -236,44,230,44,242,44,248,44,254,44,4,45,10,45,4,45, -16,45,22,45,29,45,39,45,44,45,50,45,56,45,67,45, -74,45,80,45,86,45,92,45,98,45,104,45,46,44,110,45, -116,45,122,45,128,45,136,45,144,43,142,45,150,45,158,45, -165,45,174,45,182,45,188,45,194,45,200,45,206,45,214,45, -223,45,229,45,235,45,241,45,247,45,253,45,3,46,9,46, -15,46,21,46,29,46,37,46,45,46,53,46,61,46,69,46, -77,46,85,46,93,46,101,46,109,46,117,46,125,46,133,46, -141,46,149,46,157,46,165,46,173,46,181,46,189,46,196,46, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,0,204,255, -1,0,1,0,1,0,1,0,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,1,0,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,157,25,205,46,204,255,204,255, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,255,204,255,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,0,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,241,249,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,250,1,0,1,250, -1,0,209,46,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,0,1,0,1,0,1,250,1,0,229,26, -1,0,1,0,1,250,1,0,1,250,1,0,1,0,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,69,23,161,26,225,26,213,46, -233,26,1,0,217,46,221,46,237,26,225,46,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,0,1,250,1,0,121,248,13,27,229,46, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,89,28,233,46,1,0,1,0,1,0,1,0,1,0, -1,0,18,254,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,184,255,184,255,184,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,18,254,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,204,255,1,0,204,255,204,255,184,255,1,0,1,0, -204,255,204,255,1,0,1,0,1,0,1,0,1,0,204,255, -204,255,1,0,204,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,237,46,201,248,7,32,145,249, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -241,46,245,46,249,46,253,46,1,47,5,47,9,47,13,47, -17,47,21,47,25,47,29,47,33,47,37,47,41,47,45,47, -49,47,53,47,57,47,61,47,65,47,69,47,73,47,77,47, -81,47,85,47,89,47,93,47,97,47,101,47,105,47,109,47, -113,47,117,47,121,47,125,47,129,47,133,47,137,47,141,47, -145,47,149,47,153,47,157,47,161,47,165,47,169,47,173,47, -177,47,181,47,185,47,189,47,193,47,197,47,201,47,205,47, -209,47,213,47,217,47,221,47,225,47,229,47,233,47,237,47, -241,47,245,47,249,47,253,47,1,48,5,48,9,48,13,48, -17,48,21,48,25,48,29,48,33,48,37,48,41,48,45,48, -168,6,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,168,6,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -168,6,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,168,6,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,232,84,233,84,232,84,232,84, -232,84,232,84,232,84,232,84,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,232,84,233,84, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,232,84,1,0,1,0,1,0,1,0, -232,84,106,66,1,0,1,0,232,84,1,0,232,84,118,66, -40,84,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,225,75, -1,0,230,84,230,84,230,84,230,84,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,49,48,53,48,155,34,57,48,61,48,65,48,69,48, -115,35,115,35,73,48,187,34,77,48,81,48,85,48,89,48, -93,48,97,48,101,48,105,48,109,48,113,48,117,48,121,48, -125,48,129,48,133,48,137,48,141,48,145,48,149,48,153,48, -157,48,161,48,165,48,169,48,173,48,177,48,181,48,185,48, -189,48,193,48,197,48,201,48,205,48,209,48,213,48,217,48, -221,48,225,48,229,48,233,48,237,48,19,34,241,48,245,48, -249,48,253,48,1,49,5,49,9,49,13,49,17,49,21,49, -25,49,55,35,29,49,33,49,37,49,41,49,45,49,49,49, -53,49,57,49,61,49,65,49,69,49,73,49,77,49,81,49, -85,49,89,49,93,49,97,49,101,49,105,49,109,49,113,49, -117,49,121,49,125,49,129,49,133,49,113,48,137,49,141,49, -145,49,149,49,153,49,157,49,161,49,165,49,169,49,173,49, -177,49,181,49,185,49,189,49,193,49,197,49,201,49,205,49, -209,49,213,49,163,34,217,49,221,49,225,49,229,49,233,49, -237,49,241,49,245,49,249,49,253,49,1,50,5,50,9,50, -13,50,17,50,183,32,21,50,25,50,29,50,33,50,37,50, -41,50,45,50,49,50,107,32,53,50,57,50,61,50,65,50, -69,50,73,50,77,50,81,50,85,50,89,50,93,50,97,50, -101,50,105,50,109,50,113,50,117,50,121,50,125,50,129,50, -133,50,137,50,209,49,141,50,145,50,149,50,153,50,157,50, -161,50,165,50,169,50,145,49,173,50,177,50,181,50,185,50, -189,50,193,50,197,50,201,50,205,50,209,50,213,50,217,50, -221,50,225,50,229,50,233,50,237,50,241,50,245,50,249,50, -113,48,253,50,1,51,5,51,9,51,111,35,13,51,17,51, -21,51,25,51,29,51,33,51,37,51,41,51,45,51,49,51, -53,51,57,51,193,38,61,51,65,51,69,51,73,51,77,51, -81,51,85,51,89,51,93,51,153,49,97,51,101,51,105,51, -109,51,113,51,117,51,121,51,125,51,129,51,133,51,137,51, -141,51,145,51,183,34,149,51,153,51,157,51,161,51,165,51, -169,51,173,51,177,51,181,51,185,51,189,51,193,51,197,51, -243,33,201,51,205,51,209,51,213,51,217,51,221,51,225,51, -229,51,233,51,237,51,241,51,245,51,249,51,253,51,1,52, -5,52,95,34,9,52,107,34,13,52,17,52,21,52,1,0, -1,0,25,52,1,0,29,52,1,0,1,0,33,52,37,52, -41,52,45,52,49,52,53,52,57,52,61,52,65,52,15,34, -1,0,69,52,1,0,73,52,1,0,1,0,77,52,81,52, -1,0,1,0,1,0,85,52,89,52,93,52,97,52,101,52, -105,52,109,52,113,52,117,52,121,52,125,52,129,52,133,52, -137,52,141,52,145,52,149,52,153,52,211,32,157,52,161,52, -165,52,169,52,173,52,177,52,181,52,185,52,189,52,193,52, -197,52,201,52,205,52,209,52,213,52,213,38,217,52,221,52, -225,52,229,52,229,38,233,52,237,52,241,52,245,52,249,52, -97,50,253,52,1,53,5,53,9,53,13,53,17,53,17,53, -21,53,25,53,29,53,33,53,37,53,41,53,45,53,49,53, -77,52,53,53,57,53,61,53,65,53,69,53,75,53,1,0, -1,0,79,53,83,53,87,53,91,53,95,53,99,53,103,53, -107,53,133,52,111,53,115,53,119,53,25,52,123,53,127,53, -131,53,135,53,139,53,143,53,147,53,151,53,155,53,159,53, -163,53,167,53,165,52,171,53,169,52,175,53,179,53,183,53, -187,53,191,53,29,52,197,48,195,53,199,53,87,33,213,49, -33,51,203,53,207,53,197,52,211,53,201,52,215,53,219,53, -223,53,37,52,227,53,231,53,235,53,239,53,243,53,41,52, -247,53,251,53,255,53,3,54,7,54,11,54,249,52,15,54, -19,54,97,50,23,54,9,53,27,54,31,54,35,54,39,54, -43,54,29,53,47,54,73,52,51,54,33,53,137,49,55,54, -37,53,59,54,45,53,63,54,67,54,71,54,75,54,79,54, -53,53,61,52,83,54,57,53,87,54,61,53,91,54,115,35, 
-95,54,101,54,107,54,113,54,117,54,121,54,125,54,131,54, -137,54,143,54,147,54,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,150,54,156,54,162,54,168,54,176,54,184,54,184,54, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,191,54,197,54,203,54,209,54, -215,54,1,0,1,0,1,0,1,0,1,0,220,54,52,254, -226,54,233,54,127,28,139,28,237,54,241,54,245,54,249,54, -253,54,1,55,19,28,4,55,10,55,16,55,24,55,32,55, -38,55,44,55,50,55,56,55,62,55,68,55,74,55,80,55, -1,0,86,55,92,55,98,55,104,55,110,55,1,0,116,55, -1,0,122,55,128,55,1,0,134,55,140,55,1,0,146,55, -152,55,158,55,164,55,170,55,176,55,182,55,188,55,194,55, -201,55,207,55,207,55,211,55,211,55,211,55,211,55,215,55, -215,55,215,55,215,55,219,55,219,55,219,55,219,55,223,55, -223,55,223,55,223,55,227,55,227,55,227,55,227,55,231,55, -231,55,231,55,231,55,235,55,235,55,235,55,235,55,239,55, -239,55,239,55,239,55,243,55,243,55,243,55,243,55,247,55, -247,55,247,55,247,55,251,55,251,55,251,55,251,55,255,55, -255,55,3,56,3,56,7,56,7,56,11,56,11,56,15,56, -15,56,19,56,19,56,23,56,23,56,27,56,27,56,27,56, -27,56,31,56,31,56,31,56,31,56,35,56,35,56,35,56, -35,56,39,56,39,56,39,56,39,56,43,56,43,56,47,56, -47,56,47,56,47,56,28,83,28,83,50,56,50,56,50,56, -50,56,55,56,55,56,55,56,55,56,58,56,58,56,36,83, -36,83,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,63,56,63,56,63,56,63,56, -67,56,67,56,71,56,71,56,75,56,75,56,239,23,79,56, -79,56,83,56,83,56,87,56,87,56,91,56,91,56,91,56, -91,56,95,56,95,56,44,83,44,83,54,83,54,83,64,83, -64,83,75,83,75,83,85,83,85,83,95,83,95,83,105,83, -105,83,105,83,115,83,115,83,115,83,99,56,99,56,99,56, -99,56,125,83,135,83,145,83,115,83,154,83,103,56,109,56, -115,56,121,56,127,56,132,56,139,56,145,56,151,56,157,56, -163,56,168,56,175,56,181,56,187,56,192,56,199,56,205,56, -211,56,217,56,223,56,229,56,235,56,241,56,247,56,253,56, -3,57,9,57,15,57,21,57,27,57,33,57,39,57,45,57, -51,57,57,57,63,57,69,57,75,57,81,57,87,57,93,57, -99,57,105,57,111,57,116,57,123,57,129,57,135,57,140,57, -146,57,153,57,159,57,165,57,171,57,177,57,183,57,188,57, -195,57,201,57,207,57,213,57,219,57,224,57,231,57,237,57, -243,57,249,57,255,57,4,58,11,58,17,58,23,58,29,58, -35,58,40,58,47,58,53,58,59,58,64,58,71,58,77,58, -83,58,89,58,95,58,100,58,106,58,112,58,118,58,124,58, -132,58,140,58,148,58,156,58,164,58,165,83,175,83,145,83, -185,83,115,83,154,83,173,58,179,58,121,56,185,58,127,56, -132,56,191,58,197,58,157,56,203,58,163,56,168,56,209,58, -215,58,181,56,221,58,187,56,192,56,111,57,116,57,135,57, -140,57,146,57,171,57,177,57,183,57,188,57,213,57,219,57, -224,57,226,58,249,57,233,58,239,58,29,58,245,58,35,58, -40,58,118,58,251,58,1,59,89,58,7,59,95,58,100,58, -125,83,135,83,195,83,145,83,205,83,103,56,109,56,115,56, -121,56,13,59,139,56,145,56,151,56,157,56,19,59,181,56, -199,56,205,56,211,56,217,56,223,56,235,56,241,56,247,56, -253,56,3,57,9,57,25,59,15,57,21,57,27,57,33,57, -39,57,45,57,57,57,63,57,69,57,75,57,81,57,87,57, -93,57,99,57,105,57,123,57,129,57,153,57,159,57,165,57, -171,57,177,57,195,57,201,57,207,57,213,57,31,59,231,57, -237,57,243,57,249,57,11,58,17,58,23,58,29,58,37,59, -47,58,53,58,42,59,71,58,77,58,83,58,89,58,49,59, -145,83,205,83,121,56,13,59,157,56,19,59,181,56,55,59, -3,57,61,59,67,59,73,59,171,57,177,57,213,57,29,58, -37,59,89,58,49,59,78,59,86,59,94,59,103,59,108,59, -115,59,120,59,127,59,132,59,139,59,144,59,151,59,156,59, 
-163,59,168,59,175,59,180,59,187,59,192,59,199,59,204,59, -211,59,216,59,223,59,229,59,235,59,67,59,241,59,247,59, -253,59,3,60,103,59,108,59,115,59,120,59,127,59,132,59, -139,59,144,59,151,59,156,59,163,59,168,59,175,59,180,59, -187,59,192,59,199,59,204,59,211,59,216,59,223,59,229,59, -235,59,67,59,241,59,247,59,253,59,3,60,223,59,229,59, -235,59,67,59,61,59,73,59,51,57,241,56,247,56,253,56, -223,59,229,59,235,59,51,57,57,57,8,60,8,60,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,15,60, -23,60,23,60,31,60,39,60,47,60,55,60,63,60,71,60, -71,60,78,60,87,60,95,60,103,60,111,60,119,60,119,60, -127,60,135,60,135,60,143,60,143,60,151,60,159,60,159,60, -166,60,175,60,175,60,183,60,183,60,191,60,199,60,199,60, -207,60,207,60,215,60,222,60,231,60,239,60,239,60,247,60, -255,60,6,61,15,61,23,61,23,61,31,61,39,61,47,61, -54,61,63,61,71,61,71,61,79,61,79,61,87,61,87,61, -95,61,103,61,110,61,119,61,127,61,135,61,143,61,1,0, -1,0,151,61,159,61,167,61,175,61,183,61,191,61,191,61, -199,61,206,61,215,61,223,61,223,61,230,61,238,61,247,61, -254,61,7,62,14,62,23,62,30,62,39,62,47,62,55,62, -62,62,70,62,78,62,86,62,94,62,102,62,110,62,118,62, -126,62,134,62,142,62,31,61,47,61,150,62,158,62,167,62, -174,62,183,62,191,62,183,62,167,62,198,62,206,62,214,62, -222,62,230,62,191,62,231,60,151,60,238,62,246,62,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,254,62, -6,63,15,63,25,63,35,63,45,63,55,63,65,63,75,63, -85,63,93,63,131,63,149,63,1,0,1,0,1,0,230,84, -230,84,230,84,230,84,230,84,230,84,230,84,230,84,230,84, -230,84,230,84,230,84,230,84,230,84,230,84,230,84,159,63, -163,63,167,63,171,63,153,23,175,63,179,63,183,63,187,63, -171,27,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,184,255,184,255, -184,255,184,255,184,255,184,255,184,255,204,255,204,255,165,27, -191,63,195,63,199,63,199,63,31,28,35,28,203,63,207,63, -211,63,215,63,219,63,223,63,227,63,231,63,131,29,135,29, -235,63,239,63,243,63,247,63,1,0,1,0,251,63,255,63, -212,27,212,27,212,27,212,27,199,63,199,63,199,63,159,63, -163,63,161,27,1,0,153,23,171,63,179,63,175,63,191,63, -31,28,35,28,203,63,207,63,211,63,215,63,3,64,7,64, -11,64,19,28,15,64,18,64,22,64,26,28,1,0,27,64, -31,64,35,64,39,64,1,0,1,0,1,0,1,0,42,64, -48,64,54,64,1,0,60,64,1,0,66,64,72,64,78,64, -84,64,90,64,96,64,102,64,108,64,114,64,120,64,127,64, -214,83,214,83,222,83,222,83,230,83,230,83,238,83,238,83, -246,83,246,83,246,83,246,83,130,64,130,64,135,64,135,64, -135,64,135,64,139,64,139,64,143,64,143,64,143,64,143,64, -147,64,147,64,147,64,147,64,151,64,151,64,151,64,151,64, -155,64,155,64,155,64,155,64,159,64,159,64,159,64,159,64, -163,64,163,64,167,64,167,64,171,64,171,64,175,64,175,64, -179,64,179,64,179,64,179,64,183,64,183,64,183,64,183,64, -187,64,187,64,187,64,187,64,191,64,191,64,191,64,195,64, -195,64,195,64,195,64,199,64,199,64,199,64,199,64,203,64, -203,64,203,64,203,64,207,64,207,64,207,64,207,64,211,64, -211,64,211,64,211,64,215,64,215,64,215,64,215,64,219,64, -219,64,219,64,219,64,223,64,223,64,223,64,223,64,227,64, -227,64,227,64,227,64,231,64,231,64,231,64,231,64,235,64, -235,64,235,64,235,64,238,64,238,64,95,56,95,56,242,64, -242,64,242,64,242,64,2,84,2,84,16,84,16,84,30,84, -30,84,246,64,246,64,1,0,1,0,230,84,1,0,175,63, -253,64,3,64,31,64,35,64,7,64,1,65,31,28,35,28, -11,64,19,28,159,63,15,64,161,27,5,65,247,27,125,22, -101,22,105,22,251,27,255,27,3,28,7,28,11,28,15,28, 
-171,63,153,23,18,64,26,28,22,64,179,63,39,64,90,22, -100,26,204,26,104,26,108,26,216,26,112,26,64,23,116,26, -72,23,120,26,124,23,124,26,128,26,128,22,136,26,99,28, -76,23,186,22,140,26,144,26,168,26,80,23,128,23,84,23, -24,27,251,63,27,64,255,63,9,65,199,63,139,27,90,22, -100,26,204,26,104,26,108,26,216,26,112,26,64,23,116,26, -72,23,120,26,124,23,124,26,128,26,128,22,136,26,99,28, -76,23,186,22,140,26,144,26,168,26,80,23,128,23,84,23, -24,27,203,63,13,65,207,63,17,65,21,65,25,65,167,63, -235,63,239,63,163,63,29,65,184,40,33,65,37,65,41,65, -45,65,49,65,53,65,57,65,61,65,65,65,69,65,1,40, -5,40,8,40,13,40,17,40,20,40,24,40,28,40,32,40, -36,40,40,40,44,40,48,40,52,40,56,40,60,40,64,40, -68,40,72,40,76,40,81,40,85,40,89,40,93,40,97,40, -100,40,104,40,108,40,112,40,116,40,121,40,125,40,129,40, -133,40,137,40,141,40,145,40,149,40,153,40,157,40,161,40, -165,40,169,40,172,40,73,65,218,84,224,84,230,84,154,35, -158,35,96,84,162,35,100,84,104,84,166,35,170,35,174,35, -108,84,112,84,116,84,120,84,124,84,128,84,179,35,182,35, -186,35,190,35,195,35,198,35,202,35,206,35,210,35,214,35, -218,35,222,35,226,35,230,35,234,35,1,0,1,0,132,84, -136,84,140,84,144,84,148,84,152,84,1,0,1,0,156,84, -160,84,164,84,168,84,172,84,176,84,1,0,1,0,180,84, -184,84,188,84,192,84,196,84,200,84,1,0,1,0,204,84, -208,84,212,84,1,0,1,0,1,0,77,65,81,65,85,65, -94,22,89,65,93,65,97,65,1,0,101,65,104,65,109,65, -112,65,117,65,121,65,125,65,1,0,230,84,230,84,230,84, -230,84,230,84,230,84,230,84,230,84,230,84,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,204,255,204,255, -204,255,204,255,1,0,1,0,1,0,1,0,1,0,57,251, -57,251,57,251,57,251,57,251,57,251,57,251,57,251,57,251, -57,251,57,251,57,251,57,251,57,251,57,251,57,251,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,184,255,1,0,204,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -2,254,184,255,1,0,1,0,1,0,1,0,18,254,249,251, -249,251,249,251,249,251,249,251,249,251,249,251,249,251,249,251, -249,251,249,251,249,251,249,251,249,251,249,251,249,251,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,204,255,204,255,204,255,204,255, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -184,255,184,255,204,255,204,255,204,255,184,255,204,255,184,255, -184,255,184,255,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,96,6,217,21,102,6,227,21,1,0, -1,0,1,0,1,0,1,0,108,6,1,0,1,0,1,0, -1,0,1,0,237,21,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,18,254,14,252,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,0,252,1,0,1,0, -1,0,1,0,1,0,1,0,247,21,1,22,1,0,114,6, -120,6,18,254,18,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,18,254,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -14,254,1,0,1,0,1,0,1,0,1,0,18,254,14,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,14,254,18,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,14,254,14,254, -1,0,0,252,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,126,6,1,0,1,0,1,0,11,22,21,22,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,1,0,1,0,1,0, -18,254,1,0,1,0,1,0,14,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,0,252,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,138,6, -0,252,31,22,41,22,0,252,51,22,1,0,1,0,18,254, -14,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,156,6,162,6,61,22, -71,22,1,0,1,0,1,0,18,254,14,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,18,254,14,254,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,249,250, -249,250,249,250,249,250,249,250,249,250,249,250,249,250,249,250, -249,250,249,250,249,250,249,250,249,250,249,250,249,250,2,254, -2,254,2,254,2,254,2,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,2,254,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,128,65,138,65,148,65,162,65,176,65,190,65,204,65, -176,255,176,255,2,254,2,254,2,254,1,0,1,0,1,0, -196,255,176,255,176,255,176,255,230,84,230,84,230,84,230,84, -230,84,230,84,230,84,230,84,184,255,184,255,184,255,184,255, -184,255,1,0,1,0,204,255,204,255,204,255,204,255,204,255, -184,255,184,255,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,218,65,228,65,238,65, -252,65,10,66,24,66,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,204,26,104,26,108,26,216,26,112,26,1,0, -116,26,72,23,120,26,124,23,124,26,128,26,128,22,136,26, -99,28,76,23,186,22,140,26,144,26,168,26,80,23,128,23, -84,23,24,27,90,22,1,0,204,26,104,26,1,0,1,0, -112,26,1,0,1,0,72,23,120,26,1,0,1,0,128,26, -128,22,136,26,99,28,1,0,186,22,140,26,144,26,168,26, -80,23,128,23,84,23,24,27,90,22,100,26,204,26,104,26, -1,0,216,26,1,0,64,23,116,26,72,23,120,26,124,23, -124,26,128,26,1,0,136,26,99,28,76,23,186,22,140,26, -144,26,168,26,80,23,128,23,84,23,24,27,90,22,100,26, -1,0,104,26,108,26,216,26,112,26,1,0,1,0,72,23, -120,26,124,23,124,26,128,26,128,22,136,26,99,28,1,0, -186,22,140,26,144,26,168,26,80,23,128,23,84,23,1,0, -90,22,100,26,116,26,72,23,120,26,124,23,124,26,1,0, -128,22,1,0,1,0,1,0,186,22,140,26,144,26,168,26, -80,23,128,23,84,23,1,0,90,22,100,26,204,26,104,26, -108,26,216,26,112,26,64,23,116,26,72,23,120,26,124,23, -124,26,128,26,80,23,128,23,84,23,24,27,39,66,43,66, -1,0,1,0,46,66,173,26,177,26,181,26,50,66,55,66, -58,66,37,27,58,27,63,66,67,66,115,22,71,66,75,66, -78,66,151,28,192,26,37,27,83,66,87,66,90,66,185,26, -189,26,95,66,122,28,99,66,46,66,173,26,177,26,181,26, -50,66,55,66,58,66,37,27,58,27,63,66,67,66,115,22, -71,66,75,66,78,66,151,28,192,26,83,66,83,66,87,66, -90,66,185,26,189,26,95,66,122,28,103,66,50,66,37,27, -63,66,185,26,192,26,151,28,46,66,173,26,177,26,181,26, -50,66,55,66,58,66,37,27,58,27,63,66,67,66,115,22, -71,66,75,66,189,26,95,66,122,28,103,66,50,66,37,27, -63,66,185,26,192,26,151,28,107,66,107,66,1,0,1,0, -247,27,125,22,101,22,105,22,251,27,255,27,3,28,7,28, -11,28,15,28,247,27,125,22,101,22,105,22,251,27,255,27, -3,28,7,28,11,28,15,28,247,27,125,22,101,22,105,22, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,1,0, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,1,0,1,0,204,255,204,255,204,255,204,255,204,255, -1,0,204,255,204,255,1,0,204,255,204,255,204,255,204,255, -204,255,1,0,1,0,1,0,1,0,1,0,184,255,184,255, -184,255,184,255,184,255,184,255,184,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,9,251,9,251, -9,251,9,251,9,251,9,251,9,251,9,251,9,251,9,251, -9,251,9,251,9,251,9,251,9,251,9,251,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,204,255,204,255,204,255,204,255, -204,255,204,255,14,254,1,0,1,0,1,0,1,0,1,0, -130,64,135,64,151,64,163,64,1,0,238,64,175,64,155,64, -195,64,242,64,219,64,223,64,227,64,231,64,179,64,203,64, -211,64,187,64,215,64,171,64,183,64,143,64,147,64,159,64, -167,64,191,64,199,64,207,64,111,66,43,56,115,66,119,66, -1,0,135,64,151,64,1,0,235,64,1,0,1,0,155,64, -1,0,242,64,219,64,223,64,227,64,231,64,179,64,203,64, -211,64,187,64,215,64,1,0,183,64,143,64,147,64,159,64, -1,0,191,64,1,0,207,64,1,0,1,0,1,0,1,0, -151,64,1,0,1,0,1,0,1,0,155,64,1,0,242,64, -1,0,223,64,1,0,231,64,179,64,203,64,1,0,187,64, -215,64,1,0,183,64,1,0,1,0,159,64,1,0,191,64, -1,0,207,64,1,0,43,56,1,0,119,66,1,0,135,64, -151,64,1,0,235,64,1,0,1,0,155,64,195,64,242,64, -219,64,1,0,227,64,231,64,179,64,203,64,211,64,187,64, -215,64,1,0,183,64,143,64,147,64,159,64,1,0,191,64, 
-199,64,207,64,111,66,1,0,115,66,1,0,130,64,135,64, -151,64,163,64,235,64,238,64,175,64,155,64,195,64,242,64, -1,0,223,64,227,64,231,64,179,64,203,64,211,64,187,64, -215,64,171,64,183,64,143,64,147,64,159,64,167,64,191,64, -199,64,207,64,1,0,1,0,1,0,1,0,135,64,151,64, -163,64,1,0,238,64,175,64,155,64,195,64,242,64,1,0, -223,64,227,64,231,64,179,64,203,64,123,66,129,66,135,66, -141,66,147,66,153,66,159,66,165,66,171,66,177,66,183,66, -1,0,1,0,1,0,1,0,1,0,145,31,153,31,161,31, -169,31,177,31,185,31,193,31,201,31,209,31,217,31,189,66, -204,26,76,23,50,45,196,66,1,0,99,28,76,23,186,22, -140,26,144,26,168,26,80,23,128,23,84,23,24,27,202,66, -230,44,208,66,156,22,214,66,222,66,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,228,66, -234,66,240,66,1,0,1,0,1,0,246,66,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,252,66,2,67,40,40, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,31,33,9,67,13,67, -40,84,59,32,17,67,21,67,183,36,25,67,29,67,33,67, -249,50,37,67,41,67,45,67,49,67,53,67,57,67,175,33, -61,67,65,67,69,67,73,67,77,67,81,67,35,32,151,36, -85,67,21,39,163,36,25,39,89,67,143,34,93,67,97,67, -101,67,105,67,109,67,209,38,71,33,113,67,117,67,121,67, -125,67,1,0,1,0,1,0,1,0,129,67,137,67,145,67, -153,67,161,67,169,67,177,67,185,67,193,67,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,201,67,205,67,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,209,67,213,67,217,67, -221,67,227,67,109,52,231,67,235,67,239,67,243,67,113,52, -247,67,251,67,255,67,117,52,5,68,9,68,13,68,17,68, -23,68,27,68,45,67,31,68,37,68,41,68,45,68,49,68, -83,53,53,68,99,32,59,68,63,68,67,68,71,68,117,67, -75,68,79,68,103,53,121,52,125,52,107,53,83,68,87,68, -161,49,91,68,129,52,95,68,99,68,103,68,107,68,107,68, -107,68,111,68,117,68,121,68,125,68,129,68,135,68,139,68, -143,68,147,68,151,68,155,68,159,68,163,68,167,68,171,68, -175,68,179,68,183,68,183,68,115,53,187,68,191,68,195,68, -199,68,137,52,203,68,207,68,211,68,233,51,215,68,219,68, -223,68,227,68,231,68,235,68,239,68,243,68,247,68,253,68, -1,69,5,69,17,67,9,69,13,69,17,69,23,69,29,69, -33,69,37,69,41,69,45,69,49,69,53,69,57,69,61,69, -61,69,65,69,71,69,75,69,145,49,79,69,83,69,89,69, -93,69,97,69,203,32,101,69,105,69,211,32,109,69,113,69, -117,69,123,69,127,69,133,69,137,69,141,69,145,69,149,69, -153,69,157,69,161,69,165,69,169,69,173,69,177,69,183,69, -187,69,191,69,195,69,193,48,199,69,251,32,205,69,205,69, -211,69,215,69,215,69,219,69,223,69,229,69,235,69,239,69, -243,69,247,69,251,69,255,69,3,70,7,70,11,70,15,70, -157,52,19,70,25,70,29,70,33,70,163,53,33,70,37,70, -165,52,41,70,45,70,49,70,53,70,169,52,85,48,57,70, -61,70,65,70,69,70,73,70,77,70,81,70,87,70,91,70, -95,70,99,70,103,70,107,70,113,70,117,70,121,70,125,70, -129,70,133,70,137,70,141,70,145,70,173,52,149,70,153,70, -159,70,163,70,167,70,171,70,181,52,175,70,179,70,183,70, -187,70,191,70,195,70,199,70,203,70,197,48,195,53,207,70, -211,70,215,70,219,70,225,70,229,70,233,70,237,70,185,52, -241,70,247,70,251,70,255,70,113,54,3,71,7,71,11,71, -15,71,19,71,25,71,29,71,33,71,37,71,43,71,47,71, -51,71,55,71,213,49,59,71,63,71,69,71,75,71,81,71, -85,71,91,71,95,71,99,71,103,71,107,71,189,52,33,51, -111,71,115,71,119,71,123,71,129,71,133,71,137,71,141,71, -207,53,145,71,149,71,155,71,159,71,163,71,169,71,175,71, -179,71,211,53,183,71,187,71,191,71,195,71,199,71,203,71, -207,71,213,71,217,71,223,71,227,71,233,71,219,53,237,71, -241,71,247,71,251,71,255,71,5,72,11,72,15,72,19,72, -23,72,27,72,27,72,31,72,35,72,227,53,39,72,43,72, -47,72,51,72,55,72,61,72,65,72,157,49,71,72,77,72, 
-81,72,87,72,93,72,99,72,103,72,251,53,107,72,113,72, -119,72,125,72,131,72,135,72,135,72,255,53,121,54,139,72, -143,72,147,72,151,72,157,72,9,49,7,54,161,72,165,72, -229,52,171,72,177,72,57,52,183,72,187,72,241,52,191,72, -195,72,199,72,205,72,205,72,211,72,215,72,219,72,225,72, -229,72,233,72,237,72,243,72,247,72,251,72,255,72,3,73, -7,73,13,73,17,73,21,73,25,73,29,73,33,73,37,73, -43,73,49,73,53,73,59,73,63,73,69,73,73,73,9,53, -77,73,83,73,89,73,93,73,99,73,103,73,109,73,113,73, -117,73,121,73,125,73,129,73,133,73,139,73,145,73,151,73, -211,69,157,73,161,73,165,73,169,73,173,73,177,73,181,73, -185,73,189,73,193,73,197,73,201,73,225,49,207,73,211,73, -215,73,219,73,223,73,227,73,21,53,231,73,235,73,239,73, -243,73,247,73,253,73,3,74,9,74,13,74,17,74,21,74, -25,74,31,74,35,74,41,74,45,74,49,74,55,74,61,74, -65,74,245,48,69,74,73,74,77,74,81,74,85,74,89,74, -35,54,93,74,97,74,101,74,105,74,109,74,113,74,117,74, -121,74,99,34,125,74,131,74,135,74,139,74,143,74,147,74, -151,74,157,74,163,74,167,74,171,74,55,54,59,54,127,34, -175,74,181,74,185,74,189,74,193,74,197,74,203,74,209,74, -213,74,217,74,221,74,227,74,63,54,231,74,237,74,243,74, -247,74,251,74,255,74,5,75,9,75,13,75,17,75,21,75, -25,75,29,75,33,75,39,75,43,75,47,75,51,75,57,75, -61,75,65,75,69,75,73,75,79,75,85,75,89,75,93,75, -97,75,103,75,107,75,87,54,87,54,113,75,117,75,123,75, -127,75,131,75,135,75,139,75,143,75,147,75,151,75,91,54, -157,75,161,75,165,75,169,75,173,75,177,75,183,75,187,75, -193,75,199,75,63,35,205,75,79,35,209,75,213,75,217,75, -221,75,99,35,225,75,1,0,1,0,1,0,255,255,255,255, -112,134,220,68,112,134,192,68,112,134,222,68,0,6,192,1, -2,6,194,1,4,6,197,1,6,6,198,1,8,6,2,2, -12,6,7,2,14,6,79,4,16,6,201,1,18,6,70,61, -20,6,203,1,24,6,156,3,30,6,2,4,34,6,6,4, -70,6,67,61,74,6,2,60,80,134,10,2,14,6,6,60, -70,6,10,60,98,134,14,60,2,6,14,2,4,6,18,2, -14,6,22,2,24,6,26,2,78,134,207,1,14,6,22,60, -24,6,30,2,70,6,26,60,78,6,34,60,90,6,38,60, -98,134,30,60,0,6,208,1,2,6,210,1,4,6,213,1, -6,6,122,61,8,6,39,2,12,6,42,2,14,6,46,2, -16,6,214,1,18,6,118,61,24,6,54,2,30,6,10,4, -34,6,14,4,70,6,115,61,78,6,83,4,80,6,50,2, -90,6,50,60,96,134,54,60,14,134,62,60,2,6,234,3, -4,6,58,2,8,6,66,60,12,6,62,2,14,6,66,2, -24,6,206,3,78,134,70,2,4,6,74,2,14,6,70,60, -16,6,78,60,24,6,62,4,70,6,74,60,78,6,82,60, -92,6,86,60,98,134,44,61,0,6,216,1,2,6,218,1, -4,6,220,1,6,6,82,2,8,6,86,2,12,6,90,2, -16,6,223,1,18,6,146,61,24,6,160,3,30,6,18,4, -34,6,22,4,70,6,150,61,80,6,94,2,96,134,90,60, -4,6,106,2,24,134,224,3,2,6,98,60,24,6,210,3, -70,6,102,60,78,6,110,2,98,134,106,60,2,6,116,2, -24,6,124,2,70,6,111,60,78,6,120,2,90,6,122,60, -98,134,118,60,2,6,126,60,14,6,130,60,70,134,134,60, -0,6,242,3,2,6,136,2,6,6,226,1,14,6,138,60, -24,6,144,2,70,6,142,60,78,6,140,2,90,6,150,60, -98,134,146,60,0,6,228,1,2,6,230,1,4,6,233,1, -6,6,235,1,8,6,155,2,12,6,158,2,14,6,95,4, -16,6,237,1,18,6,158,61,22,6,162,2,24,6,164,3, -30,6,26,4,34,6,30,4,54,6,67,3,70,6,155,61, -80,134,215,3,2,6,170,60,14,134,174,60,2,6,170,2, -14,6,178,60,24,6,178,2,30,6,34,4,34,6,38,4, -70,6,183,60,78,6,174,2,98,134,190,60,2,6,183,2, -4,6,186,2,14,6,194,60,24,6,195,2,70,6,199,60, -76,6,50,4,78,134,190,2,14,6,214,60,16,6,46,61, -24,6,202,2,70,6,218,60,76,6,54,4,78,6,198,2, -90,6,226,60,98,134,222,60,0,6,242,1,2,6,244,1, -4,6,246,1,6,6,211,2,8,6,215,2,12,6,218,2, -16,6,249,1,18,6,206,61,20,6,222,2,22,6,226,2, -24,6,168,3,30,6,42,4,34,6,46,4,54,6,97,3, -70,6,202,61,72,6,230,60,80,6,230,2,90,6,238,60, -96,134,234,60,6,6,250,60,70,134,254,60,0,6,2,61, -2,6,6,61,4,6,234,2,14,6,14,61,16,6,10,61, 
-20,6,48,61,70,134,18,61,14,6,22,61,16,134,26,61, -0,6,230,61,2,6,250,1,4,6,238,2,6,6,242,61, -8,6,102,4,14,6,30,61,16,6,254,1,18,6,238,61, -20,6,50,61,70,134,234,61,2,6,244,2,4,6,34,61, -14,6,248,2,24,6,252,2,70,6,38,61,98,134,42,61, -2,6,250,3,8,134,198,3,2,134,254,3,24,134,222,3, -0,6,224,62,2,6,88,7,8,6,98,63,12,6,96,63, -38,6,1,62,40,6,3,62,132,134,108,63,0,6,228,62, -2,6,90,7,38,6,33,62,40,134,35,62,0,6,232,62, -2,6,92,7,38,6,65,62,40,6,67,62,132,134,140,63, -0,6,236,62,2,6,94,7,8,6,162,63,12,6,160,63, -16,6,149,7,38,6,97,62,40,6,99,62,132,134,172,63, -0,6,240,62,2,6,152,7,38,6,129,62,40,134,131,62, -38,6,200,63,40,134,202,63,0,6,244,62,2,6,154,7, -8,6,194,63,12,6,192,63,16,6,151,7,38,6,161,62, -40,6,163,62,132,134,204,63,0,6,248,62,2,6,156,7, -38,6,193,62,40,6,195,62,132,134,236,63,12,6,162,9, -16,134,166,9,2,134,166,8,0,6,160,8,12,6,174,9, -16,134,162,8,12,6,132,9,16,134,186,9,16,134,190,9, -0,6,186,8,8,6,198,9,12,6,114,8,16,134,202,9, -2,134,184,8,16,134,206,9,8,6,222,9,12,6,188,8, -16,6,226,9,22,134,230,9,16,134,234,9,16,134,242,9, -16,134,218,9,16,134,174,8,30,134,238,8,16,134,182,9, -16,134,214,9,166,12,68,12,168,12,70,12,170,140,74,12, -168,140,72,12,168,140,76,12,168,140,132,13,168,140,166,13, -168,140,128,13,120,146,82,18,120,146,98,18,120,146,104,18, -124,19,150,19,174,147,152,19,124,22,150,22,172,22,144,22, -174,150,152,22,174,151,40,23,124,23,148,23,174,151,152,23, -124,151,150,23,172,152,144,24,170,153,128,25,132,25,149,25, -170,25,142,25,172,153,144,25,124,26,148,26,174,154,152,26, -124,154,150,26,148,27,180,27,158,27,185,27,190,155,188,27, -92,160,76,32,106,182,12,54,106,182,16,54,106,182,20,54, -106,182,24,54,106,182,28,54,106,182,36,54,106,182,118,54, -106,182,122,54,106,182,128,54,106,182,130,54,106,182,134,54, -112,134,52,67,112,134,54,67,112,134,92,67,112,134,154,67, -112,134,158,67,112,134,156,67,112,134,8,68,112,134,18,68, -112,134,24,68,112,134,72,68,112,134,76,68,112,134,130,68, -112,134,136,68,112,134,142,68,112,134,146,68,112,134,218,68, -112,134,196,68,112,134,224,68,112,134,226,68,112,134,232,68, -112,134,234,68,112,134,240,68,112,134,242,68,112,134,0,69, -112,134,2,69,112,134,192,69,112,134,194,69,112,134,8,69, -112,134,10,69,112,134,16,69,112,134,18,69,112,134,196,69, -112,134,198,69,112,134,88,69,112,134,90,69,112,134,92,69, -112,134,94,69,112,134,212,69,112,134,214,69,112,134,216,69, -112,134,218,69,50,225,40,97,50,225,152,96,50,225,156,96, -50,225,160,96,50,225,164,96,50,225,168,96,50,225,172,96, -50,225,176,96,50,225,180,96,50,225,184,96,50,225,188,96, -50,225,192,96,50,225,196,96,50,225,202,96,50,225,206,96, -50,225,210,96,50,97,224,96,52,225,226,96,50,97,230,96, -52,225,232,96,50,97,236,96,52,225,238,96,50,97,242,96, -52,225,244,96,50,97,248,96,52,225,250,96,50,225,60,97, -50,225,232,97,50,225,88,97,50,225,92,97,50,225,96,97, -50,225,100,97,50,225,104,97,50,225,108,97,50,225,112,97, -50,225,116,97,50,225,120,97,50,225,124,97,50,225,128,97, -50,225,132,97,50,225,138,97,50,225,142,97,50,225,146,97, -50,97,160,97,52,225,162,97,50,97,166,97,52,225,168,97, -50,97,172,97,52,225,174,97,50,97,178,97,52,225,180,97, -50,97,184,97,52,225,186,97,50,225,238,97,50,225,240,97, -50,225,242,97,50,225,244,97,50,225,252,97,137,180,130,46, -52,33,137,180,130,46,56,33,137,180,130,46,86,33,137,180, -194,73,92,34,137,180,194,73,94,34,137,52,130,207,150,38, -137,180,194,213,152,38,139,52,2,44,120,41,139,52,130,46, -118,41,139,180,66,47,124,41,139,180,194,107,116,43,139,180, -194,107,118,43,2,0,2,230,97,0,2,3,0,6,78,61, -2,6,74,61,6,6,86,61,18,134,82,61,2,230,97,0, 
-8,3,8,134,190,3,2,230,97,0,10,3,2,134,246,3, -2,202,99,0,39,3,2,134,18,60,2,230,101,0,2,3, -0,6,130,61,2,6,126,61,6,6,138,61,18,134,134,61, -2,230,105,0,8,3,2,134,94,60,2,230,111,0,2,3, -0,6,166,61,2,6,162,61,6,6,174,61,18,134,170,61, -2,230,111,0,3,3,2,6,154,60,8,6,90,4,16,134, -158,60,2,230,111,0,8,3,8,134,86,4,2,230,117,0, -8,3,0,6,184,3,2,6,176,3,8,6,172,3,24,134, -180,3,2,230,97,0,6,3,0,6,98,61,2,6,94,61, -6,6,106,61,18,134,102,61,2,230,101,0,4,3,0,6, -42,60,2,134,46,60,2,230,111,0,4,3,0,6,162,60, -2,134,166,60,2,230,115,0,1,3,14,134,202,60,2,230, -115,0,12,3,14,134,206,60,2,230,117,0,3,3,2,134, -242,60,2,230,117,0,4,3,16,134,246,60,2,216,111,0, -27,3,0,6,186,61,2,6,182,61,6,6,194,61,18,6, -190,61,70,134,198,61,2,216,117,0,27,3,0,6,214,61, -2,6,210,61,6,6,222,61,18,6,218,61,70,134,226,61, -2,202,111,0,40,3,8,134,218,3,2,230,97,0,7,3, -8,134,194,3,2,202,101,0,39,3,12,134,58,60,2,230, -111,0,7,3,8,134,98,4,2,230,185,3,8,3,0,6, -164,63,2,6,32,7,132,134,174,63,2,230,197,3,8,3, -0,6,196,63,2,6,96,7,132,134,206,63,2,0,198,12, -194,12,170,153,150,25,2,0,217,13,207,13,148,155,186,27, -2,220,108,0,35,3,8,134,114,60,2,220,114,0,35,3, -8,134,186,60,2,220,115,0,35,3,14,134,210,60,2,220, -97,0,35,3,4,6,90,61,12,134,110,61,2,220,101,0, -35,3,4,134,142,61,2,220,111,0,35,3,4,134,178,61, -2,230,177,3,19,3,0,6,4,62,2,6,8,62,132,134, -12,62,2,230,177,3,20,3,0,6,6,62,2,6,10,62, -132,134,14,62,2,230,181,3,19,3,0,6,36,62,2,134, -40,62,2,230,181,3,20,3,0,6,38,62,2,134,42,62, -2,230,183,3,19,3,0,6,68,62,2,6,72,62,132,134, -76,62,2,230,183,3,20,3,0,6,70,62,2,6,74,62, -132,134,78,62,2,230,185,3,19,3,0,6,100,62,2,6, -104,62,132,134,108,62,2,230,185,3,20,3,0,6,102,62, -2,6,106,62,132,134,110,62,2,230,191,3,19,3,0,6, -132,62,2,134,136,62,2,230,191,3,20,3,0,6,134,62, -2,134,138,62,2,230,197,3,19,3,0,6,164,62,2,6, -168,62,132,134,172,62,2,230,197,3,20,3,0,6,166,62, -2,6,170,62,132,134,174,62,2,230,201,3,19,3,0,6, -196,62,2,6,200,62,132,134,204,62,2,230,201,3,20,3, -0,6,198,62,2,6,202,62,132,134,206,62,3,0,2,230, -97,0,0,3,2,230,97,0,1,3,2,230,97,0,3,3, -2,230,101,0,0,3,2,230,101,0,1,3,2,230,101,0, -8,3,2,230,105,0,0,3,2,230,105,0,1,3,2,230, -105,0,2,3,2,230,110,0,3,3,2,230,111,0,0,3, -2,230,111,0,1,3,2,230,117,0,0,3,2,230,117,0, -1,3,2,230,117,0,2,3,2,230,121,0,1,3,2,230, -121,0,8,3,2,230,97,0,4,3,2,202,97,0,40,3, -2,230,99,0,1,3,2,230,99,0,2,3,2,230,99,0, -7,3,2,230,99,0,12,3,2,230,100,0,12,3,2,230, -101,0,6,3,2,230,101,0,7,3,2,202,101,0,40,3, -2,230,101,0,12,3,2,230,103,0,2,3,2,230,103,0, -6,3,2,230,103,0,7,3,2,202,103,0,39,3,2,230, -104,0,2,3,2,230,105,0,3,3,2,230,105,0,4,3, -2,230,105,0,6,3,2,202,105,0,40,3,2,230,106,0, -2,3,2,202,107,0,39,3,2,230,108,0,1,3,2,202, -108,0,39,3,2,230,108,0,12,3,2,230,110,0,1,3, -2,202,110,0,39,3,2,230,110,0,12,3,2,230,111,0, -6,3,2,230,111,0,11,3,2,230,114,0,1,3,2,202, -114,0,39,3,2,230,114,0,12,3,2,230,115,0,2,3, -2,202,115,0,39,3,2,202,116,0,39,3,2,230,116,0, -12,3,2,230,117,0,6,3,2,230,117,0,10,3,2,230, -117,0,11,3,2,202,117,0,40,3,2,230,119,0,2,3, -2,230,121,0,2,3,2,230,122,0,1,3,2,230,122,0, -7,3,2,230,122,0,12,3,2,230,97,0,12,3,2,230, -105,0,12,3,2,230,111,0,12,3,2,230,117,0,12,3, -252,0,67,230,117,0,8,3,4,3,252,0,67,230,117,0, -8,3,1,3,252,0,67,230,117,0,8,3,12,3,252,0, -67,230,117,0,8,3,0,3,228,0,67,230,97,0,8,3, -4,3,39,2,67,230,97,0,7,3,4,3,2,230,230,0, -4,3,2,230,103,0,12,3,2,230,107,0,12,3,235,1, -67,230,111,0,40,3,4,3,2,230,146,2,12,3,2,230, -106,0,12,3,2,230,103,0,1,3,2,230,110,0,0,3, -229,0,67,230,97,0,10,3,1,3,2,230,230,0,1,3, 
-2,230,248,0,1,3,2,230,97,0,15,3,2,230,97,0, -17,3,2,230,101,0,15,3,2,230,101,0,17,3,2,230, -105,0,15,3,2,230,105,0,17,3,2,230,111,0,15,3, -2,230,111,0,17,3,2,230,114,0,15,3,2,230,114,0, -17,3,2,230,117,0,15,3,2,230,117,0,17,3,2,220, -115,0,38,3,2,220,116,0,38,3,2,230,104,0,12,3, -246,0,67,230,111,0,8,3,4,3,245,0,67,230,111,0, -3,3,4,3,47,2,67,230,111,0,7,3,4,3,2,230, -121,0,4,3,202,3,67,230,185,3,8,3,1,3,2,230, -177,3,1,3,2,230,181,3,1,3,2,230,183,3,1,3, -2,230,185,3,1,3,203,3,67,230,197,3,8,3,1,3, -2,230,191,3,1,3,2,230,197,3,1,3,2,230,201,3, -1,3,2,230,56,4,6,3,2,230,53,4,0,3,2,230, -53,4,8,3,2,230,51,4,1,3,2,230,86,4,8,3, -2,230,58,4,1,3,2,230,56,4,0,3,2,230,67,4, -6,3,2,230,117,4,15,3,2,230,54,4,6,3,2,230, -48,4,6,3,2,230,48,4,8,3,2,230,53,4,6,3, -2,230,217,4,8,3,2,230,54,4,8,3,2,230,55,4, -8,3,2,230,56,4,4,3,2,230,56,4,8,3,2,230, -62,4,8,3,2,230,233,4,8,3,2,230,77,4,8,3, -2,230,67,4,4,3,2,230,67,4,8,3,2,230,67,4, -11,3,2,230,71,4,8,3,2,230,75,4,8,3,2,230, -39,6,83,6,2,230,39,6,84,6,2,230,72,6,84,6, -2,220,39,6,85,6,2,230,74,6,84,6,2,230,213,6, -84,6,2,230,193,6,84,6,2,230,210,6,84,6,2,7, -40,9,60,9,2,7,48,9,60,9,2,7,51,9,60,9, -2,0,199,9,190,9,2,0,199,9,215,9,2,0,71,11, -86,11,2,0,71,11,62,11,2,0,71,11,87,11,2,0, -146,11,215,11,2,0,198,11,190,11,2,0,199,11,190,11, -2,0,198,11,215,11,2,91,70,12,86,12,2,0,191,12, -213,12,2,0,198,12,213,12,2,0,198,12,214,12,202,12, -67,0,198,12,194,12,213,12,2,0,70,13,62,13,2,0, -71,13,62,13,2,0,70,13,87,13,2,9,217,13,202,13, -220,13,67,9,217,13,207,13,202,13,2,0,217,13,223,13, -2,0,37,16,46,16,2,0,5,27,53,27,2,0,7,27, -53,27,2,0,9,27,53,27,2,0,11,27,53,27,2,0, -13,27,53,27,2,0,17,27,53,27,2,0,58,27,53,27, -2,0,60,27,53,27,2,0,62,27,53,27,2,0,63,27, -53,27,2,0,66,27,53,27,2,220,97,0,37,3,2,230, -98,0,7,3,2,220,98,0,35,3,2,220,98,0,49,3, -231,0,67,230,99,0,39,3,1,3,2,230,100,0,7,3, -2,220,100,0,35,3,2,220,100,0,49,3,2,202,100,0, -39,3,2,220,100,0,45,3,19,1,67,230,101,0,4,3, -0,3,19,1,67,230,101,0,4,3,1,3,2,220,101,0, -45,3,2,220,101,0,48,3,41,2,67,230,101,0,39,3, -6,3,2,230,102,0,7,3,2,230,103,0,4,3,2,230, -104,0,7,3,2,220,104,0,35,3,2,230,104,0,8,3, -2,202,104,0,39,3,2,220,104,0,46,3,2,220,105,0, -48,3,239,0,67,230,105,0,8,3,1,3,2,230,107,0, -1,3,2,220,107,0,35,3,2,220,107,0,49,3,55,30, -67,230,108,0,35,3,4,3,2,220,108,0,49,3,2,220, -108,0,45,3,2,230,109,0,1,3,2,230,109,0,7,3, -2,220,109,0,35,3,2,230,110,0,7,3,2,220,110,0, -35,3,2,220,110,0,49,3,2,220,110,0,45,3,245,0, -67,230,111,0,3,3,1,3,245,0,67,230,111,0,3,3, -8,3,77,1,67,230,111,0,4,3,0,3,77,1,67,230, -111,0,4,3,1,3,2,230,112,0,1,3,2,230,112,0, -7,3,2,230,114,0,7,3,91,30,67,230,114,0,35,3, -4,3,2,220,114,0,49,3,2,230,115,0,7,3,91,1, -67,230,115,0,1,3,7,3,97,1,67,230,115,0,12,3, -7,3,99,30,67,230,115,0,35,3,7,3,2,230,116,0, -7,3,2,220,116,0,35,3,2,220,116,0,49,3,2,220, -116,0,45,3,2,220,117,0,36,3,2,220,117,0,48,3, -2,220,117,0,45,3,105,1,67,230,117,0,3,3,1,3, -107,1,67,230,117,0,4,3,8,3,2,230,118,0,3,3, -2,220,118,0,35,3,2,230,119,0,0,3,2,230,119,0, -1,3,2,230,119,0,8,3,2,230,119,0,7,3,2,220, -119,0,35,3,2,230,120,0,7,3,2,230,120,0,8,3, -2,230,121,0,7,3,2,230,122,0,2,3,2,220,122,0, -35,3,2,220,122,0,49,3,2,220,104,0,49,3,2,230, -116,0,8,3,2,230,119,0,10,3,2,230,121,0,10,3, -2,230,97,0,9,3,226,0,67,230,97,0,2,3,1,3, -226,0,67,230,97,0,2,3,0,3,226,0,67,230,97,0, -2,3,9,3,226,0,67,230,97,0,2,3,3,3,161,30, -67,230,97,0,35,3,2,3,3,1,67,230,97,0,6,3, -1,3,3,1,67,230,97,0,6,3,0,3,3,1,67,230, -97,0,6,3,9,3,3,1,67,230,97,0,6,3,3,3, -161,30,67,230,97,0,35,3,6,3,2,230,101,0,9,3, 
-2,230,101,0,3,3,234,0,67,230,101,0,2,3,1,3, -234,0,67,230,101,0,2,3,0,3,234,0,67,230,101,0, -2,3,9,3,234,0,67,230,101,0,2,3,3,3,185,30, -67,230,101,0,35,3,2,3,2,230,105,0,9,3,2,220, -105,0,35,3,2,230,111,0,9,3,244,0,67,230,111,0, -2,3,1,3,244,0,67,230,111,0,2,3,0,3,244,0, -67,230,111,0,2,3,9,3,244,0,67,230,111,0,2,3, -3,3,205,30,67,230,111,0,35,3,2,3,161,1,67,230, -111,0,27,3,1,3,161,1,67,230,111,0,27,3,0,3, -161,1,67,230,111,0,27,3,9,3,161,1,67,230,111,0, -27,3,3,3,161,1,67,220,111,0,27,3,35,3,2,220, -117,0,35,3,2,230,117,0,9,3,176,1,67,230,117,0, -27,3,1,3,176,1,67,230,117,0,27,3,0,3,176,1, -67,230,117,0,27,3,9,3,176,1,67,230,117,0,27,3, -3,3,176,1,67,220,117,0,27,3,35,3,2,230,121,0, -0,3,2,220,121,0,35,3,2,230,121,0,9,3,2,230, -121,0,3,3,0,31,67,230,177,3,19,3,0,3,1,31, -67,230,177,3,20,3,0,3,0,31,67,230,177,3,19,3, -1,3,1,31,67,230,177,3,20,3,1,3,0,31,67,230, -177,3,19,3,66,3,1,31,67,230,177,3,20,3,66,3, -16,31,67,230,181,3,19,3,0,3,17,31,67,230,181,3, -20,3,0,3,16,31,67,230,181,3,19,3,1,3,17,31, -67,230,181,3,20,3,1,3,32,31,67,230,183,3,19,3, -0,3,33,31,67,230,183,3,20,3,0,3,32,31,67,230, -183,3,19,3,1,3,33,31,67,230,183,3,20,3,1,3, -32,31,67,230,183,3,19,3,66,3,33,31,67,230,183,3, -20,3,66,3,48,31,67,230,185,3,19,3,0,3,49,31, -67,230,185,3,20,3,0,3,48,31,67,230,185,3,19,3, -1,3,49,31,67,230,185,3,20,3,1,3,48,31,67,230, -185,3,19,3,66,3,49,31,67,230,185,3,20,3,66,3, -64,31,67,230,191,3,19,3,0,3,65,31,67,230,191,3, -20,3,0,3,64,31,67,230,191,3,19,3,1,3,65,31, -67,230,191,3,20,3,1,3,80,31,67,230,197,3,19,3, -0,3,81,31,67,230,197,3,20,3,0,3,80,31,67,230, -197,3,19,3,1,3,81,31,67,230,197,3,20,3,1,3, -80,31,67,230,197,3,19,3,66,3,81,31,67,230,197,3, -20,3,66,3,96,31,67,230,201,3,19,3,0,3,97,31, -67,230,201,3,20,3,0,3,96,31,67,230,201,3,19,3, -1,3,97,31,67,230,201,3,20,3,1,3,96,31,67,230, -201,3,19,3,66,3,97,31,67,230,201,3,20,3,66,3, -2,230,177,3,0,3,2,230,181,3,0,3,2,230,183,3, -0,3,2,230,185,3,0,3,2,230,191,3,0,3,2,230, -197,3,0,3,2,230,201,3,0,3,2,230,177,3,6,3, -2,230,177,3,4,3,2,230,177,3,66,3,2,230,183,3, -66,3,2,230,185,3,6,3,2,230,185,3,4,3,202,3, -67,230,185,3,8,3,0,3,2,230,185,3,66,3,202,3, -67,230,185,3,8,3,66,3,2,230,197,3,6,3,2,230, -197,3,4,3,203,3,67,230,197,3,8,3,0,3,2,230, -193,3,19,3,2,230,193,3,20,3,2,230,197,3,66,3, -203,3,67,230,197,3,8,3,66,3,2,230,201,3,66,3, -2,1,144,33,56,3,2,1,146,33,56,3,2,1,148,33, -56,3,2,1,208,33,56,3,2,1,212,33,56,3,2,1, -210,33,56,3,2,1,3,34,56,3,2,1,8,34,56,3, -2,1,11,34,56,3,2,1,35,34,56,3,2,1,37,34, -56,3,2,1,60,34,56,3,2,1,67,34,56,3,2,1, -69,34,56,3,2,1,72,34,56,3,2,1,61,0,56,3, -2,1,97,34,56,3,2,1,77,34,56,3,2,1,60,0, -56,3,2,1,62,0,56,3,2,1,100,34,56,3,2,1, -101,34,56,3,2,1,114,34,56,3,2,1,115,34,56,3, -2,1,118,34,56,3,2,1,119,34,56,3,2,1,122,34, -56,3,2,1,123,34,56,3,2,1,130,34,56,3,2,1, -131,34,56,3,2,1,134,34,56,3,2,1,135,34,56,3, -2,1,162,34,56,3,2,1,168,34,56,3,2,1,169,34, -56,3,2,1,171,34,56,3,2,1,124,34,56,3,2,1, -125,34,56,3,2,1,145,34,56,3,2,1,146,34,56,3, -2,1,178,34,56,3,2,1,179,34,56,3,2,1,180,34, -56,3,2,1,181,34,56,3,2,8,75,48,153,48,2,8, -77,48,153,48,2,8,79,48,153,48,2,8,81,48,153,48, -2,8,83,48,153,48,2,8,85,48,153,48,2,8,87,48, -153,48,2,8,89,48,153,48,2,8,91,48,153,48,2,8, -93,48,153,48,2,8,95,48,153,48,2,8,97,48,153,48, -2,8,100,48,153,48,2,8,102,48,153,48,2,8,104,48, -153,48,2,8,111,48,153,48,2,8,111,48,154,48,2,8, -114,48,153,48,2,8,114,48,154,48,2,8,117,48,153,48, -2,8,117,48,154,48,2,8,120,48,153,48,2,8,120,48, -154,48,2,8,123,48,153,48,2,8,123,48,154,48,2,8, -70,48,153,48,2,8,157,48,153,48,2,8,171,48,153,48, 
-2,8,173,48,153,48,2,8,175,48,153,48,2,8,177,48, -153,48,2,8,179,48,153,48,2,8,181,48,153,48,2,8, -183,48,153,48,2,8,185,48,153,48,2,8,187,48,153,48, -2,8,189,48,153,48,2,8,191,48,153,48,2,8,193,48, -153,48,2,8,196,48,153,48,2,8,198,48,153,48,2,8, -200,48,153,48,2,8,207,48,153,48,2,8,207,48,154,48, -2,8,210,48,153,48,2,8,210,48,154,48,2,8,213,48, -153,48,2,8,213,48,154,48,2,8,216,48,153,48,2,8, -216,48,154,48,2,8,219,48,153,48,2,8,219,48,154,48, -2,8,166,48,153,48,2,8,239,48,153,48,2,8,240,48, -153,48,2,8,241,48,153,48,2,8,242,48,153,48,2,8, -253,48,153,48,4,7,4,216,153,220,4,216,186,220,4,7, -4,216,155,220,4,216,186,220,4,7,4,216,165,220,4,216, -186,220,4,0,4,216,49,221,4,216,39,221,4,0,4,216, -50,221,4,216,39,221,4,0,4,216,71,223,4,216,62,223, -4,0,4,216,71,223,4,216,87,223,4,0,5,216,185,220, -5,216,186,220,4,0,5,216,185,220,5,216,176,220,4,0, -5,216,185,220,5,216,189,220,4,0,5,216,184,221,5,216, -175,221,4,0,5,216,185,221,5,216,175,221,1,0,32,0, -2,230,32,0,8,3,1,0,97,0,2,230,32,0,4,3, -1,0,50,0,1,0,51,0,2,230,32,0,1,3,1,0, -188,3,2,202,32,0,39,3,1,0,49,0,1,0,111,0, -3,0,49,0,68,32,52,0,3,0,49,0,68,32,50,0, -3,0,51,0,68,32,52,0,2,0,115,0,115,0,2,230, -105,0,7,3,2,0,105,0,106,0,2,0,108,0,183,0, -2,0,188,2,110,0,1,0,115,0,1,0,83,2,1,0, -84,2,1,0,86,2,1,0,87,2,1,0,221,1,1,0, -89,2,1,0,91,2,1,0,96,2,1,0,99,2,1,0, -105,2,1,0,104,2,1,0,111,2,1,0,114,2,1,0, -117,2,1,0,128,2,1,0,131,2,1,0,136,2,1,0, -138,2,1,0,139,2,1,0,146,2,2,0,108,0,106,0, -2,0,110,0,106,0,2,0,100,0,122,0,1,0,149,1, -1,0,158,1,1,0,101,44,1,0,154,1,1,0,102,44, -1,0,128,1,1,0,137,2,1,0,140,2,1,0,104,0, -1,0,102,2,1,0,106,0,1,0,114,0,1,0,119,0, -1,0,121,0,2,230,32,0,6,3,2,230,32,0,7,3, -2,230,32,0,10,3,2,202,32,0,40,3,2,230,32,0, -3,3,2,230,32,0,11,3,1,0,108,0,1,0,120,0, -1,0,149,2,240,0,129,0,185,3,1,0,185,2,2,0, -32,0,185,3,1,0,59,0,1,0,243,3,3,230,32,0, -8,3,1,3,1,0,183,0,1,0,123,3,1,0,124,3, -1,0,125,3,1,0,82,4,1,0,84,4,1,0,85,4, -1,0,86,4,1,0,88,4,1,0,89,4,1,0,90,4, -1,0,91,4,1,0,95,4,2,0,101,5,130,5,2,0, -39,6,116,6,2,0,72,6,116,6,2,0,199,6,116,6, -2,0,74,6,116,6,2,7,21,9,60,9,2,7,22,9, -60,9,2,7,23,9,60,9,2,7,28,9,60,9,2,7, -33,9,60,9,2,7,34,9,60,9,2,7,43,9,60,9, -2,7,47,9,60,9,2,7,161,9,188,9,2,7,162,9, -188,9,2,7,175,9,188,9,2,7,50,10,60,10,2,7, -56,10,60,10,2,7,22,10,60,10,2,7,23,10,60,10, -2,7,28,10,60,10,2,7,43,10,60,10,2,7,33,11, -60,11,2,7,34,11,60,11,2,0,77,14,50,14,2,0, -205,14,178,14,2,0,171,14,153,14,2,0,171,14,161,14, -2,0,66,15,183,15,2,0,76,15,183,15,2,0,81,15, -183,15,2,0,86,15,183,15,2,0,91,15,183,15,2,0, -64,15,181,15,2,130,178,15,128,15,3,130,178,15,113,15, -128,15,2,130,179,15,128,15,3,130,179,15,113,15,128,15, -2,0,146,15,183,15,2,0,156,15,183,15,2,0,161,15, -183,15,2,0,166,15,183,15,2,0,171,15,183,15,2,0, -144,15,181,15,1,0,0,45,1,0,1,45,1,0,2,45, -1,0,3,45,1,0,4,45,1,0,5,45,1,0,6,45, -1,0,7,45,1,0,8,45,1,0,9,45,1,0,10,45, -1,0,11,45,1,0,12,45,1,0,13,45,1,0,14,45, -1,0,15,45,1,0,16,45,1,0,17,45,1,0,18,45, -1,0,19,45,1,0,20,45,1,0,21,45,1,0,22,45, -1,0,23,45,1,0,24,45,1,0,25,45,1,0,26,45, -1,0,27,45,1,0,28,45,1,0,29,45,1,0,30,45, -1,0,31,45,1,0,32,45,1,0,33,45,1,0,34,45, -1,0,35,45,1,0,36,45,1,0,37,45,1,0,39,45, -1,0,45,45,1,0,50,4,1,0,52,4,1,0,62,4, -1,0,65,4,1,0,66,4,1,0,74,4,1,0,99,4, -1,0,75,166,1,0,208,16,1,0,209,16,1,0,210,16, -1,0,211,16,1,0,212,16,1,0,213,16,1,0,214,16, -1,0,215,16,1,0,216,16,1,0,217,16,1,0,218,16, -1,0,219,16,1,0,220,16,1,0,221,16,1,0,222,16, -1,0,223,16,1,0,224,16,1,0,225,16,1,0,226,16, -1,0,227,16,1,0,228,16,1,0,229,16,1,0,230,16, 
-1,0,231,16,1,0,232,16,1,0,233,16,1,0,234,16, -1,0,235,16,1,0,236,16,1,0,237,16,1,0,238,16, -1,0,239,16,1,0,240,16,1,0,241,16,1,0,242,16, -1,0,243,16,1,0,244,16,1,0,245,16,1,0,246,16, -1,0,247,16,1,0,248,16,1,0,249,16,1,0,250,16, -1,0,253,16,1,0,254,16,1,0,255,16,1,0,230,0, -1,0,98,0,1,0,100,0,1,0,101,0,1,0,103,0, -1,0,105,0,1,0,107,0,1,0,109,0,1,0,110,0, -1,0,35,2,1,0,112,0,1,0,116,0,1,0,117,0, -1,0,80,2,1,0,81,2,1,0,2,29,1,0,92,2, -1,0,75,1,1,0,118,0,1,0,178,3,1,0,179,3, -1,0,180,3,1,0,198,3,1,0,199,3,1,0,193,3, -1,0,61,4,1,0,82,2,1,0,99,0,1,0,85,2, -1,0,240,0,1,0,102,0,1,0,95,2,1,0,97,2, -1,0,101,2,1,0,106,2,1,0,157,2,1,0,109,2, -1,0,159,2,1,0,113,2,1,0,112,2,1,0,115,2, -1,0,116,2,1,0,120,2,1,0,130,2,1,0,171,1, -1,0,28,29,1,0,122,0,1,0,144,2,1,0,145,2, -1,0,184,3,2,0,97,0,190,2,2,0,177,3,185,3, -2,230,32,0,19,3,1,0,185,3,2,230,32,0,66,3, -3,230,32,0,8,3,66,3,2,0,183,3,185,3,3,230, -32,0,19,3,0,3,3,230,32,0,19,3,1,3,3,230, -32,0,19,3,66,3,3,230,32,0,20,3,0,3,3,230, -32,0,20,3,1,3,3,230,32,0,20,3,66,3,3,230, -32,0,8,3,0,3,1,0,96,0,2,0,201,3,185,3, -2,230,32,0,20,3,2,220,32,0,51,3,1,0,46,0, -2,0,46,0,46,0,3,0,46,0,46,0,46,0,2,0, -50,32,50,32,3,0,50,32,50,32,50,32,2,0,53,32, -53,32,3,0,53,32,53,32,53,32,2,0,33,0,33,0, -2,230,32,0,5,3,2,0,63,0,63,0,2,0,63,0, -33,0,2,0,33,0,63,0,4,0,50,32,50,32,50,32, -50,32,1,0,48,0,1,0,52,0,1,0,53,0,1,0, -54,0,1,0,55,0,1,0,56,0,1,0,57,0,1,0, -43,0,1,0,18,34,1,0,61,0,1,0,40,0,1,0, -41,0,2,0,114,0,115,0,3,0,97,0,47,0,99,0, -3,0,97,0,47,0,115,0,2,0,176,0,99,0,3,0, -99,0,47,0,111,0,3,0,99,0,47,0,117,0,2,0, -176,0,102,0,1,0,39,1,2,0,110,0,111,0,1,0, -113,0,2,0,115,0,109,0,3,0,116,0,101,0,108,0, -2,0,116,0,109,0,1,0,201,3,1,0,208,5,1,0, -209,5,1,0,210,5,1,0,211,5,3,0,102,0,97,0, -120,0,1,0,192,3,1,0,17,34,3,0,49,0,68,32, -55,0,3,0,49,0,68,32,57,0,4,0,49,0,68,32, -49,0,48,0,3,0,49,0,68,32,51,0,3,0,50,0, -68,32,51,0,3,0,49,0,68,32,53,0,3,0,50,0, -68,32,53,0,3,0,51,0,68,32,53,0,3,0,52,0, -68,32,53,0,3,0,49,0,68,32,54,0,3,0,53,0, -68,32,54,0,3,0,49,0,68,32,56,0,3,0,51,0, -68,32,56,0,3,0,53,0,68,32,56,0,3,0,55,0, -68,32,56,0,2,0,49,0,68,32,2,0,105,0,105,0, -3,0,105,0,105,0,105,0,2,0,105,0,118,0,2,0, -118,0,105,0,3,0,118,0,105,0,105,0,4,0,118,0, -105,0,105,0,105,0,2,0,105,0,120,0,2,0,120,0, -105,0,3,0,120,0,105,0,105,0,3,0,48,0,68,32, -51,0,2,0,43,34,43,34,3,0,43,34,43,34,43,34, -2,0,46,34,46,34,3,0,46,34,46,34,46,34,1,0, -8,48,1,0,9,48,2,0,49,0,48,0,2,0,49,0, -49,0,2,0,49,0,50,0,2,0,49,0,51,0,2,0, -49,0,52,0,2,0,49,0,53,0,2,0,49,0,54,0, -2,0,49,0,55,0,2,0,49,0,56,0,2,0,49,0, -57,0,2,0,50,0,48,0,3,0,40,0,49,0,41,0, -3,0,40,0,50,0,41,0,3,0,40,0,51,0,41,0, -3,0,40,0,52,0,41,0,3,0,40,0,53,0,41,0, -3,0,40,0,54,0,41,0,3,0,40,0,55,0,41,0, -3,0,40,0,56,0,41,0,3,0,40,0,57,0,41,0, -4,0,40,0,49,0,48,0,41,0,4,0,40,0,49,0, -49,0,41,0,4,0,40,0,49,0,50,0,41,0,4,0, -40,0,49,0,51,0,41,0,4,0,40,0,49,0,52,0, -41,0,4,0,40,0,49,0,53,0,41,0,4,0,40,0, -49,0,54,0,41,0,4,0,40,0,49,0,55,0,41,0, -4,0,40,0,49,0,56,0,41,0,4,0,40,0,49,0, -57,0,41,0,4,0,40,0,50,0,48,0,41,0,2,0, -49,0,46,0,2,0,50,0,46,0,2,0,51,0,46,0, -2,0,52,0,46,0,2,0,53,0,46,0,2,0,54,0, -46,0,2,0,55,0,46,0,2,0,56,0,46,0,2,0, -57,0,46,0,3,0,49,0,48,0,46,0,3,0,49,0, -49,0,46,0,3,0,49,0,50,0,46,0,3,0,49,0, -51,0,46,0,3,0,49,0,52,0,46,0,3,0,49,0, -53,0,46,0,3,0,49,0,54,0,46,0,3,0,49,0, -55,0,46,0,3,0,49,0,56,0,46,0,3,0,49,0, -57,0,46,0,3,0,50,0,48,0,46,0,3,0,40,0, -97,0,41,0,3,0,40,0,98,0,41,0,3,0,40,0, -99,0,41,0,3,0,40,0,100,0,41,0,3,0,40,0, -101,0,41,0,3,0,40,0,102,0,41,0,3,0,40,0, -103,0,41,0,3,0,40,0,104,0,41,0,3,0,40,0, 
-105,0,41,0,3,0,40,0,106,0,41,0,3,0,40,0, -107,0,41,0,3,0,40,0,108,0,41,0,3,0,40,0, -109,0,41,0,3,0,40,0,110,0,41,0,3,0,40,0, -111,0,41,0,3,0,40,0,112,0,41,0,3,0,40,0, -113,0,41,0,3,0,40,0,114,0,41,0,3,0,40,0, -115,0,41,0,3,0,40,0,116,0,41,0,3,0,40,0, -117,0,41,0,3,0,40,0,118,0,41,0,3,0,40,0, -119,0,41,0,3,0,40,0,120,0,41,0,3,0,40,0, -121,0,41,0,3,0,40,0,122,0,41,0,4,0,43,34, -43,34,43,34,43,34,3,0,58,0,58,0,61,0,2,0, -61,0,61,0,3,0,61,0,61,0,61,0,2,1,221,42, -56,3,1,0,107,2,1,0,125,29,1,0,125,2,1,0, -63,2,1,0,64,2,1,0,205,107,1,0,159,159,1,0, -0,78,1,0,40,78,1,0,54,78,1,0,63,78,1,0, -89,78,1,0,133,78,1,0,140,78,1,0,160,78,1,0, -186,78,1,0,63,81,1,0,101,81,1,0,107,81,1,0, -130,81,1,0,150,81,1,0,171,81,1,0,224,81,1,0, -245,81,1,0,0,82,1,0,155,82,1,0,249,82,1,0, -21,83,1,0,26,83,1,0,56,83,1,0,65,83,1,0, -92,83,1,0,105,83,1,0,130,83,1,0,182,83,1,0, -200,83,1,0,227,83,1,0,215,86,1,0,31,87,1,0, -235,88,1,0,2,89,1,0,10,89,1,0,21,89,1,0, -39,89,1,0,115,89,1,0,80,91,1,0,128,91,1,0, -248,91,1,0,15,92,1,0,34,92,1,0,56,92,1,0, -110,92,1,0,113,92,1,0,219,93,1,0,229,93,1,0, -241,93,1,0,254,93,1,0,114,94,1,0,122,94,1,0, -127,94,1,0,244,94,1,0,254,94,1,0,11,95,1,0, -19,95,1,0,80,95,1,0,97,95,1,0,115,95,1,0, -195,95,1,0,8,98,1,0,54,98,1,0,75,98,1,0, -47,101,1,0,52,101,1,0,135,101,1,0,151,101,1,0, -164,101,1,0,185,101,1,0,224,101,1,0,229,101,1,0, -240,102,1,0,8,103,1,0,40,103,1,0,32,107,1,0, -98,107,1,0,121,107,1,0,179,107,1,0,203,107,1,0, -212,107,1,0,219,107,1,0,15,108,1,0,20,108,1,0, -52,108,1,0,107,112,1,0,42,114,1,0,54,114,1,0, -59,114,1,0,63,114,1,0,71,114,1,0,89,114,1,0, -91,114,1,0,172,114,1,0,132,115,1,0,137,115,1,0, -220,116,1,0,230,116,1,0,24,117,1,0,31,117,1,0, -40,117,1,0,48,117,1,0,139,117,1,0,146,117,1,0, -118,118,1,0,125,118,1,0,174,118,1,0,191,118,1,0, -238,118,1,0,219,119,1,0,226,119,1,0,243,119,1,0, -58,121,1,0,184,121,1,0,190,121,1,0,116,122,1,0, -203,122,1,0,249,122,1,0,115,124,1,0,248,124,1,0, -54,127,1,0,81,127,1,0,138,127,1,0,189,127,1,0, -1,128,1,0,12,128,1,0,18,128,1,0,51,128,1,0, -127,128,1,0,137,128,1,0,227,129,1,0,234,129,1,0, -243,129,1,0,252,129,1,0,12,130,1,0,27,130,1,0, -31,130,1,0,110,130,1,0,114,130,1,0,120,130,1,0, -77,134,1,0,107,134,1,0,64,136,1,0,76,136,1,0, -99,136,1,0,126,137,1,0,139,137,1,0,210,137,1,0, -0,138,1,0,55,140,1,0,70,140,1,0,85,140,1,0, -120,140,1,0,157,140,1,0,100,141,1,0,112,141,1,0, -179,141,1,0,171,142,1,0,202,142,1,0,155,143,1,0, -176,143,1,0,181,143,1,0,145,144,1,0,73,145,1,0, -198,145,1,0,204,145,1,0,209,145,1,0,119,149,1,0, -128,149,1,0,28,150,1,0,182,150,1,0,185,150,1,0, -232,150,1,0,81,151,1,0,94,151,1,0,98,151,1,0, -105,151,1,0,203,151,1,0,237,151,1,0,243,151,1,0, -1,152,1,0,168,152,1,0,219,152,1,0,223,152,1,0, -150,153,1,0,153,153,1,0,172,153,1,0,168,154,1,0, -216,154,1,0,223,154,1,0,37,155,1,0,47,155,1,0, -50,155,1,0,60,155,1,0,90,155,1,0,229,156,1,0, -117,158,1,0,127,158,1,0,165,158,1,0,187,158,1,0, -195,158,1,0,205,158,1,0,209,158,1,0,249,158,1,0, -253,158,1,0,14,159,1,0,19,159,1,0,32,159,1,0, -59,159,1,0,74,159,1,0,82,159,1,0,141,159,1,0, -156,159,1,0,160,159,1,0,68,83,1,0,69,83,2,8, -32,0,153,48,2,8,32,0,154,48,2,0,136,48,138,48, -2,0,179,48,200,48,1,0,0,17,1,0,1,17,1,0, -2,17,1,0,3,17,1,0,4,17,1,0,5,17,1,0, -26,17,1,0,6,17,1,0,7,17,1,0,8,17,1,0, -33,17,1,0,9,17,1,0,10,17,1,0,11,17,1,0, -12,17,1,0,13,17,1,0,14,17,1,0,15,17,1,0, -16,17,1,0,17,17,1,0,18,17,1,0,20,17,1,0, -21,17,1,0,199,17,1,0,200,17,1,0,204,17,1,0, -206,17,1,0,211,17,1,0,215,17,1,0,217,17,1,0, -28,17,1,0,221,17,1,0,223,17,1,0,29,17,1,0, 
-30,17,1,0,32,17,1,0,34,17,1,0,35,17,1,0, -39,17,1,0,41,17,1,0,43,17,1,0,44,17,1,0, -45,17,1,0,46,17,1,0,47,17,1,0,50,17,1,0, -54,17,1,0,64,17,1,0,71,17,1,0,76,17,1,0, -241,17,1,0,242,17,1,0,87,17,1,0,88,17,1,0, -89,17,1,0,132,17,1,0,133,17,1,0,136,17,1,0, -145,17,1,0,146,17,1,0,148,17,1,0,158,17,1,0, -161,17,1,0,9,78,1,0,219,86,1,0,10,78,1,0, -45,78,1,0,11,78,1,0,50,117,1,0,25,78,1,0, -1,78,1,0,41,89,1,0,48,87,3,0,40,0,0,17, -41,0,3,0,40,0,2,17,41,0,3,0,40,0,3,17, -41,0,3,0,40,0,5,17,41,0,3,0,40,0,6,17, -41,0,3,0,40,0,7,17,41,0,3,0,40,0,9,17, -41,0,3,0,40,0,11,17,41,0,3,0,40,0,12,17, -41,0,3,0,40,0,14,17,41,0,3,0,40,0,15,17, -41,0,3,0,40,0,16,17,41,0,3,0,40,0,17,17, -41,0,3,0,40,0,18,17,41,0,3,0,40,0,0,78, -41,0,3,0,40,0,140,78,41,0,3,0,40,0,9,78, -41,0,3,0,40,0,219,86,41,0,3,0,40,0,148,78, -41,0,3,0,40,0,109,81,41,0,3,0,40,0,3,78, -41,0,3,0,40,0,107,81,41,0,3,0,40,0,93,78, -41,0,3,0,40,0,65,83,41,0,3,0,40,0,8,103, -41,0,3,0,40,0,107,112,41,0,3,0,40,0,52,108, -41,0,3,0,40,0,40,103,41,0,3,0,40,0,209,145, -41,0,3,0,40,0,31,87,41,0,3,0,40,0,229,101, -41,0,3,0,40,0,42,104,41,0,3,0,40,0,9,103, -41,0,3,0,40,0,62,121,41,0,3,0,40,0,13,84, -41,0,3,0,40,0,121,114,41,0,3,0,40,0,161,140, -41,0,3,0,40,0,93,121,41,0,3,0,40,0,180,82, -41,0,3,0,40,0,227,78,41,0,3,0,40,0,124,84, -41,0,3,0,40,0,102,91,41,0,3,0,40,0,227,118, -41,0,3,0,40,0,1,79,41,0,3,0,40,0,199,140, -41,0,3,0,40,0,84,83,41,0,3,0,40,0,109,121, -41,0,3,0,40,0,17,79,41,0,3,0,40,0,234,129, -41,0,3,0,40,0,243,129,41,0,1,0,79,85,1,0, -124,94,1,0,143,123,3,0,112,0,116,0,101,0,2,0, -50,0,49,0,2,0,50,0,50,0,2,0,50,0,51,0, -2,0,50,0,52,0,2,0,50,0,53,0,2,0,50,0, -54,0,2,0,50,0,55,0,2,0,50,0,56,0,2,0, -50,0,57,0,2,0,51,0,48,0,2,0,51,0,49,0, -2,0,51,0,50,0,2,0,51,0,51,0,2,0,51,0, -52,0,2,0,51,0,53,0,1,0,148,78,1,0,109,81, -1,0,3,78,1,0,93,78,1,0,42,104,1,0,9,103, -1,0,62,121,1,0,13,84,1,0,121,114,1,0,161,140, -1,0,93,121,1,0,180,82,1,0,216,121,1,0,55,117, -1,0,105,144,1,0,42,81,1,0,112,83,1,0,232,108, -1,0,5,152,1,0,17,79,1,0,153,81,1,0,99,107, -1,0,230,93,1,0,243,83,1,0,59,83,1,0,151,91, -1,0,102,91,1,0,227,118,1,0,1,79,1,0,199,140, -1,0,84,83,1,0,28,89,2,0,51,0,54,0,2,0, -51,0,55,0,2,0,51,0,56,0,2,0,51,0,57,0, -2,0,52,0,48,0,2,0,52,0,49,0,2,0,52,0, -50,0,2,0,52,0,51,0,2,0,52,0,52,0,2,0, -52,0,53,0,2,0,52,0,54,0,2,0,52,0,55,0, -2,0,52,0,56,0,2,0,52,0,57,0,2,0,53,0, -48,0,2,0,49,0,8,103,2,0,50,0,8,103,2,0, -51,0,8,103,2,0,52,0,8,103,2,0,53,0,8,103, -2,0,54,0,8,103,2,0,55,0,8,103,2,0,56,0, -8,103,2,0,57,0,8,103,3,0,49,0,48,0,8,103, -3,0,49,0,49,0,8,103,3,0,49,0,50,0,8,103, -2,0,104,0,103,0,3,0,101,0,114,0,103,0,2,0, -101,0,118,0,3,0,108,0,116,0,100,0,1,0,162,48, -1,0,164,48,1,0,166,48,1,0,168,48,1,0,170,48, -1,0,171,48,1,0,173,48,1,0,175,48,1,0,177,48, -1,0,179,48,1,0,181,48,1,0,183,48,1,0,185,48, -1,0,187,48,1,0,189,48,1,0,191,48,1,0,193,48, -1,0,196,48,1,0,198,48,1,0,200,48,1,0,202,48, -1,0,203,48,1,0,204,48,1,0,205,48,1,0,206,48, -1,0,207,48,1,0,210,48,1,0,213,48,1,0,216,48, -1,0,219,48,1,0,222,48,1,0,223,48,1,0,224,48, -1,0,225,48,1,0,226,48,1,0,228,48,1,0,230,48, -1,0,232,48,1,0,233,48,1,0,234,48,1,0,235,48, -1,0,236,48,1,0,237,48,1,0,239,48,1,0,240,48, -1,0,241,48,1,0,242,48,2,0,228,78,140,84,4,0, -162,48,235,48,213,48,161,48,3,0,162,48,252,48,235,48, -3,0,164,48,243,48,193,48,3,0,166,48,169,48,243,48, -4,0,168,48,252,48,171,48,252,48,3,0,170,48,243,48, -185,48,3,0,170,48,252,48,224,48,3,0,171,48,164,48, -234,48,4,0,171,48,233,48,195,48,200,48,4,0,171,48, -237,48,234,48,252,48,4,0,173,48,229,48,234,48,252,48, 
-2,0,173,48,237,48,6,0,173,48,237,48,225,48,252,48, -200,48,235,48,5,0,173,48,237,48,239,48,195,48,200,48, -4,0,175,48,237,48,252,48,205,48,3,0,177,48,252,48, -185,48,3,0,179,48,235,48,202,48,4,0,181,48,164,48, -175,48,235,48,5,0,181,48,243,48,193,48,252,48,224,48, -3,0,187,48,243,48,193,48,3,0,187,48,243,48,200,48, -2,0,200,48,243,48,2,0,202,48,206,48,3,0,206,48, -195,48,200,48,3,0,207,48,164,48,196,48,4,0,213,48, -163,48,252,48,200,48,3,0,213,48,233,48,243,48,5,0, -216,48,175,48,191,48,252,48,235,48,3,0,216,48,235,48, -196,48,2,0,219,48,243,48,3,0,219,48,252,48,235,48, -3,0,219,48,252,48,243,48,4,0,222,48,164,48,175,48, -237,48,3,0,222,48,164,48,235,48,3,0,222,48,195,48, -207,48,3,0,222,48,235,48,175,48,5,0,222,48,243,48, -183,48,231,48,243,48,4,0,223,48,175,48,237,48,243,48, -2,0,223,48,234,48,4,0,225,48,252,48,200,48,235,48, -3,0,228,48,252,48,235,48,3,0,230,48,162,48,243,48, -4,0,234,48,195,48,200,48,235,48,2,0,234,48,233,48, -2,0,236,48,224,48,3,0,239,48,195,48,200,48,2,0, -48,0,185,112,2,0,49,0,185,112,2,0,50,0,185,112, -2,0,51,0,185,112,2,0,52,0,185,112,2,0,53,0, -185,112,2,0,54,0,185,112,2,0,55,0,185,112,2,0, -56,0,185,112,2,0,57,0,185,112,3,0,49,0,48,0, -185,112,3,0,49,0,49,0,185,112,3,0,49,0,50,0, -185,112,3,0,49,0,51,0,185,112,3,0,49,0,52,0, -185,112,3,0,49,0,53,0,185,112,3,0,49,0,54,0, -185,112,3,0,49,0,55,0,185,112,3,0,49,0,56,0, -185,112,3,0,49,0,57,0,185,112,3,0,50,0,48,0, -185,112,3,0,50,0,49,0,185,112,3,0,50,0,50,0, -185,112,3,0,50,0,51,0,185,112,3,0,50,0,52,0, -185,112,3,0,104,0,112,0,97,0,2,0,100,0,97,0, -2,0,97,0,117,0,3,0,98,0,97,0,114,0,2,0, -111,0,118,0,2,0,112,0,99,0,2,0,100,0,109,0, -3,0,100,0,109,0,50,0,3,0,100,0,109,0,51,0, -2,0,105,0,117,0,2,0,115,94,16,98,2,0,45,102, -140,84,2,0,39,89,99,107,2,0,14,102,187,108,4,0, -42,104,15,95,26,79,62,121,2,0,112,0,97,0,2,0, -110,0,97,0,2,0,188,3,97,0,2,0,109,0,97,0, -2,0,107,0,97,0,2,0,107,0,98,0,2,0,109,0, -98,0,2,0,103,0,98,0,3,0,99,0,97,0,108,0, -4,0,107,0,99,0,97,0,108,0,2,0,112,0,102,0, -2,0,110,0,102,0,2,0,188,3,102,0,2,0,188,3, -103,0,2,0,109,0,103,0,2,0,107,0,103,0,2,0, -104,0,122,0,3,0,107,0,104,0,122,0,3,0,109,0, -104,0,122,0,3,0,103,0,104,0,122,0,3,0,116,0, -104,0,122,0,2,0,188,3,108,0,2,0,109,0,108,0, -2,0,100,0,108,0,2,0,107,0,108,0,2,0,102,0, -109,0,2,0,110,0,109,0,2,0,188,3,109,0,2,0, -109,0,109,0,2,0,99,0,109,0,2,0,107,0,109,0, -3,0,109,0,109,0,50,0,3,0,99,0,109,0,50,0, -2,0,109,0,50,0,3,0,107,0,109,0,50,0,3,0, -109,0,109,0,51,0,3,0,99,0,109,0,51,0,2,0, -109,0,51,0,3,0,107,0,109,0,51,0,3,0,109,0, -21,34,115,0,4,0,109,0,21,34,115,0,50,0,3,0, -107,0,112,0,97,0,3,0,109,0,112,0,97,0,3,0, -103,0,112,0,97,0,3,0,114,0,97,0,100,0,5,0, -114,0,97,0,100,0,21,34,115,0,6,0,114,0,97,0, -100,0,21,34,115,0,50,0,2,0,112,0,115,0,2,0, -110,0,115,0,2,0,188,3,115,0,2,0,109,0,115,0, -2,0,112,0,118,0,2,0,110,0,118,0,2,0,188,3, -118,0,2,0,109,0,118,0,2,0,107,0,118,0,2,0, -112,0,119,0,2,0,110,0,119,0,2,0,188,3,119,0, -2,0,109,0,119,0,2,0,107,0,119,0,2,0,107,0, -201,3,2,0,109,0,201,3,4,0,97,0,46,0,109,0, -46,0,2,0,98,0,113,0,2,0,99,0,99,0,2,0, -99,0,100,0,4,0,99,0,21,34,107,0,103,0,3,0, -99,0,111,0,46,0,2,0,100,0,98,0,2,0,103,0, -121,0,2,0,104,0,97,0,2,0,104,0,112,0,2,0, -105,0,110,0,2,0,107,0,107,0,2,0,107,0,116,0, -2,0,108,0,109,0,2,0,108,0,110,0,3,0,108,0, -111,0,103,0,2,0,108,0,120,0,3,0,109,0,105,0, -108,0,3,0,109,0,111,0,108,0,2,0,112,0,104,0, -4,0,112,0,46,0,109,0,46,0,3,0,112,0,112,0, -109,0,2,0,112,0,114,0,2,0,115,0,114,0,2,0, -115,0,118,0,2,0,119,0,98,0,3,0,118,0,21,34, -109,0,3,0,97,0,21,34,109,0,2,0,49,0,229,101, 
-2,0,50,0,229,101,2,0,51,0,229,101,2,0,52,0, -229,101,2,0,53,0,229,101,2,0,54,0,229,101,2,0, -55,0,229,101,2,0,56,0,229,101,2,0,57,0,229,101, -3,0,49,0,48,0,229,101,3,0,49,0,49,0,229,101, -3,0,49,0,50,0,229,101,3,0,49,0,51,0,229,101, -3,0,49,0,52,0,229,101,3,0,49,0,53,0,229,101, -3,0,49,0,54,0,229,101,3,0,49,0,55,0,229,101, -3,0,49,0,56,0,229,101,3,0,49,0,57,0,229,101, -3,0,50,0,48,0,229,101,3,0,50,0,49,0,229,101, -3,0,50,0,50,0,229,101,3,0,50,0,51,0,229,101, -3,0,50,0,52,0,229,101,3,0,50,0,53,0,229,101, -3,0,50,0,54,0,229,101,3,0,50,0,55,0,229,101, -3,0,50,0,56,0,229,101,3,0,50,0,57,0,229,101, -3,0,51,0,48,0,229,101,3,0,51,0,49,0,229,101, -3,0,103,0,97,0,108,0,1,0,76,4,1,0,121,29, -1,0,108,2,1,0,158,2,1,0,135,2,1,0,83,171, -1,0,142,29,1,0,83,1,1,0,39,167,1,0,160,19, -1,0,161,19,1,0,162,19,1,0,163,19,1,0,164,19, -1,0,165,19,1,0,166,19,1,0,167,19,1,0,168,19, -1,0,169,19,1,0,170,19,1,0,171,19,1,0,172,19, -1,0,173,19,1,0,174,19,1,0,175,19,1,0,176,19, -1,0,177,19,1,0,178,19,1,0,179,19,1,0,180,19, -1,0,181,19,1,0,182,19,1,0,183,19,1,0,184,19, -1,0,185,19,1,0,186,19,1,0,187,19,1,0,188,19, -1,0,189,19,1,0,190,19,1,0,191,19,1,0,192,19, -1,0,193,19,1,0,194,19,1,0,195,19,1,0,196,19, -1,0,197,19,1,0,198,19,1,0,199,19,1,0,200,19, -1,0,201,19,1,0,202,19,1,0,203,19,1,0,204,19, -1,0,205,19,1,0,206,19,1,0,207,19,1,0,208,19, -1,0,209,19,1,0,210,19,1,0,211,19,1,0,212,19, -1,0,213,19,1,0,214,19,1,0,215,19,1,0,216,19, -1,0,217,19,1,0,218,19,1,0,219,19,1,0,220,19, -1,0,221,19,1,0,222,19,1,0,223,19,1,0,224,19, -1,0,225,19,1,0,226,19,1,0,227,19,1,0,228,19, -1,0,229,19,1,0,230,19,1,0,231,19,1,0,232,19, -1,0,233,19,1,0,234,19,1,0,235,19,1,0,236,19, -1,0,237,19,1,0,238,19,1,0,239,19,1,0,72,140, -1,0,244,102,1,0,200,140,1,0,209,110,1,0,50,78, -1,0,229,83,1,0,81,89,1,0,135,85,1,0,72,89, -1,0,246,97,1,0,105,118,1,0,133,127,1,0,63,134, -1,0,186,135,1,0,248,136,1,0,143,144,1,0,2,106, -1,0,27,109,1,0,217,112,1,0,222,115,1,0,61,132, -1,0,106,145,1,0,241,153,1,0,130,78,1,0,117,83, -1,0,4,107,1,0,27,114,1,0,45,134,1,0,30,158, -1,0,80,93,1,0,235,111,1,0,205,133,1,0,100,137, -1,0,201,98,1,0,216,129,1,0,31,136,1,0,202,94, -1,0,23,103,1,0,106,109,1,0,252,114,1,0,206,144, -1,0,134,79,1,0,183,81,1,0,222,82,1,0,196,100, -1,0,211,106,1,0,16,114,1,0,231,118,1,0,6,134, -1,0,92,134,1,0,239,141,1,0,50,151,1,0,111,155, -1,0,250,157,1,0,140,120,1,0,127,121,1,0,160,125, -1,0,201,131,1,0,4,147,1,0,214,138,1,0,223,88, -1,0,4,95,1,0,96,124,1,0,126,128,1,0,98,114, -1,0,202,120,1,0,194,140,1,0,247,150,1,0,216,88, -1,0,98,92,1,0,19,106,1,0,218,109,1,0,15,111, -1,0,47,125,1,0,55,126,1,0,75,150,1,0,210,82, -1,0,139,128,1,0,220,81,1,0,204,81,1,0,28,122, -1,0,190,125,1,0,241,131,1,0,117,150,1,0,128,139, -1,0,207,98,1,0,254,138,1,0,57,78,1,0,231,91, -1,0,18,96,1,0,135,115,1,0,112,117,1,0,23,83, -1,0,251,120,1,0,191,79,1,0,169,95,1,0,13,78, -1,0,204,108,1,0,120,101,1,0,34,125,1,0,195,83, -1,0,94,88,1,0,1,119,1,0,73,132,1,0,170,138, -1,0,186,107,1,0,136,108,1,0,254,98,1,0,229,130, -1,0,160,99,1,0,101,117,1,0,174,78,1,0,105,81, -1,0,201,81,1,0,129,104,1,0,231,124,1,0,111,130, -1,0,210,138,1,0,207,145,1,0,245,82,1,0,66,84, -1,0,236,94,1,0,197,101,1,0,254,111,1,0,42,121, -1,0,173,149,1,0,106,154,1,0,151,158,1,0,206,158, -1,0,198,102,1,0,119,107,1,0,98,143,1,0,116,94, -1,0,144,97,1,0,0,98,1,0,154,100,1,0,35,111, -1,0,73,113,1,0,137,116,1,0,202,121,1,0,244,125, -1,0,111,128,1,0,38,143,1,0,238,132,1,0,35,144, -1,0,74,147,1,0,23,82,1,0,163,82,1,0,189,84, -1,0,200,112,1,0,194,136,1,0,201,94,1,0,245,95, -1,0,123,99,1,0,174,107,1,0,62,124,1,0,117,115, 
-1,0,228,78,1,0,249,86,1,0,186,93,1,0,28,96, -1,0,178,115,1,0,105,116,1,0,154,127,1,0,70,128, -1,0,52,146,1,0,246,150,1,0,72,151,1,0,24,152, -1,0,139,79,1,0,174,121,1,0,180,145,1,0,184,150, -1,0,225,96,1,0,134,78,1,0,218,80,1,0,238,91, -1,0,63,92,1,0,153,101,1,0,206,113,1,0,66,118, -1,0,252,132,1,0,124,144,1,0,136,102,1,0,46,150, -1,0,137,82,1,0,123,103,1,0,243,103,1,0,65,109, -1,0,156,110,1,0,9,116,1,0,89,117,1,0,107,120, -1,0,16,125,1,0,94,152,1,0,46,98,1,0,120,150, -1,0,43,80,1,0,25,93,1,0,234,109,1,0,42,143, -1,0,139,95,1,0,68,97,1,0,23,104,1,0,134,150, -1,0,41,82,1,0,15,84,1,0,101,92,1,0,19,102, -1,0,78,103,1,0,168,104,1,0,229,108,1,0,6,116, -1,0,226,117,1,0,121,127,1,0,207,136,1,0,225,136, -1,0,226,150,1,0,63,83,1,0,186,110,1,0,29,84, -1,0,208,113,1,0,152,116,1,0,250,133,1,0,163,150, -1,0,87,156,1,0,159,158,1,0,151,103,1,0,203,109, -1,0,232,129,1,0,32,123,1,0,146,124,1,0,192,114, -1,0,153,112,1,0,88,139,1,0,192,78,1,0,54,131, -1,0,58,82,1,0,7,82,1,0,166,94,1,0,211,98, -1,0,214,124,1,0,133,91,1,0,30,109,1,0,180,102, -1,0,59,143,1,0,77,150,1,0,211,94,1,0,64,81, -1,0,192,85,1,0,90,88,1,0,116,102,1,0,222,81, -1,0,42,115,1,0,202,118,1,0,60,121,1,0,94,121, -1,0,101,121,1,0,143,121,1,0,86,151,1,0,190,124, -1,0,18,134,1,0,248,138,1,0,56,144,1,0,253,144, -1,0,239,152,1,0,252,152,1,0,40,153,1,0,180,157, -1,0,222,144,1,0,183,150,1,0,174,79,1,0,231,80, -1,0,77,81,1,0,201,82,1,0,228,82,1,0,81,83, -1,0,157,85,1,0,6,86,1,0,104,86,1,0,64,88, -1,0,168,88,1,0,100,92,1,0,148,96,1,0,104,97, -1,0,142,97,1,0,242,97,1,0,79,101,1,0,226,101, -1,0,145,102,1,0,133,104,1,0,119,109,1,0,26,110, -1,0,34,111,1,0,110,113,1,0,43,114,1,0,34,116, -1,0,145,120,1,0,73,121,1,0,72,121,1,0,80,121, -1,0,86,121,1,0,141,121,1,0,142,121,1,0,64,122, -1,0,129,122,1,0,192,123,1,0,9,126,1,0,65,126, -1,0,114,127,1,0,5,128,1,0,237,129,1,0,121,130, -1,0,87,132,1,0,16,137,1,0,150,137,1,0,1,139, -1,0,57,139,1,0,211,140,1,0,8,141,1,0,182,143, -1,0,227,150,1,0,255,151,1,0,59,152,1,0,117,96, -2,0,80,216,238,222,1,0,24,130,1,0,38,78,1,0, -181,81,1,0,104,81,1,0,128,79,1,0,69,81,1,0, -128,81,1,0,199,82,1,0,250,82,1,0,85,85,1,0, -153,85,1,0,226,85,1,0,179,88,1,0,68,89,1,0, -84,89,1,0,98,90,1,0,40,91,1,0,210,94,1,0, -217,94,1,0,105,95,1,0,173,95,1,0,216,96,1,0, -78,97,1,0,8,97,1,0,96,97,1,0,52,98,1,0, -196,99,1,0,28,100,1,0,82,100,1,0,86,101,1,0, -27,103,1,0,86,103,1,0,219,110,1,0,203,110,1,0, -30,112,1,0,167,119,1,0,53,114,1,0,175,114,1,0, -113,116,1,0,6,117,1,0,59,117,1,0,29,118,1,0, -31,118,1,0,219,118,1,0,244,118,1,0,74,119,1,0, -64,119,1,0,204,120,1,0,177,122,1,0,123,124,1,0, -91,125,1,0,62,127,1,0,82,131,1,0,239,131,1,0, -121,135,1,0,65,137,1,0,134,137,1,0,191,138,1,0, -203,138,1,0,237,138,1,0,138,139,1,0,56,143,1,0, -114,144,1,0,153,145,1,0,118,146,1,0,124,150,1,0, -219,151,1,0,11,152,1,0,18,155,2,0,74,216,74,220, -2,0,74,216,68,220,2,0,76,216,213,223,1,0,157,59, -1,0,24,64,1,0,57,64,2,0,84,216,73,222,2,0, -87,216,208,220,2,0,95,216,211,222,1,0,67,159,1,0, -142,159,2,0,102,0,102,0,2,0,102,0,105,0,2,0, -102,0,108,0,3,0,102,0,102,0,105,0,3,0,102,0, -102,0,108,0,2,0,115,0,116,0,2,0,116,5,118,5, -2,0,116,5,101,5,2,0,116,5,107,5,2,0,126,5, -118,5,2,0,116,5,109,5,2,14,217,5,180,5,2,17, -242,5,183,5,1,0,226,5,1,0,212,5,1,0,219,5, -1,0,220,5,1,0,221,5,1,0,232,5,1,0,234,5, -2,24,233,5,193,5,2,25,233,5,194,5,3,24,233,5, -188,5,193,5,3,25,233,5,188,5,194,5,2,17,208,5, -183,5,2,18,208,5,184,5,2,21,208,5,188,5,2,21, -209,5,188,5,2,21,210,5,188,5,2,21,211,5,188,5, -2,21,212,5,188,5,2,21,213,5,188,5,2,21,214,5, 
-188,5,2,21,216,5,188,5,2,21,217,5,188,5,2,21, -218,5,188,5,2,21,219,5,188,5,2,21,220,5,188,5, -2,21,222,5,188,5,2,21,224,5,188,5,2,21,225,5, -188,5,2,21,227,5,188,5,2,21,228,5,188,5,2,21, -230,5,188,5,2,21,231,5,188,5,2,21,232,5,188,5, -2,21,233,5,188,5,2,21,234,5,188,5,2,19,213,5, -185,5,2,23,209,5,191,5,2,23,219,5,191,5,2,23, -228,5,191,5,2,0,208,5,220,5,1,0,113,6,1,0, -123,6,1,0,126,6,1,0,128,6,1,0,122,6,1,0, -127,6,1,0,121,6,1,0,164,6,1,0,166,6,1,0, -132,6,1,0,131,6,1,0,134,6,1,0,135,6,1,0, -141,6,1,0,140,6,1,0,142,6,1,0,136,6,1,0, -152,6,1,0,145,6,1,0,169,6,1,0,175,6,1,0, -179,6,1,0,177,6,1,0,186,6,1,0,187,6,1,0, -193,6,1,0,190,6,1,0,210,6,1,0,173,6,1,0, -199,6,1,0,198,6,1,0,200,6,1,0,203,6,1,0, -197,6,1,0,201,6,1,0,208,6,1,0,73,6,1,0, -204,6,2,0,40,6,44,6,2,0,40,6,45,6,2,0, -40,6,46,6,2,0,40,6,69,6,2,0,40,6,73,6, -2,0,40,6,74,6,2,0,42,6,44,6,2,0,42,6, -45,6,2,0,42,6,46,6,2,0,42,6,69,6,2,0, -42,6,73,6,2,0,42,6,74,6,2,0,43,6,44,6, -2,0,43,6,69,6,2,0,43,6,73,6,2,0,43,6, -74,6,2,0,44,6,45,6,2,0,44,6,69,6,2,0, -45,6,44,6,2,0,45,6,69,6,2,0,46,6,44,6, -2,0,46,6,45,6,2,0,46,6,69,6,2,0,51,6, -44,6,2,0,51,6,45,6,2,0,51,6,46,6,2,0, -51,6,69,6,2,0,53,6,45,6,2,0,53,6,69,6, -2,0,54,6,44,6,2,0,54,6,45,6,2,0,54,6, -46,6,2,0,54,6,69,6,2,0,55,6,45,6,2,0, -55,6,69,6,2,0,56,6,69,6,2,0,57,6,44,6, -2,0,57,6,69,6,2,0,58,6,44,6,2,0,58,6, -69,6,2,0,65,6,44,6,2,0,65,6,45,6,2,0, -65,6,46,6,2,0,65,6,69,6,2,0,65,6,73,6, -2,0,65,6,74,6,2,0,66,6,45,6,2,0,66,6, -69,6,2,0,66,6,73,6,2,0,66,6,74,6,2,0, -67,6,39,6,2,0,67,6,44,6,2,0,67,6,45,6, -2,0,67,6,46,6,2,0,67,6,68,6,2,0,67,6, -69,6,2,0,67,6,73,6,2,0,67,6,74,6,2,0, -68,6,44,6,2,0,68,6,45,6,2,0,68,6,46,6, -2,0,68,6,69,6,2,0,68,6,73,6,2,0,68,6, -74,6,2,0,69,6,44,6,2,0,69,6,45,6,2,0, -69,6,46,6,2,0,69,6,69,6,2,0,69,6,73,6, -2,0,69,6,74,6,2,0,70,6,44,6,2,0,70,6, -45,6,2,0,70,6,46,6,2,0,70,6,69,6,2,0, -70,6,73,6,2,0,70,6,74,6,2,0,71,6,44,6, -2,0,71,6,69,6,2,0,71,6,73,6,2,0,71,6, -74,6,2,0,74,6,44,6,2,0,74,6,45,6,2,0, -74,6,46,6,2,0,74,6,69,6,2,0,74,6,73,6, -2,0,74,6,74,6,2,35,48,6,112,6,2,35,49,6, -112,6,2,35,73,6,112,6,3,33,32,0,76,6,81,6, -3,33,32,0,77,6,81,6,3,33,32,0,78,6,81,6, -3,33,32,0,79,6,81,6,3,33,32,0,80,6,81,6, -3,35,32,0,81,6,112,6,2,0,40,6,49,6,2,0, -40,6,50,6,2,0,40,6,70,6,2,0,42,6,49,6, -2,0,42,6,50,6,2,0,42,6,70,6,2,0,43,6, -49,6,2,0,43,6,50,6,2,0,43,6,70,6,2,0, -69,6,39,6,2,0,70,6,49,6,2,0,70,6,50,6, -2,0,70,6,70,6,2,0,74,6,49,6,2,0,74,6, -50,6,2,0,74,6,70,6,2,0,40,6,71,6,2,0, -42,6,71,6,2,0,53,6,46,6,2,0,68,6,71,6, -2,0,70,6,71,6,2,35,71,6,112,6,2,0,74,6, -71,6,2,0,43,6,71,6,2,0,51,6,71,6,2,0, -52,6,69,6,2,0,52,6,71,6,3,33,64,6,78,6, -81,6,3,33,64,6,79,6,81,6,3,33,64,6,80,6, -81,6,2,0,55,6,73,6,2,0,55,6,74,6,2,0, -57,6,73,6,2,0,57,6,74,6,2,0,58,6,73,6, -2,0,58,6,74,6,2,0,51,6,73,6,2,0,51,6, -74,6,2,0,52,6,73,6,2,0,52,6,74,6,2,0, -45,6,73,6,2,0,45,6,74,6,2,0,44,6,73,6, -2,0,44,6,74,6,2,0,46,6,73,6,2,0,46,6, -74,6,2,0,53,6,73,6,2,0,53,6,74,6,2,0, -54,6,73,6,2,0,54,6,74,6,2,0,52,6,44,6, -2,0,52,6,45,6,2,0,52,6,46,6,2,0,52,6, -49,6,2,0,51,6,49,6,2,0,53,6,49,6,2,0, -54,6,49,6,2,27,39,6,75,6,3,0,42,6,44,6, -69,6,3,0,42,6,45,6,44,6,3,0,42,6,45,6, -69,6,3,0,42,6,46,6,69,6,3,0,42,6,69,6, -44,6,3,0,42,6,69,6,45,6,3,0,42,6,69,6, -46,6,3,0,44,6,69,6,45,6,3,0,45,6,69,6, -74,6,3,0,45,6,69,6,73,6,3,0,51,6,45,6, -44,6,3,0,51,6,44,6,45,6,3,0,51,6,44,6, -73,6,3,0,51,6,69,6,45,6,3,0,51,6,69,6, -44,6,3,0,51,6,69,6,69,6,3,0,53,6,45,6, -45,6,3,0,53,6,69,6,69,6,3,0,52,6,45,6, -69,6,3,0,52,6,44,6,74,6,3,0,52,6,69,6, -46,6,3,0,52,6,69,6,69,6,3,0,54,6,45,6, 
-73,6,3,0,54,6,46,6,69,6,3,0,55,6,69,6, -45,6,3,0,55,6,69,6,69,6,3,0,55,6,69,6, -74,6,3,0,57,6,44,6,69,6,3,0,57,6,69,6, -69,6,3,0,57,6,69,6,73,6,3,0,58,6,69,6, -69,6,3,0,58,6,69,6,74,6,3,0,58,6,69,6, -73,6,3,0,65,6,46,6,69,6,3,0,66,6,69,6, -45,6,3,0,66,6,69,6,69,6,3,0,68,6,45,6, -69,6,3,0,68,6,45,6,74,6,3,0,68,6,45,6, -73,6,3,0,68,6,44,6,44,6,3,0,68,6,46,6, -69,6,3,0,68,6,69,6,45,6,3,0,69,6,45,6, -44,6,3,0,69,6,45,6,69,6,3,0,69,6,45,6, -74,6,3,0,69,6,44,6,45,6,3,0,69,6,44,6, -69,6,3,0,69,6,46,6,44,6,3,0,69,6,46,6, -69,6,3,0,69,6,44,6,46,6,3,0,71,6,69,6, -44,6,3,0,71,6,69,6,69,6,3,0,70,6,45,6, -69,6,3,0,70,6,45,6,73,6,3,0,70,6,44,6, -69,6,3,0,70,6,44,6,73,6,3,0,70,6,69,6, -74,6,3,0,70,6,69,6,73,6,3,0,74,6,69,6, -69,6,3,0,40,6,46,6,74,6,3,0,42,6,44,6, -74,6,3,0,42,6,44,6,73,6,3,0,42,6,46,6, -74,6,3,0,42,6,46,6,73,6,3,0,42,6,69,6, -74,6,3,0,42,6,69,6,73,6,3,0,44,6,69,6, -74,6,3,0,44,6,45,6,73,6,3,0,44,6,69,6, -73,6,3,0,51,6,46,6,73,6,3,0,53,6,45,6, -74,6,3,0,52,6,45,6,74,6,3,0,54,6,45,6, -74,6,3,0,68,6,44,6,74,6,3,0,68,6,69,6, -74,6,3,0,74,6,45,6,74,6,3,0,74,6,44,6, -74,6,3,0,74,6,69,6,74,6,3,0,69,6,69,6, -74,6,3,0,66,6,69,6,74,6,3,0,70,6,45,6, -74,6,3,0,57,6,69,6,74,6,3,0,67,6,69,6, -74,6,3,0,70,6,44,6,45,6,3,0,69,6,46,6, -74,6,3,0,68,6,44,6,69,6,3,0,67,6,69,6, -69,6,3,0,44,6,45,6,74,6,3,0,45,6,44,6, -74,6,3,0,69,6,44,6,74,6,3,0,65,6,69,6, -74,6,3,0,40,6,45,6,74,6,3,0,51,6,46,6, -74,6,3,0,70,6,44,6,74,6,3,0,53,6,68,6, -210,6,3,0,66,6,68,6,210,6,4,0,39,6,68,6, -68,6,71,6,4,0,39,6,67,6,40,6,49,6,4,0, -69,6,45,6,69,6,47,6,4,0,53,6,68,6,57,6, -69,6,4,0,49,6,51,6,72,6,68,6,4,0,57,6, -68,6,74,6,71,6,4,0,72,6,51,6,68,6,69,6, -3,0,53,6,68,6,73,6,18,0,53,6,68,6,73,6, -32,0,39,6,68,6,68,6,71,6,32,0,57,6,68,6, -74,6,71,6,32,0,72,6,51,6,68,6,69,6,8,0, -44,6,68,6,32,0,44,6,68,6,39,6,68,6,71,6, -4,0,49,6,204,6,39,6,68,6,1,0,44,0,1,0, -1,48,1,0,2,48,1,0,58,0,1,0,33,0,1,0, -63,0,1,0,22,48,1,0,23,48,1,0,20,32,1,0, -19,32,1,0,95,0,1,0,123,0,1,0,125,0,1,0, -20,48,1,0,21,48,1,0,16,48,1,0,17,48,1,0, -10,48,1,0,11,48,1,0,12,48,1,0,13,48,1,0, -14,48,1,0,15,48,1,0,91,0,1,0,93,0,1,0, -35,0,1,0,38,0,1,0,42,0,1,0,45,0,1,0, -60,0,1,0,62,0,1,0,92,0,1,0,36,0,1,0, -37,0,1,0,64,0,2,27,32,0,75,6,2,27,64,6, -75,6,2,28,32,0,76,6,2,29,32,0,77,6,2,30, -32,0,78,6,2,30,64,6,78,6,2,31,32,0,79,6, -2,31,64,6,79,6,2,32,32,0,80,6,2,32,64,6, -80,6,2,33,32,0,81,6,2,33,64,6,81,6,2,34, -32,0,82,6,2,34,64,6,82,6,1,0,33,6,1,0, -39,6,1,0,40,6,1,0,41,6,1,0,42,6,1,0, -43,6,1,0,44,6,1,0,45,6,1,0,46,6,1,0, -47,6,1,0,48,6,1,0,49,6,1,0,50,6,1,0, -51,6,1,0,52,6,1,0,53,6,1,0,54,6,1,0, -55,6,1,0,56,6,1,0,57,6,1,0,58,6,1,0, -65,6,1,0,66,6,1,0,67,6,1,0,68,6,1,0, -69,6,1,0,70,6,1,0,71,6,1,0,72,6,1,0, -74,6,2,0,68,6,39,6,1,0,34,0,1,0,39,0, -1,0,47,0,1,0,94,0,1,0,124,0,1,0,126,0, -1,0,133,41,1,0,134,41,1,0,251,48,1,0,161,48, -1,0,163,48,1,0,165,48,1,0,167,48,1,0,169,48, -1,0,227,48,1,0,229,48,1,0,231,48,1,0,195,48, -1,0,252,48,1,0,243,48,1,0,162,0,1,0,163,0, -1,0,172,0,1,0,166,0,1,0,165,0,1,0,169,32, -1,0,2,37,1,0,144,33,1,0,145,33,1,0,146,33, -1,0,147,33,1,0,160,37,1,0,203,37,4,216,52,216, -87,221,52,216,101,221,4,216,52,216,88,221,52,216,101,221, -6,216,52,216,88,221,52,216,101,221,52,216,110,221,6,216, -52,216,88,221,52,216,101,221,52,216,111,221,6,216,52,216, -88,221,52,216,101,221,52,216,112,221,6,216,52,216,88,221, -52,216,101,221,52,216,113,221,6,216,52,216,88,221,52,216, -101,221,52,216,114,221,4,216,52,216,185,221,52,216,101,221, -4,216,52,216,186,221,52,216,101,221,6,216,52,216,185,221, -52,216,101,221,52,216,110,221,6,216,52,216,186,221,52,216, 
-101,221,52,216,110,221,6,216,52,216,185,221,52,216,101,221, -52,216,111,221,6,216,52,216,186,221,52,216,101,221,52,216, -111,221,1,0,49,1,1,0,55,2,1,0,177,3,1,0, -181,3,1,0,182,3,1,0,183,3,1,0,186,3,1,0, -187,3,1,0,189,3,1,0,190,3,1,0,191,3,1,0, -195,3,1,0,196,3,1,0,197,3,1,0,200,3,1,0, -7,34,1,0,2,34,1,0,221,3,1,0,110,6,1,0, -161,6,1,0,111,6,2,0,48,0,46,0,2,0,48,0, -44,0,2,0,49,0,44,0,2,0,50,0,44,0,2,0, -51,0,44,0,2,0,52,0,44,0,2,0,53,0,44,0, -2,0,54,0,44,0,2,0,55,0,44,0,2,0,56,0, -44,0,2,0,57,0,44,0,3,0,20,48,115,0,21,48, -2,0,119,0,122,0,2,0,104,0,118,0,2,0,115,0, -100,0,3,0,112,0,112,0,118,0,2,0,119,0,99,0, -2,0,109,0,99,0,2,0,109,0,100,0,2,0,109,0, -114,0,2,0,100,0,106,0,2,0,123,48,75,48,2,0, -179,48,179,48,1,0,87,91,1,0,204,83,1,0,26,89, -1,0,227,137,1,0,164,78,1,0,32,102,1,0,33,113, -1,0,77,82,1,0,140,95,1,0,141,81,1,0,176,101, -1,0,29,82,1,0,66,125,1,0,169,140,1,0,240,88, -1,0,57,84,1,0,20,111,1,0,149,98,1,0,85,99, -1,0,74,144,1,0,7,99,1,0,83,98,1,0,129,121, -1,0,122,122,1,0,8,84,1,0,128,110,1,0,51,117, -1,0,114,82,1,0,182,85,1,0,77,145,3,0,20,48, -44,103,21,48,3,0,20,48,9,78,21,48,3,0,20,48, -140,78,21,48,3,0,20,48,137,91,21,48,3,0,20,48, -185,112,21,48,3,0,20,48,83,98,21,48,3,0,20,48, -215,118,21,48,3,0,20,48,221,82,21,48,3,0,20,48, -87,101,21,48,1,0,151,95,1,0,239,83,1,0,61,78, -1,0,56,78,1,0,65,78,2,0,64,216,34,221,1,0, -96,79,1,0,187,79,1,0,2,80,1,0,122,80,1,0, -153,80,1,0,207,80,1,0,158,52,2,0,65,216,58,222, -1,0,84,81,1,0,100,81,1,0,119,81,2,0,65,216, -28,221,1,0,185,52,1,0,103,81,2,0,65,216,75,221, -1,0,151,81,1,0,164,81,1,0,204,78,1,0,172,81, -2,0,100,216,223,221,1,0,3,82,1,0,223,52,1,0, -59,82,1,0,70,82,1,0,119,82,1,0,21,53,1,0, -5,83,1,0,6,83,1,0,73,83,1,0,90,83,1,0, -115,83,1,0,125,83,1,0,127,83,2,0,66,216,44,222, -1,0,112,112,1,0,202,83,1,0,223,83,2,0,66,216, -99,223,1,0,235,83,1,0,241,83,1,0,6,84,1,0, -158,84,1,0,56,84,1,0,72,84,1,0,104,84,1,0, -162,84,1,0,246,84,1,0,16,85,1,0,83,85,1,0, -99,85,1,0,132,85,1,0,171,85,1,0,179,85,1,0, -194,85,1,0,22,87,1,0,23,87,1,0,81,86,1,0, -116,86,1,0,238,88,1,0,206,87,1,0,244,87,1,0, -13,88,1,0,139,87,1,0,50,88,1,0,49,88,1,0, -172,88,2,0,69,216,228,220,1,0,242,88,1,0,247,88, -1,0,6,89,1,0,34,89,1,0,98,89,2,0,69,216, -168,222,2,0,69,216,234,222,1,0,236,89,1,0,27,90, -1,0,39,90,1,0,216,89,1,0,102,90,1,0,238,54, -1,0,252,54,1,0,8,91,1,0,62,91,2,0,70,216, -200,221,1,0,195,91,1,0,216,91,1,0,243,91,2,0, -70,216,24,223,1,0,255,91,1,0,6,92,1,0,83,95, -1,0,129,55,1,0,96,92,1,0,192,92,1,0,141,92, -2,0,71,216,228,221,1,0,67,93,2,0,71,216,230,221, -1,0,110,93,1,0,107,93,1,0,124,93,1,0,225,93, -1,0,226,93,1,0,47,56,1,0,253,93,1,0,40,94, -1,0,61,94,1,0,105,94,1,0,98,56,2,0,72,216, -131,221,1,0,124,56,1,0,176,94,1,0,179,94,1,0, -182,94,2,0,104,216,146,223,2,0,72,216,49,223,1,0, -1,130,1,0,34,95,1,0,199,56,2,0,76,216,184,222, -2,0,88,216,218,221,1,0,98,95,1,0,107,95,1,0, -227,56,1,0,154,95,1,0,205,95,1,0,215,95,1,0, -249,95,1,0,129,96,1,0,58,57,1,0,28,57,2,0, -73,216,212,222,1,0,199,96,1,0,72,97,1,0,76,97, -1,0,122,97,1,0,178,97,1,0,164,97,1,0,175,97, -1,0,222,97,1,0,16,98,1,0,27,98,1,0,93,98, -1,0,177,98,1,0,212,98,1,0,80,99,2,0,74,216, -12,223,1,0,61,99,1,0,252,98,1,0,104,99,1,0, -131,99,1,0,228,99,2,0,74,216,241,223,1,0,34,100, -1,0,197,99,1,0,169,99,1,0,46,58,1,0,105,100, -1,0,126,100,1,0,157,100,1,0,119,100,1,0,108,58, -1,0,108,101,2,0,76,216,10,220,1,0,227,101,1,0, -248,102,1,0,73,102,1,0,25,59,1,0,8,59,1,0, -228,58,1,0,146,81,1,0,149,81,1,0,0,103,1,0, -156,102,1,0,173,128,1,0,217,67,1,0,33,103,1,0, 
-94,103,1,0,83,103,2,0,76,216,195,223,1,0,73,59, -1,0,250,103,1,0,133,103,1,0,82,104,2,0,77,216, -109,220,1,0,142,104,1,0,31,104,1,0,20,105,1,0, -66,105,1,0,163,105,1,0,234,105,1,0,168,106,2,0, -77,216,163,222,1,0,219,106,1,0,24,60,1,0,33,107, -2,0,78,216,167,220,1,0,84,107,1,0,78,60,1,0, -114,107,1,0,159,107,1,0,187,107,2,0,78,216,141,222, -2,0,71,216,11,221,2,0,78,216,250,222,1,0,78,108, -2,0,79,216,188,220,1,0,191,108,1,0,205,108,1,0, -103,108,1,0,22,109,1,0,62,109,1,0,105,109,1,0, -120,109,1,0,133,109,2,0,79,216,30,221,1,0,52,109, -1,0,47,110,1,0,110,110,1,0,51,61,1,0,199,110, -2,0,79,216,209,222,1,0,249,109,1,0,110,111,2,0, -79,216,94,223,2,0,79,216,142,223,1,0,198,111,1,0, -57,112,1,0,27,112,1,0,150,61,1,0,74,112,1,0, -125,112,1,0,119,112,1,0,173,112,2,0,65,216,37,221, -1,0,69,113,2,0,80,216,99,222,1,0,156,113,2,0, -80,216,171,223,1,0,40,114,1,0,80,114,2,0,81,216, -8,222,1,0,128,114,1,0,149,114,2,0,81,216,53,223, -2,0,82,216,20,220,1,0,122,115,1,0,139,115,1,0, -172,62,1,0,165,115,1,0,184,62,1,0,71,116,1,0, -92,116,1,0,133,116,1,0,202,116,1,0,27,63,1,0, -36,117,2,0,83,216,54,220,1,0,62,117,2,0,83,216, -146,220,2,0,72,216,159,221,1,0,16,118,2,0,83,216, -161,223,2,0,83,216,184,223,2,0,84,216,68,220,1,0, -252,63,1,0,8,64,2,0,84,216,243,220,2,0,84,216, -242,220,2,0,84,216,25,221,2,0,84,216,51,221,1,0, -30,119,1,0,31,119,1,0,139,119,1,0,70,64,1,0, -150,64,2,0,85,216,29,220,1,0,78,120,1,0,227,64, -2,0,85,216,38,222,2,0,85,216,154,222,2,0,85,216, -197,222,1,0,235,121,1,0,47,65,1,0,74,122,1,0, -79,122,2,0,86,216,124,221,2,0,86,216,167,222,1,0, -238,122,1,0,2,66,2,0,86,216,171,223,1,0,198,123, -1,0,201,123,1,0,39,66,2,0,87,216,128,220,1,0, -210,124,1,0,160,66,1,0,232,124,1,0,227,124,1,0, -0,125,2,0,87,216,134,223,1,0,99,125,1,0,1,67, -1,0,199,125,1,0,2,126,1,0,69,126,1,0,52,67, -2,0,88,216,40,222,2,0,88,216,71,222,1,0,89,67, -2,0,88,216,217,222,1,0,122,127,2,0,88,216,62,223, -1,0,149,127,1,0,250,127,2,0,89,216,218,220,2,0, -89,216,35,221,1,0,96,128,2,0,89,216,168,221,1,0, -112,128,2,0,76,216,95,223,1,0,213,67,1,0,178,128, -1,0,3,129,1,0,11,68,1,0,62,129,1,0,181,90, -2,0,89,216,167,223,2,0,89,216,181,223,2,0,76,216, -147,223,2,0,76,216,156,223,1,0,4,130,1,0,158,143, -1,0,107,68,1,0,145,130,1,0,139,130,1,0,157,130, -1,0,179,82,1,0,177,130,1,0,179,130,1,0,189,130, -1,0,230,130,2,0,90,216,60,223,1,0,29,131,1,0, -99,131,1,0,173,131,1,0,35,131,1,0,189,131,1,0, -231,131,1,0,83,131,1,0,202,131,1,0,204,131,1,0, -220,131,2,0,91,216,54,220,2,0,91,216,107,221,2,0, -91,216,213,220,1,0,43,69,1,0,241,132,1,0,243,132, -1,0,22,133,2,0,92,216,202,223,1,0,100,133,2,0, -91,216,44,223,1,0,93,69,1,0,97,69,2,0,91,216, -177,223,2,0,92,216,210,220,1,0,107,69,1,0,80,134, -1,0,103,134,1,0,105,134,1,0,169,134,1,0,136,134, -1,0,14,135,1,0,226,134,1,0,40,135,1,0,107,135, -1,0,134,135,1,0,215,69,1,0,225,135,1,0,1,136, -1,0,249,69,1,0,96,136,2,0,93,216,103,222,1,0, -215,136,1,0,222,136,1,0,53,70,1,0,250,136,1,0, -187,52,2,0,94,216,174,220,2,0,94,216,102,221,1,0, -190,70,1,0,199,70,1,0,160,138,2,0,95,216,168,220, -1,0,171,140,1,0,193,140,1,0,27,141,1,0,119,141, -2,0,95,216,47,223,2,0,66,216,4,220,1,0,203,141, -1,0,188,141,1,0,240,141,2,0,66,216,222,220,1,0, -212,142,2,0,97,216,210,221,2,0,97,216,237,221,1,0, -148,144,1,0,241,144,1,0,17,145,2,0,97,216,46,223, -1,0,27,145,1,0,56,146,1,0,215,146,1,0,216,146, -1,0,124,146,1,0,249,147,1,0,21,148,2,0,98,216, -250,223,1,0,139,149,1,0,149,73,1,0,183,149,2,0, -99,216,119,221,1,0,230,73,1,0,195,150,1,0,178,93, -1,0,35,151,2,0,100,216,69,221,2,0,100,216,26,222, 
-1,0,110,74,1,0,118,74,1,0,224,151,2,0,101,216, -10,220,1,0,178,74,2,0,101,216,150,220,1,0,41,152, -2,0,101,216,182,221,1,0,226,152,1,0,51,75,1,0, -41,153,1,0,167,153,1,0,194,153,1,0,254,153,1,0, -206,75,2,0,102,216,48,223,1,0,64,156,1,0,253,156, -1,0,206,76,1,0,237,76,1,0,103,157,2,0,104,216, -206,220,1,0,248,76,2,0,104,216,5,221,2,0,104,216, -14,222,2,0,104,216,145,222,1,0,86,77,1,0,254,158, -1,0,5,159,1,0,15,159,1,0,22,159,2,0,105,216, -0,222,255,0,66,230,121,0,8,3,100,0,126,1,2,0, -67,230,100,0,122,0,12,3,80,4,66,230,53,4,0,3, -81,4,66,230,53,4,8,3,83,4,66,230,51,4,1,3, -87,4,66,230,86,4,8,3,92,4,66,230,58,4,1,3, -93,4,66,230,56,4,0,3,94,4,66,230,67,4,6,3, -172,3,66,230,177,3,1,3,173,3,66,230,181,3,1,3, -174,3,66,230,183,3,1,3,175,3,66,230,185,3,1,3, -204,3,66,230,191,3,1,3,205,3,66,230,197,3,1,3, -206,3,66,230,201,3,1,3,0,31,67,0,177,3,19,3, -185,3,1,31,67,0,177,3,20,3,185,3,2,31,185,3, -2,0,68,0,177,3,19,3,0,3,185,3,3,31,185,3, -2,0,68,0,177,3,20,3,0,3,185,3,4,31,185,3, -2,0,68,0,177,3,19,3,1,3,185,3,5,31,185,3, -2,0,68,0,177,3,20,3,1,3,185,3,6,31,185,3, -2,0,68,0,177,3,19,3,66,3,185,3,7,31,185,3, -2,0,68,0,177,3,20,3,66,3,185,3,32,31,67,0, -183,3,19,3,185,3,33,31,67,0,183,3,20,3,185,3, -34,31,185,3,2,0,68,0,183,3,19,3,0,3,185,3, -35,31,185,3,2,0,68,0,183,3,20,3,0,3,185,3, -36,31,185,3,2,0,68,0,183,3,19,3,1,3,185,3, -37,31,185,3,2,0,68,0,183,3,20,3,1,3,185,3, -38,31,185,3,2,0,68,0,183,3,19,3,66,3,185,3, -39,31,185,3,2,0,68,0,183,3,20,3,66,3,185,3, -96,31,67,0,201,3,19,3,185,3,97,31,67,0,201,3, -20,3,185,3,98,31,185,3,2,0,68,0,201,3,19,3, -0,3,185,3,99,31,185,3,2,0,68,0,201,3,20,3, -0,3,185,3,100,31,185,3,2,0,68,0,201,3,19,3, -1,3,185,3,101,31,185,3,2,0,68,0,201,3,20,3, -1,3,185,3,102,31,185,3,2,0,68,0,201,3,19,3, -66,3,185,3,103,31,185,3,2,0,68,0,201,3,20,3, -66,3,185,3,112,31,67,0,177,3,0,3,185,3,172,3, -67,0,177,3,1,3,185,3,182,31,67,0,177,3,66,3, -185,3,112,31,66,230,177,3,0,3,116,31,67,0,183,3, -0,3,185,3,174,3,67,0,183,3,1,3,185,3,198,31, -67,0,183,3,66,3,185,3,114,31,66,230,181,3,0,3, -116,31,66,230,183,3,0,3,144,3,1,0,67,230,185,3, -8,3,1,3,118,31,66,230,185,3,0,3,176,3,1,0, -67,230,197,3,8,3,1,3,122,31,66,230,197,3,0,3, -124,31,67,0,201,3,0,3,185,3,206,3,67,0,201,3, -1,3,185,3,246,31,67,0,201,3,66,3,185,3,120,31, -66,230,191,3,0,3,124,31,66,230,201,3,0,3,229,0, -66,230,97,0,10,3,40,0,0,172,41,0,3,0,68,0, -40,0,0,17,97,17,41,0,40,0,152,176,41,0,3,0, -68,0,40,0,2,17,97,17,41,0,40,0,228,178,41,0, -3,0,68,0,40,0,3,17,97,17,41,0,40,0,124,183, -41,0,3,0,68,0,40,0,5,17,97,17,41,0,40,0, -200,185,41,0,3,0,68,0,40,0,6,17,97,17,41,0, -40,0,20,188,41,0,3,0,68,0,40,0,7,17,97,17, -41,0,40,0,172,192,41,0,3,0,68,0,40,0,9,17, -97,17,41,0,40,0,68,197,41,0,3,0,68,0,40,0, -11,17,97,17,41,0,40,0,144,199,41,0,3,0,68,0, -40,0,12,17,97,17,41,0,40,0,40,204,41,0,3,0, -68,0,40,0,14,17,97,17,41,0,40,0,116,206,41,0, -3,0,68,0,40,0,15,17,97,17,41,0,40,0,192,208, -41,0,3,0,68,0,40,0,16,17,97,17,41,0,40,0, -12,211,41,0,3,0,68,0,40,0,17,17,97,17,41,0, -40,0,88,213,41,0,3,0,68,0,40,0,18,17,97,17, -41,0,40,0,252,200,41,0,3,0,68,0,40,0,12,17, -110,17,41,0,40,0,36,198,4,200,41,0,4,0,71,0, -40,0,11,17,105,17,12,17,101,17,171,17,41,0,40,0, -36,198,196,214,41,0,4,0,70,0,40,0,11,17,105,17, -18,17,110,17,41,0,0,172,66,0,0,17,97,17,152,176, -66,0,2,17,97,17,228,178,66,0,3,17,97,17,124,183, -66,0,5,17,97,17,200,185,66,0,6,17,97,17,20,188, -66,0,7,17,97,17,172,192,66,0,9,17,97,17,68,197, -66,0,11,17,97,17,144,199,66,0,12,17,97,17,40,204, -66,0,14,17,97,17,116,206,66,0,15,17,97,17,192,208, 
-66,0,16,17,97,17,12,211,66,0,17,17,97,17,88,213, -66,0,18,17,97,17,56,204,224,172,2,0,69,0,14,17, -97,17,183,17,0,17,105,17,252,200,88,199,2,0,68,0, -12,17,110,17,11,17,116,17,176,198,66,0,11,17,110,17, -162,48,209,48,252,48,200,48,4,0,69,0,162,48,207,48, -154,48,252,48,200,48,162,48,243,48,218,48,162,48,4,0, -69,0,162,48,243,48,216,48,154,48,162,48,164,48,203,48, -243,48,176,48,4,0,69,8,164,48,203,48,243,48,175,48, -153,48,168,48,185,48,175,48,252,48,201,48,5,0,70,8, -168,48,185,48,175,48,252,48,200,48,153,48,172,48,68,0, -171,48,153,48,237,48,243,48,172,48,68,0,171,48,153,48, -243,48,222,48,174,48,172,48,2,0,68,8,173,48,153,48, -171,48,153,48,174,48,68,0,173,48,153,48,203,48,252,48, -174,48,235,48,192,48,252,48,4,0,70,0,173,48,153,48, -235,48,191,48,153,48,252,48,173,48,237,48,176,48,233,48, -224,48,5,0,70,0,173,48,237,48,175,48,153,48,233,48, -224,48,176,48,68,0,175,48,153,48,233,48,224,48,176,48, -70,0,175,48,153,48,233,48,224,48,200,48,243,48,175,48, -235,48,188,48,164,48,237,48,5,0,70,0,175,48,235,48, -187,48,153,48,164,48,237,48,179,48,252,48,221,48,3,0, -68,8,179,48,252,48,219,48,154,48,183,48,234,48,243,48, -176,48,4,0,69,8,183,48,234,48,243,48,175,48,153,48, -192,48,68,0,191,48,153,48,252,48,185,48,199,48,67,0, -198,48,153,48,183,48,201,48,67,0,200,48,153,48,235,48, -209,48,70,0,207,48,154,48,252,48,187,48,243,48,200,48, -209,48,68,0,207,48,154,48,252,48,196,48,208,48,69,0, -207,48,153,48,252,48,236,48,235,48,212,48,70,0,210,48, -154,48,162,48,185,48,200,48,235,48,212,48,68,0,210,48, -154,48,175,48,235,48,212,48,67,0,210,48,154,48,179,48, -211,48,67,0,210,48,153,48,235,48,213,48,161,48,233,48, -195,48,201,48,5,0,70,8,213,48,161,48,233,48,195,48, -200,48,153,48,214,48,70,0,213,48,153,48,195,48,183,48, -167,48,235,48,218,48,67,0,216,48,154,48,189,48,218,48, -68,0,216,48,154,48,203,48,210,48,218,48,68,0,216,48, -154,48,243,48,185,48,218,48,252,48,184,48,3,0,69,8, -216,48,154,48,252,48,183,48,153,48,217,48,68,0,216,48, -153,48,252,48,191,48,221,48,69,0,219,48,154,48,164,48, -243,48,200,48,220,48,68,0,219,48,153,48,235,48,200,48, -221,48,243,48,201,48,3,0,69,8,219,48,154,48,243,48, -200,48,153,48,223,48,234,48,208,48,252,48,235,48,5,0, -70,0,223,48,234,48,207,48,153,48,252,48,235,48,225,48, -172,48,2,0,67,8,225,48,171,48,153,48,225,48,172,48, -200,48,243,48,4,0,69,0,225,48,171,48,153,48,200,48, -243,48,228,48,252,48,201,48,3,0,68,8,228,48,252,48, -200,48,153,48,235,48,212,48,252,48,3,0,68,0,235,48, -210,48,154,48,252,48,235,48,252,48,214,48,235,48,4,0, -69,0,235,48,252,48,213,48,153,48,235,48,236,48,243,48, -200,48,178,48,243,48,5,0,70,0,236,48,243,48,200,48, -177,48,153,48,243,48,192,6,66,230,213,6,84,6,211,6, -66,230,210,6,84,6,38,6,67,0,74,6,84,6,39,6, -38,6,67,0,74,6,84,6,213,6,38,6,67,0,74,6, -84,6,72,6,38,6,67,0,74,6,84,6,199,6,38,6, -67,0,74,6,84,6,198,6,38,6,67,0,74,6,84,6, -200,6,38,6,67,0,74,6,84,6,208,6,38,6,67,0, -74,6,84,6,73,6,38,6,67,0,74,6,84,6,44,6, -38,6,67,0,74,6,84,6,45,6,38,6,67,0,74,6, -84,6,69,6,38,6,67,0,74,6,84,6,74,6,38,6, -67,0,74,6,84,6,49,6,38,6,67,0,74,6,84,6, -50,6,38,6,67,0,74,6,84,6,70,6,38,6,67,0, -74,6,84,6,46,6,38,6,67,0,74,6,84,6,71,6, -34,6,66,230,39,6,83,6,35,6,66,230,39,6,84,6, -36,6,66,230,72,6,84,6,37,6,66,220,39,6,85,6, -38,6,66,230,74,6,84,6,68,6,34,6,2,0,67,230, -68,6,39,6,83,6,68,6,35,6,2,0,67,230,68,6, -39,6,84,6,68,6,37,6,2,0,67,220,68,6,39,6, -85,6,199,48,66,8,198,48,153,48,230,230,129,230,0,3, -230,230,129,230,1,3,230,230,129,230,19,3,230,230,130,230, -8,3,1,3,0,129,130,130,113,15,114,15,0,129,130,132, 
-113,15,116,15,0,129,130,130,113,15,128,15,1,0,170,17, -1,0,172,17,1,0,173,17,1,0,176,17,1,0,177,17, -1,0,178,17,1,0,179,17,1,0,180,17,1,0,181,17, -1,0,97,17,1,0,98,17,1,0,99,17,1,0,100,17, -1,0,101,17,1,0,102,17,1,0,103,17,1,0,104,17, -1,0,105,17,1,0,106,17,1,0,107,17,1,0,108,17, -1,0,109,17,1,0,110,17,1,0,111,17,1,0,112,17, -1,0,113,17,1,0,114,17,1,0,115,17,1,0,116,17, -1,0,117,17,0,8,129,8,153,48,0,8,129,8,154,48, -0,1,128,255,224,239,67,127,223,112,207,135,199,230,102,70, -100,70,102,91,18,12,0,4,0,0,0,99,33,2,41,174, -194,192,255,239,203,114,191,0,0,0,0,0,0,0,64,0, -128,136,0,0,254,8,0,7,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,152,0,193,102,224,128, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,4, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39, -220,2,159,176,170,170,170,170,170,170,170,170,170,170,170,170, -32,0,218,39,20,0,0,0,0,0,2,0,78,114,109,50, -4,0,0,0,12,1,0,0,0,0,0,0,0,0,0,0, -80,0,0,0,148,143,0,0,16,227,0,0,16,228,0,0, -16,228,0,0,16,228,0,0,16,228,0,0,16,228,0,0, -65,0,0,0,65,0,0,0,168,6,0,0,80,22,0,0, -124,83,0,0,0,252,0,0,96,9,0,0,122,74,0,0, -194,82,0,0,120,83,0,0,173,0,0,0,0,0,0,0, -51,105,114,84,0,0,222,10,188,60,128,3,91,21,1,7, -0,0,64,0,128,0,192,0,0,1,63,1,127,1,188,1, -252,1,59,2,105,2,162,2,226,2,34,3,98,3,162,3, -226,3,33,4,95,4,159,4,217,4,10,5,67,5,131,5, -189,5,245,5,46,6,110,6,160,6,223,6,237,6,45,7, -107,7,171,7,214,7,20,8,84,8,145,8,205,8,12,9, -75,9,137,9,200,9,5,10,69,10,132,10,194,10,1,11, -65,11,126,11,181,11,245,11,53,12,116,12,180,12,244,12, -51,13,114,13,177,13,241,13,49,14,110,14,174,14,236,14, -44,15,46,6,103,15,160,15,224,15,1,16,55,16,116,16, -46,6,171,16,226,16,32,17,79,17,116,17,151,17,183,17, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,246,17,22,18,86,18,150,18,214,18,12,19, -76,19,22,18,108,19,156,19,220,19,28,20,92,20,146,20, -187,20,220,20,28,21,91,21,155,21,219,21,27,22,71,22, -131,22,9,18,195,22,3,23,67,23,131,23,188,23,252,23, -60,24,124,24,188,24,252,24,60,25,124,25,188,25,252,25, -60,26,123,26,187,26,251,26,59,27,123,27,187,27,238,27, -43,28,106,28,170,28,224,28,14,29,46,6,46,6,46,6, -57,29,121,29,185,29,249,29,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -45,30,58,30,46,6,113,30,46,6,214,18,155,30,46,6, -219,30,11,31,75,31,103,31,160,31,208,31,16,32,48,32, -46,6,112,32,176,32,208,32,16,33,80,33,144,33,208,33, -16,34,80,34,142,34,206,34,14,35,78,35,142,35,206,35, -14,36,78,36,142,36,206,36,14,37,78,37,142,37,206,37, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,156,19,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, 
-46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,14,38, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,6,18,78,38,46,6,46,6,46,6,46,6, -98,38,162,38,226,38,2,39,66,39,116,39,178,39,242,39, -44,40,108,40,46,6,172,40,222,40,12,41,76,41,140,41, -204,41,12,42,40,42,104,42,167,42,215,42,23,43,87,43, -151,43,159,43,167,43,175,43,155,43,163,43,171,43,151,43, -159,43,167,43,175,43,155,43,163,43,171,43,151,43,159,43, -167,43,175,43,155,43,163,43,171,43,151,43,159,43,167,43, -175,43,155,43,163,43,171,43,151,43,159,43,167,43,175,43, -155,43,163,43,171,43,151,43,159,43,167,43,175,43,155,43, -163,43,171,43,151,43,159,43,167,43,175,43,155,43,163,43, -171,43,151,43,159,43,167,43,175,43,155,43,163,43,171,43, -151,43,159,43,167,43,175,43,155,43,163,43,171,43,151,43, -159,43,167,43,175,43,155,43,163,43,171,43,151,43,159,43, -167,43,175,43,155,43,163,43,171,43,151,43,159,43,167,43, -175,43,155,43,163,43,171,43,151,43,159,43,167,43,175,43, -155,43,163,43,171,43,151,43,159,43,167,43,175,43,155,43, -163,43,171,43,151,43,159,43,167,43,175,43,155,43,163,43, -171,43,151,43,159,43,167,43,175,43,155,43,163,43,171,43, -151,43,159,43,167,43,175,43,155,43,163,43,171,43,151,43, -159,43,167,43,175,43,155,43,163,43,171,43,151,43,159,43, -167,43,175,43,155,43,163,43,171,43,151,43,159,43,167,43, -175,43,155,43,163,43,171,43,151,43,159,43,167,43,175,43, -155,43,163,43,171,43,151,43,159,43,167,43,175,43,155,43, -163,43,171,43,151,43,159,43,167,43,175,43,155,43,163,43, -171,43,151,43,159,43,167,43,175,43,155,43,163,43,171,43, -151,43,159,43,167,43,175,43,155,43,163,43,227,43,28,44, -92,44,156,44,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,220,44,91,21,91,21, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, 
-91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,28,45,92,45,156,45,220,45, -28,46,92,46,156,46,220,46,28,47,92,47,154,47,216,47, -24,48,88,48,152,48,216,48,24,49,88,49,152,49,216,49, -24,50,88,50,152,50,215,50,23,51,87,51,151,51,215,51, -251,9,25,10,54,10,85,10,117,10,117,10,130,10,159,10, -189,10,189,10,189,10,189,10,189,10,189,10,189,10,189,10, -189,10,189,10,189,10,189,10,189,10,189,10,189,10,189,10, -189,10,189,10,189,10,189,10,189,10,189,10,189,10,189,10, -189,10,189,10,189,10,189,10,189,10,189,10,189,10,189,10, -189,10,189,10,189,10,189,10,189,10,189,10,189,10,189,10, -189,10,189,10,189,10,189,10,221,10,23,52,46,6,36,52, -44,52,160,5,160,5,91,21,91,21,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,20,7,5,20,46,6,46,6, -199,12,46,6,46,6,46,6,46,6,159,5,162,5,127,0, -91,21,91,21,46,6,46,6,59,52,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,46,6,161,5,46,6, -46,6,46,6,127,0,20,6,162,5,46,6,46,6,188,9, -46,6,20,7,46,6,46,6,75,52,46,6,156,7,46,6, -46,6,4,20,178,5,91,21,91,21,91,52,91,52,99,52, -46,6,46,6,46,6,46,6,46,6,46,6,160,5,21,7, -91,52,91,52,115,52,46,6,162,5,46,6,46,6,23,7, -46,6,46,6,46,6,180,5,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -24,7,46,6,178,5,23,7,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,125,52,46,6,46,6, -135,52,46,6,164,7,46,6,46,6,46,6,159,5,140,5, -91,21,91,21,91,21,46,6,150,52,46,6,111,11,46,6, -50,11,91,21,91,21,91,21,91,21,46,6,46,6,46,6, -166,5,46,6,33,5,46,6,46,6,162,52,210,9,46,6, -178,52,22,7,22,7,46,6,46,6,46,6,46,6,91,21, -91,21,46,6,46,6,194,52,24,7,46,6,46,6,46,6, -197,12,46,6,146,12,46,6,57,11,46,6,208,52,186,5, -91,21,91,21,91,21,91,21,91,21,46,6,46,6,46,6, -46,6,22,7,91,21,91,21,91,21,224,52,224,52,224,52, -237,52,46,6,46,6,46,6,52,10,46,6,46,6,253,52, -21,7,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,46,6,159,5,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,46,6, -46,6,23,7,46,6,13,53,28,53,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,46,6,24,7,46,6, -46,6,46,6,46,6,44,53,33,5,46,6,58,53,46,6, -74,53,87,53,99,53,126,0,46,6,22,7,21,7,81,8, -46,6,113,53,129,53,24,7,46,6,46,6,142,53,46,6, -46,6,46,6,46,6,158,53,46,6,34,5,123,0,46,6, -168,7,46,6,174,53,91,21,91,21,91,21,91,21,182,53, -156,7,21,7,46,6,46,6,46,6,197,53,21,7,205,8, -221,8,161,7,213,53,228,53,244,53,1,54,9,54,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,46,6, -46,6,46,6,46,6,25,54,32,54,91,21,91,21,46,6, -46,6,46,6,48,54,63,54,21,7,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,46,6, -46,6,79,54,95,54,111,54,160,5,91,21,91,21,46,6, -46,6,46,6,97,12,123,0,21,7,161,5,91,21,46,6, -46,6,46,6,121,54,21,7,91,21,91,21,91,21,46,6, -141,12,20,41,46,6,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,46,6, -46,6,46,6,118,54,91,21,91,21,91,21,91,21,91,21, -91,21,137,54,137,54,46,6,46,6,46,6,167,18,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,144,12,46,6,46,6,144,12,153,54,91,21,46,6, -46,6,46,6,46,40,24,41,46,6,46,6,46,6,46,6, -21,19,125,0,91,21,46,6,46,6,46,6,22,7,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, 
-91,21,91,21,91,21,91,21,91,21,91,21,91,21,161,7, -46,6,46,6,169,54,178,5,46,6,161,5,46,6,46,6, -33,5,162,7,24,7,91,21,91,21,91,21,91,21,242,9, -46,6,46,6,185,54,200,54,21,7,216,54,46,6,159,5, -230,54,21,7,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -46,6,22,7,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,46,6,46,6, -46,6,38,12,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,21,7,91,21,91,21,91,21,91,21, -91,21,91,21,46,6,46,6,46,6,46,6,46,6,46,6, -159,5,123,0,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,124,0,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,46,6,46,6,159,5,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,46,6,46,6,46,6,46,6,24,7,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,46,6,46,6,46,6,22,7,46,6,159,5, -164,5,91,21,91,21,91,21,91,21,91,21,91,21,46,6, -160,5,246,54,46,6,46,6,46,6,8,22,178,5,160,7, -168,7,52,11,46,6,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,137,54,137,54,46,6,46,6,46,6,20,7, -91,21,91,21,91,21,91,21,91,21,91,21,46,6,46,6, -46,6,46,6,163,5,46,6,46,6,46,6,47,10,46,6, -91,21,91,21,91,21,91,21,124,0,91,21,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,23,7,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,125,0,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,159,5, -91,21,91,21,91,21,125,0,2,55,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,162,5,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,46,6,46,6,46,6, -46,6,46,6,46,6,20,7,161,5,22,7,18,55,34,55, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,178,5,46,6,46,6,145,12, -46,6,46,6,50,55,66,55,80,55,93,55,46,6,105,55, -119,55,135,55,46,6,22,7,91,21,46,6,46,6,46,6, -46,6,149,55,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,46,6,124,0,46,6,46,6,46,6, -46,6,46,6,24,7,46,6,22,7,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,239,29,255,29,245,29, -5,30,251,29,165,55,1,30,247,29,7,30,177,55,193,55, -209,55,225,55,239,29,255,29,245,29,237,55,253,55,241,29, -233,55,13,56,29,56,253,29,243,29,3,30,249,29,239,29, -255,29,245,29,5,30,251,29,241,29,1,30,247,29,7,30, -253,29,243,29,3,30,249,29,239,29,255,29,245,29,45,56, -61,56,77,56,93,56,109,56,67,56,83,56,99,56,57,56, -73,56,89,56,105,56,63,56,53,56,95,56,53,56,69,56, -85,56,125,56,141,56,147,56,143,56,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,162,5,184,5,34,5, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,163,56,171,56,185,56, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,46,6,46,6,161,5, -201,56,164,5,91,21,91,21,91,21,91,21,91,21,91,21, 
-91,21,91,21,91,21,91,21,91,21,91,21,46,6,46,6, -76,7,50,11,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,163,10,217,56, -91,21,91,21,233,56,233,56,247,56,46,6,3,57,164,5, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,34,5,46,6,46,6,46,6,123,0,91,21, -91,21,91,21,91,21,34,5,46,6,46,6,160,5,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,19,57,35,57,51,57,67,57,81,57, -97,57,113,57,129,57,145,57,161,57,176,57,161,57,91,21, -91,21,91,21,126,0,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,46,6,46,6,162,5,46,6,46,6, -46,6,46,6,46,6,46,6,124,0,159,5,34,5,34,5, -34,5,46,6,178,5,192,57,213,29,208,57,239,29,224,57, -46,6,240,57,46,6,46,6,0,58,161,5,91,21,91,21, -91,21,141,5,46,6,16,58,32,58,48,58,64,58,80,58, -96,58,178,5,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,178,5,161,5,20,7,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,124,0,46,6,46,6, -46,6,46,6,46,6,22,7,162,5,91,21,162,5,46,6, -46,6,46,6,23,7,21,7,46,6,46,6,23,7,46,6, -160,5,91,21,91,21,91,21,91,21,91,21,158,7,46,6, -46,6,46,6,46,6,46,6,46,6,112,58,46,6,46,6, -125,58,46,6,141,12,46,6,46,6,46,6,46,6,46,6, -124,0,160,5,139,58,125,0,178,5,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -24,7,91,21,91,21,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,123,0,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,160,5,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,126,0,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,46,6,46,6,46,6,46,6,46,6,46,6,46,6, -46,6,127,0,91,21,155,58,171,58,187,58,203,58,219,58, -235,58,251,58,11,59,27,59,43,59,59,59,75,59,91,59, -107,59,123,59,139,59,155,59,171,59,187,59,203,59,219,59, -235,59,251,59,11,60,27,60,43,60,59,60,75,60,91,60, -107,60,123,60,139,60,155,60,171,60,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,91,21,91,21,91,21,91,21,91,21, -91,21,91,21,91,21,24,50,24,50,24,50,24,50,24,50, -24,50,24,50,24,50,24,50,24,50,24,50,24,50,24,50, -24,50,24,50,91,21,53,4,85,4,117,4,149,4,181,4, -213,4,245,4,15,5,47,5,79,5,111,5,143,5,175,5, -207,5,239,5,10,6,208,0,42,6,74,6,128,3,128,3, -128,3,128,3,128,3,208,0,208,0,106,6,128,3,128,3, -128,3,128,3,128,3,208,0,138,6,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,208,0,170,6,128,3, -198,6,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,230,6,208,0,6,7,128,3,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,128,3,38,7,61,7, -128,3,128,3,128,3,128,3,93,7,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,125,7,157,7,189,7, -221,7,208,0,253,7,128,3,128,3,29,8,50,8,128,3, -128,3,82,8,128,3,107,8,139,8,171,8,203,8,208,0, -222,8,254,8,27,9,128,3,128,3,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, 
-208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,59,9,208,0,208,0, -208,0,208,0,208,0,208,0,208,0,75,9,106,9,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,128,9,208,0, -208,0,208,0,208,0,208,0,208,0,208,0,208,0,208,0, -208,0,208,0,208,0,208,0,139,9,128,3,128,3,128,3, -128,3,128,3,128,3,171,9,203,9,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,128,3,128,3,128,3, -128,3,128,3,128,3,128,3,128,3,219,9,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,4,0,8,0,12,0,1,0,1,0,248,250, -248,250,248,250,248,250,248,250,248,250,248,250,248,250,248,250, -248,250,248,250,248,250,248,250,248,250,248,250,248,250,249,250, -248,250,248,250,248,250,248,250,248,250,248,250,248,250,248,250, -248,250,1,0,1,0,1,0,1,0,1,0,1,0,16,0, -80,0,92,0,112,0,136,0,204,0,208,0,236,0,12,1, -68,1,76,1,96,1,120,1,132,1,168,1,232,1,1,0, -240,1,16,2,44,2,76,2,152,2,160,2,188,2,196,2, -236,2,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,85,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,88,22,1,0, -94,22,1,0,1,0,122,83,1,0,98,22,1,0,1,0, -105,22,109,22,112,22,119,22,1,0,1,0,122,22,129,22, -132,22,1,0,137,22,145,22,153,22,1,0,252,250,252,250, -252,250,252,250,252,250,252,250,248,250,252,250,252,250,252,250, -252,250,252,250,252,250,252,250,252,250,252,250,249,250,252,250, -252,250,252,250,252,250,252,250,252,250,1,0,248,250,252,250, -252,250,252,250,252,250,252,250,249,250,1,0,98,9,104,9, -170,6,110,9,192,6,202,6,4,3,212,6,116,9,122,9, -222,6,128,9,134,9,140,9,146,9,244,6,1,0,152,9, -158,9,164,9,254,6,20,7,38,7,1,0,12,3,170,9, -176,9,182,9,48,7,188,9,1,0,194,9,4,250,200,9, -4,250,70,7,4,250,207,9,4,250,212,9,4,250,218,9, -4,250,224,9,4,250,230,9,4,250,236,9,1,250,1,0, -4,250,92,7,4,250,242,9,4,250,248,9,4,250,255,9, -4,250,4,10,4,250,10,10,4,250,16,10,4,250,22,10, -4,250,29,10,4,250,34,10,1,250,1,0,4,250,40,10, -4,250,46,10,4,250,52,10,4,250,59,10,160,22,1,0, -166,22,166,22,4,250,65,10,4,250,71,10,1,0,4,250, -76,10,4,250,83,10,4,250,88,10,173,22,1,250,1,0, -4,250,94,10,4,250,101,10,4,250,106,10,178,22,1,250, -1,0,4,250,106,7,4,250,112,10,4,250,118,10,1,250, -1,0,4,250,124,10,4,250,131,10,4,250,136,10,4,250, -120,7,4,250,142,10,4,250,149,10,4,250,130,7,4,250, -155,10,4,250,160,10,1,250,1,0,4,250,140,7,4,250, -150,7,4,250,166,10,4,250,172,10,4,250,178,10,4,250, -185,10,4,250,190,10,4,250,196,10,124,74,4,250,202,10, -4,250,208,10,4,250,214,10,184,22,1,0,189,22,1,250, -1,0,1,250,1,0,193,22,1,250,1,0,197,22,201,22, -1,250,1,0,1,0,205,22,209,22,213,22,1,250,1,0, -217,22,221,22,1,0,225,22,229,22,1,250,1,0,1,0, -1,0,233,22,237,22,1,0,241,22,4,250,160,7,1,250, -1,0,1,250,1,0,245,22,1,250,1,0,249,22,1,0, -1,0,1,250,1,0,253,22,4,250,186,7,1,23,5,23, -1,250,1,0,1,250,1,0,8,23,1,250,1,0,1,0, -1,0,1,250,1,0,1,0,1,0,1,0,136,74,136,74, -136,74,12,23,12,23,12,23,18,23,18,23,18,23,4,250, -220,10,4,250,226,10,4,250,232,10,4,250,238,10,4,250, -246,10,4,250,0,11,4,250,10,11,4,250,20,11,1,0, -4,250,30,11,4,250,40,11,4,250,49,11,1,250,1,0, -4,250,54,11,4,250,60,11,4,250,212,7,4,250,69,11, -4,250,77,11,83,11,24,23,24,23,24,23,4,250,88,11, 
-31,23,57,248,4,250,94,11,4,250,102,11,4,250,111,11, -4,250,117,11,4,250,122,11,4,250,128,11,4,250,134,11, -4,250,140,11,4,250,146,11,4,250,152,11,4,250,158,11, -4,250,164,11,4,250,170,11,4,250,176,11,4,250,182,11, -4,250,188,11,4,250,194,11,4,250,200,11,1,250,1,0, -4,250,206,11,35,23,1,0,1,250,1,0,1,250,1,0, -4,250,222,7,4,250,232,7,4,250,214,11,4,250,224,11, -4,250,242,7,4,250,234,11,4,250,242,11,1,0,1,0, -1,0,1,0,1,0,1,0,39,23,1,250,1,0,43,23, -47,23,1,0,1,250,1,0,51,23,55,23,59,23,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,16,3,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,62,23, -67,23,70,23,74,23,33,248,41,248,81,248,78,23,82,23, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -86,23,92,23,98,23,104,23,110,23,116,23,1,0,1,0, -221,22,122,23,184,22,126,23,131,23,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,253,204,253,204,253,204,253,204,253,204,255,204,253,204,253, -204,253,204,253,204,253,204,253,204,253,204,255,204,255,204,253, -204,255,204,253,204,255,204,253,204,253,208,255,184,255,184,255, -184,255,184,255,208,255,176,253,184,255,184,255,184,255,184,255, -184,255,148,255,148,255,184,253,184,253,184,253,184,253,148,253, -148,253,184,255,184,255,184,255,184,255,184,253,184,253,184,255, -184,253,184,253,184,255,184,255,2,254,2,254,2,254,2,254, -2,252,184,255,184,255,184,255,184,255,204,255,204,255,204,255, -196,82,202,82,204,253,208,82,214,82,136,23,204,255,184,255, -184,255,184,255,204,255,204,255,204,255,184,255,184,255,122,83, -204,255,204,255,204,255,184,255,184,255,184,255,184,255,204,255, -208,255,184,255,184,255,204,255,210,255,212,255,212,255,210,255, -212,255,212,255,210,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -1,250,1,0,1,250,1,0,141,23,1,0,1,250,1,0, -81,22,81,22,144,23,1,0,1,0,1,0,151,23,155,23, -81,22,81,22,81,22,81,22,112,22,158,23,44,251,167,23, -36,251,36,251,36,251,81,22,252,251,81,22,244,251,244,251, -251,11,248,250,249,250,249,250,249,250,248,250,249,250,248,250, -249,250,248,250,249,250,249,250,249,250,249,250,249,250,248,250, -249,250,248,250,81,22,249,250,249,250,248,250,249,250,249,250, -249,250,248,250,252,250,252,250,3,12,9,12,15,12,21,12, -29,12,20,3,1,0,1,0,1,0,48,3,1,0,64,3, -1,0,84,3,1,0,1,0,1,0,1,0,1,0,116,3, -1,0,132,3,1,0,1,0,1,0,140,3,1,0,1,0, -1,0,172,3,252,7,14,8,37,12,43,12,49,12,57,250, -9,249,49,249,144,249,204,249,180,249,129,249,73,249,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -73,248,120,248,129,248,1,0,25,248,248,247,1,0,1,250, -1,0,73,248,1,250,1,0,1,0,171,23,175,23,179,23, -146,74,154,74,183,23,162,74,187,23,191,23,194,23,170,74, -199,23,203,23,207,23,211,23,178,74,186,74,194,74,215,23, -248,250,249,250,249,250,248,250,249,250,248,250,248,250,248,250, -248,250,252,250,248,250,249,250,249,250,249,250,248,250,249,250, -249,250,249,250,249,250,248,250,249,250,249,250,249,250,248,250, -249,250,249,250,249,250,248,250,249,250,248,250,249,250,249,250, -192,3,1,0,1,0,200,3,1,0,204,3,216,3,224,3, -228,3,55,12,244,3,1,0,1,0,1,0,248,3,1,0, -1,0,1,0,252,3,1,0,1,0,1,0,12,4,1,0, 
-1,0,1,0,16,4,1,0,20,4,1,0,1,0,61,12, -67,12,1,0,73,12,1,0,1,0,24,4,79,12,1,0, -1,0,1,0,1,0,85,12,91,12,97,12,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,0,250,28,4,4,250,103,12,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,0, -204,255,204,255,204,255,204,255,204,255,1,0,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,81,22,4,250,109,12, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,0,4,250,115,12,4,250, -121,12,1,250,1,0,4,250,127,12,0,250,32,4,4,250, -133,12,4,250,139,12,4,250,145,12,1,250,1,0,4,250, -151,12,4,250,157,12,4,250,163,12,0,250,36,4,4,250, -169,12,4,250,175,12,4,250,181,12,4,250,187,12,4,250, -193,12,4,250,199,12,1,250,1,0,4,250,205,12,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,81,22, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -219,23,1,0,1,0,1,0,81,22,81,22,1,0,1,0, -1,0,81,22,184,255,204,255,204,255,204,255,204,255,184,255, -204,255,204,255,204,255,188,255,184,255,204,255,204,255,204,255, -204,255,204,255,204,255,184,255,184,255,184,255,184,255,184,255, -184,255,204,255,204,255,184,255,204,255,204,255,188,255,200,255, -204,255,20,254,22,254,24,254,26,254,28,254,30,254,32,254, -34,254,36,254,38,254,38,254,40,254,42,254,44,254,1,0, -46,254,1,0,48,254,50,254,1,0,204,255,184,255,1,0, -36,254,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,60,254,62,254,64,254,1,0,81,22, -81,22,1,0,1,0,1,0,1,0,210,12,216,12,223,12, -229,12,235,12,40,4,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,52,4,1,0,56,4,54,254,56,254, -58,254,60,254,62,254,64,254,66,254,68,254,204,253,204,253, -184,253,184,255,204,255,204,255,204,255,204,255,204,255,184,255, -204,255,204,255,184,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,70,254,1,0,1,0,1,0,1,0, -225,23,231,23,237,23,243,23,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,241,12,60,4,247,12,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,64,4,253,12, -1,0,68,4,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,81,22,1,0,204,255,204,255,204,255,204,255,184,255, -204,255,1,0,1,0,204,255,204,255,1,0,184,255,204,255, -204,255,184,255,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,1,0,72,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,204,255,184,255, -204,255,204,255,184,255,204,255,204,255,184,255,184,255,184,255, -204,255,184,255,184,255,204,255,184,255,204,255,204,255,184,255, -204,255,184,255,204,255,184,255,204,255,184,255,204,255,204,255, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,184,255,204,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -184,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -204,255,204,255,204,255,1,0,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,1,0,204,255,204,255, -204,255,1,0,204,255,204,255,204,255,204,255,204,255,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,184,255,184,255,184,255,81,22,81,22,1,0, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,184,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,81,22,184,255,204,255,204,255, -184,255,204,255,204,255,184,255,204,255,204,255,204,255,184,255, -184,255,184,255,54,254,56,254,58,254,204,255,204,255,204,255, -184,255,204,255,204,255,184,255,184,255,204,255,204,255,204,255, -204,255,204,255,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,72,4,3,13,1,0,1,0,1,0,1,0, -1,0,1,0,76,4,9,13,1,0,80,4,15,13,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,14,252,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,18,254,1,0,1,0,1,0, -204,255,184,255,204,255,204,255,1,0,1,0,1,0,248,23, -254,23,4,24,10,24,16,24,22,24,28,24,34,24,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,1,0,1,0,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,81,22,81,22,14,254, -1,0,0,252,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,84,4,1,0,81,22,81,22,21,13,27,13,18,254, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,0,252,81,22,81,22,81,22,81,22,40,24,46,24, -81,22,52,24,1,0,1,0,1,0,1,0,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,255,81,22,1,0,1,0,1,0,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -1,0,1,0,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,1,0,58,24,81,22,1,0,64,24, -81,22,1,0,1,0,81,22,81,22,14,254,81,22,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,1,0,1,0, -81,22,81,22,1,0,1,0,18,254,81,22,81,22,81,22, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -70,24,76,24,82,24,1,0,81,22,88,24,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,81,22,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,81,22,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,14,254,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,81,22,1,0,1,0, -18,254,81,22,81,22,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,1,0,1,0,1,0,1,0,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,1,0,1,0,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,14,254, -1,0,0,252,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,92,4,33,13,81,22,81,22,39,13,45,13,18,254, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -0,252,0,252,81,22,81,22,81,22,81,22,94,24,100,24, -81,22,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,81,22,81,22,1,0,1,0, -1,0,81,22,104,4,1,0,51,13,1,0,81,22,81,22, -81,22,1,0,1,0,81,22,1,0,81,22,1,0,1,0, -81,22,81,22,81,22,1,0,1,0,81,22,81,22,81,22, -1,0,1,0,1,0,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,0,252,1,0, -1,0,1,0,81,22,81,22,81,22,108,4,116,4,1,0, -81,22,57,13,63,13,69,13,18,254,81,22,81,22,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,0,252,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,81,22,120,4,1,0,75,13,81,22,1,0,1,0, -1,0,18,254,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,168,254,182,252,81,22,1,0,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,14,254, -1,0,1,0,124,4,81,13,1,0,0,252,1,0,1,0, 
-81,22,128,4,87,13,93,13,81,22,32,8,101,13,1,0, -18,254,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -0,252,0,252,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,81,22,1,0,1,0,1,0,1,0,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,18,254,18,254, -1,0,0,252,1,0,1,0,1,0,1,0,1,0,81,22, -140,4,148,4,1,0,81,22,109,13,115,13,121,13,18,254, -1,0,1,0,81,22,81,22,81,22,81,22,1,0,1,0, -1,0,0,252,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,1,0,1,0,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,1,0, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,18,252,81,22,81,22,81,22, -81,22,0,252,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,81,22,1,0,152,4,127,13,1,0,42,8,135,13, -143,13,0,252,81,22,81,22,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,1,0,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,107,24,1,0,1,0,1,0, -1,0,206,254,206,254,18,254,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -214,254,214,254,214,254,214,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -1,0,1,0,81,22,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,81,22,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,1,0,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,113,24,1,0,1,0,1,0,1,0,236,254, -236,254,18,254,1,0,1,0,1,0,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,81,22,244,254, -244,254,244,254,244,254,1,0,1,0,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,119,24,125,24,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,241,249,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,184,255, -184,255,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,184,255,1,0,184,255,1,0, -176,255,1,0,1,0,1,0,1,0,1,0,1,0,131,24, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,137,24,1,0,1,0,1,0,1,0,143,24,1,0, -1,0,1,0,1,0,149,24,1,0,1,0,1,0,1,0, -155,24,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,161,24,1,0,1,0, -1,0,81,22,81,22,81,22,81,22,2,255,4,255,222,82, -8,255,230,82,166,24,172,24,180,24,186,24,4,255,4,255, -4,255,4,255,1,0,1,0,4,255,238,82,204,255,204,255, 
-18,254,1,0,204,255,204,255,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,195,24, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,201,24,1,0,1,0,1,0,1,0,207,24,1,0, -1,0,1,0,1,0,213,24,1,0,1,0,1,0,1,0, -219,24,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,225,24,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -184,255,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,164,4, -149,13,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -0,252,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,14,254,1,0,18,254,18,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,184,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,231,24,81,22,81,22, -81,22,81,22,81,22,235,24,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,249,248,1,0,1,0,1,0,2,0,2,0, -2,0,2,0,2,0,2,0,2,0,2,0,2,0,2,0, -2,0,2,0,2,0,2,0,2,0,2,0,2,0,2,0, -2,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,0,254,0,254,0,254, -0,254,0,254,0,254,0,254,0,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,1,0,1,0,1,0,1,0,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,81,22,1,0,1,0,1,0,1,0,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,1,0,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,1,0,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,1,0, -1,0,1,0,1,0,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,204,255, -204,255,204,255,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,81,22,185,249,185,249,185,249, -185,249,185,249,185,249,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -18,254,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -18,254,1,0,1,0,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,81,22,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,18,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,122,83,122,83,122,83, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,200,255,1,0,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,188,255,204,255,184,255,81,22,81,22, -81,22,81,22,1,0,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, 
-81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,255,184,255,1,0,1,0,1,0,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,18,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,81,22, -81,22,184,255,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,204,255,204,255,204,255,204,255,204,255,184,255, -184,255,184,255,184,255,184,255,184,255,204,255,204,255,184,255, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,168,4,155,13, -172,4,161,13,176,4,167,13,180,4,173,13,184,4,179,13, -1,0,1,0,188,4,185,13,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,14,254,0,252,1,0, -1,0,1,0,1,0,192,4,191,13,196,4,197,13,200,4, -204,4,203,13,209,13,208,4,215,13,18,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,204,255,184,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,18,254,18,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,14,254,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,18,254, -18,254,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -14,254,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,239,24,243,24,246,24,251,24,255,24,255,24,3,25, -7,25,11,25,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,15,25,19,25,23,25,27,25,31,25,35,25,39,25, -43,25,47,25,51,25,55,25,59,25,63,25,67,25,71,25, -75,25,79,25,83,25,87,25,91,25,95,25,99,25,103,25, -107,25,111,25,115,25,119,25,123,25,127,25,131,25,135,25, -139,25,143,25,147,25,151,25,155,25,159,25,163,25,167,25, -171,25,175,25,179,25,183,25,81,22,81,22,187,25,191,25, -195,25,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,204,255,204,255,204,255,1,0,2,254,184,255,184,255, -184,255,184,255,184,255,204,255,204,255,184,255,184,255,184,255, -184,255,204,255,1,0,2,254,2,254,2,254,2,254,2,254, 
-2,254,2,254,1,0,1,0,1,0,1,0,184,255,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,1,0,1,0, -1,0,204,255,204,255,1,0,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,94,22,198,25,202,25, -1,0,206,25,210,25,205,22,214,25,62,23,218,25,70,23, -222,25,122,23,226,25,230,25,1,0,132,22,235,25,238,25, -74,23,242,25,246,25,78,23,94,22,251,25,255,25,3,26, -202,25,206,25,210,25,209,22,213,22,7,26,214,25,1,0, -222,25,226,25,11,26,132,22,193,22,9,248,9,248,238,25, -242,25,246,25,25,248,233,22,14,26,65,248,19,26,23,26, -27,26,31,26,35,26,218,25,74,23,246,25,14,26,19,26, -23,26,38,26,31,26,35,26,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,43,26,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,47,26,50,26,55,26, -59,26,7,26,62,26,67,26,71,26,75,26,229,22,225,22, -79,26,153,248,83,26,87,26,209,248,91,26,95,26,99,26, -237,22,103,26,107,26,241,22,111,26,115,26,249,22,119,26, -55,23,1,23,123,26,5,23,59,23,126,26,131,26,135,26, -8,23,139,26,204,255,204,255,184,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,184,255,204,255,204,255,212,255, -172,255,184,255,148,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -208,255,200,255,200,255,184,255,81,22,204,255,210,255,184,255, -204,255,184,255,4,250,220,13,4,250,226,13,4,250,233,13, -4,250,239,13,4,250,247,13,4,250,254,13,4,250,4,14, -4,250,10,14,4,250,17,14,4,250,22,14,4,250,30,14, -4,250,40,14,4,250,48,14,4,250,54,14,4,250,63,14, -4,250,71,14,4,250,76,14,4,250,82,14,4,250,88,14, -4,250,94,14,4,250,101,14,4,250,106,14,4,250,112,14, -4,250,120,14,4,250,128,14,4,250,134,14,4,250,140,14, -4,250,52,8,4,250,148,14,4,250,156,14,4,250,162,14, -4,250,168,14,4,250,174,14,4,250,181,14,4,250,186,14, -4,250,192,14,4,250,198,14,4,250,204,14,4,250,212,14, -4,250,222,14,4,250,232,14,4,250,242,14,4,250,251,14, -4,250,1,15,4,250,6,15,4,250,62,8,4,250,14,15, -4,250,22,15,4,250,28,15,4,250,72,8,4,250,36,15, -4,250,46,15,4,250,56,15,4,250,64,15,4,250,70,15, -4,250,76,15,4,250,82,15,4,250,88,15,4,250,94,15, -4,250,100,15,4,250,108,15,4,250,118,15,4,250,126,15, -4,250,133,15,4,250,138,15,4,250,144,15,4,250,150,15, -4,250,156,15,4,250,163,15,4,250,169,15,4,250,175,15, -4,250,180,15,4,250,186,15,4,250,193,15,4,250,199,15, -204,15,210,15,216,15,222,15,143,26,44,248,1,0,1,0, -148,26,1,0,4,250,82,8,4,250,228,15,4,250,236,15, -4,250,246,15,4,250,0,16,4,250,10,16,4,250,20,16, -4,250,30,16,4,250,40,16,4,250,50,16,4,250,60,16, -4,250,70,16,4,250,96,8,4,250,78,16,4,250,84,16, -4,250,92,16,4,250,102,16,4,250,112,16,4,250,122,16, -4,250,132,16,4,250,140,16,4,250,146,16,4,250,106,8, -4,250,152,16,4,250,160,16,4,250,170,16,4,250,180,16, -4,250,190,16,4,250,200,16,4,250,210,16,4,250,220,16, -4,250,230,16,4,250,240,16,4,250,250,16,4,250,2,17, -4,250,8,17,4,250,16,17,4,250,26,17,4,250,36,17, -4,250,46,17,4,250,56,17,4,250,64,17,4,250,71,17, -4,250,76,17,4,250,82,17,1,250,1,0,1,250,1,0, -1,250,1,0,116,8,134,8,91,17,101,17,111,17,121,17, -131,17,141,17,188,249,188,249,188,249,188,249,188,249,188,249, -188,249,188,249,152,8,166,8,151,17,161,17,171,17,181,17, 
-81,22,81,22,188,249,188,249,188,249,188,249,188,249,188,249, -81,22,81,22,180,8,198,8,191,17,201,17,211,17,221,17, -231,17,241,17,188,249,188,249,188,249,188,249,188,249,188,249, -188,249,188,249,216,8,234,8,251,17,5,18,15,18,25,18, -35,18,45,18,188,249,188,249,188,249,188,249,188,249,188,249, -188,249,188,249,252,8,10,9,55,18,65,18,75,18,85,18, -81,22,81,22,188,249,188,249,188,249,188,249,188,249,188,249, -81,22,81,22,24,9,42,9,95,18,105,18,115,18,125,18, -135,18,145,18,81,22,188,249,81,22,188,249,81,22,188,249, -81,22,188,249,60,9,78,9,155,18,165,18,175,18,185,18, -195,18,205,18,188,249,188,249,188,249,188,249,188,249,188,249, -188,249,188,249,213,18,202,74,219,18,210,74,225,18,218,74, -231,18,226,74,237,18,234,74,243,18,242,74,249,18,250,74, -81,22,81,22,2,75,12,75,26,75,42,75,58,75,74,75, -90,75,106,75,2,75,12,75,26,75,42,75,58,75,74,75, -90,75,106,75,118,75,128,75,142,75,158,75,174,75,190,75, -206,75,222,75,118,75,128,75,142,75,158,75,174,75,190,75, -206,75,222,75,234,75,244,75,2,76,18,76,34,76,50,76, -66,76,82,76,234,75,244,75,2,76,18,76,34,76,50,76, -66,76,82,76,255,18,5,19,94,76,154,26,104,76,81,22, -11,19,114,76,188,249,188,249,124,76,202,74,154,26,160,26, -166,26,160,26,170,26,176,26,132,76,184,26,142,76,81,22, -17,19,152,76,162,76,210,74,170,76,218,74,184,26,190,26, -198,26,206,26,23,19,29,19,37,19,180,76,81,22,81,22, -45,19,53,19,188,249,188,249,190,76,226,74,81,22,214,26, -222,26,230,26,61,19,67,19,75,19,200,76,83,19,89,19, -95,19,103,19,188,249,188,249,210,76,242,74,196,249,238,26, -158,23,247,26,81,22,81,22,218,76,250,26,228,76,81,22, -111,19,238,76,248,76,234,74,0,77,250,74,250,26,112,22, -0,27,81,22,85,22,85,22,85,22,85,22,85,22,85,22, -85,22,85,22,85,22,85,22,85,22,122,83,1,0,1,0, -81,22,81,22,1,0,241,249,1,0,1,0,1,0,1,0, -1,0,6,27,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,1,0,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,85,22,1,0,1,0,1,0,13,27,19,27,1,0, -27,27,33,27,1,0,1,0,1,0,1,0,41,27,1,0, -46,27,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -53,27,59,27,65,27,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -71,27,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -85,22,122,83,81,22,81,22,81,22,122,83,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,27,218,25,81,22,81,22,85,27,89,27,93,27, -97,27,101,27,105,27,109,27,113,27,116,27,121,27,125,27, -230,25,81,27,129,22,105,22,109,22,85,27,89,27,93,27, -97,27,101,27,105,27,109,27,113,27,116,27,121,27,125,27, -81,22,94,22,210,25,132,22,126,23,209,22,62,23,222,25, -122,23,226,25,230,25,238,25,184,22,242,25,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,128,27,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,204,255,204,255,2,254,2,254,204,255,204,255,204,255, -204,255,2,254,2,254,2,254,204,255,204,255,1,0,1,0, -1,0,1,0,204,255,1,0,1,0,1,0,2,254,2,254, -204,255,184,255,204,255,2,254,2,254,184,255,184,255,184,255, -184,255,204,255,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,134,27,142,27,50,26,150,27,1,0,156,27,164,27, -213,22,1,0,172,27,214,25,62,23,62,23,62,23,62,23, -179,27,218,25,218,25,122,23,122,23,1,0,230,25,182,27, -1,0,1,0,238,25,189,27,74,23,74,23,74,23,1,0, -1,0,192,27,198,27,206,27,1,0,126,26,1,0,212,27, -1,0,126,26,1,0,222,25,8,77,202,25,50,26,1,0, -210,25,210,25,62,26,81,22,226,25,132,22,217,27,221,27, -225,27,229,27,218,25,1,0,232,27,241,27,23,26,23,26, 
-241,27,245,27,1,0,1,0,1,0,1,0,206,25,206,25, -210,25,218,25,70,23,1,0,1,0,1,0,1,0,1,0, -1,0,249,27,1,28,9,28,19,28,27,28,35,28,43,28, -51,28,59,28,67,28,75,28,83,28,91,28,99,28,107,28, -115,28,218,25,120,28,126,28,134,28,14,26,140,28,146,28, -154,28,164,28,126,23,170,28,176,28,122,23,50,26,206,25, -226,25,218,25,120,28,126,28,134,28,14,26,140,28,146,28, -154,28,164,28,126,23,170,28,176,28,122,23,50,26,206,25, -226,25,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,185,28,1,0,1,0,81,22,81,22,81,22, -81,22,212,4,1,0,216,4,1,0,220,4,1,0,1,0, -1,0,1,0,1,0,117,19,123,19,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,129,19, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,135,19,141,19,147,19,224,4,1,0,228,4,1,0, -232,4,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,236,4,153,19,1,0,1,0, -1,0,240,4,159,19,1,0,244,4,165,19,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,248,4,171,19,252,4,177,19, -1,0,1,0,1,0,1,0,1,0,193,28,199,28,1,0, -207,28,213,28,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,0,5,1,0,1,0, -1,0,183,19,1,0,4,5,189,19,8,5,1,0,195,19, -12,5,201,19,1,0,1,0,1,0,16,5,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -207,19,20,5,213,19,1,0,24,5,28,5,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,219,19,225,19,231,19, -237,19,243,19,32,5,36,5,249,19,255,19,40,5,44,5, -5,20,11,20,48,5,52,5,56,5,60,5,1,0,1,0, -17,20,23,20,64,5,68,5,29,20,35,20,72,5,76,5, -41,20,47,20,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,80,5,84,5,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,88,5,1,0,1,0,1,0,1,0,1,0, -92,5,96,5,1,0,100,5,53,20,59,20,65,20,71,20, -1,0,1,0,104,5,108,5,112,5,116,5,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,77,20,83,20, -89,20,95,20,1,0,1,0,1,0,1,0,1,0,1,0, -101,20,107,20,113,20,119,20,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,221,28,225,28,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,129,22, -105,22,109,22,85,27,89,27,93,27,97,27,101,27,105,27, -229,28,235,28,241,28,247,28,253,28,3,29,9,29,15,29, -21,29,27,29,33,29,39,29,47,29,55,29,63,29,71,29, -79,29,87,29,95,29,103,29,111,29,121,29,131,29,141,29, -151,29,161,29,171,29,181,29,191,29,201,29,211,29,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,221,29,229,29,237,29,245,29,253,29, -5,30,13,30,21,30,29,30,37,30,45,30,53,30,61,30, -69,30,77,30,85,30,93,30,101,30,109,30,117,30,125,30, -133,30,141,30,149,30,157,30,165,30,94,22,202,25,50,26, -206,25,210,25,62,26,214,25,62,23,218,25,70,23,222,25, -122,23,226,25,230,25,132,22,238,25,189,27,74,23,184,22, -242,25,246,25,14,26,78,23,126,23,82,23,126,26,94,22, 
-202,25,50,26,206,25,210,25,62,26,214,25,62,23,218,25, -70,23,222,25,122,23,226,25,230,25,132,22,238,25,189,27, -74,23,184,22,242,25,246,25,14,26,78,23,126,23,82,23, -126,26,81,27,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,173,30, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,182,30,190,30,196,30,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,205,30,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -121,251,121,251,121,251,121,251,121,251,121,251,121,251,121,251, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,250,1,0,211,30,215,30,219,30,1,0,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,255,25,95,26, -251,25,47,26,1,0,1,250,1,0,1,0,1,250,1,0, -1,0,1,0,1,0,1,0,1,0,70,23,14,26,223,30, -227,30,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,250,1,0,1,250,1,0,1,250,1,0,1,250, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,250,1,0,1,250,1,0,204,255,204,255,204,255,1,250, -1,0,81,22,81,22,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,81,22,81,22, -81,22,81,22,81,22,1,0,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,137,249,1,0,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,18,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, 
-81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,231,30,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,235,30,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,239,30,243,30, -247,30,251,30,255,30,3,31,7,31,11,31,15,31,19,31, -23,31,27,31,31,31,35,31,39,31,43,31,47,31,51,31, -55,31,59,31,63,31,67,31,71,31,75,31,79,31,83,31, -87,31,91,31,95,31,99,31,103,31,107,31,111,31,115,31, -119,31,123,31,127,31,131,31,135,31,139,31,143,31,147,31, -151,31,155,31,159,31,163,31,167,31,171,31,175,31,179,31, -183,31,187,31,191,31,195,31,199,31,203,31,207,31,211,31, -215,31,219,31,223,31,227,31,231,31,235,31,239,31,243,31, -247,31,251,31,255,31,3,32,7,32,11,32,15,32,19,32, -23,32,27,32,31,32,35,32,39,32,43,32,47,32,51,32, -55,32,59,32,63,32,67,32,71,32,75,32,79,32,83,32, -87,32,91,32,95,32,99,32,103,32,107,32,111,32,115,32, -119,32,123,32,127,32,131,32,135,32,139,32,143,32,147,32, -151,32,155,32,159,32,163,32,167,32,171,32,175,32,179,32, -183,32,187,32,191,32,195,32,199,32,203,32,207,32,211,32, -215,32,219,32,223,32,227,32,231,32,235,32,239,32,243,32, -247,32,251,32,255,32,3,33,7,33,11,33,15,33,19,33, -23,33,27,33,31,33,35,33,39,33,43,33,47,33,51,33, -55,33,59,33,63,33,67,33,71,33,75,33,79,33,83,33, -87,33,91,33,95,33,99,33,103,33,107,33,111,33,115,33, -119,33,123,33,127,33,131,33,135,33,139,33,143,33,147,33, -151,33,155,33,159,33,163,33,167,33,171,33,175,33,179,33, -183,33,187,33,191,33,195,33,199,33,203,33,207,33,211,33, -215,33,219,33,223,33,227,33,231,33,235,33,239,33,243,33, -247,33,251,33,255,33,3,34,7,34,11,34,15,34,19,34, -23,34,27,34,31,34,35,34,39,34,43,34,47,34,51,34, -55,34,59,34,63,34,67,34,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,85,22,1,0, -71,34,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -180,255,200,255,208,255,188,255,192,255,192,255,1,0,1,0, -1,0,1,0,1,0,1,0,217,248,1,0,75,31,75,34, -79,34,1,0,1,0,1,0,1,0,1,0,81,22,1,0, -1,0,1,0,1,0,1,0,120,5,1,0,1,0,1,0, -1,0,124,5,125,20,128,5,131,20,132,5,137,20,136,5, -143,20,140,5,149,20,144,5,155,20,148,5,161,20,152,5, -167,20,156,5,173,20,160,5,179,20,164,5,185,20,168,5, -191,20,1,0,172,5,197,20,176,5,203,20,180,5,209,20, -1,0,1,0,1,0,1,0,1,0,184,5,215,20,221,20, -192,5,227,20,233,20,200,5,239,20,245,20,208,5,251,20, -1,21,216,5,7,21,13,21,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -19,21,1,0,1,0,81,22,81,22,16,252,16,252,82,34, -88,34,224,5,25,21,95,34,1,0,1,0,1,0,1,0, -1,0,1,0,228,5,1,0,1,0,1,0,1,0,232,5, -31,21,236,5,37,21,240,5,43,21,244,5,49,21,248,5, -55,21,252,5,61,21,0,6,67,21,4,6,73,21,8,6, -79,21,12,6,85,21,16,6,91,21,20,6,97,21,1,0, -24,6,103,21,28,6,109,21,32,6,115,21,1,0,1,0, -1,0,1,0,1,0,36,6,121,21,127,21,44,6,133,21, -139,21,52,6,145,21,151,21,60,6,157,21,163,21,68,6, -169,21,175,21,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,76,6,80,6,84,6,88,6,1,0, 
-181,21,1,0,1,0,187,21,193,21,199,21,205,21,1,0, -1,0,92,6,211,21,100,34,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,106,34,110,34,244,82, -114,34,248,82,252,82,118,34,122,34,126,34,0,83,4,83, -8,83,12,83,16,83,20,83,131,34,134,34,138,34,142,34, -147,34,150,34,154,34,158,34,162,34,166,34,170,34,174,34, -178,34,182,34,186,34,24,83,28,83,32,83,36,83,40,83, -44,83,48,83,52,83,56,83,60,83,64,83,68,83,72,83, -76,83,80,83,84,83,88,83,92,83,96,83,100,83,104,83, -81,22,191,34,195,34,199,34,203,34,207,34,211,34,215,34, -219,34,223,34,227,34,231,34,235,34,239,34,243,34,247,34, -251,34,255,34,3,35,7,35,11,35,15,35,19,35,23,35, -27,35,31,35,35,35,39,35,43,35,47,35,51,35,55,35, -59,35,63,35,67,35,71,35,75,35,79,35,83,35,87,35, -91,35,95,35,99,35,81,22,1,0,1,0,239,30,7,31, -103,35,107,35,111,35,115,35,119,35,123,35,255,30,127,35, -131,35,135,35,139,35,15,31,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,143,35,151,35,159,35,167,35, -175,35,183,35,191,35,199,35,207,35,215,35,223,35,231,35, -239,35,247,35,23,77,41,77,59,77,77,77,95,77,113,77, -131,77,149,77,167,77,185,77,203,77,221,77,239,77,1,78, -19,78,39,78,65,78,81,22,255,35,7,36,15,36,23,36, -31,36,39,36,47,36,55,36,63,36,71,36,79,36,87,36, -95,36,103,36,111,36,119,36,127,36,135,36,143,36,151,36, -159,36,167,36,175,36,183,36,191,36,199,36,207,36,215,36, -223,36,231,36,239,36,247,36,255,36,7,37,15,37,23,37, -31,37,35,37,247,31,39,37,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,42,37,51,37,57,37,63,37, -69,37,75,37,81,37,87,37,93,37,99,37,105,37,111,37, -117,37,123,37,129,37,135,37,106,34,114,34,118,34,126,34, -134,34,138,34,150,34,158,34,162,34,170,34,174,34,178,34, -182,34,186,34,80,78,88,78,96,78,104,78,112,78,120,78, -128,78,136,78,144,78,152,78,160,78,168,78,176,78,184,78, -196,78,214,78,226,78,1,0,239,30,7,31,103,35,107,35, -141,37,145,37,149,37,27,31,153,37,75,31,19,32,67,32, -63,32,23,32,135,33,107,31,11,32,157,37,161,37,165,37, -169,37,173,37,177,37,181,37,185,37,189,37,193,37,131,31, -197,37,201,37,205,37,209,37,213,37,217,37,221,37,225,37, -111,35,115,35,119,35,229,37,233,37,237,37,241,37,245,37, -249,37,253,37,1,38,5,38,9,38,13,38,19,38,25,38, -31,38,37,38,43,38,49,38,55,38,61,38,67,38,73,38, -79,38,85,38,91,38,97,38,103,38,109,38,115,38,121,38, -127,38,133,38,139,38,145,38,151,38,157,38,165,38,173,38, -180,38,186,38,194,38,200,38,209,38,213,38,216,38,221,38, -225,38,228,38,232,38,236,38,240,38,244,38,248,38,252,38, -0,39,4,39,8,39,12,39,16,39,20,39,24,39,28,39, -33,39,37,39,41,39,45,39,49,39,52,39,56,39,60,39, -64,39,68,39,73,39,77,39,81,39,85,39,89,39,93,39, -97,39,101,39,105,39,109,39,113,39,117,39,121,39,124,39, -128,39,132,39,136,39,141,39,242,78,147,39,9,79,157,39, -30,79,164,39,173,39,54,79,181,39,190,39,199,39,207,39, -214,39,225,39,71,79,83,79,98,79,111,79,235,39,131,79, -245,39,157,79,251,39,8,40,173,79,185,79,211,79,21,40, -30,40,39,40,232,79,47,40,57,40,252,79,68,40,76,40, -10,80,22,80,33,80,85,40,91,40,96,40,104,40,42,80, -58,80,71,80,85,80,101,80,112,80,123,80,142,80,112,40, 
-159,80,123,40,131,40,174,80,184,80,142,40,196,80,214,80, -228,80,240,80,254,80,151,40,16,81,157,40,165,40,173,40, -183,40,190,40,198,40,207,40,219,40,229,40,41,81,60,81, -79,81,235,40,98,81,245,40,253,40,5,41,15,41,117,81, -137,81,21,41,161,81,26,41,35,41,41,41,47,41,53,41, -59,41,65,41,71,41,77,41,83,41,89,41,95,41,103,41, -111,41,119,41,127,41,135,41,143,41,151,41,159,41,167,41, -175,41,183,41,191,41,199,41,207,41,214,41,222,41,228,41, -234,41,242,41,248,41,254,41,5,42,13,42,20,42,27,42, -33,42,39,42,45,42,51,42,60,42,66,42,72,42,78,42, -84,42,90,42,96,42,102,42,108,42,116,42,126,42,132,42, -138,42,144,42,150,42,156,42,162,42,168,42,176,42,184,42, -192,42,200,42,206,42,212,42,218,42,224,42,230,42,236,42, -242,42,248,42,254,42,5,43,13,43,21,43,27,43,35,43, -43,43,51,43,57,43,64,43,73,43,60,42,82,43,90,43, -98,43,106,43,114,43,127,43,140,43,146,43,152,43,158,43, -164,43,170,43,176,43,182,43,188,43,182,43,194,43,200,43, -206,43,212,43,218,43,212,43,224,43,230,43,81,22,237,43, -242,43,248,43,254,43,81,22,8,44,14,44,20,44,26,44, -32,44,38,44,254,42,44,44,50,44,56,44,62,44,70,44, -96,42,76,44,84,44,92,44,81,22,98,44,106,44,112,44, -118,44,124,44,130,44,138,44,147,44,153,44,159,44,165,44, -171,44,177,44,183,44,189,44,195,44,201,44,209,44,217,44, -225,44,233,44,241,44,249,44,1,45,9,45,17,45,25,45, -33,45,41,45,49,45,57,45,65,45,73,45,81,45,89,45, -97,45,105,45,113,45,120,45,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,0,204,255, -1,0,1,0,1,0,1,0,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,1,0,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,3,25,129,45,204,255,204,255, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -204,255,204,255,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,0,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,241,249,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,250,1,0,1,250,1,0,133,45, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, 
-1,0,1,0,1,0,1,250,1,0,75,26,1,0,1,0, -1,250,1,0,1,250,1,0,1,0,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -1,250,1,0,67,23,7,26,71,26,137,45,79,26,1,0, -141,45,145,45,83,26,149,45,1,250,1,0,1,250,1,0, -1,250,1,0,1,250,1,0,1,250,1,0,1,250,1,0, -81,22,81,22,1,250,1,0,121,248,115,26,153,45,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,1,0, -179,27,157,45,1,0,1,0,1,0,1,0,1,0,1,0, -18,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,18,254,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,184,255, -184,255,184,255,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,18,254,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,14,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,18,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,204,255,1,0, -204,255,204,255,184,255,1,0,1,0,204,255,204,255,1,0, -1,0,1,0,1,0,1,0,204,255,204,255,1,0,204,255, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,18,254,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,161,45,201,248,211,30,145,249,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,165,45,169,45,173,45, -177,45,181,45,185,45,189,45,193,45,197,45,201,45,205,45, -209,45,213,45,217,45,221,45,225,45,229,45,233,45,237,45, -241,45,245,45,249,45,253,45,1,46,5,46,9,46,13,46, -17,46,21,46,25,46,29,46,33,46,37,46,41,46,45,46, -49,46,53,46,57,46,61,46,65,46,69,46,73,46,77,46, -81,46,85,46,89,46,93,46,97,46,101,46,105,46,109,46, -113,46,117,46,121,46,125,46,129,46,133,46,137,46,141,46, -145,46,149,46,153,46,157,46,161,46,165,46,169,46,173,46, -177,46,181,46,185,46,189,46,193,46,197,46,201,46,205,46, -209,46,213,46,217,46,221,46,225,46,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,18,254,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,81,22,168,6,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,168,6,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,168,6,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,168,6,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,97,9,97,9,97,9, -97,9,97,9,97,9,97,9,97,9,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,81,22,124,83,125,83,124,83,124,83,124,83,124,83, -124,83,124,83,81,22,81,22,81,22,81,22,1,0,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,124,83,125,83,1,0,1,0, -1,0,1,0,1,0,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,124,83,81,22,81,22,81,22,81,22,124,83,26,65, -81,22,81,22,124,83,81,22,124,83,38,65,188,82,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,1,0,1,0,1,0,81,22, -81,22,1,0,1,0,1,0,1,0,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -117,74,81,22,122,83,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, 
-81,22,81,22,229,46,233,46,103,33,237,46,241,46,245,46, -249,46,63,34,63,34,253,46,135,33,1,47,5,47,9,47, -13,47,17,47,21,47,25,47,29,47,33,47,37,47,41,47, -45,47,49,47,53,47,57,47,61,47,65,47,69,47,73,47, -77,47,81,47,85,47,89,47,93,47,97,47,101,47,105,47, -109,47,113,47,117,47,121,47,125,47,129,47,133,47,137,47, -141,47,145,47,149,47,153,47,157,47,161,47,223,32,165,47, -169,47,173,47,177,47,181,47,185,47,189,47,193,47,197,47, -201,47,205,47,3,34,209,47,213,47,217,47,221,47,225,47, -229,47,233,47,237,47,241,47,245,47,249,47,253,47,1,48, -5,48,9,48,13,48,17,48,21,48,25,48,29,48,33,48, -37,48,41,48,45,48,49,48,53,48,57,48,37,47,61,48, -65,48,69,48,73,48,77,48,81,48,85,48,89,48,93,48, -97,48,101,48,105,48,109,48,113,48,117,48,121,48,125,48, -129,48,133,48,137,48,111,33,141,48,145,48,149,48,153,48, -157,48,161,48,165,48,169,48,173,48,177,48,181,48,185,48, -189,48,193,48,197,48,131,31,201,48,205,48,209,48,213,48, -217,48,221,48,225,48,229,48,55,31,233,48,237,48,241,48, -245,48,249,48,253,48,1,49,5,49,9,49,13,49,17,49, -21,49,25,49,29,49,33,49,37,49,41,49,45,49,49,49, -53,49,57,49,61,49,133,48,65,49,69,49,73,49,77,49, -81,49,85,49,89,49,93,49,69,48,97,49,101,49,105,49, -109,49,113,49,117,49,121,49,125,49,129,49,133,49,137,49, -141,49,145,49,149,49,153,49,157,49,161,49,165,49,169,49, -173,49,37,47,177,49,181,49,185,49,189,49,59,34,193,49, -197,49,201,49,205,49,209,49,213,49,217,49,221,49,225,49, -229,49,233,49,237,49,145,37,241,49,245,49,249,49,253,49, -1,50,5,50,9,50,13,50,17,50,77,48,21,50,25,50, -29,50,33,50,37,50,41,50,45,50,49,50,53,50,57,50, -61,50,65,50,69,50,131,33,73,50,77,50,81,50,85,50, -89,50,93,50,97,50,101,50,105,50,109,50,113,50,117,50, -121,50,191,32,125,50,129,50,133,50,137,50,141,50,145,50, -149,50,153,50,157,50,161,50,165,50,169,50,173,50,177,50, -181,50,185,50,43,33,189,50,55,33,193,50,197,50,201,50, -1,0,1,0,205,50,1,0,209,50,1,0,1,0,213,50, -217,50,221,50,225,50,229,50,233,50,237,50,241,50,245,50, -219,32,1,0,249,50,1,0,253,50,1,0,1,0,1,51, -5,51,1,0,1,0,1,0,9,51,13,51,17,51,21,51, -25,51,29,51,33,51,37,51,41,51,45,51,49,51,53,51, -57,51,61,51,65,51,69,51,73,51,77,51,159,31,81,51, -85,51,89,51,93,51,97,51,101,51,105,51,109,51,113,51, -117,51,121,51,125,51,129,51,133,51,137,51,165,37,141,51, -145,51,149,51,153,51,181,37,157,51,161,51,165,51,169,51, -173,51,21,49,177,51,181,51,185,51,189,51,193,51,197,51, -197,51,201,51,205,51,209,51,213,51,217,51,221,51,225,51, -229,51,1,51,233,51,237,51,241,51,245,51,249,51,255,51, -81,22,81,22,3,52,7,52,11,52,15,52,19,52,23,52, -27,52,31,52,57,51,35,52,39,52,43,52,205,50,47,52, -51,52,55,52,59,52,63,52,67,52,71,52,75,52,79,52, -83,52,87,52,91,52,89,51,95,52,93,51,99,52,103,52, -107,52,111,52,115,52,209,50,121,47,119,52,123,52,35,32, -137,48,213,49,127,52,131,52,121,51,135,52,125,51,139,52, -143,52,147,52,217,50,151,52,155,52,159,52,163,52,167,52, -221,50,171,52,175,52,179,52,183,52,187,52,191,52,173,51, -195,52,199,52,21,49,203,52,189,51,207,52,211,52,215,52, -219,52,223,52,209,51,227,52,253,50,231,52,213,51,61,48, -235,52,217,51,239,52,225,51,243,52,247,52,251,52,255,52, -3,53,233,51,241,50,7,53,237,51,11,53,241,51,15,53, -63,34,19,53,25,53,31,53,37,53,41,53,45,53,49,53, -55,53,61,53,67,53,71,53,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,74,53,80,53,86,53,92,53,100,53,108,53, -108,53,81,22,81,22,81,22,81,22,81,22,81,22,81,22, 
-81,22,81,22,81,22,81,22,81,22,115,53,121,53,127,53, -133,53,139,53,81,22,81,22,81,22,81,22,81,22,144,53, -52,254,150,53,157,53,217,27,229,27,161,53,165,53,169,53, -173,53,177,53,181,53,109,27,184,53,190,53,196,53,204,53, -212,53,218,53,224,53,230,53,236,53,242,53,248,53,254,53, -4,54,81,22,10,54,16,54,22,54,28,54,34,54,81,22, -40,54,81,22,46,54,52,54,81,22,58,54,64,54,81,22, -70,54,76,54,82,54,88,54,94,54,100,54,106,54,112,54, -118,54,125,54,131,54,131,54,135,54,135,54,135,54,135,54, -139,54,139,54,139,54,139,54,143,54,143,54,143,54,143,54, -147,54,147,54,147,54,147,54,151,54,151,54,151,54,151,54, -155,54,155,54,155,54,155,54,159,54,159,54,159,54,159,54, -163,54,163,54,163,54,163,54,167,54,167,54,167,54,167,54, -171,54,171,54,171,54,171,54,175,54,175,54,175,54,175,54, -179,54,179,54,183,54,183,54,187,54,187,54,191,54,191,54, -195,54,195,54,199,54,199,54,203,54,203,54,207,54,207,54, -207,54,207,54,211,54,211,54,211,54,211,54,215,54,215,54, -215,54,215,54,219,54,219,54,219,54,219,54,223,54,223,54, -227,54,227,54,227,54,227,54,176,81,176,81,230,54,230,54, -230,54,230,54,235,54,235,54,235,54,235,54,238,54,238,54, -184,81,184,81,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,243,54,243,54,243,54,243,54,247,54,247,54,251,54, -251,54,255,54,255,54,237,23,3,55,3,55,7,55,7,55, -11,55,11,55,15,55,15,55,15,55,15,55,19,55,19,55, -192,81,192,81,202,81,202,81,212,81,212,81,223,81,223,81, -233,81,233,81,243,81,243,81,253,81,253,81,253,81,7,82, -7,82,7,82,23,55,23,55,23,55,23,55,17,82,27,82, -37,82,7,82,46,82,27,55,33,55,39,55,45,55,51,55, -56,55,63,55,69,55,75,55,81,55,87,55,92,55,99,55, -105,55,111,55,116,55,123,55,129,55,135,55,141,55,147,55, -153,55,159,55,165,55,171,55,177,55,183,55,189,55,195,55, -201,55,207,55,213,55,219,55,225,55,231,55,237,55,243,55, -249,55,255,55,5,56,11,56,17,56,23,56,29,56,35,56, -40,56,47,56,53,56,59,56,64,56,70,56,77,56,83,56, -89,56,95,56,101,56,107,56,112,56,119,56,125,56,131,56, -137,56,143,56,148,56,155,56,161,56,167,56,173,56,179,56, -184,56,191,56,197,56,203,56,209,56,215,56,220,56,227,56, -233,56,239,56,244,56,251,56,1,57,7,57,13,57,19,57, -24,57,30,57,36,57,42,57,48,57,56,57,64,57,72,57, -80,57,88,57,57,82,67,82,37,82,77,82,7,82,46,82, -97,57,103,57,45,55,109,57,51,55,56,55,115,57,121,57, -81,55,127,57,87,55,92,55,133,57,139,57,105,55,145,57, -111,55,116,55,35,56,40,56,59,56,64,56,70,56,95,56, -101,56,107,56,112,56,137,56,143,56,148,56,150,57,173,56, -157,57,163,57,209,56,169,57,215,56,220,56,42,57,175,57, -181,57,13,57,187,57,19,57,24,57,17,82,27,82,87,82, -37,82,97,82,27,55,33,55,39,55,45,55,193,57,63,55, -69,55,75,55,81,55,199,57,105,55,123,55,129,55,135,55, -141,55,147,55,159,55,165,55,171,55,177,55,183,55,189,55, -205,57,195,55,201,55,207,55,213,55,219,55,225,55,237,55, -243,55,249,55,255,55,5,56,11,56,17,56,23,56,29,56, -47,56,53,56,77,56,83,56,89,56,95,56,101,56,119,56, -125,56,131,56,137,56,211,57,155,56,161,56,167,56,173,56, -191,56,197,56,203,56,209,56,217,57,227,56,233,56,222,57, -251,56,1,57,7,57,13,57,229,57,37,82,97,82,45,55, -193,57,81,55,199,57,105,55,235,57,183,55,241,57,247,57, -253,57,95,56,101,56,137,56,209,56,217,57,13,57,229,57, -2,58,10,58,18,58,27,58,32,58,39,58,44,58,51,58, -56,58,63,58,68,58,75,58,80,58,87,58,92,58,99,58, -104,58,111,58,116,58,123,58,128,58,135,58,140,58,147,58, -153,58,159,58,247,57,165,58,171,58,177,58,183,58,27,58, -32,58,39,58,44,58,51,58,56,58,63,58,68,58,75,58, 
-80,58,87,58,92,58,99,58,104,58,111,58,116,58,123,58, -128,58,135,58,140,58,147,58,153,58,159,58,247,57,165,58, -171,58,177,58,183,58,147,58,153,58,159,58,247,57,241,57, -253,57,231,55,165,55,171,55,177,55,147,58,153,58,159,58, -231,55,237,55,188,58,188,58,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,195,58,203,58, -203,58,211,58,219,58,227,58,235,58,243,58,251,58,251,58, -2,59,11,59,19,59,27,59,35,59,43,59,43,59,51,59, -59,59,59,59,67,59,67,59,75,59,83,59,83,59,90,59, -99,59,99,59,107,59,107,59,115,59,123,59,123,59,131,59, -131,59,139,59,146,59,155,59,163,59,163,59,171,59,179,59, -186,59,195,59,203,59,203,59,211,59,219,59,227,59,234,59, -243,59,251,59,251,59,3,60,3,60,11,60,11,60,19,60, -27,60,34,60,43,60,51,60,59,60,67,60,81,22,81,22, -75,60,83,60,91,60,99,60,107,60,115,60,115,60,123,60, -130,60,139,60,147,60,147,60,154,60,162,60,171,60,178,60, -187,60,194,60,203,60,210,60,219,60,227,60,235,60,242,60, -250,60,2,61,10,61,18,61,26,61,34,61,42,61,50,61, -58,61,66,61,211,59,227,59,74,61,82,61,91,61,98,61, -107,61,115,61,107,61,91,61,122,61,130,61,138,61,146,61, -154,61,115,61,155,59,75,59,162,61,170,61,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,178,61,186,61, -195,61,205,61,215,61,225,61,235,61,245,61,255,61,9,62, -17,62,55,62,73,62,1,0,81,22,81,22,122,83,122,83, -122,83,122,83,122,83,122,83,122,83,122,83,122,83,122,83, -122,83,122,83,122,83,122,83,122,83,122,83,83,62,87,62, -81,22,91,62,151,23,95,62,99,62,103,62,107,62,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,204,255,204,255, -204,255,204,255,204,255,204,255,204,255,184,255,184,255,184,255, -184,255,184,255,184,255,184,255,204,255,204,255,81,22,111,62, -115,62,119,62,119,62,121,27,125,27,123,62,127,62,131,62, -135,62,139,62,143,62,147,62,151,62,221,28,225,28,155,62, -159,62,163,62,167,62,1,0,1,0,171,62,175,62,46,27, -46,27,46,27,46,27,119,62,119,62,119,62,83,62,87,62, -81,22,81,22,151,23,91,62,99,62,95,62,111,62,121,27, -125,27,123,62,127,62,131,62,135,62,179,62,183,62,187,62, -109,27,191,62,194,62,198,62,116,27,81,22,203,62,207,62, -211,62,215,62,81,22,81,22,81,22,81,22,218,62,224,62, -230,62,1,0,236,62,81,22,242,62,248,62,254,62,4,63, -10,63,16,63,22,63,28,63,34,63,40,63,47,63,106,82, -106,82,114,82,114,82,122,82,122,82,130,82,130,82,138,82, -138,82,138,82,138,82,50,63,50,63,55,63,55,63,55,63, -55,63,59,63,59,63,63,63,63,63,63,63,63,63,67,63, -67,63,67,63,67,63,71,63,71,63,71,63,71,63,75,63, -75,63,75,63,75,63,79,63,79,63,79,63,79,63,83,63, -83,63,87,63,87,63,91,63,91,63,95,63,95,63,99,63, -99,63,99,63,99,63,103,63,103,63,103,63,103,63,107,63, -107,63,107,63,107,63,111,63,111,63,111,63,115,63,115,63, -115,63,115,63,119,63,119,63,119,63,119,63,123,63,123,63, -123,63,123,63,127,63,127,63,127,63,127,63,131,63,131,63, -131,63,131,63,135,63,135,63,135,63,135,63,139,63,139,63, -139,63,139,63,143,63,143,63,143,63,143,63,147,63,147,63, -147,63,147,63,151,63,151,63,151,63,151,63,155,63,155,63, -155,63,155,63,158,63,158,63,19,55,19,55,162,63,162,63, -162,63,162,63,150,82,150,82,164,82,164,82,178,82,178,82, -166,63,166,63,81,22,81,22,122,83,81,22,95,62,173,63, -179,62,207,62,211,62,183,62,177,63,121,27,125,27,187,62, -109,27,83,62,191,62,71,34,181,63,81,27,129,22,105,22, -109,22,85,27,89,27,93,27,97,27,101,27,105,27,91,62, -151,23,194,62,116,27,198,62,99,62,215,62,94,22,202,25, 
-50,26,206,25,210,25,62,26,214,25,62,23,218,25,70,23, -222,25,122,23,226,25,230,25,132,22,238,25,189,27,74,23, -184,22,242,25,246,25,14,26,78,23,126,23,82,23,126,26, -171,62,203,62,175,62,185,63,119,62,247,26,94,22,202,25, -50,26,206,25,210,25,62,26,214,25,62,23,218,25,70,23, -222,25,122,23,226,25,230,25,132,22,238,25,189,27,74,23, -184,22,242,25,246,25,14,26,78,23,126,23,82,23,126,26, -123,62,189,63,127,62,193,63,197,63,201,63,71,34,155,62, -159,62,87,62,205,63,136,39,209,63,213,63,217,63,221,63, -225,63,229,63,233,63,237,63,241,63,245,63,209,38,213,38, -216,38,221,38,225,38,228,38,232,38,236,38,240,38,244,38, -248,38,252,38,0,39,4,39,8,39,12,39,16,39,20,39, -24,39,28,39,33,39,37,39,41,39,45,39,49,39,52,39, -56,39,60,39,64,39,68,39,73,39,77,39,81,39,85,39, -89,39,93,39,97,39,101,39,105,39,109,39,113,39,117,39, -121,39,124,39,249,63,110,83,116,83,81,22,106,34,110,34, -244,82,114,34,248,82,252,82,118,34,122,34,126,34,0,83, -4,83,8,83,12,83,16,83,20,83,131,34,134,34,138,34, -142,34,147,34,150,34,154,34,158,34,162,34,166,34,170,34, -174,34,178,34,182,34,186,34,233,251,225,251,217,251,24,83, -28,83,32,83,36,83,40,83,44,83,161,251,153,251,48,83, -52,83,56,83,60,83,64,83,68,83,97,251,89,251,72,83, -76,83,80,83,84,83,88,83,92,83,33,251,25,251,96,83, -100,83,104,83,249,250,241,250,233,250,253,63,1,64,5,64, -98,22,9,64,13,64,17,64,169,250,21,64,24,64,29,64, -32,64,37,64,41,64,45,64,105,250,97,250,89,250,81,250, -73,250,65,250,57,250,49,250,41,250,33,250,25,250,17,250, -9,250,1,250,1,0,241,249,233,249,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,129,249,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,169,248,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,9,248,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,184,255,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,204,255, -204,255,204,255,204,255,204,255,81,22,81,22,81,22,81,22, -81,22,57,251,57,251,57,251,57,251,57,251,57,251,57,251, -57,251,57,251,57,251,57,251,57,251,57,251,57,251,57,251, -57,251,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,57,251,57,251,57,251,57,251,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,81,22,81,22,1,0,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,1,0,1,0,81,22,81,22, -81,22,1,0,81,22,81,22,1,0,1,0,1,0,81,22, -1,0,1,0,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,81,22, -81,22,81,22,81,22,81,22,1,0,184,255,1,0,204,255, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -204,255,2,254,184,255,81,22,81,22,81,22,81,22,18,254, -1,0,1,0,1,0,1,0,1,0,204,255,184,255,81,22, -81,22,81,22,81,22,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,1,0,81,22,81,22,81,22,249,251,249,251, -249,251,249,251,249,251,249,251,249,251,249,251,249,251,249,251, -249,251,249,251,249,251,249,251,249,251,249,251,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,1,0,1,0,1,0,1,0,204,255, -204,255,204,255,204,255,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,1,0,1,0,1,0,1,0,1,0, -1,0,184,255,184,255,204,255,204,255,204,255,184,255,204,255, -184,255,184,255,184,255,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,1,0,1,0,1,0,1,0,1,0,1,0, -18,254,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,18,254, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,96,6,217,21,102,6,227,21,1,0,1,0,1,0, -1,0,1,0,108,6,1,0,1,0,1,0,1,0,1,0, -237,21,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,18,254,14,252,1,0,1,0,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,0,252,1,0, -1,0,1,0,1,0,1,0,1,0,247,21,1,22,1,0, 
-114,6,120,6,18,254,18,254,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,14,254, -1,0,1,0,1,0,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,18,254,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,14,254,1,0, -1,0,1,0,81,22,81,22,1,0,1,0,1,0,1,0, -1,0,18,254,14,254,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,1,0,81,22,1,0,1,0, -1,0,1,0,81,22,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,14,254,18,254,81,22,81,22, -81,22,81,22,81,22,1,0,81,22,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,81,22,14,254,14,254, -1,0,0,252,1,0,1,0,1,0,1,0,1,0,81,22, -81,22,126,6,1,0,81,22,81,22,11,22,21,22,18,254, -81,22,81,22,1,0,81,22,81,22,81,22,81,22,81,22, -81,22,0,252,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,1,0,81,22,81,22,204,255,204,255,204,255, -204,255,204,255,204,255,204,255,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,18,254,1,0,1,0,1,0,14,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -81,22,1,0,81,22,1,0,204,255,1,0,0,252,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,138,6, -0,252,31,22,41,22,0,252,51,22,1,0,1,0,18,254, -14,254,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,0,252,1,0,1,0,1,0, -1,0,1,0,1,0,81,22,81,22,156,6,162,6,61,22, -71,22,1,0,1,0,1,0,18,254,14,254,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,18,254,14,254,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,249,250, -249,250,249,250,249,250,249,250,249,250,249,250,249,250,249,250, -249,250,249,250,249,250,249,250,249,250,249,250,249,250,18,254, -1,0,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,18,254,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,1,0,81,22,1,0,1,0,81,22,1,0,1,0, -14,254,1,0,18,254,18,254,1,0,1,0,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,1,0,1,0, -1,0,1,0,1,0,1,0,81,22,1,0,1,0,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,1,0, -1,0,1,0,1,0,18,254,1,0,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,2,254,2,254,2,254,2,254, -2,254,1,0,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,1,0,1,0,1,0,1,0, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,81,22,81,22,1,0,1,0,2,254,1,0, -122,83,122,83,122,83,122,83,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,48,64,58,64, -68,64,82,64,96,64,110,64,124,64,176,255,176,255,2,254, -2,254,2,254,1,0,1,0,1,0,196,255,176,255,176,255, -176,255,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,184,255,184,255,184,255,184,255,184,255,1,0,1,0, -204,255,204,255,204,255,204,255,204,255,184,255,184,255,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,204,255,204,255,204,255,204,255,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -138,64,148,64,158,64,172,64,186,64,200,64,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,204,255,204,255, -1,0,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,50,26,206,25,210,25,62,26,214,25, -81,22,218,25,70,23,222,25,122,23,226,25,230,25,132,22, -238,25,189,27,74,23,184,22,242,25,246,25,14,26,78,23, -126,23,82,23,126,26,94,22,81,22,50,26,206,25,81,22, -81,22,214,25,81,22,81,22,70,23,222,25,81,22,81,22, -230,25,132,22,238,25,189,27,81,22,184,22,242,25,246,25, -14,26,78,23,126,23,82,23,126,26,94,22,202,25,50,26, -206,25,81,22,62,26,81,22,62,23,218,25,70,23,222,25, -122,23,226,25,230,25,81,22,238,25,189,27,74,23,184,22, -242,25,246,25,14,26,78,23,126,23,82,23,126,26,94,22, 
-202,25,81,22,206,25,210,25,62,26,214,25,81,22,81,22, -70,23,222,25,122,23,226,25,230,25,132,22,238,25,189,27, -81,22,184,22,242,25,246,25,14,26,78,23,126,23,82,23, -81,22,94,22,202,25,218,25,70,23,222,25,122,23,226,25, -81,22,132,22,81,22,81,22,81,22,184,22,242,25,246,25, -14,26,78,23,126,23,82,23,81,22,94,22,202,25,50,26, -206,25,210,25,62,26,214,25,62,23,218,25,70,23,222,25, -122,23,226,25,230,25,78,23,126,23,82,23,126,26,215,64, -219,64,81,22,81,22,222,64,19,26,23,26,27,26,226,64, -231,64,234,64,139,26,166,26,239,64,243,64,119,22,247,64, -251,64,254,64,241,27,38,26,139,26,3,65,7,65,10,65, -31,26,35,26,15,65,212,27,19,65,222,64,19,26,23,26, -27,26,226,64,231,64,234,64,139,26,166,26,239,64,243,64, -119,22,247,64,251,64,254,64,241,27,38,26,3,65,3,65, -7,65,10,65,31,26,35,26,15,65,212,27,23,65,226,64, -139,26,239,64,31,26,38,26,241,27,222,64,19,26,23,26, -27,26,226,64,231,64,234,64,139,26,166,26,239,64,243,64, -119,22,247,64,251,64,35,26,15,65,212,27,23,65,226,64, -139,26,239,64,31,26,38,26,241,27,27,65,27,65,81,22, -81,22,81,27,129,22,105,22,109,22,85,27,89,27,93,27, -97,27,101,27,105,27,81,27,129,22,105,22,109,22,85,27, -89,27,93,27,97,27,101,27,105,27,81,27,129,22,105,22, -109,22,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -81,22,204,255,204,255,204,255,204,255,204,255,204,255,204,255, -204,255,204,255,81,22,81,22,204,255,204,255,204,255,204,255, -204,255,81,22,204,255,204,255,81,22,204,255,204,255,204,255, -204,255,204,255,81,22,81,22,81,22,81,22,81,22,204,255, -204,255,204,255,204,255,204,255,204,255,204,255,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,81,22,81,22,184,255, -184,255,184,255,184,255,184,255,184,255,184,255,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,9,251, -9,251,9,251,9,251,9,251,9,251,9,251,9,251,9,251, -9,251,9,251,9,251,9,251,9,251,9,251,9,251,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,204,255,204,255,204,255, -204,255,204,255,204,255,14,254,1,0,81,22,81,22,81,22, -81,22,50,63,55,63,71,63,83,63,81,22,158,63,95,63, -75,63,115,63,162,63,139,63,143,63,147,63,151,63,99,63, -123,63,131,63,107,63,135,63,91,63,103,63,63,63,67,63, -79,63,87,63,111,63,119,63,127,63,31,65,223,54,35,65, -39,65,81,22,55,63,71,63,81,22,155,63,81,22,81,22, -75,63,81,22,162,63,139,63,143,63,147,63,151,63,99,63, -123,63,131,63,107,63,135,63,81,22,103,63,63,63,67,63, -79,63,81,22,111,63,81,22,127,63,81,22,81,22,81,22, -81,22,71,63,81,22,81,22,81,22,81,22,75,63,81,22, -162,63,81,22,143,63,81,22,151,63,99,63,123,63,81,22, -107,63,135,63,81,22,103,63,81,22,81,22,79,63,81,22, -111,63,81,22,127,63,81,22,223,54,81,22,39,65,81,22, -55,63,71,63,81,22,155,63,81,22,81,22,75,63,115,63, -162,63,139,63,81,22,147,63,151,63,99,63,123,63,131,63, -107,63,135,63,81,22,103,63,63,63,67,63,79,63,81,22, -111,63,119,63,127,63,31,65,81,22,35,65,81,22,50,63, -55,63,71,63,83,63,155,63,158,63,95,63,75,63,115,63, -162,63,81,22,143,63,147,63,151,63,99,63,123,63,131,63, -107,63,135,63,91,63,103,63,63,63,67,63,79,63,87,63, -111,63,119,63,127,63,81,22,81,22,81,22,81,22,55,63, -71,63,83,63,81,22,158,63,95,63,75,63,115,63,162,63, -81,22,143,63,147,63,151,63,99,63,123,63,81,22,43,65, -49,65,55,65,61,65,67,65,73,65,79,65,85,65,91,65, -97,65,1,0,1,0,81,22,81,22,81,22,93,30,101,30, -109,30,117,30,125,30,133,30,141,30,149,30,157,30,165,30, -103,65,50,26,74,23,248,43,110,65,1,0,189,27,74,23, -184,22,242,25,246,25,14,26,78,23,126,23,82,23,126,26, -116,65,182,43,122,65,148,26,128,65,136,65,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -142,65,148,65,154,65,81,22,81,22,81,22,160,65,1,0, 
-1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0, -1,0,1,0,1,0,1,0,1,0,1,0,166,65,172,65, -248,38,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,235,31,179,65, -183,65,188,82,7,31,187,65,191,65,135,35,195,65,199,65, -203,65,173,49,207,65,211,65,215,65,219,65,223,65,227,65, -123,32,231,65,235,65,239,65,243,65,247,65,251,65,239,30, -103,35,255,65,229,37,115,35,233,37,3,66,91,33,7,66, -11,66,15,66,19,66,23,66,161,37,19,32,27,66,31,66, -35,66,39,66,81,22,81,22,81,22,81,22,43,66,51,66, -59,66,67,66,75,66,83,66,91,66,99,66,107,66,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,115,66,119,66, -81,22,81,22,81,22,81,22,81,22,81,22,81,22,81,22, -81,22,81,22,81,22,81,22,81,22,81,22,1,0,1,0, -81,22,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -1,0,1,0,1,0,1,0,1,0,1,0,81,22,81,22, -81,22,1,0,1,0,1,0,1,0,81,22,81,22,81,22, -81,22,1,0,1,0,1,0,81,22,81,22,81,22,81,22, -81,22,123,66,127,66,131,66,135,66,141,66,33,51,145,66, -149,66,153,66,157,66,37,51,161,66,165,66,169,66,41,51, -175,66,179,66,183,66,187,66,193,66,197,66,215,65,201,66, -207,66,211,66,215,66,219,66,7,52,223,66,47,31,229,66, -233,66,237,66,241,66,31,66,245,66,249,66,27,52,45,51, -49,51,31,52,253,66,1,67,85,48,5,67,53,51,9,67, -13,67,17,67,21,67,21,67,21,67,25,67,31,67,35,67, -39,67,43,67,49,67,53,67,57,67,61,67,65,67,69,67, -73,67,77,67,81,67,85,67,89,67,93,67,97,67,97,67, -39,52,101,67,105,67,109,67,113,67,61,51,117,67,121,67, -125,67,157,50,129,67,133,67,137,67,141,67,145,67,149,67, -153,67,157,67,161,67,167,67,171,67,175,67,187,65,179,67, -183,67,187,67,193,67,199,67,203,67,207,67,211,67,215,67, -219,67,81,22,223,67,227,67,227,67,231,67,237,67,241,67, -69,48,245,67,249,67,255,67,3,68,81,22,151,31,7,68, -11,68,159,31,15,68,19,68,23,68,29,68,33,68,39,68, -43,68,47,68,51,68,55,68,59,68,63,68,67,68,71,68, -75,68,79,68,83,68,89,68,93,68,97,68,101,68,117,47, -105,68,199,31,111,68,111,68,117,68,121,68,121,68,125,68, -129,68,135,68,141,68,145,68,149,68,153,68,157,68,161,68, -165,68,169,68,173,68,177,68,81,51,181,68,187,68,191,68, -195,68,87,52,195,68,199,68,89,51,203,68,207,68,211,68, -215,68,93,51,9,47,219,68,223,68,227,68,231,68,235,68, -239,68,243,68,249,68,253,68,1,69,5,69,9,69,13,69, -19,69,23,69,27,69,31,69,35,69,39,69,43,69,47,69, -51,69,97,51,55,69,59,69,65,69,69,69,73,69,77,69, -105,51,81,69,85,69,89,69,93,69,97,69,101,69,105,69, -109,69,121,47,119,52,113,69,117,69,121,69,125,69,131,69, -135,69,139,69,143,69,109,51,147,69,153,69,157,69,161,69, -37,53,165,69,169,69,173,69,177,69,181,69,187,69,191,69, -195,69,199,69,205,69,209,69,213,69,217,69,137,48,221,69, -225,69,231,69,237,69,243,69,247,69,253,69,1,70,5,70, -9,70,13,70,113,51,213,49,17,70,21,70,25,70,29,70, -35,70,39,70,43,70,47,70,131,52,51,70,55,70,61,70, -65,70,69,70,75,70,81,70,85,70,135,52,89,70,93,70, -97,70,101,70,105,70,109,70,113,70,119,70,123,70,129,70, -81,22,133,70,143,52,137,70,141,70,147,70,151,70,155,70, -161,70,167,70,171,70,175,70,179,70,183,70,183,70,187,70, -191,70,151,52,195,70,199,70,203,70,207,70,211,70,217,70, -221,70,81,48,227,70,233,70,237,70,243,70,249,70,255,70, -3,71,175,52,7,71,13,71,19,71,25,71,31,71,35,71, -35,71,179,52,45,53,39,71,43,71,47,71,51,71,57,71, -189,47,187,52,61,71,65,71,153,51,71,71,77,71,237,50, -83,71,87,71,165,51,91,71,95,71,99,71,105,71,105,71, -81,22,111,71,115,71,121,71,125,71,129,71,133,71,139,71, -143,71,147,71,151,71,155,71,159,71,165,71,169,71,173,71, -177,71,181,71,185,71,189,71,195,71,201,71,205,71,211,71, -215,71,221,71,225,71,189,51,229,71,235,71,241,71,245,71, 
-251,71,255,71,5,72,9,72,13,72,17,72,21,72,25,72, -29,72,35,72,41,72,47,72,117,68,53,72,57,72,61,72, -65,72,69,72,73,72,77,72,81,72,85,72,89,72,93,72, -97,72,149,48,103,72,107,72,111,72,115,72,119,72,123,72, -201,51,127,72,131,72,135,72,139,72,143,72,149,72,155,72, -161,72,165,72,169,72,173,72,177,72,183,72,187,72,193,72, -197,72,201,72,207,72,213,72,217,72,169,47,221,72,225,72, -229,72,233,72,237,72,241,72,215,52,245,72,249,72,253,72, -81,22,1,73,5,73,9,73,13,73,47,33,17,73,23,73, -27,73,31,73,35,73,39,73,43,73,49,73,55,73,59,73, -63,73,235,52,239,52,75,33,67,73,73,73,77,73,81,73, -85,73,89,73,95,73,101,73,105,73,109,73,113,73,119,73, -243,52,123,73,129,73,135,73,139,73,143,73,147,73,153,73, -157,73,161,73,165,73,169,73,173,73,177,73,181,73,187,73, -191,73,195,73,199,73,205,73,209,73,213,73,217,73,221,73, -227,73,233,73,237,73,241,73,245,73,251,73,255,73,11,53, -11,53,5,74,9,74,15,74,19,74,23,74,27,74,31,74, -35,74,39,74,43,74,15,53,49,74,53,74,57,74,61,74, -65,74,69,74,75,74,79,74,85,74,91,74,11,34,97,74, -27,34,101,74,105,74,109,74,113,74,47,34,117,74,81,22, -81,22,1,0,255,255,255,255,112,134,220,68,112,134,192,68, -112,134,222,68,0,6,192,1,2,6,194,1,4,6,197,1, -6,6,198,1,8,6,2,2,12,6,7,2,14,6,79,4, -16,6,201,1,18,6,70,61,20,6,203,1,24,6,156,3, -30,6,2,4,34,6,6,4,70,6,67,61,74,6,2,60, -80,134,10,2,14,6,6,60,70,6,10,60,98,134,14,60, -2,6,14,2,4,6,18,2,14,6,22,2,24,6,26,2, -78,134,207,1,14,6,22,60,24,6,30,2,70,6,26,60, -78,6,34,60,90,6,38,60,98,134,30,60,0,6,208,1, -2,6,210,1,4,6,213,1,6,6,122,61,8,6,39,2, -12,6,42,2,14,6,46,2,16,6,214,1,18,6,118,61, -24,6,54,2,30,6,10,4,34,6,14,4,70,6,115,61, -78,6,83,4,80,6,50,2,90,6,50,60,96,134,54,60, -14,134,62,60,2,6,234,3,4,6,58,2,8,6,66,60, -12,6,62,2,14,6,66,2,24,6,206,3,78,134,70,2, -4,6,74,2,14,6,70,60,16,6,78,60,24,6,62,4, -70,6,74,60,78,6,82,60,92,6,86,60,98,134,44,61, -0,6,216,1,2,6,218,1,4,6,220,1,6,6,82,2, -8,6,86,2,12,6,90,2,16,6,223,1,18,6,146,61, -24,6,160,3,30,6,18,4,34,6,22,4,70,6,150,61, -80,6,94,2,96,134,90,60,4,6,106,2,24,134,224,3, -2,6,98,60,24,6,210,3,70,6,102,60,78,6,110,2, -98,134,106,60,2,6,116,2,24,6,124,2,70,6,111,60, -78,6,120,2,90,6,122,60,98,134,118,60,2,6,126,60, -14,6,130,60,70,134,134,60,0,6,242,3,2,6,136,2, -6,6,226,1,14,6,138,60,24,6,144,2,70,6,142,60, -78,6,140,2,90,6,150,60,98,134,146,60,0,6,228,1, -2,6,230,1,4,6,233,1,6,6,235,1,8,6,155,2, -12,6,158,2,14,6,95,4,16,6,237,1,18,6,158,61, -22,6,162,2,24,6,164,3,30,6,26,4,34,6,30,4, -54,6,67,3,70,6,155,61,80,134,215,3,2,6,170,60, -14,134,174,60,2,6,170,2,14,6,178,60,24,6,178,2, -30,6,34,4,34,6,38,4,70,6,183,60,78,6,174,2, -98,134,190,60,2,6,183,2,4,6,186,2,14,6,194,60, -24,6,195,2,70,6,199,60,76,6,50,4,78,134,190,2, -14,6,214,60,16,6,46,61,24,6,202,2,70,6,218,60, -76,6,54,4,78,6,198,2,90,6,226,60,98,134,222,60, -0,6,242,1,2,6,244,1,4,6,246,1,6,6,211,2, -8,6,215,2,12,6,218,2,16,6,249,1,18,6,206,61, -20,6,222,2,22,6,226,2,24,6,168,3,30,6,42,4, -34,6,46,4,54,6,97,3,70,6,202,61,72,6,230,60, -80,6,230,2,90,6,238,60,96,134,234,60,6,6,250,60, -70,134,254,60,0,6,2,61,2,6,6,61,4,6,234,2, -14,6,14,61,16,6,10,61,20,6,48,61,70,134,18,61, -14,6,22,61,16,134,26,61,0,6,230,61,2,6,250,1, -4,6,238,2,6,6,242,61,8,6,102,4,14,6,30,61, -16,6,254,1,18,6,238,61,20,6,50,61,70,134,234,61, -2,6,244,2,4,6,34,61,14,6,248,2,24,6,252,2, -70,6,38,61,98,134,42,61,2,6,250,3,8,134,198,3, -2,134,254,3,24,134,222,3,0,6,224,62,2,6,88,7, -8,6,98,63,12,6,96,63,38,6,1,62,40,6,3,62, -132,134,108,63,0,6,228,62,2,6,90,7,38,6,33,62, -40,134,35,62,0,6,232,62,2,6,92,7,38,6,65,62, 
-40,6,67,62,132,134,140,63,0,6,236,62,2,6,94,7, -8,6,162,63,12,6,160,63,16,6,149,7,38,6,97,62, -40,6,99,62,132,134,172,63,0,6,240,62,2,6,152,7, -38,6,129,62,40,134,131,62,38,6,200,63,40,134,202,63, -0,6,244,62,2,6,154,7,8,6,194,63,12,6,192,63, -16,6,151,7,38,6,161,62,40,6,163,62,132,134,204,63, -0,6,248,62,2,6,156,7,38,6,193,62,40,6,195,62, -132,134,236,63,12,6,162,9,16,134,166,9,2,134,166,8, -0,6,160,8,12,6,174,9,16,134,162,8,12,6,132,9, -16,134,186,9,16,134,190,9,0,6,186,8,8,6,198,9, -12,6,114,8,16,134,202,9,2,134,184,8,16,134,206,9, -8,6,222,9,12,6,188,8,16,6,226,9,22,134,230,9, -16,134,234,9,16,134,242,9,16,134,218,9,16,134,174,8, -30,134,238,8,16,134,182,9,16,134,214,9,166,12,68,12, -168,12,70,12,170,140,74,12,168,140,72,12,168,140,76,12, -168,140,132,13,168,140,166,13,168,140,128,13,120,146,82,18, -120,146,98,18,120,146,104,18,124,19,150,19,174,147,152,19, -124,22,150,22,172,22,144,22,174,150,152,22,174,151,40,23, -124,23,148,23,174,151,152,23,124,151,150,23,172,152,144,24, -170,153,128,25,132,25,149,25,170,25,142,25,172,153,144,25, -124,26,148,26,174,154,152,26,124,154,150,26,148,27,180,27, -158,27,185,27,190,155,188,27,92,160,76,32,106,182,12,54, -106,182,16,54,106,182,20,54,106,182,24,54,106,182,28,54, -106,182,36,54,106,182,118,54,106,182,122,54,106,182,128,54, -106,182,130,54,106,182,134,54,112,134,52,67,112,134,54,67, -112,134,92,67,112,134,154,67,112,134,158,67,112,134,156,67, -112,134,8,68,112,134,18,68,112,134,24,68,112,134,72,68, -112,134,76,68,112,134,130,68,112,134,136,68,112,134,142,68, -112,134,146,68,112,134,218,68,112,134,196,68,112,134,224,68, -112,134,226,68,112,134,232,68,112,134,234,68,112,134,240,68, -112,134,242,68,112,134,0,69,112,134,2,69,112,134,192,69, -112,134,194,69,112,134,8,69,112,134,10,69,112,134,16,69, -112,134,18,69,112,134,196,69,112,134,198,69,112,134,88,69, -112,134,90,69,112,134,92,69,112,134,94,69,112,134,212,69, -112,134,214,69,112,134,216,69,112,134,218,69,50,225,40,97, -50,225,152,96,50,225,156,96,50,225,160,96,50,225,164,96, -50,225,168,96,50,225,172,96,50,225,176,96,50,225,180,96, -50,225,184,96,50,225,188,96,50,225,192,96,50,225,196,96, -50,225,202,96,50,225,206,96,50,225,210,96,50,97,224,96, -52,225,226,96,50,97,230,96,52,225,232,96,50,97,236,96, -52,225,238,96,50,97,242,96,52,225,244,96,50,97,248,96, -52,225,250,96,50,225,60,97,50,225,232,97,50,225,88,97, -50,225,92,97,50,225,96,97,50,225,100,97,50,225,104,97, -50,225,108,97,50,225,112,97,50,225,116,97,50,225,120,97, -50,225,124,97,50,225,128,97,50,225,132,97,50,225,138,97, -50,225,142,97,50,225,146,97,50,97,160,97,52,225,162,97, -50,97,166,97,52,225,168,97,50,97,172,97,52,225,174,97, -50,97,178,97,52,225,180,97,50,97,184,97,52,225,186,97, -50,225,238,97,50,225,240,97,50,225,242,97,50,225,244,97, -50,225,252,97,137,180,130,46,52,33,137,180,130,46,56,33, -137,180,130,46,86,33,137,180,194,73,92,34,137,180,194,73, -94,34,137,52,130,207,150,38,137,180,194,213,152,38,139,52, -2,44,120,41,139,52,130,46,118,41,139,180,66,47,124,41, -139,180,194,107,116,43,139,180,194,107,118,43,2,0,2,230, -97,0,2,3,0,6,78,61,2,6,74,61,6,6,86,61, -18,134,82,61,2,230,97,0,8,3,8,134,190,3,2,230, -97,0,10,3,2,134,246,3,2,202,99,0,39,3,2,134, -18,60,2,230,101,0,2,3,0,6,130,61,2,6,126,61, -6,6,138,61,18,134,134,61,2,230,105,0,8,3,2,134, -94,60,2,230,111,0,2,3,0,6,166,61,2,6,162,61, -6,6,174,61,18,134,170,61,2,230,111,0,3,3,2,6, -154,60,8,6,90,4,16,134,158,60,2,230,111,0,8,3, -8,134,86,4,2,230,117,0,8,3,0,6,184,3,2,6, -176,3,8,6,172,3,24,134,180,3,2,230,97,0,6,3, -0,6,98,61,2,6,94,61,6,6,106,61,18,134,102,61, 
-2,230,101,0,4,3,0,6,42,60,2,134,46,60,2,230, -111,0,4,3,0,6,162,60,2,134,166,60,2,230,115,0, -1,3,14,134,202,60,2,230,115,0,12,3,14,134,206,60, -2,230,117,0,3,3,2,134,242,60,2,230,117,0,4,3, -16,134,246,60,2,216,111,0,27,3,0,6,186,61,2,6, -182,61,6,6,194,61,18,6,190,61,70,134,198,61,2,216, -117,0,27,3,0,6,214,61,2,6,210,61,6,6,222,61, -18,6,218,61,70,134,226,61,2,202,111,0,40,3,8,134, -218,3,2,230,97,0,7,3,8,134,194,3,2,202,101,0, -39,3,12,134,58,60,2,230,111,0,7,3,8,134,98,4, -2,230,185,3,8,3,0,6,164,63,2,6,32,7,132,134, -174,63,2,230,197,3,8,3,0,6,196,63,2,6,96,7, -132,134,206,63,2,0,198,12,194,12,170,153,150,25,2,0, -217,13,207,13,148,155,186,27,2,220,108,0,35,3,8,134, -114,60,2,220,114,0,35,3,8,134,186,60,2,220,115,0, -35,3,14,134,210,60,2,220,97,0,35,3,4,6,90,61, -12,134,110,61,2,220,101,0,35,3,4,134,142,61,2,220, -111,0,35,3,4,134,178,61,2,230,177,3,19,3,0,6, -4,62,2,6,8,62,132,134,12,62,2,230,177,3,20,3, -0,6,6,62,2,6,10,62,132,134,14,62,2,230,181,3, -19,3,0,6,36,62,2,134,40,62,2,230,181,3,20,3, -0,6,38,62,2,134,42,62,2,230,183,3,19,3,0,6, -68,62,2,6,72,62,132,134,76,62,2,230,183,3,20,3, -0,6,70,62,2,6,74,62,132,134,78,62,2,230,185,3, -19,3,0,6,100,62,2,6,104,62,132,134,108,62,2,230, -185,3,20,3,0,6,102,62,2,6,106,62,132,134,110,62, -2,230,191,3,19,3,0,6,132,62,2,134,136,62,2,230, -191,3,20,3,0,6,134,62,2,134,138,62,2,230,197,3, -19,3,0,6,164,62,2,6,168,62,132,134,172,62,2,230, -197,3,20,3,0,6,166,62,2,6,170,62,132,134,174,62, -2,230,201,3,19,3,0,6,196,62,2,6,200,62,132,134, -204,62,2,230,201,3,20,3,0,6,198,62,2,6,202,62, -132,134,206,62,3,0,2,230,97,0,0,3,2,230,97,0, -1,3,2,230,97,0,3,3,2,230,101,0,0,3,2,230, -101,0,1,3,2,230,101,0,8,3,2,230,105,0,0,3, -2,230,105,0,1,3,2,230,105,0,2,3,2,230,110,0, -3,3,2,230,111,0,0,3,2,230,111,0,1,3,2,230, -117,0,0,3,2,230,117,0,1,3,2,230,117,0,2,3, -2,230,121,0,1,3,2,230,121,0,8,3,2,230,97,0, -4,3,2,202,97,0,40,3,2,230,99,0,1,3,2,230, -99,0,2,3,2,230,99,0,7,3,2,230,99,0,12,3, -2,230,100,0,12,3,2,230,101,0,6,3,2,230,101,0, -7,3,2,202,101,0,40,3,2,230,101,0,12,3,2,230, -103,0,2,3,2,230,103,0,6,3,2,230,103,0,7,3, -2,202,103,0,39,3,2,230,104,0,2,3,2,230,105,0, -3,3,2,230,105,0,4,3,2,230,105,0,6,3,2,202, -105,0,40,3,2,230,106,0,2,3,2,202,107,0,39,3, -2,230,108,0,1,3,2,202,108,0,39,3,2,230,108,0, -12,3,2,230,110,0,1,3,2,202,110,0,39,3,2,230, -110,0,12,3,2,230,111,0,6,3,2,230,111,0,11,3, -2,230,114,0,1,3,2,202,114,0,39,3,2,230,114,0, -12,3,2,230,115,0,2,3,2,202,115,0,39,3,2,202, -116,0,39,3,2,230,116,0,12,3,2,230,117,0,6,3, -2,230,117,0,10,3,2,230,117,0,11,3,2,202,117,0, -40,3,2,230,119,0,2,3,2,230,121,0,2,3,2,230, -122,0,1,3,2,230,122,0,7,3,2,230,122,0,12,3, -2,230,97,0,12,3,2,230,105,0,12,3,2,230,111,0, -12,3,2,230,117,0,12,3,252,0,67,230,117,0,8,3, -4,3,252,0,67,230,117,0,8,3,1,3,252,0,67,230, -117,0,8,3,12,3,252,0,67,230,117,0,8,3,0,3, -228,0,67,230,97,0,8,3,4,3,39,2,67,230,97,0, -7,3,4,3,2,230,230,0,4,3,2,230,103,0,12,3, -2,230,107,0,12,3,235,1,67,230,111,0,40,3,4,3, -2,230,146,2,12,3,2,230,106,0,12,3,2,230,103,0, -1,3,2,230,110,0,0,3,229,0,67,230,97,0,10,3, -1,3,2,230,230,0,1,3,2,230,248,0,1,3,2,230, -97,0,15,3,2,230,97,0,17,3,2,230,101,0,15,3, -2,230,101,0,17,3,2,230,105,0,15,3,2,230,105,0, -17,3,2,230,111,0,15,3,2,230,111,0,17,3,2,230, -114,0,15,3,2,230,114,0,17,3,2,230,117,0,15,3, -2,230,117,0,17,3,2,220,115,0,38,3,2,220,116,0, -38,3,2,230,104,0,12,3,246,0,67,230,111,0,8,3, -4,3,245,0,67,230,111,0,3,3,4,3,47,2,67,230, -111,0,7,3,4,3,2,230,121,0,4,3,202,3,67,230, -185,3,8,3,1,3,2,230,177,3,1,3,2,230,181,3, 
-1,3,2,230,183,3,1,3,2,230,185,3,1,3,203,3, -67,230,197,3,8,3,1,3,2,230,191,3,1,3,2,230, -197,3,1,3,2,230,201,3,1,3,2,230,56,4,6,3, -2,230,53,4,0,3,2,230,53,4,8,3,2,230,51,4, -1,3,2,230,86,4,8,3,2,230,58,4,1,3,2,230, -56,4,0,3,2,230,67,4,6,3,2,230,117,4,15,3, -2,230,54,4,6,3,2,230,48,4,6,3,2,230,48,4, -8,3,2,230,53,4,6,3,2,230,217,4,8,3,2,230, -54,4,8,3,2,230,55,4,8,3,2,230,56,4,4,3, -2,230,56,4,8,3,2,230,62,4,8,3,2,230,233,4, -8,3,2,230,77,4,8,3,2,230,67,4,4,3,2,230, -67,4,8,3,2,230,67,4,11,3,2,230,71,4,8,3, -2,230,75,4,8,3,2,230,39,6,83,6,2,230,39,6, -84,6,2,230,72,6,84,6,2,220,39,6,85,6,2,230, -74,6,84,6,2,230,213,6,84,6,2,230,193,6,84,6, -2,230,210,6,84,6,2,7,40,9,60,9,2,7,48,9, -60,9,2,7,51,9,60,9,2,0,199,9,190,9,2,0, -199,9,215,9,2,0,71,11,86,11,2,0,71,11,62,11, -2,0,71,11,87,11,2,0,146,11,215,11,2,0,198,11, -190,11,2,0,199,11,190,11,2,0,198,11,215,11,2,91, -70,12,86,12,2,0,191,12,213,12,2,0,198,12,213,12, -2,0,198,12,214,12,202,12,67,0,198,12,194,12,213,12, -2,0,70,13,62,13,2,0,71,13,62,13,2,0,70,13, -87,13,2,9,217,13,202,13,220,13,67,9,217,13,207,13, -202,13,2,0,217,13,223,13,2,0,37,16,46,16,2,0, -5,27,53,27,2,0,7,27,53,27,2,0,9,27,53,27, -2,0,11,27,53,27,2,0,13,27,53,27,2,0,17,27, -53,27,2,0,58,27,53,27,2,0,60,27,53,27,2,0, -62,27,53,27,2,0,63,27,53,27,2,0,66,27,53,27, -2,220,97,0,37,3,2,230,98,0,7,3,2,220,98,0, -35,3,2,220,98,0,49,3,231,0,67,230,99,0,39,3, -1,3,2,230,100,0,7,3,2,220,100,0,35,3,2,220, -100,0,49,3,2,202,100,0,39,3,2,220,100,0,45,3, -19,1,67,230,101,0,4,3,0,3,19,1,67,230,101,0, -4,3,1,3,2,220,101,0,45,3,2,220,101,0,48,3, -41,2,67,230,101,0,39,3,6,3,2,230,102,0,7,3, -2,230,103,0,4,3,2,230,104,0,7,3,2,220,104,0, -35,3,2,230,104,0,8,3,2,202,104,0,39,3,2,220, -104,0,46,3,2,220,105,0,48,3,239,0,67,230,105,0, -8,3,1,3,2,230,107,0,1,3,2,220,107,0,35,3, -2,220,107,0,49,3,55,30,67,230,108,0,35,3,4,3, -2,220,108,0,49,3,2,220,108,0,45,3,2,230,109,0, -1,3,2,230,109,0,7,3,2,220,109,0,35,3,2,230, -110,0,7,3,2,220,110,0,35,3,2,220,110,0,49,3, -2,220,110,0,45,3,245,0,67,230,111,0,3,3,1,3, -245,0,67,230,111,0,3,3,8,3,77,1,67,230,111,0, -4,3,0,3,77,1,67,230,111,0,4,3,1,3,2,230, -112,0,1,3,2,230,112,0,7,3,2,230,114,0,7,3, -91,30,67,230,114,0,35,3,4,3,2,220,114,0,49,3, -2,230,115,0,7,3,91,1,67,230,115,0,1,3,7,3, -97,1,67,230,115,0,12,3,7,3,99,30,67,230,115,0, -35,3,7,3,2,230,116,0,7,3,2,220,116,0,35,3, -2,220,116,0,49,3,2,220,116,0,45,3,2,220,117,0, -36,3,2,220,117,0,48,3,2,220,117,0,45,3,105,1, -67,230,117,0,3,3,1,3,107,1,67,230,117,0,4,3, -8,3,2,230,118,0,3,3,2,220,118,0,35,3,2,230, -119,0,0,3,2,230,119,0,1,3,2,230,119,0,8,3, -2,230,119,0,7,3,2,220,119,0,35,3,2,230,120,0, -7,3,2,230,120,0,8,3,2,230,121,0,7,3,2,230, -122,0,2,3,2,220,122,0,35,3,2,220,122,0,49,3, -2,220,104,0,49,3,2,230,116,0,8,3,2,230,119,0, -10,3,2,230,121,0,10,3,2,230,97,0,9,3,226,0, -67,230,97,0,2,3,1,3,226,0,67,230,97,0,2,3, -0,3,226,0,67,230,97,0,2,3,9,3,226,0,67,230, -97,0,2,3,3,3,161,30,67,230,97,0,35,3,2,3, -3,1,67,230,97,0,6,3,1,3,3,1,67,230,97,0, -6,3,0,3,3,1,67,230,97,0,6,3,9,3,3,1, -67,230,97,0,6,3,3,3,161,30,67,230,97,0,35,3, -6,3,2,230,101,0,9,3,2,230,101,0,3,3,234,0, -67,230,101,0,2,3,1,3,234,0,67,230,101,0,2,3, -0,3,234,0,67,230,101,0,2,3,9,3,234,0,67,230, -101,0,2,3,3,3,185,30,67,230,101,0,35,3,2,3, -2,230,105,0,9,3,2,220,105,0,35,3,2,230,111,0, -9,3,244,0,67,230,111,0,2,3,1,3,244,0,67,230, -111,0,2,3,0,3,244,0,67,230,111,0,2,3,9,3, -244,0,67,230,111,0,2,3,3,3,205,30,67,230,111,0, -35,3,2,3,161,1,67,230,111,0,27,3,1,3,161,1, -67,230,111,0,27,3,0,3,161,1,67,230,111,0,27,3, 
-9,3,161,1,67,230,111,0,27,3,3,3,161,1,67,220, -111,0,27,3,35,3,2,220,117,0,35,3,2,230,117,0, -9,3,176,1,67,230,117,0,27,3,1,3,176,1,67,230, -117,0,27,3,0,3,176,1,67,230,117,0,27,3,9,3, -176,1,67,230,117,0,27,3,3,3,176,1,67,220,117,0, -27,3,35,3,2,230,121,0,0,3,2,220,121,0,35,3, -2,230,121,0,9,3,2,230,121,0,3,3,0,31,67,230, -177,3,19,3,0,3,1,31,67,230,177,3,20,3,0,3, -0,31,67,230,177,3,19,3,1,3,1,31,67,230,177,3, -20,3,1,3,0,31,67,230,177,3,19,3,66,3,1,31, -67,230,177,3,20,3,66,3,16,31,67,230,181,3,19,3, -0,3,17,31,67,230,181,3,20,3,0,3,16,31,67,230, -181,3,19,3,1,3,17,31,67,230,181,3,20,3,1,3, -32,31,67,230,183,3,19,3,0,3,33,31,67,230,183,3, -20,3,0,3,32,31,67,230,183,3,19,3,1,3,33,31, -67,230,183,3,20,3,1,3,32,31,67,230,183,3,19,3, -66,3,33,31,67,230,183,3,20,3,66,3,48,31,67,230, -185,3,19,3,0,3,49,31,67,230,185,3,20,3,0,3, -48,31,67,230,185,3,19,3,1,3,49,31,67,230,185,3, -20,3,1,3,48,31,67,230,185,3,19,3,66,3,49,31, -67,230,185,3,20,3,66,3,64,31,67,230,191,3,19,3, -0,3,65,31,67,230,191,3,20,3,0,3,64,31,67,230, -191,3,19,3,1,3,65,31,67,230,191,3,20,3,1,3, -80,31,67,230,197,3,19,3,0,3,81,31,67,230,197,3, -20,3,0,3,80,31,67,230,197,3,19,3,1,3,81,31, -67,230,197,3,20,3,1,3,80,31,67,230,197,3,19,3, -66,3,81,31,67,230,197,3,20,3,66,3,96,31,67,230, -201,3,19,3,0,3,97,31,67,230,201,3,20,3,0,3, -96,31,67,230,201,3,19,3,1,3,97,31,67,230,201,3, -20,3,1,3,96,31,67,230,201,3,19,3,66,3,97,31, -67,230,201,3,20,3,66,3,2,230,177,3,0,3,2,230, -181,3,0,3,2,230,183,3,0,3,2,230,185,3,0,3, -2,230,191,3,0,3,2,230,197,3,0,3,2,230,201,3, -0,3,2,230,177,3,6,3,2,230,177,3,4,3,2,230, -177,3,66,3,2,230,183,3,66,3,2,230,185,3,6,3, -2,230,185,3,4,3,202,3,67,230,185,3,8,3,0,3, -2,230,185,3,66,3,202,3,67,230,185,3,8,3,66,3, -2,230,197,3,6,3,2,230,197,3,4,3,203,3,67,230, -197,3,8,3,0,3,2,230,193,3,19,3,2,230,193,3, -20,3,2,230,197,3,66,3,203,3,67,230,197,3,8,3, -66,3,2,230,201,3,66,3,2,1,144,33,56,3,2,1, -146,33,56,3,2,1,148,33,56,3,2,1,208,33,56,3, -2,1,212,33,56,3,2,1,210,33,56,3,2,1,3,34, -56,3,2,1,8,34,56,3,2,1,11,34,56,3,2,1, -35,34,56,3,2,1,37,34,56,3,2,1,60,34,56,3, -2,1,67,34,56,3,2,1,69,34,56,3,2,1,72,34, -56,3,2,1,61,0,56,3,2,1,97,34,56,3,2,1, -77,34,56,3,2,1,60,0,56,3,2,1,62,0,56,3, -2,1,100,34,56,3,2,1,101,34,56,3,2,1,114,34, -56,3,2,1,115,34,56,3,2,1,118,34,56,3,2,1, -119,34,56,3,2,1,122,34,56,3,2,1,123,34,56,3, -2,1,130,34,56,3,2,1,131,34,56,3,2,1,134,34, -56,3,2,1,135,34,56,3,2,1,162,34,56,3,2,1, -168,34,56,3,2,1,169,34,56,3,2,1,171,34,56,3, -2,1,124,34,56,3,2,1,125,34,56,3,2,1,145,34, -56,3,2,1,146,34,56,3,2,1,178,34,56,3,2,1, -179,34,56,3,2,1,180,34,56,3,2,1,181,34,56,3, -2,8,75,48,153,48,2,8,77,48,153,48,2,8,79,48, -153,48,2,8,81,48,153,48,2,8,83,48,153,48,2,8, -85,48,153,48,2,8,87,48,153,48,2,8,89,48,153,48, -2,8,91,48,153,48,2,8,93,48,153,48,2,8,95,48, -153,48,2,8,97,48,153,48,2,8,100,48,153,48,2,8, -102,48,153,48,2,8,104,48,153,48,2,8,111,48,153,48, -2,8,111,48,154,48,2,8,114,48,153,48,2,8,114,48, -154,48,2,8,117,48,153,48,2,8,117,48,154,48,2,8, -120,48,153,48,2,8,120,48,154,48,2,8,123,48,153,48, -2,8,123,48,154,48,2,8,70,48,153,48,2,8,157,48, -153,48,2,8,171,48,153,48,2,8,173,48,153,48,2,8, -175,48,153,48,2,8,177,48,153,48,2,8,179,48,153,48, -2,8,181,48,153,48,2,8,183,48,153,48,2,8,185,48, -153,48,2,8,187,48,153,48,2,8,189,48,153,48,2,8, -191,48,153,48,2,8,193,48,153,48,2,8,196,48,153,48, -2,8,198,48,153,48,2,8,200,48,153,48,2,8,207,48, -153,48,2,8,207,48,154,48,2,8,210,48,153,48,2,8, -210,48,154,48,2,8,213,48,153,48,2,8,213,48,154,48, -2,8,216,48,153,48,2,8,216,48,154,48,2,8,219,48, 
-153,48,2,8,219,48,154,48,2,8,166,48,153,48,2,8, -239,48,153,48,2,8,240,48,153,48,2,8,241,48,153,48, -2,8,242,48,153,48,2,8,253,48,153,48,4,7,4,216, -153,220,4,216,186,220,4,7,4,216,155,220,4,216,186,220, -4,7,4,216,165,220,4,216,186,220,4,0,4,216,49,221, -4,216,39,221,4,0,4,216,50,221,4,216,39,221,4,0, -4,216,71,223,4,216,62,223,4,0,4,216,71,223,4,216, -87,223,4,0,5,216,185,220,5,216,186,220,4,0,5,216, -185,220,5,216,176,220,4,0,5,216,185,220,5,216,189,220, -4,0,5,216,184,221,5,216,175,221,4,0,5,216,185,221, -5,216,175,221,1,0,253,255,1,0,32,0,2,230,32,0, -8,3,1,0,97,0,2,230,32,0,4,3,1,0,50,0, -1,0,51,0,2,230,32,0,1,3,1,0,188,3,2,202, -32,0,39,3,1,0,49,0,1,0,111,0,3,0,49,0, -68,32,52,0,3,0,49,0,68,32,50,0,3,0,51,0, -68,32,52,0,2,230,105,0,7,3,2,0,105,0,106,0, -2,0,108,0,183,0,2,0,188,2,110,0,1,0,115,0, -1,0,83,2,1,0,84,2,1,0,86,2,1,0,87,2, -1,0,221,1,1,0,89,2,1,0,91,2,1,0,96,2, -1,0,99,2,1,0,105,2,1,0,104,2,1,0,111,2, -1,0,114,2,1,0,117,2,1,0,128,2,1,0,131,2, -1,0,136,2,1,0,138,2,1,0,139,2,1,0,146,2, -2,0,108,0,106,0,2,0,110,0,106,0,2,0,100,0, -122,0,1,0,149,1,1,0,158,1,1,0,101,44,1,0, -154,1,1,0,102,44,1,0,128,1,1,0,137,2,1,0, -140,2,1,0,104,0,1,0,102,2,1,0,106,0,1,0, -114,0,1,0,119,0,1,0,121,0,2,230,32,0,6,3, -2,230,32,0,7,3,2,230,32,0,10,3,2,202,32,0, -40,3,2,230,32,0,3,3,2,230,32,0,11,3,1,0, -108,0,1,0,120,0,1,0,149,2,240,0,129,0,185,3, -1,0,185,2,2,0,32,0,185,3,1,0,59,0,1,0, -243,3,3,230,32,0,8,3,1,3,1,0,183,0,1,0, -123,3,1,0,124,3,1,0,125,3,1,0,82,4,1,0, -84,4,1,0,85,4,1,0,86,4,1,0,88,4,1,0, -89,4,1,0,90,4,1,0,91,4,1,0,95,4,2,0, -101,5,130,5,2,0,39,6,116,6,2,0,72,6,116,6, -2,0,199,6,116,6,2,0,74,6,116,6,2,7,21,9, -60,9,2,7,22,9,60,9,2,7,23,9,60,9,2,7, -28,9,60,9,2,7,33,9,60,9,2,7,34,9,60,9, -2,7,43,9,60,9,2,7,47,9,60,9,2,7,161,9, -188,9,2,7,162,9,188,9,2,7,175,9,188,9,2,7, -50,10,60,10,2,7,56,10,60,10,2,7,22,10,60,10, -2,7,23,10,60,10,2,7,28,10,60,10,2,7,43,10, -60,10,2,7,33,11,60,11,2,7,34,11,60,11,2,0, -77,14,50,14,2,0,205,14,178,14,2,0,171,14,153,14, -2,0,171,14,161,14,2,0,66,15,183,15,2,0,76,15, -183,15,2,0,81,15,183,15,2,0,86,15,183,15,2,0, -91,15,183,15,2,0,64,15,181,15,2,130,178,15,128,15, -3,130,178,15,113,15,128,15,2,130,179,15,128,15,3,130, -179,15,113,15,128,15,2,0,146,15,183,15,2,0,156,15, -183,15,2,0,161,15,183,15,2,0,166,15,183,15,2,0, -171,15,183,15,2,0,144,15,181,15,1,0,39,45,1,0, -45,45,1,0,50,4,1,0,52,4,1,0,62,4,1,0, -65,4,1,0,66,4,1,0,74,4,1,0,99,4,1,0, -75,166,1,0,208,16,1,0,209,16,1,0,210,16,1,0, -211,16,1,0,212,16,1,0,213,16,1,0,214,16,1,0, -215,16,1,0,216,16,1,0,217,16,1,0,218,16,1,0, -219,16,1,0,220,16,1,0,221,16,1,0,222,16,1,0, -223,16,1,0,224,16,1,0,225,16,1,0,226,16,1,0, -227,16,1,0,228,16,1,0,229,16,1,0,230,16,1,0, -231,16,1,0,232,16,1,0,233,16,1,0,234,16,1,0, -235,16,1,0,236,16,1,0,237,16,1,0,238,16,1,0, -239,16,1,0,240,16,1,0,241,16,1,0,242,16,1,0, -243,16,1,0,244,16,1,0,245,16,1,0,246,16,1,0, -247,16,1,0,248,16,1,0,249,16,1,0,250,16,1,0, -253,16,1,0,254,16,1,0,255,16,1,0,230,0,1,0, -98,0,1,0,100,0,1,0,101,0,1,0,103,0,1,0, -105,0,1,0,107,0,1,0,109,0,1,0,110,0,1,0, -35,2,1,0,112,0,1,0,116,0,1,0,117,0,1,0, -80,2,1,0,81,2,1,0,2,29,1,0,92,2,1,0, -75,1,1,0,118,0,1,0,178,3,1,0,179,3,1,0, -180,3,1,0,198,3,1,0,199,3,1,0,193,3,1,0, -61,4,1,0,82,2,1,0,99,0,1,0,85,2,1,0, -240,0,1,0,102,0,1,0,95,2,1,0,97,2,1,0, -101,2,1,0,106,2,1,0,157,2,1,0,109,2,1,0, -159,2,1,0,113,2,1,0,112,2,1,0,115,2,1,0, -116,2,1,0,120,2,1,0,130,2,1,0,171,1,1,0, -28,29,1,0,122,0,1,0,144,2,1,0,145,2,1,0, -184,3,2,0,97,0,190,2,2,0,115,0,115,0,2,0, -177,3,185,3,2,230,32,0,19,3,1,0,185,3,2,230, 
-32,0,66,3,3,230,32,0,8,3,66,3,2,0,183,3, -185,3,3,230,32,0,19,3,0,3,3,230,32,0,19,3, -1,3,3,230,32,0,19,3,66,3,3,230,32,0,20,3, -0,3,3,230,32,0,20,3,1,3,3,230,32,0,20,3, -66,3,3,230,32,0,8,3,0,3,1,0,96,0,2,0, -201,3,185,3,2,230,32,0,20,3,2,220,32,0,51,3, -2,0,50,32,50,32,3,0,50,32,50,32,50,32,2,0, -53,32,53,32,3,0,53,32,53,32,53,32,2,0,33,0, -33,0,2,230,32,0,5,3,2,0,63,0,63,0,2,0, -63,0,33,0,2,0,33,0,63,0,4,0,50,32,50,32, -50,32,50,32,1,0,48,0,1,0,52,0,1,0,53,0, -1,0,54,0,1,0,55,0,1,0,56,0,1,0,57,0, -1,0,43,0,1,0,18,34,1,0,61,0,1,0,40,0, -1,0,41,0,2,0,114,0,115,0,3,0,97,0,47,0, -99,0,3,0,97,0,47,0,115,0,2,0,176,0,99,0, -3,0,99,0,47,0,111,0,3,0,99,0,47,0,117,0, -2,0,176,0,102,0,1,0,39,1,2,0,110,0,111,0, -1,0,113,0,2,0,115,0,109,0,3,0,116,0,101,0, -108,0,2,0,116,0,109,0,1,0,201,3,1,0,208,5, -1,0,209,5,1,0,210,5,1,0,211,5,3,0,102,0, -97,0,120,0,1,0,192,3,1,0,17,34,3,0,49,0, -68,32,55,0,3,0,49,0,68,32,57,0,4,0,49,0, -68,32,49,0,48,0,3,0,49,0,68,32,51,0,3,0, -50,0,68,32,51,0,3,0,49,0,68,32,53,0,3,0, -50,0,68,32,53,0,3,0,51,0,68,32,53,0,3,0, -52,0,68,32,53,0,3,0,49,0,68,32,54,0,3,0, -53,0,68,32,54,0,3,0,49,0,68,32,56,0,3,0, -51,0,68,32,56,0,3,0,53,0,68,32,56,0,3,0, -55,0,68,32,56,0,2,0,49,0,68,32,2,0,105,0, -105,0,3,0,105,0,105,0,105,0,2,0,105,0,118,0, -2,0,118,0,105,0,3,0,118,0,105,0,105,0,4,0, -118,0,105,0,105,0,105,0,2,0,105,0,120,0,2,0, -120,0,105,0,3,0,120,0,105,0,105,0,3,0,48,0, -68,32,51,0,2,0,43,34,43,34,3,0,43,34,43,34, -43,34,2,0,46,34,46,34,3,0,46,34,46,34,46,34, -1,0,8,48,1,0,9,48,2,0,49,0,48,0,2,0, -49,0,49,0,2,0,49,0,50,0,2,0,49,0,51,0, -2,0,49,0,52,0,2,0,49,0,53,0,2,0,49,0, -54,0,2,0,49,0,55,0,2,0,49,0,56,0,2,0, -49,0,57,0,2,0,50,0,48,0,3,0,40,0,49,0, -41,0,3,0,40,0,50,0,41,0,3,0,40,0,51,0, -41,0,3,0,40,0,52,0,41,0,3,0,40,0,53,0, -41,0,3,0,40,0,54,0,41,0,3,0,40,0,55,0, -41,0,3,0,40,0,56,0,41,0,3,0,40,0,57,0, -41,0,4,0,40,0,49,0,48,0,41,0,4,0,40,0, -49,0,49,0,41,0,4,0,40,0,49,0,50,0,41,0, -4,0,40,0,49,0,51,0,41,0,4,0,40,0,49,0, -52,0,41,0,4,0,40,0,49,0,53,0,41,0,4,0, -40,0,49,0,54,0,41,0,4,0,40,0,49,0,55,0, -41,0,4,0,40,0,49,0,56,0,41,0,4,0,40,0, -49,0,57,0,41,0,4,0,40,0,50,0,48,0,41,0, -3,0,40,0,97,0,41,0,3,0,40,0,98,0,41,0, -3,0,40,0,99,0,41,0,3,0,40,0,100,0,41,0, -3,0,40,0,101,0,41,0,3,0,40,0,102,0,41,0, -3,0,40,0,103,0,41,0,3,0,40,0,104,0,41,0, -3,0,40,0,105,0,41,0,3,0,40,0,106,0,41,0, -3,0,40,0,107,0,41,0,3,0,40,0,108,0,41,0, -3,0,40,0,109,0,41,0,3,0,40,0,110,0,41,0, -3,0,40,0,111,0,41,0,3,0,40,0,112,0,41,0, -3,0,40,0,113,0,41,0,3,0,40,0,114,0,41,0, -3,0,40,0,115,0,41,0,3,0,40,0,116,0,41,0, -3,0,40,0,117,0,41,0,3,0,40,0,118,0,41,0, -3,0,40,0,119,0,41,0,3,0,40,0,120,0,41,0, -3,0,40,0,121,0,41,0,3,0,40,0,122,0,41,0, -4,0,43,34,43,34,43,34,43,34,3,0,58,0,58,0, -61,0,2,0,61,0,61,0,3,0,61,0,61,0,61,0, -2,1,221,42,56,3,1,0,107,2,1,0,125,29,1,0, -125,2,1,0,63,2,1,0,64,2,1,0,205,107,1,0, -159,159,1,0,0,78,1,0,40,78,1,0,54,78,1,0, -63,78,1,0,89,78,1,0,133,78,1,0,140,78,1,0, -160,78,1,0,186,78,1,0,63,81,1,0,101,81,1,0, -107,81,1,0,130,81,1,0,150,81,1,0,171,81,1,0, -224,81,1,0,245,81,1,0,0,82,1,0,155,82,1,0, -249,82,1,0,21,83,1,0,26,83,1,0,56,83,1,0, -65,83,1,0,92,83,1,0,105,83,1,0,130,83,1,0, -182,83,1,0,200,83,1,0,227,83,1,0,215,86,1,0, -31,87,1,0,235,88,1,0,2,89,1,0,10,89,1,0, -21,89,1,0,39,89,1,0,115,89,1,0,80,91,1,0, -128,91,1,0,248,91,1,0,15,92,1,0,34,92,1,0, -56,92,1,0,110,92,1,0,113,92,1,0,219,93,1,0, -229,93,1,0,241,93,1,0,254,93,1,0,114,94,1,0, -122,94,1,0,127,94,1,0,244,94,1,0,254,94,1,0, -11,95,1,0,19,95,1,0,80,95,1,0,97,95,1,0, -115,95,1,0,195,95,1,0,8,98,1,0,54,98,1,0, 
-75,98,1,0,47,101,1,0,52,101,1,0,135,101,1,0, -151,101,1,0,164,101,1,0,185,101,1,0,224,101,1,0, -229,101,1,0,240,102,1,0,8,103,1,0,40,103,1,0, -32,107,1,0,98,107,1,0,121,107,1,0,179,107,1,0, -203,107,1,0,212,107,1,0,219,107,1,0,15,108,1,0, -20,108,1,0,52,108,1,0,107,112,1,0,42,114,1,0, -54,114,1,0,59,114,1,0,63,114,1,0,71,114,1,0, -89,114,1,0,91,114,1,0,172,114,1,0,132,115,1,0, -137,115,1,0,220,116,1,0,230,116,1,0,24,117,1,0, -31,117,1,0,40,117,1,0,48,117,1,0,139,117,1,0, -146,117,1,0,118,118,1,0,125,118,1,0,174,118,1,0, -191,118,1,0,238,118,1,0,219,119,1,0,226,119,1,0, -243,119,1,0,58,121,1,0,184,121,1,0,190,121,1,0, -116,122,1,0,203,122,1,0,249,122,1,0,115,124,1,0, -248,124,1,0,54,127,1,0,81,127,1,0,138,127,1,0, -189,127,1,0,1,128,1,0,12,128,1,0,18,128,1,0, -51,128,1,0,127,128,1,0,137,128,1,0,227,129,1,0, -234,129,1,0,243,129,1,0,252,129,1,0,12,130,1,0, -27,130,1,0,31,130,1,0,110,130,1,0,114,130,1,0, -120,130,1,0,77,134,1,0,107,134,1,0,64,136,1,0, -76,136,1,0,99,136,1,0,126,137,1,0,139,137,1,0, -210,137,1,0,0,138,1,0,55,140,1,0,70,140,1,0, -85,140,1,0,120,140,1,0,157,140,1,0,100,141,1,0, -112,141,1,0,179,141,1,0,171,142,1,0,202,142,1,0, -155,143,1,0,176,143,1,0,181,143,1,0,145,144,1,0, -73,145,1,0,198,145,1,0,204,145,1,0,209,145,1,0, -119,149,1,0,128,149,1,0,28,150,1,0,182,150,1,0, -185,150,1,0,232,150,1,0,81,151,1,0,94,151,1,0, -98,151,1,0,105,151,1,0,203,151,1,0,237,151,1,0, -243,151,1,0,1,152,1,0,168,152,1,0,219,152,1,0, -223,152,1,0,150,153,1,0,153,153,1,0,172,153,1,0, -168,154,1,0,216,154,1,0,223,154,1,0,37,155,1,0, -47,155,1,0,50,155,1,0,60,155,1,0,90,155,1,0, -229,156,1,0,117,158,1,0,127,158,1,0,165,158,1,0, -187,158,1,0,195,158,1,0,205,158,1,0,209,158,1,0, -249,158,1,0,253,158,1,0,14,159,1,0,19,159,1,0, -32,159,1,0,59,159,1,0,74,159,1,0,82,159,1,0, -141,159,1,0,156,159,1,0,160,159,1,0,46,0,1,0, -68,83,1,0,69,83,2,8,32,0,153,48,2,8,32,0, -154,48,2,0,136,48,138,48,2,0,179,48,200,48,1,0, -0,17,1,0,1,17,1,0,2,17,1,0,3,17,1,0, -4,17,1,0,5,17,1,0,26,17,1,0,6,17,1,0, -7,17,1,0,8,17,1,0,33,17,1,0,9,17,1,0, -10,17,1,0,11,17,1,0,12,17,1,0,13,17,1,0, -14,17,1,0,15,17,1,0,16,17,1,0,17,17,1,0, -18,17,1,0,20,17,1,0,21,17,1,0,199,17,1,0, -200,17,1,0,204,17,1,0,206,17,1,0,211,17,1,0, -215,17,1,0,217,17,1,0,28,17,1,0,221,17,1,0, -223,17,1,0,29,17,1,0,30,17,1,0,32,17,1,0, -34,17,1,0,35,17,1,0,39,17,1,0,41,17,1,0, -43,17,1,0,44,17,1,0,45,17,1,0,46,17,1,0, -47,17,1,0,50,17,1,0,54,17,1,0,64,17,1,0, -71,17,1,0,76,17,1,0,241,17,1,0,242,17,1,0, -87,17,1,0,88,17,1,0,89,17,1,0,132,17,1,0, -133,17,1,0,136,17,1,0,145,17,1,0,146,17,1,0, -148,17,1,0,158,17,1,0,161,17,1,0,9,78,1,0, -219,86,1,0,10,78,1,0,45,78,1,0,11,78,1,0, -50,117,1,0,25,78,1,0,1,78,1,0,41,89,1,0, -48,87,3,0,40,0,0,17,41,0,3,0,40,0,2,17, -41,0,3,0,40,0,3,17,41,0,3,0,40,0,5,17, -41,0,3,0,40,0,6,17,41,0,3,0,40,0,7,17, -41,0,3,0,40,0,9,17,41,0,3,0,40,0,11,17, -41,0,3,0,40,0,12,17,41,0,3,0,40,0,14,17, -41,0,3,0,40,0,15,17,41,0,3,0,40,0,16,17, -41,0,3,0,40,0,17,17,41,0,3,0,40,0,18,17, -41,0,3,0,40,0,0,78,41,0,3,0,40,0,140,78, -41,0,3,0,40,0,9,78,41,0,3,0,40,0,219,86, -41,0,3,0,40,0,148,78,41,0,3,0,40,0,109,81, -41,0,3,0,40,0,3,78,41,0,3,0,40,0,107,81, -41,0,3,0,40,0,93,78,41,0,3,0,40,0,65,83, -41,0,3,0,40,0,8,103,41,0,3,0,40,0,107,112, -41,0,3,0,40,0,52,108,41,0,3,0,40,0,40,103, -41,0,3,0,40,0,209,145,41,0,3,0,40,0,31,87, -41,0,3,0,40,0,229,101,41,0,3,0,40,0,42,104, -41,0,3,0,40,0,9,103,41,0,3,0,40,0,62,121, -41,0,3,0,40,0,13,84,41,0,3,0,40,0,121,114, -41,0,3,0,40,0,161,140,41,0,3,0,40,0,93,121, -41,0,3,0,40,0,180,82,41,0,3,0,40,0,227,78, 
-41,0,3,0,40,0,124,84,41,0,3,0,40,0,102,91, -41,0,3,0,40,0,227,118,41,0,3,0,40,0,1,79, -41,0,3,0,40,0,199,140,41,0,3,0,40,0,84,83, -41,0,3,0,40,0,109,121,41,0,3,0,40,0,17,79, -41,0,3,0,40,0,234,129,41,0,3,0,40,0,243,129, -41,0,1,0,79,85,1,0,124,94,1,0,143,123,3,0, -112,0,116,0,101,0,2,0,50,0,49,0,2,0,50,0, -50,0,2,0,50,0,51,0,2,0,50,0,52,0,2,0, -50,0,53,0,2,0,50,0,54,0,2,0,50,0,55,0, -2,0,50,0,56,0,2,0,50,0,57,0,2,0,51,0, -48,0,2,0,51,0,49,0,2,0,51,0,50,0,2,0, -51,0,51,0,2,0,51,0,52,0,2,0,51,0,53,0, -1,0,148,78,1,0,109,81,1,0,3,78,1,0,93,78, -1,0,42,104,1,0,9,103,1,0,62,121,1,0,13,84, -1,0,121,114,1,0,161,140,1,0,93,121,1,0,180,82, -1,0,216,121,1,0,55,117,1,0,105,144,1,0,42,81, -1,0,112,83,1,0,232,108,1,0,5,152,1,0,17,79, -1,0,153,81,1,0,99,107,1,0,230,93,1,0,243,83, -1,0,59,83,1,0,151,91,1,0,102,91,1,0,227,118, -1,0,1,79,1,0,199,140,1,0,84,83,1,0,28,89, -2,0,51,0,54,0,2,0,51,0,55,0,2,0,51,0, -56,0,2,0,51,0,57,0,2,0,52,0,48,0,2,0, -52,0,49,0,2,0,52,0,50,0,2,0,52,0,51,0, -2,0,52,0,52,0,2,0,52,0,53,0,2,0,52,0, -54,0,2,0,52,0,55,0,2,0,52,0,56,0,2,0, -52,0,57,0,2,0,53,0,48,0,2,0,49,0,8,103, -2,0,50,0,8,103,2,0,51,0,8,103,2,0,52,0, -8,103,2,0,53,0,8,103,2,0,54,0,8,103,2,0, -55,0,8,103,2,0,56,0,8,103,2,0,57,0,8,103, -3,0,49,0,48,0,8,103,3,0,49,0,49,0,8,103, -3,0,49,0,50,0,8,103,2,0,104,0,103,0,3,0, -101,0,114,0,103,0,2,0,101,0,118,0,3,0,108,0, -116,0,100,0,1,0,162,48,1,0,164,48,1,0,166,48, -1,0,168,48,1,0,170,48,1,0,171,48,1,0,173,48, -1,0,175,48,1,0,177,48,1,0,179,48,1,0,181,48, -1,0,183,48,1,0,185,48,1,0,187,48,1,0,189,48, -1,0,191,48,1,0,193,48,1,0,196,48,1,0,198,48, -1,0,200,48,1,0,202,48,1,0,203,48,1,0,204,48, -1,0,205,48,1,0,206,48,1,0,207,48,1,0,210,48, -1,0,213,48,1,0,216,48,1,0,219,48,1,0,222,48, -1,0,223,48,1,0,224,48,1,0,225,48,1,0,226,48, -1,0,228,48,1,0,230,48,1,0,232,48,1,0,233,48, -1,0,234,48,1,0,235,48,1,0,236,48,1,0,237,48, -1,0,239,48,1,0,240,48,1,0,241,48,1,0,242,48, -2,0,228,78,140,84,4,0,162,48,235,48,213,48,161,48, -3,0,162,48,252,48,235,48,3,0,164,48,243,48,193,48, -3,0,166,48,169,48,243,48,4,0,168,48,252,48,171,48, -252,48,3,0,170,48,243,48,185,48,3,0,170,48,252,48, -224,48,3,0,171,48,164,48,234,48,4,0,171,48,233,48, -195,48,200,48,4,0,171,48,237,48,234,48,252,48,4,0, -173,48,229,48,234,48,252,48,2,0,173,48,237,48,6,0, -173,48,237,48,225,48,252,48,200,48,235,48,5,0,173,48, -237,48,239,48,195,48,200,48,4,0,175,48,237,48,252,48, -205,48,3,0,177,48,252,48,185,48,3,0,179,48,235,48, -202,48,4,0,181,48,164,48,175,48,235,48,5,0,181,48, -243,48,193,48,252,48,224,48,3,0,187,48,243,48,193,48, -3,0,187,48,243,48,200,48,2,0,200,48,243,48,2,0, -202,48,206,48,3,0,206,48,195,48,200,48,3,0,207,48, -164,48,196,48,4,0,213,48,163,48,252,48,200,48,3,0, -213,48,233,48,243,48,5,0,216,48,175,48,191,48,252,48, -235,48,3,0,216,48,235,48,196,48,2,0,219,48,243,48, -3,0,219,48,252,48,235,48,3,0,219,48,252,48,243,48, -4,0,222,48,164,48,175,48,237,48,3,0,222,48,164,48, -235,48,3,0,222,48,195,48,207,48,3,0,222,48,235,48, -175,48,5,0,222,48,243,48,183,48,231,48,243,48,4,0, -223,48,175,48,237,48,243,48,2,0,223,48,234,48,4,0, -225,48,252,48,200,48,235,48,3,0,228,48,252,48,235,48, -3,0,230,48,162,48,243,48,4,0,234,48,195,48,200,48, -235,48,2,0,234,48,233,48,2,0,236,48,224,48,3,0, -239,48,195,48,200,48,2,0,48,0,185,112,2,0,49,0, -185,112,2,0,50,0,185,112,2,0,51,0,185,112,2,0, -52,0,185,112,2,0,53,0,185,112,2,0,54,0,185,112, -2,0,55,0,185,112,2,0,56,0,185,112,2,0,57,0, -185,112,3,0,49,0,48,0,185,112,3,0,49,0,49,0, -185,112,3,0,49,0,50,0,185,112,3,0,49,0,51,0, -185,112,3,0,49,0,52,0,185,112,3,0,49,0,53,0, 
-185,112,3,0,49,0,54,0,185,112,3,0,49,0,55,0, -185,112,3,0,49,0,56,0,185,112,3,0,49,0,57,0, -185,112,3,0,50,0,48,0,185,112,3,0,50,0,49,0, -185,112,3,0,50,0,50,0,185,112,3,0,50,0,51,0, -185,112,3,0,50,0,52,0,185,112,3,0,104,0,112,0, -97,0,2,0,100,0,97,0,2,0,97,0,117,0,3,0, -98,0,97,0,114,0,2,0,111,0,118,0,2,0,112,0, -99,0,2,0,100,0,109,0,3,0,100,0,109,0,50,0, -3,0,100,0,109,0,51,0,2,0,105,0,117,0,2,0, -115,94,16,98,2,0,45,102,140,84,2,0,39,89,99,107, -2,0,14,102,187,108,4,0,42,104,15,95,26,79,62,121, -2,0,112,0,97,0,2,0,110,0,97,0,2,0,188,3, -97,0,2,0,109,0,97,0,2,0,107,0,97,0,2,0, -107,0,98,0,2,0,109,0,98,0,2,0,103,0,98,0, -3,0,99,0,97,0,108,0,4,0,107,0,99,0,97,0, -108,0,2,0,112,0,102,0,2,0,110,0,102,0,2,0, -188,3,102,0,2,0,188,3,103,0,2,0,109,0,103,0, -2,0,107,0,103,0,2,0,104,0,122,0,3,0,107,0, -104,0,122,0,3,0,109,0,104,0,122,0,3,0,103,0, -104,0,122,0,3,0,116,0,104,0,122,0,2,0,188,3, -108,0,2,0,109,0,108,0,2,0,100,0,108,0,2,0, -107,0,108,0,2,0,102,0,109,0,2,0,110,0,109,0, -2,0,188,3,109,0,2,0,109,0,109,0,2,0,99,0, -109,0,2,0,107,0,109,0,3,0,109,0,109,0,50,0, -3,0,99,0,109,0,50,0,2,0,109,0,50,0,3,0, -107,0,109,0,50,0,3,0,109,0,109,0,51,0,3,0, -99,0,109,0,51,0,2,0,109,0,51,0,3,0,107,0, -109,0,51,0,3,0,109,0,21,34,115,0,4,0,109,0, -21,34,115,0,50,0,3,0,107,0,112,0,97,0,3,0, -109,0,112,0,97,0,3,0,103,0,112,0,97,0,3,0, -114,0,97,0,100,0,5,0,114,0,97,0,100,0,21,34, -115,0,6,0,114,0,97,0,100,0,21,34,115,0,50,0, -2,0,112,0,115,0,2,0,110,0,115,0,2,0,188,3, -115,0,2,0,109,0,115,0,2,0,112,0,118,0,2,0, -110,0,118,0,2,0,188,3,118,0,2,0,109,0,118,0, -2,0,107,0,118,0,2,0,112,0,119,0,2,0,110,0, -119,0,2,0,188,3,119,0,2,0,109,0,119,0,2,0, -107,0,119,0,2,0,107,0,201,3,2,0,109,0,201,3, -2,0,98,0,113,0,2,0,99,0,99,0,2,0,99,0, -100,0,4,0,99,0,21,34,107,0,103,0,2,0,100,0, -98,0,2,0,103,0,121,0,2,0,104,0,97,0,2,0, -104,0,112,0,2,0,105,0,110,0,2,0,107,0,107,0, -2,0,107,0,116,0,2,0,108,0,109,0,2,0,108,0, -110,0,3,0,108,0,111,0,103,0,2,0,108,0,120,0, -3,0,109,0,105,0,108,0,3,0,109,0,111,0,108,0, -2,0,112,0,104,0,3,0,112,0,112,0,109,0,2,0, -112,0,114,0,2,0,115,0,114,0,2,0,115,0,118,0, -2,0,119,0,98,0,3,0,118,0,21,34,109,0,3,0, -97,0,21,34,109,0,2,0,49,0,229,101,2,0,50,0, -229,101,2,0,51,0,229,101,2,0,52,0,229,101,2,0, -53,0,229,101,2,0,54,0,229,101,2,0,55,0,229,101, -2,0,56,0,229,101,2,0,57,0,229,101,3,0,49,0, -48,0,229,101,3,0,49,0,49,0,229,101,3,0,49,0, -50,0,229,101,3,0,49,0,51,0,229,101,3,0,49,0, -52,0,229,101,3,0,49,0,53,0,229,101,3,0,49,0, -54,0,229,101,3,0,49,0,55,0,229,101,3,0,49,0, -56,0,229,101,3,0,49,0,57,0,229,101,3,0,50,0, -48,0,229,101,3,0,50,0,49,0,229,101,3,0,50,0, -50,0,229,101,3,0,50,0,51,0,229,101,3,0,50,0, -52,0,229,101,3,0,50,0,53,0,229,101,3,0,50,0, -54,0,229,101,3,0,50,0,55,0,229,101,3,0,50,0, -56,0,229,101,3,0,50,0,57,0,229,101,3,0,51,0, -48,0,229,101,3,0,51,0,49,0,229,101,3,0,103,0, -97,0,108,0,1,0,76,4,1,0,121,29,1,0,108,2, -1,0,158,2,1,0,135,2,1,0,83,171,1,0,142,29, -1,0,83,1,1,0,39,167,1,0,160,19,1,0,161,19, -1,0,162,19,1,0,163,19,1,0,164,19,1,0,165,19, -1,0,166,19,1,0,167,19,1,0,168,19,1,0,169,19, -1,0,170,19,1,0,171,19,1,0,172,19,1,0,173,19, -1,0,174,19,1,0,175,19,1,0,176,19,1,0,177,19, -1,0,178,19,1,0,179,19,1,0,180,19,1,0,181,19, -1,0,182,19,1,0,183,19,1,0,184,19,1,0,185,19, -1,0,186,19,1,0,187,19,1,0,188,19,1,0,189,19, -1,0,190,19,1,0,191,19,1,0,192,19,1,0,193,19, -1,0,194,19,1,0,195,19,1,0,196,19,1,0,197,19, -1,0,198,19,1,0,199,19,1,0,200,19,1,0,201,19, -1,0,202,19,1,0,203,19,1,0,204,19,1,0,205,19, -1,0,206,19,1,0,207,19,1,0,208,19,1,0,209,19, 
-1,0,210,19,1,0,211,19,1,0,212,19,1,0,213,19, -1,0,214,19,1,0,215,19,1,0,216,19,1,0,217,19, -1,0,218,19,1,0,219,19,1,0,220,19,1,0,221,19, -1,0,222,19,1,0,223,19,1,0,224,19,1,0,225,19, -1,0,226,19,1,0,227,19,1,0,228,19,1,0,229,19, -1,0,230,19,1,0,231,19,1,0,232,19,1,0,233,19, -1,0,234,19,1,0,235,19,1,0,236,19,1,0,237,19, -1,0,238,19,1,0,239,19,1,0,72,140,1,0,244,102, -1,0,200,140,1,0,209,110,1,0,50,78,1,0,229,83, -1,0,81,89,1,0,135,85,1,0,72,89,1,0,246,97, -1,0,105,118,1,0,133,127,1,0,63,134,1,0,186,135, -1,0,248,136,1,0,143,144,1,0,2,106,1,0,27,109, -1,0,217,112,1,0,222,115,1,0,61,132,1,0,106,145, -1,0,241,153,1,0,130,78,1,0,117,83,1,0,4,107, -1,0,27,114,1,0,45,134,1,0,30,158,1,0,80,93, -1,0,235,111,1,0,205,133,1,0,100,137,1,0,201,98, -1,0,216,129,1,0,31,136,1,0,202,94,1,0,23,103, -1,0,106,109,1,0,252,114,1,0,206,144,1,0,134,79, -1,0,183,81,1,0,222,82,1,0,196,100,1,0,211,106, -1,0,16,114,1,0,231,118,1,0,6,134,1,0,92,134, -1,0,239,141,1,0,50,151,1,0,111,155,1,0,250,157, -1,0,140,120,1,0,127,121,1,0,160,125,1,0,201,131, -1,0,4,147,1,0,214,138,1,0,223,88,1,0,4,95, -1,0,96,124,1,0,126,128,1,0,98,114,1,0,202,120, -1,0,194,140,1,0,247,150,1,0,216,88,1,0,98,92, -1,0,19,106,1,0,218,109,1,0,15,111,1,0,47,125, -1,0,55,126,1,0,75,150,1,0,210,82,1,0,139,128, -1,0,220,81,1,0,204,81,1,0,28,122,1,0,190,125, -1,0,241,131,1,0,117,150,1,0,128,139,1,0,207,98, -1,0,254,138,1,0,57,78,1,0,231,91,1,0,18,96, -1,0,135,115,1,0,112,117,1,0,23,83,1,0,251,120, -1,0,191,79,1,0,169,95,1,0,13,78,1,0,204,108, -1,0,120,101,1,0,34,125,1,0,195,83,1,0,94,88, -1,0,1,119,1,0,73,132,1,0,170,138,1,0,186,107, -1,0,136,108,1,0,254,98,1,0,229,130,1,0,160,99, -1,0,101,117,1,0,174,78,1,0,105,81,1,0,201,81, -1,0,129,104,1,0,231,124,1,0,111,130,1,0,210,138, -1,0,207,145,1,0,245,82,1,0,66,84,1,0,236,94, -1,0,197,101,1,0,254,111,1,0,42,121,1,0,173,149, -1,0,106,154,1,0,151,158,1,0,206,158,1,0,198,102, -1,0,119,107,1,0,98,143,1,0,116,94,1,0,144,97, -1,0,0,98,1,0,154,100,1,0,35,111,1,0,73,113, -1,0,137,116,1,0,202,121,1,0,244,125,1,0,111,128, -1,0,38,143,1,0,238,132,1,0,35,144,1,0,74,147, -1,0,23,82,1,0,163,82,1,0,189,84,1,0,200,112, -1,0,194,136,1,0,201,94,1,0,245,95,1,0,123,99, -1,0,174,107,1,0,62,124,1,0,117,115,1,0,228,78, -1,0,249,86,1,0,186,93,1,0,28,96,1,0,178,115, -1,0,105,116,1,0,154,127,1,0,70,128,1,0,52,146, -1,0,246,150,1,0,72,151,1,0,24,152,1,0,139,79, -1,0,174,121,1,0,180,145,1,0,184,150,1,0,225,96, -1,0,134,78,1,0,218,80,1,0,238,91,1,0,63,92, -1,0,153,101,1,0,206,113,1,0,66,118,1,0,252,132, -1,0,124,144,1,0,136,102,1,0,46,150,1,0,137,82, -1,0,123,103,1,0,243,103,1,0,65,109,1,0,156,110, -1,0,9,116,1,0,89,117,1,0,107,120,1,0,16,125, -1,0,94,152,1,0,46,98,1,0,120,150,1,0,43,80, -1,0,25,93,1,0,234,109,1,0,42,143,1,0,139,95, -1,0,68,97,1,0,23,104,1,0,134,150,1,0,41,82, -1,0,15,84,1,0,101,92,1,0,19,102,1,0,78,103, -1,0,168,104,1,0,229,108,1,0,6,116,1,0,226,117, -1,0,121,127,1,0,207,136,1,0,225,136,1,0,226,150, -1,0,63,83,1,0,186,110,1,0,29,84,1,0,208,113, -1,0,152,116,1,0,250,133,1,0,163,150,1,0,87,156, -1,0,159,158,1,0,151,103,1,0,203,109,1,0,232,129, -1,0,32,123,1,0,146,124,1,0,192,114,1,0,153,112, -1,0,88,139,1,0,192,78,1,0,54,131,1,0,58,82, -1,0,7,82,1,0,166,94,1,0,211,98,1,0,214,124, -1,0,133,91,1,0,30,109,1,0,180,102,1,0,59,143, -1,0,77,150,1,0,211,94,1,0,64,81,1,0,192,85, -1,0,90,88,1,0,116,102,1,0,222,81,1,0,42,115, -1,0,202,118,1,0,60,121,1,0,94,121,1,0,101,121, -1,0,143,121,1,0,86,151,1,0,190,124,1,0,18,134, -1,0,248,138,1,0,56,144,1,0,253,144,1,0,239,152, -1,0,252,152,1,0,40,153,1,0,180,157,1,0,222,144, 
-1,0,183,150,1,0,174,79,1,0,231,80,1,0,77,81, -1,0,201,82,1,0,228,82,1,0,81,83,1,0,157,85, -1,0,6,86,1,0,104,86,1,0,64,88,1,0,168,88, -1,0,100,92,1,0,148,96,1,0,104,97,1,0,142,97, -1,0,242,97,1,0,79,101,1,0,226,101,1,0,145,102, -1,0,133,104,1,0,119,109,1,0,26,110,1,0,34,111, -1,0,110,113,1,0,43,114,1,0,34,116,1,0,145,120, -1,0,73,121,1,0,72,121,1,0,80,121,1,0,86,121, -1,0,141,121,1,0,142,121,1,0,64,122,1,0,129,122, -1,0,192,123,1,0,9,126,1,0,65,126,1,0,114,127, -1,0,5,128,1,0,237,129,1,0,121,130,1,0,87,132, -1,0,16,137,1,0,150,137,1,0,1,139,1,0,57,139, -1,0,211,140,1,0,8,141,1,0,182,143,1,0,227,150, -1,0,255,151,1,0,59,152,1,0,117,96,2,0,80,216, -238,222,1,0,24,130,1,0,38,78,1,0,181,81,1,0, -104,81,1,0,128,79,1,0,69,81,1,0,128,81,1,0, -199,82,1,0,250,82,1,0,85,85,1,0,153,85,1,0, -226,85,1,0,179,88,1,0,68,89,1,0,84,89,1,0, -98,90,1,0,40,91,1,0,210,94,1,0,217,94,1,0, -105,95,1,0,173,95,1,0,216,96,1,0,78,97,1,0, -8,97,1,0,96,97,1,0,52,98,1,0,196,99,1,0, -28,100,1,0,82,100,1,0,86,101,1,0,27,103,1,0, -86,103,1,0,219,110,1,0,203,110,1,0,30,112,1,0, -167,119,1,0,53,114,1,0,175,114,1,0,113,116,1,0, -6,117,1,0,59,117,1,0,29,118,1,0,31,118,1,0, -219,118,1,0,244,118,1,0,74,119,1,0,64,119,1,0, -204,120,1,0,177,122,1,0,123,124,1,0,91,125,1,0, -62,127,1,0,82,131,1,0,239,131,1,0,121,135,1,0, -65,137,1,0,134,137,1,0,191,138,1,0,203,138,1,0, -237,138,1,0,138,139,1,0,56,143,1,0,114,144,1,0, -153,145,1,0,118,146,1,0,124,150,1,0,219,151,1,0, -11,152,1,0,18,155,2,0,74,216,74,220,2,0,74,216, -68,220,2,0,76,216,213,223,1,0,157,59,1,0,24,64, -1,0,57,64,2,0,84,216,73,222,2,0,87,216,208,220, -2,0,95,216,211,222,1,0,67,159,1,0,142,159,2,0, -102,0,102,0,2,0,102,0,105,0,2,0,102,0,108,0, -3,0,102,0,102,0,105,0,3,0,102,0,102,0,108,0, -2,0,115,0,116,0,2,0,116,5,118,5,2,0,116,5, -101,5,2,0,116,5,107,5,2,0,126,5,118,5,2,0, -116,5,109,5,2,14,217,5,180,5,2,17,242,5,183,5, -1,0,226,5,1,0,212,5,1,0,219,5,1,0,220,5, -1,0,221,5,1,0,232,5,1,0,234,5,2,24,233,5, -193,5,2,25,233,5,194,5,3,24,233,5,188,5,193,5, -3,25,233,5,188,5,194,5,2,17,208,5,183,5,2,18, -208,5,184,5,2,21,208,5,188,5,2,21,209,5,188,5, -2,21,210,5,188,5,2,21,211,5,188,5,2,21,212,5, -188,5,2,21,213,5,188,5,2,21,214,5,188,5,2,21, -216,5,188,5,2,21,217,5,188,5,2,21,218,5,188,5, -2,21,219,5,188,5,2,21,220,5,188,5,2,21,222,5, -188,5,2,21,224,5,188,5,2,21,225,5,188,5,2,21, -227,5,188,5,2,21,228,5,188,5,2,21,230,5,188,5, -2,21,231,5,188,5,2,21,232,5,188,5,2,21,233,5, -188,5,2,21,234,5,188,5,2,19,213,5,185,5,2,23, -209,5,191,5,2,23,219,5,191,5,2,23,228,5,191,5, -2,0,208,5,220,5,1,0,113,6,1,0,123,6,1,0, -126,6,1,0,128,6,1,0,122,6,1,0,127,6,1,0, -121,6,1,0,164,6,1,0,166,6,1,0,132,6,1,0, -131,6,1,0,134,6,1,0,135,6,1,0,141,6,1,0, -140,6,1,0,142,6,1,0,136,6,1,0,152,6,1,0, -145,6,1,0,169,6,1,0,175,6,1,0,179,6,1,0, -177,6,1,0,186,6,1,0,187,6,1,0,193,6,1,0, -190,6,1,0,210,6,1,0,173,6,1,0,199,6,1,0, -198,6,1,0,200,6,1,0,203,6,1,0,197,6,1,0, -201,6,1,0,208,6,1,0,73,6,1,0,204,6,2,0, -40,6,44,6,2,0,40,6,45,6,2,0,40,6,46,6, -2,0,40,6,69,6,2,0,40,6,73,6,2,0,40,6, -74,6,2,0,42,6,44,6,2,0,42,6,45,6,2,0, -42,6,46,6,2,0,42,6,69,6,2,0,42,6,73,6, -2,0,42,6,74,6,2,0,43,6,44,6,2,0,43,6, -69,6,2,0,43,6,73,6,2,0,43,6,74,6,2,0, -44,6,45,6,2,0,44,6,69,6,2,0,45,6,44,6, -2,0,45,6,69,6,2,0,46,6,44,6,2,0,46,6, -45,6,2,0,46,6,69,6,2,0,51,6,44,6,2,0, -51,6,45,6,2,0,51,6,46,6,2,0,51,6,69,6, -2,0,53,6,45,6,2,0,53,6,69,6,2,0,54,6, -44,6,2,0,54,6,45,6,2,0,54,6,46,6,2,0, -54,6,69,6,2,0,55,6,45,6,2,0,55,6,69,6, -2,0,56,6,69,6,2,0,57,6,44,6,2,0,57,6, -69,6,2,0,58,6,44,6,2,0,58,6,69,6,2,0, 
-65,6,44,6,2,0,65,6,45,6,2,0,65,6,46,6, -2,0,65,6,69,6,2,0,65,6,73,6,2,0,65,6, -74,6,2,0,66,6,45,6,2,0,66,6,69,6,2,0, -66,6,73,6,2,0,66,6,74,6,2,0,67,6,39,6, -2,0,67,6,44,6,2,0,67,6,45,6,2,0,67,6, -46,6,2,0,67,6,68,6,2,0,67,6,69,6,2,0, -67,6,73,6,2,0,67,6,74,6,2,0,68,6,44,6, -2,0,68,6,45,6,2,0,68,6,46,6,2,0,68,6, -69,6,2,0,68,6,73,6,2,0,68,6,74,6,2,0, -69,6,44,6,2,0,69,6,45,6,2,0,69,6,46,6, -2,0,69,6,69,6,2,0,69,6,73,6,2,0,69,6, -74,6,2,0,70,6,44,6,2,0,70,6,45,6,2,0, -70,6,46,6,2,0,70,6,69,6,2,0,70,6,73,6, -2,0,70,6,74,6,2,0,71,6,44,6,2,0,71,6, -69,6,2,0,71,6,73,6,2,0,71,6,74,6,2,0, -74,6,44,6,2,0,74,6,45,6,2,0,74,6,46,6, -2,0,74,6,69,6,2,0,74,6,73,6,2,0,74,6, -74,6,2,35,48,6,112,6,2,35,49,6,112,6,2,35, -73,6,112,6,3,33,32,0,76,6,81,6,3,33,32,0, -77,6,81,6,3,33,32,0,78,6,81,6,3,33,32,0, -79,6,81,6,3,33,32,0,80,6,81,6,3,35,32,0, -81,6,112,6,2,0,40,6,49,6,2,0,40,6,50,6, -2,0,40,6,70,6,2,0,42,6,49,6,2,0,42,6, -50,6,2,0,42,6,70,6,2,0,43,6,49,6,2,0, -43,6,50,6,2,0,43,6,70,6,2,0,69,6,39,6, -2,0,70,6,49,6,2,0,70,6,50,6,2,0,70,6, -70,6,2,0,74,6,49,6,2,0,74,6,50,6,2,0, -74,6,70,6,2,0,40,6,71,6,2,0,42,6,71,6, -2,0,53,6,46,6,2,0,68,6,71,6,2,0,70,6, -71,6,2,35,71,6,112,6,2,0,74,6,71,6,2,0, -43,6,71,6,2,0,51,6,71,6,2,0,52,6,69,6, -2,0,52,6,71,6,3,33,64,6,78,6,81,6,3,33, -64,6,79,6,81,6,3,33,64,6,80,6,81,6,2,0, -55,6,73,6,2,0,55,6,74,6,2,0,57,6,73,6, -2,0,57,6,74,6,2,0,58,6,73,6,2,0,58,6, -74,6,2,0,51,6,73,6,2,0,51,6,74,6,2,0, -52,6,73,6,2,0,52,6,74,6,2,0,45,6,73,6, -2,0,45,6,74,6,2,0,44,6,73,6,2,0,44,6, -74,6,2,0,46,6,73,6,2,0,46,6,74,6,2,0, -53,6,73,6,2,0,53,6,74,6,2,0,54,6,73,6, -2,0,54,6,74,6,2,0,52,6,44,6,2,0,52,6, -45,6,2,0,52,6,46,6,2,0,52,6,49,6,2,0, -51,6,49,6,2,0,53,6,49,6,2,0,54,6,49,6, -2,27,39,6,75,6,3,0,42,6,44,6,69,6,3,0, -42,6,45,6,44,6,3,0,42,6,45,6,69,6,3,0, -42,6,46,6,69,6,3,0,42,6,69,6,44,6,3,0, -42,6,69,6,45,6,3,0,42,6,69,6,46,6,3,0, -44,6,69,6,45,6,3,0,45,6,69,6,74,6,3,0, -45,6,69,6,73,6,3,0,51,6,45,6,44,6,3,0, -51,6,44,6,45,6,3,0,51,6,44,6,73,6,3,0, -51,6,69,6,45,6,3,0,51,6,69,6,44,6,3,0, -51,6,69,6,69,6,3,0,53,6,45,6,45,6,3,0, -53,6,69,6,69,6,3,0,52,6,45,6,69,6,3,0, -52,6,44,6,74,6,3,0,52,6,69,6,46,6,3,0, -52,6,69,6,69,6,3,0,54,6,45,6,73,6,3,0, -54,6,46,6,69,6,3,0,55,6,69,6,45,6,3,0, -55,6,69,6,69,6,3,0,55,6,69,6,74,6,3,0, -57,6,44,6,69,6,3,0,57,6,69,6,69,6,3,0, -57,6,69,6,73,6,3,0,58,6,69,6,69,6,3,0, -58,6,69,6,74,6,3,0,58,6,69,6,73,6,3,0, -65,6,46,6,69,6,3,0,66,6,69,6,45,6,3,0, -66,6,69,6,69,6,3,0,68,6,45,6,69,6,3,0, -68,6,45,6,74,6,3,0,68,6,45,6,73,6,3,0, -68,6,44,6,44,6,3,0,68,6,46,6,69,6,3,0, -68,6,69,6,45,6,3,0,69,6,45,6,44,6,3,0, -69,6,45,6,69,6,3,0,69,6,45,6,74,6,3,0, -69,6,44,6,45,6,3,0,69,6,44,6,69,6,3,0, -69,6,46,6,44,6,3,0,69,6,46,6,69,6,3,0, -69,6,44,6,46,6,3,0,71,6,69,6,44,6,3,0, -71,6,69,6,69,6,3,0,70,6,45,6,69,6,3,0, -70,6,45,6,73,6,3,0,70,6,44,6,69,6,3,0, -70,6,44,6,73,6,3,0,70,6,69,6,74,6,3,0, -70,6,69,6,73,6,3,0,74,6,69,6,69,6,3,0, -40,6,46,6,74,6,3,0,42,6,44,6,74,6,3,0, -42,6,44,6,73,6,3,0,42,6,46,6,74,6,3,0, -42,6,46,6,73,6,3,0,42,6,69,6,74,6,3,0, -42,6,69,6,73,6,3,0,44,6,69,6,74,6,3,0, -44,6,45,6,73,6,3,0,44,6,69,6,73,6,3,0, -51,6,46,6,73,6,3,0,53,6,45,6,74,6,3,0, -52,6,45,6,74,6,3,0,54,6,45,6,74,6,3,0, -68,6,44,6,74,6,3,0,68,6,69,6,74,6,3,0, -74,6,45,6,74,6,3,0,74,6,44,6,74,6,3,0, -74,6,69,6,74,6,3,0,69,6,69,6,74,6,3,0, -66,6,69,6,74,6,3,0,70,6,45,6,74,6,3,0, -57,6,69,6,74,6,3,0,67,6,69,6,74,6,3,0, -70,6,44,6,45,6,3,0,69,6,46,6,74,6,3,0, -68,6,44,6,69,6,3,0,67,6,69,6,69,6,3,0, -44,6,45,6,74,6,3,0,45,6,44,6,74,6,3,0, -69,6,44,6,74,6,3,0,65,6,69,6,74,6,3,0, 
-40,6,45,6,74,6,3,0,51,6,46,6,74,6,3,0, -70,6,44,6,74,6,3,0,53,6,68,6,210,6,3,0, -66,6,68,6,210,6,4,0,39,6,68,6,68,6,71,6, -4,0,39,6,67,6,40,6,49,6,4,0,69,6,45,6, -69,6,47,6,4,0,53,6,68,6,57,6,69,6,4,0, -49,6,51,6,72,6,68,6,4,0,57,6,68,6,74,6, -71,6,4,0,72,6,51,6,68,6,69,6,3,0,53,6, -68,6,73,6,18,0,53,6,68,6,73,6,32,0,39,6, -68,6,68,6,71,6,32,0,57,6,68,6,74,6,71,6, -32,0,72,6,51,6,68,6,69,6,8,0,44,6,68,6, -32,0,44,6,68,6,39,6,68,6,71,6,4,0,49,6, -204,6,39,6,68,6,1,0,44,0,1,0,1,48,1,0, -58,0,1,0,33,0,1,0,63,0,1,0,22,48,1,0, -23,48,1,0,20,32,1,0,19,32,1,0,95,0,1,0, -123,0,1,0,125,0,1,0,20,48,1,0,21,48,1,0, -16,48,1,0,17,48,1,0,10,48,1,0,11,48,1,0, -12,48,1,0,13,48,1,0,14,48,1,0,15,48,1,0, -91,0,1,0,93,0,1,0,35,0,1,0,38,0,1,0, -42,0,1,0,45,0,1,0,60,0,1,0,62,0,1,0, -92,0,1,0,36,0,1,0,37,0,1,0,64,0,2,27, -32,0,75,6,2,27,64,6,75,6,2,28,32,0,76,6, -2,29,32,0,77,6,2,30,32,0,78,6,2,30,64,6, -78,6,2,31,32,0,79,6,2,31,64,6,79,6,2,32, -32,0,80,6,2,32,64,6,80,6,2,33,32,0,81,6, -2,33,64,6,81,6,2,34,32,0,82,6,2,34,64,6, -82,6,1,0,33,6,1,0,39,6,1,0,40,6,1,0, -41,6,1,0,42,6,1,0,43,6,1,0,44,6,1,0, -45,6,1,0,46,6,1,0,47,6,1,0,48,6,1,0, -49,6,1,0,50,6,1,0,51,6,1,0,52,6,1,0, -53,6,1,0,54,6,1,0,55,6,1,0,56,6,1,0, -57,6,1,0,58,6,1,0,65,6,1,0,66,6,1,0, -67,6,1,0,68,6,1,0,69,6,1,0,70,6,1,0, -71,6,1,0,72,6,1,0,74,6,2,0,68,6,39,6, -1,0,34,0,1,0,39,0,1,0,47,0,1,0,94,0, -1,0,124,0,1,0,126,0,1,0,133,41,1,0,134,41, -1,0,251,48,1,0,161,48,1,0,163,48,1,0,165,48, -1,0,167,48,1,0,169,48,1,0,227,48,1,0,229,48, -1,0,231,48,1,0,195,48,1,0,252,48,1,0,243,48, -1,0,162,0,1,0,163,0,1,0,172,0,1,0,166,0, -1,0,165,0,1,0,169,32,1,0,2,37,1,0,144,33, -1,0,145,33,1,0,146,33,1,0,147,33,1,0,160,37, -1,0,203,37,4,216,52,216,87,221,52,216,101,221,4,216, -52,216,88,221,52,216,101,221,6,216,52,216,88,221,52,216, -101,221,52,216,110,221,6,216,52,216,88,221,52,216,101,221, -52,216,111,221,6,216,52,216,88,221,52,216,101,221,52,216, -112,221,6,216,52,216,88,221,52,216,101,221,52,216,113,221, -6,216,52,216,88,221,52,216,101,221,52,216,114,221,4,216, -52,216,185,221,52,216,101,221,4,216,52,216,186,221,52,216, -101,221,6,216,52,216,185,221,52,216,101,221,52,216,110,221, -6,216,52,216,186,221,52,216,101,221,52,216,110,221,6,216, -52,216,185,221,52,216,101,221,52,216,111,221,6,216,52,216, -186,221,52,216,101,221,52,216,111,221,1,0,49,1,1,0, -55,2,1,0,177,3,1,0,181,3,1,0,182,3,1,0, -183,3,1,0,186,3,1,0,187,3,1,0,189,3,1,0, -190,3,1,0,191,3,1,0,195,3,1,0,196,3,1,0, -197,3,1,0,200,3,1,0,7,34,1,0,2,34,1,0, -221,3,1,0,110,6,1,0,161,6,1,0,111,6,2,0, -48,0,44,0,2,0,49,0,44,0,2,0,50,0,44,0, -2,0,51,0,44,0,2,0,52,0,44,0,2,0,53,0, -44,0,2,0,54,0,44,0,2,0,55,0,44,0,2,0, -56,0,44,0,2,0,57,0,44,0,3,0,20,48,115,0, -21,48,2,0,119,0,122,0,2,0,104,0,118,0,2,0, -115,0,100,0,3,0,112,0,112,0,118,0,2,0,119,0, -99,0,2,0,109,0,99,0,2,0,109,0,100,0,2,0, -109,0,114,0,2,0,100,0,106,0,2,0,123,48,75,48, -2,0,179,48,179,48,1,0,87,91,1,0,204,83,1,0, -26,89,1,0,227,137,1,0,164,78,1,0,32,102,1,0, -33,113,1,0,77,82,1,0,140,95,1,0,141,81,1,0, -176,101,1,0,29,82,1,0,66,125,1,0,169,140,1,0, -240,88,1,0,57,84,1,0,20,111,1,0,149,98,1,0, -85,99,1,0,74,144,1,0,7,99,1,0,83,98,1,0, -129,121,1,0,122,122,1,0,8,84,1,0,128,110,1,0, -51,117,1,0,114,82,1,0,182,85,1,0,77,145,3,0, -20,48,44,103,21,48,3,0,20,48,9,78,21,48,3,0, -20,48,140,78,21,48,3,0,20,48,137,91,21,48,3,0, -20,48,185,112,21,48,3,0,20,48,83,98,21,48,3,0, -20,48,215,118,21,48,3,0,20,48,221,82,21,48,3,0, -20,48,87,101,21,48,1,0,151,95,1,0,239,83,1,0, -61,78,1,0,56,78,1,0,65,78,2,0,64,216,34,221, -1,0,96,79,1,0,187,79,1,0,2,80,1,0,122,80, 
-1,0,153,80,1,0,207,80,1,0,158,52,2,0,65,216, -58,222,1,0,84,81,1,0,100,81,1,0,119,81,2,0, -65,216,28,221,1,0,185,52,1,0,103,81,2,0,65,216, -75,221,1,0,151,81,1,0,164,81,1,0,204,78,1,0, -172,81,2,0,100,216,223,221,1,0,3,82,1,0,223,52, -1,0,59,82,1,0,70,82,1,0,119,82,1,0,21,53, -1,0,5,83,1,0,6,83,1,0,73,83,1,0,90,83, -1,0,115,83,1,0,125,83,1,0,127,83,2,0,66,216, -44,222,1,0,112,112,1,0,202,83,1,0,223,83,2,0, -66,216,99,223,1,0,235,83,1,0,241,83,1,0,6,84, -1,0,158,84,1,0,56,84,1,0,72,84,1,0,104,84, -1,0,162,84,1,0,246,84,1,0,16,85,1,0,83,85, -1,0,99,85,1,0,132,85,1,0,171,85,1,0,179,85, -1,0,194,85,1,0,22,87,1,0,23,87,1,0,81,86, -1,0,116,86,1,0,238,88,1,0,206,87,1,0,244,87, -1,0,13,88,1,0,139,87,1,0,50,88,1,0,49,88, -1,0,172,88,2,0,69,216,228,220,1,0,242,88,1,0, -247,88,1,0,6,89,1,0,34,89,1,0,98,89,2,0, -69,216,168,222,2,0,69,216,234,222,1,0,236,89,1,0, -27,90,1,0,39,90,1,0,216,89,1,0,102,90,1,0, -238,54,1,0,8,91,1,0,62,91,2,0,70,216,200,221, -1,0,195,91,1,0,216,91,1,0,243,91,2,0,70,216, -24,223,1,0,255,91,1,0,6,92,1,0,129,55,1,0, -96,92,1,0,192,92,1,0,141,92,2,0,71,216,228,221, -1,0,67,93,2,0,71,216,230,221,1,0,110,93,1,0, -107,93,1,0,124,93,1,0,225,93,1,0,226,93,1,0, -47,56,1,0,253,93,1,0,40,94,1,0,61,94,1,0, -105,94,1,0,98,56,2,0,72,216,131,221,1,0,124,56, -1,0,176,94,1,0,179,94,1,0,182,94,2,0,104,216, -146,223,2,0,72,216,49,223,1,0,1,130,1,0,34,95, -1,0,199,56,2,0,76,216,184,222,2,0,88,216,218,221, -1,0,98,95,1,0,107,95,1,0,227,56,1,0,154,95, -1,0,205,95,1,0,215,95,1,0,249,95,1,0,129,96, -1,0,58,57,1,0,28,57,2,0,73,216,212,222,1,0, -199,96,1,0,72,97,1,0,76,97,1,0,122,97,1,0, -178,97,1,0,164,97,1,0,175,97,1,0,222,97,1,0, -16,98,1,0,27,98,1,0,93,98,1,0,177,98,1,0, -212,98,1,0,80,99,2,0,74,216,12,223,1,0,61,99, -1,0,252,98,1,0,104,99,1,0,131,99,1,0,228,99, -2,0,74,216,241,223,1,0,34,100,1,0,197,99,1,0, -169,99,1,0,46,58,1,0,105,100,1,0,126,100,1,0, -157,100,1,0,119,100,1,0,108,58,1,0,108,101,2,0, -76,216,10,220,1,0,227,101,1,0,248,102,1,0,73,102, -1,0,25,59,1,0,8,59,1,0,228,58,1,0,146,81, -1,0,149,81,1,0,0,103,1,0,156,102,1,0,173,128, -1,0,217,67,1,0,33,103,1,0,94,103,1,0,83,103, -2,0,76,216,195,223,1,0,73,59,1,0,250,103,1,0, -133,103,1,0,82,104,2,0,77,216,109,220,1,0,142,104, -1,0,31,104,1,0,20,105,1,0,66,105,1,0,163,105, -1,0,234,105,1,0,168,106,2,0,77,216,163,222,1,0, -219,106,1,0,24,60,1,0,33,107,2,0,78,216,167,220, -1,0,84,107,1,0,78,60,1,0,114,107,1,0,159,107, -1,0,187,107,2,0,78,216,141,222,2,0,71,216,11,221, -2,0,78,216,250,222,1,0,78,108,2,0,79,216,188,220, -1,0,191,108,1,0,205,108,1,0,103,108,1,0,22,109, -1,0,62,109,1,0,105,109,1,0,120,109,1,0,133,109, -2,0,79,216,30,221,1,0,52,109,1,0,47,110,1,0, -110,110,1,0,51,61,1,0,199,110,2,0,79,216,209,222, -1,0,249,109,1,0,110,111,2,0,79,216,94,223,2,0, -79,216,142,223,1,0,198,111,1,0,57,112,1,0,27,112, -1,0,150,61,1,0,74,112,1,0,125,112,1,0,119,112, -1,0,173,112,2,0,65,216,37,221,1,0,69,113,2,0, -80,216,99,222,1,0,156,113,1,0,40,114,1,0,80,114, -2,0,81,216,8,222,1,0,128,114,1,0,149,114,2,0, -81,216,53,223,2,0,82,216,20,220,1,0,122,115,1,0, -139,115,1,0,172,62,1,0,165,115,1,0,184,62,1,0, -71,116,1,0,92,116,1,0,133,116,1,0,202,116,1,0, -27,63,1,0,36,117,2,0,83,216,54,220,1,0,62,117, -2,0,83,216,146,220,2,0,72,216,159,221,1,0,16,118, -2,0,83,216,161,223,2,0,83,216,184,223,2,0,84,216, -68,220,1,0,252,63,1,0,8,64,2,0,84,216,243,220, -2,0,84,216,242,220,2,0,84,216,25,221,2,0,84,216, -51,221,1,0,30,119,1,0,31,119,1,0,139,119,1,0, -70,64,1,0,150,64,2,0,85,216,29,220,1,0,78,120, -1,0,227,64,2,0,85,216,38,222,2,0,85,216,154,222, 
-2,0,85,216,197,222,1,0,235,121,1,0,47,65,1,0, -74,122,1,0,79,122,2,0,86,216,124,221,2,0,86,216, -167,222,1,0,2,66,2,0,86,216,171,223,1,0,198,123, -1,0,201,123,1,0,39,66,2,0,87,216,128,220,1,0, -210,124,1,0,160,66,1,0,232,124,1,0,227,124,1,0, -0,125,2,0,87,216,134,223,1,0,99,125,1,0,1,67, -1,0,199,125,1,0,2,126,1,0,69,126,1,0,52,67, -2,0,88,216,40,222,2,0,88,216,71,222,1,0,89,67, -2,0,88,216,217,222,1,0,122,127,2,0,88,216,62,223, -1,0,149,127,1,0,250,127,2,0,89,216,218,220,2,0, -89,216,35,221,1,0,96,128,2,0,89,216,168,221,1,0, -112,128,2,0,76,216,95,223,1,0,213,67,1,0,178,128, -1,0,3,129,1,0,11,68,1,0,62,129,1,0,181,90, -2,0,89,216,167,223,2,0,89,216,181,223,2,0,76,216, -147,223,2,0,76,216,156,223,1,0,4,130,1,0,158,143, -1,0,107,68,1,0,145,130,1,0,139,130,1,0,157,130, -1,0,179,82,1,0,177,130,1,0,179,130,1,0,189,130, -1,0,230,130,2,0,90,216,60,223,1,0,29,131,1,0, -99,131,1,0,173,131,1,0,35,131,1,0,189,131,1,0, -231,131,1,0,83,131,1,0,202,131,1,0,204,131,1,0, -220,131,2,0,91,216,54,220,2,0,91,216,107,221,2,0, -91,216,213,220,1,0,43,69,1,0,241,132,1,0,243,132, -1,0,22,133,2,0,92,216,202,223,1,0,100,133,2,0, -91,216,44,223,1,0,93,69,1,0,97,69,2,0,91,216, -177,223,2,0,92,216,210,220,1,0,107,69,1,0,80,134, -1,0,103,134,1,0,105,134,1,0,169,134,1,0,136,134, -1,0,14,135,1,0,226,134,1,0,40,135,1,0,107,135, -1,0,134,135,1,0,225,135,1,0,1,136,1,0,249,69, -1,0,96,136,2,0,93,216,103,222,1,0,215,136,1,0, -222,136,1,0,53,70,1,0,250,136,1,0,187,52,2,0, -94,216,174,220,2,0,94,216,102,221,1,0,190,70,1,0, -199,70,1,0,160,138,2,0,95,216,168,220,1,0,171,140, -1,0,193,140,1,0,27,141,1,0,119,141,2,0,95,216, -47,223,2,0,66,216,4,220,1,0,203,141,1,0,188,141, -1,0,240,141,2,0,66,216,222,220,1,0,212,142,2,0, -97,216,210,221,2,0,97,216,237,221,1,0,148,144,1,0, -241,144,1,0,17,145,2,0,97,216,46,223,1,0,27,145, -1,0,56,146,1,0,215,146,1,0,216,146,1,0,124,146, -1,0,249,147,1,0,21,148,2,0,98,216,250,223,1,0, -139,149,1,0,149,73,1,0,183,149,2,0,99,216,119,221, -1,0,230,73,1,0,195,150,1,0,178,93,1,0,35,151, -2,0,100,216,69,221,2,0,100,216,26,222,1,0,110,74, -1,0,118,74,1,0,224,151,2,0,101,216,10,220,1,0, -178,74,2,0,101,216,150,220,1,0,41,152,2,0,101,216, -182,221,1,0,226,152,1,0,51,75,1,0,41,153,1,0, -167,153,1,0,194,153,1,0,254,153,1,0,206,75,2,0, -102,216,48,223,1,0,64,156,1,0,253,156,1,0,206,76, -1,0,237,76,1,0,103,157,2,0,104,216,206,220,1,0, -248,76,2,0,104,216,5,221,2,0,104,216,14,222,2,0, -104,216,145,222,1,0,86,77,1,0,254,158,1,0,5,159, -1,0,15,159,1,0,22,159,2,0,105,216,0,222,255,0, -66,230,121,0,8,3,100,0,126,1,2,0,67,230,100,0, -122,0,12,3,80,4,66,230,53,4,0,3,81,4,66,230, -53,4,8,3,83,4,66,230,51,4,1,3,87,4,66,230, -86,4,8,3,92,4,66,230,58,4,1,3,93,4,66,230, -56,4,0,3,94,4,66,230,67,4,6,3,172,3,66,230, -177,3,1,3,173,3,66,230,181,3,1,3,174,3,66,230, -183,3,1,3,175,3,66,230,185,3,1,3,204,3,66,230, -191,3,1,3,205,3,66,230,197,3,1,3,206,3,66,230, -201,3,1,3,0,31,67,0,177,3,19,3,185,3,1,31, -67,0,177,3,20,3,185,3,2,31,185,3,2,0,68,0, -177,3,19,3,0,3,185,3,3,31,185,3,2,0,68,0, -177,3,20,3,0,3,185,3,4,31,185,3,2,0,68,0, -177,3,19,3,1,3,185,3,5,31,185,3,2,0,68,0, -177,3,20,3,1,3,185,3,6,31,185,3,2,0,68,0, -177,3,19,3,66,3,185,3,7,31,185,3,2,0,68,0, -177,3,20,3,66,3,185,3,32,31,67,0,183,3,19,3, -185,3,33,31,67,0,183,3,20,3,185,3,34,31,185,3, -2,0,68,0,183,3,19,3,0,3,185,3,35,31,185,3, -2,0,68,0,183,3,20,3,0,3,185,3,36,31,185,3, -2,0,68,0,183,3,19,3,1,3,185,3,37,31,185,3, -2,0,68,0,183,3,20,3,1,3,185,3,38,31,185,3, -2,0,68,0,183,3,19,3,66,3,185,3,39,31,185,3, -2,0,68,0,183,3,20,3,66,3,185,3,96,31,67,0, 
-201,3,19,3,185,3,97,31,67,0,201,3,20,3,185,3, -98,31,185,3,2,0,68,0,201,3,19,3,0,3,185,3, -99,31,185,3,2,0,68,0,201,3,20,3,0,3,185,3, -100,31,185,3,2,0,68,0,201,3,19,3,1,3,185,3, -101,31,185,3,2,0,68,0,201,3,20,3,1,3,185,3, -102,31,185,3,2,0,68,0,201,3,19,3,66,3,185,3, -103,31,185,3,2,0,68,0,201,3,20,3,66,3,185,3, -112,31,67,0,177,3,0,3,185,3,172,3,67,0,177,3, -1,3,185,3,182,31,67,0,177,3,66,3,185,3,112,31, -66,230,177,3,0,3,116,31,67,0,183,3,0,3,185,3, -174,3,67,0,183,3,1,3,185,3,198,31,67,0,183,3, -66,3,185,3,114,31,66,230,181,3,0,3,116,31,66,230, -183,3,0,3,144,3,1,0,67,230,185,3,8,3,1,3, -118,31,66,230,185,3,0,3,176,3,1,0,67,230,197,3, -8,3,1,3,122,31,66,230,197,3,0,3,124,31,67,0, -201,3,0,3,185,3,206,3,67,0,201,3,1,3,185,3, -246,31,67,0,201,3,66,3,185,3,120,31,66,230,191,3, -0,3,124,31,66,230,201,3,0,3,229,0,66,230,97,0, -10,3,40,0,0,172,41,0,3,0,68,0,40,0,0,17, -97,17,41,0,40,0,152,176,41,0,3,0,68,0,40,0, -2,17,97,17,41,0,40,0,228,178,41,0,3,0,68,0, -40,0,3,17,97,17,41,0,40,0,124,183,41,0,3,0, -68,0,40,0,5,17,97,17,41,0,40,0,200,185,41,0, -3,0,68,0,40,0,6,17,97,17,41,0,40,0,20,188, -41,0,3,0,68,0,40,0,7,17,97,17,41,0,40,0, -172,192,41,0,3,0,68,0,40,0,9,17,97,17,41,0, -40,0,68,197,41,0,3,0,68,0,40,0,11,17,97,17, -41,0,40,0,144,199,41,0,3,0,68,0,40,0,12,17, -97,17,41,0,40,0,40,204,41,0,3,0,68,0,40,0, -14,17,97,17,41,0,40,0,116,206,41,0,3,0,68,0, -40,0,15,17,97,17,41,0,40,0,192,208,41,0,3,0, -68,0,40,0,16,17,97,17,41,0,40,0,12,211,41,0, -3,0,68,0,40,0,17,17,97,17,41,0,40,0,88,213, -41,0,3,0,68,0,40,0,18,17,97,17,41,0,40,0, -252,200,41,0,3,0,68,0,40,0,12,17,110,17,41,0, -40,0,36,198,4,200,41,0,4,0,71,0,40,0,11,17, -105,17,12,17,101,17,171,17,41,0,40,0,36,198,196,214, -41,0,4,0,70,0,40,0,11,17,105,17,18,17,110,17, -41,0,0,172,66,0,0,17,97,17,152,176,66,0,2,17, -97,17,228,178,66,0,3,17,97,17,124,183,66,0,5,17, -97,17,200,185,66,0,6,17,97,17,20,188,66,0,7,17, -97,17,172,192,66,0,9,17,97,17,68,197,66,0,11,17, -97,17,144,199,66,0,12,17,97,17,40,204,66,0,14,17, -97,17,116,206,66,0,15,17,97,17,192,208,66,0,16,17, -97,17,12,211,66,0,17,17,97,17,88,213,66,0,18,17, -97,17,56,204,224,172,2,0,69,0,14,17,97,17,183,17, -0,17,105,17,252,200,88,199,2,0,68,0,12,17,110,17, -11,17,116,17,176,198,66,0,11,17,110,17,162,48,209,48, -252,48,200,48,4,0,69,0,162,48,207,48,154,48,252,48, -200,48,162,48,243,48,218,48,162,48,4,0,69,0,162,48, -243,48,216,48,154,48,162,48,164,48,203,48,243,48,176,48, -4,0,69,8,164,48,203,48,243,48,175,48,153,48,168,48, -185,48,175,48,252,48,201,48,5,0,70,8,168,48,185,48, -175,48,252,48,200,48,153,48,172,48,68,0,171,48,153,48, -237,48,243,48,172,48,68,0,171,48,153,48,243,48,222,48, -174,48,172,48,2,0,68,8,173,48,153,48,171,48,153,48, -174,48,68,0,173,48,153,48,203,48,252,48,174,48,235,48, -192,48,252,48,4,0,70,0,173,48,153,48,235,48,191,48, -153,48,252,48,173,48,237,48,176,48,233,48,224,48,5,0, -70,0,173,48,237,48,175,48,153,48,233,48,224,48,176,48, -68,0,175,48,153,48,233,48,224,48,176,48,70,0,175,48, -153,48,233,48,224,48,200,48,243,48,175,48,235,48,188,48, -164,48,237,48,5,0,70,0,175,48,235,48,187,48,153,48, -164,48,237,48,179,48,252,48,221,48,3,0,68,8,179,48, -252,48,219,48,154,48,183,48,234,48,243,48,176,48,4,0, -69,8,183,48,234,48,243,48,175,48,153,48,192,48,68,0, -191,48,153,48,252,48,185,48,199,48,67,0,198,48,153,48, -183,48,201,48,67,0,200,48,153,48,235,48,209,48,70,0, -207,48,154,48,252,48,187,48,243,48,200,48,209,48,68,0, -207,48,154,48,252,48,196,48,208,48,69,0,207,48,153,48, -252,48,236,48,235,48,212,48,70,0,210,48,154,48,162,48, 
-185,48,200,48,235,48,212,48,68,0,210,48,154,48,175,48, -235,48,212,48,67,0,210,48,154,48,179,48,211,48,67,0, -210,48,153,48,235,48,213,48,161,48,233,48,195,48,201,48, -5,0,70,8,213,48,161,48,233,48,195,48,200,48,153,48, -214,48,70,0,213,48,153,48,195,48,183,48,167,48,235,48, -218,48,67,0,216,48,154,48,189,48,218,48,68,0,216,48, -154,48,203,48,210,48,218,48,68,0,216,48,154,48,243,48, -185,48,218,48,252,48,184,48,3,0,69,8,216,48,154,48, -252,48,183,48,153,48,217,48,68,0,216,48,153,48,252,48, -191,48,221,48,69,0,219,48,154,48,164,48,243,48,200,48, -220,48,68,0,219,48,153,48,235,48,200,48,221,48,243,48, -201,48,3,0,69,8,219,48,154,48,243,48,200,48,153,48, -223,48,234,48,208,48,252,48,235,48,5,0,70,0,223,48, -234,48,207,48,153,48,252,48,235,48,225,48,172,48,2,0, -67,8,225,48,171,48,153,48,225,48,172,48,200,48,243,48, -4,0,69,0,225,48,171,48,153,48,200,48,243,48,228,48, -252,48,201,48,3,0,68,8,228,48,252,48,200,48,153,48, -235,48,212,48,252,48,3,0,68,0,235,48,210,48,154,48, -252,48,235,48,252,48,214,48,235,48,4,0,69,0,235,48, -252,48,213,48,153,48,235,48,236,48,243,48,200,48,178,48, -243,48,5,0,70,0,236,48,243,48,200,48,177,48,153,48, -243,48,192,6,66,230,213,6,84,6,211,6,66,230,210,6, -84,6,38,6,67,0,74,6,84,6,39,6,38,6,67,0, -74,6,84,6,213,6,38,6,67,0,74,6,84,6,72,6, -38,6,67,0,74,6,84,6,199,6,38,6,67,0,74,6, -84,6,198,6,38,6,67,0,74,6,84,6,200,6,38,6, -67,0,74,6,84,6,208,6,38,6,67,0,74,6,84,6, -73,6,38,6,67,0,74,6,84,6,44,6,38,6,67,0, -74,6,84,6,45,6,38,6,67,0,74,6,84,6,69,6, -38,6,67,0,74,6,84,6,74,6,38,6,67,0,74,6, -84,6,49,6,38,6,67,0,74,6,84,6,50,6,38,6, -67,0,74,6,84,6,70,6,38,6,67,0,74,6,84,6, -46,6,38,6,67,0,74,6,84,6,71,6,34,6,66,230, -39,6,83,6,35,6,66,230,39,6,84,6,36,6,66,230, -72,6,84,6,37,6,66,220,39,6,85,6,38,6,66,230, -74,6,84,6,68,6,34,6,2,0,67,230,68,6,39,6, -83,6,68,6,35,6,2,0,67,230,68,6,39,6,84,6, -68,6,37,6,2,0,67,220,68,6,39,6,85,6,199,48, -66,8,198,48,153,48,230,230,129,230,0,3,230,230,129,230, -1,3,230,230,129,230,19,3,230,230,130,230,8,3,1,3, -0,129,130,130,113,15,114,15,0,129,130,132,113,15,116,15, -0,129,130,130,113,15,128,15,1,0,170,17,1,0,172,17, -1,0,173,17,1,0,176,17,1,0,177,17,1,0,178,17, -1,0,179,17,1,0,180,17,1,0,181,17,1,0,97,17, -1,0,98,17,1,0,99,17,1,0,100,17,1,0,101,17, -1,0,102,17,1,0,103,17,1,0,104,17,1,0,105,17, -1,0,106,17,1,0,107,17,1,0,108,17,1,0,109,17, -1,0,110,17,1,0,111,17,1,0,112,17,1,0,113,17, -1,0,114,17,1,0,115,17,1,0,116,17,1,0,117,17, -0,8,129,8,153,48,0,8,129,8,154,48,0,1,128,255, -224,239,67,127,223,112,207,135,199,230,102,70,100,70,102,91, -18,0,0,4,0,0,0,67,33,2,41,174,194,192,255,239, -203,114,191,0,0,0,0,0,0,0,64,0,128,136,0,0, -254,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,152,0,193,102,224,128,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,3,0,0,4,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, -0,0,0,0,0,0,0,0,0,0,0,39,220,2,159,144 -} -}; -U_CDECL_END -U_CAPI const void * U_EXPORT2 uprv_getICUData_other() { return icudt64l_norm_dat.bytes; }
diff --git a/third_party/tensorflow-text/src/third_party/icu/udata.patch b/third_party/tensorflow-text/src/third_party/icu/udata.patch deleted file mode 100644 index eac6844..0000000 --- a/third_party/tensorflow-text/src/third_party/icu/udata.patch +++ /dev/null
@@ -1,68 +0,0 @@ -diff --git a/icu4c/source/common/udata.cpp b/icu4c/source/common/udata.cpp -index efcd2a2f97..80f275c415 100644 ---- a/icu4c/source/common/udata.cpp -+++ b/icu4c/source/common/udata.cpp -@@ -18,11 +18,10 @@ - - #include "unicode/utypes.h" /* U_PLATFORM etc. */ - --#ifdef __GNUC__ --/* if gcc --#define ATTRIBUTE_WEAK __attribute__ ((weak)) --might have to #include some other header --*/ -+#if defined(__GNUC__) || defined(__SUNPRO_CC) -+# define ATTRIBUTE_WEAK __attribute__ ((weak)) -+#else -+# define ATTRIBUTE_WEAK - #endif - - #include "unicode/putil.h" -@@ -649,10 +648,11 @@ extern "C" const DataHeader U_DATA_API U_ICUDATA_ENTRY_POINT; - * partial-data-library access functions where each returns a pointer - * to its data package, if it is linked in. - */ --/* --extern const void *uprv_getICUData_collation(void) ATTRIBUTE_WEAK; --extern const void *uprv_getICUData_conversion(void) ATTRIBUTE_WEAK; --*/ -+ -+U_CDECL_BEGIN -+const void *uprv_getICUData_other(void) ATTRIBUTE_WEAK; -+U_CDECL_END -+ - - /*----------------------------------------------------------------------* - * * -@@ -709,11 +709,11 @@ openCommonData(const char *path, /* Path from OpenChoice? */ - /* - if (uprv_getICUData_collation) { - setCommonICUDataPointer(uprv_getICUData_collation(), FALSE, pErrorCode); -+ }*/ -+ if (uprv_getICUData_other) { -+ setCommonICUDataPointer(uprv_getICUData_other(), FALSE, pErrorCode); - } -- if (uprv_getICUData_conversion) { -- setCommonICUDataPointer(uprv_getICUData_conversion(), FALSE, pErrorCode); -- } -- */ -+ - #if U_PLATFORM_HAS_WINUWP_API == 0 // Windows UWP Platform does not support dll icu data at this time - setCommonICUDataPointer(&U_ICUDATA_ENTRY_POINT, FALSE, pErrorCode); - { -diff --git a/icu4c/source/common/unicode/uconfig.h b/icu4c/source/common/unicode/uconfig.h -index e61eb5795c..95c43f2334 100644 ---- a/icu4c/source/common/unicode/uconfig.h -+++ b/icu4c/source/common/unicode/uconfig.h -@@ -55,6 +55,11 @@ - #include "uconfig_local.h" - #endif - -+// Tensorflow is statically linked on all platforms. -+#ifndef U_STATIC_IMPLEMENTATION -+#define U_STATIC_IMPLEMENTATION -+#endif -+ - /** - * \def U_DEBUG - * Determines whether to include debugging code.
diff --git a/third_party/tensorflow-text/src/third_party/pybind11/BUILD b/third_party/tensorflow-text/src/third_party/pybind11/BUILD deleted file mode 100644 index c3f090f..0000000 --- a/third_party/tensorflow-text/src/third_party/pybind11/BUILD +++ /dev/null
@@ -1,2 +0,0 @@ -# This empty BUILD file is required to make Bazel treat this directory as a package. -# Bazel cannot build this package without it.
diff --git a/third_party/tensorflow-text/src/third_party/pybind11/BUILD.bzl b/third_party/tensorflow-text/src/third_party/pybind11/BUILD.bzl deleted file mode 100644 index dcff8c2..0000000 --- a/third_party/tensorflow-text/src/third_party/pybind11/BUILD.bzl +++ /dev/null
@@ -1,29 +0,0 @@ -""" -BUILD file for pybind11 package, since the github version does not have one. -""" - -package(default_visibility = ["//visibility:public"]) - -cc_library( - name = "pybind11", - hdrs = glob( - include = [ - "include/pybind11/*.h", - "include/pybind11/detail/*.h", - ], - exclude = [ - "include/pybind11/common.h", - "include/pybind11/eigen.h", - ], - ), - copts = [ - "-fexceptions", - "-Wno-undefined-inline", - "-Wno-pragma-once-outside-header", - ], - includes = ["include"], - strip_include_prefix = "include", - deps = [ - "@org_tensorflow//third_party/python_runtime:headers", - ], -)
diff --git a/third_party/tensorflow-text/src/third_party/sentencepiece/BUILD b/third_party/tensorflow-text/src/third_party/sentencepiece/BUILD deleted file mode 100644 index 82bab3f..0000000 --- a/third_party/tensorflow-text/src/third_party/sentencepiece/BUILD +++ /dev/null
@@ -1 +0,0 @@ -# This empty BUILD file is required to make Bazel treat this directory as a package.
diff --git a/third_party/tensorflow-text/src/third_party/sentencepiece/processor.patch b/third_party/tensorflow-text/src/third_party/sentencepiece/processor.patch deleted file mode 100644 index 5fa1b84..0000000 --- a/third_party/tensorflow-text/src/third_party/sentencepiece/processor.patch +++ /dev/null
@@ -1,66 +0,0 @@ -diff --git a/src/BUILD b/src/BUILD -index b4298d2..7ce779f 100644 ---- a/src/BUILD -+++ b/src/BUILD -@@ -71,9 +71,6 @@ cc_library( - ":common", - ":sentencepiece_cc_proto", - ":sentencepiece_model_cc_proto", -- "@com_github_gflags_gflags//:gflags", -- "@com_google_glog//:glog", -- "@com_google_googletest//:gtest", - "@com_google_absl//absl/memory", - "@com_google_absl//absl/strings", - "@com_google_absl//absl/container:flat_hash_map", -@@ -81,7 +78,17 @@ cc_library( - "//third_party/darts_clone", - "//src/deps:threadpool", - "//src/deps:status", -- ], -+ ] + select({ -+ "@org_tensorflow//tensorflow:mobile": [ -+ "@org_tensorflow//tensorflow/core:tflite_portable_logging", -+ ], -+ "//conditions:default": [ -+ "@local_config_tf//:libtensorflow_framework", -+ "@local_config_tf//:tf_header_lib", -+ "@com_google_absl//absl/functional:function_ref", -+ "@com_google_absl//absl/strings:cord", -+ ], -+ }), - ) - - cc_library( -diff --git a/src/common.h b/src/common.h -index f1f51ec..339316a 100644 ---- a/src/common.h -+++ b/src/common.h -@@ -21,7 +21,7 @@ - #include <utility> - #include <vector> - --#include <glog/logging.h> -+#include "tensorflow/core/platform/logging.h" - #include "src/deps/basic_types.h" - #include "src/deps/canonical_errors.h" - #include "src/deps/status.h" -diff --git a/src/normalizer.h b/src/normalizer.h -index c16ac16..2af58be 100644 ---- a/src/normalizer.h -+++ b/src/normalizer.h -@@ -21,7 +21,6 @@ - #include <utility> - #include <vector> - --#include "gtest/gtest_prod.h" - #include "absl/strings/string_view.h" - #include "third_party/darts_clone/include/darts.h" - #include "src/common.h" -@@ -97,7 +96,6 @@ class Normalizer { - friend class Builder; - - private: -- FRIEND_TEST(NormalizerTest, EncodeDecodePrecompiledCharsMapTest); - - void Init(); -
diff --git a/third_party/tensorflow-text/src/third_party/tensorflow/BUILD b/third_party/tensorflow-text/src/third_party/tensorflow/BUILD deleted file mode 100644 index 2b0003e..0000000 --- a/third_party/tensorflow-text/src/third_party/tensorflow/BUILD +++ /dev/null
@@ -1 +0,0 @@ -# Needed for Bazel to treat this directory as a package
diff --git a/third_party/tensorflow-text/src/third_party/tensorflow/BUILD.tpl b/third_party/tensorflow-text/src/third_party/tensorflow/BUILD.tpl deleted file mode 100644 index d3b4d27..0000000 --- a/third_party/tensorflow-text/src/third_party/tensorflow/BUILD.tpl +++ /dev/null
@@ -1,17 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -cc_library( - name = "tf_header_lib", - hdrs = [":tf_header_include"], - includes = ["include"], - visibility = ["//visibility:public"], -) - -cc_library( - name = "libtensorflow_framework", - srcs = [":libtensorflow_framework_so"], - visibility = ["//visibility:public"], -) - -%{TF_HEADER_GENRULE} -%{TF_SHARED_LIBRARY_GENRULE}
diff --git a/third_party/tensorflow-text/src/third_party/tensorflow/tf_configure.bzl b/third_party/tensorflow-text/src/third_party/tensorflow/tf_configure.bzl deleted file mode 100644 index 1fc385a..0000000 --- a/third_party/tensorflow-text/src/third_party/tensorflow/tf_configure.bzl +++ /dev/null
@@ -1,213 +0,0 @@ -"""Setup TensorFlow as external dependency. - -This is used for the generation of the dynamic libraries used for custom ops. -See: http://github.com/tensorflow/custom-op -""" - -_TF_HEADER_DIR = "TF_HEADER_DIR" -_TF_SHARED_LIBRARY_DIR = "TF_SHARED_LIBRARY_DIR" -_TF_SHARED_LIBRARY_NAME = "TF_SHARED_LIBRARY_NAME" - -def _tpl(repository_ctx, tpl, substitutions = {}, out = None): - if not out: - out = tpl - repository_ctx.template( - out, - Label("//third_party/tensorflow:%s.tpl" % tpl), - substitutions, - ) - -def _fail(msg): - """Output failure message when auto configuration fails.""" - red = "\033[0;31m" - no_color = "\033[0m" - fail("%sPython Configuration Error:%s %s\n" % (red, no_color, msg)) - -def _is_windows(repository_ctx): - """Returns true if the host operating system is windows.""" - os_name = repository_ctx.os.name.lower() - if os_name.find("windows") != -1: - return True - return False - -def _execute( - repository_ctx, - cmdline, - error_msg = None, - error_details = None, - empty_stdout_fine = False): - """Executes an arbitrary shell command. - - Helper for executes an arbitrary shell command. - - Args: - repository_ctx: the repository_ctx object. - cmdline: list of strings, the command to execute. - error_msg: string, a summary of the error if the command fails. - error_details: string, details about the error or steps to fix it. - empty_stdout_fine: bool, if True, an empty stdout result is fine, otherwise - it's an error. - - Returns: - The result of repository_ctx.execute(cmdline). - """ - result = repository_ctx.execute(cmdline) - if result.stderr or not (empty_stdout_fine or result.stdout): - _fail("\n".join([ - error_msg.strip() if error_msg else "Repository command failed", - result.stderr.strip(), - error_details if error_details else "", - ])) - return result - -def _read_dir(repository_ctx, src_dir): - """Returns a string with all files in a directory. - - Finds all files inside a directory, traversing subfolders and following - symlinks. The returned string contains the full path of all files - separated by line breaks. - - Args: - repository_ctx: the repository_ctx object. - src_dir: directory to find files from. - - Returns: - A string of all files inside the given dir. - """ - if _is_windows(repository_ctx): - src_dir = src_dir.replace("/", "\\") - find_result = _execute( - repository_ctx, - ["cmd.exe", "/c", "dir", src_dir, "/b", "/s", "/a-d"], - empty_stdout_fine = True, - ) - - # src_files will be used in genrule.outs where the paths must - # use forward slashes. - result = find_result.stdout.replace("\\", "/") - else: - find_result = _execute( - repository_ctx, - ["find", src_dir, "-follow", "-type", "f"], - empty_stdout_fine = True, - ) - result = find_result.stdout - return result - -def _genrule(genrule_name, command, outs): - """Returns a string with a genrule. - - Genrule executes the given command and produces the given outputs. - - Args: - genrule_name: A unique name for genrule target. - command: The command to run. - outs: A list of files generated by this rule. - - Returns: - A genrule target. 
- """ - return ( - "genrule(\n" + - ' name = "' + - genrule_name + '",\n' + - " outs = [\n" + - outs + - "\n ],\n" + - ' cmd = """\n' + - command + - '\n """,\n' + - ")\n" - ) - -def _norm_path(path): - """Returns a path with '/' and remove the trailing slash.""" - path = path.replace("\\", "/") - if path[-1] == "/": - path = path[:-1] - return path - -def _symlink_genrule_for_dir( - repository_ctx, - src_dir, - dest_dir, - genrule_name, - src_files = [], - dest_files = []): - """Returns a genrule to symlink(or copy if on Windows) a set of files. - - If src_dir is passed, files will be read from the given directory; otherwise - we assume files are in src_files and dest_files. - - Args: - repository_ctx: the repository_ctx object. - src_dir: source directory. - dest_dir: directory to create symlink in. - genrule_name: genrule name. - src_files: list of source files instead of src_dir. - dest_files: list of corresonding destination files. - - Returns: - genrule target that creates the symlinks. - """ - if src_dir != None: - src_dir = _norm_path(src_dir) - dest_dir = _norm_path(dest_dir) - files = "\n".join(sorted(_read_dir(repository_ctx, src_dir).splitlines())) - - # Create a list with the src_dir stripped to use for outputs. - dest_files = files.replace(src_dir, "").splitlines() - src_files = files.splitlines() - command = [] - outs = [] - for i in range(len(dest_files)): - if dest_files[i] != "": - # If we have only one file to link we do not want to use the dest_dir, as - # $(@D) will include the full path to the file. - dest = "$(@D)/" + dest_dir + dest_files[i] if len(dest_files) != 1 else "$(@D)/" + dest_files[i] - - # Copy the headers to create a sandboxable setup. - cmd = "cp -f" - command.append(cmd + ' "%s" "%s"' % (src_files[i], dest)) - outs.append(' "' + dest_dir + dest_files[i] + '",') - genrule = _genrule( - genrule_name, - ";\n".join(command), - "\n".join(outs), - ) - return genrule - -def _tf_pip_impl(repository_ctx): - tf_header_dir = repository_ctx.os.environ[_TF_HEADER_DIR] - tf_header_rule = _symlink_genrule_for_dir( - repository_ctx, - tf_header_dir, - "include", - "tf_header_include", - ) - - tf_shared_library_dir = repository_ctx.os.environ[_TF_SHARED_LIBRARY_DIR] - tf_shared_library_name = repository_ctx.os.environ[_TF_SHARED_LIBRARY_NAME] - tf_shared_library_path = "%s/%s" % (tf_shared_library_dir, tf_shared_library_name) - - tf_shared_library_rule = _symlink_genrule_for_dir( - repository_ctx, - None, - "", - "libtensorflow_framework_so", - [tf_shared_library_path], - [tf_shared_library_name], - ) - - _tpl(repository_ctx, "BUILD", { - "%{TF_HEADER_GENRULE}": tf_header_rule, - "%{TF_SHARED_LIBRARY_GENRULE}": tf_shared_library_rule, - }) - -tf_configure = repository_rule( - implementation = _tf_pip_impl, - environ = [ - _TF_HEADER_DIR, - _TF_SHARED_LIBRARY_DIR, - ], -)
diff --git a/third_party/tensorflow-text/update.sh b/third_party/tensorflow-text/update.sh new file mode 100755 index 0000000..466ee17 --- /dev/null +++ b/third_party/tensorflow-text/update.sh
@@ -0,0 +1,33 @@ +#!/bin/bash +# Copyright 2023 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. + +set -e + +if [ $(basename ${PWD}) != "src" ]; then + echo "Please set the current working directory to chromium/src first!" + exit 1 +fi + +files=( + "LICENSE" + "tensorflow_text/core/kernels/regex_split.cc" + "tensorflow_text/core/kernels/regex_split.h" + "tensorflow_text/core/kernels/wordpiece_tokenizer.cc" + "tensorflow_text/core/kernels/wordpiece_tokenizer.h" +) + +git clone --depth 1 https://github.com/tensorflow/text /tmp/text +rm -rf third_party/tensorflow-text/src/* +pushd third_party/tensorflow-text/src/ + +for file in ${files[@]} ; do + if [ ! -d "$(dirname ${file})" ] ; then + mkdir -p "$(dirname ${file})" + fi + cp "/tmp/text/${file}" "${file}" +done + +popd +rm -rf /tmp/text
diff --git a/third_party/tflite/README.chromium b/third_party/tflite/README.chromium index 76b31e6..29446270 100644 --- a/third_party/tflite/README.chromium +++ b/third_party/tflite/README.chromium
@@ -1,8 +1,8 @@ Name: TensorFlow Lite Short Name: tflite URL: https://github.com/tensorflow/tensorflow -Version: 5abeda2494b0eb551b5c4689e47008fe374684dc -Date: 2023/06/05 +Version: f3e12f6b28647da499edb69794745f14869f0df9 +Date: 2023/06/12 License: Apache 2.0 License File: LICENSE Security Critical: Yes
diff --git a/tools/autotest.py b/tools/autotest.py index 042f77a..0f3c795 100755 --- a/tools/autotest.py +++ b/tools/autotest.py
@@ -133,10 +133,14 @@ raise CommandError(e.cmd, e.returncode, e.output) from None -def BuildTestTargetsWithNinja(out_dir, targets, dry_run): +def BuildTestTargets(out_dir, targets, dry_run, use_siso): """Builds the specified targets with ninja""" # Use autoninja from PATH to match version used for manual builds. - ninja_path = 'autoninja' + if use_siso: + ninja_path = 'autosiso' + else: + ninja_path = 'autoninja' + if sys.platform.startswith('win32'): ninja_path += '.bat' cmd = [ninja_path, '-C', out_dir] + targets @@ -453,6 +457,10 @@ parser.add_argument('--no-fast-local-dev', action='store_true', help='Do not add --fast-local-dev for Android tests.') + parser.add_argument('--siso', + '-s', + action='store_true', + help='Use siso to build instead of ninja.') parser.add_argument('files', metavar='FILE_NAME', nargs="+", @@ -483,7 +491,7 @@ ExitWithMessage('Failed to derive a gtest filter') assert targets - build_ok = BuildTestTargetsWithNinja(out_dir, targets, args.dry_run) + build_ok = BuildTestTargets(out_dir, targets, args.dry_run, args.siso) # If we used the target cache, it's possible we chose the wrong target because # a gn file was changed. The build step above will check for gn modifications @@ -497,7 +505,7 @@ # Note that this can happen, for example, if you rename a test target. print('gn config was changed, trying to build again', file=sys.stderr) targets = new_targets - build_ok = BuildTestTargetsWithNinja(out_dir, targets, args.dry_run) + build_ok = BuildTestTargets(out_dir, targets, args.dry_run, args.siso) if not build_ok: sys.exit(1)
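A minimal Python sketch of the build-tool selection the autotest.py hunk above introduces, for illustration only: autosiso is chosen when the new --siso/-s flag is passed, autoninja otherwise, with a .bat suffix appended on Windows. The output directory and target names below are made up for the example.

import subprocess
import sys

def build_test_targets(out_dir, targets, use_siso):
    # Mirrors the diff: prefer autosiso when requested, otherwise autoninja.
    tool = 'autosiso' if use_siso else 'autoninja'
    if sys.platform.startswith('win32'):
        tool += '.bat'
    # Both wrappers accept -C <out_dir> followed by the targets to build.
    return subprocess.call([tool, '-C', out_dir] + list(targets)) == 0

# Hypothetical invocation:
# build_test_targets('out/Default', ['base_unittests'], use_siso=True)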
diff --git a/tools/cast3p/runtime.version b/tools/cast3p/runtime.version index e72c78e..615060e 100644 --- a/tools/cast3p/runtime.version +++ b/tools/cast3p/runtime.version
@@ -1 +1 @@ -364749 +365328
diff --git a/tools/clang/pylib/clang/plugin_testing.py b/tools/clang/pylib/clang/plugin_testing.py index e7a7dc0..5831222 100755 --- a/tools/clang/pylib/clang/plugin_testing.py +++ b/tools/clang/pylib/clang/plugin_testing.py
@@ -57,7 +57,7 @@ with open(clang_exe, 'rb') as f: if 'diagnostics-show-line-numbers'.encode('ascii') in f.read(): clang_cmd.extend([ - '-fno-diagnostics-show-line-numbers', '-Xclang', + '-fno-diagnostics-show-line-numbers', '-fcaret-diagnostics-max-lines=1' ])
diff --git a/tools/clang/scripts/build.py b/tools/clang/scripts/build.py index 8b44bcb..b7ee5102 100755 --- a/tools/clang/scripts/build.py +++ b/tools/clang/scripts/build.py
@@ -27,7 +27,7 @@ import json import multiprocessing import os -import pipes +import shlex import platform import re import shutil @@ -112,12 +112,12 @@ # # We want to pass additional arguments to command[0], not to the shell, # so manually join everything into a single string. - # Annoyingly, for "svn co url c:\path", pipes.quote() thinks that it should + # Annoyingly, for "svn co url c:\path", shlex.quote() thinks that it should # quote c:\path but svn can't handle quoted paths on Windows. Since on # Windows follow-on args are passed to args[0] instead of the shell, don't # do the single-string transformation there. if sys.platform != 'win32': - command = ' '.join([pipes.quote(c) for c in command]) + command = ' '.join([shlex.quote(c) for c in command]) print('Running', command) if subprocess.call(command, env=env, shell=True) == 0: return True
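For context on the pipes-to-shlex migration in build.py above (the same substitution is applied to several other tools later in this change), a small self-contained sketch; shlex.quote is the documented standard-library replacement for the deprecated pipes.quote. The svn command and Windows path below are illustrative only, echoing the comment in the hunk.

import shlex
import sys

command = ['svn', 'co', 'https://example.com/repo', r'C:\path with spaces']
if sys.platform != 'win32':
    # On POSIX the list is joined into a single shell-quoted string before it
    # is executed with shell=True; arguments containing spaces or backslashes
    # are single-quoted, exactly as pipes.quote() used to do.
    command = ' '.join(shlex.quote(c) for c in command)
print(command)
# svn co https://example.com/repo 'C:\path with spaces'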
diff --git a/tools/cr/cr/base/host.py b/tools/cr/cr/base/host.py index 4098780..de9cdde95 100644 --- a/tools/cr/cr/base/host.py +++ b/tools/cr/cr/base/host.py
@@ -7,7 +7,7 @@ from __future__ import print_function import os -import pipes +import shlex import signal import subprocess @@ -124,7 +124,7 @@ @cr.Plugin.activemethod def Shell(self, *command): - command = ' '.join([pipes.quote(arg) for arg in command]) + command = ' '.join([shlex.quote(arg) for arg in command]) return self._Execute([command], shell=True, ignore_interrupt_signal=True) @cr.Plugin.activemethod
diff --git a/tools/metrics/actions/actions.xml b/tools/metrics/actions/actions.xml index 67bf9cf..8f5e518 100644 --- a/tools/metrics/actions/actions.xml +++ b/tools/metrics/actions/actions.xml
@@ -33332,6 +33332,15 @@ </description> </action> +<action name="Signin_TwoScreens_SwipeDismiss"> + <owner>scottyoder@google.com</owner> + <owner>chrome-signin-team@google.com</owner> + <description> + Recorded when the user swipes to dismiss the signin or sync screen during + the "TwoScreensSignin" UI flow. Recorded for iOS only. + </description> +</action> + <action name="Signin_Undo_Signin"> <owner>gogerald@chromium.org</owner> <description>
diff --git a/tools/metrics/histograms/enums.xml b/tools/metrics/histograms/enums.xml index 207af0a1..3d0f5d5 100644 --- a/tools/metrics/histograms/enums.xml +++ b/tools/metrics/histograms/enums.xml
@@ -5935,6 +5935,13 @@ <int value="2" label="Denied and can't ask again"/> </enum> +<enum name="AudioRecordingMode"> + <int value="0" label="No audio recording"/> + <int value="1" label="Recording the system audio"/> + <int value="2" label="Recording the microphone"/> + <int value="3" label="Recording the microphone and system audio together"/> +</enum> + <enum name="AudioRenderDeviceError"> <int value="0" label="No error"/> <int value="1" label="Error during stream creation"/> @@ -42731,6 +42738,8 @@ <int value="4580" label="FedCmRpContext"/> <int value="4581" label="EventTimingArtificialPointerupOrClick"/> <int value="4582" label="AbortSignalAny"/> + <int value="4583" label="FedCmIdpSigninStatusApi"/> + <int value="4584" label="FedCmIdpSigninStatusJsApi"/> </enum> <enum name="FeaturePolicyAllowlistType"> @@ -60440,6 +60449,8 @@ <int value="-1317768255" label="PermissionStorageAccessAPI:enabled"/> <int value="-1316769004" label="CrossOriginOpenerPolicyReporting:disabled"/> <int value="-1315360754" label="WindowLayoutMenu:disabled"/> + <int value="-1314823250" + label="OsSettingsDeprecateSyncMetricsToggle:disabled"/> <int value="-1314757884" label="TabGroupsSave:enabled"/> <int value="-1314603238" label="ChromeHomePullToRefreshIphAtTop:enabled"/> <int value="-1313810940" label="StrictOriginIsolation:disabled"/> @@ -65819,6 +65830,7 @@ <int value="1515472077" label="VirtualKeyboardRoundCorners:disabled"/> <int value="1517863401" label="history-entry-requires-user-gesture"/> <int value="1517988103" label="FilesBannerFramework:disabled"/> + <int value="1518314853" label="OsSettingsDeprecateSyncMetricsToggle:enabled"/> <int value="1518436057" label="NtpChromeCartInHistoryClusterModule:enabled"/> <int value="1518752357" label="ContextualPageActionPriceTracking:enabled"/> <int value="1518833340" label="MediaAppDisplayExif:enabled"/> @@ -68121,7 +68133,7 @@ <int value="225" label="alias-webkit-column-span"/> <int value="226" label="alias-webkit-column-width"/> <int value="227" label="alias-webkit-columns"/> - <int value="228" label="webkit-box-decoration-break"/> + <int value="228" label="webkit-box-decoration-break (obsolete)"/> <int value="229" label="webkit-filter"/> <int value="230" label="align-content"/> <int value="231" label="align-items"/> @@ -68246,7 +68258,7 @@ <int value="350" label="webkit-wrap-flow"/> <int value="351" label="webkit-wrap-through"/> <int value="352" label="webkit-wrap"/> - <int value="353" label="webkit-tap-highlight-color"/> + <int value="353" label="webkit-tap-highlight-color (obsolete)"/> <int value="354" label="webkit-app-region"/> <int value="355" label="clip-path"/> <int value="356" label="clip-rule"/> @@ -68290,7 +68302,7 @@ <int value="394" label="writing-mode"/> <int value="395" label="webkit-svg-shadow"/> <int value="396" label="webkit-cursor-visibility"/> - <int value="397" label="image-orientation"/> + <int value="397" label="image-orientation (obsolete)"/> <int value="398" label="image-resolution"/> <int value="399" label="webkit-blend-mode"/> <int value="400" label="webkit-background-blend-mode"/> @@ -68307,8 +68319,8 @@ <int value="411" label="webkit-overflow-scrolling"/> <int value="412" label="alias-webkit-app-region"/> <int value="413" label="alias-webkit-filter"/> - <int value="414" label="webkit-box-decoration-break-0"/> - <int value="415" label="webkit-tap-highlight-color-0"/> + <int value="414" label="webkit-box-decoration-break"/> + <int value="415" label="webkit-tap-highlight-color"/> <int value="416" label="buffered-rendering"/> <int 
value="417" label="grid-auto-rows"/> <int value="418" label="grid-auto-columns"/> @@ -68400,7 +68412,7 @@ <int value="504" label="translate"/> <int value="505" label="rotate"/> <int value="506" label="scale"/> - <int value="507" label="image-orientation-0"/> + <int value="507" label="image-orientation"/> <int value="508" label="backdrop-filter"/> <int value="509" label="text-combine-upright"/> <int value="510" label="text-orientation"/> @@ -68657,6 +68669,7 @@ <int value="761" label="scroll-start-target"/> <int value="762" label="timeline-scope"/> <int value="763" label="scrollbar-color"/> + <int value="764" label="word-boundary-detection"/> </enum> <enum name="MappedEditingCommands"> @@ -88059,6 +88072,10 @@ </enum> <enum name="ProjectorPolicyChangeHandlingError"> + <obsolete> + Deprecated in M116. No longer needed since no error reported and converted + to CHECK. + </obsolete> <int value="0" label="SWA manager is not available"/> <int value="1" label="Web app provider is not available"/> <int value="2" @@ -101095,9 +101112,9 @@ <int value="100036" label="AccessibilityEnhancedNetworkVoicesInSelectToSpeakAllowed"/> <int value="100037" label="AccessibilityFloatingMenuPosition"/> - <int value="100038" label="AccessibilityGreyscaleAmount"/> - <int value="100039" label="AccessibilityHueRotationAmount"/> - <int value="100040" label="AccessibilitySaturationAmount"/> + <int value="100038" label="(obsolete) AccessibilityGreyscaleAmount"/> + <int value="100039" label="(obsolete) AccessibilityHueRotationAmount"/> + <int value="100040" label="(obsolete) AccessibilitySaturationAmount"/> <int value="100041" label="AccessibilityScreenMagnifierCenterFocus"/> <int value="100042" label="AccessibilityScreenMagnifierFocusFollowingEnabled"/> @@ -101112,7 +101129,7 @@ <int value="100050" label="AccessibilitySelectToSpeakVoiceName"/> <int value="100051" label="AccessibilitySelectToSpeakVoiceSwitching"/> <int value="100052" label="AccessibilitySelectToSpeakWordHighlight"/> - <int value="100053" label="AccessibilitySepiaAmount"/> + <int value="100053" label="(obsolete) AccessibilitySepiaAmount"/> <int value="100054" label="AccessibilitySwitchAccessAutoScanEnabled"/> <int value="100055" label="AccessibilitySwitchAccessAutoScanKeyboardSpeedMs"/> <int value="100056" label="AccessibilitySwitchAccessAutoScanSpeedMs"/> @@ -114993,6 +115010,9 @@ <int value="62" label="Open in PWA window"/> <int value="63" label="Send to your devices"/> <int value="64" label="Create QR Code"/> + <int value="65" label="Customize your Chrome"/> + <int value="66" label="Close this profile"/> + <int value="67" label="Manage your Google account"/> </enum> <enum name="WrongConfigurationMetric">
diff --git a/tools/metrics/histograms/metadata/ash/histograms.xml b/tools/metrics/histograms/metadata/ash/histograms.xml index 45e32c7..1791408 100644 --- a/tools/metrics/histograms/metadata/ash/histograms.xml +++ b/tools/metrics/histograms/metadata/ash/histograms.xml
@@ -1617,6 +1617,19 @@ </token> </histogram> +<histogram + name="Ash.CaptureModeController.AudioRecordingMode.{TabletOrClamshell}" + enum="AudioRecordingMode" expires_after="2024-06-01"> + <owner>afakhry@chromium.org</owner> + <owner>gzadina@google.com</owner> + <summary> + Recorded at the end of a capture mode session in {TabletOrClamshell} to + report the user's audio recording mode setting, only if video recording was + selected, and the recording type supports audio recording. + </summary> + <token key="TabletOrClamshell" variants="DisplayModes"/> +</histogram> + <histogram name="Ash.CaptureModeController.BarButtons.{TabletOrClamshell}" enum="CaptureModeBarButtonType" expires_after="2023-08-15"> <owner>afakhry@chromium.org</owner> @@ -1656,6 +1669,11 @@ <histogram name="Ash.CaptureModeController.CaptureAudioOnMetric.{TabletOrClamshell}" enum="BooleanEnabled" expires_after="2023-08-01"> + <obsolete> + This metric was used when we were limited to recording the microphone only. + It has been replaced by Ash.CaptureModeController.AudioRecordingMode in + M-116. + </obsolete> <owner>michelefan@chromium.org</owner> <owner>gzadina@google.com</owner> <summary> @@ -2080,6 +2098,25 @@ <token key="TabletOrClamshell" variants="DisplayModes"/> </histogram> +<histogram + name="Ash.CaptureModeController.{Client}.AudioRecordingMode.{TabletOrClamshell}" + enum="AudioRecordingMode" expires_after="2024-06-01"> + <owner>afakhry@chromium.org</owner> + <owner>gzadina@google.com</owner> + <summary> + Recorded at the end of a client-initiated capture mode session in + {TabletOrClamshell} to report the user's audio recording mode setting, only + if video recording was selected, and the recording type supports audio + recording. + </summary> + <token key="TabletOrClamshell" variants="DisplayModes"/> + <token key="Client"> + <variant name="GameDashboard" + summary="The session started from Game Dashboard"/> + <variant name="Projector" summary="The session started from Projector"/> + </token> +</histogram> + <histogram name="Ash.DarkTheme.Settings.IsDarkModeEnabled" enum="Boolean" expires_after="2023-03-01"> <obsolete> @@ -4322,7 +4359,7 @@ </histogram> <histogram name="Ash.NotificationView.NotificationAdded.Type" - enum="NotificationViewType" expires_after="2023-06-18"> + enum="NotificationViewType" expires_after="2024-06-18"> <owner>leandre@chromium.org</owner> <owner>cros-status-area-eng@google.com</owner> <summary> @@ -5234,6 +5271,10 @@ <histogram name="Ash.Projector.PolicyChangeHandlingError" enum="ProjectorPolicyChangeHandlingError" expires_after="2023-10-17"> + <obsolete> + Deprecated in M116. No longer needed since no error reported and converted + to CHECK. + </obsolete> <owner>llin@chromium.org</owner> <owner>cros-projector@google.com</owner> <summary>
diff --git a/tools/metrics/histograms/metadata/compositing/histograms.xml b/tools/metrics/histograms/metadata/compositing/histograms.xml index 3374300b..02cf643 100644 --- a/tools/metrics/histograms/metadata/compositing/histograms.xml +++ b/tools/metrics/histograms/metadata/compositing/histograms.xml
@@ -686,16 +686,6 @@ </summary> </histogram> -<histogram name="Compositing.Renderer.CommitHung" units="boolean" - expires_after="2023-11-19"> - <owner>skobes@chromium.org</owner> - <owner>input-dev@chromium.org</owner> - <summary> - Records when the renderer compositor thread is hung (14 seconds) waiting to - run the commit after the main thread signalled that it is ready to commit. - </summary> -</histogram> - <histogram name="Compositing.Renderer.LayersUpdateTime" units="microseconds" expires_after="2023-11-12"> <owner>pdr@chromium.org</owner>
diff --git a/tools/metrics/histograms/metadata/event/histograms.xml b/tools/metrics/histograms/metadata/event/histograms.xml index f28f8e1..1a831576 100644 --- a/tools/metrics/histograms/metadata/event/histograms.xml +++ b/tools/metrics/histograms/metadata/event/histograms.xml
@@ -478,9 +478,9 @@ </histogram> <histogram name="Event.Latency.ScrollBegin.Touch.AverageLagPresentation" - units="pixels" expires_after="2022-05-01"> + units="pixels" expires_after="2024-06-12"> <owner>flackr@chromium.org</owner> - <owner>joalmei@microsoft.org</owner> + <owner>sahir.vellani@microsoft.com</owner> <owner>input-dev@chromium.org</owner> <summary> Measures an average distance that represents how the page sticks to the @@ -492,15 +492,17 @@ Updates, which are separately logged by Event.Latency.ScrollUpdate.Touch.AverageLagPresentation. + Note: This histogram was expired from 2022-05-01 to 2023-06-20. + Team: input-dev@chromium.org. </summary> </histogram> <histogram name="Event.Latency.ScrollBegin.Touch.AverageLagPresentation.NoPrediction" - units="pixels" expires_after="2022-11-23"> + units="pixels" expires_after="2024-06-12"> <owner>flackr@chromium.org</owner> - <owner>sahir.vellani@microsoft.org</owner> + <owner>sahir.vellani@microsoft.com</owner> <owner>input-dev@chromium.org</owner> <summary> Measures an average distance that represents how the page sticks to the @@ -513,6 +515,8 @@ Updates, which are separately logged by Event.Latency.ScrollUpdate.Touch.AverageLagPresentation. + Note: This histogram was expired from 2022-11-23 to 2023-06-20. + Team: input-dev@chromium.org. </summary> </histogram> @@ -702,9 +706,9 @@ </histogram> <histogram name="Event.Latency.ScrollUpdate.Touch.AverageLagPresentation" - units="pixels" expires_after="2022-12-25"> + units="pixels" expires_after="2024-06-12"> <owner>flackr@chromium.org</owner> - <owner>joalmei@microsoft.com</owner> + <owner>sahir.vellani@microsoft.com</owner> <owner>input-dev@chromium.org</owner> <summary> Measures an average distance that represents how the page sticks to the @@ -714,13 +718,15 @@ This is the lag observed in processing the Gesture Scroll Updates occurring in the lifetime of a scroll interaction. + Note: This histogram was expired from 2022-12-25 to 2023-06-20. + Team: input-dev@chromium.org. </summary> </histogram> <histogram name="Event.Latency.ScrollUpdate.Touch.AverageLagPresentation.NoPrediction" - units="pixels" expires_after="2022-11-23"> + units="pixels" expires_after="2024-06-12"> <owner>flackr@chromium.org</owner> <owner>sahir.vellani@microsoft.com</owner> <owner>input-dev@chromium.org</owner> @@ -733,13 +739,15 @@ This is the lag observed in processing the Gesture Scroll Updates occurring in the lifetime of a scroll interaction. + Note: This histogram was expired from 2022-11-23 to 2023-06-12. + Team: input-dev@chromium.org. </summary> </histogram> <histogram name="Event.Latency.ScrollUpdate.Touch.AverageLagPresentation.RemainingLagPercentage" - units="%" expires_after="2022-11-23"> + units="%" expires_after="2024-06-12"> <owner>flackr@chromium.org</owner> <owner>sahir.vellani@microsoft.com</owner> <owner>input-dev@chromium.org</owner> @@ -763,13 +771,15 @@ Lag_without_prediction = Event.Latency.ScrollUpdate.Touch.AverageLagPresentation.NoPrediction + Note: This histogram was expired from 2022-11-23 to 2023-06-20. + Team: input-dev@chromium.org. 
</summary> </histogram> <histogram name="Event.Latency.ScrollUpdate.Touch.AverageLagPresentation.{PredictionImpact}" - units="pixels" expires_after="2022-11-23"> + units="pixels" expires_after="2024-06-12"> <owner>flackr@chromium.org</owner> <owner>sahir.vellani@microsoft.com</owner> <owner>input-dev@chromium.org</owner> @@ -795,6 +805,8 @@ Lag_without_prediction = Event.Latency.ScrollUpdate.Touch.AverageLagPresentation.NoPrediction + Note: This histogram was expired from 2022-11-23 to 2023-06-20. + Team: input-dev@chromium.org. </summary> <token key="PredictionImpact">
diff --git a/tools/metrics/histograms/metadata/ios/histograms.xml b/tools/metrics/histograms/metadata/ios/histograms.xml index a9f2dfd..0e95022e 100644 --- a/tools/metrics/histograms/metadata/ios/histograms.xml +++ b/tools/metrics/histograms/metadata/ios/histograms.xml
@@ -1894,7 +1894,7 @@ </histogram> <histogram name="IOS.ReadingList.ImageTooLargeFailure" units="KB" - expires_after="2023-08-05"> + expires_after="2024-06-07"> <owner>olivierrobin@chromium.org</owner> <owner>justincohen@chromium.org</owner> <summary> @@ -1904,7 +1904,7 @@ </histogram> <histogram name="IOS.ReadingList.PageTooLargeFailure" units="KB" - expires_after="2023-06-07"> + expires_after="2024-06-07"> <owner>olivierrobin@chromium.org</owner> <owner>justincohen@chromium.org</owner> <summary>
diff --git a/tools/metrics/histograms/metadata/new_tab_page/histograms.xml b/tools/metrics/histograms/metadata/new_tab_page/histograms.xml index bf3629a..0b5652c 100644 --- a/tools/metrics/histograms/metadata/new_tab_page/histograms.xml +++ b/tools/metrics/histograms/metadata/new_tab_page/histograms.xml
@@ -101,7 +101,7 @@ </histogram> <histogram name="NewTabPage.BackgroundService.Images.RequestLatency" units="ms" - expires_after="2023-06-30"> + expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -593,7 +593,7 @@ </histogram> <histogram name="NewTabPage.CustomizeChromeBackgroundAction" - enum="NTPCustomizeChromeBackgroundAction" expires_after="2023-06-30"> + enum="NTPCustomizeChromeBackgroundAction" expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -641,7 +641,7 @@ </histogram> <histogram name="NewTabPage.CustomizeLocalImageBackgroundAction" - enum="NTPCustomizeLocalImageBackgroundAction" expires_after="2023-06-30"> + enum="NTPCustomizeLocalImageBackgroundAction" expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -1015,7 +1015,7 @@ </histogram> <histogram name="NewTabPage.LogoDownloadTime" units="ms" - expires_after="2023-06-30"> + expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -1184,7 +1184,7 @@ </histogram> <histogram name="NewTabPage.Modules.Enabled{Interaction}" enum="NtpModules" - expires_after="2023-06-30"> + expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -1486,7 +1486,7 @@ </histogram> <histogram name="NewTabPage.NumberOfTiles" units="units" - expires_after="2023-06-25"> + expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>yyushkina@chromium.org</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -1503,7 +1503,7 @@ </histogram> <histogram name="NewTabPage.OneGoogleBar.RequestLatency" units="ms" - expires_after="2023-06-30"> + expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -1702,7 +1702,7 @@ </histogram> <histogram name="NewTabPage.RecipeTasks.RelatedSearchClick" units="index" - expires_after="2023-06-30"> + expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner> @@ -2035,7 +2035,7 @@ </histogram> <histogram name="NewTabPage.URLState" enum="NewTabURLState" - expires_after="2023-06-30"> + expires_after="2023-09-30"> <owner>tiborg@chromium.org</owner> <owner>danpeng@google.com</owner> <owner>chrome-desktop-ntp@google.com</owner>
diff --git a/tools/metrics/histograms/metadata/others/histograms.xml b/tools/metrics/histograms/metadata/others/histograms.xml index 0336d56..bd03966 100644 --- a/tools/metrics/histograms/metadata/others/histograms.xml +++ b/tools/metrics/histograms/metadata/others/histograms.xml
@@ -14808,10 +14808,12 @@ <variant name="ChromeOSRestart"/> <variant name="ChromeTips"/> <variant name="ClearBrowsingData"/> + <variant name="CloseProfile"/> <variant name="Copy"/> <variant name="CopyUrl"/> <variant name="CreateHostedApp"/> <variant name="CreateQrCode"/> + <variant name="CustomizeChrome"/> <variant name="Cut"/> <variant name="DevTools"/> <variant name="DevToolsConsole"/> @@ -14826,6 +14828,7 @@ <variant name="InstallPwa"/> <variant name="LoginForDeviceTabs"/> <variant name="ManageExtensions"/> + <variant name="ManageGoogleAccount"/> <variant name="NewIncognitoWindow"/> <variant name="NewTab"/> <variant name="NewWindow"/>
diff --git a/tools/metrics/histograms/metadata/password/histograms.xml b/tools/metrics/histograms/metadata/password/histograms.xml index ad1e3a7..05f23a7d 100644 --- a/tools/metrics/histograms/metadata/password/histograms.xml +++ b/tools/metrics/histograms/metadata/password/histograms.xml
@@ -340,9 +340,11 @@ </histogram> <histogram name="PasswordGeneration.SubmissionAvailableEvent" - enum="PasswordSubmissionEvent" expires_after="M117"> + enum="PasswordSubmissionEvent" expires_after="M120"> <owner>kazinova@google.com</owner> <owner>kolos@chromium.org</owner> + <owner>shaikhitdin@google.com</owner> + <owner>chrome-password-manager-metrics-alerts@google.com</owner> <summary> Measures the frequency of submission events for passwords that could have been generated, but the user didn't choose to use the feature. This is to @@ -3492,9 +3494,11 @@ </histogram> <histogram name="PasswordManager.SubmittedFormType" enum="PasswordFormType" - expires_after="M117"> + expires_after="M120"> <owner>kazinova@google.com</owner> <owner>kolos@chromium.org</owner> + <owner>shaikhitdin@google.com</owner> + <owner>chrome-password-manager-metrics-alerts@google.com</owner> <summary> The type (e.g. signup, login, change password) of all submitted password forms. This is logged on form submission, but doesn't require that the
diff --git a/tools/perf/core/cli_helpers.py b/tools/perf/core/cli_helpers.py index f6703931..76ce666 100644 --- a/tools/perf/core/cli_helpers.py +++ b/tools/perf/core/cli_helpers.py
@@ -4,7 +4,7 @@ from __future__ import print_function -import pipes +import shlex import subprocess import sys from six.moves import input # pylint: disable=redefined-builtin @@ -161,8 +161,9 @@ """ with open(log_path, 'w') as f: try: - cmd_str = (' '.join(pipes.quote(c) for c in command) - if isinstance(command, list) else command) + cmd_str = (' '.join( + shlex.quote(c) + for c in command) if isinstance(command, list) else command) print(Colored(cmd_str, 'blue')) print(Colored('Logging stdout & stderr to %s' % log_path, 'blue')) subprocess.check_call( @@ -180,7 +181,7 @@ """Prints and runs the command. Allows to ignore non-zero exit code.""" if not isinstance(command, list): raise ValueError('command must be a list') - print(Colored(' '.join(pipes.quote(c) for c in command), 'blue')) + print(Colored(' '.join(shlex.quote(c) for c in command), 'blue')) try: return subprocess.check_call(command, **kwargs) except subprocess.CalledProcessError as cpe:
diff --git a/tools/perf/core/perfetto_binary_roller/binary_deps.json b/tools/perf/core/perfetto_binary_roller/binary_deps.json index de6d3ce..ffcdab1 100644 --- a/tools/perf/core/perfetto_binary_roller/binary_deps.json +++ b/tools/perf/core/perfetto_binary_roller/binary_deps.json
@@ -5,16 +5,16 @@ "full_remote_path": "perfetto-luci-artifacts/v35.0/linux-arm64/trace_processor_shell" }, "win": { - "hash": "334da30ae8d30e3026eda117cc276ae77d427e10", - "full_remote_path": "chromium-telemetry/perfetto_binaries/trace_processor_shell/win/0d7838b9db8a54caddf184669dac2feaf3d0309e/trace_processor_shell.exe" + "hash": "bbe565e29fbd9cdd0eeaabd487a0896b679dfe54", + "full_remote_path": "chromium-telemetry/perfetto_binaries/trace_processor_shell/win/d173e0a2c2421139efa7267c9dd94567adc3df6d/trace_processor_shell.exe" }, "linux_arm": { "hash": "c9c575015c295fda07a48ff69169904e2c52863b", "full_remote_path": "perfetto-luci-artifacts/v35.0/linux-arm/trace_processor_shell" }, "mac": { - "hash": "d522304fffbf03c30f113bfa564f6d72ae737164", - "full_remote_path": "chromium-telemetry/perfetto_binaries/trace_processor_shell/mac/0d7838b9db8a54caddf184669dac2feaf3d0309e/trace_processor_shell" + "hash": "1e3dd829668eac7e9bf37507d02047138418a22a", + "full_remote_path": "chromium-telemetry/perfetto_binaries/trace_processor_shell/mac/d173e0a2c2421139efa7267c9dd94567adc3df6d/trace_processor_shell" }, "mac_arm64": { "hash": "10cd97d23764e2d760d6cc2d3ff01bd35bbf8418", @@ -22,7 +22,7 @@ }, "linux": { "hash": "9575cf1c4ec142abf79c9764e7da5256465fa839", - "full_remote_path": "chromium-telemetry/perfetto_binaries/trace_processor_shell/linux/0d7838b9db8a54caddf184669dac2feaf3d0309e/trace_processor_shell" + "full_remote_path": "chromium-telemetry/perfetto_binaries/trace_processor_shell/linux/d173e0a2c2421139efa7267c9dd94567adc3df6d/trace_processor_shell" } }, "power_profile.sql": {
diff --git a/tools/rust/build_crubit.py b/tools/rust/build_crubit.py index 08cb9246..eee6f91 100755 --- a/tools/rust/build_crubit.py +++ b/tools/rust/build_crubit.py
@@ -16,7 +16,6 @@ import hashlib import os import platform -import pipes import shutil import stat import string
diff --git a/tools/sublime/ninja_options_script.py b/tools/sublime/ninja_options_script.py index 34d918e6..019e464 100755 --- a/tools/sublime/ninja_options_script.py +++ b/tools/sublime/ninja_options_script.py
@@ -19,7 +19,7 @@ import imp import optparse import os -import pipes +import shlex ycm_module_path = os.path.normpath( os.path.join(os.path.dirname(os.path.abspath(__file__)), @@ -43,7 +43,7 @@ for flag in results['flags']: # The sublimeclang plugin expects to parse its input with shlex. # Defines and include path names may have spaces or quotes. - print(pipes.quote(flag)) + print(shlex.quote(flag)) if __name__ == "__main__":
diff --git a/ui/accessibility/OWNERS b/ui/accessibility/OWNERS index 1594706d..0a22e945 100644 --- a/ui/accessibility/OWNERS +++ b/ui/accessibility/OWNERS
@@ -9,10 +9,10 @@ # For Android / Java related changes: mschillaci@google.com +aldietz@google.com per-file *.mojom=set noparent per-file *.mojom=file://ipc/SECURITY_OWNERS per-file *_param_traits*.*=set noparent per-file *_param_traits*.*=file://ipc/SECURITY_OWNERS -
diff --git a/ui/accessibility/android/OWNERS b/ui/accessibility/android/OWNERS deleted file mode 100644 index c2c9d20..0000000 --- a/ui/accessibility/android/OWNERS +++ /dev/null
@@ -1,3 +0,0 @@ -aldietz@google.com -dtseng@chromium.org -mschillaci@google.com
diff --git a/ui/android/BUILD.gn b/ui/android/BUILD.gn index f637321..ee67df1 100644 --- a/ui/android/BUILD.gn +++ b/ui/android/BUILD.gn
@@ -532,6 +532,7 @@ "junit/src/org/chromium/ui/base/LocalizationUtilsTest.java", "junit/src/org/chromium/ui/base/MimeTypeUtilsTest.java", "junit/src/org/chromium/ui/base/SelectFileDialogTest.java", + "junit/src/org/chromium/ui/display/DisplayUtilTest.java", "junit/src/org/chromium/ui/dragdrop/AnimatedImageDragShadowBuilderUnitTest.java", "junit/src/org/chromium/ui/dragdrop/DragAndDropDelegateImplUnitTest.java", "junit/src/org/chromium/ui/dragdrop/DragEventDispatchHelperUnitTest.java",
diff --git a/ui/android/java/src/org/chromium/ui/display/DisplayUtil.java b/ui/android/java/src/org/chromium/ui/display/DisplayUtil.java index 5d605306..c32c95b 100644 --- a/ui/android/java/src/org/chromium/ui/display/DisplayUtil.java +++ b/ui/android/java/src/org/chromium/ui/display/DisplayUtil.java
@@ -4,11 +4,15 @@ package org.chromium.ui.display; +import android.util.DisplayMetrics; + /** * Helper functions relevant to working with displays, but have no parallel in the native * DisplayAndroid class. */ public abstract class DisplayUtil { + public static final float UI_SCALING_FACTOR_FOR_AUTO = 1.34f; + /** * @return The smaller of getDisplayWidth(), getDisplayHeight(). */ @@ -33,4 +37,17 @@ // Adding .5 is what Android does when doing this conversion. return (int) (value * display.getDipScale() + 0.5f); } + + /** + * Scales up the UI for the {@link DisplayMetrics} by the scaling factor for automotive devices. + * @param displayMetrics The DisplayMetrics to scale up density for. + * @return The DisplayMetrics that was scaled up. + */ + public static DisplayMetrics scaleUpDisplayMetricsForAutomotive(DisplayMetrics displayMetrics) { + displayMetrics.density *= UI_SCALING_FACTOR_FOR_AUTO; + displayMetrics.densityDpi = (int) (displayMetrics.densityDpi * UI_SCALING_FACTOR_FOR_AUTO); + displayMetrics.xdpi *= UI_SCALING_FACTOR_FOR_AUTO; + displayMetrics.ydpi *= UI_SCALING_FACTOR_FOR_AUTO; + return displayMetrics; + } }
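A worked illustration, in Python for this note only, of the 1.34x automotive scale factor that scaleUpDisplayMetricsForAutomotive applies. The 160 dpi starting value assumes Android's DisplayMetrics.DENSITY_DEFAULT, which is also what the DisplayUtilTest added later in this change uses; the truncation mirrors the Java (int) cast on densityDpi.

UI_SCALING_FACTOR_FOR_AUTO = 1.34  # same constant as DisplayUtil

# Assumed starting values: density 1.0, 160 dpi everywhere.
density, density_dpi, xdpi, ydpi = 1.0, 160, 160.0, 160.0

density *= UI_SCALING_FACTOR_FOR_AUTO                        # 1.34
density_dpi = int(density_dpi * UI_SCALING_FACTOR_FOR_AUTO)  # 214 (truncated)
xdpi *= UI_SCALING_FACTOR_FOR_AUTO                           # 214.4
ydpi *= UI_SCALING_FACTOR_FOR_AUTO                           # 214.4

# widthPixels and heightPixels are intentionally left untouched by the helper.
print(density, density_dpi, xdpi, ydpi)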
diff --git a/ui/android/java/src/org/chromium/ui/display/PhysicalDisplayAndroid.java b/ui/android/java/src/org/chromium/ui/display/PhysicalDisplayAndroid.java index 4f5deb9..cafcd0e 100644 --- a/ui/android/java/src/org/chromium/ui/display/PhysicalDisplayAndroid.java +++ b/ui/android/java/src/org/chromium/ui/display/PhysicalDisplayAndroid.java
@@ -18,6 +18,7 @@ import androidx.annotation.OptIn; import androidx.core.os.BuildCompat; +import org.chromium.base.BuildInfo; import org.chromium.base.CommandLine; import org.chromium.base.ContextUtils; import org.chromium.base.Log; @@ -270,6 +271,11 @@ Rect rect = ApiHelperForR.getMaximumWindowMetricsBounds(windowManager); size.set(rect.width(), rect.height()); DisplayMetrics displayMetrics = mWindowContext.getResources().getDisplayMetrics(); + + if (BuildInfo.getInstance().isAutomotive) { + mDisplay.getRealMetrics(displayMetrics); + DisplayUtil.scaleUpDisplayMetricsForAutomotive(displayMetrics); + } updateCommon(size, displayMetrics.density, displayMetrics.xdpi, displayMetrics.ydpi, ApiHelperForR.getDisplay(mWindowContext)); } @@ -301,6 +307,10 @@ display.getSize(size); display.getMetrics(displayMetrics); } + + if (BuildInfo.getInstance().isAutomotive) { + DisplayUtil.scaleUpDisplayMetricsForAutomotive(displayMetrics); + } updateCommon( size, displayMetrics.density, displayMetrics.xdpi, displayMetrics.ydpi, display); }
diff --git a/ui/android/junit/src/org/chromium/ui/display/DisplayUtilTest.java b/ui/android/junit/src/org/chromium/ui/display/DisplayUtilTest.java new file mode 100644 index 0000000..9df0576 --- /dev/null +++ b/ui/android/junit/src/org/chromium/ui/display/DisplayUtilTest.java
@@ -0,0 +1,52 @@ +// Copyright 2023 The Chromium Authors +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +package org.chromium.ui.display; + +import static org.junit.Assert.assertEquals; + +import android.util.DisplayMetrics; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import org.chromium.base.test.BaseRobolectricTestRunner; + +/** + * Tests logic in the {@link DisplayUtil} class. + */ +@RunWith(BaseRobolectricTestRunner.class) +public class DisplayUtilTest { + @Test + public void testScaleUpDisplayMetricsForAutomotive() { + DisplayMetrics displayMetrics = new DisplayMetrics(); + displayMetrics.density = 1.0f; + displayMetrics.densityDpi = DisplayMetrics.DENSITY_DEFAULT; + displayMetrics.xdpi = DisplayMetrics.DENSITY_DEFAULT; + displayMetrics.ydpi = DisplayMetrics.DENSITY_DEFAULT; + displayMetrics.widthPixels = 100; + displayMetrics.heightPixels = 100; + + int scaledUpDensity = + (int) (DisplayMetrics.DENSITY_DEFAULT * DisplayUtil.UI_SCALING_FACTOR_FOR_AUTO); + DisplayUtil.scaleUpDisplayMetricsForAutomotive(displayMetrics); + assertEquals("The DisplayMetrics density should be scaled up by the " + + "automotive scale-up factor.", + DisplayUtil.UI_SCALING_FACTOR_FOR_AUTO, displayMetrics.density, 0.1f); + assertEquals("The DisplayMetrics densityDpi should be scaled up by the " + + "automotive scale-up factor.", + scaledUpDensity, displayMetrics.densityDpi); + assertEquals("The DisplayMetrics xdpi should be scaled up by the " + + "automotive scale-up factor.", + scaledUpDensity, (int) displayMetrics.xdpi); + assertEquals("The DisplayMetrics ydpi should be scaled up by the " + + "automotive scale-up factor.", + scaledUpDensity, (int) displayMetrics.ydpi); + assertEquals("The DisplayMetrics widthPixels should not be affected by the " + + "automotive scale-up factor.", + 100, displayMetrics.widthPixels); + assertEquals("The DisplayMetrics heightPixels should not be affected by the " + + "automotive scale-up factor.", + 100, displayMetrics.heightPixels); + } +}
diff --git a/ui/compositor/layer.cc b/ui/compositor/layer.cc index df5051b..facb88c 100644 --- a/ui/compositor/layer.cc +++ b/ui/compositor/layer.cc
@@ -12,6 +12,7 @@ #include "base/check_op.h" #include "base/command_line.h" #include "base/containers/contains.h" +#include "base/debug/dump_without_crashing.h" #include "base/functional/bind.h" #include "base/json/json_writer.h" #include "base/memory/ptr_util.h" @@ -52,6 +53,10 @@ namespace ui { namespace { +// TODO(https://crbug.com/1242749): temporary while tracking down crash. +// Minimum interval between no mutation debug dumps. +constexpr base::TimeDelta kMinNoMutationDumpInterval = base::Days(1); + const Layer* GetRoot(const Layer* layer) { // Parent walk cannot be done on a layer that is being used as a mask. Get the // layer to which this layer is a mask of. @@ -391,6 +396,11 @@ } void Layer::Add(Layer* child) { + // TODO(https://crbug.com/1242749): temporary while tracking down crash. + if (no_mutation_) { + base::debug::DumpWithoutCrashing(FROM_HERE, kMinNoMutationDumpInterval); + } + DCHECK(!child->compositor_); if (child->parent_) child->parent_->Remove(child); @@ -666,6 +676,9 @@ // TODO(https://crbug.com/1242749): temporary while tracking down crash. // A `layer_mask` of this would lead to recursion. CHECK(layer_mask != this); + if (no_mutation_) { + base::debug::DumpWithoutCrashing(FROM_HERE, kMinNoMutationDumpInterval); + } layer_mask->layer_mask_back_link_ = this; layer_mask->OnDeviceScaleFactorChanged(device_scale_factor_); } @@ -735,8 +748,6 @@ void Layer::SetLayerBackgroundFilters() { cc::FilterOperations filters; - if (zoom_ != 1) - filters.Append(cc::FilterOperation::CreateZoomFilter(zoom_, zoom_inset_)); if (background_blur_sigma_) { filters.Append(cc::FilterOperation::CreateBlurFilter(background_blur_sigma_, @@ -746,6 +757,15 @@ if (!background_offset_.IsOrigin()) { filters.Append(cc::FilterOperation::CreateOffsetFilter(background_offset_)); } + + // The background zoom is applied after the background offset to support + // positioning of the background *before* magnifying it. Offsetting after + // magnifying is almost equivalent except it can lead to surprising clipping + // at the layer bounds. + if (zoom_ != 1) { + filters.Append(cc::FilterOperation::CreateZoomFilter(zoom_, zoom_inset_)); + } + cc_layer_->SetBackdropFilters(filters); }
diff --git a/ui/compositor/layer.h b/ui/compositor/layer.h index 9464971f85..23fc761 100644 --- a/ui/compositor/layer.h +++ b/ui/compositor/layer.h
@@ -303,6 +303,9 @@ // Zoom the background by a factor of |zoom|. The effect is blended along the // edge across |inset| pixels. + // NOTE: Background zoom does not currently work with software compositing, + // see crbug.com/1451898. Usage should be limited to ash/ which does not + // rely on software compositing. void SetBackgroundZoom(float zoom, int inset); // Applies an offset when drawing pixels for the layer background filter. @@ -576,6 +579,8 @@ compositor_ = compositor; } + void set_no_mutation(bool no_mutation) { no_mutation_ = no_mutation; } + private: // TODO(https://crbug.com/1242749): temporary while tracking down crash. friend class Compositor; @@ -834,6 +839,7 @@ // TODO(https://crbug.com/1242749): temporary while tracking down crash. bool in_send_damaged_rects_ = false; bool sending_damaged_rects_for_descendants_ = false; + bool no_mutation_ = false; // CHECK on Add/SetMakeLayer if true. base::WeakPtrFactory<Layer> weak_ptr_factory_{this}; };
diff --git a/ui/views/examples/examples_with_content_main.cc b/ui/views/examples/examples_with_content_main.cc index 5a77472..87dfb09 100644 --- a/ui/views/examples/examples_with_content_main.cc +++ b/ui/views/examples/examples_with_content_main.cc
@@ -16,7 +16,6 @@ #if BUILDFLAG(IS_MAC) #include "sandbox/mac/seatbelt_exec.h" -#include "ui/display/screen.h" #endif #if BUILDFLAG(IS_WIN) @@ -68,9 +67,6 @@ int main(int argc, const char** argv) { base::CommandLine::Init(argc, argv); -#if BUILDFLAG(IS_MAC) - display::ScopedNativeScreen desktop_screen; -#endif ui::ColorProviderManager::Get().AppendColorProviderInitializer( base::BindRepeating(&views::examples::AddExamplesColorMixers));
diff --git a/ui/views_content_client/views_content_client_main_parts.h b/ui/views_content_client/views_content_client_main_parts.h index d674e20..3401e26 100644 --- a/ui/views_content_client/views_content_client_main_parts.h +++ b/ui/views_content_client/views_content_client_main_parts.h
@@ -11,6 +11,10 @@ #include "build/build_config.h" #include "content/public/browser/browser_main_parts.h" +#if BUILDFLAG(IS_APPLE) +#include "ui/display/screen.h" +#endif + namespace base { class RunLoop; } @@ -62,6 +66,10 @@ #endif private: +#if BUILDFLAG(IS_APPLE) + display::ScopedNativeScreen desktop_screen_; +#endif + std::unique_ptr<content::ShellBrowserContext> browser_context_; std::unique_ptr<views::TestViewsDelegate> views_delegate_;
diff --git a/ui/webui/resources/cr_components/color_change_listener/BUILD.gn b/ui/webui/resources/cr_components/color_change_listener/BUILD.gn index d3f132d..1b7697b 100644 --- a/ui/webui/resources/cr_components/color_change_listener/BUILD.gn +++ b/ui/webui/resources/cr_components/color_change_listener/BUILD.gn
@@ -28,7 +28,10 @@ ts_out_dir = "$root_gen_dir/ui/webui/resources/tsc/cr_components/color_change_listener" ts_composite = true - ts_deps = [ "//ui/webui/resources/mojo:build_ts" ] + ts_deps = [ + "//ui/webui/resources/js:build_ts", + "//ui/webui/resources/mojo:build_ts", + ] generate_grdp = true grd_resource_path_prefix = rebase_path(".", "//ui/webui/resources") }
diff --git a/ui/webui/resources/cr_components/color_change_listener/colors_css_updater.ts b/ui/webui/resources/cr_components/color_change_listener/colors_css_updater.ts index 9fec69a..cb6740a 100644 --- a/ui/webui/resources/cr_components/color_change_listener/colors_css_updater.ts +++ b/ui/webui/resources/cr_components/color_change_listener/colors_css_updater.ts
@@ -7,6 +7,8 @@ * colors CSS stylesheet when a ColorProvider change in the browser is detected. */ +import {assert} from '//resources/js/assert_ts.js'; + import {BrowserProxy} from './browser_proxy.js'; /** @@ -15,99 +17,148 @@ */ export const COLORS_CSS_SELECTOR: string = 'link[href*=\'//theme/colors.css\']'; -/** - * Forces the root to refresh its colors.css stylesheet. This is used to - * fetch an updated stylesheet when the ColorProvider associated with the WebUI - * has changed. - * Returns a promise which resolves to true once the new colors are loaded and - * installed into the DOM. In the case of an error returns false. When a new - * colors.css is loaded, this will always freshly query the existing colors.css, - * allowing multiple calls to successfully remove existing, outdated CSS. - */ -export async function refreshColorCss(root: Document|ShadowRoot = document): - Promise<boolean> { - const colorCssNode = root.querySelector(COLORS_CSS_SELECTOR); - if (!colorCssNode) { - return false; - } - const href = colorCssNode.getAttribute('href'); - if (!href) { - return false; +let documentInstance: ColorChangeUpdater|null = null; + +// <if expr="chromeos_ash"> +type ColorChangeListener = () => void; +// </if> + +export class ColorChangeUpdater { + private listenerId_: null|number = null; + private root_: Document|ShadowRoot; + + // <if expr="chromeos_ash"> + private listeners_: ColorChangeListener[] = []; + // </if> + + constructor(root: Document|ShadowRoot) { + assert(documentInstance === null || root !== document); + this.root_ = root; } - const hrefURL = new URL(href, location.href); - const params = new URLSearchParams(hrefURL.search); - params.set('version', new Date().getTime().toString()); - const newHref = `${hrefURL.origin}${hrefURL.pathname}?${params.toString()}`; + start() { + if (this.listenerId_ !== null) { + return; + } - // A flickering effect may take place when setting the href property of the - // existing color css node with a new value. In order to avoid flickering, we - // create a new link element and once it is loaded we remove the old one. See - // crbug.com/1365320 for additional details. - const newColorsCssLink = document.createElement('link'); - newColorsCssLink.setAttribute('href', newHref); - newColorsCssLink.rel = 'stylesheet'; - newColorsCssLink.type = 'text/css'; - const newColorsLoaded = new Promise(resolve => { - newColorsCssLink.onload = resolve; - }); - if (root === document) { - document.getElementsByTagName('body')[0]!.appendChild(newColorsCssLink); - } else { - root.appendChild(newColorsCssLink); + this.listenerId_ = BrowserProxy.getInstance() + .callbackRouter.onColorProviderChanged.addListener( + this.onColorProviderChanged.bind(this)); } - await newColorsLoaded; - - const oldColorCssNode = document.querySelector(COLORS_CSS_SELECTOR); - if (oldColorCssNode) { - oldColorCssNode.remove(); + // TODO(dpapad): Figure out how to properly trigger + // `callbackRouter.onColorProviderChanged` listeners from tests and make this + // method private. + async onColorProviderChanged() { + await this.refreshColorsCss(); + // <if expr="chromeos_ash"> + for (const listener of this.listeners_) { + listener(); + } + // </if> } - return true; + + /** + * Forces `root_` to refresh its colors.css stylesheet. This is used to + * fetch an updated stylesheet when the ColorProvider associated with the + * WebUI has changed. + * @return A promise which resolves to true once the new colors are loaded and + * installed into the DOM. In the case of an error returns false. 
When a + * new colors.css is loaded, this will always freshly query the existing + * colors.css, allowing multiple calls to successfully remove existing, + * outdated CSS. + */ + async refreshColorsCss(): Promise<boolean> { + const colorCssNode = this.root_.querySelector(COLORS_CSS_SELECTOR); + if (!colorCssNode) { + return false; + } + + const href = colorCssNode.getAttribute('href'); + if (!href) { + return false; + } + + const hrefURL = new URL(href, location.href); + const params = new URLSearchParams(hrefURL.search); + params.set('version', new Date().getTime().toString()); + const newHref = `${hrefURL.origin}${hrefURL.pathname}?${params.toString()}`; + + // A flickering effect may take place when setting the href property of + // the existing color css node with a new value. In order to avoid + // flickering, we create a new link element and once it is loaded we + // remove the old one. See crbug.com/1365320 for additional details. + const newColorsCssLink = document.createElement('link'); + newColorsCssLink.setAttribute('href', newHref); + newColorsCssLink.rel = 'stylesheet'; + newColorsCssLink.type = 'text/css'; + const newColorsLoaded = new Promise(resolve => { + newColorsCssLink.onload = resolve; + }); + if (this.root_ === document) { + document.getElementsByTagName('body')[0]!.appendChild(newColorsCssLink); + } else { + this.root_.appendChild(newColorsCssLink); + } + + await newColorsLoaded; + + const oldColorCssNode = document.querySelector(COLORS_CSS_SELECTOR); + if (oldColorCssNode) { + oldColorCssNode.remove(); + } + return true; + } + + static forDocument(): ColorChangeUpdater { + return documentInstance || + (documentInstance = new ColorChangeUpdater(document)); + } + + // <if expr="chromeos_ash"> + /** + * Register a function to be called every time the page's color provider + * changes. Note that the listeners will only be invoked AFTER start() is + * called, and only after the updated styles have been loaded. + */ + addListener(listener: ColorChangeListener): void { + this.listeners_.push(listener); + } + + /** + * Remove a listener that was previously registered via addListener(). + * If provided with a listener that was not previously registered does + * nothing. + * @return Whether a listener was actually removed. + */ + removeListener(changeListener: ColorChangeListener): boolean { + const toRemove = + this.listeners_.findIndex(listener => listener === changeListener); + if (toRemove === -1) { + return false; + } + + this.listeners_.splice(toRemove, 1); + return true; + } + // </if> } - - -let listenerId: number|null = null; -let clientColorChangeListeners: Array<() => void> = []; - -/** - * Calls `refreshColorCss()` and any listeners previously registered via - * `addColorChangeListener()` - */ -export async function colorProviderChangeHandler( - root: Document|ShadowRoot = document) { - // The webui's current css variables may now be stale, force update them. - await refreshColorCss(root); - // Notify any interested javascript that the color scheme has changed. - for (const listener of clientColorChangeListeners) { - listener(); - } +// <if expr="chromeos_ash"> +export function addColorChangeListener(listener: ColorChangeListener) { + ColorChangeUpdater.forDocument().addListener(listener); } -/** - * Register a function to be called every time the page's color provider - * changes. Note that the listeners will only be invoked AFTER - * startColorChangeUpdater() is called. 
- */ -export function addColorChangeListener(changeListener: () => void) { - clientColorChangeListeners.push(changeListener); +export function removeColorChangeListener(listener: ColorChangeListener) { + ColorChangeUpdater.forDocument().removeListener(listener); } +// </if> /** - * Remove a listener that was previously registered via addColorChangeListener. - * If provided with a listener that was not previously registered does nothing. + * Starts listening for ColorProvider changes from the browser and updates the + * top level HTML document whenever changes occur. */ -export function removeColorChangeListener(changeListener: () => void) { - clientColorChangeListeners = clientColorChangeListeners.filter( - listener => listener !== changeListener); -} - -/** Starts listening for ColorProvider change updates from the browser. */ -export function startColorChangeUpdater(root: Document|ShadowRoot = document) { - if (listenerId === null) { - listenerId = BrowserProxy.getInstance() - .callbackRouter.onColorProviderChanged.addListener( - colorProviderChangeHandler.bind(undefined, root)); - } +export function startColorChangeUpdater() { + const updater = ColorChangeUpdater.forDocument(); + updater.start(); }
diff --git a/ui/wm/core/window_util.cc b/ui/wm/core/window_util.cc
index 2745a9e..0b764e1 100644
--- a/ui/wm/core/window_util.cc
+++ b/ui/wm/core/window_util.cc
@@ -38,6 +38,7 @@
       CloneChildren(owner->layer(), old_layer, map_func);
     }
   }
+  parent->set_no_mutation(true);
 }
 
 // Invokes Mirror() on all the children of |to_mirror|, adding the newly cloned
diff --git a/ui/wm/core/window_util_unittest.cc b/ui/wm/core/window_util_unittest.cc
index 194c9fe..d625c32 100644
--- a/ui/wm/core/window_util_unittest.cc
+++ b/ui/wm/core/window_util_unittest.cc
@@ -7,6 +7,9 @@
 #include <memory>
 
+#include "base/debug/dump_without_crashing.h"
 #include "base/functional/bind.h"
+#include "base/test/scoped_mock_clock_override.h"
+#include "base/time/time.h"
 #include "ui/aura/test/aura_test_base.h"
 #include "ui/aura/test/test_windows.h"
 #include "ui/aura/window.h"
@@ -16,7 +19,16 @@
 
 namespace wm {
 
-typedef aura::test::AuraTestBase WindowUtilTest;
+namespace {
+
+int dump_count = 0;
+void FakeDumpWithoutCrashing() {
+  ++dump_count;
+}
+
+}  // namespace
+
+using WindowUtilTest = aura::test::AuraTestBase;
 
 // Test if the recreate layers does not recreate layers that have
 // already been acquired.
@@ -100,4 +112,71 @@
   EXPECT_EQ(window12->layer(), window1->layer()->children()[1]);
 }
 
+TEST_F(WindowUtilTest, NoMutationAfterCopy) {
+  base::debug::SetDumpWithoutCrashingFunction(&FakeDumpWithoutCrashing);
+  base::ScopedMockClockOverride clock;
+
+  std::unique_ptr<aura::Window> window1(
+      aura::test::CreateTestWindowWithId(0, nullptr));
+  std::unique_ptr<aura::Window> window11(
+      aura::test::CreateTestWindowWithId(1, window1.get()));
+
+  // Add and SetMaskLayer on `window1` and `window11` work before
+  // RecreateLayers.
+  {
+    std::unique_ptr<ui::Layer> layer = std::make_unique<ui::Layer>();
+    window1->layer()->Add(layer.get());
+
+    std::unique_ptr<ui::Layer> mask_layer = std::make_unique<ui::Layer>();
+    window1->layer()->SetMaskLayer(mask_layer.get());
+
+    std::unique_ptr<ui::Layer> child_layer = std::make_unique<ui::Layer>();
+    window11->layer()->Add(child_layer.get());
+
+    std::unique_ptr<ui::Layer> child_mask_layer = std::make_unique<ui::Layer>();
+    window11->layer()->SetMaskLayer(child_mask_layer.get());
+  }
+
+  std::unique_ptr<ui::LayerTreeOwner> tree = wm::RecreateLayers(window1.get());
+
+  // Add and SetMaskLayer on `window1` and `window11` trigger dumps after
+  // RecreateLayers.
+  {
+    ASSERT_EQ(dump_count, 0);
+
+    ui::Layer* window1_old_layer = tree->root();
+    std::unique_ptr<ui::Layer> layer = std::make_unique<ui::Layer>();
+    window1_old_layer->Add(layer.get());
+
+    std::unique_ptr<ui::Layer> mask_layer = std::make_unique<ui::Layer>();
+    window1_old_layer->SetMaskLayer(mask_layer.get());
+
+    // 2 dumps should be created from the Add/SetMaskLayer calls above.
+    EXPECT_EQ(dump_count, 2);
+
+    {
+      ui::Layer* window11_old_layer = tree->root()->children().front();
+      std::unique_ptr<ui::Layer> child_layer = std::make_unique<ui::Layer>();
+      window11_old_layer->Add(child_layer.get());
+
+      // No new dumps within the 1-day interval.
+      EXPECT_EQ(dump_count, 2);
+    }
+
+    // Skip past the dump blocking time.
+    clock.Advance(base::Days(1));
+
+    ui::Layer* window11_old_layer = tree->root()->children().front();
+    std::unique_ptr<ui::Layer> child_layer = std::make_unique<ui::Layer>();
+    window11_old_layer->Add(child_layer.get());
+
+    std::unique_ptr<ui::Layer> child_mask_layer = std::make_unique<ui::Layer>();
+    window11_old_layer->SetMaskLayer(child_mask_layer.get());
+
+    EXPECT_EQ(dump_count, 4);
+  }
+
+  base::debug::SetDumpWithoutCrashingFunction(nullptr);
+}
+
 }  // namespace wm