|  | // Copyright 2016 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | #include "components/autofill/content/renderer/form_classifier.h" | 
|  |  | 
|  | #include <algorithm> | 
|  |  | 
|  | #include "base/macros.h" | 
|  | #include "base/strings/string16.h" | 
|  | #include "base/strings/string_util.h" | 
|  | #include "components/autofill/content/renderer/form_autofill_util.h" | 
|  | #include "third_party/WebKit/public/platform/WebString.h" | 
|  | #include "third_party/WebKit/public/platform/WebVector.h" | 
|  | #include "third_party/WebKit/public/web/WebFormControlElement.h" | 
|  | #include "third_party/WebKit/public/web/WebInputElement.h" | 
|  |  | 
|  | using autofill::form_util::WebFormControlElementToFormField; | 
|  | using blink::WebFormControlElement; | 
|  | using blink::WebInputElement; | 
|  | using blink::WebString; | 
|  | using blink::WebVector; | 
|  |  | 
|  | namespace autofill { | 
|  |  | 
|  | namespace { | 
|  |  | 
|  | // The words that frequently appear in attribute values of signin forms. | 
|  | const char* const kSigninTextFeatures[] = {"signin", "login", "logon", "auth"}; | 
|  | constexpr size_t kNumberOfSigninFeatures = arraysize(kSigninTextFeatures); | 
|  |  | 
|  | // The words that frequently appear in attribute values of signup forms. | 
|  | const char* const kSignupTextFeatures[] = {"signup", "regist", "creat"}; | 
|  | constexpr size_t kNumberOfSignupFeatures = arraysize(kSignupTextFeatures); | 
|  |  | 
|  | // The words that frequently appear in attribute values of captcha elements. | 
|  | const char* const kCaptchaFeatures[] = {"captcha", "security", "code"}; | 
|  | constexpr size_t kNumberOfCaptchaFeatures = arraysize(kCaptchaFeatures); | 
|  |  | 
|  | // Minimal number of input fields to classify form as signup or change password | 
|  | // form. If at least one of the listed thresholds is reached or exceeded, the | 
|  | // form is classified as a form where password generation should be available. | 
|  | constexpr size_t MINIMAL_NUMBER_OF_TEXT_FIELDS = 2; | 
|  | constexpr size_t MINIMAL_NUMBER_OF_PASSWORD_FIELDS = 2; | 
|  | constexpr size_t MINIMAL_NUMBER_OF_CHECKBOX_FIELDS = 3; | 
|  | constexpr size_t MINIMAL_NUMBER_OF_OTHER_FIELDS = 2; | 
|  |  | 
|  | // Removes some characters from attribute value. | 
|  | void ClearAttributeValue(std::string* value) { | 
|  | value->erase(std::remove_if(value->begin(), value->end(), | 
|  | [](char x) { return x == '-' || x == '_'; }), | 
|  | value->end()); | 
|  | } | 
|  |  | 
|  | // Find |features| in |element|'s attribute values. Returns true if at least one | 
|  | // text feature was found. | 
|  | bool FindTextFeaturesForClass(const blink::WebElement& element, | 
|  | const char* const features[], | 
|  | size_t number_of_features) { | 
|  | DCHECK(features); | 
|  |  | 
|  | for (unsigned i = 0; i < element.attributeCount(); ++i) { | 
|  | std::string filtered_value = | 
|  | base::ToLowerASCII(element.attributeValue(i).utf8()); | 
|  | ClearAttributeValue(&filtered_value); | 
|  |  | 
|  | if (filtered_value.empty()) | 
|  | continue; | 
|  | for (size_t j = 0; j < number_of_features; ++j) { | 
|  | if (filtered_value.find(features[j]) != std::string::npos) | 
|  | return true; | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Returns true if at least one captcha feature was found in |element|'s | 
|  | // attribute values. | 
|  | bool IsCaptchaInput(const blink::WebInputElement& element) { | 
|  | return FindTextFeaturesForClass(element, kCaptchaFeatures, | 
|  | kNumberOfCaptchaFeatures); | 
|  | } | 
|  |  | 
|  | // Finds <img>'s inside |form| and checks if <img>'s attributes contains captcha | 
|  | // text features. Returns true, if at least one occurrence was found. | 
|  | bool FindCaptchaInImgElements(const blink::WebElement& form, | 
|  | bool ingnore_invisible) { | 
|  | CR_DEFINE_STATIC_LOCAL(WebString, kImageTag, ("img")); | 
|  |  | 
|  | blink::WebElementCollection img_elements = | 
|  | form.getElementsByHTMLTagName(kImageTag); | 
|  | for (blink::WebElement element = img_elements.firstItem(); !element.isNull(); | 
|  | element = img_elements.nextItem()) { | 
|  | if (ingnore_invisible && !form_util::IsWebNodeVisible(element)) | 
|  | continue; | 
|  | if (FindTextFeaturesForClass(element, kCaptchaFeatures, | 
|  | kNumberOfCaptchaFeatures)) { | 
|  | return true; | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | // Finds signin and signup features in |element|'s attribute values. Sets to | 
|  | // true |found_signin_text_features| or |found_signup_text_features| if | 
|  | // appropriate features were found. | 
|  | void FindTextFeaturesInElement(const blink::WebElement& element, | 
|  | bool* found_signin_text_features, | 
|  | bool* found_signup_text_features) { | 
|  | DCHECK(found_signin_text_features); | 
|  | DCHECK(found_signup_text_features); | 
|  |  | 
|  | if (!*found_signin_text_features) { | 
|  | *found_signin_text_features = FindTextFeaturesForClass( | 
|  | element, kSigninTextFeatures, kNumberOfSigninFeatures); | 
|  | } | 
|  | if (!*found_signup_text_features) { | 
|  | *found_signup_text_features = FindTextFeaturesForClass( | 
|  | element, kSignupTextFeatures, kNumberOfSignupFeatures); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Returns true if |element| has type "button" or "image". | 
|  | bool IsButtonOrImageElement(const WebFormControlElement& element) { | 
|  | CR_DEFINE_STATIC_LOCAL(WebString, kButton, ("button")); | 
|  | CR_DEFINE_STATIC_LOCAL(WebString, kImage, ("image")); | 
|  |  | 
|  | return element.formControlType() == kButton || | 
|  | element.formControlType() == kImage; | 
|  | } | 
|  |  | 
|  | // Returns true if |element| has type "submit". | 
|  | bool IsSubmitElement(const WebFormControlElement& element) { | 
|  | CR_DEFINE_STATIC_LOCAL(WebString, kSubmit, ("submit")); | 
|  |  | 
|  | return element.formControlType() == kSubmit; | 
|  | } | 
|  |  | 
|  | // Returns true if |element| has type "hidden"; | 
|  | bool IsHiddenElement(const WebFormControlElement& element) { | 
|  | CR_DEFINE_STATIC_LOCAL(WebString, kHidden, ("hidden")); | 
|  |  | 
|  | return element.formControlType() == kHidden; | 
|  | } | 
|  |  | 
|  | // Returns true if |element| has type "select-multiple" or "select-one". | 
|  | bool IsSelectElement(const WebFormControlElement& element) { | 
|  | CR_DEFINE_STATIC_LOCAL(WebString, kSelectOne, ("select-one")); | 
|  | CR_DEFINE_STATIC_LOCAL(WebString, kSelectMultiple, ("select-multiple")); | 
|  |  | 
|  | return element.formControlType() == kSelectOne || | 
|  | element.formControlType() == kSelectMultiple; | 
|  | } | 
|  |  | 
|  | // Return true if |form| contains at least one visible password element. | 
|  | bool FormContainsVisiblePasswordFields(const blink::WebFormElement& form) { | 
|  | WebVector<WebFormControlElement> control_elements; | 
|  | form.getFormControlElements(control_elements); | 
|  | for (auto& control_element : control_elements) { | 
|  | const WebInputElement* input_element = toWebInputElement(&control_element); | 
|  | if (!input_element) | 
|  | continue; | 
|  | if (input_element->isPasswordField() && | 
|  | form_util::IsWebNodeVisible(*input_element)) { | 
|  | return true; | 
|  | } | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | }  // namespace | 
|  |  | 
|  | bool ClassifyFormAndFindGenerationField(const blink::WebFormElement& form, | 
|  | base::string16* generation_field) { | 
|  | DCHECK(generation_field); | 
|  |  | 
|  | if (form.isNull()) | 
|  | return false; | 
|  |  | 
|  | bool ignore_invisible_elements = FormContainsVisiblePasswordFields(form); | 
|  |  | 
|  | bool found_signin_text_features = false; | 
|  | bool found_signup_text_features = false; | 
|  | size_t number_of_text_input_fields = 0; | 
|  | size_t number_of_password_input_fields = 0; | 
|  | size_t number_of_checkbox_input_fields = 0; | 
|  | size_t number_of_other_input_fields = 0; | 
|  | bool found_captcha = | 
|  | FindCaptchaInImgElements(form, ignore_invisible_elements); | 
|  |  | 
|  | FindTextFeaturesInElement(form, &found_signin_text_features, | 
|  | &found_signup_text_features); | 
|  |  | 
|  | std::vector<WebInputElement> passwords; | 
|  | WebVector<WebFormControlElement> control_elements; | 
|  | form.getFormControlElements(control_elements); | 
|  |  | 
|  | for (const WebFormControlElement& control_element : control_elements) { | 
|  | if (IsHiddenElement(control_element)) | 
|  | continue; | 
|  | if (ignore_invisible_elements) { | 
|  | if (!form_util::IsWebNodeVisible(control_element)) | 
|  | continue; | 
|  | } | 
|  |  | 
|  | // If type="button" or "image", skip them, because it might be a link | 
|  | // to another form. | 
|  | if (IsButtonOrImageElement(control_element)) | 
|  | continue; | 
|  |  | 
|  | FindTextFeaturesInElement(control_element, &found_signin_text_features, | 
|  | &found_signup_text_features); | 
|  |  | 
|  | // Since <select> is not WebInputElement, but WebSelectElement, process | 
|  | // them as a special case. | 
|  | if (IsSelectElement(control_element)) { | 
|  | number_of_other_input_fields++; | 
|  | } else { | 
|  | const WebInputElement* input_element = | 
|  | toWebInputElement(&control_element); | 
|  | if (!input_element) | 
|  | continue; | 
|  |  | 
|  | if (input_element->isTextField()) { | 
|  | if (input_element->isPasswordField()) { | 
|  | ++number_of_password_input_fields; | 
|  | passwords.push_back(*input_element); | 
|  | } else { | 
|  | ++number_of_text_input_fields; | 
|  | found_captcha = found_captcha || IsCaptchaInput(*input_element); | 
|  | } | 
|  | } else {  // Non-text fields. | 
|  | if (input_element->isCheckbox()) | 
|  | ++number_of_checkbox_input_fields; | 
|  | else if (!IsSubmitElement(*input_element)) | 
|  | ++number_of_other_input_fields; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (number_of_password_input_fields == 0 || | 
|  | number_of_password_input_fields > 3) | 
|  | return false; | 
|  |  | 
|  | if ((number_of_text_input_fields - found_captcha >= | 
|  | MINIMAL_NUMBER_OF_TEXT_FIELDS || | 
|  | number_of_password_input_fields >= MINIMAL_NUMBER_OF_PASSWORD_FIELDS || | 
|  | number_of_checkbox_input_fields >= MINIMAL_NUMBER_OF_CHECKBOX_FIELDS || | 
|  | number_of_other_input_fields >= MINIMAL_NUMBER_OF_OTHER_FIELDS) || | 
|  | (found_signup_text_features && !found_signin_text_features)) { | 
|  | WebInputElement password_creation_field; | 
|  |  | 
|  | // TODO(crbug.com/618309): Improve local classifier to distinguish password | 
|  | // creation and password usage fields on the change password forms. | 
|  | if (passwords.size() == 3) | 
|  | password_creation_field = passwords[1]; | 
|  | else | 
|  | password_creation_field = passwords[0]; | 
|  |  | 
|  | *generation_field = password_creation_field.nameForAutofill(); | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  | } |