// blob: 0a307edd1c886e88ec4df03e63682cbf18c503cf [file] [log] [blame]
// Copyright 2018 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
package org.chromium.base;
import android.text.TextUtils;
import android.util.Patterns;
import org.chromium.base.annotations.UsedByReflection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Provides public methods for detecting and eliding sensitive PII.
 *
 * <p>Each public method takes a string, replaces every match of one kind of sensitive content
 * (email, URL, IP address, MAC address, console message) with a fixed placeholder constant and
 * returns the resulting string.
 */
public class PiiElider {
    /** Placeholder substituted for every email address. */
    private static final String EMAIL_ELISION = "XXX@EMAIL.ELIDED";

    /** Placeholder substituted for every URL. */
    private static final String URL_ELISION = "HTTP://WEBADDRESS.ELIDED";

    /** Character-class body (no surrounding brackets) of characters accepted in a host label. */
    private static final String GOOD_IRI_CHAR =
            "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";

    /** Dotted-quad IPv4 address; only used as a building block of {@link #DOMAIN_NAME}. */
    private static final Pattern IP_ADDRESS = Pattern.compile(
            "((25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9])\\.(25[0-5]|2[0-4]"
            + "[0-9]|[0-1][0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1]"
            + "[0-9]{2}|[1-9][0-9]|[1-9]|0)\\.(25[0-5]|2[0-4][0-9]|[0-1][0-9]{2}"
            + "|[1-9][0-9]|[0-9]))");

    /**
     * A single host label: one good character, optionally followed by up to 61 good characters
     * or hyphens and a final good character.
     */
    private static final String IRI =
            "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";

    /** Character-class body of characters accepted in a top-level domain (no digits). */
    private static final String GOOD_GTLD_CHAR = "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";

    /** Top-level domain: 2 to 63 {@link #GOOD_GTLD_CHAR} characters. */
    private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";

    /** One or more dot-terminated labels followed by a top-level domain. */
    private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;

    /** Either a host name or an IPv4 address. */
    private static final Pattern DOMAIN_NAME =
            Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS.pattern() + ")");

    /** Matches text shaped like a stack frame: whitespace, "at", whitespace, "org.chromium.". */
    private static final Pattern LIKELY_EXCEPTION_LOG =
            Pattern.compile("\\sat\\sorg\\.chromium\\.[^ ]+.");

    /**
     * URL matcher: optional scheme (http/https/rtsp) with optional user-info, a
     * {@link #DOMAIN_NAME}, an optional port and an optional path. The scheme is optional, so
     * bare dotted names (for example Java package names) also match.
     */
    private static final Pattern WEB_URL =
            Pattern.compile("(?:\\b|^)((?:(http|https|Http|Https|rtsp|Rtsp):"
                    + "\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
                    + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
                    + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
                    + "(?:" + DOMAIN_NAME.pattern() + ")"
                    + "(?:\\:\\d{1,5})?)"
                    + "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~"
                    + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
                    + "(?:\\b|$)");

    /** Placeholder substituted for every IP address. */
    private static final String IP_ELISION = "1.2.3.4";

    /** Placeholder substituted for every MAC address. */
    private static final String MAC_ELISION = "01:23:45:67:89:AB";

    /** Placeholder substituted for every console message. */
    private static final String CONSOLE_ELISION = "[ELIDED:CONSOLE(0)] ELIDED CONSOLE MESSAGE";

    /** Six pairs of hex digits separated by one or more '-' or ':' characters. */
    private static final Pattern MAC_ADDRESS =
            Pattern.compile("([0-9a-fA-F]{2}[-:]+){5}[0-9a-fA-F]{2}");

    /** A bracketed tag containing ":CONSOLE" followed by the rest of the line. */
    private static final Pattern CONSOLE_MSG = Pattern.compile("\\[\\w*:CONSOLE.*\\].*");

    /** Prefixes of URL-shaped matches that are treated as app package names and kept. */
    private static final String[] APP_NAMESPACE = new String[] {"org.chromium.", "com.google."};

    /** Prefixes of URL-shaped matches that are treated as system package names and kept. */
    private static final String[] SYSTEM_NAMESPACE = new String[] {"android.accessibilityservice",
            "android.accounts", "android.animation", "android.annotation", "android.app",
            "android.appwidget", "android.bluetooth", "android.content", "android.database",
            "android.databinding", "android.drm", "android.gesture", "android.graphics",
            "android.hardware", "android.inputmethodservice", "android.location", "android.media",
            "android.mtp", "android.net", "android.nfc", "android.opengl", "android.os",
            "android.preference", "android.print", "android.printservice", "android.provider",
            "android.renderscript", "android.sax", "android.security", "android.service",
            "android.speech", "android.support", "android.system", "android.telecom",
            "android.telephony", "android.test", "android.text", "android.transition",
            "android.util", "android.view", "android.webkit", "android.widget", "com.android.",
            "dalvik.", "java.", "javax.", "org.apache.", "org.json.", "org.w3c.dom.", "org.xml.",
            "org.xmlpull."};

    /**
     * Elides any emails in the specified {@link String} with
     * {@link #EMAIL_ELISION}.
     *
     * @param original String potentially containing emails.
     * @return String with elided emails.
     */
    public static String elideEmail(String original) {
        return Patterns.EMAIL_ADDRESS.matcher(original).replaceAll(EMAIL_ELISION);
    }

    /**
     * Elides any URLs in the specified {@link String} with
     * {@link #URL_ELISION}.
     *
     * <p>The input is returned untouched when it looks like a stack frame (see
     * {@link #LIKELY_EXCEPTION_LOG}). Matches that start with one of the known app or system
     * package prefixes are left in place.
     *
     * @param original String potentially containing URLs.
     * @return String with elided URLs.
     */
    public static String elideUrl(String original) {
        // Url-matching is fussy. If something looks like an exception message, just return.
        if (LIKELY_EXCEPTION_LOG.matcher(original).find()) return original;

        StringBuilder buffer = new StringBuilder(original);
        Matcher matcher = WEB_URL.matcher(buffer);
        int start = 0;
        while (matcher.find(start)) {
            start = matcher.start();
            int end = matcher.end();
            String url = buffer.substring(start, end);
            if (!likelyToBeAppNamespace(url) && !likelyToBeSystemNamespace(url)) {
                buffer.replace(start, end, URL_ELISION);
                // Resume scanning right after the placeholder so it is never re-matched.
                end = start + URL_ELISION.length();
                // The buffer contents changed, so start over with a fresh matcher.
                matcher = WEB_URL.matcher(buffer);
            }
            start = end;
        }
        return buffer.toString();
    }

    /** Returns true if {@code url} starts with one of the {@link #APP_NAMESPACE} prefixes. */
    private static boolean likelyToBeAppNamespace(String url) {
        return startsWithAny(url, APP_NAMESPACE);
    }

    /** Returns true if {@code url} starts with one of the {@link #SYSTEM_NAMESPACE} prefixes. */
    private static boolean likelyToBeSystemNamespace(String url) {
        return startsWithAny(url, SYSTEM_NAMESPACE);
    }

    /** Returns true if {@code text} starts with at least one of {@code prefixes}. */
    private static boolean startsWithAny(String text, String[] prefixes) {
        for (String prefix : prefixes) {
            if (text.startsWith(prefix)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Elides any IP addresses in the specified {@link String} with
     * {@link #IP_ELISION}.
     *
     * @param original String potentially containing IPs.
     * @return String with elided IPs.
     */
    public static String elideIp(String original) {
        return Patterns.IP_ADDRESS.matcher(original).replaceAll(IP_ELISION);
    }

    /**
     * Elides any MAC addresses in the specified {@link String} with
     * {@link #MAC_ELISION}.
     *
     * @param original String potentially containing MACs.
     * @return String with elided MACs.
     */
    public static String elideMac(String original) {
        return MAC_ADDRESS.matcher(original).replaceAll(MAC_ELISION);
    }

    /**
     * Elides any console messages in the specified {@link String} with
     * {@link #CONSOLE_ELISION}.
     *
     * @param original String potentially containing console messages.
     * @return String with elided console messages.
     */
    public static String elideConsole(String original) {
        return CONSOLE_MSG.matcher(original).replaceAll(CONSOLE_ELISION);
    }

    /**
     * Elides any URL in the exception messages contained inside a stacktrace with
     * {@link #URL_ELISION}.
     *
     * <p>Only the first line and lines starting with "Caused by:" are sanitized; all other
     * lines are passed through unchanged. Lines are re-joined with "\n".
     *
     * @param stacktrace Multiline stacktrace as a string.
     * @return Stacktrace with elided URLs.
     */
    @UsedByReflection("jni_android.cc")
    public static String sanitizeStacktrace(String stacktrace) {
        String[] frames = stacktrace.split("\\n");
        // String#split drops trailing empty strings, so an input made up solely of newlines
        // produces an empty array. There is no message line to sanitize in that case.
        if (frames.length == 0) return stacktrace;

        // Sanitize first stacktrace line which contains the exception message.
        frames[0] = elideUrl(frames[0]);
        for (int i = 1; i < frames.length; i++) {
            // Nested exceptions should also have their message sanitized.
            if (frames[i].startsWith("Caused by:")) {
                frames[i] = elideUrl(frames[i]);
            }
        }
        return TextUtils.join("\n", frames);
    }
}