| // Copyright 2025 The Chromium Authors |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| import * as Handlers from '../handlers/handlers.js'; |
| |
/** Copies whose attributed size is below this many bytes (half a KiB) are ignored. */
const ABSOLUTE_SIZE_THRESHOLD_BYTES = 512;
/** Copies smaller than this fraction of the largest copy of the same module are ignored. */
const RELATIVE_SIZE_THRESHOLD = 0.1;
| |
/** Size information for a single source map `sources` entry within one script. */
interface SourceData {
  /** The `sources` entry after it has been passed through `normalizeSource`. */
  source: string;
  /** Size attributed to this source in the script, before any compression ratio is applied. */
  resourceSize: number;
}
| |
/**
 * Normalizes a source map `sources` entry so that copies of the same module
 * found in different scripts end up under the same key.
 *
 * @param source A source map `sources` entry (a URL or file path).
 * @returns `source` without a single trailing `?` and, when it contains
 *     `node_modules`, truncated to start at the last occurrence of it.
 */
export function normalizeSource(source: string): string {
  // webpack may leave a trailing question mark on the path; drop it.
  const withoutTrailingMark = source.endsWith('?') ? source.slice(0, -1) : source;

  // For dependencies, keep only what follows (and includes) the last `node_modules`.
  const nodeModulesStart = withoutTrailingMark.lastIndexOf('node_modules');
  return nodeModulesStart === -1 ? withoutTrailingMark : withoutTrailingMark.substring(nodeModulesStart);
}
| |
/**
 * Reports whether a source map `sources` entry should be left out of the
 * duplication analysis.
 *
 * @param source A raw (not yet normalized) `sources` entry.
 * @returns `true` for webpack bundle overhead and webpack module shims.
 */
function shouldIgnoreSource(source: string): boolean {
  return (
      // Bundle overhead.
      source.includes('webpack/bootstrap') || source.includes('(webpack)/buildin') ||
      // webpack module shims, i.e. aliases of the form `module.exports = window.jQuery`.
      source.includes('external '));
}
| |
/**
 * Maps a source map `sources` entry (a URL or file path, normalized via
 * `normalizeSource`) to the scripts whose source maps reference it.
 *
 * Each value has one entry for every script with a source map that uses the
 * source, along with the estimated size the source takes up in that script.
 */
export type ScriptDuplication = Map<string, {
  /**
   * The sum of every `attributedSize` in `duplicates` except that of the first entry.
   *
   * The first entry of `duplicates` is treated as the canonical copy of the
   * module; all remaining copies count as wasted bytes.
   *
   * When all copies are the same version, their sizes are equal and the choice
   * of canonical copy does not matter (ignoring compression ratios). When the
   * copies are different versions, it does matter. Ideally the newest version
   * would be the canonical copy, but version information is not present, so
   * size is used as a heuristic for the latest version. This makes the value
   * here conservative in its estimation.
   */
  estimatedDuplicateBytes: number;
  /** One entry per script that contains a copy of this source. */
  duplicates: Array<{
    /** The script that contains the copy. */
    script: Handlers.ModelHandlers.Scripts.Script;
    /**
     * The number of bytes in the script bundle that map back to this module,
     * in terms of estimated impact on transfer size.
     */
    attributedSize: number;
  }>;
}>;
| |
/**
 * Normalizes every entry of a @see ScriptDuplication in place: orders each
 * `duplicates` array by descending `attributedSize`, discards copies that are
 * too small, removes entries left with fewer than two copies, and calculates
 * `estimatedDuplicateBytes` for the entries that remain.
 *
 * @param duplication The map to normalize. It is mutated; nothing is returned.
 */
export function normalizeDuplication(duplication: ScriptDuplication): void {
  for (const [source, entry] of duplication) {
    // Largest copy first: it serves as the canonical copy.
    entry.duplicates.sort((left, right) => right.attributedSize - left.attributedSize);

    // Discard copies that are small relative to the largest one.
    if (entry.duplicates.length > 1) {
      const largestSize = entry.duplicates[0].attributedSize;
      entry.duplicates = entry.duplicates.filter(copy => copy.attributedSize / largestSize >= RELATIVE_SIZE_THRESHOLD);
    }

    // Discard copies that are small in absolute terms.
    entry.duplicates = entry.duplicates.filter(copy => copy.attributedSize >= ABSOLUTE_SIZE_THRESHOLD_BYTES);

    // With fewer than two copies left there is no duplication to report.
    if (entry.duplicates.length <= 1) {
      duplication.delete(source);
      continue;
    }

    // Every copy other than the canonical (first) one counts as duplicated bytes.
    let duplicateBytes = 0;
    for (let i = 1; i < entry.duplicates.length; i++) {
      duplicateBytes += entry.duplicates[i].attributedSize;
    }
    entry.estimatedDuplicateBytes = duplicateBytes;
  }
}
| |
/**
 * Like `String.prototype.indexOf`, except that `haystack.length` is returned
 * instead of -1 when `needle` is not found.
 *
 * @param haystack The string to search in.
 * @param needle The string to search for.
 * @param startPosition The index at which the search starts.
 * @returns The index of the first match, or `haystack.length` if there is none.
 */
function indexOfOrLength(haystack: string, needle: string, startPosition = 0): number {
  const found = haystack.indexOf(needle, startPosition);
  if (found === -1) {
    return haystack.length;
  }
  return found;
}
| |
/**
 * Extracts the package name from a path inside `node_modules`.
 *
 * @param source A path; only the text after its last `node_modules/` is considered
 *     (the whole string when `node_modules/` does not occur).
 * @returns The first path segment of that text, or the first two segments
 *     (`@scope/name`) when it starts with `@`.
 */
export function getNodeModuleName(source: string): string {
  const marker = 'node_modules/';
  const markerIndex = source.lastIndexOf(marker);
  const modulePath = markerIndex === -1 ? source : source.slice(markerIndex + marker.length);

  const firstSlash = indexOfOrLength(modulePath, '/');
  // Scoped packages span two path segments, so the name ends at the second slash.
  const nameEnd = modulePath.startsWith('@') ? indexOfOrLength(modulePath, '/', firstSlash + 1) : firstSlash;
  return modulePath.slice(0, nameEnd);
}
| |
/**
 * Builds a new @see ScriptDuplication in which all sources belonging to the
 * same node_modules package are merged under a single `node_modules/<package>` key.
 *
 * Sources whose key does not contain `node_modules` are carried over unchanged
 * (the same value object is reused, not copied). For merged entries, the
 * `attributedSize` values of a package's sources are summed per script, and
 * `estimatedDuplicateBytes` is left at 0.
 */
function groupByNodeModules(duplication: ScriptDuplication): ScriptDuplication {
  const grouped: ScriptDuplication = new Map();

  duplication.forEach((entry, source) => {
    if (!source.includes('node_modules')) {
      grouped.set(source, entry);
      return;
    }

    const packageKey = `node_modules/${getNodeModuleName(source)}`;
    let packageEntry = grouped.get(packageKey);
    if (packageEntry === undefined) {
      packageEntry = {
        duplicates: [],
        // This is calculated in normalizeDuplication.
        estimatedDuplicateBytes: 0,
      };
      grouped.set(packageKey, packageEntry);
    }

    // Accumulate this source's size onto the per-script total of its package.
    for (const copy of entry.duplicates) {
      let target = packageEntry.duplicates.find(candidate => candidate.script === copy.script);
      if (target === undefined) {
        target = {script: copy.script, attributedSize: 0};
        packageEntry.duplicates.push(target);
      }
      target.attributedSize += copy.attributedSize;
    }
  });

  return grouped;
}
| |
/**
 * Returns a new map with the same entries as `duplication`, ordered by
 * descending `estimatedDuplicateBytes` (largest estimated savings first).
 * The input map is not modified.
 */
function sorted(duplication: ScriptDuplication): ScriptDuplication {
  const entries = Array.from(duplication.entries());
  entries.sort(([, left], [, right]) => right.estimatedDuplicateBytes - left.estimatedDuplicateBytes);
  return new Map(entries);
}
| |
/**
 * Returns 2 @see ScriptDuplication for the given collection of script contents + source maps:
 *
 * 1. `duplication`: keys correspond to authored files.
 * 2. `duplicationGroupedByNodeModules`: keys correspond to authored files, except all files
 *    within the same node_module package are aggregated under the same entry.
 *
 * Both maps are normalized via `normalizeDuplication` and ordered by descending
 * `estimatedDuplicateBytes`.
 *
 * @param scriptsData The scripts to inspect. Scripts without content or without a source map
 *     are skipped, as are scripts whose generated sizes are unavailable or report an error
 *     (the error message is logged with `console.error`).
 * @param compressionRatios Compression ratio keyed by request id. Each source's size is
 *     multiplied by the ratio of its script's request; a ratio of 1 is used when the script
 *     has no request or the map has no entry for it.
 * @returns The per-file and the per-package duplication maps.
 */
export function computeScriptDuplication(
    scriptsData: Handlers.ModelHandlers.Scripts.ScriptsData, compressionRatios: Map<string, number>):
    {duplication: ScriptDuplication, duplicationGroupedByNodeModules: ScriptDuplication} {
  const sourceDatasMap = new Map<Handlers.ModelHandlers.Scripts.Script, SourceData[]>();

  // Determine size of each `sources` entry.
  for (const script of scriptsData.scripts) {
    if (!script.content || !script.sourceMap) {
      continue;
    }

    const sizes = Handlers.ModelHandlers.Scripts.getScriptGeneratedSizes(script);
    if (!sizes) {
      continue;
    }

    if ('errorMessage' in sizes) {
      console.error(sizes.errorMessage);
      continue;
    }

    const sourceDataArray: SourceData[] = [];
    sourceDatasMap.set(script, sourceDataArray);

    const sources = script.sourceMap.sourceURLs();
    for (let i = 0; i < sources.length; i++) {
      const source = sources[i];
      if (shouldIgnoreSource(source)) {
        continue;
      }

      const sourceSize = sizes.files[source];
      // A source can be listed in the source map without any size recorded for it.
      // Letting `undefined` through would turn into a NaN `attributedSize`, which
      // makes the sort comparator in normalizeDuplication inconsistent and turns
      // the whole per-package sum in groupByNodeModules into NaN.
      if (!Number.isFinite(sourceSize)) {
        continue;
      }

      sourceDataArray.push({
        source: normalizeSource(source),
        resourceSize: sourceSize,
      });
    }
  }

  const duplication: ScriptDuplication = new Map();
  for (const [script, sourceDataArray] of sourceDatasMap) {
    // The ratio depends only on the script, so look it up once per script.
    const compressionRatio =
        script.request ? (compressionRatios.get(script.request.args.data.requestId) ?? 1) : 1;

    for (const sourceData of sourceDataArray) {
      let data = duplication.get(sourceData.source);
      if (!data) {
        data = {estimatedDuplicateBytes: 0, duplicates: []};
        duplication.set(sourceData.source, data);
      }
      // Scale the size by the compression ratio to estimate its impact on transfer size.
      const transferSize = Math.round(sourceData.resourceSize * compressionRatio);
      data.duplicates.push({
        script,
        attributedSize: transferSize,
      });
    }
  }

  // Group before normalizing so that per-package sums include sources that are
  // individually below the size thresholds.
  const duplicationGroupedByNodeModules = groupByNodeModules(duplication);

  normalizeDuplication(duplication);
  normalizeDuplication(duplicationGroupedByNodeModules);

  return {
    duplication: sorted(duplication),
    duplicationGroupedByNodeModules: sorted(duplicationGroupedByNodeModules),
  };
}