front_end/models/trace/extras/ScriptDuplication.ts - devtools/devtools-frontend - Git at Google

 // Copyright 2025 The Chromium Authors
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 import * as Handlers from '../handlers/handlers.js';

 /** Copies whose attributed size is below this value (half of 1024) are ignored. */
 const ABSOLUTE_SIZE_THRESHOLD_BYTES = 1024 / 2;
 /** Copies smaller than this fraction of the largest copy of the same module are ignored. */
 const RELATIVE_SIZE_THRESHOLD = 0.1;

 /** Size information for one source map `sources` entry within a single script. */
 interface SourceData {
   /** The `sources` entry after being passed through `normalizeSource`. */
   source: string;
   /** Size recorded for this source in the script's generated-size breakdown, before any compression ratio is applied. */
   resourceSize: number;
 }

 /**
  * Canonicalizes a source map `sources` entry so the same module bundled into
  * different scripts ends up under the same key.
  *
  * @param source A `sources` entry (URL or file path).
  * @returns The entry without a trailing `?`, cut down to start at its last
  *     `node_modules` occurrence when it has one.
  */
 export function normalizeSource(source: string): string {
   // A trailing question mark is dropped - b/c webpack.
   const withoutTrailingQuery = source.replace(/\?$/, '');

   // Dependencies are keyed by everything from the last `node_modules` onwards, so
   // the location of the package on disk does not matter.
   const start = withoutTrailingQuery.lastIndexOf('node_modules');
   return start === -1 ? withoutTrailingQuery : withoutTrailingQuery.substring(start);
 }

 /**
  * Reports whether a `sources` entry should be left out of the duplication analysis.
  *
  * @param source A raw (not yet normalized) `sources` entry.
  * @returns true when the entry contains one of the ignored markers.
  */
 function shouldIgnoreSource(source: string): boolean {
   const ignoredMarkers = [
     // Bundle overhead.
     'webpack/bootstrap',
     '(webpack)/buildin',
     // Webpack module shims, i.e. aliases of the form `module.exports = window.jQuery`.
     'external ',
   ];
   return ignoredMarkers.some(marker => source.includes(marker));
 }

 /**
  * The key is a source map `sources` entry (these are URLs/file paths), but normalized
  * via `normalizeSource`.
  *
  * The value is an object with an entry for every script that has a source map which
  * denotes that this source was used, along with the estimated size it takes up in
  * the script.
  */
 export type ScriptDuplication = Map<string, {
   /**
    * This is the sum of all (but one) `attributedSize` in `duplicates`.
    *
    * One copy of this module is treated as the canonical version and does not
    * count towards this value; every other copy does. The canonical copy is the
    * first entry of `duplicates` once `normalizeDuplication` has sorted it,
    * i.e. the largest copy.
    *
    * In the case of all copies being the same version, all sizes are
    * equal and the selection doesn't matter (ignoring compression ratios). When
    * the copies are different versions, it does matter. Ideally the newest
    * version would be the canonical copy, but version information is not present.
    * Instead, size is used as a heuristic for latest version. This makes the
    * value here conservative in its estimation.
    */
   estimatedDuplicateBytes: number,
   duplicates: Array<{
     script: Handlers.ModelHandlers.Scripts.Script,
     /**
      * The number of bytes in the script bundle that map back to this module,
      * in terms of estimated impact on transfer size.
      */
     attributedSize: number,
   }>,
 }>;

 /**
  * Normalizes a @see ScriptDuplication in place: orders every `duplicates` array by
  * attributedSize (largest first), discards copies that are too small, removes
  * sources left with fewer than two copies, and fills in estimatedDuplicateBytes.
  *
  * @param duplication The map to normalize; both the map and its values are mutated.
  */
 export function normalizeDuplication(duplication: ScriptDuplication): void {
   for (const [source, entry] of duplication) {
     // Largest copy first; it is the one that is not counted as duplicated.
     entry.duplicates.sort((left, right) => right.attributedSize - left.attributedSize);

     // Drop copies that are tiny relative to the largest copy.
     if (entry.duplicates.length > 1) {
       const largestSize = entry.duplicates[0].attributedSize;
       entry.duplicates =
           entry.duplicates.filter(({attributedSize}) => attributedSize / largestSize >= RELATIVE_SIZE_THRESHOLD);
     }

     // Drop copies that are tiny in absolute terms.
     entry.duplicates = entry.duplicates.filter(({attributedSize}) => attributedSize >= ABSOLUTE_SIZE_THRESHOLD_BYTES);

     // A source with at most one remaining copy is not duplicated.
     if (entry.duplicates.length <= 1) {
       duplication.delete(source);
       continue;
     }

     // Everything after the first (largest) copy counts as duplicated bytes.
     let duplicateBytes = 0;
     for (let i = 1; i < entry.duplicates.length; i++) {
       duplicateBytes += entry.duplicates[i].attributedSize;
     }
     entry.estimatedDuplicateBytes = duplicateBytes;
   }
 }

 /**
  * Like `String.prototype.indexOf`, except that a miss yields the length of
  * `haystack` instead of -1.
  *
  * @param haystack String to search in.
  * @param needle Substring to look for.
  * @param startPosition Index at which the search starts.
  * @returns Index of the first match at or after `startPosition`, or `haystack.length`.
  */
 function indexOfOrLength(haystack: string, needle: string, startPosition = 0): number {
   const found = haystack.indexOf(needle, startPosition);
   if (found === -1) {
     return haystack.length;
   }
   return found;
 }

 /**
  * Extracts the package name from a path, looking at what follows the last
  * `node_modules/` (or at the whole string when there is none).
  *
  * @param source A path such as `node_modules/@scope/pkg/index.js`.
  * @returns The first path segment, or the first two segments when the path
  *     starts with `@` (scoped package).
  */
 export function getNodeModuleName(source: string): string {
   const marker = 'node_modules/';
   const markerIndex = source.lastIndexOf(marker);
   const packagePath = markerIndex === -1 ? source : source.slice(markerIndex + marker.length);

   const firstSlash = indexOfOrLength(packagePath, '/');
   // Scoped packages (`@scope/name`) span two path segments.
   const nameEnd = packagePath.startsWith('@') ? indexOfOrLength(packagePath, '/', firstSlash + 1) : firstSlash;
   return packagePath.slice(0, nameEnd);
 }

 /**
  * Builds a copy of `duplication` in which all sources belonging to the same
  * node_modules package are merged under a single `node_modules/<package>` key.
  * Sources that do not mention `node_modules` are carried over unchanged (the
  * value object is shared with the input map, not cloned).
  *
  * @param duplication Per-source duplication data.
  * @returns A new map; merged entries sum `attributedSize` per script.
  */
 function groupByNodeModules(duplication: ScriptDuplication): ScriptDuplication {
   const grouped: ScriptDuplication = new Map();

   duplication.forEach((data, source) => {
     if (!source.includes('node_modules')) {
       grouped.set(source, data);
       return;
     }

     const packageKey = `node_modules/${getNodeModuleName(source)}`;
     let packageData = grouped.get(packageKey);
     if (!packageData) {
       packageData = {
         duplicates: [],
         // This is calculated in normalizeDuplication.
         estimatedDuplicateBytes: 0,
       };
       grouped.set(packageKey, packageData);
     }

     // Accumulate per script, so a package contributes one entry per script.
     for (const {script, attributedSize} of data.duplicates) {
       const existing = packageData.duplicates.find(candidate => candidate.script === script);
       if (existing) {
         existing.attributedSize += attributedSize;
       } else {
         packageData.duplicates.push({script, attributedSize});
       }
     }
   });

   return grouped;
 }

 /**
  * Returns a new map with the same entries, ordered by estimated savings
  * (`estimatedDuplicateBytes`), largest first. The input map is not modified.
  */
 function sorted(duplication: ScriptDuplication): ScriptDuplication {
   const entries = Array.from(duplication.entries());
   entries.sort(([, left], [, right]) => right.estimatedDuplicateBytes - left.estimatedDuplicateBytes);
   return new Map(entries);
 }

 /**
  * Returns 2 @see ScriptDuplication for the given collection of script contents + source maps:
  *
  * 1. `duplication` keys correspond to authored files
  * 2. `duplicationGroupedByNodeModules` keys correspond to authored files, except all files
  *    within the same node_module package are aggregated under the same entry.
  *
  * Scripts without content or a source map, and scripts whose generated sizes are
  * unavailable or report an error, are skipped.
  *
  * @param scriptsData The scripts to inspect.
  * @param compressionRatios Per-request multipliers, keyed by request id, used to turn a
  *     source's resource size into an estimated transfer size. Scripts with no request, or
  *     with no entry in this map, use a ratio of 1.
  * @returns Both maps, normalized via `normalizeDuplication` and ordered by
  *     `estimatedDuplicateBytes`, largest first.
  */
 export function computeScriptDuplication(
     scriptsData: Handlers.ModelHandlers.Scripts.ScriptsData, compressionRatios: Map<string, number>):
     {duplication: ScriptDuplication, duplicationGroupedByNodeModules: ScriptDuplication} {
   const sourceDatasMap = new Map<Handlers.ModelHandlers.Scripts.Script, SourceData[]>();

   // Determine size of each `sources` entry.
   for (const script of scriptsData.scripts) {
     if (!script.content || !script.sourceMap) {
       continue;
     }

     const sizes = Handlers.ModelHandlers.Scripts.getScriptGeneratedSizes(script);
     if (!sizes) {
       continue;
     }

     if ('errorMessage' in sizes) {
       console.error(sizes.errorMessage);
       continue;
     }

     const sourceDataArray: SourceData[] = [];
     sourceDatasMap.set(script, sourceDataArray);

     for (const sourceURL of script.sourceMap.sourceURLs()) {
       if (shouldIgnoreSource(sourceURL)) {
         continue;
       }

       // NOTE(review): `sizes.files` may have no entry for a source (presumably one with
       // no generated bytes attributed to it - confirm against getScriptGeneratedSizes).
       // Letting `undefined` through would become a NaN attributedSize, which breaks the
       // sort comparator in normalizeDuplication and poisons per-package sums.
       const sourceSize = sizes.files[sourceURL];
       if (!Number.isFinite(sourceSize)) {
         continue;
       }

       sourceDataArray.push({
         source: normalizeSource(sourceURL),
         resourceSize: sourceSize,
       });
     }
   }

   const duplication: ScriptDuplication = new Map();
   for (const [script, sourceDataArray] of sourceDatasMap) {
     // The ratio only depends on the script, so look it up once per script.
     const compressionRatio = script.request ? compressionRatios.get(script.request.args.data.requestId) ?? 1 : 1;

     for (const sourceData of sourceDataArray) {
       let data = duplication.get(sourceData.source);
       if (!data) {
         data = {estimatedDuplicateBytes: 0, duplicates: []};
         duplication.set(sourceData.source, data);
       }
       data.duplicates.push({
         script,
         attributedSize: Math.round(sourceData.resourceSize * compressionRatio),
       });
     }
   }

   // Group before normalizing, so that package totals include files that are
   // individually below the size thresholds.
   const duplicationGroupedByNodeModules = groupByNodeModules(duplication);

   normalizeDuplication(duplication);
   normalizeDuplication(duplicationGroupedByNodeModules);

   return {
     duplication: sorted(duplication),
     duplicationGroupedByNodeModules: sorted(duplicationGroupedByNodeModules),
   };
 }
	// Copyright 2025 The Chromium Authors
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	import * as Handlers from '../handlers/handlers.js';

	/** Copies whose attributed size is below this value (half of 1024) are ignored. */
	const ABSOLUTE_SIZE_THRESHOLD_BYTES = 1024 / 2;
	/** Copies smaller than this fraction of the largest copy of the same module are ignored. */
	const RELATIVE_SIZE_THRESHOLD = 0.1;

	/** Size information for one source map `sources` entry within a single script. */
	interface SourceData {
	/** The `sources` entry after being passed through `normalizeSource`. */
	source: string;
	/** Size recorded for this source in the script's generated-size breakdown, before any compression ratio is applied. */
	resourceSize: number;
	}

	/**
	 * Canonicalizes a source map `sources` entry so the same module bundled into
	 * different scripts ends up under the same key.
	 *
	 * @param source A `sources` entry (URL or file path).
	 * @returns The entry without a trailing `?`, cut down to start at its last
	 *     `node_modules` occurrence when it has one.
	 */
	export function normalizeSource(source: string): string {
	  // A trailing question mark is dropped - b/c webpack.
	  const withoutTrailingQuery = source.replace(/\?$/, '');

	  // Dependencies are keyed by everything from the last `node_modules` onwards, so
	  // the location of the package on disk does not matter.
	  const start = withoutTrailingQuery.lastIndexOf('node_modules');
	  return start === -1 ? withoutTrailingQuery : withoutTrailingQuery.substring(start);
	}

	/**
	 * Reports whether a `sources` entry should be left out of the duplication analysis.
	 *
	 * @param source A raw (not yet normalized) `sources` entry.
	 * @returns true when the entry contains one of the ignored markers.
	 */
	function shouldIgnoreSource(source: string): boolean {
	  const ignoredMarkers = [
	    // Bundle overhead.
	    'webpack/bootstrap',
	    '(webpack)/buildin',
	    // Webpack module shims, i.e. aliases of the form `module.exports = window.jQuery`.
	    'external ',
	  ];
	  return ignoredMarkers.some(marker => source.includes(marker));
	}

	/**
	* The key is a source map `sources` entry (these are URLs/file paths), but normalized
	* via `normalizeSource`.
	*
	* The value is an object with an entry for every script that has a source map which
	* denotes that this source was used, along with the estimated size it takes up in
	* the script.
	*/
	export type ScriptDuplication = Map<string, {
	/**
	* This is the sum of all (but one) `attributedSize` in `duplicates`.
	*
	* One copy of this module is treated as the canonical version and does not
	* count towards this value; every other copy does. The canonical copy is the
	* first entry of `duplicates` once `normalizeDuplication` has sorted it,
	* i.e. the largest copy.
	*
	* In the case of all copies being the same version, all sizes are
	* equal and the selection doesn't matter (ignoring compression ratios). When
	* the copies are different versions, it does matter. Ideally the newest
	* version would be the canonical copy, but version information is not present.
	* Instead, size is used as a heuristic for latest version. This makes the
	* value here conservative in its estimation.
	*/
	estimatedDuplicateBytes: number,
	duplicates: Array<{
	script: Handlers.ModelHandlers.Scripts.Script,
	/**
	* The number of bytes in the script bundle that map back to this module,
	* in terms of estimated impact on transfer size.
	*/
	attributedSize: number,
	}>,
	}>;

	/**
	 * Normalizes a @see ScriptDuplication in place: orders every `duplicates` array by
	 * attributedSize (largest first), discards copies that are too small, removes
	 * sources left with fewer than two copies, and fills in estimatedDuplicateBytes.
	 *
	 * @param duplication The map to normalize; both the map and its values are mutated.
	 */
	export function normalizeDuplication(duplication: ScriptDuplication): void {
	  for (const [source, entry] of duplication) {
	    // Largest copy first; it is the one that is not counted as duplicated.
	    entry.duplicates.sort((left, right) => right.attributedSize - left.attributedSize);

	    // Drop copies that are tiny relative to the largest copy.
	    if (entry.duplicates.length > 1) {
	      const largestSize = entry.duplicates[0].attributedSize;
	      entry.duplicates =
	          entry.duplicates.filter(({attributedSize}) => attributedSize / largestSize >= RELATIVE_SIZE_THRESHOLD);
	    }

	    // Drop copies that are tiny in absolute terms.
	    entry.duplicates = entry.duplicates.filter(({attributedSize}) => attributedSize >= ABSOLUTE_SIZE_THRESHOLD_BYTES);

	    // A source with at most one remaining copy is not duplicated.
	    if (entry.duplicates.length <= 1) {
	      duplication.delete(source);
	      continue;
	    }

	    // Everything after the first (largest) copy counts as duplicated bytes.
	    let duplicateBytes = 0;
	    for (let i = 1; i < entry.duplicates.length; i++) {
	      duplicateBytes += entry.duplicates[i].attributedSize;
	    }
	    entry.estimatedDuplicateBytes = duplicateBytes;
	  }
	}

	/**
	 * Like `String.prototype.indexOf`, except that a miss yields the length of
	 * `haystack` instead of -1.
	 *
	 * @param haystack String to search in.
	 * @param needle Substring to look for.
	 * @param startPosition Index at which the search starts.
	 * @returns Index of the first match at or after `startPosition`, or `haystack.length`.
	 */
	function indexOfOrLength(haystack: string, needle: string, startPosition = 0): number {
	  const found = haystack.indexOf(needle, startPosition);
	  if (found === -1) {
	    return haystack.length;
	  }
	  return found;
	}

	/**
	 * Extracts the package name from a path, looking at what follows the last
	 * `node_modules/` (or at the whole string when there is none).
	 *
	 * @param source A path such as `node_modules/@scope/pkg/index.js`.
	 * @returns The first path segment, or the first two segments when the path
	 *     starts with `@` (scoped package).
	 */
	export function getNodeModuleName(source: string): string {
	  const marker = 'node_modules/';
	  const markerIndex = source.lastIndexOf(marker);
	  const packagePath = markerIndex === -1 ? source : source.slice(markerIndex + marker.length);

	  const firstSlash = indexOfOrLength(packagePath, '/');
	  // Scoped packages (`@scope/name`) span two path segments.
	  const nameEnd = packagePath.startsWith('@') ? indexOfOrLength(packagePath, '/', firstSlash + 1) : firstSlash;
	  return packagePath.slice(0, nameEnd);
	}

	/**
	 * Builds a copy of `duplication` in which all sources belonging to the same
	 * node_modules package are merged under a single `node_modules/<package>` key.
	 * Sources that do not mention `node_modules` are carried over unchanged (the
	 * value object is shared with the input map, not cloned).
	 *
	 * @param duplication Per-source duplication data.
	 * @returns A new map; merged entries sum `attributedSize` per script.
	 */
	function groupByNodeModules(duplication: ScriptDuplication): ScriptDuplication {
	  const grouped: ScriptDuplication = new Map();

	  duplication.forEach((data, source) => {
	    if (!source.includes('node_modules')) {
	      grouped.set(source, data);
	      return;
	    }

	    const packageKey = `node_modules/${getNodeModuleName(source)}`;
	    let packageData = grouped.get(packageKey);
	    if (!packageData) {
	      packageData = {
	        duplicates: [],
	        // This is calculated in normalizeDuplication.
	        estimatedDuplicateBytes: 0,
	      };
	      grouped.set(packageKey, packageData);
	    }

	    // Accumulate per script, so a package contributes one entry per script.
	    for (const {script, attributedSize} of data.duplicates) {
	      const existing = packageData.duplicates.find(candidate => candidate.script === script);
	      if (existing) {
	        existing.attributedSize += attributedSize;
	      } else {
	        packageData.duplicates.push({script, attributedSize});
	      }
	    }
	  });

	  return grouped;
	}

	/**
	 * Returns a new map with the same entries, ordered by estimated savings
	 * (`estimatedDuplicateBytes`), largest first. The input map is not modified.
	 */
	function sorted(duplication: ScriptDuplication): ScriptDuplication {
	  const entries = Array.from(duplication.entries());
	  entries.sort(([, left], [, right]) => right.estimatedDuplicateBytes - left.estimatedDuplicateBytes);
	  return new Map(entries);
	}

	/**
	 * Returns 2 @see ScriptDuplication for the given collection of script contents + source maps:
	 *
	 * 1. `duplication` keys correspond to authored files
	 * 2. `duplicationGroupedByNodeModules` keys correspond to authored files, except all files
	 *    within the same node_module package are aggregated under the same entry.
	 *
	 * Scripts without content or a source map, and scripts whose generated sizes are
	 * unavailable or report an error, are skipped.
	 *
	 * @param scriptsData The scripts to inspect.
	 * @param compressionRatios Per-request multipliers, keyed by request id, used to turn a
	 *     source's resource size into an estimated transfer size. Scripts with no request, or
	 *     with no entry in this map, use a ratio of 1.
	 * @returns Both maps, normalized via `normalizeDuplication` and ordered by
	 *     `estimatedDuplicateBytes`, largest first.
	 */
	export function computeScriptDuplication(
	    scriptsData: Handlers.ModelHandlers.Scripts.ScriptsData, compressionRatios: Map<string, number>):
	    {duplication: ScriptDuplication, duplicationGroupedByNodeModules: ScriptDuplication} {
	  const sourceDatasMap = new Map<Handlers.ModelHandlers.Scripts.Script, SourceData[]>();

	  // Determine size of each `sources` entry.
	  for (const script of scriptsData.scripts) {
	    if (!script.content || !script.sourceMap) {
	      continue;
	    }

	    const sizes = Handlers.ModelHandlers.Scripts.getScriptGeneratedSizes(script);
	    if (!sizes) {
	      continue;
	    }

	    if ('errorMessage' in sizes) {
	      console.error(sizes.errorMessage);
	      continue;
	    }

	    const sourceDataArray: SourceData[] = [];
	    sourceDatasMap.set(script, sourceDataArray);

	    for (const sourceURL of script.sourceMap.sourceURLs()) {
	      if (shouldIgnoreSource(sourceURL)) {
	        continue;
	      }

	      // NOTE(review): `sizes.files` may have no entry for a source (presumably one with
	      // no generated bytes attributed to it - confirm against getScriptGeneratedSizes).
	      // Letting `undefined` through would become a NaN attributedSize, which breaks the
	      // sort comparator in normalizeDuplication and poisons per-package sums.
	      const sourceSize = sizes.files[sourceURL];
	      if (!Number.isFinite(sourceSize)) {
	        continue;
	      }

	      sourceDataArray.push({
	        source: normalizeSource(sourceURL),
	        resourceSize: sourceSize,
	      });
	    }
	  }

	  const duplication: ScriptDuplication = new Map();
	  for (const [script, sourceDataArray] of sourceDatasMap) {
	    // The ratio only depends on the script, so look it up once per script.
	    const compressionRatio = script.request ? compressionRatios.get(script.request.args.data.requestId) ?? 1 : 1;

	    for (const sourceData of sourceDataArray) {
	      let data = duplication.get(sourceData.source);
	      if (!data) {
	        data = {estimatedDuplicateBytes: 0, duplicates: []};
	        duplication.set(sourceData.source, data);
	      }
	      data.duplicates.push({
	        script,
	        attributedSize: Math.round(sourceData.resourceSize * compressionRatio),
	      });
	    }
	  }

	  // Group before normalizing, so that package totals include files that are
	  // individually below the size thresholds.
	  const duplicationGroupedByNodeModules = groupByNodeModules(duplication);

	  normalizeDuplication(duplication);
	  normalizeDuplication(duplicationGroupedByNodeModules);

	  return {
	    duplication: sorted(duplication),
	    duplicationGroupedByNodeModules: sorted(duplicationGroupedByNodeModules),
	  };
	}