FileAPI/support/send-file-form-helper.js - external/w3c/web-platform-tests - Git at Google

 'use strict';

 // See /FileAPI/file/resources/echo-content-escaped.py
 // Replicates the escaping applied by echo-content-escaped.py so that
 // expected text can be compared with the echoed response:
 //
 // - every backslash is doubled;
 // - every code unit outside printable ASCII (0x20-0x7E) becomes \x
 //   followed by its lowercase hex value, zero-padded to two digits;
 // - an escaped CR LF pair (\x0d\x0a) is turned back into a real "\r\n".
 function escapeString(string) {
   const toHexEscape = (unit) =>
     "\\x" + unit.charCodeAt(0).toString(16).padStart(2, "0");

   const backslashesDoubled = string.replace(/\\/g, "\\\\");
   const nonPrintablesEscaped =
     backslashesDoubled.replace(/[^\x20-\x7E]/g, toHexEscape);
   return nonPrintablesEscaped.replace(/\\x0d\\x0a/g, "\r\n");
 }

 // Rationale for this particular test character sequence, which is
 // used in filenames and also in file contents:
 //
 // - ABC~ ensures the string starts with something we can read to
 //   ensure it is from the correct source; ~ is used because even
 //   some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
 //   interpret it differently.
 // - ‾¥ are inside a single-byte range of ISO-2022-JP and help
 //   diagnose problems due to filesystem encoding or locale
 // - ≈ is inside IBM437 and helps diagnose problems due to filesystem
 //   encoding or locale
 // - ¤ is inside Latin-1 and helps diagnose problems due to
 //   filesystem encoding or locale; it is also the "simplest" case
 //   needing substitution in ISO-2022-JP
 // - ･ is inside a single-byte range of ISO-2022-JP in some variants
 //   and helps diagnose problems due to filesystem encoding or locale;
 //   on the web it is distinct when decoding but unified when encoding
 // - ・ is inside a double-byte range of ISO-2022-JP and helps
 //   diagnose problems due to filesystem encoding or locale
 // - • is inside Windows-1252 and helps diagnose problems due to
 //   filesystem encoding or locale and also ensures these aren't
 //   accidentally turned into e.g. control codes
 // - ∙ is inside IBM437 and helps diagnose problems due to filesystem
 //   encoding or locale
 // - · is inside Latin-1 and helps diagnose problems due to
 //   filesystem encoding or locale and also ensures HTML named
 //   character references (e.g. &middot;) are not used
 // - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
 //   filesystem encoding or locale and also ensures these aren't
 //   accidentally turned into e.g. control codes
 // - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
 //   output easier to spot
 // - 星 is inside ISO-2022-JP on a Kanji page and makes correct
 //   output easier to spot
 // - 🌟 is outside the BMP and makes incorrect surrogate pair
 //   substitution detectable and ensures substitutions work
 //   correctly immediately after Kanji 2-byte ISO-2022-JP
 // - 星 repeated here ensures the correct codec state is used
 //   after a non-BMP substitution
 // - ★ repeated here also makes correct output easier to spot
 // - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
 //   filesystem encoding or locale and also ensures these aren't
 //   accidentally turned into e.g. control codes and also ensures
 //   substitutions work correctly immediately after non-Kanji
 //   2-byte ISO-2022-JP
 // - · is inside Latin-1 and helps diagnose problems due to
 //   filesystem encoding or locale and also ensures HTML named
 //   character references (e.g. &middot;) are not used
 // - ∙ is inside IBM437 and helps diagnose problems due to filesystem
 //   encoding or locale
 // - • is inside Windows-1252 and again helps diagnose problems
 //   due to filesystem encoding or locale
 // - ・ is inside a double-byte range of ISO-2022-JP and helps
 //   diagnose problems due to filesystem encoding or locale
 // - ･ is inside a single-byte range of ISO-2022-JP in some variants
 //   and helps diagnose problems due to filesystem encoding or locale;
 //   on the web it is distinct when decoding but unified when encoding
 // - ¤ is inside Latin-1 and helps diagnose problems due to
 //   filesystem encoding or locale; again it is a "simple"
 //   substitution case
 // - ≈ is inside IBM437 and helps diagnose problems due to filesystem
 //   encoding or locale
 // - ¥‾ are inside a single-byte range of ISO-2022-JP and help
 //   diagnose problems due to filesystem encoding or locale
 // - ~XYZ ensures earlier errors don't lead to misencoding of
 //   simple ASCII
 //
 // Overall the near-symmetry makes common I18N mistakes like
 // off-by-1-after-non-BMP easier to spot. All the characters
 // are also allowed in Windows Unicode filenames.
 const kTestChars = [
   'ABC~',            // readable ASCII lead-in
   '‾¥≈¤･・•∙·☼★星',  // the run of probe characters described above
   '🌟',              // non-BMP pivot
   '星★☼·∙•・･¤≈¥‾',  // the same run, mirrored
   '~XYZ',            // ASCII tail
 ].join('');

 // The kTestFallback* strings represent the expected byte sequence from
 // encoding kTestChars with the given encoding with "html" replacement
 // mode, isomorphic-decoded. That means, characters that can't be
 // encoded in that encoding get HTML-escaped, but no further
 // `escapeString`-like escapes are needed.
 const kTestFallbackUtf8 = (
   "ABC~\xE2\x80\xBE\xC2\xA5\xE2\x89\x88\xC2\xA4\xEF\xBD\xA5\xE3\x83\xBB\xE2" +
     "\x80\xA2\xE2\x88\x99\xC2\xB7\xE2\x98\xBC\xE2\x98\x85\xE6\x98\x9F\xF0\x9F" +
     "\x8C\x9F\xE6\x98\x9F\xE2\x98\x85\xE2\x98\xBC\xC2\xB7\xE2\x88\x99\xE2\x80" +
     "\xA2\xE3\x83\xBB\xEF\xBD\xA5\xC2\xA4\xE2\x89\x88\xC2\xA5\xE2\x80\xBE~XYZ"
 );

 // The template below is written with the ISO-2022-JP escape sequences
 // already in place; every remaining code point above 0x7F is then
 // swapped for its decimal numeric character reference.
 const kTestFallbackIso2022jp = Array.from(
   "ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B🌟" +
     "\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ",
   (character) => {
     const codePoint = character.codePointAt(0);
     return codePoint > 0x7F ? `&#${codePoint};` : character;
   },
 ).join('');

 // Characters written here as \xNN escapes are left untouched; every code
 // point above 0xFF is swapped for its decimal numeric character
 // reference.
 const kTestFallbackWindows1252 = Array.from(
   "ABC~‾\xA5≈\xA4･・\x95∙\xB7☼★星🌟星★☼\xB7∙\x95・･\xA4≈\xA5‾~XYZ",
   (character) => {
     const codePoint = character.codePointAt(0);
     return codePoint > 0xFF ? `&#${codePoint};` : character;
   },
 ).join('');

 // kTestChars with every code point above 0x7F swapped for its decimal
 // numeric character reference.
 const kTestFallbackXUserDefined = Array.from(
   kTestChars,
   (character) => {
     const codePoint = character.codePointAt(0);
     return codePoint > 0x7F ? `&#${codePoint};` : character;
   },
 ).join('');

 // formPostFileUploadTest - verifies multipart upload structure and
 // numeric character reference replacement for filenames, field names,
 // and field values using form submission.
 //
 // Uses /FileAPI/file/resources/echo-content-escaped.py to echo the
 // upload POST with controls and non-ASCII bytes escaped. This is done
 // because navigations whose response body contains [\0\b\v] may get
 // treated as a download, which is not what we want. Use the
 // `escapeString` function to replicate that kind of escape (note that
 // it takes an isomorphic-decoded string, not a byte sequence).
 //
 // Fields in the parameter object:
 //
 // - fileNameSource: purely explanatory and gives a clue about which
 //   character encoding is the source for the non-7-bit-ASCII parts of
 //   the fileBaseName, or Unicode if no smaller-than-Unicode source
 //   contains all the characters. Used in the test name.
 // - fileBaseName: the not-necessarily-just-7-bit-ASCII file basename
 //   used for the constructed test file. Used in the test name.
 // - formEncoding: the acceptCharset of the form used to submit the
 //   test file. Used in the test name.
 // - expectedEncodedBaseName: the expected formEncoding-encoded
 //   version of fileBaseName, isomorphic-decoded. That means, characters
 //   that can't be encoded in that encoding get HTML-escaped, but no
 //   further `escapeString`-like escapes are needed.
 // Registers a promise_test that posts a generated file through a
 // multipart/form-data form and compares the echoed request body with the
 // text built from expectedEncodedBaseName. fileBaseName, fileNameSource
 // and formEncoding also appear in the test name. Nothing is returned.
 const formPostFileUploadTest = ({
   fileNameSource,
   fileBaseName,
   formEncoding,
   expectedEncodedBaseName,
 }) => {
   // Creates an element and copies `properties` onto it.
   const createElement = (tagName, properties) =>
       Object.assign(document.createElement(tagName), properties);

   // Drops c:\fakepath\ or any other pseudofolder and keeps only the last
   // path segment; both / and \ count as separators.
   const lastPathSegment = (filePath) => filePath.split(/[\/\\]/).pop();

   const percentEncode = (character) => encodeURIComponent(character);

   const runUpload = async (testCase) => {
     if (document.readyState !== 'complete') {
       await new Promise((onLoaded) => addEventListener('load', onLoaded));
     }

     // The form response is loaded into this frame so it can be read back.
     const responseFrame = createElement('iframe', {name: 'formtargetframe'});
     document.body.append(responseFrame);
     testCase.add_cleanup(() => {
       document.body.removeChild(responseFrame);
     });

     const uploadForm = createElement('form', {
       acceptCharset: formEncoding,
       action: '/FileAPI/file/resources/echo-content-escaped.py',
       method: 'POST',
       enctype: 'multipart/form-data',
       target: responseFrame.name,
     });
     document.body.append(uploadForm);
     testCase.add_cleanup(() => {
       document.body.removeChild(uploadForm);
     });

     // Creates an <input>, appends it to the form and hands it back.
     const addInput = (properties) => {
       const input = createElement('input', properties);
       uploadForm.append(input);
       return input;
     };

     // Lets the test confirm which charset the browser used for the form.
     addInput({type: 'hidden', name: '_charset_'});

     // Checks value replacement and encoding independently of file system
     // idiosyncrasies.
     addInput({type: 'hidden', name: 'filename', value: fileBaseName});

     // Name and value swapped, so field names get the same scrutiny.
     addInput({type: 'hidden', name: fileBaseName, value: 'filename'});

     const fileInput = addInput({type: 'file', name: 'file'});

     const transfer = new DataTransfer;
     transfer.items.add(
         new File([kTestChars], fileBaseName, {type: 'text/plain'}));
     fileInput.files = transfer.files;

     // .value carries the historical c:\fakepath\ prefix, but its basename
     // is expected to agree with the name exposed via .files[0].name.
     assert_equals(
         lastPathSegment(fileInput.files[0].name),
         lastPathSegment(fileInput.value),
         `The basename of the field's value should match its files[0].name`);

     await new Promise((onResponse) => {
       uploadForm.submit();
       responseFrame.onload = onResponse;
     });

     const echoedText = responseFrame.contentDocument.body.textContent;
     const echoedLines = echoedText.split('\n');
     // Ignore the empty entry produced by a trailing newline.
     if (echoedLines[echoedLines.length - 1] === '') {
       echoedLines.pop();
     }

     const echoedJson = () => JSON.stringify(echoedText);
     assert_greater_than(
         echoedLines.length,
         2,
         `${fileBaseName}: multipart form data must have at least 3 lines: ${echoedJson()}`);

     const boundary = echoedLines[0];
     const closingDelimiter = boundary + '--';
     assert_equals(
         echoedLines[echoedLines.length - 1],
         closingDelimiter,
         `${fileBaseName}: multipart form data must end with ${boundary}--: ${echoedJson()}`);

     // Values get their line breaks normalized to CRLF; names additionally
     // have CR, LF and double quotes percent-encoded; filenames are
     // percent-encoded without the normalization step.
     const asValue = expectedEncodedBaseName.replace(/\r\n?|\n/g, "\r\n");
     const asName = asValue.replace(/[\r\n"]/g, percentEncode);
     const asFilename =
         expectedEncodedBaseName.replace(/[\r\n"]/g, percentEncode);

     // One multipart section: delimiter, header lines, blank line, body.
     const section = (headerLines, body) =>
         [boundary, ...headerLines, '', body];

     // The echo script escapes controls and non-ASCII bytes, so every
     // caller-provided piece goes through `escapeString` after its other
     // expected transformations.
     const expectedText = [
       ...section(
           ['Content-Disposition: form-data; name="_charset_"'],
           formEncoding),
       ...section(
           ['Content-Disposition: form-data; name="filename"'],
           // Values keep their \r\n line breaks unescaped, and those read
           // back from the iframe as \n.
           escapeString(asValue).replace(/\r\n/g, "\n")),
       ...section(
           [`Content-Disposition: form-data; name="${escapeString(asName)}"`],
           'filename'),
       ...section(
           [
             `Content-Disposition: form-data; name="file"; filename="${
                 escapeString(asFilename)}"`,
             'Content-Type: text/plain',
           ],
           escapeString(kTestFallbackUtf8)),
       closingDelimiter,
     ].join('\n');

     assert_true(
         echoedText.startsWith(expectedText),
         `Unexpected multipart-shaped form data received:\n${echoedText}\nExpected:\n${expectedText}`);
   };

   promise_test(
       runUpload,
       `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`);
 };
	'use strict';

	// See /FileAPI/file/resources/echo-content-escaped.py
	// Replicates the escaping applied by echo-content-escaped.py so that
	// expected text can be compared with the echoed response:
	//
	// - every backslash is doubled;
	// - every code unit outside printable ASCII (0x20-0x7E) becomes \x
	//   followed by its lowercase hex value, zero-padded to two digits;
	// - an escaped CR LF pair (\x0d\x0a) is turned back into a real "\r\n".
	function escapeString(string) {
	  const toHexEscape = (unit) =>
	    "\\x" + unit.charCodeAt(0).toString(16).padStart(2, "0");

	  const backslashesDoubled = string.replace(/\\/g, "\\\\");
	  const nonPrintablesEscaped =
	    backslashesDoubled.replace(/[^\x20-\x7E]/g, toHexEscape);
	  return nonPrintablesEscaped.replace(/\\x0d\\x0a/g, "\r\n");
	}

	// Rationale for this particular test character sequence, which is
	// used in filenames and also in file contents:
	//
	// - ABC~ ensures the string starts with something we can read to
	// ensure it is from the correct source; ~ is used because even
	// some 1-byte otherwise-ASCII-like parts of ISO-2022-JP
	// interpret it differently.
	// - ‾¥ are inside a single-byte range of ISO-2022-JP and help
	// diagnose problems due to filesystem encoding or locale
	// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
	// encoding or locale
	// - ¤ is inside Latin-1 and helps diagnose problems due to
	// filesystem encoding or locale; it is also the "simplest" case
	// needing substitution in ISO-2022-JP
	// - ･ is inside a single-byte range of ISO-2022-JP in some variants
	// and helps diagnose problems due to filesystem encoding or locale;
	// on the web it is distinct when decoding but unified when encoding
	// - ・ is inside a double-byte range of ISO-2022-JP and helps
	// diagnose problems due to filesystem encoding or locale
	// - • is inside Windows-1252 and helps diagnose problems due to
	// filesystem encoding or locale and also ensures these aren't
	// accidentally turned into e.g. control codes
	// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
	// encoding or locale
	// - · is inside Latin-1 and helps diagnose problems due to
	// filesystem encoding or locale and also ensures HTML named
	// character references (e.g. &middot;) are not used
	// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
	// filesystem encoding or locale and also ensures these aren't
	// accidentally turned into e.g. control codes
	// - ★ is inside ISO-2022-JP on a non-Kanji page and makes correct
	// output easier to spot
	// - 星 is inside ISO-2022-JP on a Kanji page and makes correct
	// output easier to spot
	// - 🌟 is outside the BMP and makes incorrect surrogate pair
	// substitution detectable and ensures substitutions work
	// correctly immediately after Kanji 2-byte ISO-2022-JP
	// - 星 repeated here ensures the correct codec state is used
	// after a non-BMP substitution
	// - ★ repeated here also makes correct output easier to spot
	// - ☼ is inside IBM437 shadowing C0 and helps diagnose problems due to
	// filesystem encoding or locale and also ensures these aren't
	// accidentally turned into e.g. control codes and also ensures
	// substitutions work correctly immediately after non-Kanji
	// 2-byte ISO-2022-JP
	// - · is inside Latin-1 and helps diagnose problems due to
	// filesystem encoding or locale and also ensures HTML named
	// character references (e.g. &middot;) are not used
	// - ∙ is inside IBM437 and helps diagnose problems due to filesystem
	// encoding or locale
	// - • is inside Windows-1252 and again helps diagnose problems
	// due to filesystem encoding or locale
	// - ・ is inside a double-byte range of ISO-2022-JP and helps
	// diagnose problems due to filesystem encoding or locale
	// - ･ is inside a single-byte range of ISO-2022-JP in some variants
	// and helps diagnose problems due to filesystem encoding or locale;
	// on the web it is distinct when decoding but unified when encoding
	// - ¤ is inside Latin-1 and helps diagnose problems due to
	// filesystem encoding or locale; again it is a "simple"
	// substitution case
	// - ≈ is inside IBM437 and helps diagnose problems due to filesystem
	// encoding or locale
	// - ¥‾ are inside a single-byte range of ISO-2022-JP and help
	// diagnose problems due to filesystem encoding or locale
	// - ~XYZ ensures earlier errors don't lead to misencoding of
	// simple ASCII
	//
	// Overall the near-symmetry makes common I18N mistakes like
	// off-by-1-after-non-BMP easier to spot. All the characters
	// are also allowed in Windows Unicode filenames.
	const kTestChars = [
	  'ABC~',            // readable ASCII lead-in
	  '‾¥≈¤･・•∙·☼★星',  // the run of probe characters described above
	  '🌟',              // non-BMP pivot
	  '星★☼·∙•・･¤≈¥‾',  // the same run, mirrored
	  '~XYZ',            // ASCII tail
	].join('');

	// The kTestFallback* strings represent the expected byte sequence from
	// encoding kTestChars with the given encoding with "html" replacement
	// mode, isomorphic-decoded. That means, characters that can't be
	// encoded in that encoding get HTML-escaped, but no further
	// `escapeString`-like escapes are needed.
	const kTestFallbackUtf8 = (
	"ABC~\xE2\x80\xBE\xC2\xA5\xE2\x89\x88\xC2\xA4\xEF\xBD\xA5\xE3\x83\xBB\xE2" +
	"\x80\xA2\xE2\x88\x99\xC2\xB7\xE2\x98\xBC\xE2\x98\x85\xE6\x98\x9F\xF0\x9F" +
	"\x8C\x9F\xE6\x98\x9F\xE2\x98\x85\xE2\x98\xBC\xC2\xB7\xE2\x88\x99\xE2\x80" +
	"\xA2\xE3\x83\xBB\xEF\xBD\xA5\xC2\xA4\xE2\x89\x88\xC2\xA5\xE2\x80\xBE~XYZ"
	);

	// The template below is written with the ISO-2022-JP escape sequences
	// already in place; every remaining code point above 0x7F is then
	// swapped for its decimal numeric character reference.
	const kTestFallbackIso2022jp = Array.from(
	  "ABC~\x1B(J~\\≈¤\x1B$B!&!&\x1B(B•∙·☼\x1B$B!z@1\x1B(B🌟" +
	    "\x1B$B@1!z\x1B(B☼·∙•\x1B$B!&!&\x1B(B¤≈\x1B(J\\~\x1B(B~XYZ",
	  (character) => {
	    const codePoint = character.codePointAt(0);
	    return codePoint > 0x7F ? `&#${codePoint};` : character;
	  },
	).join('');

	// Characters written here as \xNN escapes are left untouched; every code
	// point above 0xFF is swapped for its decimal numeric character
	// reference.
	const kTestFallbackWindows1252 = Array.from(
	  "ABC~‾\xA5≈\xA4･・\x95∙\xB7☼★星🌟星★☼\xB7∙\x95・･\xA4≈\xA5‾~XYZ",
	  (character) => {
	    const codePoint = character.codePointAt(0);
	    return codePoint > 0xFF ? `&#${codePoint};` : character;
	  },
	).join('');

	// kTestChars with every code point above 0x7F swapped for its decimal
	// numeric character reference.
	const kTestFallbackXUserDefined = Array.from(
	  kTestChars,
	  (character) => {
	    const codePoint = character.codePointAt(0);
	    return codePoint > 0x7F ? `&#${codePoint};` : character;
	  },
	).join('');

	// formPostFileUploadTest - verifies multipart upload structure and
	// numeric character reference replacement for filenames, field names,
	// and field values using form submission.
	//
	// Uses /FileAPI/file/resources/echo-content-escaped.py to echo the
	// upload POST with controls and non-ASCII bytes escaped. This is done
	// because navigations whose response body contains [\0\b\v] may get
	// treated as a download, which is not what we want. Use the
	// `escapeString` function to replicate that kind of escape (note that
	// it takes an isomorphic-decoded string, not a byte sequence).
	//
	// Fields in the parameter object:
	//
	// - fileNameSource: purely explanatory and gives a clue about which
	// character encoding is the source for the non-7-bit-ASCII parts of
	// the fileBaseName, or Unicode if no smaller-than-Unicode source
	// contains all the characters. Used in the test name.
	// - fileBaseName: the not-necessarily-just-7-bit-ASCII file basename
	// used for the constructed test file. Used in the test name.
	// - formEncoding: the acceptCharset of the form used to submit the
	// test file. Used in the test name.
	// - expectedEncodedBaseName: the expected formEncoding-encoded
	// version of fileBaseName, isomorphic-decoded. That means, characters
	// that can't be encoded in that encoding get HTML-escaped, but no
	// further `escapeString`-like escapes are needed.
	// Registers a promise_test that posts a generated file through a
	// multipart/form-data form targeting a helper iframe, then checks that
	// the echoed request body starts with the text expected for
	// expectedEncodedBaseName. fileBaseName, fileNameSource and formEncoding
	// also appear in the test name. Nothing is returned.
	const formPostFileUploadTest = ({
	  fileNameSource,
	  fileBaseName,
	  formEncoding,
	  expectedEncodedBaseName,
	}) => {
	  promise_test(async testCase => {

	    if (document.readyState !== 'complete') {
	      await new Promise(resolve => addEventListener('load', resolve));
	    }

	    // The form response is loaded into this frame so it can be read back.
	    const formTargetFrame = Object.assign(document.createElement('iframe'), {
	      name: 'formtargetframe',
	    });
	    document.body.append(formTargetFrame);
	    testCase.add_cleanup(() => {
	      document.body.removeChild(formTargetFrame);
	    });

	    const form = Object.assign(document.createElement('form'), {
	      acceptCharset: formEncoding,
	      action: '/FileAPI/file/resources/echo-content-escaped.py',
	      method: 'POST',
	      enctype: 'multipart/form-data',
	      target: formTargetFrame.name,
	    });
	    document.body.append(form);
	    testCase.add_cleanup(() => {
	      document.body.removeChild(form);
	    });

	    // Used to verify that the browser agrees with the test about
	    // which form charset is used.
	    form.append(Object.assign(document.createElement('input'), {
	      type: 'hidden',
	      name: '_charset_',
	    }));

	    // Used to verify that the browser agrees with the test about
	    // field value replacement and encoding independently of file system
	    // idiosyncrasies.
	    form.append(Object.assign(document.createElement('input'), {
	      type: 'hidden',
	      name: 'filename',
	      value: fileBaseName,
	    }));

	    // Same, but with name and value reversed to ensure field names
	    // get the same treatment.
	    form.append(Object.assign(document.createElement('input'), {
	      type: 'hidden',
	      name: fileBaseName,
	      value: 'filename',
	    }));

	    const fileInput = Object.assign(document.createElement('input'), {
	      type: 'file',
	      name: 'file',
	    });
	    form.append(fileInput);

	    // Removes c:\fakepath\ or other pseudofolder and returns just the
	    // final component of filePath; allows both / and \ as segment
	    // delimiters.
	    const baseNameOfFilePath = filePath => filePath.split(/[\/\\]/).pop();
	    await new Promise(resolve => {
	      const dataTransfer = new DataTransfer;
	      dataTransfer.items.add(
	          new File([kTestChars], fileBaseName, {type: 'text/plain'}));
	      fileInput.files = dataTransfer.files;
	      // For historical reasons .value will be prefixed with
	      // c:\fakepath\, but the basename should match the file name
	      // exposed through the newer .files[0].name API. This check
	      // verifies that assumption.
	      assert_equals(
	          baseNameOfFilePath(fileInput.files[0].name),
	          baseNameOfFilePath(fileInput.value),
	          `The basename of the field's value should match its files[0].name`);
	      form.submit();
	      formTargetFrame.onload = resolve;
	    });

	    const formDataText = formTargetFrame.contentDocument.body.textContent;
	    const formDataLines = formDataText.split('\n');
	    // Drop the empty entry produced by a trailing newline.
	    if (formDataLines.length && !formDataLines[formDataLines.length - 1]) {
	      --formDataLines.length;
	    }
	    assert_greater_than(
	        formDataLines.length,
	        2,
	        `${fileBaseName}: multipart form data must have at least 3 lines: ${
	             JSON.stringify(formDataText)
	           }`);
	    const boundary = formDataLines[0];
	    assert_equals(
	        formDataLines[formDataLines.length - 1],
	        boundary + '--',
	        `${fileBaseName}: multipart form data must end with ${boundary}--: ${
	             JSON.stringify(formDataText)
	           }`);

	    // Normalize every line break (CRLF, lone CR or lone LF) to CRLF. The
	    // `|` must be an unescaped alternation: written as `\|` it would only
	    // match a literal pipe character and lone CR / LF would never be
	    // normalized.
	    const asValue = expectedEncodedBaseName.replace(/\r\n?|\n/g, "\r\n");
	    // Names additionally get CR, LF and double quotes percent-encoded;
	    // filenames are percent-encoded without the normalization step.
	    const asName = asValue.replace(/[\r\n"]/g, encodeURIComponent);
	    const asFilename = expectedEncodedBaseName.replace(/[\r\n"]/g, encodeURIComponent);

	    // The response body from echo-content-escaped.py has controls and non-ASCII
	    // bytes escaped, so any caller-provided field that might contain such bytes
	    // must be passed to `escapeString`, after any other expected
	    // transformations.
	    const expectedText = [
	      boundary,
	      'Content-Disposition: form-data; name="_charset_"',
	      '',
	      formEncoding,
	      boundary,
	      'Content-Disposition: form-data; name="filename"',
	      '',
	      // Unlike for names and filenames, multipart/form-data values don't escape
	      // \r\n linebreaks, and when they're read from an iframe they become \n.
	      escapeString(asValue).replace(/\r\n/g, "\n"),
	      boundary,
	      `Content-Disposition: form-data; name="${escapeString(asName)}"`,
	      '',
	      'filename',
	      boundary,
	      `Content-Disposition: form-data; name="file"; ` +
	          `filename="${escapeString(asFilename)}"`,
	      'Content-Type: text/plain',
	      '',
	      escapeString(kTestFallbackUtf8),
	      boundary + '--',
	    ].join('\n');

	    assert_true(
	        formDataText.startsWith(expectedText),
	        `Unexpected multipart-shaped form data received:\n${
	             formDataText
	           }\nExpected:\n${expectedText}`);
	  }, `Upload ${fileBaseName} (${fileNameSource}) in ${formEncoding} form`);
	};