From 2f0010294fb276ede97eaafa4c18a69c03b6e55a Mon Sep 17 00:00:00 2001 From: GarboMuffin Date: Thu, 23 May 2024 23:47:40 -0500 Subject: [PATCH] Fix packaging very large projects as HTML files in Chrome (#862) Actually fixes https://github.com/TurboWarp/packager/issues/528 https://github.com/TurboWarp/packager/pull/861 fixed running large projects in Chrome, but packaging still used string concatenation so it remained broken. Now the concatenation part is done using TextEncoder & Uint8Arrays and a template tag function to keep it readable. This time I have actually tested it with a 1.0GB sb3. Breaking Node API change: The data property returned by Packager#package() is now always a Uint8Array instead of sometimes string and sometimes ArrayBuffer. --- node-api-docs/README.md | 2 +- src/packager/encode-big-string.js | 66 +++++++++++++++++++++++++++++++ src/packager/packager.js | 18 +++++---- test/p4/encode-big-string.test.js | 41 +++++++++++++++++++ 4 files changed, 118 insertions(+), 9 deletions(-) create mode 100644 src/packager/encode-big-string.js create mode 100644 test/p4/encode-big-string.test.js diff --git a/node-api-docs/README.md b/node-api-docs/README.md index f0c7f46..c552970 100644 --- a/node-api-docs/README.md +++ b/node-api-docs/README.md @@ -126,7 +126,7 @@ const filename = result.filename; // MIME type of the packaged project. Either "text/html" or "application/zip" const type = result.type; -// The packaged project's data. Will be either a string (for type text/html) or ArrayBuffer (for type application/zip). +// The packaged project's data. Will always be a Uint8Array. 
const data = result.data; ``` diff --git a/src/packager/encode-big-string.js b/src/packager/encode-big-string.js new file mode 100644 index 0000000..abc87a9 --- /dev/null +++ b/src/packager/encode-big-string.js @@ -0,0 +1,66 @@ +/** + * @template T + * @param {T[]} destination + * @param {T[]} newItems + */ +const concatInPlace = (destination, newItems) => { + for (const item of newItems) { + destination.push(item); + } +}; + +/** + * @param {unknown} value String, number, boolean, null, undefined, or a recursive array of them + * @returns {Uint8Array[]} UTF-8 arrays, in order + */ +const encodeComponent = (value) => { + if (typeof value === 'string') { + return [ + new TextEncoder().encode(value) + ]; + } else if (typeof value === 'number' || typeof value === 'boolean' || typeof value === 'undefined' || value === null) { + return [ + new TextEncoder().encode(String(value)) + ]; + } else if (Array.isArray(value)) { + const result = []; + for (const i of value) { + concatInPlace(result, encodeComponent(i)); + } + return result; + } else { + throw new Error(`Unknown value in encodeComponent: ${value}`); + } +}; + +/** + * Tagged template function to generate encoded UTF-8 without string concatenation as Chrome cannot handle + * strings that are longer than 0x1fffffe8 characters. 
+ * @param {TemplateStringsArray} strings + * @param {unknown[]} values + * @returns {Uint8Array} + */ +const encodeBigString = (strings, ...values) => { + /** @type {Uint8Array[]} */ + const encodedChunks = []; + + for (let i = 0; i < strings.length - 1; i++) { + concatInPlace(encodedChunks, encodeComponent(strings[i])); + concatInPlace(encodedChunks, encodeComponent(values[i])); + } + concatInPlace(encodedChunks, encodeComponent(strings[strings.length - 1])); + + let totalByteLength = 0; + for (let i = 0; i < encodedChunks.length; i++) { + totalByteLength += encodedChunks[i].byteLength; + } + + const resultBuffer = new Uint8Array(totalByteLength); + for (let i = 0, j = 0; i < encodedChunks.length; i++) { + resultBuffer.set(encodedChunks[i], j); + j += encodedChunks[i].byteLength; + } + return resultBuffer; +}; + +export default encodeBigString; diff --git a/src/packager/packager.js b/src/packager/packager.js index 3944127..4303903 100644 --- a/src/packager/packager.js +++ b/src/packager/packager.js @@ -11,6 +11,7 @@ import {APP_NAME, WEBSITE, COPYRIGHT_NOTICE, ACCENT_COLOR} from './brand'; import {OutdatedPackagerError} from '../common/errors'; import {darken} from './colors'; import {Adapter} from './adapter'; +import encodeBigString from './encode-big-string'; const PROGRESS_LOADED_SCRIPTS = 0.1; @@ -881,7 +882,7 @@ cd "$(dirname "$0")" } async generateGetProjectData () { - let result = ''; + const result = []; let getProjectDataFunction = ''; let isZip = false; let storageProgressStart; @@ -895,7 +896,7 @@ cd "$(dirname "$0")" const projectData = new Uint8Array(this.project.arrayBuffer); // keep this up-to-date with base85.js - result += ` + result.push(` `; + `); // To avoid unnecessary padding, this should be a multiple of 4. 
const CHUNK_SIZE = 1024 * 64; @@ -934,7 +935,7 @@ cd "$(dirname "$0")" for (let i = 0; i < projectData.length; i += CHUNK_SIZE) { const projectChunk = projectData.subarray(i, i + CHUNK_SIZE); const base85 = encode(projectChunk); - result += `\n`; + result.push(`\n`); } getProjectDataFunction = `() => { @@ -978,7 +979,7 @@ cd "$(dirname "$0")" })`; } - result += ` + result.push(` `; + `); + return result; } @@ -1107,7 +1109,7 @@ cd "$(dirname "$0")" this.ensureNotAborted(); await this.loadResources(); this.ensureNotAborted(); - const html = ` + const html = encodeBigString` @@ -1565,7 +1567,7 @@ cd "$(dirname "$0")" this.ensureNotAborted(); return { data: await zip.generateAsync({ - type: 'arraybuffer', + type: 'uint8array', compression: 'DEFLATE', // Use UNIX permissions so that executable bits are properly set for macOS and Linux platform: 'UNIX' diff --git a/test/p4/encode-big-string.test.js b/test/p4/encode-big-string.test.js new file mode 100644 index 0000000..5af8854 --- /dev/null +++ b/test/p4/encode-big-string.test.js @@ -0,0 +1,41 @@ +import encodeBigString from "../../src/packager/encode-big-string"; + +test('simple behavior', () => { + expect(encodeBigString``).toEqual(new Uint8Array([])); + expect(encodeBigString`abc`).toEqual(new Uint8Array([97, 98, 99])); + expect(encodeBigString`a${'bc'}`).toEqual(new Uint8Array([97, 98, 99])); + expect(encodeBigString`${'ab'}c`).toEqual(new Uint8Array([97, 98, 99])); + expect(encodeBigString`${'abc'}`).toEqual(new Uint8Array([97, 98, 99])); + expect(encodeBigString`1${'a'}2${'b'}3${'c'}4`).toEqual(new Uint8Array([49, 97, 50, 98, 51, 99, 52])); + expect(encodeBigString`${''}`).toEqual(new Uint8Array([])); +}); + +test('non-string primitives', () => { + expect(encodeBigString`${1}`).toEqual(new Uint8Array([49])); + expect(encodeBigString`${false}`).toEqual(new Uint8Array([102, 97, 108, 115, 101])); + expect(encodeBigString`${true}`).toEqual(new Uint8Array([116, 114, 117, 101])); + 
expect(encodeBigString`${null}`).toEqual(new Uint8Array([110, 117, 108, 108])); + expect(encodeBigString`${undefined}`).toEqual(new Uint8Array([117, 110, 100, 101, 102, 105, 110, 101, 100])); +}); + +test('array', () => { + expect(encodeBigString`${[]}`).toEqual(new Uint8Array([])); + expect(encodeBigString`${['a', 'b', 'c']}`).toEqual(new Uint8Array([97, 98, 99])); + expect(encodeBigString`${[[[['a'], [['b']], 'c']]]}`).toEqual(new Uint8Array([97, 98, 99])); +}); + +// skipping for now because very slow +test.skip('very big string', () => { + const MAX_LENGTH = 0x1fffffe8; + const maxLength = 'a'.repeat(MAX_LENGTH); + expect(() => maxLength + 'a').toThrow(/Invalid string length/); + const encoded = encodeBigString`${maxLength}aaaaa`; + expect(encoded.byteLength).toBe(MAX_LENGTH + 5); + + // very hot loop, don't call into expect if we don't need to + for (let i = 0; i < encoded.length; i++) { + if (encoded[i] !== 97) { + throw new Error(`Wrong encoding at ${i}`); + } + } +});