fix(core): fix i18n sites SSG memory leak - require.cache (#10599)

This commit is contained in:
Sébastien Lorber 2024-10-22 12:40:57 +02:00 committed by GitHub
parent 9457833df0
commit 776b3ee8c2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 217 additions and 124 deletions

2
.gitignore vendored
View File

@ -14,7 +14,7 @@ package-lock.json
.eslintcache
yarn-error.log
build
website/build
coverage
.docusaurus
.cache-loader

View File

@ -18,7 +18,7 @@ import {
} from './BrokenLinksContext';
import type {PageCollectedData, AppRenderer} from '../common';
const render: AppRenderer = async ({pathname}) => {
const render: AppRenderer['render'] = async ({pathname}) => {
await preload(pathname);
const modules = new Set<string>();

View File

@ -0,0 +1,122 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import fs from 'fs-extra';
import logger, {PerfLogger} from '@docusaurus/logger';
import {mapAsyncSequential} from '@docusaurus/utils';
import {loadContext, type LoadContextParams} from '../../server/site';
import {loadI18n} from '../../server/i18n';
import {buildLocale, type BuildLocaleParams} from './buildLocale';
/**
 * Options accepted by the `build` command.
 *
 * `config`, `locale` and `outDir` are picked as-is from `LoadContextParams`;
 * the remaining flags are specific to the build command.
 */
export type BuildCLIOptions = Pick<
LoadContextParams,
'config' | 'locale' | 'outDir'
> & {
// NOTE(review): presumably turns on the bundle analyzer report; this flag is
// not consumed in this file - confirm against buildLocale
bundleAnalyzer?: boolean;
// NOTE(review): presumably toggles minification of the build output; this
// flag is not consumed in this file - confirm against buildLocale
minify?: boolean;
// When truthy, build() switches BABEL_ENV and NODE_ENV to 'development'
dev?: boolean;
};
/**
 * Entry point of the `build` command: resolves the site directory, works out
 * which locales have to be built, then builds them one after the other.
 *
 * @param siteDirParam Site directory; resolved to its real path before use.
 * @param cliOptions Build options coming from the CLI.
 */
export async function build(
  siteDirParam: string = '.',
  cliOptions: Partial<BuildCLIOptions> = {},
): Promise<void> {
  // Production is the default; the `dev` flag overrides it just below
  process.env.BABEL_ENV = 'production';
  process.env.NODE_ENV = 'production';
  // NOTE(review): Node.js converts values assigned to process.env to strings,
  // so without a CLI locale this presumably stores the string "undefined".
  // Confirm that readers of DOCUSAURUS_CURRENT_LOCALE expect that.
  process.env.DOCUSAURUS_CURRENT_LOCALE = cliOptions.locale;
  if (cliOptions.dev) {
    logger.info`Building in dev mode`;
    process.env.BABEL_ENV = 'development';
    process.env.NODE_ENV = 'development';
  }

  const siteDir = await fs.realpath(siteDirParam);

  // Exit the process as soon as it gets interrupted or terminated
  for (const signal of ['SIGINT', 'SIGTERM']) {
    process.on(signal, () => process.exit());
  }

  const localesToBuild = await PerfLogger.async('Get locales to build', () =>
    getLocalesToBuild({siteDir, cliOptions}),
  );
  if (localesToBuild.length > 1) {
    logger.info`Website will be built for all these locales: ${localesToBuild}`;
  }

  // Locales are built strictly one at a time, in the order computed above
  const buildAllLocales = () =>
    mapAsyncSequential(localesToBuild, async (currentLocale) => {
      await tryToBuildLocale({siteDir, locale: currentLocale, cliOptions});
    });
  await PerfLogger.async(`Build`, buildAllLocales);

  logger.info`Use code=${'npm run serve'} command to test your build locally.`;
}
/**
 * Computes the ordered, non-empty list of locales the build has to process.
 *
 * - When a locale is requested through the CLI, only that locale is returned
 *   and the site is not loaded at all.
 * - Otherwise the site context and i18n config are loaded, and every
 *   configured locale is returned with the default locale first.
 *
 * This function does not log anything: announcing the list of locales is the
 * caller's job, which avoids printing the same message twice per build.
 *
 * @param siteDir Real path of the site directory.
 * @param cliOptions Build options coming from the CLI.
 * @returns The locales to build, in build order.
 */
async function getLocalesToBuild({
  siteDir,
  cliOptions,
}: {
  siteDir: string;
  cliOptions: BuildCLIOptions;
}): Promise<[string, ...string[]]> {
  if (cliOptions.locale) {
    return [cliOptions.locale];
  }

  // Past the early return no locale was requested, so the former
  // `cliOptions.locale ? false : undefined` could only ever be `undefined`.
  // It is still passed explicitly so loadContext receives the same params.
  const context = await loadContext({
    siteDir,
    outDir: cliOptions.outDir,
    config: cliOptions.config,
    locale: cliOptions.locale,
    localizePath: undefined,
  });
  const i18n = await loadI18n(context.siteConfig, {
    locale: cliOptions.locale,
  });

  // We need the default locale to always be the 1st in the list. If we build it
  // last, it would "erase" the localized sites built in sub-folders
  const otherLocales = i18n.locales.filter(
    (locale) => locale !== i18n.defaultLocale,
  );
  return [i18n.defaultLocale, ...otherLocales];
}
/**
 * Builds a single locale inside a PerfLogger span labelled with the locale
 * name. Any failure is rethrown wrapped in an error that names the locale and
 * keeps the original error as its `cause`.
 */
async function tryToBuildLocale(params: BuildLocaleParams) {
  // The actual work is delegated to runBuildLocaleTask, whose comments explain
  // why the build is not isolated in a worker thread or a child process
  // See https://x.com/sebastienlorber/status/1848413716372480338
  const buildTask = async () => {
    await runBuildLocaleTask(params);
  };

  try {
    const perfLabel = `${logger.name(params.locale)}`;
    await PerfLogger.async(perfLabel, buildTask);
  } catch (err) {
    const message = logger.interpolate`Unable to build website for locale name=${params.locale}.`;
    throw new Error(message, {cause: err});
  }
}
/**
 * Runs the build of one localized site by directly awaiting `buildLocale`.
 *
 * This indirection is the single place where a different execution strategy
 * (isolated or concurrent builds) could be plugged in later.
 */
async function runBuildLocaleTask(params: BuildLocaleParams) {
// The goal was to isolate the memory of each localized site build.
// Alternatives that have been tried:
//
// - worker_threads (still running the builds sequentially): it didn't work
//   and produced SIGSEGV / SIGBUS errors
//   See also https://x.com/sebastienlorber/status/1848413716372480338
//
// - child_process: it worked, but it is more complex, requires specifying
//   the memory of the child process, and has weird logging issues to fix
//
// Note: in the future we could try to enable concurrent localized site builds
await buildLocale(params);
}

View File

@ -10,130 +10,34 @@ import path from 'path';
import _ from 'lodash';
import {compile} from '@docusaurus/bundler';
import logger, {PerfLogger} from '@docusaurus/logger';
import {mapAsyncSequential} from '@docusaurus/utils';
import {loadSite, loadContext, type LoadContextParams} from '../server/site';
import {handleBrokenLinks} from '../server/brokenLinks';
import {createBuildClientConfig} from '../webpack/client';
import createServerConfig from '../webpack/server';
import {loadSite} from '../../server/site';
import {handleBrokenLinks} from '../../server/brokenLinks';
import {createBuildClientConfig} from '../../webpack/client';
import createServerConfig from '../../webpack/server';
import {
createConfigureWebpackUtils,
executePluginsConfigureWebpack,
} from '../webpack/configure';
import {loadI18n} from '../server/i18n';
import {executeSSG} from '../ssg/ssgExecutor';
} from '../../webpack/configure';
import {executeSSG} from '../../ssg/ssgExecutor';
import type {
ConfigureWebpackUtils,
LoadedPlugin,
Props,
} from '@docusaurus/types';
import type {SiteCollectedData} from '../common';
import type {SiteCollectedData} from '../../common';
import {BuildCLIOptions} from './build';
export type BuildCLIOptions = Pick<
LoadContextParams,
'config' | 'locale' | 'outDir'
> & {
bundleAnalyzer?: boolean;
minify?: boolean;
dev?: boolean;
};
export async function build(
siteDirParam: string = '.',
cliOptions: Partial<BuildCLIOptions> = {},
): Promise<void> {
process.env.BABEL_ENV = 'production';
process.env.NODE_ENV = 'production';
process.env.DOCUSAURUS_CURRENT_LOCALE = cliOptions.locale;
if (cliOptions.dev) {
logger.info`Building in dev mode`;
process.env.BABEL_ENV = 'development';
process.env.NODE_ENV = 'development';
}
const siteDir = await fs.realpath(siteDirParam);
['SIGINT', 'SIGTERM'].forEach((sig) => {
process.on(sig, () => process.exit());
});
async function tryToBuildLocale({locale}: {locale: string}) {
try {
await PerfLogger.async(`${logger.name(locale)}`, () =>
buildLocale({
siteDir,
locale,
cliOptions,
}),
);
} catch (err) {
throw new Error(
logger.interpolate`Unable to build website for locale name=${locale}.`,
{
cause: err,
},
);
}
}
const locales = await PerfLogger.async('Get locales to build', () =>
getLocalesToBuild({siteDir, cliOptions}),
);
if (locales.length > 1) {
logger.info`Website will be built for all these locales: ${locales}`;
}
await PerfLogger.async(`Build`, () =>
mapAsyncSequential(locales, async (locale) => {
await tryToBuildLocale({locale});
}),
);
logger.info`Use code=${'npm run serve'} command to test your build locally.`;
}
async function getLocalesToBuild({
siteDir,
cliOptions,
}: {
siteDir: string;
cliOptions: BuildCLIOptions;
}): Promise<[string, ...string[]]> {
if (cliOptions.locale) {
return [cliOptions.locale];
}
const context = await loadContext({
siteDir,
outDir: cliOptions.outDir,
config: cliOptions.config,
locale: cliOptions.locale,
localizePath: cliOptions.locale ? false : undefined,
});
const i18n = await loadI18n(context.siteConfig, {
locale: cliOptions.locale,
});
if (i18n.locales.length > 1) {
logger.info`Website will be built for all these locales: ${i18n.locales}`;
}
// We need the default locale to always be the 1st in the list. If we build it
// last, it would "erase" the localized sites built in sub-folders
return [
i18n.defaultLocale,
...i18n.locales.filter((locale) => locale !== i18n.defaultLocale),
];
}
async function buildLocale({
siteDir,
locale,
cliOptions,
}: {
export type BuildLocaleParams = {
siteDir: string;
locale: string;
cliOptions: Partial<BuildCLIOptions>;
}): Promise<void> {
};
export async function buildLocale({
siteDir,
locale,
cliOptions,
}: BuildLocaleParams): Promise<void> {
// Temporary workaround to unlock the ability to translate the site config
// We'll remove it if a better official API can be designed
// See https://github.com/facebook/docusaurus/issues/4542

View File

@ -12,7 +12,7 @@ import logger from '@docusaurus/logger';
import shell from 'shelljs';
import {hasSSHProtocol, buildSshUrl, buildHttpsUrl} from '@docusaurus/utils';
import {loadContext, type LoadContextParams} from '../server/site';
import {build} from './build';
import {build} from './build/build';
export type DeployCLIOptions = Pick<
LoadContextParams,

View File

@ -14,7 +14,7 @@ import serveHandler from 'serve-handler';
import openBrowser from 'react-dev-utils/openBrowser';
import {applyTrailingSlash} from '@docusaurus/utils-common';
import {loadSiteConfig} from '../server/config';
import {build} from './build';
import {build} from './build/build';
import {getHostPort, type HostPortOptions} from '../server/getHostPort';
import type {LoadContextParams} from '../server/site';

View File

@ -15,9 +15,13 @@ export type AppRenderResult = {
collectedData: PageCollectedData;
};
export type AppRenderer = (params: {
pathname: string;
}) => Promise<AppRenderResult>;
/**
 * Handle on a loaded app renderer: a `render` function plus the `shutdown`
 * hook that must be called once rendering is over.
 */
export type AppRenderer = {
// Renders the app for one pathname and resolves to an AppRenderResult
render: (params: {pathname: string}) => Promise<AppRenderResult>;
// It's important to shut down the app renderer once it is no longer needed
// Otherwise the Node.js require cache leaks memory
shutdown: () => Promise<void>;
};
export type PageCollectedData = {
// TODO Docusaurus v4 refactor: helmet state is non-serializable

View File

@ -5,7 +5,7 @@
* LICENSE file in the root directory of this source tree.
*/
export {build} from './commands/build';
export {build} from './commands/build/build';
export {clear} from './commands/clear';
export {deploy} from './commands/deploy';
export {externalCommand} from './commands/external';

View File

@ -6,9 +6,10 @@
*/
import fs from 'fs-extra';
import {createRequire} from 'module';
import path from 'path';
import _ from 'lodash';
// TODO eval is archived / unmaintained: https://github.com/pierrec/node-eval
// We should internalize/modernize it
import evaluate from 'eval';
import pMap from 'p-map';
import logger, {PerfLogger} from '@docusaurus/logger';
@ -19,6 +20,7 @@ import {
type SSGTemplateCompiled,
} from './ssgTemplate';
import {SSGConcurrency, writeStaticFile} from './ssgUtils';
import {createSSGRequire} from './ssgNodeRequire';
import type {SSGParams} from './ssgParams';
import type {AppRenderer, AppRenderResult, SiteCollectedData} from '../common';
import type {HtmlMinifier} from '@docusaurus/bundler';
@ -58,6 +60,8 @@ export async function loadAppRenderer({
const filename = path.basename(serverBundlePath);
const ssgRequire = createSSGRequire(serverBundlePath);
const globals = {
// When using "new URL('file.js', import.meta.url)", Webpack will emit
// __filename, and this plugin will throw. not sure the __filename value
@ -67,7 +71,7 @@ export async function loadAppRenderer({
// This uses module.createRequire() instead of very old "require-like" lib
// See also: https://github.com/pierrec/node-eval/issues/33
require: createRequire(serverBundlePath),
require: ssgRequire.require,
};
const serverEntry = await PerfLogger.async(
@ -86,7 +90,15 @@ export async function loadAppRenderer({
`Server bundle export from "${filename}" must be a function that renders the Docusaurus React app.`,
);
}
return serverEntry.default;
async function shutdown() {
ssgRequire.cleanup();
}
return {
render: serverEntry.default,
shutdown,
};
}
export function printSSGWarnings(
@ -191,6 +203,8 @@ export async function generateStaticFiles({
{concurrency: SSGConcurrency},
);
await renderer.shutdown();
printSSGWarnings(results);
const [allSSGErrors, allSSGSuccesses] = _.partition(
@ -235,7 +249,7 @@ async function generateStaticFile({
}): Promise<SSGSuccessResult & {warnings: string[]}> {
try {
// This only renders the app HTML
const result = await renderer({
const result = await renderer.render({
pathname,
});
// This renders the full page HTML, including head tags...

View File

@ -0,0 +1,49 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
import {createRequire} from 'module';
/**
 * Value returned by `createSSGRequire`: a require() implementation and the
 * function that undoes its effect on the Node.js require cache.
 */
export type SSGNodeRequire = {
// require() implementation that records what gets loaded through it
require: NodeJS.Require;
// Deletes from the require cache the modules loaded through `require`
cleanup: () => void;
};
// The eval/vm.Script used for running the server bundle needs a require() impl
// This impl has to be relative to the server bundle path
// This enables the server bundle to resolve relative paths such as:
// - require('./assets/js/some-chunk.123456.js')
//
// Unfortunately, Node.js vm.Script doesn't isolate memory / require.cache
// This means that if we build multiple Docusaurus localized sites in a row
// The Node.js require cache will keep growing and retain in memory the JS
// assets of the former SSG builds
// We have to clean up the node require cache manually to avoid leaking memory!
// See also https://x.com/sebastienlorber/status/1848399310116831702
/**
 * Creates a require() function resolving relative to the server bundle, along
 * with a cleanup function that evicts from the require cache every module that
 * was loaded through it.
 *
 * @param serverBundlePath Path of the server bundle; module ids are resolved
 * relative to it.
 * @returns The tracking `require` function and its `cleanup` counterpart.
 * `cleanup` can safely be called several times.
 */
export function createSSGRequire(serverBundlePath: string): SSGNodeRequire {
  const realRequire = createRequire(serverBundlePath);

  // Resolved filenames, which are the keys of require.cache
  // A Set is used because the same id can be required many times, and it is
  // emptied on cleanup so that it does not retain anything either
  const requiredFilenames = new Set<string>();

  const ssgRequireFunction: NodeJS.Require = (id) => {
    const loadedModule = realRequire(id);
    // Only reached when loading succeeded: nothing to evict otherwise
    requiredFilenames.add(realRequire.resolve(id));
    return loadedModule;
  };

  const cleanup = () => {
    for (const filename of requiredFilenames) {
      delete realRequire.cache[filename];
    }
    requiredFilenames.clear();
  };

  // Expose the same API surface as the real require function
  ssgRequireFunction.resolve = realRequire.resolve;
  ssgRequireFunction.cache = realRequire.cache;
  ssgRequireFunction.extensions = realRequire.extensions;
  ssgRequireFunction.main = realRequire.main;

  return {require: ssgRequireFunction, cleanup};
}