From ad159d34f6e136f6707597f40995d27df951d565 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Fri, 26 Jun 2026 17:37:37 +0100 Subject: [PATCH 1/3] Measure overlay-base database size at the clear cleanup level This lets us compare the storage cost of overlay-base and trimmed databases for the same commit. --- lib/entry-points.js | 43 ++++++++++++++++++++ src/database-upload.ts | 90 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) diff --git a/lib/entry-points.js b/lib/entry-points.js index d3fbf00eec..8aba6ba39a 100644 --- a/lib/entry-points.js +++ b/lib/entry-points.js @@ -151828,8 +151828,51 @@ async function cleanupAndUploadDatabases(repositoryNwo, codeql, config, apiDetai }); } } + if (shouldUploadOverlayBase && !config.debugMode) { + await withGroupAsync( + "Measuring database size at the clear cleanup level", + () => recordClearCleanupSizes(codeql, config, reports, logger) + ); + } return reports; } +async function recordClearCleanupSizes(codeql, config, reports, logger) { + const startTime = performance.now(); + try { + try { + await codeql.databaseCleanupCluster(config, "clear" /* Clear */); + } catch (e) { + logger.warning( + `Failed to clean up databases at the '${"clear" /* Clear */}' level for size measurement: ${getErrorMessage(e)}` + ); + return; + } + for (const language of config.languages) { + const report = reports.find((r) => r.language === language); + if (report === void 0) { + continue; + } + try { + const bundledDb = await bundleDb(config, language, codeql, language, { + includeDiagnostics: false + }); + report.clear_cleanup_zipped_size_bytes = fs17.statSync(bundledDb).size; + logger.debug( + `Database for ${language} is ${report.clear_cleanup_zipped_size_bytes} bytes zipped at the '${"clear" /* Clear */}' cleanup level (vs. ${report.zipped_upload_size_bytes} bytes at the '${"overlay" /* Overlay */}' level).` + ); + } catch (e) { + logger.warning( + `Failed to measure the '${"clear" /* Clear */}' cleanup database size for ${language}: ${getErrorMessage(e)}` + ); + } + } + } finally { + const durationMs = performance.now() - startTime; + for (const report of reports) { + report.clear_cleanup_measurement_duration_ms = durationMs; + } + } +} async function uploadBundledDatabase(repositoryNwo, language, commitOid, bundledDb, bundledDbSize, apiDetails) { const client = getApiClient(); const uploadsUrl = new URL(parseGitHubUrl(apiDetails.url)); diff --git a/src/database-upload.ts b/src/database-upload.ts index 9869f38034..41aee8fe25 100644 --- a/src/database-upload.ts +++ b/src/database-upload.ts @@ -25,6 +25,19 @@ export interface DatabaseUploadResult { zipped_upload_size_bytes?: number; /** Whether the uploaded database is an overlay base. */ is_overlay_base?: boolean; + /** + * For overlay-base uploads only: the size in bytes that the zipped database + * would have been if it had been cleaned at the `clear` cleanup level instead + * of the `overlay` level. + */ + clear_cleanup_zipped_size_bytes?: number; + /** + * For overlay-base uploads only: the time in milliseconds spent measuring the + * `clear` cleanup size (cleaning up the cluster at the `clear` level and + * bundling each database). This is a cluster-wide measurement, so it is the + * same for every language in a run. + */ + clear_cleanup_measurement_duration_ms?: number; /** Time taken to upload database in milliseconds. */ upload_duration_ms?: number; /** If there was an error during database upload, this is its message. */ @@ -156,9 +169,86 @@ export async function cleanupAndUploadDatabases( }); } } + + // When we upload an overlay-base database, we cleaned the databases at the `overlay` level, which + // retains more data than the `clear` level used for regular uploads. Measure what the zipped size + // would have been at the `clear` level too, so we can compare the storage cost of overlay-base + // databases against regular databases for the same repository. + // + // We skip this in debug mode, where the databases are preserved and uploaded as debug artifacts, + // since cleaning them up at the `clear` level would discard data that is useful for debugging. + if (shouldUploadOverlayBase && !config.debugMode) { + await withGroupAsync( + "Measuring database size at the clear cleanup level", + () => recordClearCleanupSizes(codeql, config, reports, logger), + ); + } + return reports; } +/** + * Cleans up the databases at the `clear` cleanup level and records the resulting zipped size for + * each language in `clear_cleanup_zipped_size_bytes`, along with the time spent taking the + * measurement in `clear_cleanup_measurement_duration_ms`. + * + * This mutates the entries of `reports` in place. It must run only after all overlay-base uploads + * have completed, since the `clear` cleanup discards overlay data that the uploaded database + * depends on. + * + * Failures here are non-fatal: this is telemetry-only, so we log and move on rather than failing + * the workflow. + */ +async function recordClearCleanupSizes( + codeql: CodeQL, + config: Config, + reports: DatabaseUploadResult[], + logger: Logger, +): Promise { + const startTime = performance.now(); + try { + try { + await codeql.databaseCleanupCluster(config, CleanupLevel.Clear); + } catch (e) { + logger.warning( + `Failed to clean up databases at the '${CleanupLevel.Clear}' level for ` + + `size measurement: ${util.getErrorMessage(e)}`, + ); + return; + } + + for (const language of config.languages) { + const report = reports.find((r) => r.language === language); + if (report === undefined) { + continue; + } + try { + const bundledDb = await bundleDb(config, language, codeql, language, { + includeDiagnostics: false, + }); + report.clear_cleanup_zipped_size_bytes = fs.statSync(bundledDb).size; + logger.debug( + `Database for ${language} is ` + + `${report.clear_cleanup_zipped_size_bytes} bytes zipped at the ` + + `'${CleanupLevel.Clear}' cleanup level ` + + `(vs. ${report.zipped_upload_size_bytes} bytes at the ` + + `'${CleanupLevel.Overlay}' level).`, + ); + } catch (e) { + logger.warning( + `Failed to measure the '${CleanupLevel.Clear}' cleanup database size ` + + `for ${language}: ${util.getErrorMessage(e)}`, + ); + } + } + } finally { + const durationMs = performance.now() - startTime; + for (const report of reports) { + report.clear_cleanup_measurement_duration_ms = durationMs; + } + } +} + /** * Uploads a bundled database to the GitHub API. * From e90a71aaea341c9deecb8a254ab1a8a3892f9fa4 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Fri, 26 Jun 2026 17:37:38 +0100 Subject: [PATCH 2/3] Add tests for clear cleanup size telemetry --- src/database-upload.test.ts | 146 ++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/src/database-upload.test.ts b/src/database-upload.test.ts index 1cfbaecad6..26041fc0e4 100644 --- a/src/database-upload.test.ts +++ b/src/database-upload.test.ts @@ -11,8 +11,10 @@ import * as apiClient from "./api-client"; import { createStubCodeQL } from "./codeql"; import { Config } from "./config-utils"; import { cleanupAndUploadDatabases } from "./database-upload"; +import { Feature } from "./feature-flags"; import * as gitUtils from "./git-utils"; import { BuiltInLanguage } from "./languages"; +import { OverlayDatabaseMode } from "./overlay/overlay-database-mode"; import { RepositoryNwo } from "./repository"; import { checkExpectedLogMessages, @@ -24,6 +26,7 @@ import { setupTests, } from "./testing-utils"; import { + CleanupLevel, GitHubVariant, HTTPError, initializeEnvironment, @@ -335,3 +338,146 @@ test.serial("Successfully uploading a database to GHEC-DR", async (t) => { ); }); }); + +test.serial( + "Records overlay and clear cleanup sizes when uploading an overlay-base database", + async (t) => { + await withTmpDir(async (tmpDir) => { + setupActionsVars(tmpDir, tmpDir); + sinon + .stub(actionsUtil, "getRequiredInput") + .withArgs("upload-database") + .returns("true"); + sinon.stub(gitUtils, "isAnalyzingDefaultBranch").resolves(true); + + await mockHttpRequests(201); + + // Track the cleanup level passed to each cleanup so that the database + // bundle stub can write a differently-sized bundle for each level. + const cleanupLevels: CleanupLevel[] = []; + let lastCleanupLevel: CleanupLevel | undefined; + const overlaySizeBytes = 100; + const clearSizeBytes = 50; + const codeql = createStubCodeQL({ + async databaseCleanupCluster(_config, cleanupLevel) { + cleanupLevels.push(cleanupLevel); + lastCleanupLevel = cleanupLevel; + }, + async databaseBundle(_databasePath, outputFilePath) { + const sizeBytes = + lastCleanupLevel === CleanupLevel.Overlay + ? overlaySizeBytes + : clearSizeBytes; + fs.writeFileSync(outputFilePath, "x".repeat(sizeBytes)); + }, + }); + + const config = getTestConfig(tmpDir); + config.overlayDatabaseMode = OverlayDatabaseMode.OverlayBase; + + const loggedMessages: LoggedMessage[] = []; + const results = await cleanupAndUploadDatabases( + testRepoName, + codeql, + config, + testApiDetails, + createFeatures([Feature.UploadOverlayDbToApi]), + getRecordingLogger(loggedMessages), + ); + + // The database should be cleaned up at the `overlay` level for the upload + // and then re-cleaned at the `clear` level to measure its size. + t.deepEqual(cleanupLevels, [CleanupLevel.Overlay, CleanupLevel.Clear]); + + t.is(results.length, 1); + t.is(results[0].is_overlay_base, true); + t.is(results[0].zipped_upload_size_bytes, overlaySizeBytes); + t.is(results[0].clear_cleanup_zipped_size_bytes, clearSizeBytes); + t.is(typeof results[0].clear_cleanup_measurement_duration_ms, "number"); + }); + }, +); + +test.serial( + "Does not measure clear cleanup size for a regular (non-overlay-base) upload", + async (t) => { + await withTmpDir(async (tmpDir) => { + setupActionsVars(tmpDir, tmpDir); + sinon + .stub(actionsUtil, "getRequiredInput") + .withArgs("upload-database") + .returns("true"); + sinon.stub(gitUtils, "isAnalyzingDefaultBranch").resolves(true); + + await mockHttpRequests(201); + + const cleanupLevels: CleanupLevel[] = []; + const codeql = createStubCodeQL({ + async databaseCleanupCluster(_config, cleanupLevel) { + cleanupLevels.push(cleanupLevel); + }, + async databaseBundle(_databasePath, outputFilePath) { + fs.writeFileSync(outputFilePath, ""); + }, + }); + + const results = await cleanupAndUploadDatabases( + testRepoName, + codeql, + getTestConfig(tmpDir), + testApiDetails, + createFeatures([Feature.UploadOverlayDbToApi]), + getRecordingLogger([]), + ); + + // A regular upload is cleaned only once, at the `clear` level. + t.deepEqual(cleanupLevels, [CleanupLevel.Clear]); + t.is(results[0].is_overlay_base, false); + t.is(results[0].clear_cleanup_zipped_size_bytes, undefined); + t.is(results[0].clear_cleanup_measurement_duration_ms, undefined); + }); + }, +); + +test.serial("Does not measure clear cleanup size in debug mode", async (t) => { + await withTmpDir(async (tmpDir) => { + setupActionsVars(tmpDir, tmpDir); + sinon + .stub(actionsUtil, "getRequiredInput") + .withArgs("upload-database") + .returns("true"); + sinon.stub(gitUtils, "isAnalyzingDefaultBranch").resolves(true); + + await mockHttpRequests(201); + + const cleanupLevels: CleanupLevel[] = []; + const codeql = createStubCodeQL({ + async databaseCleanupCluster(_config, cleanupLevel) { + cleanupLevels.push(cleanupLevel); + }, + async databaseBundle(_databasePath, outputFilePath) { + fs.writeFileSync(outputFilePath, ""); + }, + }); + + const config = getTestConfig(tmpDir); + config.overlayDatabaseMode = OverlayDatabaseMode.OverlayBase; + config.debugMode = true; + + const results = await cleanupAndUploadDatabases( + testRepoName, + codeql, + config, + testApiDetails, + createFeatures([Feature.UploadOverlayDbToApi]), + getRecordingLogger([]), + ); + + // In debug mode we clean up at the `overlay` level for the upload but skip + // the additional `clear` cleanup, to preserve the database for debugging. + t.deepEqual(cleanupLevels, [CleanupLevel.Overlay]); + t.is(results[0].is_overlay_base, true); + t.is(results[0].clear_cleanup_zipped_size_bytes, undefined); + t.is(results[0].clear_cleanup_measurement_duration_ms, undefined); + }); +}); From 6f8864c8b0fdaa2810bf2e549f9a9994006b6548 Mon Sep 17 00:00:00 2001 From: Henry Mercer Date: Fri, 26 Jun 2026 18:43:09 +0100 Subject: [PATCH 3/3] Address review comments --- lib/entry-points.js | 51 +++++++++++++------------- src/database-upload.test.ts | 45 +++++++++++++++++++++++ src/database-upload.ts | 71 +++++++++++++++++++------------------ 3 files changed, 105 insertions(+), 62 deletions(-) diff --git a/lib/entry-points.js b/lib/entry-points.js index 8aba6ba39a..93a4d0ed58 100644 --- a/lib/entry-points.js +++ b/lib/entry-points.js @@ -151839,39 +151839,36 @@ async function cleanupAndUploadDatabases(repositoryNwo, codeql, config, apiDetai async function recordClearCleanupSizes(codeql, config, reports, logger) { const startTime = performance.now(); try { + await codeql.databaseCleanupCluster(config, "clear" /* Clear */); + } catch (e) { + logger.warning( + `Failed to clean up databases at the '${"clear" /* Clear */}' level for size measurement: ${getErrorMessage(e)}` + ); + return; + } + for (const language of config.languages) { + const report = reports.find((r) => r.language === language); + if (report === void 0) { + continue; + } try { - await codeql.databaseCleanupCluster(config, "clear" /* Clear */); + const bundledDb = await bundleDb(config, language, codeql, language, { + includeDiagnostics: false + }); + report.clear_cleanup_zipped_size_bytes = fs17.statSync(bundledDb).size; + logger.debug( + `Database for ${language} is ${report.clear_cleanup_zipped_size_bytes} bytes zipped at the '${"clear" /* Clear */}' cleanup level (vs. ${report.zipped_upload_size_bytes ?? "unknown"} bytes at the '${"overlay" /* Overlay */}' level).` + ); } catch (e) { logger.warning( - `Failed to clean up databases at the '${"clear" /* Clear */}' level for size measurement: ${getErrorMessage(e)}` + `Failed to measure the '${"clear" /* Clear */}' cleanup database size for ${language}: ${getErrorMessage(e)}` ); - return; - } - for (const language of config.languages) { - const report = reports.find((r) => r.language === language); - if (report === void 0) { - continue; - } - try { - const bundledDb = await bundleDb(config, language, codeql, language, { - includeDiagnostics: false - }); - report.clear_cleanup_zipped_size_bytes = fs17.statSync(bundledDb).size; - logger.debug( - `Database for ${language} is ${report.clear_cleanup_zipped_size_bytes} bytes zipped at the '${"clear" /* Clear */}' cleanup level (vs. ${report.zipped_upload_size_bytes} bytes at the '${"overlay" /* Overlay */}' level).` - ); - } catch (e) { - logger.warning( - `Failed to measure the '${"clear" /* Clear */}' cleanup database size for ${language}: ${getErrorMessage(e)}` - ); - } - } - } finally { - const durationMs = performance.now() - startTime; - for (const report of reports) { - report.clear_cleanup_measurement_duration_ms = durationMs; } } + const durationMs = performance.now() - startTime; + for (const report of reports) { + report.clear_cleanup_measurement_duration_ms = durationMs; + } } async function uploadBundledDatabase(repositoryNwo, language, commitOid, bundledDb, bundledDbSize, apiDetails) { const client = getApiClient(); diff --git a/src/database-upload.test.ts b/src/database-upload.test.ts index 26041fc0e4..bcaf9f1c9e 100644 --- a/src/database-upload.test.ts +++ b/src/database-upload.test.ts @@ -481,3 +481,48 @@ test.serial("Does not measure clear cleanup size in debug mode", async (t) => { t.is(results[0].clear_cleanup_measurement_duration_ms, undefined); }); }); + +test.serial( + "Does not record a clear cleanup duration when the clear cleanup fails", + async (t) => { + await withTmpDir(async (tmpDir) => { + setupActionsVars(tmpDir, tmpDir); + sinon + .stub(actionsUtil, "getRequiredInput") + .withArgs("upload-database") + .returns("true"); + sinon.stub(gitUtils, "isAnalyzingDefaultBranch").resolves(true); + + await mockHttpRequests(201); + + const codeql = createStubCodeQL({ + async databaseCleanupCluster(_config, cleanupLevel) { + if (cleanupLevel === CleanupLevel.Clear) { + throw new Error("clear cleanup failed"); + } + }, + async databaseBundle(_databasePath, outputFilePath) { + fs.writeFileSync(outputFilePath, "x".repeat(100)); + }, + }); + + const config = getTestConfig(tmpDir); + config.overlayDatabaseMode = OverlayDatabaseMode.OverlayBase; + + const results = await cleanupAndUploadDatabases( + testRepoName, + codeql, + config, + testApiDetails, + createFeatures([Feature.UploadOverlayDbToApi]), + getRecordingLogger([]), + ); + + // When the `clear` cleanup fails, no size is measured, so we should not + // report a measurement duration either. + t.is(results[0].is_overlay_base, true); + t.is(results[0].clear_cleanup_zipped_size_bytes, undefined); + t.is(results[0].clear_cleanup_measurement_duration_ms, undefined); + }); + }, +); diff --git a/src/database-upload.ts b/src/database-upload.ts index 41aee8fe25..0bd99422a0 100644 --- a/src/database-upload.ts +++ b/src/database-upload.ts @@ -189,8 +189,8 @@ export async function cleanupAndUploadDatabases( /** * Cleans up the databases at the `clear` cleanup level and records the resulting zipped size for - * each language in `clear_cleanup_zipped_size_bytes`, along with the time spent taking the - * measurement in `clear_cleanup_measurement_duration_ms`. + * each language in `clear_cleanup_zipped_size_bytes`. If the cleanup succeeds, also records the + * time spent taking the measurement in `clear_cleanup_measurement_duration_ms`. * * This mutates the entries of `reports` in place. It must run only after all overlay-base uploads * have completed, since the `clear` cleanup discards overlay data that the uploaded database @@ -206,46 +206,47 @@ async function recordClearCleanupSizes( logger: Logger, ): Promise { const startTime = performance.now(); + try { + await codeql.databaseCleanupCluster(config, CleanupLevel.Clear); + } catch (e) { + // The cleanup didn't run, so there are no sizes to measure. Return without recording a + // duration, so that we don't report a measurement duration with no accompanying sizes. + logger.warning( + `Failed to clean up databases at the '${CleanupLevel.Clear}' level for ` + + `size measurement: ${util.getErrorMessage(e)}`, + ); + return; + } + + for (const language of config.languages) { + const report = reports.find((r) => r.language === language); + if (report === undefined) { + continue; + } try { - await codeql.databaseCleanupCluster(config, CleanupLevel.Clear); + const bundledDb = await bundleDb(config, language, codeql, language, { + includeDiagnostics: false, + }); + report.clear_cleanup_zipped_size_bytes = fs.statSync(bundledDb).size; + logger.debug( + `Database for ${language} is ` + + `${report.clear_cleanup_zipped_size_bytes} bytes zipped at the ` + + `'${CleanupLevel.Clear}' cleanup level ` + + `(vs. ${report.zipped_upload_size_bytes ?? "unknown"} bytes at the ` + + `'${CleanupLevel.Overlay}' level).`, + ); } catch (e) { logger.warning( - `Failed to clean up databases at the '${CleanupLevel.Clear}' level for ` + - `size measurement: ${util.getErrorMessage(e)}`, + `Failed to measure the '${CleanupLevel.Clear}' cleanup database size ` + + `for ${language}: ${util.getErrorMessage(e)}`, ); - return; } + } - for (const language of config.languages) { - const report = reports.find((r) => r.language === language); - if (report === undefined) { - continue; - } - try { - const bundledDb = await bundleDb(config, language, codeql, language, { - includeDiagnostics: false, - }); - report.clear_cleanup_zipped_size_bytes = fs.statSync(bundledDb).size; - logger.debug( - `Database for ${language} is ` + - `${report.clear_cleanup_zipped_size_bytes} bytes zipped at the ` + - `'${CleanupLevel.Clear}' cleanup level ` + - `(vs. ${report.zipped_upload_size_bytes} bytes at the ` + - `'${CleanupLevel.Overlay}' level).`, - ); - } catch (e) { - logger.warning( - `Failed to measure the '${CleanupLevel.Clear}' cleanup database size ` + - `for ${language}: ${util.getErrorMessage(e)}`, - ); - } - } - } finally { - const durationMs = performance.now() - startTime; - for (const report of reports) { - report.clear_cleanup_measurement_duration_ms = durationMs; - } + const durationMs = performance.now() - startTime; + for (const report of reports) { + report.clear_cleanup_measurement_duration_ms = durationMs; } }