Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ import { redactableError } from "../common/errors";
import { interpretResultsSarif } from "../query-results";
import { join } from "path";
import { assertNever } from "../common/helpers-pure";
import { dir } from "tmp-promise";
import { writeFile, outputFile } from "fs-extra";
import { dump as dumpYaml } from "js-yaml";
import { MethodSignature } from "./external-api-usage";

type AutoModelQueryOptions = {
queryTag: string;
Expand All @@ -26,6 +30,7 @@ type AutoModelQueryOptions = {
databaseItem: DatabaseItem;
qlpack: QlPacksForLanguage;
sourceInfo: SourceInfo | undefined;
additionalPacks: string[];
extensionPacks: string[];
queryStorageDir: string;

Expand All @@ -52,6 +57,7 @@ async function runAutoModelQuery({
databaseItem,
qlpack,
sourceInfo,
additionalPacks,
extensionPacks,
queryStorageDir,
progress,
Expand Down Expand Up @@ -99,7 +105,7 @@ async function runAutoModelQuery({
quickEvalCountOnly: false,
},
false,
getOnDiskWorkspaceFolders(),
additionalPacks,
extensionPacks,
queryStorageDir,
undefined,
Expand Down Expand Up @@ -147,6 +153,7 @@ async function runAutoModelQuery({

type AutoModelQueriesOptions = {
mode: Mode;
candidateMethods: MethodSignature[];
cliServer: CodeQLCliServer;
queryRunner: QueryRunner;
databaseItem: DatabaseItem;
Expand All @@ -161,6 +168,7 @@ export type AutoModelQueriesResult = {

export async function runAutoModelQueries({
mode,
candidateMethods,
cliServer,
queryRunner,
databaseItem,
Expand Down Expand Up @@ -189,7 +197,13 @@ export async function runAutoModelQueries({
sourceLocationPrefix,
};

const additionalPacks = getOnDiskWorkspaceFolders();
// Generate a pack containing the candidate filters
const filterPackDir = await generateCandidateFilterPack(
databaseItem.language,
candidateMethods,
);

const additionalPacks = [...getOnDiskWorkspaceFolders(), filterPackDir];
const extensionPacks = Object.keys(
await cliServer.resolveQlpacks(additionalPacks, true),
);
Expand All @@ -208,6 +222,7 @@ export async function runAutoModelQueries({
databaseItem,
qlpack,
sourceInfo,
additionalPacks,
extensionPacks,
queryStorageDir,
progress: (update) => {
Expand All @@ -228,3 +243,59 @@ export async function runAutoModelQueries({
candidates,
};
}

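/**
* generateCandidateFilterPack creates a temporary extension pack.
* The pack contains a filter that restricts the automodel queries
* to the specified candidate methods only.
* This is done using the `extensible` predicate "automodelCandidateFilter".
* @param language the database language; the pack targets and extends `codeql/<language>-queries`.
* @param candidateMethods the methods to allow; each one becomes a row of the filter's data.
* @returns the path of the temporary directory that contains the generated pack.
*/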
export async function generateCandidateFilterPack(

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Possible performance optimisation: would it make sense to generate the pack in a temp location once up front, at the start of using the data extensions editor, but update the data extensions within the pack with a different set of candidate methods each time you need to run a filter? That might be slightly faster.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any chance that there can be multiple runs happening at the same time? Also, need to make sure that the temp folder is different for each open vscode window.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we use this approach in a few other places, so I'll defer changes to a followup. I have created an issue to discuss this with the team.

language: string,
candidateMethods: MethodSignature[],
): Promise<string> {
// Pack resides in a temporary directory, to not pollute the workspace.
const packDir = (await dir({ unsafeCleanup: true })).path;

const syntheticConfigPack = {
name: "codeql/automodel-filter",
version: "0.0.0",
library: true,
extensionTargets: {
[`codeql/${language}-queries`]: "*",
},
dataExtensions: ["filter.yml"],
};

const qlpackFile = join(packDir, "codeql-pack.yml");
await outputFile(qlpackFile, dumpYaml(syntheticConfigPack), "utf8");

// The predicate has the following definition:
// extensible predicate automodelCandidateFilter(string package, string type, string name, string signature)
const dataRows = candidateMethods.map((method) => [
method.packageName,
method.typeName,
method.methodName,
method.methodParameters,
]);

const filter = {

@charisk charisk Aug 4, 2023

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a type we could use here to help out with some type safety? Same for the syntheticConfigPack.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We decided to do this in a followup.

extensions: [
{
addsTo: {
pack: `codeql/${language}-queries`,

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In future this will be the automodel queries pack.

extensible: "automodelCandidateFilter",
},
data: dataRows,
},
],
};

const filterFile = join(packDir, "filter.yml");
await writeFile(filterFile, dumpYaml(filter), "utf8");

return packDir;
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,63 @@ import { AutoModelQueriesResult } from "./auto-model-codeml-queries";
import { assertNever } from "../common/helpers-pure";
import * as Sarif from "sarif";
import { gzipEncode } from "../common/zlib";
import { ExternalApiUsage, MethodSignature } from "./external-api-usage";
import { ModeledMethod } from "./modeled-method";
import { groupMethods, sortGroupNames, sortMethods } from "./shared/sorting";

// Soft limit on the number of candidates to send to the model.
// Note that the model may return fewer than this number of candidates.
const candidateLimit = 20;
/**
 * Select the methods that the model should be run on.
 *
 * Usages are visited in the same order as they are listed in the UI, so the ones shown first are
 * sent first. A usage is skipped when it already has a model (an entry in `modeledMethods` whose
 * type is not "none") or when it is marked as `supported`, i.e. modeled outside of the model file.
 * Selection stops as soon as `limit` candidates have been collected.
 *
 * @param mode Whether it is application or framework mode.
 * @param externalApiUsages All external API usages.
 * @param modeledMethods The currently modeled methods, keyed by method signature.
 * @param limit Maximum number of candidates to return. Defaults to `candidateLimit`.
 * @returns The not-yet-modeled methods that are candidates for modeling, at most `limit` of them.
 */
export function getCandidates(
  mode: Mode,
  externalApiUsages: ExternalApiUsage[],
  modeledMethods: Record<string, ModeledMethod>,
  limit: number = candidateLimit,
): MethodSignature[] {
  // Sort the same way as the UI so we send the first ones listed in the UI first
  const grouped = groupMethods(externalApiUsages, mode);
  const sortedExternalApiUsages = sortGroupNames(grouped).flatMap((name) =>
    sortMethods(grouped[name]),
  );

  const candidates: MethodSignature[] = [];

  for (const externalApiUsage of sortedExternalApiUsages) {
    // If we have reached the max number of candidates then stop
    if (candidates.length >= limit) {
      break;
    }

    // A method that is supported is modeled outside of the model file, so it is not a candidate.
    if (externalApiUsage.supported) {
      continue;
    }

    // Anything that is modeled is not a candidate. A signature without an entry in
    // `modeledMethods` is treated as having type "none", i.e. as not modeled.
    const modeledType =
      modeledMethods[externalApiUsage.signature]?.type ?? "none";
    if (modeledType !== "none") {
      continue;
    }

    // The rest are candidates
    candidates.push(externalApiUsage);
  }
  return candidates;
}

/**
* Encode a SARIF log to the format expected by the server: JSON, GZIP-compressed, base64-encoded
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,10 @@ import { join } from "path";
import { pickExtensionPack } from "./extension-pack-picker";
import { getLanguageDisplayName } from "../common/query-language";
import { runAutoModelQueries } from "./auto-model-codeml-queries";
import { createAutoModelV2Request } from "./auto-model-v2";
import { createAutoModelV2Request, getCandidates } from "./auto-model-v2";
import { load as loadYaml } from "js-yaml";
import { loadDataExtensionYaml } from "./yaml";
import { extLogger } from "../common/logging/vscode";

export class DataExtensionsEditorView extends AbstractWebview<
ToDataExtensionsEditorMessage,
Expand Down Expand Up @@ -380,8 +381,22 @@ export class DataExtensionsEditorView extends AbstractWebview<
let predictedModeledMethods: Record<string, ModeledMethod>;

if (useLlmGenerationV2()) {
// Fetch the candidates to send to the model
const candidateMethods = getCandidates(
this.mode,
externalApiUsages,
modeledMethods,
);

// If there are no candidates, there is nothing to model and we just return
if (candidateMethods.length === 0) {
void extLogger.log("No candidates to model. Stopping.");
return;
}

const usages = await runAutoModelQueries({
mode: this.mode,
candidateMethods,
cliServer: this.cliServer,
queryRunner: this.queryRunner,
queryStorageDir: this.queryStorageDir,
Expand Down Expand Up @@ -421,12 +436,33 @@ export class DataExtensionsEditorView extends AbstractWebview<
filename: "auto-model.yml",
});

const modeledMethods = loadDataExtensionYaml(models);
if (!modeledMethods) {
const loadedMethods = loadDataExtensionYaml(models);
if (!loadedMethods) {
return;
}

predictedModeledMethods = modeledMethods;
// Any candidate that was not part of the response is a negative result,
// meaning that the candidate is not a sink for the kinds that the LLM is checking for.
// For now we model this as a sink neutral method; however, this is subject
// to discussion.
for (const candidate of candidateMethods) {
if (!(candidate.signature in loadedMethods)) {
loadedMethods[candidate.signature] = {
type: "neutral",
kind: "sink",
input: "",
output: "",
provenance: "ai-generated",
signature: candidate.signature,
packageName: candidate.packageName,
typeName: candidate.typeName,
methodName: candidate.methodName,
methodParameters: candidate.methodParameters,
};
}
}

predictedModeledMethods = loadedMethods;
} else {
const usages = await getAutoModelUsages({
cliServer: this.cliServer,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import {
createAutoModelV2Request,
encodeSarif,
getCandidates,
} from "../../../src/data-extensions-editor/auto-model-v2";
import { Mode } from "../../../src/data-extensions-editor/shared/mode";
import { AutomodelMode } from "../../../src/data-extensions-editor/auto-model-api-v2";
import { AutoModelQueriesResult } from "../../../src/data-extensions-editor/auto-model-codeml-queries";
import * as sarif from "sarif";
import { gzipDecode } from "../../../src/common/zlib";
import { ExternalApiUsage } from "../../../src/data-extensions-editor/external-api-usage";
import { ModeledMethod } from "../../../src/data-extensions-editor/modeled-method";

describe("createAutoModelV2Request", () => {
const createSarifLog = (queryId: string): sarif.Log => {
Expand Down Expand Up @@ -80,3 +83,110 @@ describe("createAutoModelV2Request", () => {
expect(parsed).toEqual(result.candidates);
});
});

describe("getCandidates", () => {
it("doesn't return methods that are already modelled", () => {
  // One unsupported usage whose signature already has an entry in the modeled methods.
  const signature = "org.my.A#x()";
  const usages: ExternalApiUsage[] = [
    {
      library: "my.jar",
      signature,
      packageName: "org.my",
      typeName: "A",
      methodName: "x",
      methodParameters: "()",
      supported: false,
      supportedType: "none",
      usages: [],
    },
  ];
  const alreadyModeled: Record<string, ModeledMethod> = {
    [signature]: {
      type: "neutral",
      kind: "",
      input: "",
      output: "",
      provenance: "manual",
      signature,
      packageName: "org.my",
      typeName: "A",
      methodName: "x",
      methodParameters: "()",
    },
  };

  const result = getCandidates(Mode.Application, usages, alreadyModeled);

  // The existing model means the method must not be offered as a candidate.
  expect(result.length).toEqual(0);
});

it("doesn't return methods that are supported from other sources", () => {
  // The usage has no model of its own but is flagged as supported.
  const supportedUsage: ExternalApiUsage = {
    library: "my.jar",
    signature: "org.my.A#x()",
    packageName: "org.my",
    typeName: "A",
    methodName: "x",
    methodParameters: "()",
    supported: true,
    supportedType: "none",
    usages: [],
  };

  const result = getCandidates(Mode.Application, [supportedUsage], {});

  expect(result.length).toEqual(0);
});

it("returns methods that are neither modeled nor supported from other sources", () => {
  // A single usage that is unsupported and has no entry in the modeled methods.
  const usages: ExternalApiUsage[] = [
    {
      library: "my.jar",
      signature: "org.my.A#x()",
      packageName: "org.my",
      typeName: "A",
      methodName: "x",
      methodParameters: "()",
      supported: false,
      supportedType: "none",
      usages: [],
    },
  ];
  const noModels = {};

  const result = getCandidates(Mode.Application, usages, noModels);

  expect(result.length).toEqual(1);
});

it("respects the limit", () => {
Comment thread
starcke marked this conversation as resolved.
const externalApiUsages: ExternalApiUsage[] = [];
for (let i = 0; i < 30; i++) {
externalApiUsages.push({
library: "my.jar",
signature: `org.my.A#x${i}()`,
packageName: "org.my",
typeName: "A",
methodName: `x${i}`,
methodParameters: "()",
supported: false,
supportedType: "none",
usages: [],
});
}
const modeledMethods = {};
const candidates = getCandidates(
Mode.Application,
externalApiUsages,
modeledMethods,
);
expect(candidates.length).toEqual(20);
});
});
Loading