-
Notifications
You must be signed in to change notification settings - Fork 230
Use filtering queries to do batched AI querying #2670
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -17,6 +17,10 @@ import { redactableError } from "../common/errors"; | |
| import { interpretResultsSarif } from "../query-results"; | ||
| import { join } from "path"; | ||
| import { assertNever } from "../common/helpers-pure"; | ||
| import { dir } from "tmp-promise"; | ||
| import { writeFile, outputFile } from "fs-extra"; | ||
| import { dump as dumpYaml } from "js-yaml"; | ||
| import { MethodSignature } from "./external-api-usage"; | ||
|
|
||
| type AutoModelQueryOptions = { | ||
| queryTag: string; | ||
|
|
@@ -26,6 +30,7 @@ type AutoModelQueryOptions = { | |
| databaseItem: DatabaseItem; | ||
| qlpack: QlPacksForLanguage; | ||
| sourceInfo: SourceInfo | undefined; | ||
| additionalPacks: string[]; | ||
| extensionPacks: string[]; | ||
| queryStorageDir: string; | ||
|
|
||
|
|
@@ -52,6 +57,7 @@ async function runAutoModelQuery({ | |
| databaseItem, | ||
| qlpack, | ||
| sourceInfo, | ||
| additionalPacks, | ||
| extensionPacks, | ||
| queryStorageDir, | ||
| progress, | ||
|
|
@@ -99,7 +105,7 @@ async function runAutoModelQuery({ | |
| quickEvalCountOnly: false, | ||
| }, | ||
| false, | ||
| getOnDiskWorkspaceFolders(), | ||
| additionalPacks, | ||
| extensionPacks, | ||
| queryStorageDir, | ||
| undefined, | ||
|
|
@@ -147,6 +153,7 @@ async function runAutoModelQuery({ | |
|
|
||
| type AutoModelQueriesOptions = { | ||
| mode: Mode; | ||
| candidateMethods: MethodSignature[]; | ||
| cliServer: CodeQLCliServer; | ||
| queryRunner: QueryRunner; | ||
| databaseItem: DatabaseItem; | ||
|
|
@@ -161,6 +168,7 @@ export type AutoModelQueriesResult = { | |
|
|
||
| export async function runAutoModelQueries({ | ||
| mode, | ||
| candidateMethods, | ||
| cliServer, | ||
| queryRunner, | ||
| databaseItem, | ||
|
|
@@ -189,7 +197,13 @@ export async function runAutoModelQueries({ | |
| sourceLocationPrefix, | ||
| }; | ||
|
|
||
| const additionalPacks = getOnDiskWorkspaceFolders(); | ||
| // Generate a pack containing the candidate filters | ||
| const filterPackDir = await generateCandidateFilterPack( | ||
| databaseItem.language, | ||
| candidateMethods, | ||
| ); | ||
|
|
||
| const additionalPacks = [...getOnDiskWorkspaceFolders(), filterPackDir]; | ||
| const extensionPacks = Object.keys( | ||
| await cliServer.resolveQlpacks(additionalPacks, true), | ||
| ); | ||
|
|
@@ -208,6 +222,7 @@ export async function runAutoModelQueries({ | |
| databaseItem, | ||
| qlpack, | ||
| sourceInfo, | ||
| additionalPacks, | ||
| extensionPacks, | ||
| queryStorageDir, | ||
| progress: (update) => { | ||
|
|
@@ -228,3 +243,59 @@ export async function runAutoModelQueries({ | |
| candidates, | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * generateCandidateFilterPack will create a temporary extension pack. | ||
| * This pack will contain a filter that will restrict the automodel queries | ||
| * to the specified candidate methods only. | ||
| * This is done using the `extensible` predicate "automodelCandidateFilter". | ||
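| * @param language The database language; used to build the name of the targeted `codeql/<language>-queries` pack. | ||
| * @param candidateMethods The methods the filter should allow; one data row is written per method. | ||
| * @returns The path of the temporary directory that contains the generated pack. | ||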
| */ | ||
| export async function generateCandidateFilterPack( | ||
| language: string, | ||
| candidateMethods: MethodSignature[], | ||
| ): Promise<string> { | ||
| // Pack resides in a temporary directory, to not pollute the workspace. | ||
| const packDir = (await dir({ unsafeCleanup: true })).path; | ||
|
|
||
| const syntheticConfigPack = { | ||
| name: "codeql/automodel-filter", | ||
| version: "0.0.0", | ||
| library: true, | ||
| extensionTargets: { | ||
| [`codeql/${language}-queries`]: "*", | ||
| }, | ||
| dataExtensions: ["filter.yml"], | ||
| }; | ||
|
|
||
| const qlpackFile = join(packDir, "codeql-pack.yml"); | ||
| await outputFile(qlpackFile, dumpYaml(syntheticConfigPack), "utf8"); | ||
|
|
||
| // The predicate has the following definition: | ||
| // extensible predicate automodelCandidateFilter(string package, string type, string name, string signature) | ||
| const dataRows = candidateMethods.map((method) => [ | ||
| method.packageName, | ||
| method.typeName, | ||
| method.methodName, | ||
| method.methodParameters, | ||
| ]); | ||
|
|
||
| const filter = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a type we could use here to help out with some type safety? Same for the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We decided to do this in a followup. |
||
| extensions: [ | ||
| { | ||
| addsTo: { | ||
| pack: `codeql/${language}-queries`, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In future this will be the automodel queries pack. |
||
| extensible: "automodelCandidateFilter", | ||
| }, | ||
| data: dataRows, | ||
| }, | ||
| ], | ||
| }; | ||
|
|
||
| const filterFile = join(packDir, "filter.yml"); | ||
| await writeFile(filterFile, dumpYaml(filter), "utf8"); | ||
|
|
||
| return packDir; | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Possible performance optimisation: would it make sense to generate the pack in a temp location once up front, at the start of using the data extensions editor, but update the data extensions within the pack with a different set of candidate methods each time you need to run a filter? That might be slightly faster.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Any chance that there can be multiple runs happening at the same time? Also, need to make sure that the temp folder is different for each open vscode window.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we use this approach in a few other places, so I'll defer changes to a followup. I have created an issue to discuss this with the team.