From 1a244b79a56129af5f748ad7b1ac22641dafa730 Mon Sep 17 00:00:00 2001 From: Greg Richardson Date: Wed, 20 Dec 2023 09:34:30 -0700 Subject: [PATCH] Automated AI tests via GitHub Actions (#19880) * ci: ai tests * chore(ai-commands): update readme * fix: dont load .env.local in ci environment * fix: pass openai env variable into ci job * feat(ai): llm evaluated tests * chore(ai): remove unused jest snapshots --- .github/workflows/ai-tests.yml | 50 +++++++++++++ packages/ai-commands/README.md | 6 +- packages/ai-commands/jest.config.js | 1 + .../src/__snapshots__/sql.test.ts.snap | 6 -- packages/ai-commands/src/sql.test.ts | 6 +- packages/ai-commands/test/extensions.d.ts | 11 +++ packages/ai-commands/test/extensions.ts | 72 +++++++++++++++++++ packages/ai-commands/test/setup.ts | 10 +-- packages/ai-commands/tsconfig.json | 17 ++++- 9 files changed, 160 insertions(+), 19 deletions(-) create mode 100644 .github/workflows/ai-tests.yml create mode 100644 packages/ai-commands/test/extensions.d.ts create mode 100644 packages/ai-commands/test/extensions.ts diff --git a/.github/workflows/ai-tests.yml b/.github/workflows/ai-tests.yml new file mode 100644 index 0000000000..96bc283c6d --- /dev/null +++ b/.github/workflows/ai-tests.yml @@ -0,0 +1,50 @@ +name: AI Unit Tests & Type Check + +on: + push: + branches: [master] + paths: + - 'packages/ai-commands/**' + pull_request: + branches: [master] + paths: + - 'packages/ai-commands/**' + schedule: + - cron: '15 0 * * 1' # Every Monday @ 12:15am UTC (off the hour to avoid heavy load times) + +# Cancel old builds on new commit for same workflow + branch/PR +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + test: + runs-on: ubuntu-latest + + env: + OPENAI_KEY: ${{ secrets.OPENAI_KEY }} + + strategy: + matrix: + node-version: [18.x] + + defaults: + run: + working-directory: ./packages/ai-commands + + steps: + - uses: actions/checkout@v4 + with: + 
sparse-checkout: | + packages + - name: Use Node.js ${{ matrix.node-version }} + uses: actions/setup-node@v3 + with: + node-version: ${{ matrix.node-version }} + cache: 'npm' + - name: Install deps + run: npm ci + - name: Type check + run: npm run typecheck + - name: Run tests + run: npm run test diff --git a/packages/ai-commands/README.md b/packages/ai-commands/README.md index 5068e2ba40..2bfadb0b61 100644 --- a/packages/ai-commands/README.md +++ b/packages/ai-commands/README.md @@ -2,11 +2,11 @@ ## Main purpose -This package contains all features involving OpenAI API. Technically, each feature is implemented as a function which -can be easily tested for regressions. +This package contains all features involving AI and LLMs (eg. via OpenAI API). +Each feature is implemented as a function which can be easily tested for regressions. The streaming functions only work on Edge runtime so they can only be imported via a special `edge` subpath like so: -``` +```ts import { chatRlsPolicy } from 'ai-commands/edge' ``` diff --git a/packages/ai-commands/jest.config.js b/packages/ai-commands/jest.config.js index e5f05acb26..b970fa8370 100644 --- a/packages/ai-commands/jest.config.js +++ b/packages/ai-commands/jest.config.js @@ -7,6 +7,7 @@ module.exports = { '^.+\\.(js|jsx)$': 'babel-jest', }, setupFiles: ['./test/setup.ts'], + setupFilesAfterEnv: ['./test/extensions.ts'], testTimeout: 15000, transformIgnorePatterns: [ 'node_modules/(?!(mdast-.*|micromark|micromark-.*|unist-.*|decode-named-character-reference|character-entities)/)', diff --git a/packages/ai-commands/src/__snapshots__/sql.test.ts.snap b/packages/ai-commands/src/__snapshots__/sql.test.ts.snap index 8317480bf1..ca462cb7b8 100644 --- a/packages/ai-commands/src/__snapshots__/sql.test.ts.snap +++ b/packages/ai-commands/src/__snapshots__/sql.test.ts.snap @@ -38,14 +38,8 @@ exports[`generate single table with specified columns 1`] = ` );" `; -exports[`generate single table with specified columns 2`] = `"Employee 
Tracking Table"`; - exports[`rls chat select policy using table definition 1`] = ` "create policy select_todo_policy on todos for select using (user_id = auth.uid ());" `; - -exports[`title title matches content 1`] = `"Employee and Department Tables"`; - -exports[`title title matches content 2`] = `"Tables to track employees and their respective departments"`; diff --git a/packages/ai-commands/src/sql.test.ts b/packages/ai-commands/src/sql.test.ts index 8982f7be77..7ff9513d7f 100644 --- a/packages/ai-commands/src/sql.test.ts +++ b/packages/ai-commands/src/sql.test.ts @@ -16,7 +16,7 @@ describe('generate', () => { ) expect(formatSql(sql)).toMatchSnapshot() - expect(title).toMatchSnapshot() + await expect(title).toMatchCriteria('relates to employees') }) }) @@ -94,8 +94,8 @@ describe('title', () => { ` ) - expect(title).toMatchSnapshot() - expect(description).toMatchSnapshot() + await expect(title).toMatchCriteria('relates to employees and departments') + await expect(description).toMatchCriteria('describes employees and departments') }) }) diff --git a/packages/ai-commands/test/extensions.d.ts b/packages/ai-commands/test/extensions.d.ts new file mode 100644 index 0000000000..54b85f00a0 --- /dev/null +++ b/packages/ai-commands/test/extensions.d.ts @@ -0,0 +1,11 @@ +import 'expect' + +declare module 'expect' { + interface Matchers<R> { + /** + * Check that a string matches a natural language criteria + * describing the expected output. Uses an LLM to evaluate.
+ */ + toMatchCriteria(criteria: string): Promise<R> + } +} diff --git a/packages/ai-commands/test/extensions.ts b/packages/ai-commands/test/extensions.ts new file mode 100644 index 0000000000..bcce558937 --- /dev/null +++ b/packages/ai-commands/test/extensions.ts @@ -0,0 +1,72 @@ +import { expect } from '@jest/globals' +import { codeBlock } from 'common-tags' +import OpenAI from 'openai' + +const openAiKey = process.env.OPENAI_KEY +const openai = new OpenAI({ apiKey: openAiKey }) + +expect.extend({ + async toMatchCriteria(received: string, criteria: string) { + const model = 'gpt-4-1106-preview' + + const completionResponse = await openai.chat.completions.create({ + model, + messages: [ + { + role: 'system', + content: codeBlock` + You are a test runner. Your job is to evaluate whether 'Received' adheres to the test 'Criteria'. + + You must output JSON, specifically an object containing a "pass" boolean and "reason" string: + - \`{ "pass": true, "reason": "" }\` if 'Received' adheres to the test 'Criteria' + - \`{ "pass": false, "reason": "" }\` if 'Received' does not adhere to the test 'Criteria' + + The "reason" must explain exactly which part of 'Received' did or did not pass the test 'Criteria'.
+ `, + }, + { + role: 'user', + content: codeBlock` + Received: + ${received} + + Criteria: + ${criteria} + `, + }, + ], + max_tokens: 256, + temperature: 0, + response_format: { + type: 'json_object', + }, + stream: false, + }) + + const [choice] = completionResponse.choices + + if (!choice.message.content) { + throw new Error('LLM evaluator returned invalid response') + } + + const { pass, reason }: { pass?: boolean; reason?: string } = JSON.parse(choice.message.content) + + if (pass === undefined) { + throw new Error('LLM evaluator returned invalid response') + } + + return { + message: () => + codeBlock` + ${this.utils.matcherHint('toMatchCriteria', received, criteria, { + comment: `evaluated by LLM '${model}'`, + isNot: this.isNot, + promise: this.promise, + })} + + ${reason} + `, + pass, + } + }, +}) diff --git a/packages/ai-commands/test/setup.ts b/packages/ai-commands/test/setup.ts index a963f60147..a11d7ac225 100644 --- a/packages/ai-commands/test/setup.ts +++ b/packages/ai-commands/test/setup.ts @@ -1,8 +1,10 @@ import { config } from 'dotenv' import { statSync } from 'fs' -// Use studio .env.local for now -const envPath = '../../apps/studio/.env.local' +if (!process.env.CI) { + // Use keys from studio .env.local for local tests + const envPath = '../../apps/studio/.env.local' -statSync(envPath) -config({ path: envPath }) + statSync(envPath) + config({ path: envPath }) +} diff --git a/packages/ai-commands/tsconfig.json b/packages/ai-commands/tsconfig.json index cd6c94d6e8..e8b5504d29 100644 --- a/packages/ai-commands/tsconfig.json +++ b/packages/ai-commands/tsconfig.json @@ -1,5 +1,16 @@ { "extends": "tsconfig/react-library.json", - "include": ["."], - "exclude": ["dist", "build", "node_modules"] -} + "include": [ + "." + ], + "exclude": [ + "dist", + "build", + "node_modules" + ], + "compilerOptions": { + "types": [ + "./test/extensions.d.ts" + ] + } +} \ No newline at end of file