Automated AI tests via GitHub Actions (#19880)

* ci: ai tests

* chore(ai-commands): update readme

* fix: dont load .env.local in ci environment

* fix: pass openai env variable into ci job

* feat(ai): llm evaluated tests

* chore(ai): remove unused jest snapshots
Author: Greg Richardson
Date: 2023-12-20 09:34:30 -07:00 (committed by GitHub)
Parent: 23e095580c
Commit: 1a244b79a5

9 changed files with 160 additions and 19 deletions
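The headline change is an LLM-evaluated Jest matcher: instead of pinning free-form model output to brittle snapshots, tests assert that the output satisfies a natural-language criteria, judged by a second LLM call. A minimal sketch of the resulting test shape, assuming the custom matcher from the diffs below is registered (the literal title value is illustrative):

```ts
import { expect, test } from '@jest/globals'

test('title relates to employees', async () => {
  // Illustrative value; in the real suite this comes from an AI generate call.
  const title = 'Employee and Department Tables'

  // The custom matcher asks an LLM whether the received string satisfies
  // the criteria, so the expectation must be awaited.
  await expect(title).toMatchCriteria('relates to employees')
})
```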

.github/workflows/ai-tests.yml (new file, 50 additions)

@@ -0,0 +1,50 @@
name: AI Unit Tests & Type Check

on:
  push:
    branches: [master]
    paths:
      - 'packages/ai-commands/**'
  pull_request:
    branches: [master]
    paths:
      - 'packages/ai-commands/**'
  schedule:
    - cron: '15 0 * * 1' # Every Monday @ 12:15am UTC (off the hour to avoid heavy load times)

# Cancel old builds on new commit for same workflow + branch/PR
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  test:
    runs-on: ubuntu-latest

    env:
      OPENAI_KEY: ${{ secrets.OPENAI_KEY }}

    strategy:
      matrix:
        node-version: [18.x]

    defaults:
      run:
        working-directory: ./packages/ai-commands

    steps:
      - uses: actions/checkout@v4
        with:
          sparse-checkout: |
            packages

      - name: Use Node.js ${{ matrix.node-version }}
        uses: actions/setup-node@v3
        with:
          node-version: ${{ matrix.node-version }}
          cache: 'npm'

      - name: Install deps
        run: npm ci

      - name: Type check
        run: npm run typecheck

      - name: Run tests
        run: npm run test
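The job reads the OpenAI key from the repository's `OPENAI_KEY` secret and exposes it as an environment variable, which the test setup below picks up via `process.env.OPENAI_KEY`. A hedged sketch of the precondition the suite effectively relies on (this explicit guard is hypothetical, not part of the commit):

```ts
// Hypothetical guard for illustration only: the committed code simply reads
// process.env.OPENAI_KEY (set by the CI secret, or loaded from
// apps/studio/.env.local for local runs) and would fail later without it.
if (!process.env.OPENAI_KEY) {
  throw new Error('OPENAI_KEY must be set to run the LLM-evaluated tests')
}
```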

packages/ai-commands/README.md

@@ -2,11 +2,11 @@
 ## Main purpose

-This package contains all features involving OpenAI API. Technically, each feature is implemented as a function which
-can be easily tested for regressions.
+This package contains all features involving AI and LLMs (eg. via OpenAI API).
+Each feature is implemented as a function which can be easily tested for regressions.

 The streaming functions only work on Edge runtime so they can only be imported via a special `edge` subpath like so:

-```
+```ts
 import { chatRlsPolicy } from 'ai-commands/edge'
 ```

packages/ai-commands/jest.config.js

@@ -7,6 +7,7 @@ module.exports = {
     '^.+\\.(js|jsx)$': 'babel-jest',
   },
   setupFiles: ['./test/setup.ts'],
+  setupFilesAfterEnv: ['./test/extensions.ts'],
   testTimeout: 15000,
   transformIgnorePatterns: [
     'node_modules/(?!(mdast-.*|micromark|micromark-.*|unist-.*|decode-named-character-reference|character-entities)/)',

packages/ai-commands/test/__snapshots__/… (Jest snapshot file)

@@ -38,14 +38,8 @@ exports[`generate single table with specified columns 1`] = `
);"
`;
exports[`generate single table with specified columns 2`] = `"Employee Tracking Table"`;
exports[`rls chat select policy using table definition 1`] = `
"create policy select_todo_policy on todos for
select
using (user_id = auth.uid ());"
`;
exports[`title title matches content 1`] = `"Employee and Department Tables"`;
exports[`title title matches content 2`] = `"Tables to track employees and their respective departments"`;

packages/ai-commands/test/… (test file)

@@ -16,7 +16,7 @@ describe('generate', () => {
     )

     expect(formatSql(sql)).toMatchSnapshot()
-    expect(title).toMatchSnapshot()
+    await expect(title).toMatchCriteria('relates to employees')
   })
 })
@@ -94,8 +94,8 @@ describe('title', () => {
       `
     )

-    expect(title).toMatchSnapshot()
-    expect(description).toMatchSnapshot()
+    await expect(title).toMatchCriteria('relates to employees and departments')
+    await expect(description).toMatchCriteria('describes employees and departments')
   })
 })

packages/ai-commands/test/extensions.d.ts (new file)

@@ -0,0 +1,11 @@
import 'expect'

declare module 'expect' {
  interface Matchers {
    /**
     * Check that a string matches a natural language criteria
     * describing the expected output. Uses a LLM to evaluate.
     */
    toMatchCriteria(criteria: string): Promise<void>
  }
}

packages/ai-commands/test/extensions.ts (new file)

@@ -0,0 +1,72 @@
import { expect } from '@jest/globals'
import { codeBlock } from 'common-tags'
import OpenAI from 'openai'

const openAiKey = process.env.OPENAI_KEY

const openai = new OpenAI({ apiKey: openAiKey })

expect.extend({
  async toMatchCriteria(received: string, criteria: string) {
    const model = 'gpt-4-1106-preview'

    const completionResponse = await openai.chat.completions.create({
      model,
      messages: [
        {
          role: 'system',
          content: codeBlock`
            You are a test runner. Your job is to evaluate whether 'Received' adheres to the test 'Criteria'.

            You must output JSON, specifically an object containing a "pass" boolean and "reason" string:
            - \`{ "pass": true, "reason": "<reason>" }\` if 'Received' adheres to the test 'Criteria'
            - \`{ "pass": false, "reason": "<reason>" }\` if 'Received' does not adhere to the test 'Criteria'

            The "reason" must explain exactly which part of 'Received' did or did not pass the test 'Criteria'.
          `,
        },
        {
          role: 'user',
          content: codeBlock`
            Received:
            ${received}

            Criteria:
            ${criteria}
          `,
        },
      ],
      max_tokens: 256,
      temperature: 0,
      response_format: {
        type: 'json_object',
      },
      stream: false,
    })

    const [choice] = completionResponse.choices

    if (!choice.message.content) {
      throw new Error('LLM evaluator returned invalid response')
    }

    const { pass, reason }: { pass?: boolean; reason?: string } = JSON.parse(choice.message.content)

    if (pass === undefined) {
      throw new Error('LLM evaluator returned invalid response')
    }

    return {
      message: () =>
        codeBlock`
          ${this.utils.matcherHint('toMatchCriteria', received, criteria, {
            comment: `evaluated by LLM '${model}'`,
            isNot: this.isNot,
            promise: this.promise,
          })}

          ${reason}
        `,
      pass,
    }
  },
})
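Two settings make the evaluator behave like a test oracle: `temperature: 0` keeps verdicts as repeatable as the model allows, and `response_format: { type: 'json_object' }` forces parseable output, so a reply missing the `pass` field throws instead of silently passing. The matcher then resolves to Jest's standard custom-matcher contract; a sketch of that shape for reference (names per Jest's documented `expect.extend` API):

```ts
// What expect.extend matchers return, directly or via a Promise:
interface MatcherResult {
  pass: boolean // inverted by .not when deciding the outcome
  message: () => string // lazily built failure message, shown on mismatch
}
```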

packages/ai-commands/test/setup.ts

@@ -1,8 +1,10 @@
 import { config } from 'dotenv'
 import { statSync } from 'fs'

-// Use studio .env.local for now
-const envPath = '../../apps/studio/.env.local'
+if (!process.env.CI) {
+  // Use keys from studio .env.local for local tests
+  const envPath = '../../apps/studio/.env.local'

-statSync(envPath)
-config({ path: envPath })
+  statSync(envPath)
+  config({ path: envPath })
+}

packages/ai-commands/tsconfig.json

@@ -1,5 +1,16 @@
 {
   "extends": "tsconfig/react-library.json",
-  "include": ["."],
-  "exclude": ["dist", "build", "node_modules"]
-}
+  "include": [
+    "."
+  ],
+  "exclude": [
+    "dist",
+    "build",
+    "node_modules"
+  ],
+  "compilerOptions": {
+    "types": [
+      "./test/extensions.d.ts"
+    ]
+  }
+}