From 1a244b79a56129af5f748ad7b1ac22641dafa730 Mon Sep 17 00:00:00 2001
From: Greg Richardson <greg.nmr@gmail.com>
Date: Wed, 20 Dec 2023 09:34:30 -0700
Subject: [PATCH] Automated AI tests via GitHub Actions (#19880)

* ci: ai tests

* chore(ai-commands): update readme

* fix: dont load .env.local in ci environment

* fix: pass openai env variable into ci job

* feat(ai): llm evaluated tests

* chore(ai): remove unused jest snapshots
---
 .github/workflows/ai-tests.yml                | 50 +++++++++++++
 packages/ai-commands/README.md                |  6 +-
 packages/ai-commands/jest.config.js           |  1 +
 .../src/__snapshots__/sql.test.ts.snap        |  6 --
 packages/ai-commands/src/sql.test.ts          |  6 +-
 packages/ai-commands/test/extensions.d.ts     | 11 +++
 packages/ai-commands/test/extensions.ts       | 72 +++++++++++++++++++
 packages/ai-commands/test/setup.ts            | 10 +--
 packages/ai-commands/tsconfig.json            | 17 ++++-
 9 files changed, 160 insertions(+), 19 deletions(-)
 create mode 100644 .github/workflows/ai-tests.yml
 create mode 100644 packages/ai-commands/test/extensions.d.ts
 create mode 100644 packages/ai-commands/test/extensions.ts

diff --git a/.github/workflows/ai-tests.yml b/.github/workflows/ai-tests.yml
new file mode 100644
index 0000000000..96bc283c6d
--- /dev/null
+++ b/.github/workflows/ai-tests.yml
@@ -0,0 +1,50 @@
+name: AI Unit Tests & Type Check
+
+on:
+  push:
+    branches: [master]
+    paths:
+      - 'packages/ai-commands/**'
+  pull_request:
+    branches: [master]
+    paths:
+      - 'packages/ai-commands/**'
+  schedule:
+    - cron: '15 0 * * 1' # Every Monday @ 12:15am UTC (off the hour to avoid heavy load times)
+
+# Cancel old builds on new commit for same workflow + branch/PR
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+
+    env:
+      OPENAI_KEY: ${{ secrets.OPENAI_KEY }}
+
+    strategy:
+      matrix:
+        node-version: [18.x]
+
+    defaults:
+      run:
+        working-directory: ./packages/ai-commands
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            packages
+      - name: Use Node.js ${{ matrix.node-version }}
+        uses: actions/setup-node@v3
+        with:
+          node-version: ${{ matrix.node-version }}
+          cache: 'npm'
+      - name: Install deps
+        run: npm ci
+      - name: Type check
+        run: npm run typecheck
+      - name: Run tests
+        run: npm run test
diff --git a/packages/ai-commands/README.md b/packages/ai-commands/README.md
index 5068e2ba40..2bfadb0b61 100644
--- a/packages/ai-commands/README.md
+++ b/packages/ai-commands/README.md
@@ -2,11 +2,11 @@
 
 ## Main purpose
 
-This package contains all features involving OpenAI API. Technically, each feature is implemented as a function which
-can be easily tested for regressions.
+This package contains all features involving AI and LLMs (e.g. via the OpenAI API).
+Each feature is implemented as a function which can be easily tested for regressions.
 
 The streaming functions only work on Edge runtime so they can only be imported via a special `edge` subpath like so:
 
-```
+```ts
 import { chatRlsPolicy } from 'ai-commands/edge'
 ```
diff --git a/packages/ai-commands/jest.config.js b/packages/ai-commands/jest.config.js
index e5f05acb26..b970fa8370 100644
--- a/packages/ai-commands/jest.config.js
+++ b/packages/ai-commands/jest.config.js
@@ -7,6 +7,7 @@ module.exports = {
     '^.+\\.(js|jsx)$': 'babel-jest',
   },
   setupFiles: ['./test/setup.ts'],
+  setupFilesAfterEnv: ['./test/extensions.ts'],
   testTimeout: 15000,
   transformIgnorePatterns: [
     'node_modules/(?!(mdast-.*|micromark|micromark-.*|unist-.*|decode-named-character-reference|character-entities)/)',
diff --git a/packages/ai-commands/src/__snapshots__/sql.test.ts.snap b/packages/ai-commands/src/__snapshots__/sql.test.ts.snap
index 8317480bf1..ca462cb7b8 100644
--- a/packages/ai-commands/src/__snapshots__/sql.test.ts.snap
+++ b/packages/ai-commands/src/__snapshots__/sql.test.ts.snap
@@ -38,14 +38,8 @@ exports[`generate single table with specified columns 1`] = `
 );"
 `;
 
-exports[`generate single table with specified columns 2`] = `"Employee Tracking Table"`;
-
 exports[`rls chat select policy using table definition 1`] = `
 "create policy select_todo_policy on todos for
 select
   using (user_id = auth.uid ());"
 `;
-
-exports[`title title matches content 1`] = `"Employee and Department Tables"`;
-
-exports[`title title matches content 2`] = `"Tables to track employees and their respective departments"`;
diff --git a/packages/ai-commands/src/sql.test.ts b/packages/ai-commands/src/sql.test.ts
index 8982f7be77..7ff9513d7f 100644
--- a/packages/ai-commands/src/sql.test.ts
+++ b/packages/ai-commands/src/sql.test.ts
@@ -16,7 +16,7 @@ describe('generate', () => {
     )
 
     expect(formatSql(sql)).toMatchSnapshot()
-    expect(title).toMatchSnapshot()
+    await expect(title).toMatchCriteria('relates to employees')
   })
 })
 
@@ -94,8 +94,8 @@ describe('title', () => {
       `
     )
 
-    expect(title).toMatchSnapshot()
-    expect(description).toMatchSnapshot()
+    await expect(title).toMatchCriteria('relates to employees and departments')
+    await expect(description).toMatchCriteria('describes employees and departments')
   })
 })
 
diff --git a/packages/ai-commands/test/extensions.d.ts b/packages/ai-commands/test/extensions.d.ts
new file mode 100644
index 0000000000..54b85f00a0
--- /dev/null
+++ b/packages/ai-commands/test/extensions.d.ts
@@ -0,0 +1,11 @@
+import 'expect'
+
+declare module 'expect' {
+  interface Matchers {
+    /**
+     * Check that a string matches natural-language criteria
+     * describing the expected output. Uses an LLM to evaluate.
+     */
+    toMatchCriteria(criteria: string): Promise<void>
+  }
+}
diff --git a/packages/ai-commands/test/extensions.ts b/packages/ai-commands/test/extensions.ts
new file mode 100644
index 0000000000..bcce558937
--- /dev/null
+++ b/packages/ai-commands/test/extensions.ts
@@ -0,0 +1,72 @@
+import { expect } from '@jest/globals'
+import { codeBlock } from 'common-tags'
+import OpenAI from 'openai'
+
+const openAiKey = process.env.OPENAI_KEY
+const openai = new OpenAI({ apiKey: openAiKey })
+
+expect.extend({
+  // Asks an LLM whether `received` satisfies the natural-language `criteria`.
+  // Throws when the reply is empty or does not carry a boolean "pass" verdict.
+  async toMatchCriteria(received: string, criteria: string) {
+    const model = 'gpt-4-1106-preview'
+    const completionResponse = await openai.chat.completions.create({
+      model,
+      messages: [
+        {
+          role: 'system',
+          content: codeBlock`
+            You are a test runner. Your job is to evaluate whether 'Received' adheres to the test 'Criteria'.
+            
+            You must output JSON, specifically an object containing a "pass" boolean and "reason" string:
+            - \`{ "pass": true, "reason": "<reason>" }\` if 'Received' adheres to the test 'Criteria'
+            - \`{ "pass": false, "reason": "<reason>" }\` if 'Received' does not adhere to the test 'Criteria'
+
+            The "reason" must explain exactly which part of 'Received' did or did not pass the test 'Criteria'.
+          `,
+        },
+        {
+          role: 'user',
+          content: codeBlock`
+            Received:
+            ${received}
+
+            Criteria:
+            ${criteria}
+          `,
+        },
+      ],
+      max_tokens: 256,
+      temperature: 0,
+      response_format: { type: 'json_object' },
+      stream: false,
+    })
+
+    // `choices` may be empty, so guard the first entry as well as its content.
+    const content = completionResponse.choices[0]?.message.content
+    if (!content) {
+      throw new Error('LLM evaluator returned invalid response')
+    }
+
+    const { pass, reason }: { pass?: unknown; reason?: string } = JSON.parse(content)
+
+    // Reject null, string or missing verdicts rather than passing them on to Jest.
+    if (typeof pass !== 'boolean') {
+      throw new Error('LLM evaluator returned invalid response')
+    }
+
+    return {
+      message: () =>
+        codeBlock`
+          ${this.utils.matcherHint('toMatchCriteria', received, criteria, {
+            comment: `evaluated by LLM '${model}'`,
+            isNot: this.isNot,
+            promise: this.promise,
+          })}
+          
+          ${reason}
+        `,
+      pass,
+    }
+  },
+})
diff --git a/packages/ai-commands/test/setup.ts b/packages/ai-commands/test/setup.ts
index a963f60147..a11d7ac225 100644
--- a/packages/ai-commands/test/setup.ts
+++ b/packages/ai-commands/test/setup.ts
@@ -1,8 +1,10 @@
 import { config } from 'dotenv'
 import { statSync } from 'fs'
 
-// Use studio .env.local for now
-const envPath = '../../apps/studio/.env.local'
+if (!process.env.CI) {
+  // Use keys from studio .env.local for local tests
+  const envPath = '../../apps/studio/.env.local'
 
-statSync(envPath)
-config({ path: envPath })
+  statSync(envPath)
+  config({ path: envPath })
+}
diff --git a/packages/ai-commands/tsconfig.json b/packages/ai-commands/tsconfig.json
index cd6c94d6e8..e8b5504d29 100644
--- a/packages/ai-commands/tsconfig.json
+++ b/packages/ai-commands/tsconfig.json
@@ -1,5 +1,16 @@
 {
   "extends": "tsconfig/react-library.json",
-  "include": ["."],
-  "exclude": ["dist", "build", "node_modules"]
-}
+  "include": [
+    "."
+  ],
+  "exclude": [
+    "dist",
+    "build",
+    "node_modules"
+  ],
+  "compilerOptions": {
+    "types": [
+      "./test/extensions.d.ts"
+    ]
+  }
+}
\ No newline at end of file