feat(content api): add management api references to semantic search (#36289)
* docs: add cursor rule for embedding generation process

  Add documentation for Cursor IDE about how docs embeddings are generated, including the workflow for creating and uploading semantic search content.

* feat: improve API reference metadata upload with descriptive content

  - Add preembeddings script to run codegen before embedding generation
  - Enhance OpenApiReferenceSource to generate more descriptive content, including parameters, responses, path information, and better-structured documentation

* feat: add Management API references to searchDocs GraphQL query

  - Add ManagementApiReference GraphQL type and model for API endpoint search results
  - Integrate Management API references into global search results
  - Update test snapshots and add comprehensive test coverage for Management API search

* style: format
This commit is contained in:
59
.cursor/rules/docs-embeddings-generation.md
Normal file
59
.cursor/rules/docs-embeddings-generation.md
Normal file
@@ -0,0 +1,59 @@
|
||||
# Documentation Embeddings Generation System
|
||||
|
||||
## Overview
|
||||
|
||||
The documentation embeddings generation system processes various documentation sources and uploads their metadata to a database for semantic search functionality. The system is located in `apps/docs/scripts/search/` and works by:
|
||||
|
||||
1. **Discovering content sources** from multiple types of documentation
|
||||
2. **Processing content** into structured sections with checksums
|
||||
3. **Generating embeddings** using OpenAI's text-embedding-ada-002 model
|
||||
4. **Storing in database** with vector embeddings for semantic search
|
||||
|
||||
## Architecture
|
||||
|
||||
### Main Entry Point
|
||||
- `generate-embeddings.ts` - Main script that orchestrates the entire process
  - Supports `--refresh` flag to force regeneration of all content
|
||||
|
||||
### Content Sources (`sources/` directory)
|
||||
|
||||
#### Base Classes
|
||||
- `BaseLoader` - Abstract class for loading content from different sources
|
||||
- `BaseSource` - Abstract class for processing and formatting content
|
||||
|
||||
#### Source Types
|
||||
1. **Markdown Sources** (`markdown.ts`)
   - Processes `.mdx` files from guides and documentation
   - Extracts frontmatter metadata and content sections
|
||||
|
||||
2. **Reference Documentation** (`reference-doc.ts`)
   - **OpenAPI References** - Management API documentation from OpenAPI specs
   - **Client Library References** - JavaScript, Dart, Python, C#, Swift, Kotlin SDKs
   - **CLI References** - Command-line interface documentation
   - Processes YAML/JSON specs and matches them with common sections
|
||||
|
||||
3. **GitHub Discussions** (`github-discussion.ts`)
   - Fetches troubleshooting discussions from GitHub using the GraphQL API
   - Uses GitHub App authentication for access
|
||||
|
||||
4. **Partner Integrations** (`partner-integrations.ts`)
   - Fetches approved partner integration documentation from the Supabase database
   - Technology integrations only (excludes agencies)
|
||||
|
||||
### Processing Flow
|
||||
|
||||
1. **Content Discovery**: Each source loader discovers and loads content files/data
|
||||
2. **Content Processing**: Each source processes content into:
   - Checksum for change detection
   - Metadata (title, subtitle, etc.)
   - Sections with headings and content
|
||||
3. **Change Detection**: Compares checksums against existing database records
|
||||
4. **Embedding Generation**: Uses OpenAI to generate embeddings for new/changed content
|
||||
5. **Database Storage**: Stores in `page` and `page_section` tables with embeddings
|
||||
6. **Cleanup**: Removes outdated pages using version tracking
|
||||
|
||||
### Database Schema
|
||||
|
||||
- **`page`** table: Stores page metadata, content, checksum, version
|
||||
- **`page_section`** table: Stores individual sections with embeddings, token counts
|
||||
|
||||
@@ -84,6 +84,20 @@ type CLICommandReference implements SearchResult {
|
||||
content: String
|
||||
}
|
||||
|
||||
"""
|
||||
A reference document containing a description of a Supabase Management API endpoint
|
||||
"""
|
||||
type ManagementApiReference implements SearchResult {
  """The title of the document"""
  title: String

  """The URL of the document"""
  href: String

  """The content of the reference document, as text"""
  content: String
}
|
||||
|
||||
"""
|
||||
A reference document containing a description of a function from a Supabase client library
|
||||
"""
|
||||
|
||||
@@ -204,4 +204,40 @@ describe('prod smoke test: graphql: searchDocs', () => {
|
||||
expect(guideNode).toHaveProperty('href')
|
||||
expect(guideNode).toHaveProperty('content')
|
||||
})
|
||||
|
||||
// Smoke test: searching for an SSO-related phrase must surface at least one
// Management API reference through the public GraphQL endpoint.
it('searchDocs query includes Management API references', async () => {
  // Fields are selected only inside the `ManagementApiReference` inline
  // fragment, so nodes of any other concrete type are returned without them.
  const query = `
    query SearchDocsQuery($query: String!) {
      searchDocs(query: $query) {
        nodes {
          ...on ManagementApiReference {
            title
            href
            content
          }
        }
      }
    }
  `
  const result = await fetch(GRAPHQL_URL, {
    method: 'POST',
    body: JSON.stringify({ query, variables: { query: 'create SSO provider' } }),
  })

  expect(result.status).toBe(200)
  const { data, errors } = await result.json()
  expect(errors).toBeUndefined()

  const {
    searchDocs: { nodes },
  } = data
  expect(Array.isArray(nodes)).toBe(true)
  expect(nodes.length).toBeGreaterThan(0)

  // A node carrying a `title` can only have matched the fragment above, so
  // its presence identifies a Management API reference.
  const managementApiNode = nodes.find((node: { title?: string }) => Boolean(node.title))
  expect(managementApiNode).toBeDefined()
  expect(managementApiNode).toHaveProperty('title')
  expect(managementApiNode).toHaveProperty('href')
  expect(managementApiNode).toHaveProperty('content')
})
|
||||
})
|
||||
|
||||
@@ -33,6 +33,16 @@ const rpcSpy = vi.fn().mockImplementation((funcName, params) => {
|
||||
content: params?.include_full_content ? 'Another content' : null,
|
||||
subsections: [{ title: 'Getting Started', content: 'Getting Started content' }],
|
||||
},
|
||||
{
|
||||
type: 'reference',
|
||||
page_title: 'Create a SSO provider',
|
||||
href: 'https://supabase.com/docs/reference/api/v1-create-a-sso-provider',
|
||||
content: params?.include_full_content ? 'Creates a new SSO provider for a project' : null,
|
||||
metadata: {
|
||||
title: 'Create a SSO provider',
|
||||
subtitle: 'Management API Reference: Create a SSO provider',
|
||||
},
|
||||
},
|
||||
]
|
||||
return Promise.resolve({ data: mockResults.slice(0, limit), error: null })
|
||||
}
|
||||
@@ -190,4 +200,40 @@ describe('/api/graphql searchDocs', () => {
|
||||
expect(json.errors).toBeDefined()
|
||||
expect(json.errors[0].message).toContain('required')
|
||||
})
|
||||
|
||||
// Verifies that a mocked Management API search match is exposed through the
// `ManagementApiReference` GraphQL type with its title, href and content.
it('should return Management API references with proper fields', async () => {
  const searchQuery = `
    query {
      searchDocs(query: "SSO provider", limit: 3) {
        nodes {
          ... on ManagementApiReference {
            title
            href
            content
          }
        }
      }
    }
  `
  const request = new Request('http://localhost/api/graphql', {
    method: 'POST',
    body: JSON.stringify({ query: searchQuery }),
  })

  const response = await POST(request)
  const json = await response.json()

  expect(json.errors).toBeUndefined()
  expect(json.data).toBeDefined()
  expect(json.data.searchDocs).toBeDefined()
  expect(json.data.searchDocs.nodes).toBeInstanceOf(Array)
  expect(json.data.searchDocs.nodes).toHaveLength(3)

  // NOTE(review): index 2 assumes the Management API row is the third entry
  // of the mocked `search_content` results; update this if the mock order changes.
  const managementApiNode = json.data.searchDocs.nodes[2]
  expect(managementApiNode).toMatchObject({
    title: 'Create a SSO provider',
    href: 'https://supabase.com/docs/reference/api/v1-create-a-sso-provider',
    content: 'Creates a new SSO provider for a project',
  })
})
|
||||
})
|
||||
|
||||
@@ -18,7 +18,12 @@ type Database = {
|
||||
DatabaseGenerated['public']['Functions']['search_content']['Returns'][number],
|
||||
'subsections' | 'metadata'
|
||||
> & {
|
||||
metadata: { language?: string; methodName?: string; platform?: string }
|
||||
metadata: {
|
||||
subtitle?: string
|
||||
language?: string
|
||||
methodName?: string
|
||||
platform?: string
|
||||
}
|
||||
subsections: Array<{ title?: string; href?: string; content?: string }>
|
||||
}
|
||||
>
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
"postbuild": "pnpm run build:sitemap && pnpm run build:llms && ./../../scripts/upload-static-assets.sh",
|
||||
"prebuild": "pnpm run codegen:graphql && pnpm run codegen:references && pnpm run codegen:examples",
|
||||
"predev": "pnpm run codegen:graphql && pnpm run codegen:references && pnpm run codegen:examples",
|
||||
"preembeddings": "pnpm run codegen:references",
|
||||
"preinstall": "npx only-allow pnpm",
|
||||
"presync": "pnpm run codegen:graphql",
|
||||
"pretest": "pnpm run codegen:examples",
|
||||
|
||||
@@ -8,6 +8,7 @@ import {
|
||||
DB_METADATA_TAG_PLATFORM_CLI,
|
||||
ReferenceCLICommandModel,
|
||||
} from '../reference/referenceCLIModel'
|
||||
import { ReferenceManagementApiModel } from '../reference/referenceManagementApiModel'
|
||||
import { ReferenceSDKFunctionModel, SDKLanguageValues } from '../reference/referenceSDKModel'
|
||||
import { TroubleshootingModel } from '../troubleshooting/troubleshootingModel'
|
||||
import { SearchResultInterface } from './globalSearchInterface'
|
||||
@@ -74,6 +75,13 @@ function createModelFromMatch({
|
||||
content,
|
||||
subsections,
|
||||
})
|
||||
// TODO [Charis 2025-06-09] replace with less hacky check
|
||||
} else if (metadata.subtitle?.startsWith('Management API Reference')) {
|
||||
return new ReferenceManagementApiModel({
|
||||
title: page_title,
|
||||
href,
|
||||
content,
|
||||
})
|
||||
} else {
|
||||
return null
|
||||
}
|
||||
|
||||
13
apps/docs/resources/reference/referenceManagementApiModel.ts
Normal file
13
apps/docs/resources/reference/referenceManagementApiModel.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { type SearchResultInterface } from '../globalSearch/globalSearchInterface'
|
||||
|
||||
/**
 * Search result model for a single Supabase Management API endpoint reference.
 *
 * The class is a plain holder for the `title`, `href` and `content` fields of
 * a search result. Its identity matters: the `ManagementApiReference` GraphQL
 * type recognises values with an `instanceof` check against this class.
 */
export class ReferenceManagementApiModel implements SearchResultInterface {
  public title?: string
  public href?: string
  public content?: string

  /**
   * @param fields - Values copied as-is onto the instance. Every field is
   *   optional; an omitted field is left `undefined`.
   */
  constructor({ title, href, content }: { title?: string; href?: string; content?: string }) {
    this.title = title
    this.href = href
    this.content = content
  }
}
|
||||
@@ -0,0 +1,25 @@
|
||||
import { GraphQLObjectType, GraphQLString } from 'graphql'
|
||||
import { GraphQLInterfaceTypeSearchResult } from '../globalSearch/globalSearchSchema'
|
||||
import { ReferenceManagementApiModel } from './referenceManagementApiModel'
|
||||
|
||||
/**
 * GraphQL object type for Management API endpoint references returned by
 * search. It implements the shared `SearchResult` interface and exposes the
 * same three nullable string fields as `ReferenceManagementApiModel`.
 */
export const GraphQLObjectTypeReferenceManagementApi = new GraphQLObjectType({
  name: 'ManagementApiReference',
  interfaces: [GraphQLInterfaceTypeSearchResult],
  // Concrete-type resolution for the interface: a value belongs to this type
  // only when it was constructed as a ReferenceManagementApiModel.
  isTypeOf: (value: unknown) => value instanceof ReferenceManagementApiModel,
  description:
    'A reference document containing a description of a Supabase Management API endpoint',
  fields: {
    title: {
      type: GraphQLString,
      description: 'The title of the document',
    },
    href: {
      type: GraphQLString,
      description: 'The URL of the document',
    },
    content: {
      type: GraphQLString,
      description: 'The content of the reference document, as text',
    },
  },
})
|
||||
@@ -10,6 +10,7 @@ import { errorRoot, errorsRoot } from './error/errorResolver'
|
||||
import { searchRoot } from './globalSearch/globalSearchResolver'
|
||||
import { GraphQLObjectTypeGuide } from './guide/guideSchema'
|
||||
import { GraphQLObjectTypeReferenceCLICommand } from './reference/referenceCLISchema'
|
||||
import { GraphQLObjectTypeReferenceManagementApi } from './reference/referenceManagementApiSchema'
|
||||
import { GraphQLObjectTypeReferenceSDKFunction } from './reference/referenceSDKSchema'
|
||||
import { GraphQLObjectTypeTroubleshooting } from './troubleshooting/troubleshootingSchema'
|
||||
|
||||
@@ -43,6 +44,7 @@ export const rootGraphQLSchema = new GraphQLSchema({
|
||||
types: [
|
||||
GraphQLObjectTypeGuide,
|
||||
GraphQLObjectTypeReferenceCLICommand,
|
||||
GraphQLObjectTypeReferenceManagementApi,
|
||||
GraphQLObjectTypeReferenceSDKFunction,
|
||||
GraphQLObjectTypeTroubleshooting,
|
||||
],
|
||||
|
||||
@@ -8,6 +8,7 @@ import type {
|
||||
IFunctionDefinition,
|
||||
ISpec,
|
||||
} from '../../../components/reference/Reference.types.js'
|
||||
import { getApiEndpointById } from '../../../features/docs/Reference.generated.singleton.js'
|
||||
import type { CliCommand, CliSpec } from '../../../generator/types/CliSpec.js'
|
||||
import { flattenSections } from '../../../lib/helpers.js'
|
||||
import { enrichedOperation, gen_v3 } from '../../../lib/refGenerator/helpers.js'
|
||||
@@ -39,30 +40,35 @@ export abstract class ReferenceLoader<SpecSection> extends BaseLoader {
|
||||
|
||||
const specSections = this.getSpecSections(specContents)
|
||||
|
||||
const sections = flattenedRefSections
|
||||
.map((refSection) => {
|
||||
const specSection = this.matchSpecSection(specSections, refSection.id)
|
||||
const sections = (
|
||||
await Promise.all(
|
||||
flattenedRefSections.map(async (refSection) => {
|
||||
const specSection = await this.matchSpecSection(specSections, refSection.id)
|
||||
|
||||
if (!specSection) {
|
||||
return
|
||||
}
|
||||
if (!specSection) {
|
||||
return
|
||||
}
|
||||
|
||||
return this.sourceConstructor(
|
||||
this.source,
|
||||
`${this.path}/${refSection.slug}`,
|
||||
refSection,
|
||||
specSection,
|
||||
this.enhanceMeta(specSection)
|
||||
)
|
||||
})
|
||||
.filter((item): item is ReferenceSource<SpecSection> => item !== undefined)
|
||||
return this.sourceConstructor(
|
||||
this.source,
|
||||
`${this.path}/${refSection.slug}`,
|
||||
refSection,
|
||||
specSection,
|
||||
this.enhanceMeta(specSection)
|
||||
)
|
||||
})
|
||||
)
|
||||
).filter((item): item is ReferenceSource<SpecSection> => item !== undefined)
|
||||
|
||||
return sections as BaseSource[]
|
||||
}
|
||||
|
||||
abstract getSpecSections(specContents: string): SpecSection[]
|
||||
abstract matchSpecSection(specSections: SpecSection[], id: string): SpecSection | undefined
|
||||
enhanceMeta(section: SpecSection): Json {
|
||||
abstract matchSpecSection(
|
||||
specSections: SpecSection[],
|
||||
id: string
|
||||
): SpecSection | undefined | Promise<SpecSection | undefined>
|
||||
enhanceMeta(_section: SpecSection): Json {
|
||||
return this.meta
|
||||
}
|
||||
}
|
||||
@@ -115,7 +121,7 @@ export abstract class ReferenceSource<SpecSection> extends BaseSource {
|
||||
abstract extractSubtitle(): string
|
||||
}
|
||||
|
||||
export class OpenApiReferenceLoader extends ReferenceLoader<enrichedOperation> {
|
||||
export class OpenApiReferenceLoader extends ReferenceLoader<Partial<enrichedOperation>> {
|
||||
constructor(
|
||||
source: string,
|
||||
path: string,
|
||||
@@ -136,39 +142,108 @@ export class OpenApiReferenceLoader extends ReferenceLoader<enrichedOperation> {
|
||||
|
||||
return generatedSpec.operations
|
||||
}
|
||||
/**
 * Resolves the spec section for a reference section by looking the endpoint
 * up with `getApiEndpointById`, instead of searching the parsed operations.
 *
 * @param _operations - Unused. Kept so the signature matches the
 *   `ReferenceLoader` contract for this loader's spec-section type.
 * @param id - Reference section id, passed to `getApiEndpointById`.
 * @returns The endpoint's fields mapped onto a partial `enrichedOperation`,
 *   or `undefined` when no endpoint is found for `id`.
 */
async matchSpecSection(
  _operations: Partial<enrichedOperation>[],
  id: string
): Promise<Partial<enrichedOperation> | undefined> {
  const apiEndpoint = await getApiEndpointById(id)
  if (!apiEndpoint) return undefined

  // NOTE(review): `tags` is not populated here, although
  // OpenApiReferenceSource reads `tags` from the spec section. Confirm
  // whether the endpoint data carries tags that should be forwarded.
  const enrichedOp: Partial<enrichedOperation> = {
    operationId: apiEndpoint.id,
    operation: apiEndpoint.method,
    path: apiEndpoint.path,
    summary: apiEndpoint.summary,
    description: apiEndpoint.description,
    deprecated: apiEndpoint.deprecated,
    // NOTE(review): the `as any` casts bridge the generated endpoint types
    // and `enrichedOperation`; replace with precise types once the two agree.
    parameters: apiEndpoint.parameters as any,
    requestBody: apiEndpoint.requestBody as any,
    responses: apiEndpoint.responses as any,
  }

  return enrichedOp
}
|
||||
}
|
||||
|
||||
/**
 * Search source for a single Management API (OpenAPI) operation.
 *
 * The spec section is a partial `enrichedOperation`, so every field read
 * below may be missing and is guarded or defaulted.
 */
export class OpenApiReferenceSource extends ReferenceSource<Partial<enrichedOperation>> {
  /**
   * Serializes the operation's descriptive fields to a JSON string.
   *
   * @param specOperation - Operation to serialize; missing fields are
   *   omitted from the JSON output by `JSON.stringify`.
   * @param _ - Unused common reference item.
   */
  formatSection(specOperation: Partial<enrichedOperation>, _: ICommonItem) {
    const { summary, description, operation, path, tags, parameters, responses, operationId } =
      specOperation
    return JSON.stringify({
      summary,
      description,
      operation,
      path,
      tags,
      parameters,
      responses,
      operationId,
    })
  }

  /**
   * Builds "<meta title>: <description>", falling back to the operation id
   * and then to an empty string when there is no description.
   *
   * NOTE(review): global search appears to classify Management API results by
   * a subtitle prefix of 'Management API Reference' — presumably the value of
   * `meta.title` for this source. Verify before changing this format.
   */
  extractSubtitle() {
    return `${this.meta.title}: ${this.specSection.description || this.specSection.operationId || ''}`
  }

  /**
   * Prefers the operation summary, then a string `meta.title` (or the
   * operation method when `meta.title` is not a string), then ''.
   */
  extractTitle() {
    return (
      this.specSection.summary ||
      (typeof this.meta.title === 'string' ? this.meta.title : this.specSection.operation) ||
      ''
    )
  }

  /**
   * Renders the operation as plain text for indexing: a title line followed
   * by the summary, description, path, parameters, responses and tags that
   * are present, separated by blank lines.
   */
  extractIndexedContent(): string {
    const { summary, description, operation, tags, path, parameters, responses } = this.specSection

    // The title line is always emitted, even when `meta.title` is missing.
    const sections: string[] = [`# ${this.meta.title ?? ''}`]

    if (summary) {
      sections.push(summary)
    }

    if (description) {
      sections.push(`Description: ${description}`)
    }

    if (path) {
      // A missing method is rendered as GET.
      sections.push(`Path: ${operation?.toUpperCase() || 'GET'} ${path}`)
    }

    if (parameters && parameters.length > 0) {
      const paramList = parameters
        .map((param: any) => {
          const required = param.required ? 'required' : 'optional'
          return `- ${param.name} (${param.schema?.type || 'string'}, ${required}): ${param.description || ''}`
        })
        .join('\n')
      sections.push(`Parameters:\n${paramList}`)
    }

    // Skip an empty responses object so no dangling "Responses:" header is
    // emitted, mirroring the non-empty guard used for parameters.
    const responseEntries = responses ? Object.entries(responses) : []
    if (responseEntries.length > 0) {
      const responseList = responseEntries
        .map(([code, response]: [string, any]) => {
          const desc = response.description || 'No description'
          return `- ${code}: ${desc}`
        })
        .join('\n')
      sections.push(`Responses:\n${responseList}`)
    }

    if (tags && tags.length > 0) {
      sections.push(`Tags: ${tags.join(', ')}`)
    }

    return sections.join('\n\n')
  }
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user