feat: alternate search index for nimbus (#38662)

* feat: alternate search index for nimbus

Create an alternate search index for Nimbus that filters out
feature-flagged pages (equivalent to setting all feature flags to
false).

Notes:
- Creates two new DB tables, `page_nimbus` and `page_section_nimbus`,
  which are filtered versions of `page` and `page_section`
- Makes `nimbus` versions of all the DB search functions
- Refactored the embedding upload script to make it faster (got
  annoyed by how slow it was when testing...), incorporate retries, and
  produce better summary logs.
- Upload script, when run with the environment variable
  `ENABLED_FEATURES_OVERRIDE_DISABLE_ALL=true`, produces and uploads the
  alternate search index
- Changed all the search calls in frontend/API to check for
  `isFeatureEnabled('search:fullIndex')` to determine whether to search
  the full or alternate index

* ci: produce nimbus search indexes on merge

* fix: turn full search index on
This commit is contained in:
Charis
2025-09-16 12:37:53 -04:00
committed by GitHub
parent 9907fb25f4
commit 8cd5e10038
17 changed files with 1256 additions and 229 deletions

View File

@@ -13,10 +13,13 @@ interface PageSection {
rag_ignore?: boolean
}
type MatchPageSectionsFunction = 'match_page_sections_v2' | 'match_page_sections_v2_nimbus'
export async function clippy(
openai: OpenAI,
supabaseClient: SupabaseClient<any, 'public', any>,
messages: Message[]
messages: Message[],
options?: { useAltSearchIndex?: boolean }
) {
// TODO: better sanitization
const contextMessages = messages.map(({ role, content }) => {
@@ -63,14 +66,19 @@ export async function clippy(
const [{ embedding }] = embeddingResponse.data
const searchFunction = options?.useAltSearchIndex
? 'match_page_sections_v2_nimbus'
: 'match_page_sections_v2'
const joinedTable = options?.useAltSearchIndex ? 'page_nimbus' : 'page'
const { error: matchError, data: pageSections } = (await supabaseClient
.rpc('match_page_sections_v2', {
.rpc(searchFunction, {
embedding,
match_threshold: 0.78,
min_content_length: 50,
})
.neq('rag_ignore', true)
.select('content,page!inner(path),rag_ignore')
.select(`content,${joinedTable}!inner(path),rag_ignore`)
.limit(10)) as { error: any; data: PageSection[] | null }
if (matchError || !pageSections) {

View File

@@ -294,6 +294,48 @@ export type Database = {
}
Relationships: []
}
page_nimbus: {
Row: {
checksum: string | null
content: string | null
fts_tokens: unknown | null
id: number
last_refresh: string | null
meta: Json | null
path: string
source: string | null
title_tokens: unknown | null
type: string | null
version: string | null
}
Insert: {
checksum?: string | null
content?: string | null
fts_tokens?: unknown | null
id?: never
last_refresh?: string | null
meta?: Json | null
path: string
source?: string | null
title_tokens?: unknown | null
type?: string | null
version?: string | null
}
Update: {
checksum?: string | null
content?: string | null
fts_tokens?: unknown | null
id?: never
last_refresh?: string | null
meta?: Json | null
path?: string
source?: string | null
title_tokens?: unknown | null
type?: string | null
version?: string | null
}
Relationships: []
}
page_section: {
Row: {
content: string | null
@@ -335,6 +377,47 @@ export type Database = {
},
]
}
page_section_nimbus: {
Row: {
content: string | null
embedding: string | null
heading: string | null
id: number
page_id: number
rag_ignore: boolean | null
slug: string | null
token_count: number | null
}
Insert: {
content?: string | null
embedding?: string | null
heading?: string | null
id?: never
page_id: number
rag_ignore?: boolean | null
slug?: string | null
token_count?: number | null
}
Update: {
content?: string | null
embedding?: string | null
heading?: string | null
id?: never
page_id?: number
rag_ignore?: boolean | null
slug?: string | null
token_count?: number | null
}
Relationships: [
{
foreignKeyName: 'page_section_nimbus_page_id_fkey'
columns: ['page_id']
isOneToOne: false
referencedRelation: 'page_nimbus'
referencedColumns: ['id']
},
]
}
tickets: {
Row: {
company: string | null
@@ -526,6 +609,22 @@ export type Database = {
slugs: string[]
}[]
}
docs_search_embeddings_nimbus: {
Args: {
embedding: string
match_threshold: number
}
Returns: {
id: number
path: string
type: string
title: string
subtitle: string
description: string
headings: string[]
slugs: string[]
}[]
}
docs_search_fts: {
Args: {
query: string
@@ -539,6 +638,19 @@ export type Database = {
description: string
}[]
}
docs_search_fts_nimbus: {
Args: {
query: string
}
Returns: {
id: number
path: string
type: string
title: string
subtitle: string
description: string
}[]
}
get_full_content_url: {
Args: {
type: string
@@ -621,6 +733,23 @@ export type Database = {
token_count: number | null
}[]
}
match_embedding_nimbus: {
Args: {
embedding: string
match_threshold?: number
max_results?: number
}
Returns: {
content: string | null
embedding: string | null
heading: string | null
id: number
page_id: number
rag_ignore: boolean | null
slug: string | null
token_count: number | null
}[]
}
match_page_sections_v2: {
Args: {
embedding: string
@@ -638,6 +767,23 @@ export type Database = {
token_count: number | null
}[]
}
match_page_sections_v2_nimbus: {
Args: {
embedding: string
match_threshold: number
min_content_length: number
}
Returns: {
content: string | null
embedding: string | null
heading: string | null
id: number
page_id: number
rag_ignore: boolean | null
slug: string | null
token_count: number | null
}[]
}
search_content: {
Args: {
embedding: string
@@ -676,6 +822,44 @@ export type Database = {
subsections: Json[]
}[]
}
search_content_hybrid_nimbus: {
Args: {
query_text: string
query_embedding: string
max_result?: number
full_text_weight?: number
semantic_weight?: number
rrf_k?: number
match_threshold?: number
include_full_content?: boolean
}
Returns: {
id: number
page_title: string
type: string
href: string
content: string
metadata: Json
subsections: Json[]
}[]
}
search_content_nimbus: {
Args: {
embedding: string
include_full_content?: boolean
match_threshold?: number
max_result?: number
}
Returns: {
id: number
page_title: string
type: string
href: string
content: string
metadata: Json
subsections: Json[]
}[]
}
update_last_changed_checksum: {
Args: {
new_parent_page: string

View File

@@ -76,5 +76,7 @@
"sdk:dart": true,
"sdk:kotlin": true,
"sdk:python": true,
"sdk:swift": true
"sdk:swift": true,
"search:fullIndex": true
}

View File

@@ -255,6 +255,11 @@
"sdk:swift": {
"type": "boolean",
"description": "Enable the Swift SDK"
},
"search:fullIndex": {
"type": "boolean",
"description": "Enable the full search index. When true, uses the full search index; when false, uses the alternate search index."
}
},
"required": [
@@ -314,7 +319,8 @@
"sdk:dart",
"sdk:kotlin",
"sdk:python",
"sdk:swift"
"sdk:swift",
"search:fullIndex"
],
"additionalProperties": false
}

View File

@@ -40,6 +40,15 @@ function isFeatureEnabled<T extends Feature | Feature[]>(
features: T,
runtimeDisabledFeatures?: Feature[]
) {
// Override is used to produce a filtered version of the docs search index
// using the same sync setup as our normal search index
if (process.env.ENABLED_FEATURES_OVERRIDE_DISABLE_ALL === 'true') {
if (Array.isArray(features)) {
return Object.fromEntries(features.map((feature) => [featureToCamelCase(feature), false]))
}
return false
}
const disabledFeatures = new Set([
...(runtimeDisabledFeatures ?? []),
...disabledFeaturesStaticArray,

View File

@@ -3,6 +3,8 @@
import { compact, debounce, uniqBy } from 'lodash'
import { useCallback, useMemo, useReducer, useRef } from 'react'
import { isFeatureEnabled } from '../enabled-features'
const NUMBER_SOURCES = 2
const SUPABASE_URL = process.env.NEXT_PUBLIC_SUPABASE_URL
@@ -200,7 +202,10 @@ const useDocsSearch = () => {
let sourcesLoaded = 0
fetch(`${SUPABASE_URL}/rest/v1/rpc/docs_search_fts`, {
const useAlternateSearchIndex = !isFeatureEnabled('search:fullIndex')
const searchEndpoint = useAlternateSearchIndex ? 'docs_search_fts_nimbus' : 'docs_search_fts'
fetch(`${SUPABASE_URL}/rest/v1/rpc/${searchEndpoint}`, {
method: 'POST',
headers: {
'content-type': 'application/json',
@@ -244,7 +249,7 @@ const useDocsSearch = () => {
fetch(`${SUPABASE_URL}${FUNCTIONS_URL}search-embeddings`, {
method: 'POST',
body: JSON.stringify({ query }),
body: JSON.stringify({ query, useAlternateSearchIndex }),
})
.then((response) => response.json())
.then((results) => {