feat: alternate search index for nimbus (#38662)
* feat: alternate search index for nimbus
Create an alternate search index for Nimbus that filters out
feature-flagged pages (equivalent to setting all feature flags to
false).
Notes:
- Creates two new DB tables, `page_nimbus` and `page_section_nimbus`,
which are filtered versions of `page` and `page_section`
- Makes `nimbus` versions of all the DB search functions
- Refactored the embedding upload script to make it faster (it was
annoyingly slow when testing), incorporate retries, and
produce better summary logs.
- The upload script, when run with the environment variable
`ENABLED_FEATURES_OVERRIDE_DISABLE_ALL` set to `true`, produces and uploads the
alternate search index
- Changed all the search calls in frontend/API to check for
`isFeatureEnabled('search:fullIndex')` to determine whether to search
the full or alternate index
* ci: produce nimbus search indexes on merge
* fix: turn full search index on
This commit is contained in:
@@ -13,10 +13,13 @@ interface PageSection {
|
||||
rag_ignore?: boolean
|
||||
}
|
||||
|
||||
type MatchPageSectionsFunction = 'match_page_sections_v2' | 'match_page_sections_v2_nimbus'
|
||||
|
||||
export async function clippy(
|
||||
openai: OpenAI,
|
||||
supabaseClient: SupabaseClient<any, 'public', any>,
|
||||
messages: Message[]
|
||||
messages: Message[],
|
||||
options?: { useAltSearchIndex?: boolean }
|
||||
) {
|
||||
// TODO: better sanitization
|
||||
const contextMessages = messages.map(({ role, content }) => {
|
||||
@@ -63,14 +66,19 @@ export async function clippy(
|
||||
|
||||
const [{ embedding }] = embeddingResponse.data
|
||||
|
||||
const searchFunction = options?.useAltSearchIndex
|
||||
? 'match_page_sections_v2_nimbus'
|
||||
: 'match_page_sections_v2'
|
||||
const joinedTable = options?.useAltSearchIndex ? 'page_nimbus' : 'page'
|
||||
|
||||
const { error: matchError, data: pageSections } = (await supabaseClient
|
||||
.rpc('match_page_sections_v2', {
|
||||
.rpc(searchFunction, {
|
||||
embedding,
|
||||
match_threshold: 0.78,
|
||||
min_content_length: 50,
|
||||
})
|
||||
.neq('rag_ignore', true)
|
||||
.select('content,page!inner(path),rag_ignore')
|
||||
.select(`content,${joinedTable}!inner(path),rag_ignore`)
|
||||
.limit(10)) as { error: any; data: PageSection[] | null }
|
||||
|
||||
if (matchError || !pageSections) {
|
||||
|
||||
@@ -294,6 +294,48 @@ export type Database = {
|
||||
}
|
||||
Relationships: []
|
||||
}
|
||||
page_nimbus: {
|
||||
Row: {
|
||||
checksum: string | null
|
||||
content: string | null
|
||||
fts_tokens: unknown | null
|
||||
id: number
|
||||
last_refresh: string | null
|
||||
meta: Json | null
|
||||
path: string
|
||||
source: string | null
|
||||
title_tokens: unknown | null
|
||||
type: string | null
|
||||
version: string | null
|
||||
}
|
||||
Insert: {
|
||||
checksum?: string | null
|
||||
content?: string | null
|
||||
fts_tokens?: unknown | null
|
||||
id?: never
|
||||
last_refresh?: string | null
|
||||
meta?: Json | null
|
||||
path: string
|
||||
source?: string | null
|
||||
title_tokens?: unknown | null
|
||||
type?: string | null
|
||||
version?: string | null
|
||||
}
|
||||
Update: {
|
||||
checksum?: string | null
|
||||
content?: string | null
|
||||
fts_tokens?: unknown | null
|
||||
id?: never
|
||||
last_refresh?: string | null
|
||||
meta?: Json | null
|
||||
path?: string
|
||||
source?: string | null
|
||||
title_tokens?: unknown | null
|
||||
type?: string | null
|
||||
version?: string | null
|
||||
}
|
||||
Relationships: []
|
||||
}
|
||||
page_section: {
|
||||
Row: {
|
||||
content: string | null
|
||||
@@ -335,6 +377,47 @@ export type Database = {
|
||||
},
|
||||
]
|
||||
}
|
||||
page_section_nimbus: {
|
||||
Row: {
|
||||
content: string | null
|
||||
embedding: string | null
|
||||
heading: string | null
|
||||
id: number
|
||||
page_id: number
|
||||
rag_ignore: boolean | null
|
||||
slug: string | null
|
||||
token_count: number | null
|
||||
}
|
||||
Insert: {
|
||||
content?: string | null
|
||||
embedding?: string | null
|
||||
heading?: string | null
|
||||
id?: never
|
||||
page_id: number
|
||||
rag_ignore?: boolean | null
|
||||
slug?: string | null
|
||||
token_count?: number | null
|
||||
}
|
||||
Update: {
|
||||
content?: string | null
|
||||
embedding?: string | null
|
||||
heading?: string | null
|
||||
id?: never
|
||||
page_id?: number
|
||||
rag_ignore?: boolean | null
|
||||
slug?: string | null
|
||||
token_count?: number | null
|
||||
}
|
||||
Relationships: [
|
||||
{
|
||||
foreignKeyName: 'page_section_nimbus_page_id_fkey'
|
||||
columns: ['page_id']
|
||||
isOneToOne: false
|
||||
referencedRelation: 'page_nimbus'
|
||||
referencedColumns: ['id']
|
||||
},
|
||||
]
|
||||
}
|
||||
tickets: {
|
||||
Row: {
|
||||
company: string | null
|
||||
@@ -526,6 +609,22 @@ export type Database = {
|
||||
slugs: string[]
|
||||
}[]
|
||||
}
|
||||
docs_search_embeddings_nimbus: {
|
||||
Args: {
|
||||
embedding: string
|
||||
match_threshold: number
|
||||
}
|
||||
Returns: {
|
||||
id: number
|
||||
path: string
|
||||
type: string
|
||||
title: string
|
||||
subtitle: string
|
||||
description: string
|
||||
headings: string[]
|
||||
slugs: string[]
|
||||
}[]
|
||||
}
|
||||
docs_search_fts: {
|
||||
Args: {
|
||||
query: string
|
||||
@@ -539,6 +638,19 @@ export type Database = {
|
||||
description: string
|
||||
}[]
|
||||
}
|
||||
docs_search_fts_nimbus: {
|
||||
Args: {
|
||||
query: string
|
||||
}
|
||||
Returns: {
|
||||
id: number
|
||||
path: string
|
||||
type: string
|
||||
title: string
|
||||
subtitle: string
|
||||
description: string
|
||||
}[]
|
||||
}
|
||||
get_full_content_url: {
|
||||
Args: {
|
||||
type: string
|
||||
@@ -621,6 +733,23 @@ export type Database = {
|
||||
token_count: number | null
|
||||
}[]
|
||||
}
|
||||
match_embedding_nimbus: {
|
||||
Args: {
|
||||
embedding: string
|
||||
match_threshold?: number
|
||||
max_results?: number
|
||||
}
|
||||
Returns: {
|
||||
content: string | null
|
||||
embedding: string | null
|
||||
heading: string | null
|
||||
id: number
|
||||
page_id: number
|
||||
rag_ignore: boolean | null
|
||||
slug: string | null
|
||||
token_count: number | null
|
||||
}[]
|
||||
}
|
||||
match_page_sections_v2: {
|
||||
Args: {
|
||||
embedding: string
|
||||
@@ -638,6 +767,23 @@ export type Database = {
|
||||
token_count: number | null
|
||||
}[]
|
||||
}
|
||||
match_page_sections_v2_nimbus: {
|
||||
Args: {
|
||||
embedding: string
|
||||
match_threshold: number
|
||||
min_content_length: number
|
||||
}
|
||||
Returns: {
|
||||
content: string | null
|
||||
embedding: string | null
|
||||
heading: string | null
|
||||
id: number
|
||||
page_id: number
|
||||
rag_ignore: boolean | null
|
||||
slug: string | null
|
||||
token_count: number | null
|
||||
}[]
|
||||
}
|
||||
search_content: {
|
||||
Args: {
|
||||
embedding: string
|
||||
@@ -676,6 +822,44 @@ export type Database = {
|
||||
subsections: Json[]
|
||||
}[]
|
||||
}
|
||||
search_content_hybrid_nimbus: {
|
||||
Args: {
|
||||
query_text: string
|
||||
query_embedding: string
|
||||
max_result?: number
|
||||
full_text_weight?: number
|
||||
semantic_weight?: number
|
||||
rrf_k?: number
|
||||
match_threshold?: number
|
||||
include_full_content?: boolean
|
||||
}
|
||||
Returns: {
|
||||
id: number
|
||||
page_title: string
|
||||
type: string
|
||||
href: string
|
||||
content: string
|
||||
metadata: Json
|
||||
subsections: Json[]
|
||||
}[]
|
||||
}
|
||||
search_content_nimbus: {
|
||||
Args: {
|
||||
embedding: string
|
||||
include_full_content?: boolean
|
||||
match_threshold?: number
|
||||
max_result?: number
|
||||
}
|
||||
Returns: {
|
||||
id: number
|
||||
page_title: string
|
||||
type: string
|
||||
href: string
|
||||
content: string
|
||||
metadata: Json
|
||||
subsections: Json[]
|
||||
}[]
|
||||
}
|
||||
update_last_changed_checksum: {
|
||||
Args: {
|
||||
new_parent_page: string
|
||||
|
||||
@@ -76,5 +76,7 @@
|
||||
"sdk:dart": true,
|
||||
"sdk:kotlin": true,
|
||||
"sdk:python": true,
|
||||
"sdk:swift": true
|
||||
"sdk:swift": true,
|
||||
|
||||
"search:fullIndex": true
|
||||
}
|
||||
|
||||
@@ -255,6 +255,11 @@
|
||||
"sdk:swift": {
|
||||
"type": "boolean",
|
||||
"description": "Enable the Swift SDK"
|
||||
},
|
||||
|
||||
"search:fullIndex": {
|
||||
"type": "boolean",
|
||||
"description": "Enable the full search index. When true, uses the full search; when false, uses the alternate search index."
|
||||
}
|
||||
},
|
||||
"required": [
|
||||
@@ -314,7 +319,8 @@
|
||||
"sdk:dart",
|
||||
"sdk:kotlin",
|
||||
"sdk:python",
|
||||
"sdk:swift"
|
||||
"sdk:swift",
|
||||
"search:fullIndex"
|
||||
],
|
||||
"additionalProperties": false
|
||||
}
|
||||
|
||||
@@ -40,6 +40,15 @@ function isFeatureEnabled<T extends Feature | Feature[]>(
|
||||
features: T,
|
||||
runtimeDisabledFeatures?: Feature[]
|
||||
) {
|
||||
// Override is used to produce a filtered version of the docs search index
|
||||
// using the same sync setup as our normal search index
|
||||
if (process.env.ENABLED_FEATURES_OVERRIDE_DISABLE_ALL === 'true') {
|
||||
if (Array.isArray(features)) {
|
||||
return Object.fromEntries(features.map((feature) => [featureToCamelCase(feature), false]))
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
const disabledFeatures = new Set([
|
||||
...(runtimeDisabledFeatures ?? []),
|
||||
...disabledFeaturesStaticArray,
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
import { compact, debounce, uniqBy } from 'lodash'
|
||||
import { useCallback, useMemo, useReducer, useRef } from 'react'
|
||||
|
||||
import { isFeatureEnabled } from '../enabled-features'
|
||||
|
||||
const NUMBER_SOURCES = 2
|
||||
|
||||
const SUPABASE_URL = process.env.NEXT_PUBLIC_SUPABASE_URL
|
||||
@@ -200,7 +202,10 @@ const useDocsSearch = () => {
|
||||
|
||||
let sourcesLoaded = 0
|
||||
|
||||
fetch(`${SUPABASE_URL}/rest/v1/rpc/docs_search_fts`, {
|
||||
const useAlternateSearchIndex = !isFeatureEnabled('search:fullIndex')
|
||||
|
||||
const searchEndpoint = useAlternateSearchIndex ? 'docs_search_fts_nimbus' : 'docs_search_fts'
|
||||
fetch(`${SUPABASE_URL}/rest/v1/rpc/${searchEndpoint}`, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'content-type': 'application/json',
|
||||
@@ -244,7 +249,7 @@ const useDocsSearch = () => {
|
||||
|
||||
fetch(`${SUPABASE_URL}${FUNCTIONS_URL}search-embeddings`, {
|
||||
method: 'POST',
|
||||
body: JSON.stringify({ query }),
|
||||
body: JSON.stringify({ query, useAlternateSearchIndex }),
|
||||
})
|
||||
.then((response) => response.json())
|
||||
.then((results) => {
|
||||
|
||||
Reference in New Issue
Block a user