feat: alternate search index for nimbus (#38662)

* feat: alternate search index for nimbus

Create an alternate search index for Nimbus that filters out
feature-flagged pages (equivalent to setting all feature flags to
false).

Notes:
- Creates two new DB tables, `page_nimbus` and `page_section_nimbus`,
  which are filtered versions of `page` and `page_section`
- Makes `nimbus` versions of all the DB search functions
- Refactors the embedding upload script to make it faster (it was
  painfully slow during testing), add retries, and produce better
  summary logs
- When run with the environment variable
  ENABLED_FEATURES_OVERRIDE_DISABLE_ALL set to true, the upload script
  produces and uploads the alternate search index
- Changes all the search calls in the frontend/API to check
  `isFeatureEnabled('search:fullIndex')` to determine whether to search
  the full or the alternate index (see the sketch after this list)
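
Roughly, every search entry point now follows the same routing pattern. Below is a minimal TypeScript sketch, assuming the `isFeatureEnabled` helper and `supabase()` client from this repo; the `searchDocs` wrapper and its arguments are illustrative, not an exact call site:

```ts
import { isFeatureEnabled } from 'common/enabled-features'
import { supabase } from '~/lib/supabase'

type SearchFunction = 'search_content' | 'search_content_nimbus'

// Illustrative wrapper: picks the index once per request based on the flag.
async function searchDocs(embedding: number[], limit?: number) {
  // When 'search:fullIndex' is off, fall back to the filtered (nimbus) index.
  const useAltSearchIndex = !isFeatureEnabled('search:fullIndex')
  const searchFunction: SearchFunction = useAltSearchIndex
    ? 'search_content_nimbus'
    : 'search_content'

  return supabase().rpc(searchFunction, {
    embedding,
    include_full_content: false,
    max_result: limit,
  })
}
```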

* ci: produce nimbus search indexes on merge

* fix: turn full search index on
Authored by Charis on 2025-09-16 12:37:53 -04:00, committed by GitHub
parent 9907fb25f4
commit 8cd5e10038
17 changed files with 1256 additions and 229 deletions

View File

@@ -68,8 +68,12 @@ jobs:
run: |
pnpm run codegen:examples
pnpm run embeddings
pnpm run embeddings:nimbus
- name: Refresh embeddings
working-directory: ./apps/docs
if: ${{ inputs.refresh }}
run: pnpm run embeddings:refresh
run: |
pnpm run codegen:examples
pnpm run embeddings:refresh
pnpm run embeddings:nimbus:refresh

View File

@@ -3,6 +3,8 @@ import { ApplicationError, UserError, clippy } from 'ai-commands/edge'
import { NextRequest, NextResponse } from 'next/server'
import OpenAI from 'openai'
import { isFeatureEnabled } from 'common/enabled-features'
export const runtime = 'edge'
/* To avoid OpenAI errors, restrict to the Vercel Edge Function regions that
overlap with the OpenAI API regions.
@@ -54,7 +56,10 @@ export async function POST(req: NextRequest) {
throw new UserError('Missing messages in request data')
}
const response = await clippy(openai, supabaseClient, messages)
const useAltSearchIndex = !isFeatureEnabled('search:fullIndex')
const response = await clippy(openai, supabaseClient, messages, {
useAltSearchIndex,
})
// Proxy the streamed SSE response from OpenAI
return new NextResponse(response.body, {

View File

@@ -17,7 +17,9 @@
"dev:secrets:pull": "AWS_PROFILE=supa-dev node ../../scripts/getSecrets.js -n local/docs",
"dev:watch:troubleshooting": "node ./scripts/troubleshooting/watch.mjs",
"embeddings": "tsx --conditions=react-server scripts/search/generate-embeddings.ts",
"embeddings:nimbus": "ENABLED_FEATURES_OVERRIDE_DISABLE_ALL=true pnpm run embeddings",
"embeddings:refresh": "pnpm run embeddings --refresh",
"embeddings:nimbus:refresh": "ENABLED_FEATURES_OVERRIDE_DISABLE_ALL=true pnpm run embeddings:refresh",
"last-changed": "tsx scripts/last-changed.ts",
"last-changed:reset": "pnpm run last-changed -- --reset",
"lint": "next lint",

View File

@@ -3,6 +3,8 @@ import { convertPostgrestToApiError, type ApiErrorGeneric } from '~/app/api/util
import { Result } from '~/features/helpers.fn'
import { openAI } from '~/lib/openAi'
import { supabase, type DatabaseCorrected } from '~/lib/supabase'
import { isFeatureEnabled } from 'common/enabled-features'
import { GuideModel } from '../guide/guideModel'
import {
DB_METADATA_TAG_PLATFORM_CLI,
@@ -13,6 +15,9 @@ import { ReferenceSDKFunctionModel, SDKLanguageValues } from '../reference/refer
import { TroubleshootingModel } from '../troubleshooting/troubleshootingModel'
import { SearchResultInterface } from './globalSearchInterface'
type SearchFunction = 'search_content' | 'search_content_nimbus'
type SearchHybridFunction = 'search_content_hybrid' | 'search_content_hybrid_nimbus'
export abstract class SearchResultModel {
static async search(
args: RootQueryTypeSearchDocsArgs,
@@ -22,9 +27,14 @@ export abstract class SearchResultModel {
const includeFullContent = requestedFields.includes('content')
const embeddingResult = await openAI().createContentEmbedding(query)
const useAltSearchIndex = !isFeatureEnabled('search:fullIndex')
const searchFunction: SearchFunction = useAltSearchIndex
? 'search_content_nimbus'
: 'search_content'
return embeddingResult.flatMapAsync(async ({ embedding }) => {
const matchResult = new Result(
await supabase().rpc('search_content', {
await supabase().rpc(searchFunction, {
embedding,
include_full_content: includeFullContent,
max_result: args.limit ?? undefined,
@@ -49,9 +59,14 @@ export abstract class SearchResultModel {
const includeFullContent = requestedFields.includes('content')
const embeddingResult = await openAI().createContentEmbedding(query)
const useAltSearchIndex = !isFeatureEnabled('search:fullIndex')
const searchFunction: SearchHybridFunction = useAltSearchIndex
? 'search_content_hybrid_nimbus'
: 'search_content_hybrid'
return embeddingResult.flatMapAsync(async ({ embedding }) => {
const matchResult = new Result(
await supabase().rpc('search_content_hybrid', {
await supabase().rpc(searchFunction, {
query_text: query,
query_embedding: embedding,
include_full_content: includeFullContent,

View File

@@ -0,0 +1,106 @@
export interface PageInfo {
pageId: number
path: string
checksum: string
sectionsCount: number
}
export interface PageSectionForEmbedding {
pageId: number
path: string
slug?: string
heading?: string
content: string
input: string
ragIgnore: boolean
}
export interface PageSectionWithEmbedding extends PageSectionForEmbedding {
embedding: number[]
}
export interface ProcessingResult {
successfulPages: Set<number>
failedPages: Set<number>
totalSectionsProcessed: number
totalSectionsInserted: number
}
export function createBatches<T>(array: T[], batchSize: number): T[][] {
const batches: T[][] = []
for (let i = 0; i < array.length; i += batchSize) {
batches.push(array.slice(i, i + batchSize))
}
return batches
}
export function mapEmbeddingsToSections(
batch: PageSectionForEmbedding[],
data: Array<{ embedding?: number[] }>,
batchNumber: number
): {
sectionsWithEmbeddings: PageSectionWithEmbedding[]
failedSectionIndexes: Set<number>
} {
const sectionsWithEmbeddings: PageSectionWithEmbedding[] = []
const failedSectionIndexes: Set<number> = new Set()
if (batch.length !== data.length) {
console.error(
`Ignoring all embeddings returned from batch ${batchNumber} because returned number doesn't match input number`
)
batch.forEach((_, index) => {
failedSectionIndexes.add(index)
})
// Return early so we never index past the end of the shorter array below
return { sectionsWithEmbeddings, failedSectionIndexes }
}
for (let i = 0; i < batch.length; i++) {
if (data[i].embedding) {
sectionsWithEmbeddings.push({ ...batch[i], embedding: data[i].embedding! })
} else {
failedSectionIndexes.add(i)
}
}
return { sectionsWithEmbeddings, failedSectionIndexes }
}
export function updatePageInsertionCounts(
pageSectionsInserted: Map<number, number>,
sectionsWithEmbeddings: PageSectionWithEmbedding[]
) {
sectionsWithEmbeddings.forEach((section) => {
const current = pageSectionsInserted.get(section.pageId) || 0
pageSectionsInserted.set(section.pageId, current + 1)
})
}
export function computePageResults(
pageInfoMap: Map<number, PageInfo>,
pageSectionsInserted: Map<number, number>,
result: ProcessingResult
) {
for (const [pageId, pageInfo] of pageInfoMap) {
const insertedCount = pageSectionsInserted.get(pageId) || 0
if (insertedCount === pageInfo.sectionsCount && !result.failedPages.has(pageId)) {
result.successfulPages.add(pageId)
} else {
result.failedPages.add(pageId)
console.warn(
`Page ${pageInfo.path}: inserted ${insertedCount}/${pageInfo.sectionsCount} sections`
)
}
}
}
export function logFailedSections(
batch: PageSectionForEmbedding[],
inputs: string[],
failedSectionIndexes: Set<number>
) {
failedSectionIndexes.forEach((i) => {
console.error(
`Failed to process section: ${batch[i].path}#${batch[i].slug} (content: "${inputs[i]?.slice(0, 50)}...")`
)
})
}

View File

@@ -1,24 +1,453 @@
import '../utils/dotenv.js'
import { createClient } from '@supabase/supabase-js'
import { createClient, type SupabaseClient } from '@supabase/supabase-js'
import { parseArgs } from 'node:util'
import { OpenAI } from 'openai'
import { v4 as uuidv4 } from 'uuid'
import type { Section } from '../helpers.mdx.js'
import {
type PageInfo,
type PageSectionForEmbedding,
type PageSectionWithEmbedding,
type ProcessingResult,
createBatches,
mapEmbeddingsToSections,
updatePageInsertionCounts,
computePageResults,
logFailedSections,
} from './embeddings/utils.js'
import { fetchAllSources } from './sources/index.js'
const CONFIG = {
// OpenAI settings
EMBEDDING_MODEL: 'text-embedding-ada-002' as const,
EMBEDDING_DIMENSION: 1536, // Keep in sync with EMBEDDING_MODEL
OPENAI_BATCH_SIZE: 128,
OPENAI_MAX_RETRIES: 3,
OPENAI_BASE_DELAY_MS: 500,
/**
* If context length is exceeded, truncate inputs over this character length
* and retry. This is a character-based heuristic, not token-exact.
*/
EMBEDDING_TRUNCATE_CHAR_LIMIT: 16_000,
// Supabase settings
SUPABASE_MAX_RETRIES: 2,
SUPABASE_BASE_DELAY_MS: 100,
// Processing settings
SOURCE_CONCURRENCY: 10,
} as const
function delay(ms: number): Promise<void> {
return new Promise((resolve) => setTimeout(resolve, ms))
}
function exponentialBackoff(attempt: number, baseDelay: number, maxDelay: number = 30_000): number {
const exponentialDelay = baseDelay * Math.pow(2, attempt)
const jitter = (Math.random() - 0.5) * 0.1 * exponentialDelay
return Math.min(Math.max(0, exponentialDelay + jitter), maxDelay)
}
async function withRetry<T>(
operation: () => Promise<T>,
maxRetries: number,
baseDelay: number,
operationName: string,
shouldRetryOnError: (error: unknown) => boolean = () => true
): Promise<T> {
let lastError: Error
for (let attempt = 0; attempt <= maxRetries; attempt++) {
try {
return await operation()
} catch (error) {
lastError = error as Error
// Allow caller to prevent redundant retries for specific errors
if (!shouldRetryOnError?.(error)) {
console.warn(`${operationName} encountered non-retryable error:`, lastError.message)
throw lastError
}
if (attempt === maxRetries) {
console.error(`${operationName} failed after ${maxRetries + 1} attempts:`, lastError)
throw lastError
}
const delayMs = exponentialBackoff(attempt, baseDelay)
console.warn(
`${operationName} attempt ${attempt + 1} failed, retrying in ${delayMs}ms:`,
lastError.message
)
await delay(delayMs)
}
}
throw lastError!
}
function isNimbusMode(): boolean {
return process.env.ENABLED_FEATURES_OVERRIDE_DISABLE_ALL === 'true'
}
function getPageTables() {
const nimbus = isNimbusMode()
return {
pageTable: nimbus ? 'page_nimbus' : 'page',
pageSectionTable: nimbus ? 'page_section_nimbus' : 'page_section',
} as const
}
function requireEnvOrThrow(names: string[]): void {
const missing = names.filter((n) => !process.env[n])
if (missing.length) {
throw new Error(
`Environment variables ${missing.join(', ')} are required: skipping embeddings generation`
)
}
}
function initSupabase(): SupabaseClient {
return createClient(process.env.NEXT_PUBLIC_SUPABASE_URL!, process.env.SUPABASE_SECRET_KEY!, {
auth: { persistSession: false, autoRefreshToken: false },
})
}
type PreparedSections = {
allSectionsToProcess: PageSectionForEmbedding[]
pageInfoMap: Map<number, PageInfo>
}
async function prepareSections(
supabaseClient: SupabaseClient,
pageTable: string,
pageSectionTable: string,
shouldRefresh: boolean,
refreshVersion: string,
refreshDate: Date,
fullIndex = true,
debug = false
): Promise<PreparedSections> {
const embeddingSources = await fetchAllSources(fullIndex)
console.log(`Discovered ${embeddingSources.length} sources`)
const allSectionsToProcess: PageSectionForEmbedding[] = []
const pageInfoMap = new Map<number, PageInfo>()
for (const sourceBatch of createBatches(embeddingSources, CONFIG.SOURCE_CONCURRENCY)) {
await Promise.all(
sourceBatch.map(async (embeddingSource) => {
const { type, source, path } = embeddingSource
try {
const {
checksum,
sections,
meta = {},
ragIgnore = false,
}: {
checksum: string
sections: Section[]
ragIgnore?: boolean
meta?: Record<string, unknown>
} = await embeddingSource.process()
const { error: fetchPageError, data: existingPage } = await supabaseClient
.from(pageTable)
.select('id, path, checksum')
.filter('path', 'eq', path)
.limit(1)
.maybeSingle()
if (fetchPageError) throw fetchPageError
if (!shouldRefresh && existingPage?.checksum === checksum) {
const { error: updatePageError } = await supabaseClient
.from(pageTable)
.update({
type,
source,
meta,
version: refreshVersion,
last_refresh: refreshDate,
})
.filter('id', 'eq', existingPage.id)
if (updatePageError) throw updatePageError
return
}
if (existingPage) {
if (debug) {
console.log(
!shouldRefresh
? `[${path}] Docs have changed, removing old page sections and their embeddings`
: `[${path}] Refresh flag set, removing old page sections and their embeddings`
)
}
const { error: deletePageSectionError } = await supabaseClient
.from(pageSectionTable)
.delete()
.filter('page_id', 'eq', existingPage.id)
if (deletePageSectionError) throw deletePageSectionError
}
const { error: upsertPageError, data: page } = await supabaseClient
.from(pageTable)
.upsert(
{
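// Intentionally clear checksum until all page sections are generated successfully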
checksum: null,
path,
type,
source,
meta,
content: embeddingSource.extractIndexedContent(),
version: refreshVersion,
last_refresh: refreshDate,
},
{ onConflict: 'path' }
)
.select()
.limit(1)
.single()
if (upsertPageError) throw upsertPageError
if (debug) {
console.log(`[${path}] Preparing ${sections.length} page sections for processing`)
}
pageInfoMap.set(page.id, {
pageId: page.id,
path,
checksum,
sectionsCount: sections.length,
})
const sectionsForBatching = sections.map(({ slug, heading, content }) => ({
pageId: page.id,
path,
slug,
heading,
content,
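// OpenAI recommends replacing newlines with spaces for best embedding results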
input: content.replace(/\n/g, ' '),
ragIgnore,
}))
allSectionsToProcess.push(...sectionsForBatching)
} catch (err) {
console.error(`Error preparing path '${path}' for processing.`)
console.error(err)
}
})
)
}
console.log(
`Prepared ${allSectionsToProcess.length} sections for processing from ${pageInfoMap.size} pages`
)
return { allSectionsToProcess, pageInfoMap }
}
async function processAndInsertEmbeddings(
openai: OpenAI,
supabaseClient: SupabaseClient,
pageSectionTable: string,
allSections: PageSectionForEmbedding[],
pageInfoMap: Map<number, PageInfo>
): Promise<ProcessingResult> {
if (allSections.length === 0) {
return {
successfulPages: new Set(),
failedPages: new Set(),
totalSectionsProcessed: 0,
totalSectionsInserted: 0,
}
}
console.log(`Processing ${allSections.length} sections with embeddings + insertion`)
const embeddingBatches = createBatches(allSections, CONFIG.OPENAI_BATCH_SIZE)
const result: ProcessingResult = {
successfulPages: new Set(),
failedPages: new Set(),
totalSectionsProcessed: 0,
totalSectionsInserted: 0,
}
// Track sections inserted per page
const pageSectionsInserted = new Map<number, number>()
for (let batchIndex = 0; batchIndex < embeddingBatches.length; batchIndex++) {
const batch = embeddingBatches[batchIndex]
try {
const batchResult = await processEmbeddingBatch(
openai,
batch,
batchIndex,
embeddingBatches.length
)
result.totalSectionsProcessed += batchResult.processedCount
if (batchResult.sectionsWithEmbeddings.length > 0) {
const insertedCount = await insertSectionBatch(
supabaseClient,
pageSectionTable,
batchResult.sectionsWithEmbeddings
)
result.totalSectionsInserted += insertedCount
updatePageInsertionCounts(pageSectionsInserted, batchResult.sectionsWithEmbeddings)
}
// Mark failed section pages
batchResult.failedSectionIndexes.forEach((i) => {
result.failedPages.add(batch[i].pageId)
})
} catch (error) {
console.error(`Batch ${batchIndex + 1} completely failed:`, error)
batch.forEach((section) => result.failedPages.add(section.pageId))
}
if (batchIndex < embeddingBatches.length - 1) {
await delay(CONFIG.OPENAI_BASE_DELAY_MS)
}
}
computePageResults(pageInfoMap, pageSectionsInserted, result)
return result
}
type BatchEmbeddingResult = {
sectionsWithEmbeddings: PageSectionWithEmbedding[]
failedSectionIndexes: Set<number>
processedCount: number
}
async function processEmbeddingBatch(
openai: OpenAI,
batch: PageSectionForEmbedding[],
batchIndex: number,
totalBatches: number
): Promise<BatchEmbeddingResult> {
const inputs = batch.map((section) => section.input)
console.log(
`Processing embedding batch ${batchIndex + 1}/${totalBatches} (${inputs.length} sections)`
)
// Helper to identify context length exceeded errors from OpenAI
const isContextLengthError = (err: unknown) => {
if (!(err instanceof OpenAI.APIError)) return false
const message = (err.error?.message as string | undefined) ?? ''
const status = err.status
return status === 400 && message.toLowerCase().includes('context')
}
let embeddingResponse: OpenAI.Embeddings.CreateEmbeddingResponse
try {
embeddingResponse = await withRetry(
() =>
openai.embeddings.create({
model: CONFIG.EMBEDDING_MODEL,
input: inputs,
}),
CONFIG.OPENAI_MAX_RETRIES,
CONFIG.OPENAI_BASE_DELAY_MS,
`OpenAI embedding batch ${batchIndex + 1}`,
(err) => !isContextLengthError(err)
)
} catch (err) {
if (!isContextLengthError(err)) {
throw err
}
// Context length exceeded: truncate problematic sections and try once more
const limit = CONFIG.EMBEDDING_TRUNCATE_CHAR_LIMIT
const truncatedInputs = inputs.map((s) => (s.length > limit ? s.slice(0, limit) : s))
const truncatedCount = truncatedInputs.filter((s, i) => s !== inputs[i]).length
console.warn(
`OpenAI embedding batch ${batchIndex + 1}: context length exceeded. ` +
`Truncating ${truncatedCount} overly long section(s) to ${limit} chars and retrying once.`
)
embeddingResponse = await openai.embeddings.create({
model: CONFIG.EMBEDDING_MODEL,
input: truncatedInputs,
})
// Replace inputs with truncated inputs for downstream bookkeeping
for (let i = 0; i < inputs.length; i++) inputs[i] = truncatedInputs[i]
}
const { sectionsWithEmbeddings, failedSectionIndexes } = mapEmbeddingsToSections(
batch,
embeddingResponse.data,
batchIndex
)
logFailedSections(batch, inputs, failedSectionIndexes)
return {
sectionsWithEmbeddings,
failedSectionIndexes,
processedCount: inputs.length,
}
}
async function insertSectionBatch(
supabaseClient: SupabaseClient,
pageSectionTable: string,
sectionsWithEmbeddings: PageSectionWithEmbedding[]
): Promise<number> {
if (sectionsWithEmbeddings.length === 0) {
return 0
}
const pageSectionsToInsert = sectionsWithEmbeddings.map((section) => ({
page_id: section.pageId,
slug: section.slug,
heading: section.heading,
content: section.content,
embedding: section.embedding,
rag_ignore: section.ragIgnore,
}))
await withRetry(
async () => {
const { error } = await supabaseClient.from(pageSectionTable).insert(pageSectionsToInsert)
if (error) {
throw new Error(`Supabase insert error: ${error.message}`)
}
},
CONFIG.SUPABASE_MAX_RETRIES,
CONFIG.SUPABASE_BASE_DELAY_MS,
`Insert batch of ${sectionsWithEmbeddings.length} sections`
)
return sectionsWithEmbeddings.length
}
const args = parseArgs({
options: {
refresh: {
type: 'boolean',
},
debug: {
type: 'boolean',
},
},
})
async function generateEmbeddings() {
const shouldRefresh = Boolean(args.values.refresh)
const debug = Boolean(args.values.debug)
const requiredEnvVars = [
const nimbus = isNimbusMode()
if (nimbus) {
console.log('Running in Nimbus mode - will filter content based on disabled feature flags')
}
requireEnvOrThrow([
'DOCS_GITHUB_APP_ID',
'DOCS_GITHUB_APP_INSTALLATION_ID',
'DOCS_GITHUB_APP_PRIVATE_KEY',
@@ -27,217 +456,136 @@ async function generateEmbeddings() {
'NEXT_PUBLIC_SUPABASE_URL',
'OPENAI_API_KEY',
'SUPABASE_SECRET_KEY',
]
])
const missingEnvVars = requiredEnvVars.filter((name) => !process.env[name])
if (missingEnvVars.length > 0) {
throw new Error(
`Environment variables ${missingEnvVars.join(
', '
)} are required: skipping embeddings generation`
)
}
const supabaseClient = initSupabase()
const supabaseClient = createClient(
process.env.NEXT_PUBLIC_SUPABASE_URL!,
process.env.SUPABASE_SECRET_KEY!,
{
auth: {
persistSession: false,
autoRefreshToken: false,
},
}
)
// Use this version to track which pages to purge
// after the refresh
const refreshVersion = uuidv4()
const refreshDate = new Date()
const embeddingSources = await fetchAllSources()
const { pageTable, pageSectionTable } = getPageTables()
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
console.log(`Discovered ${embeddingSources.length} pages`)
console.log(
shouldRefresh
? 'Refresh flag set, re-generating all pages'
: 'Checking which pages are new or have changed'
)
if (!shouldRefresh) {
console.log('Checking which pages are new or have changed')
} else {
console.log('Refresh flag set, re-generating all pages')
const { allSectionsToProcess, pageInfoMap } = await prepareSections(
supabaseClient,
pageTable,
pageSectionTable,
shouldRefresh,
refreshVersion,
refreshDate,
!nimbus,
debug
)
let processingResult: ProcessingResult
try {
processingResult = await processAndInsertEmbeddings(
openai,
supabaseClient,
pageSectionTable,
allSectionsToProcess,
pageInfoMap
)
console.log(
`Processing complete: ${processingResult.totalSectionsInserted}/${processingResult.totalSectionsProcessed} sections inserted successfully`
)
console.log(
`Page summary: ${processingResult.successfulPages.size} successful, ${processingResult.failedPages.size} failed`
)
} catch (error) {
console.error('Critical error during embedding processing:', error)
console.log('Exiting due to complete processing failure')
return
}
for (const embeddingSource of embeddingSources) {
const { type, source, path } = embeddingSource
console.log(`\nUpdating checksums for ${processingResult.successfulPages.size} successful pages`)
const successfulChecksumUpdates = await updateSuccessfulChecksums(
supabaseClient,
pageTable,
pageInfoMap,
processingResult
)
console.log(
`Successfully updated checksums for ${successfulChecksumUpdates}/${processingResult.successfulPages.size} successful pages`
)
try {
const {
checksum,
sections,
meta = {},
ragIgnore = false,
}: {
checksum: string
sections: Section[]
ragIgnore?: boolean
meta?: Record<string, unknown>
} = await embeddingSource.process()
logFailedPages(pageInfoMap, processingResult)
// Check for existing page in DB and compare checksums
const { error: fetchPageError, data: existingPage } = await supabaseClient
.from('page')
.select('id, path, checksum')
.filter('path', 'eq', path)
.limit(1)
.maybeSingle()
if (fetchPageError) {
throw fetchPageError
}
// We use checksum to determine if this page & its sections need to be regenerated
if (!shouldRefresh && existingPage?.checksum === checksum) {
// No content/embedding update required on this page
// Update other meta info
const { error: updatePageError } = await supabaseClient
.from('page')
.update({
type,
source,
meta,
version: refreshVersion,
last_refresh: refreshDate,
})
.filter('id', 'eq', existingPage.id)
if (updatePageError) {
throw updatePageError
}
continue
}
if (existingPage) {
if (!shouldRefresh) {
console.log(
`[${path}] Docs have changed, removing old page sections and their embeddings`
)
} else {
console.log(`[${path}] Refresh flag set, removing old page sections and their embeddings`)
}
const { error: deletePageSectionError } = await supabaseClient
.from('page_section')
.delete()
.filter('page_id', 'eq', existingPage.id)
if (deletePageSectionError) {
throw deletePageSectionError
}
}
// Create/update page record. Intentionally clear checksum until we
// have successfully generated all page sections.
const { error: upsertPageError, data: page } = await supabaseClient
.from('page')
.upsert(
{
checksum: null,
path,
type,
source,
meta,
content: embeddingSource.extractIndexedContent(),
version: refreshVersion,
last_refresh: refreshDate,
},
{ onConflict: 'path' }
)
.select()
.limit(1)
.single()
if (upsertPageError) {
throw upsertPageError
}
console.log(`[${path}] Adding ${sections.length} page sections (with embeddings)`)
for (const { slug, heading, content } of sections) {
// OpenAI recommends replacing newlines with spaces for best results (specific to embeddings)
// force a redeploy
const input = content.replace(/\n/g, ' ')
try {
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
const embeddingResponse = await openai.embeddings.create({
model: 'text-embedding-ada-002',
input,
})
const [responseData] = embeddingResponse.data
const { error: insertPageSectionError } = await supabaseClient
.from('page_section')
.insert({
page_id: page.id,
slug,
heading,
content,
token_count: embeddingResponse.usage.total_tokens,
embedding: responseData.embedding,
rag_ignore: ragIgnore,
})
.select()
.limit(1)
.single()
if (insertPageSectionError) {
throw insertPageSectionError
}
} catch (err) {
// TODO: decide how to better handle failed embeddings
console.error(
`Failed to generate embeddings for '${path}' page section starting with '${input.slice(
0,
40
)}...'`
)
throw err
}
}
// Set page checksum so that we know this page was stored successfully
const { error: updatePageError } = await supabaseClient
.from('page')
.update({ checksum })
.filter('id', 'eq', page.id)
if (updatePageError) {
throw updatePageError
}
} catch (err) {
console.error(
`Page '${path}' or one/multiple of its page sections failed to store properly. Page has been marked with null checksum to indicate that it needs to be re-generated.`
)
console.error(err)
}
}
console.log(`Removing old pages and their sections`)
// Delete pages that have been removed (and their sections via cascade)
const { error: deletePageError } = await supabaseClient
.from('page')
.delete()
.filter('version', 'neq', refreshVersion)
if (deletePageError) {
throw deletePageError
}
await purgeOldPages(supabaseClient, pageTable, refreshVersion)
console.log('Embedding generation complete')
}
async function updateSuccessfulChecksums(
supabaseClient: SupabaseClient,
pageTable: string,
pageInfoMap: Map<number, PageInfo>,
processingResult: ProcessingResult
): Promise<number> {
let successfulChecksumUpdates = 0
const pageIds = Array.from(processingResult.successfulPages)
const batches = createBatches(pageIds, CONFIG.SOURCE_CONCURRENCY)
for (const batch of batches) {
const results = await Promise.all(
batch.map(async (pageId) => {
const pageInfo = pageInfoMap.get(pageId)
if (!pageInfo) {
console.error(`Missing page info for pageId ${pageId}`)
return 0
}
try {
const { error: updatePageError } = await supabaseClient
.from(pageTable)
.update({ checksum: pageInfo.checksum })
.eq('id', pageId)
if (updatePageError) {
console.error(`Failed to update checksum for page ${pageInfo.path}:`, updatePageError)
return 0
}
return 1
} catch (error) {
console.error(`Error updating checksum for page ${pageInfo.path}:`, error)
return 0
}
})
)
successfulChecksumUpdates += results.reduce((sum, x) => sum + x, 0)
}
return successfulChecksumUpdates
}
function logFailedPages(pageInfoMap: Map<number, PageInfo>, processingResult: ProcessingResult) {
if (processingResult.failedPages.size === 0) return
console.log(`\nFailed pages:`)
for (const pageId of processingResult.failedPages) {
const pageInfo = pageInfoMap.get(pageId)
if (pageInfo) console.log(` - ${pageInfo.path}`)
}
}
async function purgeOldPages(
supabaseClient: SupabaseClient,
pageTable: string,
refreshVersion: string
) {
console.log(`Removing old pages and their sections`)
const { error: deletePageError } = await supabaseClient
.from(pageTable)
.delete()
.filter('version', 'neq', refreshVersion)
if (deletePageError) throw deletePageError
}
async function main() {
await generateEmbeddings()
}

View File

@@ -1,3 +1,4 @@
import { type GuideModel } from '../../../resources/guide/guideModel.js'
import { GuideModelLoader } from '../../../resources/guide/guideModelLoader.js'
import {
GitHubDiscussionLoader,
@@ -28,7 +29,7 @@ export type SearchSource =
export async function fetchGuideSources() {
const guides = (await GuideModelLoader.allFromFs()).unwrapLeft()
return guides.map((guide) => MarkdownLoader.fromGuideModel('guide', guide))
return guides.map((guide: GuideModel) => MarkdownLoader.fromGuideModel('guide', guide))
}
export async function fetchOpenApiReferenceSource() {
@@ -125,27 +126,29 @@ export async function fetchLintWarningsGuideSources() {
/**
* Fetches all the sources we want to index for search
*/
export async function fetchAllSources() {
export async function fetchAllSources(fullIndex: boolean) {
const guideSources = fetchGuideSources()
const lintWarningsGuideSources = fetchLintWarningsGuideSources()
const openApiReferenceSource = fetchOpenApiReferenceSource()
const jsLibReferenceSource = fetchJsLibReferenceSource()
const dartLibReferenceSource = fetchDartLibReferenceSource()
const pythonLibReferenceSource = fetchPythonLibReferenceSource()
const cSharpLibReferenceSource = fetchCSharpLibReferenceSource()
const swiftLibReferenceSource = fetchSwiftLibReferenceSource()
const ktLibReferenceSource = fetchKtLibReferenceSource()
const cliReferenceSource = fetchCliLibReferenceSource()
const dartLibReferenceSource = fullIndex ? fetchDartLibReferenceSource() : []
const pythonLibReferenceSource = fullIndex ? fetchPythonLibReferenceSource() : []
const cSharpLibReferenceSource = fullIndex ? fetchCSharpLibReferenceSource() : []
const swiftLibReferenceSource = fullIndex ? fetchSwiftLibReferenceSource() : []
const ktLibReferenceSource = fullIndex ? fetchKtLibReferenceSource() : []
const cliReferenceSource = fullIndex ? fetchCliLibReferenceSource() : []
const partnerIntegrationSources = fetchPartners()
.then((partners) =>
partners
? Promise.all(
partners.map((partner) => new IntegrationLoader(partner.slug, partner).load())
)
: []
)
.then((data) => data.flat())
const partnerIntegrationSources = fullIndex
? fetchPartners()
.then((partners) =>
partners
? Promise.all(
partners.map((partner) => new IntegrationLoader(partner.slug, partner).load())
)
: []
)
.then((data) => data.flat())
: []
const githubDiscussionSources = fetchDiscussions(
'supabase',

View File

@@ -820,7 +820,7 @@
{
"id": "supabase-postgres-config",
"title": "Manage Postgres configurations",
"slug": "supabase-ssl-enforcement",
"slug": "supabase-postgres-config",
"type": "cli-command"
},
{

View File

@@ -46,6 +46,7 @@
"DOCS_GITHUB_APP_PRIVATE_KEY",
"DOCS_REVALIDATION_KEYS",
"DOCS_REVALIDATION_OVERRIDE_KEYS",
"ENABLED_FEATURES_OVERRIDE_DISABLE_ALL",
"GITHUB_ACTIONS",
"FORCE_ASSET_CDN",
"LOGFLARE_INGESTION_API_KEY",

View File

@@ -13,10 +13,13 @@ interface PageSection {
rag_ignore?: boolean
}
type MatchPageSectionsFunction = 'match_page_sections_v2' | 'match_page_sections_v2_nimbus'
export async function clippy(
openai: OpenAI,
supabaseClient: SupabaseClient<any, 'public', any>,
messages: Message[]
messages: Message[],
options?: { useAltSearchIndex?: boolean }
) {
// TODO: better sanitization
const contextMessages = messages.map(({ role, content }) => {
@@ -63,14 +66,19 @@ export async function clippy(
const [{ embedding }] = embeddingResponse.data
const searchFunction = options?.useAltSearchIndex
? 'match_page_sections_v2_nimbus'
: 'match_page_sections_v2'
const joinedTable = options?.useAltSearchIndex ? 'page_nimbus' : 'page'
const { error: matchError, data: pageSections } = (await supabaseClient
.rpc('match_page_sections_v2', {
.rpc(searchFunction, {
embedding,
match_threshold: 0.78,
min_content_length: 50,
})
.neq('rag_ignore', true)
.select('content,page!inner(path),rag_ignore')
.select(`content,${joinedTable}!inner(path),rag_ignore`)
.limit(10)) as { error: any; data: PageSection[] | null }
if (matchError || !pageSections) {

View File

@@ -294,6 +294,48 @@ export type Database = {
}
Relationships: []
}
page_nimbus: {
Row: {
checksum: string | null
content: string | null
fts_tokens: unknown | null
id: number
last_refresh: string | null
meta: Json | null
path: string
source: string | null
title_tokens: unknown | null
type: string | null
version: string | null
}
Insert: {
checksum?: string | null
content?: string | null
fts_tokens?: unknown | null
id?: never
last_refresh?: string | null
meta?: Json | null
path: string
source?: string | null
title_tokens?: unknown | null
type?: string | null
version?: string | null
}
Update: {
checksum?: string | null
content?: string | null
fts_tokens?: unknown | null
id?: never
last_refresh?: string | null
meta?: Json | null
path?: string
source?: string | null
title_tokens?: unknown | null
type?: string | null
version?: string | null
}
Relationships: []
}
page_section: {
Row: {
content: string | null
@@ -335,6 +377,47 @@ export type Database = {
},
]
}
page_section_nimbus: {
Row: {
content: string | null
embedding: string | null
heading: string | null
id: number
page_id: number
rag_ignore: boolean | null
slug: string | null
token_count: number | null
}
Insert: {
content?: string | null
embedding?: string | null
heading?: string | null
id?: never
page_id: number
rag_ignore?: boolean | null
slug?: string | null
token_count?: number | null
}
Update: {
content?: string | null
embedding?: string | null
heading?: string | null
id?: never
page_id?: number
rag_ignore?: boolean | null
slug?: string | null
token_count?: number | null
}
Relationships: [
{
foreignKeyName: 'page_section_nimbus_page_id_fkey'
columns: ['page_id']
isOneToOne: false
referencedRelation: 'page_nimbus'
referencedColumns: ['id']
},
]
}
tickets: {
Row: {
company: string | null
@@ -526,6 +609,22 @@ export type Database = {
slugs: string[]
}[]
}
docs_search_embeddings_nimbus: {
Args: {
embedding: string
match_threshold: number
}
Returns: {
id: number
path: string
type: string
title: string
subtitle: string
description: string
headings: string[]
slugs: string[]
}[]
}
docs_search_fts: {
Args: {
query: string
@@ -539,6 +638,19 @@ export type Database = {
description: string
}[]
}
docs_search_fts_nimbus: {
Args: {
query: string
}
Returns: {
id: number
path: string
type: string
title: string
subtitle: string
description: string
}[]
}
get_full_content_url: {
Args: {
type: string
@@ -621,6 +733,23 @@ export type Database = {
token_count: number | null
}[]
}
match_embedding_nimbus: {
Args: {
embedding: string
match_threshold?: number
max_results?: number
}
Returns: {
content: string | null
embedding: string | null
heading: string | null
id: number
page_id: number
rag_ignore: boolean | null
slug: string | null
token_count: number | null
}[]
}
match_page_sections_v2: {
Args: {
embedding: string
@@ -638,6 +767,23 @@ export type Database = {
token_count: number | null
}[]
}
match_page_sections_v2_nimbus: {
Args: {
embedding: string
match_threshold: number
min_content_length: number
}
Returns: {
content: string | null
embedding: string | null
heading: string | null
id: number
page_id: number
rag_ignore: boolean | null
slug: string | null
token_count: number | null
}[]
}
search_content: {
Args: {
embedding: string
@@ -676,6 +822,44 @@ export type Database = {
subsections: Json[]
}[]
}
search_content_hybrid_nimbus: {
Args: {
query_text: string
query_embedding: string
max_result?: number
full_text_weight?: number
semantic_weight?: number
rrf_k?: number
match_threshold?: number
include_full_content?: boolean
}
Returns: {
id: number
page_title: string
type: string
href: string
content: string
metadata: Json
subsections: Json[]
}[]
}
search_content_nimbus: {
Args: {
embedding: string
include_full_content?: boolean
match_threshold?: number
max_result?: number
}
Returns: {
id: number
page_title: string
type: string
href: string
content: string
metadata: Json
subsections: Json[]
}[]
}
update_last_changed_checksum: {
Args: {
new_parent_page: string

View File

@@ -76,5 +76,7 @@
"sdk:dart": true,
"sdk:kotlin": true,
"sdk:python": true,
"sdk:swift": true
"sdk:swift": true,
"search:fullIndex": true
}

View File

@@ -255,6 +255,11 @@
"sdk:swift": {
"type": "boolean",
"description": "Enable the Swift SDK"
},
"search:fullIndex": {
"type": "boolean",
"description": "Enable the full search index. When true, uses the full search; when false, uses the alternate search index."
}
},
"required": [
@@ -314,7 +319,8 @@
"sdk:dart",
"sdk:kotlin",
"sdk:python",
"sdk:swift"
"sdk:swift",
"search:fullIndex"
],
"additionalProperties": false
}

View File

@@ -40,6 +40,15 @@ function isFeatureEnabled<T extends Feature | Feature[]>(
features: T,
runtimeDisabledFeatures?: Feature[]
) {
// Override is used to produce a filtered version of the docs search index
// using the same sync setup as our normal search index
if (process.env.ENABLED_FEATURES_OVERRIDE_DISABLE_ALL === 'true') {
if (Array.isArray(features)) {
return Object.fromEntries(features.map((feature) => [featureToCamelCase(feature), false]))
}
return false
}
const disabledFeatures = new Set([
...(runtimeDisabledFeatures ?? []),
...disabledFeaturesStaticArray,

View File

@@ -3,6 +3,8 @@
import { compact, debounce, uniqBy } from 'lodash'
import { useCallback, useMemo, useReducer, useRef } from 'react'
import { isFeatureEnabled } from '../enabled-features'
const NUMBER_SOURCES = 2
const SUPABASE_URL = process.env.NEXT_PUBLIC_SUPABASE_URL
@@ -200,7 +202,10 @@ const useDocsSearch = () => {
let sourcesLoaded = 0
fetch(`${SUPABASE_URL}/rest/v1/rpc/docs_search_fts`, {
const useAlternateSearchIndex = !isFeatureEnabled('search:fullIndex')
const searchEndpoint = useAlternateSearchIndex ? 'docs_search_fts_nimbus' : 'docs_search_fts'
fetch(`${SUPABASE_URL}/rest/v1/rpc/${searchEndpoint}`, {
method: 'POST',
headers: {
'content-type': 'application/json',
@@ -244,7 +249,7 @@ const useDocsSearch = () => {
fetch(`${SUPABASE_URL}${FUNCTIONS_URL}search-embeddings`, {
method: 'POST',
body: JSON.stringify({ query }),
body: JSON.stringify({ query, useAlternateSearchIndex }),
})
.then((response) => response.json())
.then((results) => {

View File

@@ -38,7 +38,7 @@ Deno.serve(async (req) => {
throw new UserError('Missing request data')
}
const { query } = requestData
const { query, useAlternateSearchIndex } = requestData
if (!query) {
throw new UserError('Missing query in request data')
@@ -76,7 +76,11 @@ Deno.serve(async (req) => {
}
const [{ embedding }] = embeddingResponse.data.data
const { error: matchError, data: pages } = await supabaseClient.rpc('docs_search_embeddings', {
const searchFunction = useAlternateSearchIndex
? 'docs_search_embeddings_nimbus'
: 'docs_search_embeddings'
const { error: matchError, data: pages } = await supabaseClient.rpc(searchFunction, {
embedding,
match_threshold: 0.78,
})

View File

@@ -0,0 +1,325 @@
-- Create nimbus tables for feature-flag-filtered search
-- These tables mirror the structure of page and page_section but contain only content
-- that should be visible when all feature flags are disabled
create table "public"."page_nimbus" (
id bigint primary key generated always as identity,
path text not null unique,
checksum text,
meta jsonb,
type text,
source text,
content text,
version uuid,
last_refresh timestamptz,
fts_tokens tsvector generated always as (to_tsvector('english', content)) stored,
title_tokens tsvector generated always as (to_tsvector('english', coalesce(meta ->> 'title', ''))) stored
);
alter table "public"."page_nimbus"
enable row level security;
create policy "anon can read page_nimbus"
on public.page_nimbus
for select
to anon
using (true);
create policy "authenticated can read page_nimbus"
on public.page_nimbus
for select
to authenticated
using (true);
create table "public"."page_section_nimbus" (
id bigint primary key generated always as identity,
page_id bigint not null references public.page_nimbus (id) on delete cascade,
content text,
token_count int,
embedding vector(1536),
slug text,
heading text,
rag_ignore boolean default false
);
alter table "public"."page_section_nimbus"
enable row level security;
create policy "anon can read page_section_nimbus"
on public.page_section_nimbus
for select
to anon
using (true);
create policy "authenticated can read page_section_nimbus"
on public.page_section_nimbus
for select
to authenticated
using (true);
-- Create indexes for nimbus tables (matching the regular tables)
create index fts_search_index_content_nimbus
on page_nimbus
using gin(fts_tokens);
create index fts_search_index_title_nimbus
on page_nimbus
using gin(title_tokens);
-- Create search function for nimbus tables (FTS search)
create or replace function docs_search_fts_nimbus(query text)
returns table (
id bigint,
path text,
type text,
title text,
subtitle text,
description text
)
set search_path = ''
language plpgsql
as $$
#variable_conflict use_variable
begin
return query
select
page_nimbus.id,
page_nimbus.path,
page_nimbus.type,
page_nimbus.meta ->> 'title' as title,
page_nimbus.meta ->> 'subtitle' as subtitle,
page_nimbus.meta ->> 'description' as description
from public.page_nimbus
where title_tokens @@ websearch_to_tsquery(query) or fts_tokens @@ websearch_to_tsquery(query)
order by greatest(
-- Title is more important than body, so use 10 as the weighting factor
-- Cut off at max rank of 1
least(10 * ts_rank(title_tokens, websearch_to_tsquery(query)), 1),
ts_rank(fts_tokens, websearch_to_tsquery(query))
) desc
limit 10;
end;
$$;
-- Create embedding matching function for nimbus tables
create or replace function match_embedding_nimbus(
embedding vector(1536),
match_threshold float default 0.78,
max_results int default 30
)
returns setof public.page_section_nimbus
set search_path = ''
language plpgsql
as $$
#variable_conflict use_variable
begin
return query
select *
from public.page_section_nimbus
where (page_section_nimbus.embedding operator(public.<#>) embedding) <= -match_threshold
order by page_section_nimbus.embedding operator(public.<#>) embedding
limit max_results;
end;
$$;
-- Create hybrid search function for nimbus tables
create or replace function search_content_hybrid_nimbus(
query_text text,
query_embedding vector(1536),
max_result int default 30,
full_text_weight float default 1,
semantic_weight float default 1,
rrf_k int default 50,
match_threshold float default 0.78,
include_full_content boolean default false
)
returns table (
id bigint,
page_title text,
type text,
href text,
content text,
metadata json,
subsections json[]
)
language sql
set search_path = ''
as $$
with full_text as (
select
id,
row_number() over(order by greatest(
least(10 * ts_rank(title_tokens, websearch_to_tsquery(query_text)), 1),
ts_rank(fts_tokens, websearch_to_tsquery(query_text))
) desc) as rank_ix
from public.page_nimbus
where title_tokens @@ websearch_to_tsquery(query_text) or fts_tokens @@ websearch_to_tsquery(query_text)
order by rank_ix
limit least(max_result, 30) * 2
),
semantic as (
select
page_id as id,
row_number() over () as rank_ix
from public.match_embedding_nimbus(query_embedding, match_threshold, max_result * 2)
),
rrf as (
select
coalesce(full_text.id, semantic.id) as id,
coalesce(1.0 / (rrf_k + full_text.rank_ix), 0.0) * full_text_weight +
coalesce(1.0 / (rrf_k + semantic.rank_ix), 0.0) * semantic_weight as rrf_score
from full_text
full outer join semantic on full_text.id = semantic.id
)
select
page_nimbus.id,
page_nimbus.meta ->> 'title' as page_title,
page_nimbus.type,
public.get_full_content_url(page_nimbus.type, page_nimbus.path, null) as href,
case when include_full_content then page_nimbus.content else null end as content,
page_nimbus.meta as metadata,
array_agg(json_build_object(
'title', page_section_nimbus.heading,
'href', public.get_full_content_url(page_nimbus.type, page_nimbus.path, page_section_nimbus.slug),
'content', page_section_nimbus.content
)) as subsections
from rrf
join public.page_nimbus on page_nimbus.id = rrf.id
left join public.page_section_nimbus on page_section_nimbus.page_id = page_nimbus.id
where rrf.rrf_score > 0
group by page_nimbus.id
order by max(rrf.rrf_score) desc
limit max_result;
$$;
create or replace function match_page_sections_v2_nimbus(
embedding vector(1536),
match_threshold float,
min_content_length int
)
returns setof page_section_nimbus
set search_path = ''
language plpgsql
as $$
#variable_conflict use_variable
begin
return query
select *
from public.page_section_nimbus
-- We only care about sections that have a useful amount of content
where length(page_section_nimbus.content) >= min_content_length
-- The dot product is negative because of a Postgres limitation, so we negate it
and (page_section_nimbus.embedding operator(public.<#>) embedding) * -1 > match_threshold
-- OpenAI embeddings are normalized to length 1, so
-- cosine similarity and dot product will produce the same results.
-- Using dot product which can be computed slightly faster.
--
-- For the different syntaxes, see https://github.com/pgvector/pgvector
order by page_section_nimbus.embedding operator(public.<#>) embedding;
end;
$$;
create or replace function docs_search_embeddings_nimbus(
embedding vector(1536),
match_threshold float
)
returns table (
id bigint,
path text,
type text,
title text,
subtitle text,
description text,
headings text[],
slugs text[]
)
set search_path = ''
language plpgsql
as $$
#variable_conflict use_variable
begin
return query
with match as(
select *
from public.page_section_nimbus
-- The dot product is negative because of a Postgres limitation, so we negate it
where (page_section_nimbus.embedding operator(public.<#>) embedding) * -1 > match_threshold
-- OpenAI embeddings are normalized to length 1, so
-- cosine similarity and dot product will produce the same results.
-- Using dot product which can be computed slightly faster.
--
-- For the different syntaxes, see https://github.com/pgvector/pgvector
order by page_section_nimbus.embedding operator(public.<#>) embedding
limit 10
)
select
page_nimbus.id,
page_nimbus.path,
page_nimbus.type,
page_nimbus.meta ->> 'title' as title,
page_nimbus.meta ->> 'subtitle' as subtitle,
page_nimbus.meta ->> 'description' as description,
array_agg(match.heading) as headings,
array_agg(match.slug) as slugs
from public.page_nimbus
join match on match.page_id = page_nimbus.id
group by page_nimbus.id;
end;
$$;
create or replace function search_content_nimbus(
embedding vector(1536),
include_full_content boolean default false,
match_threshold float default 0.78,
max_result int default 30
)
returns table (
id bigint,
page_title text,
type text,
href text,
content text,
metadata json,
subsections json[]
)
set search_path = ''
language sql
as $$
with matched_section as (
select
*,
row_number() over () as ranking
from public.match_embedding_nimbus(
embedding,
match_threshold,
max_result
)
)
select
page_nimbus.id,
meta ->> 'title' as page_title,
type,
public.get_full_content_url(type, path, null) as href,
case
when include_full_content
then page_nimbus.content
else
null
end as content,
meta as metadata,
array_agg(
json_build_object(
'title', heading,
'href', public.get_full_content_url(type, path, slug),
'content', matched_section.content
)
)
from matched_section
join public.page_nimbus on matched_section.page_id = page_nimbus.id
group by page_nimbus.id
order by min(ranking);
$$;