Files
rag-manager/firecrawl.json
Travis Vasceannie 37d1e434af fix(docs): validate and correct Firecrawl v2 OpenAPI schema
Based on live API testing of v2 endpoints:

Validated against actual responses:
- /v2/scrape - matches schema ✓
- /v2/map - corrected response structure (direct links array with url objects)
- /v2/crawl - added missing 'url' field to response
- /v2/batch/scrape - added 'url' and 'invalidURLs' fields
- /v2/search - corrected to data.web structure with creditsUsed at root

🔧 Schema corrections:
- MapResponse: Fixed to use direct 'links' array with {url: string} objects
- CrawlResponse: Added required 'url' field for status endpoint
- BatchScrapeResponse: Added 'url' and 'invalidURLs' fields
- SearchResponse: Fixed to use data.web structure + root creditsUsed
- ScrapeMetadata: Added optional title, favicon, language fields

🚮 Removed all v1 endpoints to focus only on v2 API
🏷️ Updated title to 'Firecrawl API v2' and version to '2.0.0'

All schemas now accurately reflect actual API behavior from container 5d8e66d87762
2025-09-19 00:45:43 +00:00

1256 lines
47 KiB
JSON

{
"openapi": "3.1.0",
"info": {
"title": "Firecrawl API v2",
"version": "2.0.0",
"description": "Comprehensive web scraping and crawling API v2 with support for single page scraping, batch processing, website crawling, and content extraction."
},
"servers": [
{
"url": "http://crawl.lab:30002",
"description": "Local Firecrawl instance"
}
],
"security": [
{
"BearerAuth": []
}
],
"tags": [
{
"name": "scraping",
"description": "Single page scraping operations"
},
{
"name": "crawling",
"description": "Website crawling operations"
},
{
"name": "batch",
"description": "Batch processing operations"
},
{
"name": "extraction",
"description": "Content extraction operations"
},
{
"name": "search",
"description": "Search and discovery operations"
},
{
"name": "jobs",
"description": "Job status and management"
},
{
"name": "team",
"description": "Team usage and billing"
},
{
"name": "websockets",
"description": "WebSocket endpoints for real-time updates"
}
],
"components": {
"securitySchemes": {
"BearerAuth": {
  "type": "http",
  "scheme": "bearer",
  "bearerFormat": "API key",
  "description": "Firecrawl API key sent in the 'Authorization: Bearer <key>' header"
}
},
"schemas": {
"Error": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": false
},
"error": {
"type": "string",
"description": "Error message"
},
"code": {
"type": "string",
"description": "Error code"
}
},
"required": ["success", "error"]
},
"ScrapeRequest": {
"type": "object",
"properties": {
"url": {
"type": "string",
"format": "uri",
"description": "URL to scrape"
},
"formats": {
"type": "array",
"items": {
"type": "string",
"enum": ["markdown", "html", "rawHtml", "screenshot", "links", "summary"]
},
"description": "Output formats",
"default": ["markdown"]
},
"onlyMainContent": {
"type": "boolean",
"description": "Extract only main content",
"default": true
},
"includeTags": {
"type": "array",
"items": {"type": "string"},
"description": "HTML tags to include"
},
"excludeTags": {
"type": "array",
"items": {"type": "string"},
"description": "HTML tags to exclude"
},
"waitFor": {
"type": "integer",
"description": "Wait time in milliseconds",
"minimum": 0
},
"timeout": {
"type": "integer",
"description": "Timeout in milliseconds",
"minimum": 0,
"maximum": 600000
}
},
"required": ["url"]
},
"ScrapeResponse": {
  "type": "object",
  "properties": {
    "success": {
      "type": "boolean",
      "const": true
    },
    "data": {
      "type": "object",
      "properties": {
        "markdown": {
          "type": "string",
          "description": "Scraped content in markdown format"
        },
        "html": {
          "type": "string",
          "description": "Scraped content in HTML format"
        },
        "rawHtml": {
          "type": "string",
          "description": "Raw HTML content"
        },
        "screenshot": {
          "type": "string",
          "description": "Base64 encoded screenshot"
        },
        "links": {
          "type": "array",
          "items": {"type": "string"},
          "description": "Extracted links"
        },
        "summary": {
          "type": "string",
          "description": "Summary of the page content, present when the 'summary' format is requested"
        },
        "metadata": {
          "$ref": "#/components/schemas/ScrapeMetadata"
        }
      }
    }
  },
  "required": ["success", "data"]
},
"ScrapeMetadata": {
"type": "object",
"properties": {
"url": {
"type": "string",
"format": "uri",
"description": "Original URL"
},
"scrapeId": {
"type": "string",
"description": "Unique scrape identifier"
},
"sourceURL": {
"type": "string",
"format": "uri",
"description": "Source URL"
},
"statusCode": {
"type": "integer",
"description": "HTTP status code"
},
"contentType": {
"type": "string",
"description": "Content type"
},
"creditsUsed": {
"type": "integer",
"description": "Credits consumed"
},
"proxyUsed": {
"type": "string",
"description": "Proxy type used"
},
"title": {
"type": "string",
"description": "Page title"
},
"favicon": {
"type": "string",
"format": "uri",
"description": "Favicon URL"
},
"language": {
"type": "string",
"description": "Page language"
}
}
},
"CrawlRequest": {
  "type": "object",
  "properties": {
    "url": {
      "type": "string",
      "format": "uri",
      "description": "Starting URL for crawling"
    },
    "maxDepth": {
      "type": "integer",
      "description": "Maximum crawl depth",
      "minimum": 0,
      "maximum": 10
    },
    "limit": {
      "type": "integer",
      "description": "Maximum number of pages to crawl",
      "minimum": 1,
      "maximum": 10000
    },
    "allowExternalLinks": {
      "type": "boolean",
      "description": "Allow crawling external domains",
      "default": false
    },
    "includePaths": {
      "type": "array",
      "items": {"type": "string"},
      "description": "Paths to include"
    },
    "excludePaths": {
      "type": "array",
      "items": {"type": "string"},
      "description": "Paths to exclude"
    },
    "scrapeOptions": {
      "type": "object",
      "description": "Scrape options applied to every crawled page (the ScrapeRequest fields without 'url', which comes from the crawl itself)",
      "properties": {
        "formats": {
          "type": "array",
          "items": {
            "type": "string",
            "enum": ["markdown", "html", "rawHtml", "screenshot", "links", "summary"]
          },
          "description": "Output formats",
          "default": ["markdown"]
        },
        "onlyMainContent": {
          "type": "boolean",
          "description": "Extract only main content",
          "default": true
        },
        "includeTags": {
          "type": "array",
          "items": {"type": "string"},
          "description": "HTML tags to include"
        },
        "excludeTags": {
          "type": "array",
          "items": {"type": "string"},
          "description": "HTML tags to exclude"
        },
        "waitFor": {
          "type": "integer",
          "description": "Wait time in milliseconds",
          "minimum": 0
        },
        "timeout": {
          "type": "integer",
          "description": "Timeout in milliseconds",
          "minimum": 0,
          "maximum": 600000
        }
      }
    }
  },
  "required": ["url"]
},
"CrawlResponse": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
},
"id": {
"type": "string",
"description": "Crawl job ID"
},
"url": {
"type": "string",
"format": "uri",
"description": "URL to check job status"
}
},
"required": ["success", "id", "url"]
},
"BatchScrapeRequest": {
  "type": "object",
  "properties": {
    "urls": {
      "type": "array",
      "items": {
        "type": "string",
        "format": "uri"
      },
      "description": "URLs to scrape",
      "maxItems": 1000
    },
    "scrapeOptions": {
      "type": "object",
      "description": "Scrape options applied to every URL in the batch (the ScrapeRequest fields without 'url', which comes from 'urls')",
      "properties": {
        "formats": {
          "type": "array",
          "items": {
            "type": "string",
            "enum": ["markdown", "html", "rawHtml", "screenshot", "links", "summary"]
          },
          "description": "Output formats",
          "default": ["markdown"]
        },
        "onlyMainContent": {
          "type": "boolean",
          "description": "Extract only main content",
          "default": true
        },
        "includeTags": {
          "type": "array",
          "items": {"type": "string"},
          "description": "HTML tags to include"
        },
        "excludeTags": {
          "type": "array",
          "items": {"type": "string"},
          "description": "HTML tags to exclude"
        },
        "waitFor": {
          "type": "integer",
          "description": "Wait time in milliseconds",
          "minimum": 0
        },
        "timeout": {
          "type": "integer",
          "description": "Timeout in milliseconds",
          "minimum": 0,
          "maximum": 600000
        }
      }
    }
  },
  "required": ["urls"]
},
"BatchScrapeResponse": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
},
"id": {
"type": "string",
"description": "Batch scrape job ID"
},
"url": {
"type": "string",
"format": "uri",
"description": "URL to check job status"
},
"invalidURLs": {
"type": "array",
"items": {"type": "string"},
"description": "URLs that were invalid"
}
},
"required": ["success", "id", "url", "invalidURLs"]
},
"MapRequest": {
"type": "object",
"properties": {
"url": {
"type": "string",
"format": "uri",
"description": "Website URL to map"
},
"search": {
"type": "string",
"description": "Filter URLs by search term"
},
"limit": {
"type": "integer",
"description": "Maximum number of URLs to return",
"minimum": 1,
"maximum": 10000,
"default": 100
},
"includeSubdomains": {
"type": "boolean",
"description": "Include subdomains in mapping",
"default": false
}
},
"required": ["url"]
},
"MapResponse": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
},
"links": {
"type": "array",
"items": {
"type": "object",
"properties": {
"url": {
"type": "string",
"format": "uri"
}
},
"required": ["url"]
},
"description": "Discovered URLs"
}
},
"required": ["success", "links"]
},
"SearchRequest": {
  "type": "object",
  "properties": {
    "query": {
      "type": "string",
      "description": "Search query"
    },
    "limit": {
      "type": "integer",
      "description": "Maximum number of results",
      "minimum": 1,
      "maximum": 100,
      "default": 10
    },
    "scrapeOptions": {
      "type": "object",
      "description": "Scrape options applied to every search result (the ScrapeRequest fields without 'url', which comes from the results)",
      "properties": {
        "formats": {
          "type": "array",
          "items": {
            "type": "string",
            "enum": ["markdown", "html", "rawHtml", "screenshot", "links", "summary"]
          },
          "description": "Output formats",
          "default": ["markdown"]
        },
        "onlyMainContent": {
          "type": "boolean",
          "description": "Extract only main content",
          "default": true
        },
        "includeTags": {
          "type": "array",
          "items": {"type": "string"},
          "description": "HTML tags to include"
        },
        "excludeTags": {
          "type": "array",
          "items": {"type": "string"},
          "description": "HTML tags to exclude"
        },
        "waitFor": {
          "type": "integer",
          "description": "Wait time in milliseconds",
          "minimum": 0
        },
        "timeout": {
          "type": "integer",
          "description": "Timeout in milliseconds",
          "minimum": 0,
          "maximum": 600000
        }
      }
    }
  },
  "required": ["query"]
},
"SearchResponse": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
},
"data": {
"type": "object",
"properties": {
"web": {
"type": "array",
"items": {
"type": "object",
"properties": {
"url": {
"type": "string",
"format": "uri"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"markdown": {
"type": "string"
}
}
}
}
},
"required": ["web"]
},
"creditsUsed": {
"type": "integer",
"description": "Credits consumed"
}
},
"required": ["success", "data", "creditsUsed"]
},
"ExtractRequest": {
"type": "object",
"properties": {
"urls": {
"type": "array",
"items": {
"type": "string",
"format": "uri"
},
"description": "URLs to extract from"
},
"prompt": {
"type": "string",
"description": "Extraction prompt"
},
"schema": {
"type": "object",
"description": "JSON schema for extraction"
}
},
"required": ["urls"]
},
"ExtractResponse": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
},
"id": {
"type": "string",
"description": "Extract job ID"
}
},
"required": ["success", "id"]
},
"JobStatus": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"status": {
"type": "string",
"enum": ["waiting", "active", "completed", "failed", "cancelled", "paused"]
},
"total": {
"type": "integer",
"description": "Total number of pages/items"
},
"completed": {
"type": "integer",
"description": "Number of completed pages/items"
},
"creditsUsed": {
"type": "integer",
"description": "Credits consumed"
},
"expiresAt": {
"type": "string",
"format": "date-time",
"description": "Job expiration time"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"markdown": {
"type": "string"
},
"html": {
"type": "string"
},
"rawHtml": {
"type": "string"
},
"metadata": {
"$ref": "#/components/schemas/ScrapeMetadata"
}
}
},
"description": "Job results (when completed)"
}
},
"required": ["success", "status"]
},
"CreditUsage": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
},
"data": {
"type": "object",
"properties": {
"totalCreditsUsed": {
"type": "integer"
},
"remainingCredits": {
"type": "integer"
},
"totalCreditsPurchased": {
"type": "integer"
}
}
}
},
"required": ["success", "data"]
}
}
},
"paths": {
"/v2/scrape": {
"post": {
"tags": ["scraping"],
"summary": "Scrape a single URL",
"description": "Extract content from a single web page in various formats",
"operationId": "scrapeUrl",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScrapeRequest"
}
}
}
},
"responses": {
"200": {
"description": "Successful scrape",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ScrapeResponse"
}
}
}
},
"400": {
"description": "Bad request",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
},
"401": {
"description": "Unauthorized",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
},
"408": {
"description": "Request timeout",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
},
"500": {
"description": "Internal server error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
}
},
"/v2/scrape/{jobId}": {
"get": {
"tags": ["jobs"],
"summary": "Get scrape job status",
"description": "Retrieve the status and results of a scrape job",
"operationId": "getScrapeStatus",
"parameters": [
{
"name": "jobId",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Scrape job ID"
}
],
"responses": {
"200": {
"description": "Job status retrieved",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/JobStatus"
}
}
}
},
"404": {
"description": "Job not found",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
}
},
"/v2/crawl": {
"post": {
"tags": ["crawling"],
"summary": "Start a crawl job",
"description": "Crawl a website starting from a given URL",
"operationId": "startCrawl",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CrawlRequest"
}
}
}
},
"responses": {
"202": {
"description": "Crawl job started",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CrawlResponse"
}
}
}
},
"400": {
"description": "Bad request",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
},
"401": {
"description": "Unauthorized",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
}
},
"/v2/crawl/{jobId}": {
"get": {
"tags": ["jobs"],
"summary": "Get crawl job status",
"description": "Retrieve the status and results of a crawl job",
"operationId": "getCrawlStatus",
"parameters": [
{
"name": "jobId",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Crawl job ID"
}
],
"responses": {
"200": {
"description": "Job status retrieved",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/JobStatus"
}
}
}
},
"404": {
"description": "Job not found",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
},
"delete": {
"tags": ["jobs"],
"summary": "Cancel crawl job",
"description": "Cancel a running crawl job",
"operationId": "cancelCrawl",
"parameters": [
{
"name": "jobId",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Crawl job ID"
}
],
"responses": {
"200": {
"description": "Job cancelled",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
}
}
}
}
}
},
"404": {
"description": "Job not found",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Error"
}
}
}
}
}
}
},
"/v2/crawl/ongoing": {
"get": {
"tags": ["jobs"],
"summary": "Get ongoing crawls",
"description": "List all ongoing crawl jobs",
"operationId": "getOngoingCrawls",
"responses": {
"200": {
"description": "List of ongoing crawls",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/JobStatus"
}
}
}
}
}
}
}
}
}
},
"/v2/crawl/active": {
"get": {
"tags": ["jobs"],
"summary": "Get active crawls",
"description": "List all active crawl jobs (returns the same data as /v2/crawl/ongoing)",
"operationId": "getActiveCrawls",
"responses": {
"200": {
"description": "List of active crawls",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "array",
"items": {
"$ref": "#/components/schemas/JobStatus"
}
}
}
}
}
}
}
}
}
},
"/v2/batch/scrape": {
"post": {
"tags": ["batch"],
"summary": "Start batch scrape job",
"description": "Scrape multiple URLs in batch",
"operationId": "batchScrape",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchScrapeRequest"
}
}
}
},
"responses": {
"202": {
"description": "Batch scrape job started",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/BatchScrapeResponse"
}
}
}
}
}
}
},
"/v2/batch/scrape/{jobId}": {
"get": {
"tags": ["jobs"],
"summary": "Get batch scrape status",
"description": "Retrieve the status and results of a batch scrape job",
"operationId": "getBatchScrapeStatus",
"parameters": [
{
"name": "jobId",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Batch scrape job ID"
}
],
"responses": {
"200": {
"description": "Job status retrieved",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/JobStatus"
}
}
}
}
}
},
"delete": {
"tags": ["jobs"],
"summary": "Cancel batch scrape job",
"description": "Cancel a running batch scrape job",
"operationId": "cancelBatchScrape",
"parameters": [
{
"name": "jobId",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Batch scrape job ID"
}
],
"responses": {
"200": {
"description": "Job cancelled",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean",
"const": true
}
}
}
}
}
}
}
}
},
"/v2/map": {
"post": {
"tags": ["search"],
"summary": "Map website URLs",
"description": "Discover and map all URLs on a website",
"operationId": "mapWebsite",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/MapRequest"
}
}
}
},
"responses": {
"200": {
"description": "Website mapped successfully",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/MapResponse"
}
}
}
}
}
}
},
"/v2/search": {
"post": {
"tags": ["search"],
"summary": "Search and scrape",
"description": "Search the web and scrape results",
"operationId": "search",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SearchRequest"
}
}
}
},
"responses": {
"200": {
"description": "Search results",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SearchResponse"
}
}
}
}
}
}
},
"/v2/extract": {
"post": {
"tags": ["extraction"],
"summary": "Extract structured data",
"description": "Extract structured data from web pages using AI",
"operationId": "extract",
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ExtractRequest"
}
}
}
},
"responses": {
"202": {
"description": "Extract job started",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ExtractResponse"
}
}
}
}
}
}
},
"/v2/extract/{jobId}": {
"get": {
"tags": ["jobs"],
"summary": "Get extract job status",
"description": "Retrieve the status and results of an extract job",
"operationId": "getExtractStatus",
"parameters": [
{
"name": "jobId",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "Extract job ID"
}
],
"responses": {
"200": {
"description": "Job status retrieved",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/JobStatus"
}
}
}
}
}
}
},
"/v2/team/credit-usage": {
"get": {
"tags": ["team"],
"summary": "Get credit usage",
"description": "Retrieve team credit usage statistics",
"operationId": "getCreditUsage",
"responses": {
"200": {
"description": "Credit usage data",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreditUsage"
}
}
}
}
}
}
},
"/v2/team/credit-usage/historical": {
"get": {
"tags": ["team"],
"summary": "Get historical credit usage",
"description": "Retrieve historical team credit usage statistics",
"operationId": "getHistoricalCreditUsage",
"responses": {
"200": {
"description": "Historical credit usage data",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreditUsage"
}
}
}
}
}
}
},
"/v2/team/token-usage": {
"get": {
"tags": ["team"],
"summary": "Get token usage",
"description": "Retrieve team token usage statistics",
"operationId": "getTokenUsage",
"responses": {
"200": {
"description": "Token usage data",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "object",
"properties": {
"totalTokensUsed": {
"type": "integer"
}
}
}
}
}
}
}
}
}
}
},
"/v2/team/token-usage/historical": {
"get": {
"tags": ["team"],
"summary": "Get historical token usage",
"description": "Retrieve historical team token usage statistics",
"operationId": "getHistoricalTokenUsage",
"responses": {
"200": {
"description": "Historical token usage data",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "object",
"properties": {
"totalTokensUsed": {
"type": "integer"
}
}
}
}
}
}
}
}
}
}
},
"/v2/concurrency-check": {
"get": {
"tags": ["jobs"],
"summary": "Check concurrency limits",
"description": "Check current concurrency usage and limits",
"operationId": "checkConcurrency",
"responses": {
"200": {
"description": "Concurrency status",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "object",
"properties": {
"current": {
"type": "integer",
"description": "Current concurrent jobs"
},
"limit": {
"type": "integer",
"description": "Maximum concurrent jobs allowed"
}
}
}
}
}
}
}
}
}
}
},
"/v2/team/queue-status": {
"get": {
"tags": ["team"],
"summary": "Get queue status",
"description": "Retrieve current queue status and statistics",
"operationId": "getQueueStatus",
"responses": {
"200": {
"description": "Queue status data",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"success": {
"type": "boolean"
},
"data": {
"type": "object",
"properties": {
"waiting": {
"type": "integer"
},
"active": {
"type": "integer"
},
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
}
}
}
}
}
}
}
}
}
}
}
}
}