diff --git a/firecrawl.json b/firecrawl.json index d56b12a..8d8d63f 100644 --- a/firecrawl.json +++ b/firecrawl.json @@ -1,9 +1,9 @@ { "openapi": "3.1.0", "info": { - "title": "Firecrawl API", - "version": "1.0.0", - "description": "Comprehensive web scraping and crawling API with support for single page scraping, batch processing, website crawling, and content extraction." + "title": "Firecrawl API v2", + "version": "2.0.0", + "description": "Comprehensive web scraping and crawling API v2 with support for single page scraping, batch processing, website crawling, and content extraction." }, "servers": [ { @@ -18,60 +18,32 @@ ], "tags": [ { - "name": "v1-scraping", - "description": "v1 Single page scraping operations" + "name": "scraping", + "description": "Single page scraping operations" }, { - "name": "v1-crawling", - "description": "v1 Website crawling operations" + "name": "crawling", + "description": "Website crawling operations" }, { - "name": "v1-batch", - "description": "v1 Batch processing operations" + "name": "batch", + "description": "Batch processing operations" }, { - "name": "v1-extraction", - "description": "v1 Content extraction operations" + "name": "extraction", + "description": "Content extraction operations" }, { - "name": "v1-search", - "description": "v1 Search and discovery operations" + "name": "search", + "description": "Search and discovery operations" }, { - "name": "v1-jobs", - "description": "v1 Job status and management" + "name": "jobs", + "description": "Job status and management" }, { - "name": "v1-team", - "description": "v1 Team usage and billing" - }, - { - "name": "v2-scraping", - "description": "v2 Single page scraping operations" - }, - { - "name": "v2-crawling", - "description": "v2 Website crawling operations" - }, - { - "name": "v2-batch", - "description": "v2 Batch processing operations" - }, - { - "name": "v2-extraction", - "description": "v2 Content extraction operations" - }, - { - "name": "v2-search", - "description": "v2 Search and discovery operations" - }, - { - "name": "v2-jobs", - "description": "v2 Job status and management" - }, - { - "name": "v2-team", - "description": "v2 Team usage and billing" + "name": "team", + "description": "Team usage and billing" }, { "name": "websockets", @@ -223,6 +195,19 @@ "proxyUsed": { "type": "string", "description": "Proxy type used" + }, + "title": { + "type": "string", + "description": "Page title" + }, + "favicon": { + "type": "string", + "format": "uri", + "description": "Favicon URL" + }, + "language": { + "type": "string", + "description": "Page language" } } }, @@ -277,9 +262,14 @@ "id": { "type": "string", "description": "Crawl job ID" + }, + "url": { + "type": "string", + "format": "uri", + "description": "URL to check job status" } }, - "required": ["success", "id"] + "required": ["success", "id", "url"] }, "BatchScrapeRequest": { "type": "object", @@ -309,9 +299,19 @@ "id": { "type": "string", "description": "Batch scrape job ID" + }, + "url": { + "type": "string", + "format": "uri", + "description": "URL to check job status" + }, + "invalidURLs": { + "type": "array", + "items": {"type": "string"}, + "description": "URLs that were invalid" } }, - "required": ["success", "id"] + "required": ["success", "id", "url", "invalidURLs"] }, "MapRequest": { "type": "object", @@ -347,21 +347,22 @@ "type": "boolean", "const": true }, - "data": { - "type": "object", - "properties": { - "links": { - "type": "array", - "items": { + "links": { + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { "type": "string", "format": "uri" - }, - "description": "Discovered URLs" - } - } + } + }, + "required": ["url"] + }, + "description": "Discovered URLs" } }, - "required": ["success", "data"] + "required": ["success", "links"] }, "SearchRequest": { "type": "object", @@ -391,28 +392,38 @@ "const": true }, "data": { - "type": "array", - "items": { - "type": "object", - "properties": { - "url": { - "type": "string", - "format": "uri" - }, - "title": { - "type": "string" - }, - "description": { - "type": "string" - }, - "markdown": { - "type": "string" + "type": "object", + "properties": { + "web": { + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string", + "format": "uri" + }, + "title": { + "type": "string" + }, + "description": { + "type": "string" + }, + "markdown": { + "type": "string" + } + } } } - } + }, + "required": ["web"] + }, + "creditsUsed": { + "type": "integer", + "description": "Credits consumed" } }, - "required": ["success", "data"] + "required": ["success", "data", "creditsUsed"] }, "ExtractRequest": { "type": "object", @@ -480,7 +491,21 @@ "data": { "type": "array", "items": { - "$ref": "#/components/schemas/ScrapeResponse" + "type": "object", + "properties": { + "markdown": { + "type": "string" + }, + "html": { + "type": "string" + }, + "rawHtml": { + "type": "string" + }, + "metadata": { + "$ref": "#/components/schemas/ScrapeMetadata" + } + } }, "description": "Job results (when completed)" } @@ -514,12 +539,12 @@ } }, "paths": { - "/v1/scrape": { + "/v2/scrape": { "post": { - "tags": ["v1-scraping"], + "tags": ["scraping"], "summary": "Scrape a single URL", "description": "Extract content from a single web page in various formats", - "operationId": "scrapeUrl_v1", + "operationId": "scrapeUrl", "requestBody": { "required": true, "content": { @@ -584,12 +609,53 @@ } } }, - "/v1/crawl": { + "/v2/scrape/{jobId}": { + "get": { + "tags": ["jobs"], + "summary": "Get scrape job status", + "description": "Retrieve the status and results of a scrape job", + "operationId": "getScrapeStatus", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "Scrape job ID" + } + ], + "responses": { + "200": { + "description": "Job status retrieved", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobStatus" + } + } + } + }, + "404": { + "description": "Job not found", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + } + } + } + } + } + } + }, + "/v2/crawl": { "post": { - "tags": ["v1-crawling"], + "tags": ["crawling"], "summary": "Start a crawl job", "description": "Crawl a website starting from a given URL", - "operationId": "startCrawl_v1", + "operationId": "startCrawl", "requestBody": { "required": true, "content": { @@ -634,12 +700,12 @@ } } }, - "/v1/crawl/{jobId}": { + "/v2/crawl/{jobId}": { "get": { - "tags": ["v1-jobs"], + "tags": ["jobs"], "summary": "Get crawl job status", "description": "Retrieve the status and results of a crawl job", - "operationId": "getCrawlStatus_v1", + "operationId": "getCrawlStatus", "parameters": [ { "name": "jobId", @@ -675,10 +741,10 @@ } }, "delete": { - "tags": ["v1-jobs"], + "tags": ["jobs"], "summary": "Cancel crawl job", "description": "Cancel a running crawl job", - "operationId": "cancelCrawl_v1", + "operationId": "cancelCrawl", "parameters": [ { "name": "jobId", @@ -720,249 +786,12 @@ } } }, - "/v1/batch/scrape": { - "post": { - "tags": ["v1-batch"], - "summary": "Start batch scrape job", - "description": "Scrape multiple URLs in batch", - "operationId": "batchScrape_v1", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/BatchScrapeRequest" - } - } - } - }, - "responses": { - "202": { - "description": "Batch scrape job started", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/BatchScrapeResponse" - } - } - } - } - } - } - }, - "/v1/batch/scrape/{jobId}": { + "/v2/crawl/ongoing": { "get": { - "tags": ["v1-jobs"], - "summary": "Get batch scrape status", - "description": "Retrieve the status and results of a batch scrape job", - "operationId": "getBatchScrapeStatus_v1", - "parameters": [ - { - "name": "jobId", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "Batch scrape job ID" - } - ], - "responses": { - "200": { - "description": "Job status retrieved", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobStatus" - } - } - } - } - } - }, - "delete": { - "tags": ["v1-jobs"], - "summary": "Cancel batch scrape job", - "description": "Cancel a running batch scrape job", - "operationId": "cancelBatchScrape_v1", - "parameters": [ - { - "name": "jobId", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "Batch scrape job ID" - } - ], - "responses": { - "200": { - "description": "Job cancelled", - "content": { - "application/json": { - "schema": { - "type": "object", - "properties": { - "success": { - "type": "boolean", - "const": true - } - } - } - } - } - } - } - } - }, - "/v1/map": { - "post": { - "tags": ["v1-search"], - "summary": "Map website URLs", - "description": "Discover and map all URLs on a website", - "operationId": "mapWebsite_v1", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/MapRequest" - } - } - } - }, - "responses": { - "200": { - "description": "Website mapped successfully", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/MapResponse" - } - } - } - } - } - } - }, - "/v1/search": { - "post": { - "tags": ["v1-search"], - "summary": "Search and scrape", - "description": "Search the web and scrape results", - "operationId": "search_v1", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SearchRequest" - } - } - } - }, - "responses": { - "200": { - "description": "Search results", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/SearchResponse" - } - } - } - } - } - } - }, - "/v1/extract": { - "post": { - "tags": ["v1-extraction"], - "summary": "Extract structured data", - "description": "Extract structured data from web pages using AI", - "operationId": "extract_v1", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ExtractRequest" - } - } - } - }, - "responses": { - "202": { - "description": "Extract job started", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ExtractResponse" - } - } - } - } - } - } - }, - "/v1/extract/{jobId}": { - "get": { - "tags": ["v1-jobs"], - "summary": "Get extract job status", - "description": "Retrieve the status and results of an extract job", - "operationId": "getExtractStatus_v1", - "parameters": [ - { - "name": "jobId", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "Extract job ID" - } - ], - "responses": { - "200": { - "description": "Job status retrieved", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobStatus" - } - } - } - } - } - } - }, - "/v1/team/credit-usage": { - "get": { - "tags": ["v1-team"], - "summary": "Get credit usage", - "description": "Retrieve team credit usage statistics", - "operationId": "getCreditUsage_v1", - "responses": { - "200": { - "description": "Credit usage data", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreditUsage" - } - } - } - } - } - } - }, - "/v1/crawl/ongoing": { - "get": { - "tags": ["v1-jobs"], + "tags": ["jobs"], "summary": "Get ongoing crawls", "description": "List all ongoing crawl jobs", - "operationId": "getOngoingCrawls_v1", + "operationId": "getOngoingCrawls", "responses": { "200": { "description": "List of ongoing crawls", @@ -988,133 +817,28 @@ } } }, - "/v2/scrape": { - "post": { - "tags": ["v2-scraping"], - "summary": "Scrape a single URL (v2)", - "description": "Extract content from a single web page in various formats using v2 API", - "operationId": "scrapeUrl_v2", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScrapeRequest" - } - } - } - }, - "responses": { - "200": { - "description": "Successful scrape", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ScrapeResponse" - } - } - } - }, - "400": { - "description": "Bad request", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Error" - } - } - } - } - } - } - }, - "/v2/crawl": { - "post": { - "tags": ["v2-crawling"], - "summary": "Start a crawl job (v2)", - "description": "Crawl a website starting from a given URL using v2 API", - "operationId": "startCrawl_v2", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CrawlRequest" - } - } - } - }, - "responses": { - "202": { - "description": "Crawl job started", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CrawlResponse" - } - } - } - } - } - } - }, - "/v2/crawl/{jobId}": { + "/v2/crawl/active": { "get": { - "tags": ["v2-jobs"], - "summary": "Get crawl job status (v2)", - "description": "Retrieve the status and results of a crawl job using v2 API", - "operationId": "getCrawlStatus_v2", - "parameters": [ - { - "name": "jobId", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "Crawl job ID" - } - ], + "tags": ["jobs"], + "summary": "Get active crawls", + "description": "List all active crawl jobs (same as ongoing)", + "operationId": "getActiveCrawls", "responses": { "200": { - "description": "Job status retrieved", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/JobStatus" - } - } - } - } - } - }, - "delete": { - "tags": ["v2-jobs"], - "summary": "Cancel crawl job (v2)", - "description": "Cancel a running crawl job using v2 API", - "operationId": "cancelCrawl_v2", - "parameters": [ - { - "name": "jobId", - "in": "path", - "required": true, - "schema": { - "type": "string" - }, - "description": "Crawl job ID" - } - ], - "responses": { - "200": { - "description": "Job cancelled", + "description": "List of active crawls", "content": { "application/json": { "schema": { "type": "object", "properties": { "success": { - "type": "boolean", - "const": true + "type": "boolean" + }, + "data": { + "type": "array", + "items": { + "$ref": "#/components/schemas/JobStatus" + } } } } @@ -1126,10 +850,10 @@ }, "/v2/batch/scrape": { "post": { - "tags": ["v2-batch"], - "summary": "Start batch scrape job (v2)", - "description": "Scrape multiple URLs in batch using v2 API", - "operationId": "batchScrape_v2", + "tags": ["batch"], + "summary": "Start batch scrape job", + "description": "Scrape multiple URLs in batch", + "operationId": "batchScrape", "requestBody": { "required": true, "content": { @@ -1154,12 +878,78 @@ } } }, + "/v2/batch/scrape/{jobId}": { + "get": { + "tags": ["jobs"], + "summary": "Get batch scrape status", + "description": "Retrieve the status and results of a batch scrape job", + "operationId": "getBatchScrapeStatus", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "Batch scrape job ID" + } + ], + "responses": { + "200": { + "description": "Job status retrieved", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobStatus" + } + } + } + } + } + }, + "delete": { + "tags": ["jobs"], + "summary": "Cancel batch scrape job", + "description": "Cancel a running batch scrape job", + "operationId": "cancelBatchScrape", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "Batch scrape job ID" + } + ], + "responses": { + "200": { + "description": "Job cancelled", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean", + "const": true + } + } + } + } + } + } + } + } + }, "/v2/map": { "post": { - "tags": ["v2-search"], - "summary": "Map website URLs (v2)", - "description": "Discover and map all URLs on a website using v2 API", - "operationId": "mapWebsite_v2", + "tags": ["search"], + "summary": "Map website URLs", + "description": "Discover and map all URLs on a website", + "operationId": "mapWebsite", "requestBody": { "required": true, "content": { @@ -1186,10 +976,10 @@ }, "/v2/search": { "post": { - "tags": ["v2-search"], - "summary": "Search and scrape (v2)", - "description": "Search the web and scrape results using v2 API", - "operationId": "search_v2", + "tags": ["search"], + "summary": "Search and scrape", + "description": "Search the web and scrape results", + "operationId": "search", "requestBody": { "required": true, "content": { @@ -1216,10 +1006,10 @@ }, "/v2/extract": { "post": { - "tags": ["v2-extraction"], - "summary": "Extract structured data (v2)", - "description": "Extract structured data from web pages using AI with v2 API", - "operationId": "extract_v2", + "tags": ["extraction"], + "summary": "Extract structured data", + "description": "Extract structured data from web pages using AI", + "operationId": "extract", "requestBody": { "required": true, "content": { @@ -1244,25 +1034,86 @@ } } }, - "/v2/crawl/params-preview": { - "post": { - "tags": ["v2-crawling"], - "summary": "Preview crawl parameters (v2)", - "description": "Preview and validate crawl parameters before starting a crawl job", - "operationId": "previewCrawlParams_v2", - "requestBody": { - "required": true, - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CrawlRequest" + "/v2/extract/{jobId}": { + "get": { + "tags": ["jobs"], + "summary": "Get extract job status", + "description": "Retrieve the status and results of an extract job", + "operationId": "getExtractStatus", + "parameters": [ + { + "name": "jobId", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "Extract job ID" + } + ], + "responses": { + "200": { + "description": "Job status retrieved", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/JobStatus" + } } } } - }, + } + } + }, + "/v2/team/credit-usage": { + "get": { + "tags": ["team"], + "summary": "Get credit usage", + "description": "Retrieve team credit usage statistics", + "operationId": "getCreditUsage", "responses": { "200": { - "description": "Parameter preview", + "description": "Credit usage data", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreditUsage" + } + } + } + } + } + } + }, + "/v2/team/credit-usage/historical": { + "get": { + "tags": ["team"], + "summary": "Get historical credit usage", + "description": "Retrieve historical team credit usage statistics", + "operationId": "getHistoricalCreditUsage", + "responses": { + "200": { + "description": "Historical credit usage data", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/CreditUsage" + } + } + } + } + } + } + }, + "/v2/team/token-usage": { + "get": { + "tags": ["team"], + "summary": "Get token usage", + "description": "Retrieve team token usage statistics", + "operationId": "getTokenUsage", + "responses": { + "200": { + "description": "Token usage data", "content": { "application/json": { "schema": { @@ -1274,13 +1125,121 @@ "data": { "type": "object", "properties": { - "estimatedPages": { + "totalTokensUsed": { + "type": "integer" + } + } + } + } + } + } + } + } + } + } + }, + "/v2/team/token-usage/historical": { + "get": { + "tags": ["team"], + "summary": "Get historical token usage", + "description": "Retrieve historical team token usage statistics", + "operationId": "getHistoricalTokenUsage", + "responses": { + "200": { + "description": "Historical token usage data", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "totalTokensUsed": { + "type": "integer" + } + } + } + } + } + } + } + } + } + } + }, + "/v2/concurrency-check": { + "get": { + "tags": ["jobs"], + "summary": "Check concurrency limits", + "description": "Check current concurrency usage and limits", + "operationId": "checkConcurrency", + "responses": { + "200": { + "description": "Concurrency status", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "current": { "type": "integer", - "description": "Estimated number of pages to crawl" + "description": "Current concurrent jobs" }, - "estimatedCredits": { + "limit": { "type": "integer", - "description": "Estimated credits required" + "description": "Maximum concurrent jobs allowed" + } + } + } + } + } + } + } + } + } + } + }, + "/v2/team/queue-status": { + "get": { + "tags": ["team"], + "summary": "Get queue status", + "description": "Retrieve current queue status and statistics", + "operationId": "getQueueStatus", + "responses": { + "200": { + "description": "Queue status data", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "success": { + "type": "boolean" + }, + "data": { + "type": "object", + "properties": { + "waiting": { + "type": "integer" + }, + "active": { + "type": "integer" + }, + "completed": { + "type": "integer" + }, + "failed": { + "type": "integer" } } }