Crawl a Site
Crawl an entire website and get every page as clean markdown.
Crawl a Site
Crawl an entire website starting from a URL. Returns all discovered pages as clean markdown. Runs asynchronously — you submit the job and poll for results.
Start a Crawl
curl -X POST https://scrapeforllm.com/api/app/scrapes \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"url": "https://docs.example.com",
"type": "crawl"
}'const response = await fetch("https://scrapeforllm.com/api/app/scrapes", {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: "Bearer YOUR_API_KEY",
},
body: JSON.stringify({
url: "https://docs.example.com",
type: "crawl",
}),
});
const { scrape } = await response.json();
console.log("Crawl started:", scrape.id);import requests
response = requests.post(
"https://scrapeforllm.com/api/app/scrapes",
headers={
"Content-Type": "application/json",
"Authorization": "Bearer YOUR_API_KEY",
},
json={
"url": "https://docs.example.com",
"type": "crawl",
},
)
scrape = response.json()["scrape"]
print("Crawl started:", scrape["id"])Response (201 — job started):
{
"scrape": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://docs.example.com",
"type": "crawl",
"status": "processing",
"firecrawlJobId": "fc-job-abc123",
"createdAt": "2025-01-15T10:30:00.000Z"
}
}

Poll for Progress
Crawls run asynchronously. Poll the scrape ID until status is completed or failed.
# Poll every 2 seconds until done
curl https://scrapeforllm.com/api/app/scrapes/YOUR_SCRAPE_ID \
-H "Authorization: Bearer YOUR_API_KEY"async function pollCrawl(scrapeId) {
while (true) {
const res = await fetch(
`https://scrapeforllm.com/api/app/scrapes/${scrapeId}`,
{ headers: { Authorization: "Bearer YOUR_API_KEY" } }
);
const data = await res.json();
if (data.scrape.status === "completed") {
console.log(`Done! ${data.scrape.pagesScraped} pages scraped.`);
return data.scrape;
}
if (data.scrape.status === "failed") {
throw new Error("Crawl failed");
}
// Show progress
if (data.progress) {
console.log(`Progress: ${data.progress.completed}/${data.progress.total}`);
}
await new Promise((r) => setTimeout(r, 2000));
}
}
const result = await pollCrawl("YOUR_SCRAPE_ID");import time
import requests
def poll_crawl(scrape_id):
while True:
res = requests.get(
f"https://scrapeforllm.com/api/app/scrapes/{scrape_id}",
headers={"Authorization": "Bearer YOUR_API_KEY"},
)
data = res.json()
if data["scrape"]["status"] == "completed":
print(f"Done! {data['scrape']['pagesScraped']} pages scraped.")
return data["scrape"]
if data["scrape"]["status"] == "failed":
raise Exception("Crawl failed")
# Show progress
if "progress" in data:
print(f"Progress: {data['progress']['completed']}/{data['progress']['total']}")
time.sleep(2)
result = poll_crawl("YOUR_SCRAPE_ID")While crawling:
{
"scrape": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"status": "processing"
},
"progress": {
"completed": 15,
"total": 42,
"percentage": 35,
"partialPages": [
{
"title": "Introduction",
"sourceURL": "https://docs.example.com/intro",
"markdown": "# Introduction\n\nFirst 500 characters...",
"statusCode": 200
}
]
}
}

When complete:
{
"scrape": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"status": "completed",
"creditsUsed": 42,
"pagesScraped": 42,
"result": {
"data": [
{
"markdown": "# Page content...",
"metadata": {
"title": "Page Title",
"sourceURL": "https://docs.example.com/page-1"
}
}
]
},
"completedAt": "2025-01-15T10:32:00.000Z"
}
}

Credits
Crawls cost 1 credit per page scraped. Credits are charged when the crawl completes, not when it starts. You need at least 1 credit to start a crawl.