Crawl a Site
Crawl an entire website and get every page as clean markdown.
Crawl a Site
Crawl an entire website starting from a URL. Returns all discovered pages as clean markdown. Runs asynchronously — you submit the job and poll for results.
Start a Crawl
curl -X POST https://scrapeforllm.com/api/app/scrapes \
-H "Content-Type: application/json" \
-H "Authorization: Bearer YOUR_API_KEY" \
-d '{
"url": "https://docs.example.com",
"type": "crawl"
}'const response = await fetch("https://scrapeforllm.com/api/app/scrapes", {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: "Bearer YOUR_API_KEY",
},
body: JSON.stringify({
url: "https://docs.example.com",
type: "crawl",
}),
});
const { scrape } = await response.json();
console.log("Crawl started:", scrape.id);import requests
response = requests.post(
"https://scrapeforllm.com/api/app/scrapes",
headers={
"Content-Type": "application/json",
"Authorization": "Bearer YOUR_API_KEY",
},
json={
"url": "https://docs.example.com",
"type": "crawl",
},
)
scrape = response.json()["scrape"]
print("Crawl started:", scrape["id"])Response (201 — job started):
{
"scrape": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"url": "https://docs.example.com",
"type": "crawl",
"status": "processing",
"firecrawlJobId": "fc-job-abc123",
"createdAt": "2025-01-15T10:30:00.000Z"
}
}

Poll for Progress
Crawls run asynchronously. Poll the scrape ID until status is completed or failed.
# Poll every 2 seconds until done
curl https://scrapeforllm.com/api/app/scrapes/YOUR_SCRAPE_ID \
-H "Authorization: Bearer YOUR_API_KEY"async function pollCrawl(scrapeId) {
while (true) {
const res = await fetch(
`https://scrapeforllm.com/api/app/scrapes/${scrapeId}`,
{ headers: { Authorization: "Bearer YOUR_API_KEY" } }
);
const data = await res.json();
if (data.scrape.status === "completed") {
console.log(`Done! ${data.scrape.pagesScraped} pages scraped.`);
return data.scrape;
}
if (data.scrape.status === "failed") {
throw new Error("Crawl failed");
}
// Show progress
if (data.progress) {
console.log(`Progress: ${data.progress.completed}/${data.progress.total}`);
}
await new Promise((r) => setTimeout(r, 2000));
}
}
const result = await pollCrawl("YOUR_SCRAPE_ID");import time
import requests
def poll_crawl(scrape_id):
while True:
res = requests.get(
f"https://scrapeforllm.com/api/app/scrapes/{scrape_id}",
headers={"Authorization": "Bearer YOUR_API_KEY"},
)
data = res.json()
if data["scrape"]["status"] == "completed":
print(f"Done! {data['scrape']['pagesScraped']} pages scraped.")
return data["scrape"]
if data["scrape"]["status"] == "failed":
raise Exception("Crawl failed")
# Show progress
if "progress" in data:
print(f"Progress: {data['progress']['completed']}/{data['progress']['total']}")
time.sleep(2)
result = poll_crawl("YOUR_SCRAPE_ID")While crawling:
{
"scrape": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"status": "processing"
},
"progress": {
"completed": 15,
"total": 42,
"percentage": 35,
"partialPages": [
{
"title": "Introduction",
"sourceURL": "https://docs.example.com/intro",
"markdown": "# Introduction\n\nFirst 500 characters...",
"statusCode": 200
}
]
}
}

When complete:
{
"scrape": {
"id": "550e8400-e29b-41d4-a716-446655440000",
"status": "completed",
"creditsUsed": 42,
"pagesScraped": 42,
"result": {
"data": [
{
"markdown": "# Page content...",
"metadata": {
"title": "Page Title",
"sourceURL": "https://docs.example.com/page-1"
}
}
]
},
"completedAt": "2025-01-15T10:32:00.000Z"
}
}

Credits
Crawls cost 1 credit per page scraped. Credits are charged when the crawl completes, not when it starts. You need at least 1 credit to start a crawl.