
Documentation Index

Fetch the complete documentation index at: https://developers.scrapeunblocker.com/llms.txt

Use this file to discover all available pages before exploring further.

Setup

There is no Python SDK to install - the API is plain HTTP, so the requests library covers everything. Async examples use httpx.
pip install requests httpx
Store your API key in an environment variable:
export SCRAPEUNBLOCKER_KEY="su_live_..."
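The snippets below read the key with os.environ[...], which raises a bare KeyError when the variable is missing; a small guard gives a clearer message (illustrative only, not part of the API):
import os

if not os.environ.get("SCRAPEUNBLOCKER_KEY"):
    raise SystemExit("Set SCRAPEUNBLOCKER_KEY before running these examples.")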

Fetch page HTML

import os
import requests

response = requests.post(
    "https://api.scrapeunblocker.com/getPageSource",
    params={"url": "https://example.com"},
    headers={"x-scrapeunblocker-key": os.environ["SCRAPEUNBLOCKER_KEY"]},
)
html = response.text
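From here any HTML parser works on the returned markup; for example, with BeautifulSoup (assumes pip install beautifulsoup4 - it is not part of the API):
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, "html.parser")
# Grab the page title as a quick sanity check that the fetch succeeded.
print(soup.title.string if soup.title else "(no <title> found)")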

Get parsed JSON instead of HTML

The killer feature: pass parsed_data=True and the API extracts structured data using Schema.org, __NEXT_DATA__, or AI-generated rules - no per-site parsers to maintain.
import os
import requests

response = requests.post(
    "https://api.scrapeunblocker.com/getPageSource",
    params={
        "url": "https://www.amazon.com/dp/B08N5WRWNW",
        "parsed_data": True,
    },
    headers={"x-scrapeunblocker-key": os.environ["SCRAPEUNBLOCKER_KEY"]},
)

payload = response.json()
print(payload["data"]["page_type"])    # "product"
print(payload["data"]["data"]["title"])
print(payload["data"]["data"]["price"])
See the parsed data guide for response shapes.
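Response shapes differ by page type, so it is safer to check page_type before reaching into nested fields - a defensive sketch using only the keys shown above:
data = payload.get("data", {})
if data.get("page_type") == "product":
    product = data.get("data", {})
    print(product.get("title"), product.get("price"))
else:
    print("Unhandled page type:", data.get("page_type"))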

Scrape a Google SERP

import os
import requests

response = requests.post(
    "https://api.scrapeunblocker.com/serpApi",
    params={
        "keyword": "best running shoes",
        "pages_to_check": 2,
    },
    headers={"x-scrapeunblocker-key": os.environ["SCRAPEUNBLOCKER_KEY"]},
)

serp = response.json()
for result in serp["organic"]:
    print(result["position"], result["title"], result["url"])
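If you want the results on disk, the fields shown above flatten cleanly into a CSV - a minimal sketch using only the standard library:
import csv

with open("serp.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["position", "title", "url"])
    writer.writeheader()
    for result in serp["organic"]:
        writer.writerow({k: result.get(k) for k in ("position", "title", "url")})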

Force a country

proxy_country works on all three endpoints.
import os
import requests

response = requests.post(
    "https://api.scrapeunblocker.com/getPageSource",
    params={
        "url": "https://www.amazon.de/dp/B08N5WRWNW",
        "parsed_data": True,
        "proxy_country": "de",
    },
    headers={"x-scrapeunblocker-key": os.environ["SCRAPEUNBLOCKER_KEY"]},
)
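Since proxy_country works on the SERP endpoint too, the same keyword can be checked from German IPs - a sketch reusing the serpApi call from earlier:
response = requests.post(
    "https://api.scrapeunblocker.com/serpApi",
    params={
        "keyword": "best running shoes",
        "pages_to_check": 1,
        "proxy_country": "de",
    },
    headers={"x-scrapeunblocker-key": os.environ["SCRAPEUNBLOCKER_KEY"]},
)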

Capture cookies and the proxy used

import os
import requests

response = requests.post(
    "https://api.scrapeunblocker.com/getPageSource",
    params={
        "url": "https://example.com",
        "get_cookies": True,
    },
    headers={"x-scrapeunblocker-key": os.environ["SCRAPEUNBLOCKER_KEY"]},
)

data = response.json()
html = data["html"]
cookies = {c["name"]: c["value"] for c in data["cookies"]}
served_by = data["proxy"]   # e.g. "us"
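The cookie dict drops straight into requests for lighter follow-up calls made outside the API - a sketch with a hypothetical follow-up URL, assuming the site accepts the session cookies on their own:
# Reuse the captured session cookies on a direct request (no API in between).
follow_up = requests.get("https://example.com/other-page", cookies=cookies)
print(follow_up.status_code)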

Fetch an image as PNG bytes

import os
import requests

response = requests.post(
    "https://api.scrapeunblocker.com/getImage",
    params={"url": "https://example.com/photo.jpg"},
    headers={"x-scrapeunblocker-key": os.environ["SCRAPEUNBLOCKER_KEY"]},
)

with open("photo.png", "wb") as f:
    f.write(response.content)
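It can be worth guarding the write above: if the request failed, response.content holds an error body rather than PNG bytes. A small check placed before the open(...) call, assuming the endpoint sets an image content-type header on success:
response.raise_for_status()
content_type = response.headers.get("content-type", "")
if not content_type.startswith("image/"):
    raise ValueError(f"Expected image bytes, got content-type {content_type!r}")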

Async with httpx

For high-throughput crawls, httpx.AsyncClient runs many requests concurrently. Cap concurrency to your plan’s limit with a semaphore.
import os
import asyncio
import httpx

KEY = os.environ["SCRAPEUNBLOCKER_KEY"]
CONCURRENCY = 10

async def scrape(client, sem, url):
    async with sem:
        r = await client.post(
            "https://api.scrapeunblocker.com/getPageSource",
            params={"url": url, "parsed_data": True},
        )
        return r.json()

async def main(urls):
    sem = asyncio.Semaphore(CONCURRENCY)
    async with httpx.AsyncClient(
        headers={"x-scrapeunblocker-key": KEY},
        timeout=180,
    ) as client:
        return await asyncio.gather(*(scrape(client, sem, u) for u in urls))

results = asyncio.run(main([
    "https://www.amazon.com/dp/B08N5WRWNW",
    "https://www.amazon.com/dp/B07FZ8S74R",
]))
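As written, one failed request raises out of asyncio.gather and cancels the batch. Passing return_exceptions=True returns exceptions as values instead, so the other URLs still complete - a sketch of the change inside main():
# Inside main(), replace the gather call with this to keep going past failures:
results = await asyncio.gather(
    *(scrape(client, sem, u) for u in urls),
    return_exceptions=True,
)

# Afterwards, failed URLs show up as exception objects in the results list:
ok = [r for r in results if not isinstance(r, Exception)]
errors = [r for r in results if isinstance(r, Exception)]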

Retries that handle every failure mode

A 403 rotates the proxy country once; 408, 503, and 504 retry with exponential backoff; 401, 422, and other terminal codes fail fast.
import os
import time
import requests

KEY = os.environ["SCRAPEUNBLOCKER_KEY"]
RETRYABLE = {408, 503, 504}

def fetch(url, **params):
    rotated = False
    for attempt in range(4):
        r = requests.post(
            "https://api.scrapeunblocker.com/getPageSource",
            params={"url": url, **params},
            headers={"x-scrapeunblocker-key": KEY},
        )
        if r.status_code == 200:
            return r
        if r.status_code == 403 and not rotated:
            # Blocked: rotate the proxy country once, then retry immediately.
            params = {**params, "proxy_country": "us"}
            rotated = True
            continue
        if r.status_code in RETRYABLE:
            # Transient failure: back off 1s, 2s, 4s, 8s before retrying.
            time.sleep(2 ** attempt)
            continue
        # Terminal status (401, 422, ...): fail fast.
        r.raise_for_status()
    # Out of attempts: surface the last error.
    r.raise_for_status()
    return r

response = fetch("https://example.com", parsed_data=True)
See handling failures for what each status code means.