From f12eccc8674b2645628e438336664e979d4094d0 Mon Sep 17 00:00:00 2001
From: FrancescoSaverioZuppichini <francesco.zuppichini@gmail.com>
Date: Thu, 28 May 2026 11:08:15 +0200
Subject: [PATCH 1/4] Add Vercel AI SDK integration docs

---
 docs.json                  |   1 +
 integrations/vercel_ai.mdx | 262 +++++++++++++++++++++++++++----------
 logo/vercel.svg            |   9 ++
 3 files changed, 202 insertions(+), 70 deletions(-)
 create mode 100644 logo/vercel.svg
diff --git a/docs.json b/docs.json
index 2e312bf..1482bb7 100644
--- a/docs.json
+++ b/docs.json
@@ -96,6 +96,7 @@
               {
                 "group": "Frameworks",
                 "pages": [
+                  "integrations/vercel_ai",
                   "integrations/langchain",
                   "integrations/langgraph",
                   "integrations/llamaindex",
diff --git a/integrations/vercel_ai.mdx b/integrations/vercel_ai.mdx
index 9a4e3f9..196a040 100644
--- a/integrations/vercel_ai.mdx
+++ b/integrations/vercel_ai.mdx
@@ -1,108 +1,230 @@
 ---
-title: "⚡ Vercel AI"
-description: "Integrate ScrapeGraphAI into Vercel AI"
+title: "Vercel AI SDK"
+description: "Use ScrapeGraphAI as first-party tools inside Vercel AI SDK agents"
+icon: "/logo/vercel.svg"
 ---
 
 ## Overview
 
-[Vercel AI SDK](https://ai-sdk.dev/) is a popular JavaScript/TypeScript framework to interact with various LLM providers. This page shows how to integrate it with ScrapeGraph.
+`@scrapegraph-ai/ai-sdk` exposes ScrapeGraphAI endpoints as Vercel AI SDK tools. Add the tools to `generateText` or `streamText`, set `stopWhen`, and the model can scrape, extract, search, crawl, and monitor web data during the run.
 
-<Card
-  title="Official Vercel AI documentation"
-  icon="book"
-  href="https://ai-sdk.dev/"
->
-  View the Vercel AI SDK documentation
-</Card>
+<CardGroup cols={2}>
+  <Card
+    title="AI SDK docs"
+    icon="book"
+    href="https://ai-sdk.dev/docs/introduction"
+  >
+    Official Vercel AI SDK documentation
+  </Card>
+  <Card
+    title="Tool calling"
+    icon="wrench"
+    href="https://ai-sdk.dev/docs/ai-sdk-core/tools-and-tool-calling"
+  >
+    How AI SDK Core tools are executed
+  </Card>
+</CardGroup>
 
 ## Installation
 
-Follow our [JavaScript SDK installation steps](/sdks/javascript) using your favourite package manager:
+Install the ScrapeGraphAI tool package, the AI SDK, and the model provider you use. Run only the one command below that matches your package manager:
 
 ```bash
-# Using npm
-npm i scrapegraph-js
+npm i @scrapegraph-ai/ai-sdk ai @ai-sdk/openai
+pnpm add @scrapegraph-ai/ai-sdk ai @ai-sdk/openai
+yarn add @scrapegraph-ai/ai-sdk ai @ai-sdk/openai
+bun add @scrapegraph-ai/ai-sdk ai @ai-sdk/openai
+```
 
-# Using pnpm
-pnpm i scrapegraph-js
+Set your keys:
 
-# Using yarn
-yarn add scrapegraph-js
+```bash
+export SGAI_API_KEY="your-scrapegraph-key"
+export OPENAI_API_KEY="your-openai-key"
+```
+
+<Note>
+The tools read `SGAI_API_KEY` from the environment by default. You can also pass `{ apiKey: process.env.SGAI_API_KEY }` to any tool factory.
+</Note>
+
+## Quickstart
 
-# Using bun
-bun add scrapegraph-js
+Give the model a scrape tool and allow multiple steps so it can call the tool, receive the result, then write the final answer.
+
+```ts
+import { openai } from "@ai-sdk/openai";
+import { generateText, stepCountIs } from "ai";
+import { scrapeTool } from "@scrapegraph-ai/ai-sdk";
+
+const { text } = await generateText({
+  model: openai("gpt-5-nano"),
+  prompt:
+    "Scrape Hacker News and write a short, concise summary of what people are talking about today.",
+  tools: {
+    scrape: scrapeTool(),
+  },
+  stopWhen: stepCountIs(3),
+});
+
+console.log(text);
 ```
 
-Then, install [Vercel AI](https://ai-sdk.dev/docs/getting-started) with their [OpenAI provider](https://ai-sdk.dev/providers/ai-sdk-providers/openai):
+## Available tools
 
-```bash
-# Using npm
-npm i ai @ai-sdk/openai
+| Factory | What it gives the model |
+|---|---|
+| `scrapeTool()` | Scrape a page as markdown, HTML, JSON, links, images, summary, branding, or screenshot |
+| `extractTool()` | Extract structured JSON from a URL, HTML, or markdown with a prompt |
+| `searchTool()` | Search the web and optionally extract structured data from results |
+| `crawlTools()` | Start, poll, page through, stop, resume, and delete crawl jobs |
+| `monitorTools()` | Create, list, update, pause, resume, delete, and inspect monitor activity |
 
-# Using pnpm
-pnpm i ai @ai-sdk/openai
+Use a narrow tool set when the task is specific. Use all tools when the agent needs to decide the workflow:
 
-# Using yarn
-yarn add ai @ai-sdk/openai
+```ts
+import { openai } from "@ai-sdk/openai";
+import { generateText, stepCountIs } from "ai";
+import {
+  crawlTools,
+  extractTool,
+  monitorTools,
+  scrapeTool,
+  searchTool,
+} from "@scrapegraph-ai/ai-sdk";
+
+const { text } = await generateText({
+  model: openai("gpt-5-nano"),
+  prompt: "Search for ScrapeGraphAI docs, scrape the best page, and summarize it.",
+  tools: {
+    scrape: scrapeTool(),
+    extract: extractTool(),
+    search: searchTool(),
+    ...crawlTools(),
+    ...monitorTools(),
+  },
+  stopWhen: stepCountIs(10),
+});
 
-# Using bun
-bun add ai @ai-sdk/openai
+console.log(text);
 ```
 
-## Usage
+## Scrape example
 
-The ScrapeGraph SDK can be used like any other tool. See [Vercel AI tool calling docs](https://ai-sdk.dev/docs/ai-sdk-core/tools-and-tool-calling).
+This is the smallest useful agent: one scrape tool, a concrete target, and enough steps for the model to call the tool before answering.
 
 ```ts
-import { z } from "zod";
-import { generateText, tool } from "ai";
 import { openai } from "@ai-sdk/openai";
-import { extract } from "scrapegraph-js";
+import { generateText, stepCountIs } from "ai";
+import { scrapeTool } from "@scrapegraph-ai/ai-sdk";
 
 const result = await generateText({
-  model: openai("gpt-4.1-mini"),
+  model: openai("gpt-5-nano"),
+  prompt: "Find the main headline on https://example.com",
   tools: {
-    scrape: tool({
-      description: "Extract articles information from a given URL.",
-      parameters: z.object({
-        url: z.string().describe("The exact URL."),
-      }),
-      execute: async ({ url }) => {
-        const response = await extract(process.env.SGAI_API_KEY!, {
-          url,
-          prompt: "Extract the article information",
-          schema: {
-            type: "object",
-            properties: {
-              articles: {
-                type: "array",
-                items: {
-                  type: "object",
-                  properties: {
-                    title: { type: "string" },
-                    author: { type: "string" },
-                    publishDate: { type: "string" },
-                    content: { type: "string" },
-                    category: { type: "string" },
-                  },
-                },
-              },
-            },
-          },
-        });
-        return response.data?.json;
-      },
-    }),
+    scrape: scrapeTool(),
   },
-  prompt: "Can you find me the articles on https://scrapegraphai.com/blog?",
+  stopWhen: stepCountIs(5),
 });
 
-console.log(result);
+console.log(result.text);
 ```
 
-## Support
+Pass an API key explicitly when you do not want to rely on the default `SGAI_API_KEY` lookup, for example when the key comes from a secrets manager or a differently named variable:
 
-Need help with the integration?
+```ts
+const tools = {
+  scrape: scrapeTool({ apiKey: process.env.SGAI_API_KEY }),
+};
+```
+
+## Crawl example
+
+`crawlTools()` gives the model the full async crawl loop: start the job with `startCrawl`, poll status with `getCrawl`, then retrieve paginated pages with `getCrawlPages`.
+
+```ts
+import { openai } from "@ai-sdk/openai";
+import { generateText, stepCountIs } from "ai";
+import { crawlTools } from "@scrapegraph-ai/ai-sdk";
+
+const { text, steps } = await generateText({
+  model: openai("gpt-5-nano"),
+  prompt:
+    "Find 10 https://scrapegraphai.com/ blog posts. Start a crawl, poll its status, fetch crawled pages with getCrawlPages, then summarize what you found.",
+  tools: {
+    ...crawlTools(),
+  },
+  stopWhen: stepCountIs(20),
+});
+
+for (const step of steps) {
+  for (const toolCall of step.toolCalls) {
+    console.log(`[tool] ${toolCall.toolName}`);
+    console.log(JSON.stringify(toolCall.input, null, 2));
+  }
+}
+
+console.log(text);
+```
+
+For longer crawls, keep the same tools but add your app's own timeout, cancellation, and persistence around the AI SDK call.
+
+## Tool reference
+
+### Scrape
+
+```ts
+import { scrapeTool } from "@scrapegraph-ai/ai-sdk";
+
+const tools = {
+  scrape: scrapeTool(),
+};
+```
+
+### Extract
+
+```ts
+import { extractTool } from "@scrapegraph-ai/ai-sdk";
+
+const tools = {
+  extract: extractTool(),
+};
+```
+
+### Search
+
+```ts
+import { searchTool } from "@scrapegraph-ai/ai-sdk";
+
+const tools = {
+  search: searchTool(),
+};
+```
+
+### Crawl
+
+```ts
+import { crawlTools } from "@scrapegraph-ai/ai-sdk";
+
+const tools = {
+  ...crawlTools(),
+};
+```
+
+`crawlTools()` registers `startCrawl`, `getCrawl`, `getCrawlPages`, `stopCrawl`, `resumeCrawl`, and `deleteCrawl`.
+
+### Monitor
+
+```ts
+import { monitorTools } from "@scrapegraph-ai/ai-sdk";
+
+const tools = {
+  ...monitorTools(),
+};
+```
+
+`monitorTools()` registers `createMonitor`, `listMonitors`, `getMonitor`, `updateMonitor`, `deleteMonitor`, `pauseMonitor`, `resumeMonitor`, and `getMonitorActivity`.
+
+## Support
 
 <CardGroup cols={2}>
   <Card
diff --git a/logo/vercel.svg b/logo/vercel.svg
new file mode 100644
index 0000000..65e4171
--- /dev/null
+++ b/logo/vercel.svg
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 20010904//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd">
+<svg version="1.0" xmlns="http://www.w3.org/2000/svg" width="512.000000pt" height="512.000000pt" viewBox="0 0 512.000000 512.000000" preserveAspectRatio="xMidYMid meet">
+
+<g transform="translate(0.000000,512.000000) scale(0.100000,-0.100000)" stroke="none">
+<path fill="#000000" d="M2310 5109 c-502 -55 -974 -249 -1355 -556 -129 -104 -340 -321 -433 -445 -265 -354 -434 -758 -498 -1193 -22 -147 -29 -428 -15 -575 42 -433 180 -833 405 -1174 414 -629 1047 -1032 1791 -1142 147 -22 428 -29 575 -15 433 42 833 180 1174 405 629 414 1032 1047 1142 1791 22 147 29 428 15 575 -51 524 -238 988 -558 1385 -104 129 -321 340 -445 433 -354 265 -755 432 -1193 498 -121 18 -487 26 -605 13z"/>
+<path fill="#ffffff" d="M3219 2761 c347 -607 631 -1105 631 -1107 0 -2 -569 -4 -1265 -4 -696 0 -1265 2 -1265 5 0 8 1263 2216 1266 2212 1 -1 287 -499 633 -1106z"/>
+</g>
+</svg>

From cc0caef87578ee06e15e65fe75d316ea3f6b1ce9 Mon Sep 17 00:00:00 2001
From: FrancescoSaverioZuppichini <francesco.zuppichini@gmail.com>
Date: Thu, 28 May 2026 11:09:35 +0200
Subject: [PATCH 2/4] Add explicit Vercel AI SDK docs link

---
 integrations/vercel_ai.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/integrations/vercel_ai.mdx b/integrations/vercel_ai.mdx
index 196a040..294f78e 100644
--- a/integrations/vercel_ai.mdx
+++ b/integrations/vercel_ai.mdx
@@ -6,11 +6,11 @@ icon: "/logo/vercel.svg"
 
 ## Overview
 
-`@scrapegraph-ai/ai-sdk` exposes ScrapeGraphAI endpoints as Vercel AI SDK tools. Add the tools to `generateText` or `streamText`, set `stopWhen`, and the model can scrape, extract, search, crawl, and monitor web data during the run.
+`@scrapegraph-ai/ai-sdk` exposes ScrapeGraphAI endpoints as [Vercel AI SDK](https://ai-sdk.dev/docs/introduction) tools. Add the tools to `generateText` or `streamText`, set `stopWhen`, and the model can scrape, extract, search, crawl, and monitor web data during the run.
 
 <CardGroup cols={2}>
   <Card
-    title="AI SDK docs"
+    title="Vercel AI SDK docs"
     icon="book"
     href="https://ai-sdk.dev/docs/introduction"
   >

From 3e36159301fe0883ee1a5d63a1ab1f357b49f22f Mon Sep 17 00:00:00 2001
From: FrancescoSaverioZuppichini <francesco.zuppichini@gmail.com>
Date: Thu, 28 May 2026 11:12:23 +0200
Subject: [PATCH 3/4] Add missing crawl pages API reference

---
 api-reference/endpoint/crawl/pages.mdx | 103 +++++++++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 api-reference/endpoint/crawl/pages.mdx

diff --git a/api-reference/endpoint/crawl/pages.mdx b/api-reference/endpoint/crawl/pages.mdx
new file mode 100644
index 0000000..eacecd4
--- /dev/null
+++ b/api-reference/endpoint/crawl/pages.mdx
@@ -0,0 +1,103 @@
+---
+title: 'Get crawl pages'
+description: 'Fetch paginated crawl pages with resolved scrape results.'
+---
+
+```http
+GET https://v2-api.scrapegraphai.com/api/crawl/:id/pages
+```
+
+Returns a cursor-paginated slice of crawl pages for a job started with [`POST /api/crawl`](/api-reference/endpoint/crawl/start). Each returned page includes its lightweight crawl metadata and, when available, the resolved `scrape` result for that page.
+
+Use this endpoint for page content. Keep [`GET /api/crawl/:id`](/api-reference/endpoint/crawl/get-status) for lightweight status polling.
+
+## Path parameters
+
+<ParamField path="id" type="string" required>
+  The crawl job UUID returned by `POST /api/crawl`.
+</ParamField>
+
+## Query parameters
+
+<ParamField query="limit" type="integer" default="50">
+  Number of crawl pages to return in this response. Minimum `1`, maximum `100`.
+</ParamField>
+
+<ParamField query="cursor" type="integer" default="0">
+  Zero-based index cursor. `0` starts at the first crawl page. Use the `pagination.nextCursor` value from the previous response to fetch the next slice.
+</ParamField>
+
+### Pagination behavior
+
+`limit` controls the page size. If you omit it, the API returns up to `50` crawl pages. `cursor` is an index into the ordered crawl page list, not an opaque token. For example:
+
+```bash
+# First 50 crawl pages
+curl -X GET "https://v2-api.scrapegraphai.com/api/crawl/:id/pages?limit=50&cursor=0" \
+  -H "SGAI-APIKEY: $SGAI_API_KEY"
+
+# If the response returns "nextCursor": "50", fetch the next 50
+curl -X GET "https://v2-api.scrapegraphai.com/api/crawl/:id/pages?limit=50&cursor=50" \
+  -H "SGAI-APIKEY: $SGAI_API_KEY"
+```
+
+When `pagination.nextCursor` is `null`, there are no more crawl pages to fetch.
+
+## Example request
+
+```bash
+curl -X GET "https://v2-api.scrapegraphai.com/api/crawl/79694e03-f2ea-43f2-93cc-7c6fc26f999a/pages?limit=50&cursor=0" \
+  -H "SGAI-APIKEY: $SGAI_API_KEY"
+```
+
+## Example response
+
+```json
+{
+  "data": [
+    {
+      "url": "https://example.com",
+      "depth": 0,
+      "title": "",
+      "status": "completed",
+      "parentUrl": null,
+      "contentType": "text/html",
+      "links": ["https://iana.org/domains/example"],
+      "scrapeRefId": "83a911ed-c0bc-4a8c-ad62-8efeeb93f33a",
+      "scrape": {
+        "results": {
+          "markdown": {
+            "data": ["# Example Domain\n\nThis domain is for use in illustrative examples..."]
+          }
+        },
+        "metadata": {
+          "contentType": "text/html"
+        }
+      }
+    }
+  ],
+  "pagination": {
+    "limit": 50,
+    "nextCursor": null
+  }
+}
+```
+
+| Field | Description |
+|-------|-------------|
+| `data[]` | Ordered crawl pages for this slice. |
+| `data[].scrapeRefId` | UUID of the underlying Scrape request. |
+| `data[].scrape` | Resolved Scrape response for the page, when the page has a `scrapeRefId` and the result is available. |
+| `pagination.limit` | Echo of the requested page size. |
+| `pagination.nextCursor` | Cursor for the next request, or `null` when there are no more pages. |
+
+<Note>
+`scrape` is resolved by default. There is no `expand` or `populate` query parameter. If you only need one page's underlying Scrape request, you can also fetch `data[].scrapeRefId` with [`GET /api/history/:id`](/api-reference/endpoint/history).
+</Note>
+
+## Related
+
+- Start a job: [`POST /api/crawl`](/api-reference/endpoint/crawl/start)
+- Poll status: [`GET /api/crawl/:id`](/api-reference/endpoint/crawl/get-status)
+- Fetch one underlying scrape: [`GET /api/history/:id`](/api-reference/endpoint/history)
+- Stop / resume / delete: [Manage crawl jobs](/api-reference/endpoint/crawl/manage)

From 4c5cfe752f14a21ab91e40f977ec2dabd63ff37b Mon Sep 17 00:00:00 2001
From: FrancescoSaverioZuppichini <francesco.zuppichini@gmail.com>
Date: Thu, 28 May 2026 11:14:26 +0200
Subject: [PATCH 4/4] Revert "Add missing crawl pages API reference"

This reverts commit 3e36159301fe0883ee1a5d63a1ab1f357b49f22f.
---
 api-reference/endpoint/crawl/pages.mdx | 103 -------------------------
 1 file changed, 103 deletions(-)
 delete mode 100644 api-reference/endpoint/crawl/pages.mdx

diff --git a/api-reference/endpoint/crawl/pages.mdx b/api-reference/endpoint/crawl/pages.mdx
deleted file mode 100644
index eacecd4..0000000
--- a/api-reference/endpoint/crawl/pages.mdx
+++ /dev/null
@@ -1,103 +0,0 @@
----
-title: 'Get crawl pages'
-description: 'Fetch paginated crawl pages with resolved scrape results.'
----
-
-```http
-GET https://v2-api.scrapegraphai.com/api/crawl/:id/pages
-```
-
-Returns a cursor-paginated slice of crawl pages for a job started with [`POST /api/crawl`](/api-reference/endpoint/crawl/start). Each returned page includes its lightweight crawl metadata and, when available, the resolved `scrape` result for that page.
-
-Use this endpoint for page content. Keep [`GET /api/crawl/:id`](/api-reference/endpoint/crawl/get-status) for lightweight status polling.
-
-## Path parameters
-
-<ParamField path="id" type="string" required>
-  The crawl job UUID returned by `POST /api/crawl`.
-</ParamField>
-
-## Query parameters
-
-<ParamField query="limit" type="integer" default="50">
-  Number of crawl pages to return in this response. Minimum `1`, maximum `100`.
-</ParamField>
-
-<ParamField query="cursor" type="integer" default="0">
-  Zero-based index cursor. `0` starts at the first crawl page. Use the `pagination.nextCursor` value from the previous response to fetch the next slice.
-</ParamField>
-
-### Pagination behavior
-
-`limit` controls the page size. If you omit it, the API returns up to `50` crawl pages. `cursor` is an index into the ordered crawl page list, not an opaque token. For example:
-
-```bash
-# First 50 crawl pages
-curl -X GET "https://v2-api.scrapegraphai.com/api/crawl/:id/pages?limit=50&cursor=0" \
-  -H "SGAI-APIKEY: $SGAI_API_KEY"
-
-# If the response returns "nextCursor": "50", fetch the next 50
-curl -X GET "https://v2-api.scrapegraphai.com/api/crawl/:id/pages?limit=50&cursor=50" \
-  -H "SGAI-APIKEY: $SGAI_API_KEY"
-```
-
-When `pagination.nextCursor` is `null`, there are no more crawl pages to fetch.
-
-## Example request
-
-```bash
-curl -X GET "https://v2-api.scrapegraphai.com/api/crawl/79694e03-f2ea-43f2-93cc-7c6fc26f999a/pages?limit=50&cursor=0" \
-  -H "SGAI-APIKEY: $SGAI_API_KEY"
-```
-
-## Example response
-
-```json
-{
-  "data": [
-    {
-      "url": "https://example.com",
-      "depth": 0,
-      "title": "",
-      "status": "completed",
-      "parentUrl": null,
-      "contentType": "text/html",
-      "links": ["https://iana.org/domains/example"],
-      "scrapeRefId": "83a911ed-c0bc-4a8c-ad62-8efeeb93f33a",
-      "scrape": {
-        "results": {
-          "markdown": {
-            "data": ["# Example Domain\n\nThis domain is for use in illustrative examples..."]
-          }
-        },
-        "metadata": {
-          "contentType": "text/html"
-        }
-      }
-    }
-  ],
-  "pagination": {
-    "limit": 50,
-    "nextCursor": null
-  }
-}
-```
-
-| Field | Description |
-|-------|-------------|
-| `data[]` | Ordered crawl pages for this slice. |
-| `data[].scrapeRefId` | UUID of the underlying Scrape request. |
-| `data[].scrape` | Resolved Scrape response for the page, when the page has a `scrapeRefId` and the result is available. |
-| `pagination.limit` | Echo of the requested page size. |
-| `pagination.nextCursor` | Cursor for the next request, or `null` when there are no more pages. |
-
-<Note>
-`scrape` is resolved by default. There is no `expand` or `populate` query parameter. If you only need one page's underlying Scrape request, you can also fetch `data[].scrapeRefId` with [`GET /api/history/:id`](/api-reference/endpoint/history).
-</Note>
-
-## Related
-
-- Start a job: [`POST /api/crawl`](/api-reference/endpoint/crawl/start)
-- Poll status: [`GET /api/crawl/:id`](/api-reference/endpoint/crawl/get-status)
-- Fetch one underlying scrape: [`GET /api/history/:id`](/api-reference/endpoint/history)
-- Stop / resume / delete: [Manage crawl jobs](/api-reference/endpoint/crawl/manage)