From 01ca2384f098ecbb063ac4681e6d32f590a03f42 Mon Sep 17 00:00:00 2001 From: DPende Date: Mon, 25 Nov 2024 22:06:47 +0100 Subject: [PATCH 01/24] refactor: code refactoring --- scrapegraph-js/examples/getCredits_example.js | 2 +- scrapegraph-js/examples/getSmartScraperRequest_example.js | 4 ++-- scrapegraph-js/examples/sendFeedback_example.js | 6 +++--- scrapegraph-js/examples/smartScraper_example.js | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/scrapegraph-js/examples/getCredits_example.js b/scrapegraph-js/examples/getCredits_example.js index b4bf470..21419fb 100644 --- a/scrapegraph-js/examples/getCredits_example.js +++ b/scrapegraph-js/examples/getCredits_example.js @@ -1,4 +1,4 @@ -import { getCredits } from "scrapegraph-sdk"; +import { getCredits } from 'scrapegraph-sdk'; import 'dotenv/config'; try { diff --git a/scrapegraph-js/examples/getSmartScraperRequest_example.js b/scrapegraph-js/examples/getSmartScraperRequest_example.js index e0f362c..249dfe7 100644 --- a/scrapegraph-js/examples/getSmartScraperRequest_example.js +++ b/scrapegraph-js/examples/getSmartScraperRequest_example.js @@ -1,9 +1,9 @@ -import { getSmartScraperRequest } from "scrapegraph-sdk"; +import { getSmartScraperRequest } from 'scrapegraph-sdk'; import 'dotenv/config'; try { const apiKey = process.env.SGAI_APIKEY; - const requestId = "3fa85f64-5717-4562-b3fc-2c963f66afa6" + const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6' const requestInfo = await getSmartScraperRequest(apiKey, requestId); diff --git a/scrapegraph-js/examples/sendFeedback_example.js b/scrapegraph-js/examples/sendFeedback_example.js index f72c28d..423e5c3 100644 --- a/scrapegraph-js/examples/sendFeedback_example.js +++ b/scrapegraph-js/examples/sendFeedback_example.js @@ -1,11 +1,11 @@ -import { sendFeedback } from "scrapegraph-sdk"; +import { sendFeedback } from 'scrapegraph-sdk'; import 'dotenv/config'; try { const apiKey = process.env.SGAI_APIKEY; - const requestId = "16a63a80-c87f-4cde-b005-e6c3ecda278b"; + const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; const rating = 5; - const feedbackMessage = "This is a test feedback message."; + const feedbackMessage = 'This is a test feedback message.'; const feedback_response = await sendFeedback(apiKey, requestId, rating, feedbackMessage); console.log(feedback_response); diff --git a/scrapegraph-js/examples/smartScraper_example.js b/scrapegraph-js/examples/smartScraper_example.js index 18e8212..96d61b3 100644 --- a/scrapegraph-js/examples/smartScraper_example.js +++ b/scrapegraph-js/examples/smartScraper_example.js @@ -1,10 +1,10 @@ -import { smartScraper } from "scrapegraph-sdk"; +import { smartScraper } from 'scrapegraph-sdk'; import 'dotenv/config'; try { const apiKey = process.env.SGAI_APIKEY; - const url = "https://scrapegraphai.com"; - const prompt = "What does the company do?"; + const url = 'https://scrapegraphai.com'; + const prompt = 'What does the company do?'; const response = await smartScraper(apiKey, url, prompt); From 96649f05301da920d143c644e2492623675d6d2b Mon Sep 17 00:00:00 2001 From: DPende Date: Mon, 25 Nov 2024 22:08:21 +0100 Subject: [PATCH 02/24] add: implemented error handling --- scrapegraph-js/src/credits.js | 23 +++------ scrapegraph-js/src/feedback.js | 27 ++++------ scrapegraph-js/src/smartScraper.js | 67 +++++++------------------ scrapegraph-js/src/utils/handleError.js | 40 +++++++++++++++ 4 files changed, 74 insertions(+), 83 deletions(-) create mode 100644 scrapegraph-js/src/utils/handleError.js diff --git 
a/scrapegraph-js/src/credits.js b/scrapegraph-js/src/credits.js index 8f541f0..1c83e90 100644 --- a/scrapegraph-js/src/credits.js +++ b/scrapegraph-js/src/credits.js @@ -1,4 +1,5 @@ import axios from 'axios'; +import handleError from './utils/handleError.js'; /** * Retrieve credits from the API. @@ -6,27 +7,17 @@ import axios from 'axios'; * @param {string} apiKey - Your ScrapeGraph AI API key * @returns {Promise} Response from the API in JSON format */ -export async function credits(apiKey) { - const endpoint = "https://api.scrapegraphai.com/v1/credits"; +export async function getCredits(apiKey) { + const endpoint = 'https://api.scrapegraphai.com/v1/credits'; const headers = { - "accept": "application/json", - "SGAI-APIKEY": apiKey + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey }; try { const response = await axios.get(endpoint, { headers }); - return JSON.stringify(response.data); + return response.data; } catch (error) { - if (error.response) { - return JSON.stringify({ - error: "HTTP error occurred", - message: error.message, - status_code: error.response.status - }); - } - return JSON.stringify({ - error: "An error occurred", - message: error.message - }); + handleError(error) } } \ No newline at end of file diff --git a/scrapegraph-js/src/feedback.js b/scrapegraph-js/src/feedback.js index 6630631..206a87f 100644 --- a/scrapegraph-js/src/feedback.js +++ b/scrapegraph-js/src/feedback.js @@ -1,4 +1,5 @@ import axios from 'axios'; +import handleError from './utils/handleError.js'; /** * Send feedback to the API. @@ -6,15 +7,15 @@ import axios from 'axios'; * @param {string} apiKey - Your ScrapeGraph AI API key * @param {string} requestId - The request ID associated with the feedback * @param {number} rating - The rating score - * @param {string} feedbackText - The feedback message to send + * @param {string} feedbackText - Optional feedback message to send * @returns {Promise} Response from the API in JSON format */ -export async function feedback(apiKey, requestId, rating, feedbackText) { - const endpoint = "https://api.scrapegraphai.com/v1/feedback"; +export async function sendFeedback(apiKey, requestId, rating, feedbackText = null) { + const endpoint = 'https://api.scrapegraphai.com/v1/feedback'; const headers = { - "accept": "application/json", - "SGAI-APIKEY": apiKey, - "Content-Type": "application/json" + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json' }; const feedbackData = { @@ -25,18 +26,8 @@ export async function feedback(apiKey, requestId, rating, feedbackText) { try { const response = await axios.post(endpoint, feedbackData, { headers }); - return JSON.stringify(response.data); + return response.data; } catch (error) { - if (error.response) { - return JSON.stringify({ - error: "HTTP error occurred", - message: error.message, - status_code: error.response.status - }); - } - return JSON.stringify({ - error: "An error occurred", - message: error.message - }); + handleError(error); } } \ No newline at end of file diff --git a/scrapegraph-js/src/smartScraper.js b/scrapegraph-js/src/smartScraper.js index dcaeba3..5f8d3f5 100644 --- a/scrapegraph-js/src/smartScraper.js +++ b/scrapegraph-js/src/smartScraper.js @@ -1,4 +1,5 @@ import axios from 'axios'; +import handleError from './utils/handleError.js' /** * Scrape and extract structured data from a webpage using ScrapeGraph AI. 
@@ -8,13 +9,14 @@ import axios from 'axios'; * @param {string} prompt - Natural language prompt describing what data to extract * @param {Object} [schema] - Optional schema object defining the output structure * @returns {Promise} Extracted data in JSON format matching the provided schema + * @throws - Will throw an error in case of an HTTP failure. */ export async function smartScraper(apiKey, url, prompt, schema = null) { - const endpoint = "https://api.scrapegraphai.com/v1/smartscraper"; + const endpoint = 'https://api.scrapegraphai.com/v1/smartscraper'; const headers = { - "accept": "application/json", - "SGAI-APIKEY": apiKey, - "Content-Type": "application/json" + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, + 'Content-Type': 'application/json' }; const payload = { @@ -24,8 +26,8 @@ export async function smartScraper(apiKey, url, prompt, schema = null) { if (schema) { payload.output_schema = { - description: schema.title || "Schema", - name: schema.title || "Schema", + description: schema.title || 'Schema', + name: schema.title || 'Schema', properties: schema.properties || {}, required: schema.required || [] }; @@ -33,63 +35,30 @@ export async function smartScraper(apiKey, url, prompt, schema = null) { try { const response = await axios.post(endpoint, payload, { headers }); - return JSON.stringify(response.data); + return response.data; } catch (error) { - if (error.response) { - if (error.response.status === 403) { - return JSON.stringify({ - error: "Access forbidden (403)", - message: "You do not have permission to access this resource." - }); - } - return JSON.stringify({ - error: "HTTP error occurred", - message: error.message, - status_code: error.response.status - }); - } - return JSON.stringify({ - error: "An error occurred", - message: error.message - }); + handleError(error) } } /** - * Retrieve the status or the result of a scraping request. It also allows you to see the result of old requests. + * Retrieve the status or the result of a smartScraper request. It also allows you to see the result of old requests. * * @param {string} apiKey - Your ScrapeGraph AI API key - * @param {string} requestId - The request ID associated with the feedback + * @param {string} requestId - The request ID associated with the output of a smartScraper request. * @returns {Promise} Information related to the status or result of a scraping request. */ -export async function smartScraperInfo(apiKey, requestId) { - const endpoint = "https://api.scrapegraphai.com/v1/smartscraper/" + requestId; +export async function getSmartScraperRequest(apiKey, requestId) { + const endpoint = 'https://api.scrapegraphai.com/v1/smartscraper/' + requestId; const headers = { - "accept": "application/json", - "SGAI-APIKEY": apiKey, + 'accept': 'application/json', + 'SGAI-APIKEY': apiKey, }; try { const response = await axios.get(endpoint, { headers }); - return JSON.stringify(response.data) + return response.data; } catch (error) { - if (error.response) { - if (error.response.status === 403) { - return JSON.stringify({ - error: "Access forbidden (403)", - message: "You do not have permission to access this resource." 
- }); - } - return JSON.stringify({ - error: "HTTP error occurred", - message: error.message, - status_code: error.response.status - }); - } - return JSON.stringify({ - error: "An error occurred", - message: error.message - }); + handleError(error) } - } \ No newline at end of file diff --git a/scrapegraph-js/src/utils/handleError.js b/scrapegraph-js/src/utils/handleError.js new file mode 100644 index 0000000..eec79d7 --- /dev/null +++ b/scrapegraph-js/src/utils/handleError.js @@ -0,0 +1,40 @@ +class HttpError extends Error { + constructor(statusCode, title, detail) { + super(HttpError.makeMessage(statusCode, title, detail)); + this.statusCode = statusCode; + this.title = title; + this.detail = detail; + } + + static makeMessage(statusCode, title, detail) { + let message = ''; + + message += statusCode ? `${statusCode} - ` : '(unknown status code) - '; + message += title ? `${title} - ` : '(unknown error message) - '; + message += detail ? `${JSON.stringify(detail)}` : '(unknown error detail)'; + + return message; + } +} + +class NetworkError extends Error { + constructor(message) { + super(message); + } +} + +class UnexpectedError extends Error { + constructor(message) { + super(message); + } +} + +export default function handleError(error) { + if (error.response) { + throw new HttpError(error.response.status, error.response.statusText, error.response.data.detail) + } else if (error.request) { + throw new NetworkError('Impossible to contact the server. Check your internet connection.'); + } else { + throw new UnexpectedError(`${error.message}`); + } +} \ No newline at end of file From 6752c4379535ec282118f7d5eac13494d5bf1af5 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Thu, 28 Nov 2024 13:26:06 +0100 Subject: [PATCH 03/24] removed unued import --- scrapegraph-py/examples/smartscraper_example.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/scrapegraph-py/examples/smartscraper_example.py b/scrapegraph-py/examples/smartscraper_example.py index 9585a9d..5d44348 100644 --- a/scrapegraph-py/examples/smartscraper_example.py +++ b/scrapegraph-py/examples/smartscraper_example.py @@ -16,8 +16,4 @@ print(f"Request ID: {response['request_id']}") print(f"Result: {response['result']}") -# Get previous results using get_smartscraper -# result = sgai_client.get_smartscraper(request_id=response['request_id']) -# print(f"\nRetrieved Result: {result}") - sgai_client.close() From 3c2178e04e873885abc8aca0312f5a4a1dd9cdd0 Mon Sep 17 00:00:00 2001 From: Lorenzo Padoan Date: Thu, 28 Nov 2024 17:28:58 +0100 Subject: [PATCH 04/24] fix: readme js sdk --- scrapegraph-js/readme.md | 238 +++++++++++++++++++++++---------------- scrapegraph-py/README.md | 7 +- 2 files changed, 147 insertions(+), 98 deletions(-) diff --git a/scrapegraph-js/readme.md b/scrapegraph-js/readme.md index f4db6b6..2abcf17 100644 --- a/scrapegraph-js/readme.md +++ b/scrapegraph-js/readme.md @@ -1,150 +1,200 @@ -# ScrapeGraph JS SDK +# 🌐 ScrapeGraph JavaScript SDK -A JavaScript SDK for interacting with the ScrapeGraph AI API. This SDK provides easy-to-use functions for web scraping, managing credits, and submitting feedback. 
+[![npm version](https://badge.fury.io/js/scrapegraph-js.svg)](https://badge.fury.io/js/scrapegraph-js) +[![TypeScript Support](https://img.shields.io/badge/TypeScript-Ready-blue.svg)](https://www.typescriptlang.org/) +[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) +[![Build Status](https://github.com/ScrapeGraphAI/scrapegraph-sdk/actions/workflows/ci.yml/badge.svg)](https://github.com/ScrapeGraphAI/scrapegraph-sdk/actions) +[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://docs.scrapegraphai.com) -## Installation +Official JavaScript/TypeScript SDK for the ScrapeGraph AI API - Smart web scraping powered by AI. -Install the package using npm: +## 🚀 Features -```bash -npm install scrapegraph-js -``` +- ✨ Smart web scraping with AI +- 🔄 Fully asynchronous design +- 📊 TypeScript-ready with strongly typed responses +- 🔍 Detailed error handling +- ⚡ Automatic retries and logging +- 🔐 Secure API authentication -## Usage +## 📦 Installation -> [!WARNING] -> Remember not to write API keys directly in the code; instead, store them securely in `.env` files. +Install the package using npm or yarn: -First, import the required functions: +```bash +# Using npm +npm install scrapegraph-js -```javascript -import { smartScraper, getSmartScraperRequest, getCredits, sendFeedback } from 'scrapegraph-sdk'; -``` -### Scraping Websites +## 🔧 Quick Start + +> **Note**: Store your API keys securely in environment variables. Use `.env` files and libraries like `dotenv` to load them into your app. -#### Basic scraping +### Basic Example ```javascript -import { smartScraper } from 'scrapegraph-sdk'; +import { smartScraper } from 'scrapegraph-js'; -const apiKey = process.env.SGAI_APIKEY; -const url = 'https://scrapegraphai.com'; +// Initialize variables +const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable +const websiteUrl = 'https://example.com'; const prompt = 'What does the company do?'; -try { - const response = await smartScraper(apiKey, url, prompt); - console.log(response); -} catch (error) { - console.error(error); -} +(async () => { + try { + const response = await smartScraper(apiKey, websiteUrl, prompt); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); ``` -#### Scraping with custom output schema +## 🎯 Examples + +### Scraping Websites + +#### Basic Scraping ```javascript -import { smartScraper } from 'scrapegraph-sdk'; +import { smartScraper } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; +const url = 'https://example.com'; +const prompt = 'Extract the main heading and description.'; + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); +``` -const apiKey = 'your_api_key'; -const url = 'https://scrapegraphai.com'; -const prompt = 'What does the company do?'; -const schema = //TODO +#### Scraping with Custom Output Schema + +```typescript +import { smartScraper } from 'scrapegraph-js'; -try { - const response = await smartScraper(apiKey, url, prompt, schema); - console.log(response); -} catch (error) { - console.error(error); +interface WebsiteData { + title: string; + description: string; } + +const apiKey = 'your-api-key'; +const url = 'https://example.com'; +const prompt = 'Extract the title and description.'; + +(async () => { + try { + const response = await smartScraper(apiKey, url, 
prompt); + console.log(response.result.title, response.result.description); + } catch (error) { + console.error('Error:', error); + } +})(); ``` -### Checking Credits +### Checking API Credits ```javascript -import { getCredist } from 'scrapegraph-sdk'; - -const apiKey = 'your_api_key'; - -try { - const myCredit = await getCredits(apiKey); - console.log(myCredit) -} catch (error) { - console.error(error) -} +import { getCredits } from 'scrapegraph-js'; + +const apiKey = 'your-api-key'; + +(async () => { + try { + const credits = await getCredits(apiKey); + console.log('Available credits:', credits); + } catch (error) { + console.error('Error fetching credits:', error); + } +})(); ``` ### Submitting Feedback ```javascript -import { sendFeedback } from 'scrapegraph-sdk'; +import { sendFeedback } from 'scrapegraph-js'; -const apiKey = 'your_api_key'; +const apiKey = 'your-api-key'; const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; const rating = 5; -const feedbackMessage = 'This is a test feedback message.'; - -try { - const feedback_response = await sendFeedback(apiKey, requestId, rating, feedbackMessage); - console.log(feedback_response); -} catch (error) { - console.error(error) -} +const feedbackText = 'This is a test feedback message.'; + +(async () => { + try { + const response = await sendFeedback(apiKey, requestId, rating, feedbackText); + console.log('Feedback response:', response); + } catch (error) { + console.error('Error sending feedback:', error); + } +})(); ``` -## API Reference - -### scrape(apiKey, url[, options]) +## 📚 Documentation -Scrapes a website and returns the extracted data. +For detailed documentation, visit [docs.scrapegraphai.com](https://docs.scrapegraphai.com) -Parameters: -- `apiKey` (string): Your ScrapeGraph AI API key -- `url` (string): The URL to scrape -- `options` (object, optional): - - `elements` (array): Specific elements to extract - - `wait_for` (string): CSS selector to wait for before scraping - - `javascript` (boolean): Enable JavaScript rendering +## 🛠️ Development -### credits(apiKey) +### Setup -Retrieves your current credit balance. +1. Clone the repository: + ```bash + git clone https://github.com/ScrapeGraphAI/scrapegraph-sdk.git + cd scrapegraph-sdk/scrapegraph-js + ``` -Parameters: -- `apiKey` (string): Your ScrapeGraph AI API key +2. Install dependencies: + ```bash + npm install + ``` -### feedback(apiKey, requestId, rating, feedbackText) +3. Run linting and testing: + ```bash + npm run lint + npm test + ``` -Submits feedback for a scraping request. +### Running Tests -Parameters: -- `apiKey` (string): Your ScrapeGraph AI API key -- `requestId` (string): The request ID from the scrape response -- `rating` (number): Rating score -- `feedbackText` (string) (optional): Feedback message +```bash +# Run all tests +npm test -## Error Handling +# Run tests with coverage +npm run test:coverage +``` -All functions return javascript `Error` object with imformation. In case of errors, the response will include error details: +## 📝 License -// TODO error list +This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. -```javascript -{ - "statusCode": 400, - "title": "HTTP error occurred" - "details": "Error details", - -} -``` +## 🤝 Contributing -## License +Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change. -MIT +1. Fork the repository +2. 
Create your feature branch (`git checkout -b feature/AmazingFeature`) +3. Commit your changes (`git commit -m 'Add some AmazingFeature'`) +4. Push to the branch (`git push origin feature/AmazingFeature`) +5. Open a Pull Request -## Support +## 🔗 Links -For support, please visit [ScrapeGraph AI Documentation](https://sgai-api.onrender.com/docs). +- [Website](https://scrapegraphai.com) +- [Documentation](https://scrapegraphai.com/documentation) +- [GitHub](https://github.com/ScrapeGraphAI/scrapegraph-sdk) +## 💬 Support +- 📧 Email: support@scrapegraphai.com +- 💻 GitHub Issues: [Create an issue](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues) +- 🌟 Feature Requests: [Request a feature](https://github.com/ScrapeGraphAI/scrapegraph-sdk/issues/new) +--- +Made with ❤️ by [ScrapeGraph AI](https://scrapegraphai.com) diff --git a/scrapegraph-py/README.md b/scrapegraph-py/README.md index f2e40f9..221698e 100644 --- a/scrapegraph-py/README.md +++ b/scrapegraph-py/README.md @@ -155,10 +155,9 @@ Contributions are welcome! Please feel free to submit a Pull Request. For major ## 🔗 Links -- [Website](https://scrapegraphai.com) -- [Documentation](https://docs.scrapegraphai.com) -- [API Reference](https://docs.scrapegraphai.com/api) -- [GitHub](https://github.com/ScrapeGraphAI/scrapegraph-sdk) +- [Website](https://scrapegraphai.com) +- [Documentation](https://scrapegraphai.com/documentation) +- [GitHub](https://github.com/ScrapeGraphAI/scrapegraph-sdk) ## 💬 Support From 9e9e138617658e068a1c77a4dbac24b4d550d42a Mon Sep 17 00:00:00 2001 From: DPende Date: Thu, 28 Nov 2024 17:57:23 +0100 Subject: [PATCH 05/24] chore: changed pakage name --- scrapegraph-js/package.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scrapegraph-js/package.json b/scrapegraph-js/package.json index e1da1c4..e2d089d 100644 --- a/scrapegraph-js/package.json +++ b/scrapegraph-js/package.json @@ -1,5 +1,5 @@ { - "name": "scrapegraph-sdk", + "name": "scrapegraph-js", "author": "ScrapeGraphAI", "version": "0.0.1", "description": "Scrape and extract structured data from a webpage using ScrapeGraph AI.", @@ -23,7 +23,8 @@ "module": "index.js", "type": "module", "dependencies": { - "axios": "^1.6.0" + "axios": "^1.6.0", + "zod": "^3.23.8" }, "devDependencies": { "dotenv": "^16.4.5" From 88a2f509dc34ad69f41fe6d13f31de191895bc1a Mon Sep 17 00:00:00 2001 From: DPende Date: Thu, 28 Nov 2024 17:58:17 +0100 Subject: [PATCH 06/24] fix: removed wrong information --- scrapegraph-js/readme.md | 33 +++++++++------------------------ 1 file changed, 9 insertions(+), 24 deletions(-) diff --git a/scrapegraph-js/readme.md b/scrapegraph-js/readme.md index 2abcf17..a69e313 100644 --- a/scrapegraph-js/readme.md +++ b/scrapegraph-js/readme.md @@ -1,7 +1,6 @@ # 🌐 ScrapeGraph JavaScript SDK [![npm version](https://badge.fury.io/js/scrapegraph-js.svg)](https://badge.fury.io/js/scrapegraph-js) -[![TypeScript Support](https://img.shields.io/badge/TypeScript-Ready-blue.svg)](https://www.typescriptlang.org/) [![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT) [![Build Status](https://github.com/ScrapeGraphAI/scrapegraph-sdk/actions/workflows/ci.yml/badge.svg)](https://github.com/ScrapeGraphAI/scrapegraph-sdk/actions) [![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://docs.scrapegraphai.com) @@ -11,8 +10,7 @@ Official JavaScript/TypeScript SDK for the ScrapeGraph AI API - Smart web scrapi ## 🚀 Features - ✨ Smart web scraping with AI -- 🔄 Fully 
asynchronous design -- 📊 TypeScript-ready with strongly typed responses +- 🔄 Fully asynchronous design - 🔍 Detailed error handling - ⚡ Automatic retries and logging - 🔐 Secure API authentication @@ -23,7 +21,11 @@ Install the package using npm or yarn: ```bash # Using npm -npm install scrapegraph-js +npm i scrapegraph-js + +# Using yarn +yarn add scrapegraph-js +``` ## 🔧 Quick Start @@ -34,6 +36,7 @@ npm install scrapegraph-js ```javascript import { smartScraper } from 'scrapegraph-js'; +import 'dotenv/config'; // Initialize variables const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable @@ -75,26 +78,8 @@ const prompt = 'Extract the main heading and description.'; #### Scraping with Custom Output Schema -```typescript -import { smartScraper } from 'scrapegraph-js'; - -interface WebsiteData { - title: string; - description: string; -} - -const apiKey = 'your-api-key'; -const url = 'https://example.com'; -const prompt = 'Extract the title and description.'; - -(async () => { - try { - const response = await smartScraper(apiKey, url, prompt); - console.log(response.result.title, response.result.description); - } catch (error) { - console.error('Error:', error); - } -})(); +```javascript +//TODO ``` ### Checking API Credits From b5f2d4c755faf285ca7c7588eeefc0750d022696 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 29 Nov 2024 10:17:40 +0000 Subject: [PATCH 07/24] ci(release): 1.2.1 [skip ci] ## [1.2.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.0...v1.2.1) (2024-11-29) ### Bug Fixes * readme js sdk ([3c2178e](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/3c2178e04e873885abc8aca0312f5a4a1dd9cdd0)) * removed wrong information ([88a2f50](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/88a2f509dc34ad69f41fe6d13f31de191895bc1a)) ### chore * changed pakage name ([9e9e138](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/9e9e138617658e068a1c77a4dbac24b4d550d42a)) * fix pylint scripts ([5913d5f](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/5913d5f0d697196469f8ec952e1a65e1c7f49621)) ### Docs * improved examples ([a9c1fa5](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/a9c1fa5dcd7610b2b0c217d39fb2b77a67aa3fac)) * updated precommit and installation guide ([c16705b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/c16705b8f405f57d2cb1719099d4b566186a7257)) * updated readme ([ee9efa6](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/ee9efa608b9a284861f712ab2a69d49da3d26523)) ### Refactor * code refactoring ([01ca238](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/01ca2384f098ecbb063ac4681e6d32f590a03f42)) --- scrapegraph-py/CHANGELOG.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index 593e7b9..797c5e6 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,29 @@ +## [1.2.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.0...v1.2.1) (2024-11-29) + + +### Bug Fixes + +* readme js sdk ([3c2178e](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/3c2178e04e873885abc8aca0312f5a4a1dd9cdd0)) +* removed wrong information ([88a2f50](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/88a2f509dc34ad69f41fe6d13f31de191895bc1a)) + + +### chore + +* changed pakage name ([9e9e138](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/9e9e138617658e068a1c77a4dbac24b4d550d42a)) +* fix pylint scripts 
([5913d5f](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/5913d5f0d697196469f8ec952e1a65e1c7f49621)) + + +### Docs + +* improved examples ([a9c1fa5](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/a9c1fa5dcd7610b2b0c217d39fb2b77a67aa3fac)) +* updated precommit and installation guide ([c16705b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/c16705b8f405f57d2cb1719099d4b566186a7257)) +* updated readme ([ee9efa6](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/ee9efa608b9a284861f712ab2a69d49da3d26523)) + + +### Refactor + +* code refactoring ([01ca238](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/01ca2384f098ecbb063ac4681e6d32f590a03f42)) + ## [1.2.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.1.0...v1.2.0) (2024-11-28) From d737996e813be7c47d654b0e33ec08319b052511 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Fri, 29 Nov 2024 11:19:14 +0100 Subject: [PATCH 08/24] Update package.json --- scrapegraph-js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraph-js/package.json b/scrapegraph-js/package.json index e2d089d..e2d1ffb 100644 --- a/scrapegraph-js/package.json +++ b/scrapegraph-js/package.json @@ -2,7 +2,7 @@ "name": "scrapegraph-js", "author": "ScrapeGraphAI", "version": "0.0.1", - "description": "Scrape and extract structured data from a webpage using ScrapeGraph AI.", + "description": "Scrape and extract structured data from a webpage using ScrapeGraphAI's APIs.", "repository": { "type": "git", "url": "https://github.com/ScrapeGraphAI/scrapegraph-sdk", From 46ebd9dc9897ca2ef9460a3e46b3a24abe90f943 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Fri, 29 Nov 2024 15:28:27 +0100 Subject: [PATCH 09/24] fix: add enw timeout --- scrapegraph-py/scrapegraph_py/async_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraph-py/scrapegraph_py/async_client.py b/scrapegraph-py/scrapegraph_py/async_client.py index e53222a..e7c44e1 100644 --- a/scrapegraph-py/scrapegraph_py/async_client.py +++ b/scrapegraph-py/scrapegraph_py/async_client.py @@ -22,7 +22,7 @@ def __init__( self, api_key: str, verify_ssl: bool = True, - timeout: float = 30, + timeout: float = 120, max_retries: int = 3, retry_delay: float = 1.0, ): From d6e7f409f2550db60d70b1c7f143a188cbdaf1ed Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Fri, 29 Nov 2024 14:29:37 +0000 Subject: [PATCH 10/24] ci(release): 1.2.2 [skip ci] ## [1.2.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.1...v1.2.2) (2024-11-29) ### Bug Fixes * add enw timeout ([46ebd9d](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/46ebd9dc9897ca2ef9460a3e46b3a24abe90f943)) --- scrapegraph-py/CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index 797c5e6..b1a959f 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.2.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.1...v1.2.2) (2024-11-29) + + +### Bug Fixes + +* add enw timeout ([46ebd9d](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/46ebd9dc9897ca2ef9460a3e46b3a24abe90f943)) + ## [1.2.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.0...v1.2.1) (2024-11-29) From a2b57c7e482dfb5c7c1a125d1684e0367088c83b Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:13:21 +0100 Subject: [PATCH 11/24] refactor: code refactoring --- scrapegraph-js/examples/.env.example | 2 +- 
scrapegraph-js/examples/getCredits_example.js | 7 +++---- .../examples/getSmartScraperRequest_example.js | 9 ++++----- scrapegraph-js/examples/sendFeedback_example.js | 12 ++++++------ scrapegraph-js/examples/smartScraper_example.js | 11 +++++------ 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/scrapegraph-js/examples/.env.example b/scrapegraph-js/examples/.env.example index 21d84ef..8d318c0 100644 --- a/scrapegraph-js/examples/.env.example +++ b/scrapegraph-js/examples/.env.example @@ -1,2 +1,2 @@ # ScrapegraphAI API Key -SGAI-APIKEY="your ScrapegraphAI API Key" \ No newline at end of file +SGAI_APIKEY="your ScrapegraphAI API Key" \ No newline at end of file diff --git a/scrapegraph-js/examples/getCredits_example.js b/scrapegraph-js/examples/getCredits_example.js index 7051b8d..a102028 100644 --- a/scrapegraph-js/examples/getCredits_example.js +++ b/scrapegraph-js/examples/getCredits_example.js @@ -1,11 +1,10 @@ -import { getCredits } from 'scrapegraph-sdk'; +import { getCredits } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; +const apiKey = process.env.SGAI_APIKEY; +try { const myCredit = await getCredits(apiKey); - console.log(myCredit) } catch (error) { console.error(error) diff --git a/scrapegraph-js/examples/getSmartScraperRequest_example.js b/scrapegraph-js/examples/getSmartScraperRequest_example.js index 88cfbbc..6a2d7fb 100644 --- a/scrapegraph-js/examples/getSmartScraperRequest_example.js +++ b/scrapegraph-js/examples/getSmartScraperRequest_example.js @@ -1,12 +1,11 @@ -import { getSmartScraperRequest } from 'scrapegraph-sdk'; +import { getSmartScraperRequest } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; - const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6' +const apiKey = process.env.SGAI_APIKEY; +const requestId = '3fa85f64-5717-4562-b3fc-2c963f66afa6' +try { const requestInfo = await getSmartScraperRequest(apiKey, requestId); - console.log(requestInfo); } catch (error) { console.error(error); diff --git a/scrapegraph-js/examples/sendFeedback_example.js b/scrapegraph-js/examples/sendFeedback_example.js index 423e5c3..a3f246d 100644 --- a/scrapegraph-js/examples/sendFeedback_example.js +++ b/scrapegraph-js/examples/sendFeedback_example.js @@ -1,12 +1,12 @@ -import { sendFeedback } from 'scrapegraph-sdk'; +import { sendFeedback } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; - const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; - const rating = 5; - const feedbackMessage = 'This is a test feedback message.'; +const apiKey = process.env.SGAI_APIKEY; +const requestId = '16a63a80-c87f-4cde-b005-e6c3ecda278b'; +const rating = 5; +const feedbackMessage = 'This is a test feedback message.'; +try { const feedback_response = await sendFeedback(apiKey, requestId, rating, feedbackMessage); console.log(feedback_response); } catch (error) { diff --git a/scrapegraph-js/examples/smartScraper_example.js b/scrapegraph-js/examples/smartScraper_example.js index 96d61b3..38e5613 100644 --- a/scrapegraph-js/examples/smartScraper_example.js +++ b/scrapegraph-js/examples/smartScraper_example.js @@ -1,13 +1,12 @@ -import { smartScraper } from 'scrapegraph-sdk'; +import { smartScraper } from 'scrapegraph-js'; import 'dotenv/config'; -try { - const apiKey = process.env.SGAI_APIKEY; - const url = 'https://scrapegraphai.com'; - const prompt = 'What does the company do?'; +const apiKey = process.env.SGAI_APIKEY; +const url = 
'https://scrapegraphai.com'; +const prompt = 'What does the company do?'; +try { const response = await smartScraper(apiKey, url, prompt); - console.log(response); } catch (error) { console.error(error); From 129917377b6a685d769a480b717bf980d3199833 Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:17:18 +0100 Subject: [PATCH 12/24] fix: the "workspace" key has been removed because it was conflicting with the package.json file in the scrapegraph-js folder. --- package.json | 3 --- 1 file changed, 3 deletions(-) diff --git a/package.json b/package.json index b902016..a02e34d 100644 --- a/package.json +++ b/package.json @@ -9,9 +9,6 @@ }, "author": "ScrapeGraphAI", "license": "MIT", - "workspaces": [ - "scrapegraph-js" - ], "scripts": { "semantic-release": "semantic-release" }, From ee5738bd737cd07a553d148403a4bbb5e80e5be3 Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:18:41 +0100 Subject: [PATCH 13/24] chore: added Zod package dependency --- scrapegraph-js/package-lock.json | 26 +++++++++++++++++++++++--- scrapegraph-js/package.json | 3 ++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/scrapegraph-js/package-lock.json b/scrapegraph-js/package-lock.json index cb84fb4..ba7917b 100644 --- a/scrapegraph-js/package-lock.json +++ b/scrapegraph-js/package-lock.json @@ -1,15 +1,17 @@ { - "name": "scrapegraph-sdk", + "name": "scrapegraph-js", "version": "0.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "scrapegraph-sdk", + "name": "scrapegraph-js", "version": "0.0.1", "license": "MIT", "dependencies": { - "axios": "^1.6.0" + "axios": "^1.6.0", + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.5" }, "devDependencies": { "dotenv": "^16.4.5" @@ -126,6 +128,24 @@ "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", "license": "MIT" + }, + "node_modules/zod": { + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.23.5", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.23.5.tgz", + "integrity": "sha512-5wlSS0bXfF/BrL4jPAbz9da5hDlDptdEppYfe+x4eIJ7jioqKG9uUxOwPzqof09u/XeVdrgFu29lZi+8XNDJtA==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.23.3" + } } } } diff --git a/scrapegraph-js/package.json b/scrapegraph-js/package.json index e2d1ffb..46f1dd9 100644 --- a/scrapegraph-js/package.json +++ b/scrapegraph-js/package.json @@ -24,7 +24,8 @@ "type": "module", "dependencies": { "axios": "^1.6.0", - "zod": "^3.23.8" + "zod": "^3.23.8", + "zod-to-json-schema": "^3.23.5" }, "devDependencies": { "dotenv": "^16.4.5" From cf2f28fa029df0acb7058fde8239046d77ef0a8a Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:20:19 +0100 Subject: [PATCH 14/24] docs: added an example of the smartScraper functionality using a schema --- scrapegraph-js/readme.md | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/scrapegraph-js/readme.md b/scrapegraph-js/readme.md index a69e313..654a540 100644 --- a/scrapegraph-js/readme.md +++ b/scrapegraph-js/readme.md @@ -77,9 +77,34 @@ const prompt = 'Extract the main heading and 
description.'; ``` #### Scraping with Custom Output Schema +> [!NOTE] +> To use this feature, it is necessary to employ the [Zod](https://www.npmjs.com/package/zod) package for schema creation. + +Here is a real-world example: ```javascript -//TODO +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +const apiKey = 'your-api-key'; +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do? and '; + +const schema = z.object({ + title: z.string().describe('The title of the webpage'), + description: z.string().describe('The description of the webpage'), + summary: z.string().describe('A brief summary of the webpage') +}); + +(async () => { + try { + const response = await smartScraper(apiKey, url, prompt, schema); + console.log(response.result); + } catch (error) { + console.error('Error:', error); + } +})(); ``` ### Checking API Credits From 10a1a5a477a6659aabf3afebfffdbefc14d12d3e Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:21:43 +0100 Subject: [PATCH 15/24] feat: implemented support for requests with schema --- scrapegraph-js/src/smartScraper.js | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/scrapegraph-js/src/smartScraper.js b/scrapegraph-js/src/smartScraper.js index 5f8d3f5..b96a597 100644 --- a/scrapegraph-js/src/smartScraper.js +++ b/scrapegraph-js/src/smartScraper.js @@ -1,5 +1,7 @@ import axios from 'axios'; -import handleError from './utils/handleError.js' +import handleError from './utils/handleError.js'; +import { ZodType } from 'zod'; +import { zodToJsonSchema } from 'zod-to-json-schema'; /** * Scrape and extract structured data from a webpage using ScrapeGraph AI. @@ -25,12 +27,11 @@ export async function smartScraper(apiKey, url, prompt, schema = null) { }; if (schema) { - payload.output_schema = { - description: schema.title || 'Schema', - name: schema.title || 'Schema', - properties: schema.properties || {}, - required: schema.required || [] - }; + if (schema instanceof ZodType) { + payload.output_schema = zodToJsonSchema(schema); + } else { + throw new Error('The schema must be an instance of a valid Zod schema'); + } } try { From baf933b0826b63d4ecf61c8593676357619a1c73 Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 22:23:06 +0100 Subject: [PATCH 16/24] feat: added example of the smartScraper function using a schema --- .../examples/schema_smartScraper_example.js | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 scrapegraph-js/examples/schema_smartScraper_example.js diff --git a/scrapegraph-js/examples/schema_smartScraper_example.js b/scrapegraph-js/examples/schema_smartScraper_example.js new file mode 100644 index 0000000..5024bae --- /dev/null +++ b/scrapegraph-js/examples/schema_smartScraper_example.js @@ -0,0 +1,16 @@ +import { smartScraper } from 'scrapegraph-js'; +import { z } from 'zod'; +import 'dotenv/config'; + +const apiKey = process.env.SGAI_APIKEY; +const url = 'https://scrapegraphai.com/'; +const prompt = 'What does the company do? 
and '; + +const schema = 2; + +try { + const response = await smartScraper(apiKey, url, prompt, schema); + console.log(response.result); +} catch (error) { + console.error(error); +} \ No newline at end of file From 13cf1e5c28ec739d2d35617bd57d7cf8203c3f7e Mon Sep 17 00:00:00 2001 From: DPende Date: Fri, 29 Nov 2024 23:38:25 +0100 Subject: [PATCH 17/24] chore: set up eslint and prettier for code linting and formatting --- scrapegraph-js/.prettierignore | 1 + scrapegraph-js/.prettierrc.json | 11 ++++++++++ scrapegraph-js/eslint.config.js | 11 ++++++++++ scrapegraph-js/package-lock.json | 35 ++++++++++++++++++++++++++++---- scrapegraph-js/package.json | 12 ++++++++++- 5 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 scrapegraph-js/.prettierignore create mode 100644 scrapegraph-js/.prettierrc.json create mode 100644 scrapegraph-js/eslint.config.js diff --git a/scrapegraph-js/.prettierignore b/scrapegraph-js/.prettierignore new file mode 100644 index 0000000..30bc162 --- /dev/null +++ b/scrapegraph-js/.prettierignore @@ -0,0 +1 @@ +/node_modules \ No newline at end of file diff --git a/scrapegraph-js/.prettierrc.json b/scrapegraph-js/.prettierrc.json new file mode 100644 index 0000000..57e2f8b --- /dev/null +++ b/scrapegraph-js/.prettierrc.json @@ -0,0 +1,11 @@ +{ + "semi": true, + "singleQuote": true, + "trailingComma": "es5", + "tabWidth": 2, + "useTabs": false, + "printWidth": 110, + "bracketSpacing": true, + "arrowParens": "always", + "quoteProps": "preserve" +} diff --git a/scrapegraph-js/eslint.config.js b/scrapegraph-js/eslint.config.js new file mode 100644 index 0000000..01a4fe2 --- /dev/null +++ b/scrapegraph-js/eslint.config.js @@ -0,0 +1,11 @@ +import globals from 'globals'; +import pluginJs from '@eslint/js'; +import eslintPluginPrettierRecommended from 'eslint-plugin-prettier/recommended'; + +/** @type {import('eslint').Linter.Config[]} */ +export default [ + { languageOptions: { globals: { ...globals.browser, ...globals.node } } }, + pluginJs.configs.recommended, + eslintPluginPrettierRecommended, + { ignorePatterns: ['node_modules/'] }, +]; diff --git a/scrapegraph-js/package-lock.json b/scrapegraph-js/package-lock.json index cb84fb4..cd44aa4 100644 --- a/scrapegraph-js/package-lock.json +++ b/scrapegraph-js/package-lock.json @@ -1,18 +1,20 @@ { - "name": "scrapegraph-sdk", + "name": "scrapegraph-js", "version": "0.0.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "scrapegraph-sdk", + "name": "scrapegraph-js", "version": "0.0.1", "license": "MIT", "dependencies": { - "axios": "^1.6.0" + "axios": "^1.6.0", + "zod": "^3.23.8" }, "devDependencies": { - "dotenv": "^16.4.5" + "dotenv": "^16.4.5", + "prettier": "3.4.1" } }, "node_modules/asynckit": { @@ -121,11 +123,36 @@ "node": ">= 0.6" } }, + "node_modules/prettier": { + "version": "3.4.1", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.4.1.tgz", + "integrity": "sha512-G+YdqtITVZmOJje6QkXQWzl3fSfMxFwm1tjTyo9exhkmWSqC4Yhd1+lug++IlR2mvRVAxEDDWYkQdeSztajqgg==", + "dev": true, + "license": "MIT", + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==", "license": "MIT" + }, + "node_modules/zod": { + "version": "3.23.8", 
+ "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } } } } diff --git a/scrapegraph-js/package.json b/scrapegraph-js/package.json index e2d1ffb..e894b2f 100644 --- a/scrapegraph-js/package.json +++ b/scrapegraph-js/package.json @@ -8,6 +8,10 @@ "url": "https://github.com/ScrapeGraphAI/scrapegraph-sdk", "directory": "scrapegraph-js" }, + "scripts": { + "format": "prettier --write --cache --cache-strategy metadata . !dist", + "lint": "eslint ." + }, "license": "MIT", "homepage": "https://github.com/ScrapeGraphAI/scrapegraph-sdk/tree/main/scrapegraph-js", "keywords": [ @@ -27,6 +31,12 @@ "zod": "^3.23.8" }, "devDependencies": { - "dotenv": "^16.4.5" + "@eslint/js": "^9.16.0", + "dotenv": "^16.4.5", + "eslint": "^9.16.0", + "eslint-config-prettier": "^9.1.0", + "eslint-plugin-prettier": "^5.2.1", + "globals": "^15.12.0", + "prettier": "3.4.1" } } From 6a351f3ef70a1f00b5f5de5aaba2f408b6bf07dd Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Sat, 30 Nov 2024 12:53:45 +0100 Subject: [PATCH 18/24] feat: add integration for env variables --- ...mple.py => smartscraper_schema_example.py} | 0 scrapegraph-py/scrapegraph_py/async_client.py | 28 +++++++++++++++++++ scrapegraph-py/scrapegraph_py/client.py | 28 +++++++++++++++++++ 3 files changed, 56 insertions(+) rename scrapegraph-py/examples/{schema_smartscraper_example.py => smartscraper_schema_example.py} (100%) diff --git a/scrapegraph-py/examples/schema_smartscraper_example.py b/scrapegraph-py/examples/smartscraper_schema_example.py similarity index 100% rename from scrapegraph-py/examples/schema_smartscraper_example.py rename to scrapegraph-py/examples/smartscraper_schema_example.py diff --git a/scrapegraph-py/scrapegraph_py/async_client.py b/scrapegraph-py/scrapegraph_py/async_client.py index e7c44e1..d56ba9b 100644 --- a/scrapegraph-py/scrapegraph_py/async_client.py +++ b/scrapegraph-py/scrapegraph_py/async_client.py @@ -54,6 +54,34 @@ def __init__( logger.info("✅ AsyncClient initialized successfully") + @classmethod + def from_env( + cls, + verify_ssl: bool = True, + timeout: float = 120, + max_retries: int = 3, + retry_delay: float = 1.0, + ): + """Initialize AsyncClient using API key from environment variable. + + Args: + verify_ssl: Whether to verify SSL certificates + timeout: Request timeout in seconds + max_retries: Maximum number of retry attempts + retry_delay: Delay between retries in seconds + """ + from os import getenv + api_key = getenv("SGAI_API_KEY") + if not api_key: + raise ValueError("SGAI_API_KEY environment variable not set") + return cls( + api_key=api_key, + verify_ssl=verify_ssl, + timeout=timeout, + max_retries=max_retries, + retry_delay=retry_delay, + ) + async def _make_request(self, method: str, url: str, **kwargs) -> Any: """Make HTTP request with retry logic.""" for attempt in range(self.max_retries): diff --git a/scrapegraph-py/scrapegraph_py/client.py b/scrapegraph-py/scrapegraph_py/client.py index 9a54de6..600df2a 100644 --- a/scrapegraph-py/scrapegraph_py/client.py +++ b/scrapegraph-py/scrapegraph_py/client.py @@ -18,6 +18,34 @@ class SyncClient: + @classmethod + def from_env( + cls, + verify_ssl: bool = True, + timeout: float = 30, + max_retries: int = 3, + retry_delay: float = 1.0, + ): + """Initialize SyncClient using API key from environment variable. 
+ + Args: + verify_ssl: Whether to verify SSL certificates + timeout: Request timeout in seconds + max_retries: Maximum number of retry attempts + retry_delay: Delay between retries in seconds + """ + from os import getenv + api_key = getenv("SGAI_API_KEY") + if not api_key: + raise ValueError("SGAI_API_KEY environment variable not set") + return cls( + api_key=api_key, + verify_ssl=verify_ssl, + timeout=timeout, + max_retries=max_retries, + retry_delay=retry_delay, + ) + def __init__( self, api_key: str, From 34777af8fae97a2fa0ba15c854f79b2c4bbac15a Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sat, 30 Nov 2024 11:54:51 +0000 Subject: [PATCH 19/24] ci(release): 1.3.0 [skip ci] ## [1.3.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.2...v1.3.0) (2024-11-30) ### Features * add integration for env variables ([6a351f3](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/6a351f3ef70a1f00b5f5de5aaba2f408b6bf07dd)) --- scrapegraph-py/CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index b1a959f..8b528e9 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.3.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.2...v1.3.0) (2024-11-30) + + +### Features + +* add integration for env variables ([6a351f3](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/6a351f3ef70a1f00b5f5de5aaba2f408b6bf07dd)) + ## [1.2.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.1...v1.2.2) (2024-11-29) From d5ce21809c2d806b853a2bc560bb88a111455ca6 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Sat, 30 Nov 2024 16:17:09 +0000 Subject: [PATCH 20/24] ci(release): 1.4.0 [skip ci] ## [1.4.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.3.0...v1.4.0) (2024-11-30) ### Features * added example of the smartScraper function using a schema ([baf933b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/baf933b0826b63d4ecf61c8593676357619a1c73)) * implemented support for requests with schema ([10a1a5a](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/10a1a5a477a6659aabf3afebfffdbefc14d12d3e)) ### Bug Fixes * the "workspace" key has been removed because it was conflicting with the package.json file in the scrapegraph-js folder. 
([1299173](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/129917377b6a685d769a480b717bf980d3199833)) ### chore * added Zod package dependency ([ee5738b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/ee5738bd737cd07a553d148403a4bbb5e80e5be3)) ### Docs * added an example of the smartScraper functionality using a schema ([cf2f28f](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/cf2f28fa029df0acb7058fde8239046d77ef0a8a)) ### Refactor * code refactoring ([a2b57c7](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/a2b57c7e482dfb5c7c1a125d1684e0367088c83b)) --- scrapegraph-py/CHANGELOG.md | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index 8b528e9..7de7591 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,31 @@ +## [1.4.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.3.0...v1.4.0) (2024-11-30) + + +### Features + +* added example of the smartScraper function using a schema ([baf933b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/baf933b0826b63d4ecf61c8593676357619a1c73)) +* implemented support for requests with schema ([10a1a5a](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/10a1a5a477a6659aabf3afebfffdbefc14d12d3e)) + + +### Bug Fixes + +* the "workspace" key has been removed because it was conflicting with the package.json file in the scrapegraph-js folder. ([1299173](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/129917377b6a685d769a480b717bf980d3199833)) + + +### chore + +* added Zod package dependency ([ee5738b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/ee5738bd737cd07a553d148403a4bbb5e80e5be3)) + + +### Docs + +* added an example of the smartScraper functionality using a schema ([cf2f28f](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/cf2f28fa029df0acb7058fde8239046d77ef0a8a)) + + +### Refactor + +* code refactoring ([a2b57c7](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/a2b57c7e482dfb5c7c1a125d1684e0367088c83b)) + ## [1.3.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.2.2...v1.3.0) (2024-11-30) From 690e87b52505f12da172147a78007497f6edf54c Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 2 Dec 2024 11:13:21 +0100 Subject: [PATCH 21/24] fix: sync client --- scrapegraph-py/scrapegraph_py/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraph-py/scrapegraph_py/client.py b/scrapegraph-py/scrapegraph_py/client.py index 600df2a..e1e4947 100644 --- a/scrapegraph-py/scrapegraph_py/client.py +++ b/scrapegraph-py/scrapegraph_py/client.py @@ -22,7 +22,7 @@ class SyncClient: def from_env( cls, verify_ssl: bool = True, - timeout: float = 30, + timeout: float = 120, max_retries: int = 3, retry_delay: float = 1.0, ): From bc5c6aefcd90840f81e514f45c2e25ac991570b4 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 2 Dec 2024 10:14:26 +0000 Subject: [PATCH 22/24] ci(release): 1.4.1 [skip ci] ## [1.4.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.0...v1.4.1) (2024-12-02) ### Bug Fixes * sync client ([690e87b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/690e87b52505f12da172147a78007497f6edf54c)) ### chore * set up eslint and prettier for code linting and formatting ([13cf1e5](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/13cf1e5c28ec739d2d35617bd57d7cf8203c3f7e)) --- scrapegraph-py/CHANGELOG.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git 
a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index 7de7591..9c9b10f 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,15 @@ +## [1.4.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.0...v1.4.1) (2024-12-02) + + +### Bug Fixes + +* sync client ([690e87b](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/690e87b52505f12da172147a78007497f6edf54c)) + + +### chore + +* set up eslint and prettier for code linting and formatting ([13cf1e5](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/13cf1e5c28ec739d2d35617bd57d7cf8203c3f7e)) + ## [1.4.0](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.3.0...v1.4.0) (2024-11-30) From 589aa49d4434f7112a840d178e5e48918b7799e1 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra Date: Mon, 2 Dec 2024 11:18:34 +0100 Subject: [PATCH 23/24] fix: timeout --- scrapegraph-py/scrapegraph_py/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scrapegraph-py/scrapegraph_py/client.py b/scrapegraph-py/scrapegraph_py/client.py index e1e4947..30ce15c 100644 --- a/scrapegraph-py/scrapegraph_py/client.py +++ b/scrapegraph-py/scrapegraph_py/client.py @@ -50,7 +50,7 @@ def __init__( self, api_key: str, verify_ssl: bool = True, - timeout: float = 30, + timeout: float = 120, max_retries: int = 3, retry_delay: float = 1.0, ): From fb6a0792265a3263c39cdd5456ba5298df9e79f6 Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 2 Dec 2024 10:19:35 +0000 Subject: [PATCH 24/24] ci(release): 1.4.2 [skip ci] ## [1.4.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.1...v1.4.2) (2024-12-02) ### Bug Fixes * timeout ([589aa49](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/589aa49d4434f7112a840d178e5e48918b7799e1)) --- scrapegraph-py/CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scrapegraph-py/CHANGELOG.md b/scrapegraph-py/CHANGELOG.md index 9c9b10f..516e8cf 100644 --- a/scrapegraph-py/CHANGELOG.md +++ b/scrapegraph-py/CHANGELOG.md @@ -1,3 +1,10 @@ +## [1.4.2](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.1...v1.4.2) (2024-12-02) + + +### Bug Fixes + +* timeout ([589aa49](https://github.com/ScrapeGraphAI/scrapegraph-sdk/commit/589aa49d4434f7112a840d178e5e48918b7799e1)) + ## [1.4.1](https://github.com/ScrapeGraphAI/scrapegraph-sdk/compare/v1.4.0...v1.4.1) (2024-12-02)
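Usage sketch (illustrative only, not part of any patch above): the error handling introduced in PATCH 02/24, the Zod schema support from PATCH 15/24, and the `scrapegraph-js` package name from PATCH 05/24 combine as shown below. It assumes `SGAI_APIKEY` is set in the environment, as in `examples/.env.example`.

```javascript
import { smartScraper } from 'scrapegraph-js';
import { z } from 'zod';
import 'dotenv/config';

const apiKey = process.env.SGAI_APIKEY;
const url = 'https://scrapegraphai.com';
const prompt = 'What does the company do?';

// Zod schema describing the expected output; smartScraper converts it with
// zod-to-json-schema and sends it as output_schema (PATCH 15/24).
const schema = z.object({
  title: z.string().describe('The title of the webpage'),
  description: z.string().describe('The description of the webpage'),
});

try {
  const response = await smartScraper(apiKey, url, prompt, schema);
  console.log(response.result);
} catch (error) {
  // handleError (PATCH 02/24) rethrows axios failures as typed errors:
  // HTTP failures become HttpError, missing responses become NetworkError,
  // and anything else becomes UnexpectedError.
  console.error(error.name, error.message);
}
```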