Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import scrapegraphai from "../../scrapegraphai.app.mjs";

/**
 * Pipedream action that sends raw HTML plus a natural-language prompt to the
 * ScrapeGraphAI local scraper endpoint and, when `waitForCompletion` is set,
 * polls the status endpoint until the request is "completed" or "failed".
 */
export default {
  key: "scrapegraphai-start-local-scraper",
  name: "Start Local Scraper",
  description: "Extract content from HTML content using AI by providing a natural language prompt and the HTML content. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/localscraper/start)",
  version: "0.0.1",
  type: "action",
  props: {
    scrapegraphai,
    html: {
      type: "string",
      label: "HTML",
      description: "The HTML to scrape",
    },
    prompt: {
      propDefinition: [
        scrapegraphai,
        "prompt",
      ],
    },
    waitForCompletion: {
      propDefinition: [
        scrapegraphai,
        "waitForCompletion",
      ],
    },
  },
  /**
   * Starts the local scraper request and optionally waits for it to finish.
   *
   * @param {object} context
   * @param {object} context.$ - Step object; used for `$.export` and forwarded
   *   to the app's request helpers.
   * @returns {Promise<object>} The last API response received (the start
   *   response, or the final status response when polling).
   */
  async run({ $ }) {
    let response = await this.scrapegraphai.startLocalScraper({
      $,
      data: {
        website_html: this.html,
        user_prompt: this.prompt,
      },
    });

    if (this.waitForCompletion) {
      // Capture the id once so polling never depends on a status response
      // echoing `request_id` back.
      const requestId = response.request_id;
      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
      while (response.status !== "completed" && response.status !== "failed") {
        // Sleep BEFORE polling: the job gets time to progress, and the loop
        // exits as soon as a terminal status is seen instead of waiting an
        // extra interval after it.
        await timer(3000);
        response = await this.scrapegraphai.getLocalScraperStatus({
          $,
          requestId,
        });
      }
    }

    // No summary is exported for a failed request; the raw response is still
    // returned so the caller can inspect it.
    if (response.status !== "failed") {
      const verb = this.waitForCompletion
        ? "completed"
        : "started";
      $.export("$summary", `Successfully ${verb} scraping HTML.`);
    }
    return response;
  },
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import scrapegraphai from "../../scrapegraphai.app.mjs";

/**
 * Pipedream action that asks the ScrapeGraphAI markdownify endpoint to convert
 * a URL to Markdown and, when `waitForCompletion` is set, polls the status
 * endpoint until the request is "completed" or "failed".
 */
export default {
  key: "scrapegraphai-start-markdownify",
  name: "Start Markdownify",
  description: "Convert any webpage into clean, readable Markdown format. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/markdownify/start)",
  version: "0.0.1",
  type: "action",
  props: {
    scrapegraphai,
    url: {
      propDefinition: [
        scrapegraphai,
        "url",
      ],
      description: "The URL of the website to convert into markdown",
    },
    waitForCompletion: {
      propDefinition: [
        scrapegraphai,
        "waitForCompletion",
      ],
    },
  },
  /**
   * Starts the markdownify request and optionally waits for it to finish.
   *
   * @param {object} context
   * @param {object} context.$ - Step object; used for `$.export` and forwarded
   *   to the app's request helpers.
   * @returns {Promise<object>} The last API response received (the start
   *   response, or the final status response when polling).
   */
  async run({ $ }) {
    let response = await this.scrapegraphai.startMarkdownify({
      $,
      data: {
        website_url: this.url,
      },
    });

    if (this.waitForCompletion) {
      // Capture the id once so polling never depends on a status response
      // echoing `request_id` back.
      const requestId = response.request_id;
      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
      while (response.status !== "completed" && response.status !== "failed") {
        // Sleep BEFORE polling: the job gets time to progress, and the loop
        // exits as soon as a terminal status is seen instead of waiting an
        // extra interval after it.
        await timer(3000);
        response = await this.scrapegraphai.getMarkdownifyStatus({
          $,
          requestId,
        });
      }
    }

    // No summary is exported for a failed request; the raw response is still
    // returned so the caller can inspect it.
    if (response.status !== "failed") {
      const verb = this.waitForCompletion
        ? "completed"
        : "started";
      $.export("$summary", `Successfully ${verb} converting ${this.url} to markdown.`);
    }
    return response;
  },
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import scrapegraphai from "../../scrapegraphai.app.mjs";

/**
 * Pipedream action that sends a URL plus a natural-language prompt to the
 * ScrapeGraphAI smart scraper endpoint and, when `waitForCompletion` is set,
 * polls the status endpoint until the request is "completed" or "failed".
 */
export default {
  key: "scrapegraphai-start-smart-scraper",
  name: "Start Smart Scraper",
  description: "Extract content from a webpage using AI by providing a natural language prompt and a URL. [See the documentation](https://docs.scrapegraphai.com/api-reference/endpoint/smartscraper/start).",
  version: "0.0.1",
  type: "action",
  props: {
    scrapegraphai,
    url: {
      propDefinition: [
        scrapegraphai,
        "url",
      ],
    },
    prompt: {
      propDefinition: [
        scrapegraphai,
        "prompt",
      ],
    },
    waitForCompletion: {
      propDefinition: [
        scrapegraphai,
        "waitForCompletion",
      ],
    },
  },
  /**
   * Starts the smart scraper request and optionally waits for it to finish.
   *
   * @param {object} context
   * @param {object} context.$ - Step object; used for `$.export` and forwarded
   *   to the app's request helpers.
   * @returns {Promise<object>} The last API response received (the start
   *   response, or the final status response when polling).
   */
  async run({ $ }) {
    let response = await this.scrapegraphai.startSmartScraper({
      $,
      data: {
        website_url: this.url,
        user_prompt: this.prompt,
      },
    });

    if (this.waitForCompletion) {
      // Capture the id once so polling never depends on a status response
      // echoing `request_id` back.
      const requestId = response.request_id;
      const timer = (ms) => new Promise((res) => setTimeout(res, ms));
      while (response.status !== "completed" && response.status !== "failed") {
        // Sleep BEFORE polling: the job gets time to progress, and the loop
        // exits as soon as a terminal status is seen instead of waiting an
        // extra interval after it.
        await timer(3000);
        response = await this.scrapegraphai.getSmartScraperStatus({
          $,
          requestId,
        });
      }
    }

    // No summary is exported for a failed request; the raw response is still
    // returned so the caller can inspect it.
    if (response.status !== "failed") {
      const verb = this.waitForCompletion
        ? "completed"
        : "started";
      $.export("$summary", `Successfully ${verb} scraping ${this.url}.`);
    }
    return response;
  },
};
7 changes: 5 additions & 2 deletions components/scrapegraphai/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pipedream/scrapegraphai",
"version": "0.0.1",
"version": "0.1.0",
"description": "Pipedream ScrapeGraphAI Components",
"main": "scrapegraphai.app.mjs",
"keywords": [
Expand All @@ -11,5 +11,8 @@
"author": "Pipedream <[email protected]> (https://pipedream.com/)",
"publishConfig": {
"access": "public"
},
"dependencies": {
"@pipedream/platform": "^3.0.3"
}
}
}
84 changes: 80 additions & 4 deletions components/scrapegraphai/scrapegraphai.app.mjs
Original file line number Diff line number Diff line change
@@ -1,11 +1,87 @@
import { axios } from "@pipedream/platform";

export default {
type: "app",
app: "scrapegraphai",
propDefinitions: {},
// Shared prop definitions; actions pull these in with
// `propDefinition: [scrapegraphai, "<name>"]`.
propDefinitions: {
// Address of the page to process (required string).
url: {
type: "string",
label: "URL to Scrape",
description: "The URL of the website to scrape.",
},
// Natural-language extraction instruction (required string).
prompt: {
type: "string",
label: "Prompt",
description: "A prompt describing what you want to extract. Example: `Extract info about the company`",
},
// Optional flag; when set, the actions poll the status endpoint until the
// request reports "completed" or "failed".
waitForCompletion: {
type: "boolean",
label: "Wait For Completion",
description: "Set to `true` to poll the API in 3-second intervals until the request is completed",
optional: true,
},
},
methods: {
// this.$auth contains connected account data
authKeys() {
console.log(Object.keys(this.$auth));
_baseUrl() {
return "https://api.scrapegraphai.com/v1";
},
_makeRequest({
$ = this,
path,
...opts
}) {
return axios($, {
url: `${this._baseUrl()}${path}`,
headers: {
"sgai-apikey": `${this.$auth.api_key}`,
},
...opts,
});
},
startSmartScraper(opts = {}) {
return this._makeRequest({
method: "POST",
path: "/smartscraper",
...opts,
});
},
getSmartScraperStatus({
requestId, ...opts
}) {
return this._makeRequest({
path: `/smartscraper/${requestId}`,
...opts,
});
},
startLocalScraper(opts = {}) {
return this._makeRequest({
method: "POST",
path: "/localscraper",
...opts,
});
},
getLocalScraperStatus({
requestId, ...opts
}) {
return this._makeRequest({
path: `/localscraper/${requestId}`,
...opts,
});
},
startMarkdownify(opts = {}) {
return this._makeRequest({
method: "POST",
path: "/markdownify",
...opts,
});
},
getMarkdownifyStatus({
requestId, ...opts
}) {
return this._makeRequest({
path: `/markdownify/${requestId}`,
...opts,
});
},
},
};
9 changes: 6 additions & 3 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading