Skip to content

Fix issues and run scraper #13

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20,240 changes: 8,883 additions & 11,357 deletions data/classes.json

Large diffs are not rendered by default.

1,738 changes: 1,133 additions & 605 deletions data/defines.json

Large diffs are not rendered by default.

1,213 changes: 421 additions & 792 deletions package-lock.json

Large diffs are not rendered by default.

14 changes: 7 additions & 7 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,17 @@
"test": "node ./node_modules/vscode/bin/test"
},
"dependencies": {
"lodash": "^4.17.11"
"lodash": "^4.17.21"
},
"devDependencies": {
"@types/lodash": "^4.14.134",
"@types/lodash": "^4.14.168",
"@types/mocha": "^5.2.7",
"@types/node": "^12.0.7",
"@types/node": "^12.20.10",
"html-to-text": "^5.1.1",
"mocha": "^6.1.4",
"mocha": "^6.2.3",
"osmosis": "^1.1.10",
"ts-node": "^8.2.0",
"typescript": "^3.5.1",
"vscode": "^1.1.34"
"ts-node": "^8.10.2",
"typescript": "^3.9.9",
"vscode": "^1.1.37"
}
}
337 changes: 172 additions & 165 deletions scraper/classes.ts
Original file line number Diff line number Diff line change
@@ -1,177 +1,184 @@
import * as osmosis from "osmosis"
import * as fs from "fs"
import * as htmlToText from "html-to-text"
import * as _ from "lodash"
import * as osmosis from "osmosis";
import * as fs from "fs";
import * as htmlToText from "html-to-text";
import * as _ from "lodash";

import { splitFnRegex, arrayToObject, parseHtml } from "./utils"
import config from "./config"
import { splitFnRegex, arrayToObject, parseHtml } from "./utils";
import config from "./config";
import { FactorioType, FactorioTypeMap } from "../src/types";

const { keys } = Object
const { isArray } = Array
const { keys } = Object;
const { isArray } = Array;

const URL = config.baseUrl + "/Classes.html"
const URL = config.baseUrl + "/Classes.html";

export const scrape = () => {
let classes: FactorioTypeMap = {}

return new Promise((resolve, reject) => {
osmosis
.get(URL)
.find("body > div.brief-listing > div.brief-listing")
.set({
name: "span.type-name > a",
type: "span.type-name > a",
inherits: ["div:contains('Inherited from')"]
})
.then((context, data, next) => {
console.log("current: ", data.name)
classes[data.name] = data
next(context, {})
})
.fail("a:nth-child(2):contains('ControlBehavior')")
.select("span.type-name > a")
.follow("@href")
.find("body > div.brief-listing > div.brief-listing")
let classes: FactorioTypeMap = {};

return new Promise((resolve, reject) => {
osmosis
.get(URL)
.find("body > div.brief-listing > div.brief-listing")
.set({
name: "span.type-name > a",
type: "span.type-name > a",
inherits: ["div:contains('Inherited from')"],
})
.then((context, data, next) => {
console.log("current: ", data.name);
classes[data.name] = data;
next(context, {});
})
.fail("a:nth-child(2):contains('ControlBehavior')")
.select("span.type-name > a")
.follow("@href")
.find("body > div.brief-listing > div.brief-listing")
.set({
name: "span.type-name",
properties: [
osmosis.find("table.brief-members > tr").set({
name: "span.element-name > a",
type: "span.param-type > a",
mode: "span.attribute-mode",
doc: "td.description:not(:empty)",
}),
],
})
.then((document, data, next) => {
_.merge(classes[data.name], data);
next(document.querySelector("#" + data.name), {});
})
.set({
name: "node() !> div@id", // gets the id of parent div.element (!> is parent)
doc: "p:first",
properties: [
osmosis
.find("div.element")
.set({
name: "span.type-name",
properties: [
osmosis
.find("table.brief-members > tr")
.set({
name: "span.element-name > a",
type: "span.param-type > a",
mode: "span.attribute-mode",
doc: "td.description:not(:empty)"
})
]
})
.then((document, data, next) => {
_.merge(classes[data.name], data)
next(document.querySelector("#" + data.name), {})
})
.set({
name: "node() !> div@id", // gets the id of parent div.element (!> is parent)
doc: "p:first",
properties: [
osmosis
.find("div.element")
.set({
name: "span.element-name",
type: "span.param-type:first",
returns: "span.return-type > span.param-type",
args: [
osmosis
.find("div.element-content .detail-header:contains(Parameters) + .detail-content > div")
.set({
name: "span.param-name",
// TODO: Missing types. e.g. LuaCircuitNetwork.get_signal(SignalID)
type: "span.param-type:first"
})
.set("doc", (context) => {
return parseHtml(context.innerHTML)
})
]
})
.select("div.element-content")
.set("doc", (context) => {
return parseHtml(context.innerHTML)
})
]
})
.then((context, data, next) => {
const notString = (obj) => !_.isEmpty(obj) && typeof obj !== "string"

data.properties = data.properties.filter(notString)

data.properties.forEach((prop) => {
if (_.isEmpty(prop.doc)) {
delete prop.doc
}

// Check if this is a function
if (splitFnRegex.test(prop.name)) {
let [__, fnName] = prop.name.match(splitFnRegex)
prop.name = fnName
prop.type = "function"
}

if (!_.isArray(prop.args)) {
prop.args = [prop.args]
}

prop.args = _(prop.args)
.filter(notString)
.each((arg) => {
if (!arg.type) {
arg.type = arg.name
}
})

if (prop.args.length === 0) {
delete prop.args
}
})

_.merge(classes[data.name], data)

next(context, {})
name: "span.element-name",
type: "span.param-type:first",
returns: "span.return-type > span.param-type",
args: [
osmosis
.find(
"div.element-content .detail-header:contains(Parameters) + .detail-content > div"
)
.set({
name: "span.param-name",
// TODO: Missing types. e.g. LuaCircuitNetwork.get_signal(SignalID)
type: "span.param-type:first",
})
.set("doc", (context) => {
return parseHtml(context.innerHTML);
}),
],
})
.log((msg) => {
if (/follow|find/.test(msg)) return
console.log(msg)
})
.error(console.log)
//.debug(console.log)
.done(() => {
_.forOwn(classes, (type, key) => {
_.forEach(type.properties, (prop: FactorioType) => {
if (prop.args) {
prop.args = arrayToObject(prop.args as any)
}
})

if (type.inherits.length) {
handleInheritance(type, classes)
}

type.properties = arrayToObject(type.properties as any)

// comment for debugging
// delete type.inherits
})

// Sort classes by key
classes = _(classes).toPairs().sortBy(0).fromPairs().value()
console.log(`done: ${Object.keys(classes).length} classes`)
resolve(classes)
})
})
}

const handleInheritance = (type, typeMap) => {
// Merge properties from parent classes
type.inherits.reduce((type, inheritStr: string) => {
let [__, fromStr, propsStr] = inheritStr.match(/Inherited from (\w+): (.*)/)
const from = typeMap[fromStr]

if (!from) {
console.error(`${type.name} inherits from ${from}, but ${from} doesn't exit!`)
return type
}
.select("div.element-content")
.set("doc", (context) => {
return parseHtml(context.innerHTML);
}),
],
})
.then((context, data, next) => {
const notString = (obj) => !_.isEmpty(obj) && typeof obj !== "string";

data.properties = data.properties.filter(notString);

data.properties.forEach((prop) => {
if (_.isEmpty(prop.doc)) {
delete prop.doc;
}

// Check if this is a function
if (splitFnRegex.test(prop.name)) {
let [__, fnName] = prop.name.match(splitFnRegex);
prop.name = fnName;
prop.type = "function";
}

if (!_.isArray(prop.args)) {
prop.args = [prop.args];
}

prop.args = _(prop.args)
.filter(notString)
.each((arg) => {
if (!arg.type) {
arg.type = arg.name;
}
});

if (prop.args.length === 0) {
delete prop.args;
}
});

_.merge(classes[data.name], data);

next(context, {});
})
.log((msg) => {
if (/follow|find/.test(msg)) return;
console.log(msg);
})
.error(console.log)
//.debug(console.log)
.done(() => {
_.forOwn(classes, (type, key) => {
// @ts-ignore
type.properties = type.properties || [];
_.forEach(type.properties, (prop: FactorioType) => {
if (prop.args) {
prop.args = arrayToObject(prop.args as any);
}
});

let propsStrArr = propsStr.split(", ")
if (type.inherits.length) {
handleInheritance(type, classes);
}

propsStrArr.forEach((propStr) => {
let fromProp = from.properties[propStr]
type.properties = arrayToObject(type.properties as any);

if (!fromProp) {
console.error(`${from.name} has no property ${propStr}`)
return
}
// comment for debugging
// delete type.inherits
});

type.properties.push(fromProp)
})
// Sort classes by key
classes = _(classes).toPairs().sortBy(0).fromPairs().value();
console.log(`done: ${Object.keys(classes).length} classes`);
resolve(classes);
});
});
};

return type
}, type)
}
/**
 * Appends properties inherited from parent classes to `type.properties`.
 *
 * Every entry of `type.inherits` is expected to have the form
 * `"Inherited from <ParentName>: <prop1>, <prop2>, ..."`. For each listed
 * property found on the parent, the parent's property object is pushed (by
 * reference, not copied) onto `type.properties`.
 *
 * Problems are reported with `console.error` and skipped instead of thrown,
 * so a single malformed entry does not abort processing:
 *  - an `inherits` entry that does not match the expected pattern,
 *  - a parent name that is not a key of `typeMap`,
 *  - a property name that the parent does not have.
 *
 * @param type    Class description to extend. `type.properties` must be an
 *                array (or unset, in which case it is initialised to `[]`).
 * @param typeMap Map from class name to class description, used to resolve
 *                parents. A parent's `properties` may be either a name-keyed
 *                object or a not-yet-converted array of `{ name, ... }`
 *                entries; both are supported.
 */
const handleInheritance = (type, typeMap) => {
  const inheritPattern = /Inherited from (\w+): (.*)/;
  const hasOwn = Object.prototype.hasOwnProperty;

  if (!type.properties) {
    type.properties = [];
  }

  (type.inherits || []).forEach((inheritStr: string) => {
    const match = inheritPattern.exec(inheritStr);

    // `exec` yields null for text that does not follow the expected format;
    // destructuring that directly would throw.
    if (!match) {
      console.error(
        `${type.name} has an unrecognised inherits entry: ${inheritStr}`
      );
      return;
    }

    const [, fromStr, propsStr] = match;
    const from = typeMap[fromStr];

    if (!from) {
      // Report the requested name: `from` itself is undefined here.
      console.error(
        `${type.name} inherits from ${fromStr}, but ${fromStr} doesn't exist!`
      );
      return;
    }

    const fromProps = from.properties || {};

    // The parent may not have been converted to a name-keyed object yet
    // (conversion order depends on the caller's iteration order), so look
    // the property up by `name` when it is still an array.
    const findProp = (propName: string) => {
      if (Array.isArray(fromProps)) {
        return fromProps.find((prop) => prop && prop.name === propName);
      }
      return hasOwn.call(fromProps, propName) ? fromProps[propName] : undefined;
    };

    propsStr.split(", ").forEach((propStr) => {
      const fromProp = findProp(propStr);

      if (!fromProp) {
        console.error(`${from.name} has no property ${propStr}`);
        return;
      }

      type.properties.push(fromProp);
    });
  });
};
Loading