Skip to content

Instantly share code, notes, and snippets.

@dderevjanik
Created June 29, 2018 12:28
Show Gist options
  • Select an option

  • Save dderevjanik/3bf7822c09e75397d1bebcde333db82f to your computer and use it in GitHub Desktop.

Select an option

Save dderevjanik/3bf7822c09e75397d1bebcde333db82f to your computer and use it in GitHub Desktop.
Scrap-ts
import { load } from "cheerio";
/**
 * Declarative description of what to scrape: maps each output property name
 * to the query node (a single selection or a list) that produces its value.
 */
type QueryData = {
[property: string]: Select | List<any>;
};
/** Query node that resolves to a single string taken from the matched element. */
type Select = {
// --- Internal ---
// Discriminant that scrapObject branches on.
type: "SELECT";
// Carries the output type for GetTypeFromQuery; scrapObject never reads this value.
result: string;
// ---Additional---
// CSS selector, evaluated by scrapObject within the current context.
selector: string;
// When `attr` is set, scrapObject returns that attribute instead of the element's text.
options?: Partial<{
attr: string;
}>;
};
/**
 * Query node that resolves to an array: scrapObject applies `data` once per
 * element matched by `selector`.
 */
type List<T extends object> = {
// --- Internal ---
// Discriminant that scrapObject branches on.
type: "LIST";
// Carries the output type for GetTypeFromQuery; scrapObject never reads this value.
result: Array<T>;
// ---Additional---
// CSS selector for the repeated items.
selector: string;
// Query evaluated against each matched item.
data: QueryData;
};
/** Alias of QueryData used in the signature of the exported `scrap` entry point. */
type Query = QueryData;
/**
 * Derives the shape of the scraped output from a query: each property takes
 * the type of the corresponding query node's `result` field.
 */
type GetTypeFromQuery<Q extends QueryData> = { [P in keyof Q]: Q[P]["result"] };
/**
 * Evaluates every entry of `queryData` against the loaded document and stores
 * one value per query property on `ref`.
 *
 * - "SELECT" entries resolve to the matched element's text, or to the value
 *   of the attribute named by `options.attr` when that option is set.
 * - "LIST" entries resolve to an array with one scraped object per element
 *   matched by the entry's selector; each item is scraped recursively with
 *   that element as the search context.
 *
 * @param $ - cheerio instance used to run the selectors
 * @param context - scope in which selectors are evaluated; `scrap` passes ""
 *   for the document root. Recursive calls pass the matched element here
 *   (cast through `any`, since the parameter is declared as `string`).
 * @param queryData - query describing the properties to extract
 * @param ref - target object; it is mutated in place and returned
 * @returns `ref`, populated with one property per query entry
 * @throws Error when an entry's `type` is neither "SELECT" nor "LIST"
 */
const scrapObject = <Q extends QueryData>(
  $: CheerioStatic,
  context: string,
  queryData: Q,
  ref: any // object
): GetTypeFromQuery<Q> => {
  for (const [prop, val] of Object.entries(queryData)) {
    if (val.type === "SELECT") {
      const el = $(val.selector, context);
      if (val.options && val.options.attr) {
        ref[prop] = el.attr(val.options.attr);
      } else {
        ref[prop] = el.text();
      }
    } else if (val.type === "LIST") {
      // Scope the lookup to the current context so that a list nested inside
      // another list only matches descendants of its own parent item.
      const els = $(val.selector, context);
      const items: GetTypeFromQuery<typeof val.data>[] = [];
      for (let i = 0; i < els.length; i++) {
        items.push(scrapObject($, els.eq(i) as any, val.data, {}));
      }
      ref[prop] = items;
    } else {
      // `val` is typed `never` here; report the offending discriminant rather
      // than interpolating the object itself (which prints "[object Object]").
      const unexpectedType = String((val as { type?: unknown }).type);
      throw new Error(`Unexpected property type '${unexpectedType}'`);
    }
  }
  return ref as GetTypeFromQuery<Q>;
};
/**
 * Parses `html` with cheerio and runs `query` against the whole document.
 *
 * @param html - markup to scrape
 * @param query - description of the properties to extract
 * @returns a fresh object holding one scraped value per query property
 */
export const scrap = <Q extends Query>(html: string, query: Q): GetTypeFromQuery<Q> =>
  // An empty context makes the selectors search the whole loaded document.
  scrapObject(load(html), "", query, {});
/** Builders for the query nodes consumed by `scrap`. */
export const Q = {
  /**
   * Builds a query node that resolves to a single string.
   *
   * @param selector - css selector
   * @param options - optional settings; set `attr` to read that attribute
   *   instead of the element's text. When omitted, the node is built without
   *   an `options` key.
   */
  select: (selector: string, options?: Select["options"]): Select =>
    options === undefined
      ? { type: "SELECT", selector, result: "" }
      : { type: "SELECT", selector, result: "", options },
  /**
   * Builds a query node that resolves to an array with one entry per matched
   * element.
   *
   * @param selector - css selector for list of items
   * @param data - query per item
   */
  list: <Q extends QueryData>(selector: string, data: Q): List<Q> => ({
    type: "LIST",
    // Placeholder that only feeds the type-level `result` field;
    // scrapObject never reads it.
    result: [data],
    selector,
    data,
  }),
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment