Created
June 6, 2025 02:22
-
-
Save thuykaka/d3df57f14042a7d5786047f3e976841d to your computer and use it in GitHub Desktop.
Crawl meta ad library
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Browser-console snippet: searches the Ad Library through the async search
// endpoint and prints the raw response text.
// NOTE(review): relies on a page-provided `require` that exposes the "LSD",
// "DTSGInitialData" and "CurrentUserInitialData" modules — presumably it must
// be pasted into the DevTools console of a logged-in facebook.com tab; confirm.
var session_id = self.crypto.randomUUID();
var lsd = require("LSD").token;
var dtsg = require("DTSGInitialData").token;
var uid = require("CurrentUserInitialData").USER_ID;

// Search filters, sent as the URL query string.
var params = {
  "q": "loose weight",
  "countries[0]": "US",
  "count": "30",
  "session_id": session_id,
  "active_status": "all",
  "ad_type": "all",
  "media_type": "all",
  "search_type": "keyword_unordered"
};
var qs = new URLSearchParams(params).toString();
var url = `https://www.facebook.com/ads/library/async/search_ads/?${qs}`;

// Build the form body with URLSearchParams so the tokens are percent-encoded;
// interpolating them raw would corrupt the body whenever a token contains
// characters such as `&`, `=`, `+` or `:`.
var post_body = new URLSearchParams({
  "__user": String(uid),
  "__a": "1",
  "fb_dtsg": String(dtsg),
  "lsd": String(lsd)
}).toString();

var f = await fetch(url, {
  "credentials": "include",
  "headers": {
    // NOTE(review): the Fetch specification lists `Sec-*` as forbidden request
    // headers, so browsers are expected to ignore the script-set values below;
    // they are kept unchanged from the original snippet.
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 YellowWeb",
    "Accept": "*/*",
    "Accept-Language": "vi-VN;q=0.7,en;q=0.3",
    "Content-Type": "application/x-www-form-urlencoded",
    "X-FB-LSD": lsd,
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
  },
  "body": post_body,
  "method": "POST",
  "mode": "cors"
});
var resp = await f.text();
console.log(resp);
Author
Script này cũ rồi để mai mình update
thanks guy @thuykaka đã star nha
@thuykaka Any new updates?
Author
@hashcott bạn xem đỡ file này của mình nhé, cơ bản lúc ban đầu nó sẽ gọi với docId khác và parse kiểu khác, lúc sau nó sẽ gọi docId mới kèm cursor:
config.ts
----------------------------
/** Origin used as the base URL for requests. */
const baseUrl = 'https://www.facebook.com';

/**
 * Default request headers. The values describe a desktop Chrome top-level
 * navigation (see the user-agent string and the `Sec-Fetch-*` entries).
 */
const defaultRequestHeaders = {
  'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36',
  accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
  'accept-language': 'en-US,en;q=0.9',
  'content-type': 'text/html; charset="utf-8"',
  'cache-control': 'max-age=0',
  'Upgrade-Insecure-Requests': '1',
  'Sec-Fetch-Dest': 'document',
  'Sec-Fetch-Mode': 'navigate',
  'Sec-Fetch-Site': 'none',
  'Sec-Fetch-User': '?1',
};

/** Default form parameters for GraphQL-style requests (anonymous user `0`). */
const defaultRequestParams = {
  __user: '0',
  __a: '1',
  fb_api_caller_class: 'RelayModern',
  fb_api_req_friendly_name: 'AdLibraryMobileFocusedStateProviderRefetchQuery',
  server_timestamps: 'true',
  __jssesw: '1',
};

/** Static configuration object assembled from the sections above. */
const config = {
  urls: { base: baseUrl },
  defaultRequestHeaders,
  defaultRequestParams,
};
export default config;import qs from 'qs';
import { type AxiosResponse, type AxiosRequestConfig } from 'axios';
import { Proxy922s5, ProxyPlainText } from './proxy';
import config from '../config';
import { Logger, Request, Utils } from '../utils';
import { FileSaver } from './saver';
/**
 * One entry of a search-result node's `collated_results` list.
 * Only the two fields this module reads are declared.
 * NOTE(review): `page_id` is typed as a number — confirm the API does not
 * return it as a string.
 */
type Ads = {
page_id: number;
page_name: string;
};
/**
 * Search-results connection read from
 * `data.ad_library_main.search_results_connection` of a GraphQL response.
 */
type SearchResults = {
// Total number of matches as reported by the response, when present.
count?: number;
// Result nodes of the current page; each node carries a list of ads.
edges?: {
node: {
collated_results: Ads[];
};
}[];
// Pagination state: `end_cursor` is passed back as the next request's cursor.
page_info: {
has_next_page: boolean;
end_cursor: string;
};
};
/** Platform names accepted by `GetAdsRequest.publisherPlatforms`. */
type Platform = 'FACEBOOK' | 'INSTAGRAM' | 'AUDIENCE_NETWORK' | 'MESSENGER' | 'WHATSAPP' | 'OCULUS' | 'THREADS';
// https://www.facebook.com/ads/library/api/?source=nav-header
/**
 * Search request accepted by `MetaAds.getAds`.
 * Omitted filters fall back to `'ALL'` / empty lists when the GraphQL
 * variables are built.
 */
export type GetAdsRequest = {
adType?: 'ALL' | 'EMPLOYMENT_ADS' | 'FINANCIAL_PRODUCTS_AND_SERVICES_ADS' | 'HOUSING_ADS' | 'POLITICAL_AND_ISSUE_ADS';
// 1-based page number; `getAds` overwrites it on every iteration, and page 1
// selects a different GraphQL document than later pages.
page?: number;
// Search keywords (sent as `queryString`).
query: string;
searchType?: 'KEYWORD_UNORDERED' | 'KEYWORD_EXACT_PHRASE';
// Pagination cursor; `getAds` overwrites it with the previous page's `end_cursor`.
cursor?: string;
contentLanguages?: string[];
activeStatus?: 'ACTIVE' | 'INACTIVE' | 'ALL';
mediaType?: 'ALL' | 'IMAGE' | 'MEME' | 'VIDEO' | 'NONE';
publisherPlatforms?: Platform[];
country?: string;
startDate?: {
min?: string; // 2018-06-15
max?: string; // 2025-06-07
};
// `getAds` stops once at least this many distinct pages have been collected.
maxRecords?: number;
};
/**
 * Scraper for the Facebook Ad Library search, driven through the site's
 * `/api/graphql/` endpoint.
 *
 * A "session" is bootstrapped by loading `/ads/library/`: the `datr` cookie,
 * a set of request tokens (`lsd`, `__spin_*`, ...) and a session id are pulled
 * out of that response and replayed on every search request. `getAds` pages
 * through the results and writes each newly seen page id to a `FileSaver`.
 */
class MetaAds {
  /** GraphQL operation name used when requesting page 1. */
  private static readonly FIRST_PAGE_QUERY = 'AdLibraryMobileFocusedStateProviderRefetchQuery';
  /** GraphQL operation name used for every later page. */
  private static readonly NEXT_PAGE_QUERY = 'AdLibrarySearchPaginationQuery';

  private request: Request;
  private sessionId?: string;
  /** Tokens scraped from the bootstrap page, keyed by request parameter name. */
  private mapQueryParams: Record<string, unknown> = {};
  /** Page id -> page name for every advertiser found during the current `getAds` run. */
  private adsData: Map<number, string> = new Map();
  /** Prefix used in log lines, set per `getAds` call. */
  private context: string = '';

  /**
   * @param debug Forwarded as the second argument of the `Request` helper.
   */
  constructor(private debug = false) {
    this.request = new Request(
      {
        baseURL: config.urls.base,
        headers: config.defaultRequestHeaders,
      },
      this.debug
    );
  }

  /** Extracts a printable message from any thrown value. */
  private static describeError(err: unknown): string {
    if (typeof err === 'object' && err !== null && 'message' in err) {
      return String((err as { message: unknown }).message);
    }
    return String(err);
  }

  /** Forgets every advertiser collected so far. */
  clearAdsData(): void {
    this.adsData.clear();
  }

  /**
   * Picks the `datr` cookie out of a `set-cookie` header list and installs it
   * (plus fixed `dpr` / `wd` values) as the `Cookie` header of later requests.
   *
   * @param cookies Raw `set-cookie` header values; ignored when missing.
   */
  private updateCookies(cookies?: string[]): void {
    if (!cookies) return;
    for (const cookie of cookies) {
      if (typeof cookie !== 'string') continue;
      // Keep only the leading `name=value` pair; attributes such as Path are dropped.
      const cookiePair = cookie.split(';')[0];
      if (cookiePair?.startsWith('datr=')) {
        const cookieValue = `${cookiePair}; dpr=1.53; wd=1920x1080`;
        Logger.info(`[${this.context}] - set cookies: ${cookieValue}`);
        // The HTTP request header is `Cookie` (singular); `Cookies` is not a
        // header servers read, so the cookie would otherwise never be sent.
        this.request.setHeaders({ Cookie: cookieValue });
        return;
      }
    }
  }

  /**
   * Scrapes request tokens from the bootstrap HTML.
   *
   * Looks for the first `<script type="application/json" ... data-sjs>` tag
   * whose payload starts with a `ScheduledServerJS` `handle` call, parses it
   * as JSON and copies values from its `define` entries into `mapQueryParams`.
   * Parse failures are logged and otherwise ignored.
   *
   * @param data HTML of the bootstrap page; ignored when missing.
   */
  private updateMapQueryParams(data?: string): void {
    if (!data) return;
    const scriptText = data.match(
      /<script type="application\/json"(?:\s+data-content-len="\d+")?\s+data-sjs>\{"require":\[\["ScheduledServerJS","handle"[^<]*(?:<\/script>|$)/
    )?.[0];
    if (!scriptText) return;

    // Strip the opening and closing tag so only the JSON payload remains.
    const jsonString = scriptText.replace(/<script type="application\/json"(?:\s+data-content-len="\d+")?\s+data-sjs>/, '').replace(/<\/script>|$/, '');
    try {
      const jsonData = JSON.parse(jsonString);
      const defineData =
        Utils.getLastItemFromArray<Array<{ __bbox: { define?: [string, unknown[], Record<string, unknown>, unknown][] } }>>(jsonData.require?.[0]) || [];
      for (const define of defineData) {
        const box = define?.__bbox?.define || [];
        for (const item of box) {
          // Each entry is [moduleName, deps, data, ...]; only a few modules matter.
          if (item[0] === 'LSD') {
            this.mapQueryParams['lsd'] = item[2]['token'] as string;
          } else if (item[0] === 'WebConnectionClassServerGuess') {
            this.mapQueryParams['__ccg'] = item[2]['connectionClass'] as string;
          } else if (item[0] === 'GetAsyncParamsExtraData') {
            this.mapQueryParams['__aaid'] = (item[2]['extra_data'] as Record<string, string>)['__aaid'];
          } else if (item[0] === 'SiteData') {
            const dict = item[2] as Record<string, string>;
            this.mapQueryParams['__hs'] = dict['haste_session'];
            this.mapQueryParams['__rev'] = dict['__spin_r'];
            this.mapQueryParams['__hsi'] = dict['hsi'];
            this.mapQueryParams['__spin_r'] = dict['__spin_r'];
            this.mapQueryParams['__spin_b'] = dict['__spin_b'];
            this.mapQueryParams['__spin_t'] = dict['__spin_t'];
          }
        }
      }
    } catch (err: unknown) {
      Logger.error(`[${this.context}] - Failed to parse script data: ${MetaAds.describeError(err)}`);
    }
  }

  /**
   * Extracts the session id from the bootstrap HTML: the text between the
   * first `:` and the next `,` after the first `sessionId` occurrence, with
   * double quotes removed. Sets `sessionId` to `undefined` when not found.
   */
  private updateSessionId(data?: string): void {
    this.sessionId = data?.split('sessionId')?.[1]?.split(':')?.[1]?.split(',')?.[0]?.replace(/"/g, '');
  }

  /** Loads `/ads/library/` and refreshes cookie, request tokens and session id from it. */
  private async createSession(): Promise<void> {
    const params = {
      active_status: 'all',
      ad_type: 'all',
      country: 'ALL',
      is_targeted_country: 'false',
      media_type: 'all',
    };
    const response = await this.request.send<AxiosResponse>({
      url: '/ads/library/',
      params,
      keepRawResponse: true,
    });
    const cookies = response?.headers?.['set-cookie'];
    this.updateCookies(cookies);
    this.updateMapQueryParams(response?.data);
    this.updateSessionId(response?.data);
  }

  /** Re-runs the bootstrap request, logging the session id before and after. */
  public async refreshSession(): Promise<void> {
    Logger.info(`[${this.context}] - refresh session, current session id: ${this.sessionId}`);
    await this.createSession();
    Logger.success(`[${this.context}] - refresh session done, new session id: ${this.sessionId}`);
  }

  /**
   * Builds the JSON-encoded GraphQL `variables` for a search request.
   * `cursor` and `startDate` are only included when present on `req`.
   */
  private buildQueryParamVariables(req: GetAdsRequest): string {
    const variables = {
      activeStatus: req.activeStatus || 'ALL',
      adType: req.adType || 'ALL',
      audienceTimeframe: 'LAST_7_DAYS',
      bylines: [],
      contentLanguages: req.contentLanguages || [],
      countries: ['ALL'],
      country: req.country || 'ALL',
      excludedIDs: [],
      fetchPageInfo: false,
      fetchSharedDisclaimers: false,
      isTargetedCountry: false,
      location: null,
      mediaType: req.mediaType || 'ALL',
      multiCountryFilterMode: null,
      pageIDs: [],
      first: 30,
      potentialReachInput: [],
      publisherPlatforms: req.publisherPlatforms || [],
      queryString: req.query,
      ...(req.cursor && { cursor: req.cursor }),
      regions: [],
      searchType: req.searchType || 'KEYWORD_UNORDERED',
      sessionID: this.sessionId || '',
      sortData: null,
      ...(req.startDate && {
        startDate: {
          min: req.startDate?.min || null,
          max: req.startDate?.max || null,
        },
      }),
      viewAllPageID: '0',
    };
    return JSON.stringify(variables);
  }

  /**
   * Builds the url-encoded form body of a search request.
   * Page 1 uses a different `doc_id` / operation name than later pages.
   */
  private buildQueryParams(req: GetAdsRequest): string {
    const isFirstPage = req.page === 1;
    const params = {
      doc_id: isFirstPage ? '24456302960624351' : '24394279933540792',
      server_timestamps: 'true',
      fb_api_caller_class: 'RelayModern',
      fb_api_req_friendly_name: isFirstPage ? MetaAds.FIRST_PAGE_QUERY : MetaAds.NEXT_PAGE_QUERY,
      variables: this.buildQueryParamVariables(req),
      __jssesw: '1',
      __spin_t: this.mapQueryParams['__spin_t'],
      __spin_b: this.mapQueryParams['__spin_b'],
      __spin_r: this.mapQueryParams['__spin_r'],
      lsd: this.mapQueryParams['lsd'],
      __comet_req: '1',
      __hsi: this.mapQueryParams['__hsi'],
      __rev: this.mapQueryParams['__rev'],
      __ccg: this.mapQueryParams['__ccg'],
      dpr: '1',
      __hs: this.mapQueryParams['__hs'],
      __req: '17',
      __a: '1',
      __user: '0',
      __aaid: '0',
      av: '0',
    };
    return qs.stringify(params);
  }

  /**
   * Builds the headers of a search request.
   *
   * @param isFirstPage Selects the `x-fb-friendly-name` value so it matches the
   *   operation name placed in the body by `buildQueryParams`. Defaults to the
   *   first-page name.
   */
  private buildQueryHeaders(isFirstPage = true) {
    const headers: AxiosRequestConfig['headers'] = {
      'content-type': 'application/x-www-form-urlencoded',
      // The standard header name is `referer`; `refer` is not a header servers read.
      referer: 'https://www.facebook.com/ads/library/?active_status=all&ad_type=all&country=ALL&is_targeted_country=false&media_type=all',
      accept: '*/*',
      'x-asbd-id': '359341',
      'x-fb-friendly-name': isFirstPage ? MetaAds.FIRST_PAGE_QUERY : MetaAds.NEXT_PAGE_QUERY,
      'sec-fetch-site': 'same-origin',
      'sec-fetch-mode': 'cors',
      'sec-fetch-dest': 'empty',
      'sec-gpc': '1',
      ...(!!this.mapQueryParams?.lsd && { 'x-fb-lsd': this.mapQueryParams.lsd as string }),
    };
    return headers;
  }

  /**
   * Sends one search request and returns its results connection.
   *
   * A missing response, an unparsable response or an empty result set all
   * count as failures: the session is refreshed and the request repeated.
   *
   * @param req Search request, including `page` and `cursor`.
   * @param maxRetries Attempts left including this one (default 3 means at
   *   most three requests in total).
   * @returns The results connection, or `undefined` once attempts are used up.
   */
  private async queryAds(req: GetAdsRequest, maxRetries = 3): Promise<SearchResults | undefined> {
    try {
      const payload = {
        url: `/api/graphql/`,
        method: 'post',
        headers: this.buildQueryHeaders(req.page === 1),
        data: this.buildQueryParams(req),
      };
      Logger.info(`[${this.context}] - query cursor: ${Utils.truncateLongText(req.cursor ?? '--')}`);
      const response = await this.request.send<string | any>(payload);
      if (!response) throw new Error('Empty response');

      let searchResults: SearchResults | undefined;
      if (typeof response === 'string') {
        // Text responses: rebuild a JSON object from the text that follows the
        // first operation-name marker, then parse it.
        const responseJsonString = `{"label": "AdLibraryMobileFocusedStateProvider${response.split('AdLibraryMobileFocusedStateProvider')?.[1]}`;
        const responseJson = JSON.parse(responseJsonString);
        searchResults = responseJson?.data?.ad_library_main?.search_results_connection;
      } else if (typeof response === 'object' && response?.data?.ad_library_main?.search_results_connection) {
        searchResults = response?.data?.ad_library_main?.search_results_connection;
      }

      if (!searchResults || searchResults.count === 0 || (searchResults.edges && searchResults.edges.length === 0)) {
        Logger.info(`[${this.context}] - ${JSON.stringify(searchResults)}`);
        throw new Error('Empty response');
      }
      return searchResults;
    } catch (err: unknown) {
      const message = MetaAds.describeError(err);
      const retry = maxRetries - 1;
      if (retry > 0) {
        Logger.error(`[${this.context}] - query ads failed, error: ${message}, retry ${retry} -> retry`);
        await this.refreshSession();
        return await this.queryAds(req, retry);
      }
      Logger.error(`[${this.context}] - query ads failed, error: ${message} -> stop`);
      return;
    }
  }

  /**
   * Records the first advertiser of every result node that has not been seen
   * yet and writes its page id (one per line) to `saver`.
   *
   * @param saver Sink receiving `"<page_id>\n"` for each new advertiser.
   * @param page Page number, used for logging only.
   */
  private parseAndSaveAds({ edges }: SearchResults, saver: FileSaver, page: number) {
    if (!edges) {
      Logger.info(`[${this.context}] - Not found any ads`);
      return;
    }
    let addCount = 0;
    for (const edge of edges) {
      // Skip malformed nodes instead of throwing on a missing list.
      const first = edge?.node?.collated_results?.[0];
      if (!first) continue;
      const { page_id, page_name } = first;
      if (!page_id || !page_name || this.adsData.has(page_id)) continue;
      this.adsData.set(page_id, page_name);
      saver.write(`${page_id}\n`);
      Logger.success(`[${this.context}] - found ads: page id: ${page_id} - page name: ${page_name}`);
      addCount++;
    }
    Logger.info(`[${this.context}] - page: ${page} - add ${addCount} ads, total ads: ${this.adsData.size}`);
  }

  /**
   * Runs a full search: pages through the results until there is no next
   * page, a request fails for good, or `req.maxRecords` distinct advertisers
   * were collected. Page ids are written to a `FileSaver` named
   * `ads-<query>`. Errors are logged, never thrown.
   */
  public async getAds(req: GetAdsRequest): Promise<void> {
    try {
      const startTime = Date.now();
      Logger.info(`Get ads: ${JSON.stringify(req)}`);
      this.clearAdsData();
      this.context = `query: ${req.query}`;
      const fileSaver = new FileSaver(`ads-${req.query}`);
      try {
        if (!this.sessionId) await this.createSession();
        const info: {
          cursor?: string;
          hasNext: boolean;
          count: number;
          page: number;
        } = {
          hasNext: true,
          count: 0,
          page: 1,
        };
        do {
          const payload = { ...req, cursor: info.cursor, page: info.page };
          const response = await this.queryAds(payload);
          if (!response) {
            Logger.info(`[${this.context}] - No results found.`);
            break;
          }
          if (!info.count) {
            info.count = response.count || 0;
            Logger.info(`[${this.context}] - total found ads: ${info.count}`);
          }
          this.parseAndSaveAds(response, fileSaver, info.page);
          if (req.maxRecords && this.adsData.size >= req.maxRecords) {
            Logger.info(`[${this.context}] - max records reached: ${req.maxRecords} -> stop`);
            break;
          }
          info.hasNext = Boolean(response.page_info?.has_next_page && response.page_info?.end_cursor);
          info.cursor = response.page_info?.end_cursor;
          info.page++;
          if (!info.hasNext) {
            Logger.info(`[${this.context}] - no more ads -> stop, page_info: ${JSON.stringify(response.page_info)}`);
          }
        } while (info.hasNext);
      } finally {
        // Always release the output file, even when bootstrapping or a page fails.
        fileSaver.close();
      }
      Logger.success(`[${this.context}] - get ads done, total ads: ${this.adsData.size}, took: ${Date.now() - startTime}ms`);
    } catch (err: unknown) {
      Logger.error(`Error when get ads of query: ${req.query}, error: ${MetaAds.describeError(err)}`);
    }
  }
}
export default MetaAds;oh, thanks guy. i will try it
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@thuykaka bạn có biết cách nào để next page không ? hiện tại nó chỉ trả về data page đầu tiên