Skip to content

Instantly share code, notes, and snippets.

@jools-uk
Last active September 12, 2024 14:36
Show Gist options
  • Select an option

  • Save jools-uk/82a79e69f858ff9755d87edd1378ede1 to your computer and use it in GitHub Desktop.

Select an option

Save jools-uk/82a79e69f858ff9755d87edd1378ede1 to your computer and use it in GitHub Desktop.
Tapermonkey AliExpress Order scraper
// ==UserScript==
// @name Ali Express Order Downloader
// @namespace https://gist.github.com/
// @version 0.1
// @description Retrieve Aliexpress order information and export as CSV via clipboard - 2023 version
// @author jools-uk
// @match https://www.aliexpress.com/p/order/index.html*
// @match https://www.aliexpress.com/p/order/detail.html*
// @grant unsafeWindow
// @grant GM_xmlhttpRequest
// @grant GM_setClipboard
// @grant GM_openInTab
// @grant GM_getTabs
// @grant GM_getTab
// @grant GM_saveTab
// @require http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js
// @require https://code.jquery.com/jquery-latest.js
// @require https://www.17track.net/externalcall.js
// @require https://cdnjs.cloudflare.com/ajax/libs/alasql/0.4.11/alasql.min.js
// ==/UserScript==
/* globals jQuery, $ */
// How this script works:
// If I'm loaded as the Order index page (eg https://www.aliexpress.com/p/order/index.html), then present the user with 2 buttons
// - one the scrape order IDs from current loaded order list and then do some work to fetch order details
// - one to output to a CSV file
//
// Getting the Order Ids from current page is fairly easy - just look for objects of type 'order-item'
// Getting the Order Details is harder
// - we need to load that page in a separate tab, have that tab scrape its own data, and then signal to the parent script with the data
// - use of GM_getTab and GM_saveTab and loops with sleeps to signal data back
// On completition of each tab, save locally
// Deliberatly this is done in a sequence with sleep rather than spanwing lots of Promises - deliberatly don't want to spam Ali servers
// Loosely based on the script from here:
// https://khaz.me/an-easier-way-to-download-your-aliexpress-order-history/
function parseDate(txt)
{
// May. 21 2019
const re = new RegExp('(\\w{3}) (\\d*), (\\d{4})');
const res = txt.match(re);
console.log(res);
return new Date(res[3], ("JanFebMarAprMayJunJulAugSepOctNovDec".indexOf(res[1]))/3, res[2]);
}
function parseOrderId(txt)
{
const re = new RegExp(' (\\d*)Copy$');
const res = txt.match(re);
console.log(res);
return res[1];
}
function parseOrderAmount(txt)
{
const re = new RegExp('(\\d*\.\\d*)$');
const res = txt.match(re);
// console.log(res);
return Number(res[1]);
}
function parseItemPrice(txt)
{
const re = new RegExp('(\\d*\.\\d*)x\\d*$');
const res = txt.match(re);
// console.log(res);
return Number(res[1]);
}
function parseItemCount(txt)
{
const re = new RegExp('\\d*\.\\d*x(\\d)*$');
const res = txt.match(re);
// console.log(res);
return Number(res[1]);
}
const sleep = ms => new Promise(r => setTimeout(r, ms));
//The main Order Details worker, called when order details page loads (we wait a little while for full load)
var items=[];
const url = new URL(location.href);
var orderId = url.searchParams.get('orderId');
async function worker()
{
await sleep(5000);
var inum = 0;
$(".order-detail-item-content-wrap").each((idx,oi)=>{
let item = {
idx: ++inum,
product_id: $(oi).find(".item-title").text().trim(),
title: $(oi).find(".item-title").text().trim(),
skuAttr: $(oi).find(".item-sku-attr").text().trim(),
url: $(oi).find(".item-title").children('a')[0].href,
price: parseItemPrice($(oi).find(".item-price").text().trim()),
count: parseItemCount($(oi).find(".item-price").text().trim()),
orderId: orderId,
};
items.push(item);
});
GM_getTab(function(tabObject){
tabObject.orderDetails = items;
tabObject.orderId = orderId;
GM_saveTab(tabObject);
});
};
//General setup - I'm either and OrderDetails tab, or an OrdersIndex tab
//For OrderDetails, grab the order details and save to a tab data
//For OrderIndex, setup the in-memory SQL tables for storage
var myTabId;
if(location.href.indexOf("detail.html") > -1){
GM_getTab(function(tabObject){
myTabId = tabObject.tabId;
tabObject.Type = "OrderDetails";
GM_saveTab(tabObject);
});
worker();
}
else
{
var aliDb = new alasql.Database('aliDb');
aliDb.exec('CREATE TABLE orderItems (id String, idx Number, title String, skuAttr String, productURL String, price Number, quantity Number)');
var insertOrderItem = alasql.compile('INSERT INTO orderItems (?,?,?,?,?,?,?)', 'aliDb');
aliDb.exec('CREATE TABLE orders (id String, date Date, status String, amount Number)');
var insertOrder = alasql.compile('INSERT INTO orders (?,?,?,?)', 'aliDb');
insertOrder( [ "123", parseDate('Feb 4, 2023'), "Complete", 123.45 ] );
insertOrderItem( [ "123", 1, "Some product desc", "sku", "productURL", 123.45, 5 ] );
GM_getTab(function(tabObject){
myTabId = tabObject.tabId;
tabObject.Type = "OrdersIndex";
GM_saveTab(tabObject);
});
}
//Idea here is that this async function is not tied to main thread, but we are looping
//through the tabs in sequence, giving each tab enough time to do its work
//Through the GM_saveTab/GM_getTabs we can poll the order details page for its data
async function asyncFetchOrderDetails(orderIds){
for (var i = 0; i < orderIds.length; i++) {
$("#csvBtn").text("Fetching... " + (i+1) + " of " + orderIds.length);
let orderId = orderIds[i];
let tab = GM_openInTab("https://www.aliexpress.com/p/order/detail.html?orderId=" + orderId);
let orderDetails = null;
let found = false;
do {
GM_getTabs((tabs) => {
for (const [tabId, tab] of Object.entries(tabs)) {
console.log(`tab ${tabId}`, tab);
if('orderDetails' in tab && tab.orderId === orderId){
orderDetails = tab.orderDetails;
found = true;
}
}
});
await sleep(1000);
} while(!found);
tab.close();
orderDetails.forEach((i) => {
insertOrderItem( [ i.orderId, i.idx, i.title, i.skuAttr, i.url, i.price, i.count ] );
});
};
$("#csvBtn").text("Fetched! " + i);
}
//Place any debug here
//e.g. print out any active tabs
$('<button/>', {
text: "Debug Stuff",
id: 'debugBtn',
style: "background-color:Tomato;",
class: 'page-menu-item page-menu-title',
click: function () {
$("#debugBtn").text("Debugging!");
GM_getTabs((tabs) => {
for (const [tabId, tab] of Object.entries(tabs)) {
console.log(`tab ${tabId}`, tab);
}
});
}
}).prependTo("#page-menu");
//Save the AlaSql tables to CSV file
$('<button/>', {
text: "Save CSV",
id: 'headerBtn',
style: "background-color:DodgerBlue;",
class: 'page-menu-item page-menu-title',
click: function () {
var res = aliDb.exec("SELECT * FROM orderItems");
console.log(res);
aliDb.exec('SELECT * INTO CSV("orders.csv", {headers:true}) FROM orders');
aliDb.exec('SELECT * INTO CSV("orderItems.csv", {headers:true}) FROM orderItems');
$("#headerBtn").text("Saved!");
}
}).prependTo("#page-menu");
//Main fetch functionality
//Evaluate all the orders on the current page and then open sub tabs to grab the details
//This function should return quickly, leaving the async worker to do the hard work
$('<button/>', {
text: "Fetch Order Details",
id: 'csvBtn',
style: "background-color:Violet;",
class: 'page-menu-item page-menu-title',
click: function () {
$("#csvBtn").text("Fetching...");
var res = aliDb.exec("SELECT * FROM orders");
console.log(res);
let orderIds = [];
$(".order-item").each((ind, eo)=>{
var hasTracking = $(eo).find(".button-logisticsTracking ").length > 0;
let order = {
id: parseOrderId($(eo).find(".order-item-header-right-info").text().trim()),
date: parseDate($(eo).find(".order-item-header .order-item-header-right .order-item-header-right-info").text().trim()),
amount: parseOrderAmount($(eo).find(".order-item-content-opt-price-total").text().trim()),
status: $(eo).find(".order-item-header .order-item-header-status .order-item-header-status-text").text().trim(),
store_name: $(eo).find(".store-info .first-row .info-body").text().trim(),
store_url: $(eo).find(".store-info .second-row a:first()").attr('href'),
order_tr_num: $(eo).find(".order-action .tracking_number").text().trim(),
order_tr_stat: $(eo).find(".order-body .order-action .tracking_status").text().trim(),
product_action: $(eo).find(".product-action span:first()").text().trim(),
hasTracking: hasTracking,
};
orderIds.push(order.id);
insertOrder( [ order.id, order.date, order.status, order.amount ] );
});
$("#csvBtn").text("Fetching... 0 of " + orderIds.length);
asyncFetchOrderDetails(orderIds);
}
}).prependTo("#page-menu");
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment