Last active
September 12, 2024 14:36
-
-
Save jools-uk/82a79e69f858ff9755d87edd1378ede1 to your computer and use it in GitHub Desktop.
Tapermonkey AliExpress Order scraper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // ==UserScript== | |
| // @name Ali Express Order Downloader | |
| // @namespace https://gist.github.com/ | |
| // @version 0.1 | |
| // @description Retrieve Aliexpress order information and export as CSV via clipboard - 2023 version | |
| // @author jools-uk | |
| // @match https://www.aliexpress.com/p/order/index.html* | |
| // @match https://www.aliexpress.com/p/order/detail.html* | |
| // @grant unsafeWindow | |
| // @grant GM_xmlhttpRequest | |
| // @grant GM_setClipboard | |
| // @grant GM_openInTab | |
| // @grant GM_getTabs | |
| // @grant GM_getTab | |
| // @grant GM_saveTab | |
| // @require http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js | |
| // @require https://code.jquery.com/jquery-latest.js | |
| // @require https://www.17track.net/externalcall.js | |
| // @require https://cdnjs.cloudflare.com/ajax/libs/alasql/0.4.11/alasql.min.js | |
| // ==/UserScript== | |
| /* globals jQuery, $ */ | |
| // How this script works: | |
| // If I'm loaded as the Order index page (eg https://www.aliexpress.com/p/order/index.html), then present the user with 2 buttons | |
| // - one the scrape order IDs from current loaded order list and then do some work to fetch order details | |
| // - one to output to a CSV file | |
| // | |
| // Getting the Order Ids from current page is fairly easy - just look for objects of type 'order-item' | |
| // Getting the Order Details is harder | |
| // - we need to load that page in a separate tab, have that tab scrape its own data, and then signal to the parent script with the data | |
| // - use of GM_getTab and GM_saveTab and loops with sleeps to signal data back | |
| // On completition of each tab, save locally | |
| // Deliberatly this is done in a sequence with sleep rather than spanwing lots of Promises - deliberatly don't want to spam Ali servers | |
| // Loosely based on the script from here: | |
| // https://khaz.me/an-easier-way-to-download-your-aliexpress-order-history/ | |
| function parseDate(txt) | |
| { | |
| // May. 21 2019 | |
| const re = new RegExp('(\\w{3}) (\\d*), (\\d{4})'); | |
| const res = txt.match(re); | |
| console.log(res); | |
| return new Date(res[3], ("JanFebMarAprMayJunJulAugSepOctNovDec".indexOf(res[1]))/3, res[2]); | |
| } | |
| function parseOrderId(txt) | |
| { | |
| const re = new RegExp(' (\\d*)Copy$'); | |
| const res = txt.match(re); | |
| console.log(res); | |
| return res[1]; | |
| } | |
| function parseOrderAmount(txt) | |
| { | |
| const re = new RegExp('(\\d*\.\\d*)$'); | |
| const res = txt.match(re); | |
| // console.log(res); | |
| return Number(res[1]); | |
| } | |
| function parseItemPrice(txt) | |
| { | |
| const re = new RegExp('(\\d*\.\\d*)x\\d*$'); | |
| const res = txt.match(re); | |
| // console.log(res); | |
| return Number(res[1]); | |
| } | |
| function parseItemCount(txt) | |
| { | |
| const re = new RegExp('\\d*\.\\d*x(\\d)*$'); | |
| const res = txt.match(re); | |
| // console.log(res); | |
| return Number(res[1]); | |
| } | |
| const sleep = ms => new Promise(r => setTimeout(r, ms)); | |
| //The main Order Details worker, called when order details page loads (we wait a little while for full load) | |
| var items=[]; | |
| const url = new URL(location.href); | |
| var orderId = url.searchParams.get('orderId'); | |
| async function worker() | |
| { | |
| await sleep(5000); | |
| var inum = 0; | |
| $(".order-detail-item-content-wrap").each((idx,oi)=>{ | |
| let item = { | |
| idx: ++inum, | |
| product_id: $(oi).find(".item-title").text().trim(), | |
| title: $(oi).find(".item-title").text().trim(), | |
| skuAttr: $(oi).find(".item-sku-attr").text().trim(), | |
| url: $(oi).find(".item-title").children('a')[0].href, | |
| price: parseItemPrice($(oi).find(".item-price").text().trim()), | |
| count: parseItemCount($(oi).find(".item-price").text().trim()), | |
| orderId: orderId, | |
| }; | |
| items.push(item); | |
| }); | |
| GM_getTab(function(tabObject){ | |
| tabObject.orderDetails = items; | |
| tabObject.orderId = orderId; | |
| GM_saveTab(tabObject); | |
| }); | |
| }; | |
| //General setup - I'm either and OrderDetails tab, or an OrdersIndex tab | |
| //For OrderDetails, grab the order details and save to a tab data | |
| //For OrderIndex, setup the in-memory SQL tables for storage | |
| var myTabId; | |
| if(location.href.indexOf("detail.html") > -1){ | |
| GM_getTab(function(tabObject){ | |
| myTabId = tabObject.tabId; | |
| tabObject.Type = "OrderDetails"; | |
| GM_saveTab(tabObject); | |
| }); | |
| worker(); | |
| } | |
| else | |
| { | |
| var aliDb = new alasql.Database('aliDb'); | |
| aliDb.exec('CREATE TABLE orderItems (id String, idx Number, title String, skuAttr String, productURL String, price Number, quantity Number)'); | |
| var insertOrderItem = alasql.compile('INSERT INTO orderItems (?,?,?,?,?,?,?)', 'aliDb'); | |
| aliDb.exec('CREATE TABLE orders (id String, date Date, status String, amount Number)'); | |
| var insertOrder = alasql.compile('INSERT INTO orders (?,?,?,?)', 'aliDb'); | |
| insertOrder( [ "123", parseDate('Feb 4, 2023'), "Complete", 123.45 ] ); | |
| insertOrderItem( [ "123", 1, "Some product desc", "sku", "productURL", 123.45, 5 ] ); | |
| GM_getTab(function(tabObject){ | |
| myTabId = tabObject.tabId; | |
| tabObject.Type = "OrdersIndex"; | |
| GM_saveTab(tabObject); | |
| }); | |
| } | |
| //Idea here is that this async function is not tied to main thread, but we are looping | |
| //through the tabs in sequence, giving each tab enough time to do its work | |
| //Through the GM_saveTab/GM_getTabs we can poll the order details page for its data | |
| async function asyncFetchOrderDetails(orderIds){ | |
| for (var i = 0; i < orderIds.length; i++) { | |
| $("#csvBtn").text("Fetching... " + (i+1) + " of " + orderIds.length); | |
| let orderId = orderIds[i]; | |
| let tab = GM_openInTab("https://www.aliexpress.com/p/order/detail.html?orderId=" + orderId); | |
| let orderDetails = null; | |
| let found = false; | |
| do { | |
| GM_getTabs((tabs) => { | |
| for (const [tabId, tab] of Object.entries(tabs)) { | |
| console.log(`tab ${tabId}`, tab); | |
| if('orderDetails' in tab && tab.orderId === orderId){ | |
| orderDetails = tab.orderDetails; | |
| found = true; | |
| } | |
| } | |
| }); | |
| await sleep(1000); | |
| } while(!found); | |
| tab.close(); | |
| orderDetails.forEach((i) => { | |
| insertOrderItem( [ i.orderId, i.idx, i.title, i.skuAttr, i.url, i.price, i.count ] ); | |
| }); | |
| }; | |
| $("#csvBtn").text("Fetched! " + i); | |
| } | |
| //Place any debug here | |
| //e.g. print out any active tabs | |
| $('<button/>', { | |
| text: "Debug Stuff", | |
| id: 'debugBtn', | |
| style: "background-color:Tomato;", | |
| class: 'page-menu-item page-menu-title', | |
| click: function () { | |
| $("#debugBtn").text("Debugging!"); | |
| GM_getTabs((tabs) => { | |
| for (const [tabId, tab] of Object.entries(tabs)) { | |
| console.log(`tab ${tabId}`, tab); | |
| } | |
| }); | |
| } | |
| }).prependTo("#page-menu"); | |
| //Save the AlaSql tables to CSV file | |
| $('<button/>', { | |
| text: "Save CSV", | |
| id: 'headerBtn', | |
| style: "background-color:DodgerBlue;", | |
| class: 'page-menu-item page-menu-title', | |
| click: function () { | |
| var res = aliDb.exec("SELECT * FROM orderItems"); | |
| console.log(res); | |
| aliDb.exec('SELECT * INTO CSV("orders.csv", {headers:true}) FROM orders'); | |
| aliDb.exec('SELECT * INTO CSV("orderItems.csv", {headers:true}) FROM orderItems'); | |
| $("#headerBtn").text("Saved!"); | |
| } | |
| }).prependTo("#page-menu"); | |
| //Main fetch functionality | |
| //Evaluate all the orders on the current page and then open sub tabs to grab the details | |
| //This function should return quickly, leaving the async worker to do the hard work | |
| $('<button/>', { | |
| text: "Fetch Order Details", | |
| id: 'csvBtn', | |
| style: "background-color:Violet;", | |
| class: 'page-menu-item page-menu-title', | |
| click: function () { | |
| $("#csvBtn").text("Fetching..."); | |
| var res = aliDb.exec("SELECT * FROM orders"); | |
| console.log(res); | |
| let orderIds = []; | |
| $(".order-item").each((ind, eo)=>{ | |
| var hasTracking = $(eo).find(".button-logisticsTracking ").length > 0; | |
| let order = { | |
| id: parseOrderId($(eo).find(".order-item-header-right-info").text().trim()), | |
| date: parseDate($(eo).find(".order-item-header .order-item-header-right .order-item-header-right-info").text().trim()), | |
| amount: parseOrderAmount($(eo).find(".order-item-content-opt-price-total").text().trim()), | |
| status: $(eo).find(".order-item-header .order-item-header-status .order-item-header-status-text").text().trim(), | |
| store_name: $(eo).find(".store-info .first-row .info-body").text().trim(), | |
| store_url: $(eo).find(".store-info .second-row a:first()").attr('href'), | |
| order_tr_num: $(eo).find(".order-action .tracking_number").text().trim(), | |
| order_tr_stat: $(eo).find(".order-body .order-action .tracking_status").text().trim(), | |
| product_action: $(eo).find(".product-action span:first()").text().trim(), | |
| hasTracking: hasTracking, | |
| }; | |
| orderIds.push(order.id); | |
| insertOrder( [ order.id, order.date, order.status, order.amount ] ); | |
| }); | |
| $("#csvBtn").text("Fetching... 0 of " + orderIds.length); | |
| asyncFetchOrderDetails(orderIds); | |
| } | |
| }).prependTo("#page-menu"); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment