Skip to content

Instantly share code, notes, and snippets.

@seb26
Last active December 4, 2025 19:49
Show Gist options
  • Select an option

  • Save seb26/68be90b27be9e1298bdb0b0f250a4574 to your computer and use it in GitHub Desktop.

Select an option

Save seb26/68be90b27be9e1298bdb0b0f250a4574 to your computer and use it in GitHub Desktop.
mebank_process.py

mebank_process.py

This python script will take an input of HTML or webarchive of the login page of https://internetbanking.mebank.com.au, parse the semi structured HTML content and return the transactions as structured data: JSON, CSV.

As of 05/12/2025, Australian bank MEBank (division of Bank of Queensland) offers an Internet banking platform with no function for transaction export, such as to CSV or OFX.

So this will do the job in the interim.

Design

Works with file input and stdout output. No network connectivity, aka offline.

Input

Saved Webarchive or HTML file containing transaction fields from internetbanking.mebank.com.au.

Function

  • Parse for known fields, interpret to a dict
  • Parse human readable date strings to Australia/Sydney timezone and ISO format

Output

Export to CSV or JSON

Dependencies

Requirements:

  • python3
  • BeautifulSoup4 (pip install beautifulsoup4)
  • Use of terminal/CLI

Usage

  1. Using a web browser that can save to .webarchive or HTML file, login to https://internetbanking.mebank.com.au
  2. Navigate to the account of your choice
  3. Scroll down and 'load more' until you reach the end of the transactions
  4. (Optional) Click on each transaction's dropdown arrow manually one-by-one, to gather detailed info from the dynamic accordion (time & date, receipt number, long description, to/from BSB account number)
  5. Save the page as .webarchive file or HTML to desired folder on disk
    • Note you should not save 'page source', it is only the static HTML page prior to JavaScript loading the elements. You must save the active page HTML content
  6. Install python prerequisite
pip install bs4
  1. Run script
# JSON by default
python mebank_process.py `~/Downloads/ME Go - Internet Banking | Account.webarchive`

# CSV output piped to file
python mebank_process.py `~/Downloads/ME Go - Internet Banking | Account.webarchive` -o csv > ~/Downloads/transactions.csv

Help

python mebank_process.py --help
usage: mebank_process.py [-h] [-o {json,csv}] filename

positional arguments:
  filename              HTML or webarchive file

options:
  -h, --help            show this help message and exit
  -o {json,csv}, --output {json,csv}

Example output

# JSON
[
    {
        "id": "333e3a86-57f4-4c24-8a29-096722a0342e",
        "type": "Local Mechanical Repa",
        "amount": "-$46.19",
        "from_summary": "",
        "from_detail": "",
        "to_summary": "",
        "to_detail": "",
        "date": "2024-03-19T14:22:00+11:00",
        "description": "LOCAL MECHANICAL REPA",
        "receipt": "FX2AB6DFXJSLH"
    },
    {
        "id": "abce650a-18a5-472f-9e92-659f2675de7b",
        "type": "Transfer",
        "amount": "+$1,900.00",
        "from_summary": "John Appleseed",
        "from_detail": "John Appleseed",
        "to_summary": "",
        "to_detail": "062000 123456789",
        "date": "2024-03-27T21:45:00+11:00",
        "description": "Savings",
        "receipt": "MEBX8FD98D6AHFH"
    }
]
# CSV
id,type,amount,date,from_summary,from_detail,to_summary,to_detail,description,receipt
333e3a86-57f4-4c24-8a29-096722a0342e,Local Mechanical Repa,-$46.19,2024-03-19T14:22:00+11:00,,,,,LOCAL MECHANICAL REPA,FX2AB6DFXJSLH
abce650a-18a5-472f-9e92-659f2675de7b,Transfer,"+$1,900.00",2024-03-27T21:45:00+11:00,John Appleseed,John Appleseed,,062000 123456789,Savings,MEBX8FD98D6AHFH

Limitations

  • Must manually save the web page
  • Must manually load each transaction's dynamic accordion in order to obtain complete transaction details
  • No export to OFX (though I think the data source provides sufficient details to make a useful OFX)
#!/usr/bin/env python3
from bs4 import BeautifulSoup
from datetime import datetime
from zoneinfo import ZoneInfo
import argparse
import csv
import json
import plistlib
import sys
def parse_date_field(value):
    """Convert a date string like '01 Dec 2025 at 2:06AM' to ISO-8601.

    The naive parsed datetime is localized to Australia/Sydney before
    formatting. Falsy input yields an empty string; unparseable input is
    returned unchanged (best-effort fallback).
    """
    if not value:
        return ""
    try:
        parsed = datetime.strptime(value, "%d %b %Y at %I:%M%p")
    except Exception:
        # Keep the raw text rather than losing data on an unexpected format.
        return value
    return parsed.replace(tzinfo=ZoneInfo("Australia/Sydney")).isoformat()
def parse_html(html):
    """Parse HTML content for known MEBank transaction fields.

    Returns a list of dicts, one per transaction row, with keys:
    id, type, amount, from_summary, from_detail, to_summary, to_detail,
    plus date/description/receipt when the expanded details panel exists
    (it is rendered only after the user opens the accordion in-browser).
    """
    soup = BeautifulSoup(html, "html.parser")
    transactions = []
    for item in soup.select('[data-testid="transaction-history-item"]'):
        # Transaction ID is encoded in the accordion trigger button's testid.
        parent_button = item.find_parent('button', {'aria-controls': True})
        tx_id = ''
        if parent_button:
            # Guard: .get() may return None if data-testid is absent;
            # original code would raise AttributeError on .replace().
            tx_id = (parent_button.get('data-testid') or '').replace('accordion-trigger-', '')
        # Transaction type / merchant name.
        type_tag = item.select_one('span[data-testid$="-transaction-history-item-name"]')
        tx_type = type_tag.get_text(strip=True) if type_tag else ''
        # Summary "From:"/"To:" label on the collapsed row.
        from_summary = ''
        to_summary = ''
        summary_tag = item.select_one('div.flex.flex-col span.text-label-sm')
        if summary_tag:
            text = summary_tag.get_text(strip=True)
            if text.lower().startswith('from:'):
                from_summary = text[5:].strip()
            elif text.lower().startswith('to:'):
                to_summary = text[3:].strip()
        # Signed, currency-formatted amount string, e.g. "-$46.19".
        amount_tag = item.select_one('div.flex.flex-col.items-end span.text-label-md')
        amount = amount_tag.get_text(strip=True) if amount_tag else ''
        # Expanded accordion details panel with label/value rows.
        from_detail = ''
        to_detail = ''
        details = {}
        details_panel = item.find_next('div', {'role': 'region'})
        # Guard: panel can be missing (accordion never opened); original
        # code would raise AttributeError on .select() of None.
        if details_panel is not None:
            for row in details_panel.select('div.flex.justify-between'):
                spans = [s.get_text(strip=True) for s in row.select('span') if s.get_text(strip=True)]
                if len(spans) < 2:
                    continue
                label = spans[0].lower()
                value = spans[-1]
                if label == "when":
                    details['date'] = parse_date_field(value)
                elif label == "description":
                    details['description'] = value
                elif label == "receipt":
                    details['receipt'] = value
                elif label == "from":
                    from_detail = value
                elif label == "to":
                    to_detail = value
        transactions.append({
            'id': tx_id,
            'type': tx_type,
            'amount': amount,
            'from_summary': from_summary,
            'from_detail': from_detail,
            'to_summary': to_summary,
            'to_detail': to_detail,
            **details,
        })
    return transactions
def open_webarchive(filename):
    """Load a Safari .webarchive (binary plist), extract the main HTML
    resource, and parse it for transactions."""
    with open(filename, "rb") as fh:
        archive = plistlib.load(fh)
    # The page's HTML lives in the main resource's data payload.
    raw = archive['WebMainResource']['WebResourceData']
    return parse_html(raw.decode('utf-8', errors='ignore'))
def open_html(filename):
    """Read an HTML file from disk and parse it for transactions."""
    with open(filename, encoding='utf-8') as fh:
        content = fh.read()
    return parse_html(content)
def output_csv(transactions, out_file):
    """Write transactions to *out_file* as CSV with a fixed column order.

    Keys missing from a transaction are emitted as empty cells so every
    row has the same shape; extra keys are dropped.
    """
    columns = [
        'id', 'type', 'amount', 'date', 'from_summary', 'from_detail',
        'to_summary', 'to_detail', 'description', 'receipt',
    ]
    writer = csv.DictWriter(out_file, fieldnames=columns)
    writer.writeheader()
    writer.writerows({col: tx.get(col, '') for col in columns} for tx in transactions)
def main():
    """CLI entry point: parse the given file and print JSON or CSV to stdout."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('filename', help='HTML or webarchive file')
    arg_parser.add_argument('-o', '--output', choices=['json', 'csv'], default='json')
    args = arg_parser.parse_args()
    # Dispatch on extension: .webarchive is a binary plist, anything else
    # is treated as plain HTML.
    loader = open_webarchive if args.filename.endswith('.webarchive') else open_html
    transactions = loader(args.filename)
    if args.output == 'csv':
        output_csv(transactions, out_file=sys.stdout)
    else:
        print(json.dumps(transactions, indent=4))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment