-
-
Save roder/743047 to your computer and use it in GitHub Desktop.
| #!/usr/bin/env python | |
| from optparse import OptionParser | |
| from xml.dom.minidom import parse | |
| import os | |
| import sqlite3 | |
| datatypeMap = { | |
| 'integer': 'INT', | |
| 'datetime': 'DATETIME', | |
| 'boolean': 'BOOLEAN' | |
| } | |
| defaultDataType = 'TEXT' | |
| def get_xml_doms(directory): | |
| result = [] | |
| for filename in directory: | |
| if filename.endswith('.xml'): | |
| dom = parse(filename) | |
| result.append(dom) | |
| return result | |
| def yield_db_schema(dbDef): | |
| result = '' | |
| for (table, tableDef) in dbDef.items(): | |
| result += create_table(table, tableDef) | |
| return result | |
| def exec_create_schema(dbDef, conn, db): | |
| for (table, tableDef) in dbDef.items(): | |
| create = create_table(table, tableDef) | |
| db.execute(create) | |
| def yield_inserts(recordSet): | |
| inserts = '' | |
| for (table, rows) in recordSet.items(): | |
| for row in rows: | |
| fields = "\'" + '\', \''.join(row.keys()) + "\'" | |
| data = "\'" + '\', \''.join(row.values()) + "\'" | |
| if fields != "''": | |
| inserts += "INSERT INTO \'%s\' (%s) VALUES (%s);\n" % (table, fields, data) | |
| return inserts | |
| def exec_insert(recordSet, conn, db): | |
| for (table, rows) in recordSet.items(): | |
| for row in rows: | |
| fields = "\'" + '\', \''.join(row.keys()) + "\'" | |
| data = "\'" + '\', \''.join(row.values()) + "\'" | |
| if len(row.keys()) >0: | |
| marklist = ["?"] * len(row.keys()) | |
| marks = ', '.join(marklist) | |
| insert = "INSERT INTO \'%s\' (%s) VALUES (%s)" % (table, fields, marks) | |
| values = tuple(row.values()) | |
| db.execute(insert, values) | |
| conn.commit() | |
| def create_table(table, tableDef): | |
| fields = [] | |
| begin = 'CREATE TABLE \'%s\' ( \n' % table | |
| for field, fieldDef in tableDef.items(): | |
| fields.append(create_field(field, fieldDef)) | |
| end = '\n);\n\n' | |
| result = begin + ',\n'.join(fields) + end | |
| return result | |
| def create_field(field, fieldDef): | |
| if fieldDef.has_key(u'type'): | |
| datatype = fieldDef.get(u'type') | |
| else: | |
| datatype = defaultDataType | |
| return " '%s' %s" % (field, datatype) | |
| def collect_structure(doms): | |
| db = {} | |
| records = {} | |
| for dom in doms: | |
| db = gen_db_struct(dom.childNodes, db) | |
| return db | |
| def collect_data(dbDef, doms): | |
| recordset = {} | |
| for dom in doms: | |
| for (table, fieldDef) in dbDef.items(): | |
| if not recordset.has_key(table): | |
| recordset[table] = [] | |
| for row in dom.getElementsByTagName(table): | |
| record = {} | |
| for (column, _) in fieldDef.items(): | |
| for node in row.getElementsByTagName(column): | |
| if node.hasChildNodes(): | |
| for item in node.childNodes: | |
| if hasattr(item, 'data'): | |
| if len(item.data.strip()) > 0: | |
| record[column] = item.data | |
| recordset[table].append(record) | |
| return recordset | |
| def gen_db_struct(nodeList, db = {}): | |
| for node in nodeList: | |
| if not node.hasChildNodes() and node.parentNode.parentNode.nodeName != '#document': | |
| # a new field of data | |
| field = node.parentNode | |
| fieldName = field.nodeName | |
| table = field.parentNode | |
| tableName = table.nodeName | |
| if not db.has_key(tableName): | |
| db[tableName] = {} | |
| db[tableName][fieldName] = {} | |
| if field.hasAttributes(): | |
| for (Key, Value) in field.attributes.items(): | |
| if Key != u'type' and Value != u'array': | |
| db[tableName][fieldName][Key] = datatypeMap[Value] | |
| else: | |
| gen_db_struct(node.childNodes, db) | |
| return db | |
| def run(inputDir, outputFile): | |
| files = [] | |
| for filename in os.listdir(inputDir): | |
| files.append(os.path.join(inputDir, filename)) | |
| domList = get_xml_doms(files) | |
| dbDef = collect_structure(domList) | |
| records = collect_data(dbDef, domList) | |
| conn = sqlite3.connect(outputFile) | |
| db = conn.cursor() | |
| exec_create_schema(dbDef, conn, db) | |
| exec_insert(records, conn, db) | |
| db.close() | |
| def main(): | |
| usage = "usage: %prog [options] /path/to/dir/with/xml" | |
| parser = OptionParser(usage) | |
| parser.add_option("-f", "--file", dest="outputFile", default = 'xmlsqlite.db3', | |
| help="Specify the filename for the sqlite database. It will be created if it does not exist [Default: xmlsqlite.db3]") | |
| (options, args) = parser.parse_args() | |
| if len(args) != 1: | |
| parser.error("incorrect number of arguments") | |
| inputDir = os.path.abspath(os.path.expanduser(args[0])) | |
| run(inputDir, options.outputFile) | |
| if __name__ == "__main__": main() |
With my data I get the error:
"KeyError: '1'
If I replace line 128 with
db[tableName][fieldName][Key] = Value
the script runs, but does not create a foreign key in the sr_fa table.
Here is an example of my data:
<sr_id>5430</sr_id>
<sr_vorname>Bernd</sr_vorname>
<sr_faecher>
<sr_fa ind="1">
<srf_kurz>GE</srf_kurz>
<srf_kurs hj="1">
<srfk_kurz>GE1</srfk_kurz>
<srfk_punkte>12</srfk_punkte>
</srf_kurs>
<srf_kurs hj="2">
<srfk_kurz>E1</srfk_kurz>
</srf_kurs>
<srf_kurs hj="3">
<srfk_kurz>E1</srfk_kurz>
</srf_kurs>
<srf_kurs hj="4">
<srfk_kurz>E1</srfk_kurz>
</srf_kurs>
</sr_fa>
<sr_fa ind="2">
<srf_kurz>DE</srf_kurz>
<srf_kurs hj="1">
<srfk_kurz>DE1</srfk_kurz>
<srfk_punkte>12</srfk_punkte>
</srf_kurs>
<srf_kurs hj="2">
<srfk_kurz>DE1</srfk_kurz>
</srf_kurs>
<srf_kurs hj="3">
<srfk_kurz>DE1</srfk_kurz>
</srf_kurs>
<srf_kurs hj="4">
<srfk_kurz>DE1</srfk_kurz>
</srf_kurs>
</sr_fa>
<sr_fa ind="3">
<srf_kurz>PW</srf_kurz>
<srf_kurs hj="1">
<srfk_kurz>PW1</srfk_kurz>
<srfk_punkte>3</srfk_punkte>
</srf_kurs>
<srf_kurs hj="2">
<srfk_kurz>PW1</srfk_kurz>
</srf_kurs>
<srf_kurs hj="3">
<srfk_kurz>PW1</srfk_kurz>
</srf_kurs>
<srf_kurs hj="4">
<srfk_kurz>PW1</srfk_kurz>
</srf_kurs>
</sr_fa>
</sr_faecher>
Hope, someone can help me.
has_key was removed