Created
November 7, 2017 03:43
-
-
Save marquisthunder/4f69af360bb29785a2ce84af1de3e40d to your computer and use it in GitHub Desktop.
[pmml]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def pmml_validation(pmml):
    """
    Upload a PMML document to the openscoring validation endpoint.

    The text is UTF-8 encoded and sent with an HTTP PUT to ``validate_url``
    under a freshly generated UUID. Returns True when the response status
    is 200 or 201, and False for any other status.
    """
    payload = bytes(pmml, encoding='utf-8')
    target = '{}/{}'.format(validate_url, uuid.uuid4())
    response = requests.put(target, headers={'Content-type': 'text/xml'}, data=payload)
    return response.status_code in (200, 201)
def _as_list(node):
    """Return *node* as a list; xmltodict yields a bare mapping for a lone child."""
    return node if isinstance(node, list) else [node]


def verify_pmml(pmml, verifydata, retry=3):
    """Attach a ModelVerification block to a PMML file and validate it.

    Reads ``<pmml>.pmml``, derives the verification fields from the data
    dictionary and the model's output fields, embeds a small sample of
    ``verifydata`` as an inline table and submits the serialized document to
    ``pmml_validation``. The first document that is accepted is written to
    ``<pmml>-verified.pmml``.

    Args:
        pmml: Path of the PMML file without its ``.pmml`` extension.
        verifydata: Verification records; anything ``DataFrame(data=...)``
            accepts. NOTE(review): columns are presumably named after the
            data fields / output columns listed in the PMML -- confirm
            against the caller.
        retry: Maximum number of validation attempts, capped at the number
            of rows in ``verifydata``.

    Raises:
        KeyError: If the document has no ``*Model`` element under ``PMML``.
        RuntimeError: If no attempt is accepted by the validation service.
    """
    with codecs.open("{}.pmml".format(pmml), "r", encoding="utf-8") as inpmml:
        bd = xmltodict.parse(inpmml.read(), encoding="utf-8")
    root = bd["PMML"]

    dataFields = [OrderedDict([('@field', dataField['@name']),
                               ('@column', dataField['@name'])])
                  for dataField in _as_list(root["DataDictionary"]["DataField"])]

    # The model element is the first child of <PMML> whose tag contains "Model".
    modelType = next((tag for tag in root if "Model" in tag), None)
    if modelType is None:
        raise KeyError("no *Model element found under PMML in {}.pmml".format(pmml))
    model = root[modelType]

    # Rebase Output under modelType: move it up from the last segment's
    # RegressionModel so the verification columns refer to top-level outputs.
    if "Segmentation" in model:
        last_segment = _as_list(model["Segmentation"]["Segment"])[-1]
        model["Output"] = last_segment["RegressionModel"].pop("Output")

    predFields = [OrderedDict([('@field', outField['@name']),
                               ('@column', "{}_{}".format(outField['@feature'], outField['@value'])
                                if '@value' in outField else outField['@name'])])
                  for outField in _as_list(model['Output']['OutputField'])]
    allFields = dataFields + predFields
    verificationFields = OrderedDict([('VerificationField', allFields)])

    # Convert once, and cap the attempts by the number of *rows* (len() of a
    # raw column dict would count columns instead).
    frame = DataFrame(data=verifydata)
    attempts = min(retry, len(frame))

    folds = None
    if len(frame) > 3:
        # Each fold's test split holds roughly two rows.
        splitter = KFold(n_splits=len(frame) // 2)
        folds = [test_idx for _, test_idx in splitter.split(frame)]

    for attempt in range(attempts):
        if folds is not None:
            # Fold 0 first (as before), then a different fold per attempt so a
            # retry does not resubmit the identical sample.
            item = frame.iloc[folds[attempt % len(folds)]]
        else:
            # Walk single rows from the last candidate down to row 0.
            row = attempts - 1 - attempt
            item = frame.iloc[row:row + 1]

        rows = [OrderedDict(record) for record in item.to_dict(orient='records')]
        model["ModelVerification"] = OrderedDict([
            ('@recordCount', str(len(rows))),
            ('@fieldCount', str(len(allFields))),
            ('VerificationFields', verificationFields),
            ('InlineTable', OrderedDict([('row', rows)])),
        ])
        serialized = xmltodict.unparse(bd, encoding="utf-8", pretty=True)

        # Validate the document that was just built, not the file-name stem.
        if pmml_validation(serialized):
            with codecs.open("{}-verified.pmml".format(pmml), "w", encoding="utf-8") as outpmml:
                outpmml.write(serialized)
            print("verification done")
            return

    raise RuntimeError("The JPMML-SkLearn conversion application has failed.\n"
                       "predict_prob precision mismatch")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment