Last active
July 28, 2020 13:36
-
-
Save markkernke/a05a0a226952bf595cc8302f6e79ccd8 to your computer and use it in GitHub Desktop.
A Python script for processing VizQL logs into a format usable by Lumberjack
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv,argparse,json,zipfile,os | |
| from io import BytesIO | |
# --- CLI setup --------------------------------------------------------------
# Parse command-line options for the conversion run.
parser = argparse.ArgumentParser(description='Convert Tableau Logs for Lumberjack')
parser.add_argument('-i', '--input-file', help='Location of logs.zip', default=None)
parser.add_argument('-o', '--output-file', help='Location to write csv output', default=None)
parser.add_argument('-a', '--append', help='Append to the csv output file', action='store_true')
args = parser.parse_args()
logs_zip = args.input_file
output_file = args.output_file
append_file = args.append
# Error handling: both the input archive and the output path are required.
# BUGFIX: the original called sys.exit() without importing sys, so the error
# path itself crashed with a NameError. raise SystemExit(...) is equivalent
# (sys.exit merely raises SystemExit) and needs no import.
if logs_zip is None or output_file is None:
    raise SystemExit('Missing Parameter: Please use --help for details')
# Counters shared with process_log() via `global`:
#   id_no   - every line read from any log file
#   line_no - only the JSON records actually parsed
id_no = 0
line_no = 0
def cache_hit(job):
    """Classify one query job from a qp-batch-summary record.

    Returns the first matching label, checked in priority order:
    'fusion' when the job has a fusion-parent, 'unknown' when it names
    an owner-component, 'exception' when it carries an exception, and
    'cache_hit' when none of those markers are present.
    """
    # Priority table: the first key with a non-None value decides the label.
    markers = (
        ('fusion-parent', 'fusion'),
        ('owner-component', 'unknown'),
        ('exception', 'exception'),
    )
    for key, label in markers:
        if job.get(key) is not None:
            return label
    return 'cache_hit'
def process_log(z, log_file, writer):
    """Extract qp-batch-summary job rows from one VizQL log inside *z*.

    z        -- an open zipfile.ZipFile containing log_file
    log_file -- archive member name of the VizQL log to read
    writer   -- csv.DictWriter that receives one row per query job

    Increments the module-level counters: id_no for every line read,
    line_no for every JSON record parsed.
    """
    global id_no, line_no
    for raw in z.open(log_file):  # raw is bytes; don't shadow it with the parsed dict
        id_no = id_no + 1
        # VizQL logs mix plain text with JSON records; only JSON is interesting.
        if len(raw) > 0 and raw.startswith(b'{'):
            line_no = line_no + 1
            # Pipes are swapped for '!' so downstream pipe-delimited tooling
            # (lumberjack) is not broken by '|' appearing inside log values.
            record = json.loads(str(raw, 'utf-8').replace("|", "!"))
            if record['k'] != 'qp-batch-summary':
                continue
            # Hoist per-record work out of the per-job loop (the original
            # re-split the timestamp and re-parsed the user for every job).
            # partition() also tolerates a timestamp without 'T' (time -> '')
            # where split('T')[1] would raise IndexError.
            ts = record['ts']
            ts_date, _, ts_time = ts.partition('T')
            user = record['user']
            if '\\' in user:
                user = user.split('\\')[1]  # strip the "DOMAIN\" prefix
            v = record['v']
            for job in v['jobs']:
                hit = cache_hit(job)  # classify once; the original called it twice per job
                writer.writerow({
                    'id': id_no,
                    'line': line_no,
                    'ts': ts,
                    'date': ts_date,
                    'time': ts_time,
                    'pid': record['pid'],
                    'tid': record['tid'],
                    'sev': record['sev'],
                    'req': record['req'],
                    'sess': record['sess'],
                    'site': record['site'],
                    'user': user,
                    'k': record['k'],
                    'elapsed': v['elapsed'],
                    'elapsed_sum': v['elapsed-sum'],
                    'job_count': v['job-count'],
                    'query_id': job.get('query-id', ''),
                    'cache_hit': hit,
                    # elapsed only counts when the job actually ran the query
                    'elapsed_query': job.get('elapsed', 0) if hit == 'cache_hit' else 0,
                    'sheet': job.get('owner-worksheet', ''),
                    'View (Dashboard)': job.get('owner-dashboard', ''),
                    'query_processor_type': job.get('owner-component', ''),
                    # Escape literal newlines so each CSV row stays on one line.
                    'query_text': job.get('query-compiled', job.get('exception', 'No Query')).replace("\n", r"\n").replace("\r", r"\r"),
                    'query_batch_text': job.get('query-abstract', '').replace("\n", r"\n").replace("\r", r"\r"),
                })
# Column order for the output CSV (must match the keys written by process_log).
fieldnames = ["id","line","ts","date","time","pid","tid","sev","req","sess","site","user","k","elapsed","elapsed_sum","job_count","query_id","cache_hit","elapsed_query","sheet","View (Dashboard)","query_processor_type","query_text","query_batch_text"]
file_action = 'a' if append_file else 'w'
# BUGFIX: newline='' is required by the csv module when writing; without it
# every row is followed by a blank line on Windows.
with open(os.path.abspath(os.path.expanduser(output_file)), file_action, newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    if not append_file:
        writer.writeheader()  # only a fresh file gets a header row
    with zipfile.ZipFile(os.path.abspath(os.path.expanduser(logs_zip)), "r") as zfile:
        # VizQL logs on the Primary node live directly in the archive.
        vizql_logs = [z for z in zfile.namelist() if ('vizqlserver/Logs/vizqlserver_' in z and z.endswith('.txt')) or ('/logs/nativeapi_vizqlserver_' in z and z.endswith('.txt'))]
        print("Primary Node" if len(vizql_logs) > 0 and 'vizqlserver/Logs/vizqlserver_' in vizql_logs[0] else "Processing Logs 🐧")
        for vl in vizql_logs:
            print("\tPrimary VizQL Log: {vl}".format(vl=vl))
            process_log(zfile, vl, writer)
        # Each worker's logs arrive as a nested zip inside the main archive.
        worker_zips = [z for z in zfile.namelist() if z.endswith('.zip')]
        for wz in worker_zips:
            print("Worker: {wz}".format(wz=wz))
            # Read the nested zip fully into memory; ZipFile needs a seekable stream.
            zip_stream = BytesIO(zfile.read(wz))
            with zipfile.ZipFile(zip_stream) as zf:
                vizql_logs = [z for z in zf.namelist() if 'vizqlserver/Logs/vizqlserver_' in z and z.endswith('.txt')]
                for vl in vizql_logs:
                    print("\tWorker '{wz}' VizQL Log: {vl}".format(wz=wz,vl=vl))
                    process_log(zf, vl, writer)
print('Log File Data Ready for Tableau!\n{line_no} Query Lines Processed.'.format(line_no=line_no))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment