@markkernke
Last active July 28, 2020 13:36
A Python script for processing Tableau VizQL logs into a format usable by Lumberjack; an example invocation is shown after the script.
import csv, argparse, json, zipfile, os, sys  # sys is used for sys.exit() on missing arguments
from io import BytesIO
# Parse input
parser = argparse.ArgumentParser(description='Convert Tableau Logs for Lumberjack')
parser.add_argument('-i', '--input-file', help='Location of logs.zip', default=None)
parser.add_argument('-o', '--output-file', help='Location to write csv output', default=None)
parser.add_argument('-a', '--append', help='Append to the csv output file', action='store_true')
args = parser.parse_args()
logs_zip = args.input_file
output_file = args.output_file
append_file = args.append
# Error handling
if logs_zip is None or output_file is None:
    sys.exit('Missing Parameter: Please use --help for details')
# Counters: id_no tracks every raw log line read, line_no tracks JSON lines parsed
id_no = 0
line_no = 0
def cache_hit(job):
    # Classify a query job based on which keys are present in the job record
    if job.get('fusion-parent', None) is not None:
        return 'fusion'
    elif job.get('owner-component', None) is not None:
        return 'unknown'
    elif job.get('exception', None) is not None:
        return 'exception'
    else:
        return 'cache_hit'
def process_log(z, log_file, writer):
    # Read one vizqlserver log file from the open zip and write a CSV row
    # for every job in each 'qp-batch-summary' JSON log line.
    global id_no, line_no
    for line in z.open(log_file):
        id_no = id_no + 1
        if len(line) > 0 and line.startswith(b'{'):
            line_no = line_no + 1
            # Replace pipe characters before parsing the JSON log line
            line = json.loads(str(line, 'utf-8').replace("|", "!"))
            if line['k'] == 'qp-batch-summary':
                for job in line['v']['jobs']:
                    line_out = {
                        'id': id_no,
                        'line': line_no,
                        'ts': line['ts'],
                        'date': line['ts'].split('T')[0],
                        'time': line['ts'].split('T')[1],
                        'pid': line['pid'],
                        'tid': line['tid'],
                        'sev': line['sev'],
                        'req': line['req'],
                        'sess': line['sess'],
                        'site': line['site'],
                        'user': line['user'].split('\\')[1] if '\\' in line['user'] else line['user'],
                        'k': line['k'],
                        'elapsed': line['v']['elapsed'],
                        'elapsed_sum': line['v']['elapsed-sum'],
                        'job_count': line['v']['job-count'],
                        'query_id': job.get('query-id', ''),
                        'cache_hit': cache_hit(job),
                        'elapsed_query': job.get('elapsed', 0) if cache_hit(job) == 'cache_hit' else 0,
                        'sheet': job.get('owner-worksheet', ''),
                        'View (Dashboard)': job.get('owner-dashboard', ''),
                        'query_processor_type': job.get('owner-component', ''),
                        'query_text': job.get('query-compiled', job.get('exception', 'No Query')).replace("\n", r"\n").replace("\r", r"\r"),
                        'query_batch_text': job.get('query-abstract', '').replace("\n", r"\n").replace("\r", r"\r")
                    }
                    writer.writerow(line_out)
fieldnames = ["id","line","ts","date","time","pid","tid","sev","req","sess","site","user","k","elapsed","elapsed_sum","job_count","query_id","cache_hit","elapsed_query","sheet","View (Dashboard)","query_processor_type","query_text","query_batch_text"]
file_action = 'a' if append_file else 'w'
# newline='' prevents the csv module from writing blank rows on Windows
with open(os.path.abspath(os.path.expanduser(output_file)), file_action, encoding='utf-8', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    if not append_file:
        writer.writeheader()
    with zipfile.ZipFile(os.path.abspath(os.path.expanduser(logs_zip)), "r") as zfile:
        # VizQL logs on the Primary
        vizql_logs = [z for z in zfile.namelist() if ('vizqlserver/Logs/vizqlserver_' in z and z.endswith('.txt')) or ('/logs/nativeapi_vizqlserver_' in z and z.endswith('.txt'))]
        print("Primary Node" if len(vizql_logs) > 0 and 'vizqlserver/Logs/vizqlserver_' in vizql_logs[0] else "Processing Logs 🐧")
        for vl in vizql_logs:
            print("\tPrimary VizQL Log: {vl}".format(vl=vl))
            process_log(zfile, vl, writer)
        # VizQL logs in each worker are zipped in the worker's logs
        worker_zips = [z for z in zfile.namelist() if z.endswith('.zip')]
        for wz in worker_zips:
            print("Worker: {wz}".format(wz=wz))
            zip_stream = BytesIO(zfile.read(wz))
            with zipfile.ZipFile(zip_stream) as zf:
                vizql_logs = [z for z in zf.namelist() if 'vizqlserver/Logs/vizqlserver_' in z and z.endswith('.txt')]
                for vl in vizql_logs:
                    print("\tWorker '{wz}' VizQL Log: {vl}".format(wz=wz, vl=vl))
                    process_log(zf, vl, writer)
print('Log File Data Ready for Tableau!\n{line_no} Query Lines Processed.'.format(line_no=line_no))
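For reference, a minimal sketch of how the script could be invoked, assuming it has been saved as convert_logs.py (the script name and the archive/output paths are placeholders):

python convert_logs.py --input-file ~/Downloads/logs.zip --output-file vizql_queries.csv
python convert_logs.py --input-file ~/Downloads/worker-logs.zip --output-file vizql_queries.csv --append

The --append flag opens the output file in append mode and skips the header row, so additional log archives can be added to the same CSV before it is loaded into Tableau.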