mrjones-plip/proccess.cht.user.doc.counts.py

## readme.md

      
    Raw
  

              readme.md
            
          
    CHT User Doc Count JSON to Markdown

A Python script that shows how to process the output of the /api/v1/users-doc-count API for the CHT and turn it into a Markdown table.
Use


Save the JSON returned by the users-doc-count API in a file called users-doc-count.json
In the same directory as users-doc-count.json, create a copy of the proccess.cht.user.doc.counts.py file in this gist
Run the script: python3 proccess.cht.user.doc.counts.py
Your markdown table will be saved in a file called top.users.md

Example

Resulting table in top.users.md will look like this:
4,434 total, 1,234 active, 14 affected, max 254,340


count
doc count
user name


1
254,340
Melisande


2
135,700
Kairi


3
90,368
Macy


4
56,559
Roux


5
36,642
Charity


6
33,220
Viviana


7
31,864
Milan


8
28,366
Calla


9
25,604
Annora


10
18,061
Ambrosia


11
17,653
Finlee


12
16,142
Leopoldine


13
16,120
Daya


14
15,770
Blaize


## proccess.cht.user.doc.counts.py
import subprocess
import csv


def generate_sorted_json(file):
    """Write ``<file>.sorted.json`` with each user's doc count, smallest first.

    Reads ``<file>.json``, takes every entry under its top-level ``users``
    key, keeps only the ``user`` and ``count`` fields and writes the
    resulting list, sorted ascending by ``count``, as JSON.

    The work is done with the standard library rather than by building a
    shell command, so the file name may contain spaces or shell
    metacharacters, and a missing or malformed input raises instead of
    silently leaving an empty output file behind.

    Args:
        file: Path of the input file without its ``.json`` extension.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        ValueError: If the input is not valid JSON.
        KeyError: If the input has no top-level ``users`` key.
    """
    # Local import: the surrounding file only imports subprocess and csv.
    import json

    with open(file + '.json', encoding='utf-8') as source:
        payload = json.load(source)

    users = payload['users']
    if isinstance(users, dict):
        # Accept a mapping of entries as well as a list of entries.
        users = users.values()

    records = [
        {'user': entry.get('user'), 'count': entry.get('count')}
        for entry in users
    ]
    # Entries without a count sort before all numeric counts; list.sort is
    # stable, so entries with equal counts keep their input order.
    records.sort(key=lambda record: (record['count'] is not None, record['count'] or 0))

    with open(file + '.sorted.json', 'w', encoding='utf-8') as target:
        json.dump(records, target, indent=2, ensure_ascii=False)
        target.write('\n')


def get_user_counts(file, type):
    """Return one of the headline user totals, formatted with thousands separators.

    Reads ``<file>.json`` once and extracts both totals:

    * ``'total'``  - the value at ``couchdb.users.doc_count``
    * ``'active'`` - the value at ``connected_users.count``

    Args:
        file: Path of the input file without its ``.json`` extension.
        type: Which total to return, ``'total'`` or ``'active'``.

    Returns:
        The requested total as a string such as ``'4,434'``.

    Raises:
        OSError: If the input file cannot be read.
        ValueError: If the input is not valid JSON.
        KeyError: If a required key is missing or ``type`` is not one of the
            two supported names.
    """
    # Local import: the surrounding file only imports subprocess and csv.
    import json

    with open(f"{file}.json", encoding='utf-8') as source:
        payload = json.load(source)

    # Both values are resolved up front so a malformed file is reported no
    # matter which total the caller asked for.
    counts = {
        'total': int(payload['couchdb']['users']['doc_count']),
        'active': int(payload['connected_users']['count']),
    }
    return f'{counts[type]:,}'


def generate_csv(file):
    """Convert ``<file>.sorted.json`` into ``<file>.sorted.csv``.

    The input is expected to be a JSON list of flat objects. The output has
    a header row holding the union of all keys in sorted order (so ``count``
    precedes ``user``), followed by one row per object in input order.
    Strings are double-quoted, numbers are written bare, and lines end with
    ``\\n``. An empty input list produces an empty CSV file.

    Args:
        file: Base path; ``.sorted.json`` is read and ``.sorted.csv`` written.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        ValueError: If the input is not valid JSON.
    """
    # Local import: the surrounding file only imports subprocess and csv.
    import json

    sorted_json_file = file + '.sorted.json'
    sorted_csv_file = file + '.sorted.csv'

    with open(sorted_json_file, encoding='utf-8') as source:
        records = json.load(source)

    columns = sorted({key for record in records for key in record})

    with open(sorted_csv_file, 'w', newline='', encoding='utf-8') as target:
        if not columns:
            # Nothing to report: leave the file empty rather than emitting a
            # blank line that readers would see as a zero-column row.
            return
        writer = csv.writer(target, quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n')
        writer.writerow(columns)
        writer.writerows([record.get(column) for column in columns] for record in records)


def output_site_markdown(file, active, total, f, max_docs):
    """Write the summary line and the Markdown table of users over the limit.

    Reads ``<file>.sorted.csv`` (first column doc count, second column user
    name, rows in ascending count order) and lists, largest first, every
    user whose doc count is strictly greater than ``max_docs``.

    The number of affected users is counted from the rows actually emitted,
    so it stays correct whether or not the CSV carries a ``count`` header
    row. Blank rows are ignored.

    Args:
        file: Base path; ``.sorted.csv`` is appended to find the input.
        active: Already-formatted number of active users for the summary.
        total: Already-formatted number of total users for the summary.
        f: Writable text stream that receives the Markdown.
        max_docs: Integer threshold; only users above it are listed.

    Raises:
        OSError: If the CSV file cannot be read.
        ValueError: If a count cell is not numeric.
    """
    sorted_csv_file = file + '.sorted.csv'
    with open(sorted_csv_file, mode='r', newline='', encoding='utf-8') as csv_handle:
        # Drop blank rows and the header row before ranking.
        rows = [row for row in csv.reader(csv_handle) if row and row[0] != 'count']

    table_lines = []
    largest_docs = 0
    # The CSV is ascending, so walk it backwards and stop at the first user
    # that is at or below the threshold.
    for rank, row in enumerate(reversed(rows), start=1):
        doc_count = int(float(row[0]))
        if doc_count <= max_docs:
            break
        formatted_doc_count = f'{doc_count:,}'
        if rank == 1:
            largest_docs = formatted_doc_count
        table_lines.append(f"| {rank} | {formatted_doc_count} | {row[1]} |\n")

    users_over_limit = len(table_lines)
    f.write(
        f"{total} total, "
        f"{active} active, "
        f"{users_over_limit} affected, "
        f"max {largest_docs}\n\n"
    )
    f.write("| count | doc count | user name |\n")
    f.write("|--|--|--|\n")
    f.write("".join(table_lines))
    if users_over_limit == 0:
        f.write(f"| | No users have {max_docs:,} docs | |\n")


def clean_up_temp_file():
    """Delete the intermediate ``*sorted.json`` and ``*sorted.csv`` files.

    Only files in the current working directory are considered. Having
    nothing to delete is not an error, and a file that cannot be removed is
    reported on stdout without aborting the rest of the clean-up.
    """
    # Local imports: the surrounding file only imports subprocess and csv.
    import glob
    import os

    print("Cleaning up temp files...")
    for pattern in ('*sorted.json', '*sorted.csv'):
        for path in glob.glob(pattern):
            try:
                os.remove(path)
            except OSError as err:
                # Best effort: keep going so the remaining files still get removed.
                print(f"Could not remove {path}: {err}")


# Users with more docs than this threshold are listed in the report.
max_docs = 15000
# Input file name without its ".json" extension; the intermediate
# ".sorted.json" / ".sorted.csv" files are derived from the same base.
json_filename_base = 'users-doc-count'

# The report file stays open while the intermediate files are produced, so
# the Markdown is written as the last step inside this block.
with open("top.users.md", mode="w") as f:
    generate_sorted_json(json_filename_base)
    total_users, active_users = (
        get_user_counts(json_filename_base, which)
        for which in ('total', 'active')
    )
    generate_csv(json_filename_base)
    output_site_markdown(
        file=json_filename_base,
        active=active_users,
        total=total_users,
        f=f,
        max_docs=max_docs,
    )

# Remove the intermediate files once the report has been written and closed.
clean_up_temp_file()
count	doc count	user name
1	254,340	Melisande
2	135,700	Kairi
3	90,368	Macy
4	56,559	Roux
5	36,642	Charity
6	33,220	Viviana
7	31,864	Milan
8	28,366	Calla
9	25,604	Annora
10	18,061	Ambrosia
11	17,653	Finlee
12	16,142	Leopoldine
13	16,120	Daya
14	15,770	Blaize
	import subprocess
	import csv


	def generate_sorted_json(file):
	command = ('jq "[.users[] \| {user: .user, count: .count}] \| sort_by(.count)" '
	+ file + '.json > '
	+ file + '.sorted.json'
	)
	subprocess.run(command, shell=True, executable="/bin/bash", stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT)


	def get_user_counts(file, type):
	command = f"jq '.connected_users.count, .couchdb.users.doc_count' {file}.json"
	total = int(subprocess.check_output(command, shell=True, executable="/bin/bash").split()[1].rstrip())
	active = int(subprocess.check_output(command, shell=True, executable="/bin/bash").split()[0].rstrip())
	tmp = {
	'total': f'{total:,}',
	'active': f'{active:,}'
	}
	return tmp[type]


	def generate_csv(file):
	sorted_json_file = file + '.sorted.json'
	sorted_csv_file = file + '.sorted.csv'
	command = ('jq -r \'(map(keys) \| add \| unique) as $cols \| map(. as $row \| $cols \| map($row[.])) as $rows \|' +
	' $cols, $rows[] \| @csv\' ' + sorted_json_file + ' > ' + sorted_csv_file)
	subprocess.run(command, shell=True, executable="/bin/bash")


	def output_site_markdown(file, active, total, f, max_docs):
	sorted_csv_file = file + '.sorted.csv'
	markdown = ''
	users_over_limit = 0
	largest_docs = 0
	with open(sorted_csv_file, mode ='r')as file:
	for users_over_limit, user in enumerate(reversed(list(csv.reader(file)))):
	if user[0] == 'count':
	continue
	doc_count = int(float(user[0]))
	if doc_count > max_docs:
	formated_doc_count = f'{doc_count:,}'
	markdown = markdown + f"\| {(users_over_limit+1)} \| {formated_doc_count} \| {user[1]} \|\n"
	if users_over_limit == 0:
	largest_docs = formated_doc_count
	else:
	break
	f.write(
	f"{total} total, " +
	f"{active} active, " +
	f"{users_over_limit} affected, " +
	f"max {largest_docs}\n\n"
	)
	f.write(f"\| count \| doc count \| user name \|\n")
	f.write(f"\|--\|--\|--\|\n")
	f.write(f"{markdown}")
	if users_over_limit == 0:
	f.write(f"\| \| No users have {max_docs:,} docs \| \|\n")


	def clean_up_temp_file():
	command = 'rm sorted.json;rm sorted.csv'
	print(f"Cleaning up temp files...")
	subprocess.run(command, shell=True, executable="/bin/bash")


	max_docs = 15000
	json_filename_base = 'users-doc-count'
	with open("top.users.md", "w") as f:
	generate_sorted_json(json_filename_base)
	total_users = get_user_counts(json_filename_base, 'total')
	active_users = get_user_counts(json_filename_base, 'active')
	generate_csv(json_filename_base)
	output_site_markdown(json_filename_base, active_users, total_users, f, max_docs)

	clean_up_temp_file()