Skip to content

Instantly share code, notes, and snippets.

@mrjones-plip
Last active October 22, 2025 19:01
Show Gist options
  • Select an option

  • Save mrjones-plip/0983601a4e3b068d2cc2d7bffaf6f94f to your computer and use it in GitHub Desktop.

Select an option

Save mrjones-plip/0983601a4e3b068d2cc2d7bffaf6f94f to your computer and use it in GitHub Desktop.
Output a markdown table of users who have more than 15k documents

CHT User Doc Count JSON to Markdown

A Python script that shows how to process the output of the /api/v1/users-doc-count API for the CHT.

Use

  1. Save the JSON from the users-doc-count API into a file called users-doc-count.json
  2. In the same directory as users-doc-count.json, create a copy of the proccess.cht.user.doc.counts.py file in this gist
  3. Run the script: python3 proccess.cht.user.doc.counts.py
  4. Your markdown table will be saved in a file called top.users.md

Example

Resulting table in top.users.md will look like this:

4,434 total, 1,234 active, 14 affected, max 254,340

count doc count user name
1 254,340 Melisande
2 135,700 Kairi
3 90,368 Macy
4 56,559 Roux
5 36,642 Charity
6 33,220 Viviana
7 31,864 Milan
8 28,366 Calla
9 25,604 Annora
10 18,061 Ambrosia
11 17,653 Finlee
12 16,142 Leopoldine
13 16,120 Daya
14 15,770 Blaize
import csv
import glob
import json
import os
import subprocess
import sys
def generate_sorted_json(file):
    """Write ``<file>.sorted.json``: each user's name and doc count, ascending by count.

    Reads ``<file>.json``, keeps only the ``user`` and ``count`` fields of every
    entry under the top-level ``users`` key, sorts the entries by ``count`` from
    smallest to largest and writes them as a JSON array.

    The work is done in-process instead of through a ``jq`` shell pipeline, so a
    missing or malformed input raises here rather than silently leaving an
    empty output file behind.

    Args:
        file: Base name of the input file, without the ``.json`` extension.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If the input has no top-level ``users`` key.
    """
    with open(f'{file}.json', encoding='utf-8') as source:
        payload = json.load(source)

    users = payload['users']
    # Accept both an array of users and an object whose values are users.
    entries = users.values() if isinstance(users, dict) else users
    trimmed = [
        {'user': entry.get('user'), 'count': entry.get('count')}
        for entry in entries
    ]
    # Entries without a count sort before every numbered entry.
    trimmed.sort(key=lambda entry: (entry['count'] is not None, entry['count'] or 0))

    with open(f'{file}.sorted.json', 'w', encoding='utf-8') as target:
        json.dump(trimmed, target, indent=2, ensure_ascii=False)
        target.write('\n')
def get_user_counts(file, type):
    """Return one of the site-wide user counts from ``<file>.json`` as a string.

    The number is formatted with thousands separators (e.g. ``'4,434'``).
    The file is parsed once, in-process, rather than by running an external
    ``jq`` pipeline twice per call.

    Args:
        file: Base name of the input file, without the ``.json`` extension.
        type: ``'total'`` for ``couchdb.users.doc_count`` or ``'active'`` for
            ``connected_users.count``. (The name shadows the builtin but is
            kept because callers may pass it by keyword.)

    Returns:
        The requested count, comma-grouped.

    Raises:
        OSError: If the input cannot be read.
        json.JSONDecodeError: If the input is not valid JSON.
        KeyError: If either count is missing from the input, or ``type`` is
            neither ``'total'`` nor ``'active'``.
    """
    with open(f'{file}.json', encoding='utf-8') as source:
        payload = json.load(source)

    # Both values are looked up eagerly so a malformed file fails regardless
    # of which count was requested.
    counts = {
        'total': payload['couchdb']['users']['doc_count'],
        'active': payload['connected_users']['count'],
    }
    return f'{int(counts[type]):,}'
def generate_csv(file):
    """Convert ``<file>.sorted.json`` into ``<file>.sorted.csv``.

    The input is expected to be a JSON array of objects. The first CSV row
    holds the alphabetically sorted union of all object keys; each following
    row holds one object's values in that column order, in input order.
    Strings are double-quoted and numbers are written bare. A key missing
    from an object yields an empty cell.

    When the array is empty the CSV is created empty (no header row), so a
    reader never sees a zero-column row.

    Args:
        file: Base name shared by the input and output files, without the
            ``.sorted.json`` / ``.sorted.csv`` suffix.

    Raises:
        OSError: If the input cannot be read or the output cannot be written.
        json.JSONDecodeError: If the input is not valid JSON.
    """
    with open(f'{file}.sorted.json', encoding='utf-8') as source:
        records = json.load(source)

    columns = sorted({key for record in records for key in record})

    # newline='' lets the csv writer control line endings itself.
    with open(f'{file}.sorted.csv', 'w', encoding='utf-8', newline='') as target:
        if not records:
            return
        writer = csv.writer(target, quoting=csv.QUOTE_NONNUMERIC, lineterminator='\n')
        writer.writerow(columns)
        writer.writerows(
            [record.get(column) for column in columns] for record in records
        )
def output_site_markdown(file, active, total, f, max_docs):
    """Write the summary line and markdown table of users above ``max_docs``.

    Reads ``<file>.sorted.csv``, whose rows are ``count, user`` in ascending
    count order (optionally preceded by a ``count`` header row), and walks it
    from the largest count down, stopping at the first user that is not above
    the limit.

    The number of affected users is counted from the rows actually emitted,
    so it stays correct when the CSV has no header row. Blank rows and rows
    with an empty count are skipped instead of raising.

    Args:
        file: Base name of the CSV, without the ``.sorted.csv`` suffix.
        active: Pre-formatted active-user count for the summary line.
        total: Pre-formatted total-user count for the summary line.
        f: Writable text stream that receives the markdown.
        max_docs: Only users with strictly more documents than this are listed.

    Raises:
        OSError: If the CSV cannot be read.
        ValueError: If a non-empty count cell is not a number.
    """
    # newline='' is what the csv module expects for files it parses.
    with open(f'{file}.sorted.csv', encoding='utf-8', newline='') as source:
        rows = [row for row in csv.reader(source) if row]

    table_lines = []
    largest_docs = 0
    for row in reversed(rows):
        count_cell = row[0]
        # Skip the header and any row that carries no count at all.
        if count_cell == 'count' or not count_cell.strip():
            continue
        doc_count = int(float(count_cell))
        if doc_count <= max_docs:
            # Rows are in descending order here, so nothing further can match.
            break
        formatted_doc_count = f'{doc_count:,}'
        if not table_lines:
            largest_docs = formatted_doc_count
        table_lines.append(
            f"| {len(table_lines) + 1} | {formatted_doc_count} | {row[1]} |\n"
        )

    users_over_limit = len(table_lines)
    f.write(
        f"{total} total, "
        f"{active} active, "
        f"{users_over_limit} affected, "
        f"max {largest_docs}\n\n"
    )
    f.write("| count | doc count | user name |\n")
    f.write("|--|--|--|\n")
    f.write(''.join(table_lines))
    if users_over_limit == 0:
        f.write(f"| | No users have {max_docs:,} docs | |\n")
def clean_up_temp_file():
    """Delete the intermediate ``*sorted.json`` and ``*sorted.csv`` files.

    Only the current working directory is searched. Deletion is best-effort:
    a file that cannot be removed is reported on stderr and the remaining
    files are still processed. Nothing is reported when no file matches.
    """
    print("Cleaning up temp files...")
    for pattern in ('*sorted.json', '*sorted.csv'):
        for path in glob.glob(pattern):
            try:
                os.remove(path)
            except OSError as err:
                print(f"Could not remove {path}: {err}", file=sys.stderr)
# Users with more than this many documents are listed in the report.
max_docs = 15000
# Base name (without extension) of the saved users-doc-count API response.
json_filename_base = 'users-doc-count'

# Build top.users.md from users-doc-count.json in the current directory.
# UTF-8 is set explicitly so user names outside the locale's default encoding
# cannot make the report write fail.
with open("top.users.md", "w", encoding="utf-8") as f:
    try:
        generate_sorted_json(json_filename_base)
        total_users = get_user_counts(json_filename_base, 'total')
        active_users = get_user_counts(json_filename_base, 'active')
        generate_csv(json_filename_base)
        output_site_markdown(json_filename_base, active_users, total_users, f, max_docs)
    finally:
        # Remove the intermediate files even when one of the steps above fails.
        clean_up_temp_file()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment