Skip to content

Instantly share code, notes, and snippets.

@jkibele
Last active February 20, 2020 16:24
Show Gist options
  • Select an option

  • Save jkibele/9de7f6d9b3250c09c688 to your computer and use it in GitHub Desktop.

Select an option

Save jkibele/9de7f6d9b3250c09c688 to your computer and use it in GitHub Desktop.
Python code to generate a list of labels from latex documents.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# List All Labels\n",
"\n",
"This notebook will list all the labels in my thesis. My labels are all like `chp:Chapter_Name`, `fig:figure_name`, `eq:equation`, etc. The patterns `patt`, and/or `app_patt` can be modified to suit different label naming conventions."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"chdir = '/home/jkibele/Documents/code_projects/PhDThesis/'"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# reg exp to find full label specifications\n",
"patt = re.compile(\"\\\\label{(\\w*:\\w*)}\")\n",
"# reg exp to find `addtotoc` labels\n",
"app_patt = re.compile(\"(\\w+:\\w+)\")\n",
"\n",
"lbls = []\n",
"for root, dirs, files in os.walk(chdir):\n",
" for fn in files: \n",
" if fn == 'main.tex':\n",
" # my project has some appendices that are pulled into\n",
" # the project using includepdf and the labels for these\n",
" # are specified using `addtotoc`. This if statement\n",
" # finds those labels in main.tex.\n",
" with open(os.path.join(root, fn)) as f:\n",
" lbls.extend(re.findall(app_patt, f.read()))\n",
" elif fn.endswith(\".tex\") and fn<>\"zTemplateChapter1.tex\":\n",
" # there's a template file in my project that I don't\n",
" # want to search for labels.\n",
" with open(os.path.join(root,fn)) as f:\n",
" txt = f.read()\n",
" labels = re.findall(patt, txt)\n",
" lbls.extend(labels)\n",
"typs = []\n",
"names = []\n",
"for lbl in lbls:\n",
" t, n = lbl.split(':')\n",
" typs.append(t)\n",
" names.append(n)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df = pd.DataFrame({'typ':typs, 'nam':names})"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"app:\n",
" depth_param_est\n",
" param_est\n",
" preprocessing\n",
" water_column\n",
"\n",
"chp:\n",
" BPS\n",
" Depth\n",
" Intro\n",
" OpticalRS\n",
" ParamEst\n",
" Water\n",
"\n",
"sec:\n",
" curve_fit_est\n",
" lin_reg_est\n",
"\n",
"fig:\n",
" bps_fig1\n",
" pd_K_ests\n",
" pe_curve_fit\n",
" pe_dwm_bars\n",
" pe_linear_K_est\n",
" pe_linear_K_est_dr\n",
" pe_location_map\n",
" pe_surf_corr_K\n",
" wcc_depths\n",
" wcc_imgcomp\n",
" wcc_location_map\n",
" wcc_method_comp\n",
" wcc_parallel\n",
"\n",
"tab:\n",
" est_parameters\n",
"\n",
"eq:\n",
" albedo_plus\n",
" geometric_factor\n",
" k_from_slope\n",
" linregress\n",
" lyz_model_sag\n",
" lyz_model_transformed\n",
" lyz_shallow_water\n",
" lyz_transform\n",
" mar_sag_comp\n",
" mar_wcc\n",
" maritorena9a\n",
" maritorena_albedo_subsurf\n",
" maritorena_model\n",
" maritorena_toa\n",
" refraction\n",
" sag_index_radiance\n",
" sag_ref_index\n",
" surf_correction\n",
" surf_ref\n",
" toa_refletance\n",
" toar_deep\n"
]
}
],
"source": [
"for typ in set(typs):\n",
" print \"\\n{}:\".format(typ)\n",
" for lab in df[df.typ==typ]['nam'].sort_values():\n",
" print \" {}\".format(lab)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
@TRomijn
Copy link

TRomijn commented Jul 3, 2018

Thanks for sharing this code!

I changed a few things in the largest code cell to make it work with Python 3.6:

# Collect every LaTeX label ("type:name") used in the project under `chdir`.
# Only the standard-library `re` module is needed (no third-party `regex`);
# `os` and `re` are imported here so the snippet does not depend on an
# earlier cell having imported them.
import os
import re

# reg exp to find full label specifications, e.g. \label{fig:my_figure}.
# Raw string: `\\` matches the literal backslash in front of `label`, and the
# braces are escaped so they are taken literally. A non-raw "\\label{...}"
# hands `\l` to the regex engine, which Python 3's `re` rejects as a bad escape.
patt = re.compile(r"\\label\{(\w*:\w*)\}")
# reg exp to find `addtotoc` labels
app_patt = re.compile(r"(\w+:\w+)")

lbls = []
# `chdir` is the root directory of the LaTeX project and must be defined
# before this snippet runs.
for root, dirs, files in os.walk(chdir):
    for fn in files:
        if fn == 'main.tex':
            # my project has some appendices that are pulled into
            # the project using includepdf and the labels for these
            # are specified using `addtotoc`. This if statement
            # finds those labels in main.tex.
            with open(os.path.join(root, fn), encoding="Latin-1") as f:
                lbls.extend(app_patt.findall(f.read()))
        elif fn.endswith(".tex"):
            # every other .tex file is searched for \label{...} definitions
            with open(os.path.join(root, fn), encoding="Latin-1") as f:
                lbls.extend(patt.findall(f.read()))

# Split each "type:name" label into its two parts. Both patterns capture
# exactly one colon, so the two-way unpacking always succeeds.
typs = []
names = []
for lbl in lbls:
    t, n = lbl.split(':')
    typs.append(t)
    names.append(n)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment