Last active
February 20, 2020 16:24
-
-
Save jkibele/9de7f6d9b3250c09c688 to your computer and use it in GitHub Desktop.
Python code to generate a list of labels from LaTeX documents.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# List All Labels\n", | |
| "\n", | |
| "This notebook lists all the labels in my thesis. My labels all follow the form `chp:Chapter_Name`, `fig:figure_name`, `eq:equation`, etc. The patterns `patt` and/or `app_patt` can be modified to suit different label naming conventions." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import os\n", | |
| "import re" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "chdir = '/home/jkibele/Documents/code_projects/PhDThesis/'" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# reg exp to find full label specifications\n", | |
| "patt = re.compile(\"\\\\label{(\\w*:\\w*)}\")\n", | |
| "# reg exp to find `addtotoc` labels\n", | |
| "app_patt = re.compile(\"(\\w+:\\w+)\")\n", | |
| "\n", | |
| "lbls = []\n", | |
| "for root, dirs, files in os.walk(chdir):\n", | |
| " for fn in files: \n", | |
| " if fn == 'main.tex':\n", | |
| " # my project has some appendices that are pulled into\n", | |
| " # the project using includepdf and the labels for these\n", | |
| " # are specified using `addtotoc`. This if statement\n", | |
| " # finds those labels in main.tex.\n", | |
| " with open(os.path.join(root, fn)) as f:\n", | |
| " lbls.extend(re.findall(app_patt, f.read()))\n", | |
| " elif fn.endswith(\".tex\") and fn<>\"zTemplateChapter1.tex\":\n", | |
| " # there's a template file in my project that I don't\n", | |
| " # want to search for labels.\n", | |
| " with open(os.path.join(root,fn)) as f:\n", | |
| " txt = f.read()\n", | |
| " labels = re.findall(patt, txt)\n", | |
| " lbls.extend(labels)\n", | |
| "typs = []\n", | |
| "names = []\n", | |
| "for lbl in lbls:\n", | |
| " t, n = lbl.split(':')\n", | |
| " typs.append(t)\n", | |
| " names.append(n)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.DataFrame({'typ':typs, 'nam':names})" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "app:\n", | |
| " depth_param_est\n", | |
| " param_est\n", | |
| " preprocessing\n", | |
| " water_column\n", | |
| "\n", | |
| "chp:\n", | |
| " BPS\n", | |
| " Depth\n", | |
| " Intro\n", | |
| " OpticalRS\n", | |
| " ParamEst\n", | |
| " Water\n", | |
| "\n", | |
| "sec:\n", | |
| " curve_fit_est\n", | |
| " lin_reg_est\n", | |
| "\n", | |
| "fig:\n", | |
| " bps_fig1\n", | |
| " pd_K_ests\n", | |
| " pe_curve_fit\n", | |
| " pe_dwm_bars\n", | |
| " pe_linear_K_est\n", | |
| " pe_linear_K_est_dr\n", | |
| " pe_location_map\n", | |
| " pe_surf_corr_K\n", | |
| " wcc_depths\n", | |
| " wcc_imgcomp\n", | |
| " wcc_location_map\n", | |
| " wcc_method_comp\n", | |
| " wcc_parallel\n", | |
| "\n", | |
| "tab:\n", | |
| " est_parameters\n", | |
| "\n", | |
| "eq:\n", | |
| " albedo_plus\n", | |
| " geometric_factor\n", | |
| " k_from_slope\n", | |
| " linregress\n", | |
| " lyz_model_sag\n", | |
| " lyz_model_transformed\n", | |
| " lyz_shallow_water\n", | |
| " lyz_transform\n", | |
| " mar_sag_comp\n", | |
| " mar_wcc\n", | |
| " maritorena9a\n", | |
| " maritorena_albedo_subsurf\n", | |
| " maritorena_model\n", | |
| " maritorena_toa\n", | |
| " refraction\n", | |
| " sag_index_radiance\n", | |
| " sag_ref_index\n", | |
| " surf_correction\n", | |
| " surf_ref\n", | |
| " toa_refletance\n", | |
| " toar_deep\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for typ in set(typs):\n", | |
| " print \"\\n{}:\".format(typ)\n", | |
| " for lab in df[df.typ==typ]['nam'].sort_values():\n", | |
| " print \" {}\".format(lab)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 2", | |
| "language": "python", | |
| "name": "python2" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 2 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython2", | |
| "version": "2.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for sharing this code!
I changed a few things in the larger section of your code to make it work with Python 3.6