fabiolimace/notekeeper-html.awk

## notekeeper-html.awk
#!/usr/bin/awk -f


#
#
# NOTE:
#
# This file is being refactored. Only a few things work.
# The original code is safe in `apt-html.original.awk`.
#
#
#


#
# Converts markdown to HTML
#
# See:
#
# * https://spec.commonmark.org
# * https://markdown-it.github.io
# * https://www.javatpoint.com/markdown
# * https://www.markdownguide.org/cheat-sheet
# * https://www.markdownguide.org/extended-syntax
# * https://pandoc.org/MANUAL.html#pandocs-markdown
# * https://www.dotcms.com/docs/latest/markdown-syntax
# * https://www.codecademy.com/resources/docs/markdown
# * https://daringfireball.net/projects/markdown/syntax
# * https://www.ecovida.org.br/docs/manual_site/markdown
# * https://quarto.org/docs/authoring/markdown-basics.html
# * https://docs.github.com/en/get-started/writing-on-github
# * https://fuchsia.dev/fuchsia-src/contribute/docs/markdown
# * https://www.ibm.com/docs/en/SSYKAV?topic=train-how-do-use-markdown
# * https://www.knowledgehut.com/blog/web-development/what-is-markdown
# * https://www.ionos.com/digitalguide/websites/web-development/markdown/
# * https://learn.microsoft.com/en-us/contribute/content/markdown-reference
# * https://developer.mozilla.org/en-US/docs/MDN/Writing_guidelines/Howto/Markdown_in_MDN
# * https://confluence.atlassian.com/bitbucketserver/markdown-syntax-guide-776639995.html
# * https://learn.microsoft.com/en-us/azure/devops/project/wiki/markdown-guidance?view=azure-devops
# * https://medium.com/analytics-vidhya/the-ultimate-markdown-guide-for-jupyter-notebook-d5e5abf728fd

# Tells whether the top of the stack is one of "root", "blockquote"
# or "li". Returns 1 when it is, 0 otherwise.
function ready() {
    if (at("root")) return 1;
    if (at("blockquote")) return 1;
    return at("li");
}

# Returns 1 when the stack index is 0 (only the slot-0 entry is left),
# 0 otherwise.
function empty() {
    return (idx == 0) ? 1 : 0;
}

# Returns the tag stored at the top of the stack (`stk[idx]`)
# without removing it.
function peek() {
    return stk[idx];
}

# Returns the attribute string stored with the top stack entry
# (`stk_attr[idx]`), e.g. "title='sh'"; empty when none was pushed.
function peek_attr() {
    return stk_attr[idx];
}

# Returns the indentation recorded by push() for the top stack entry
# (`stk_spaces[idx]`), i.e. the value spaces() had when it was pushed.
function peek_spaces() {
    return stk_spaces[idx];
}

# Extracts the value of `key` from the attribute string of the top stack
# entry. Attributes are expected in the form key='value'.
#
#   key    - attribute name; it is interpolated into a regex, so it must
#            not contain regex metacharacters
#   return - the text between the single quotes, or "" if `key` is absent
#
# NOTE: calls match(), so RSTART/RLENGTH are overwritten.
# `found` and `attr` are locals; `attr` used to leak into global scope.
function peek_value(key,    found, attr) {
    # leading space lets the regex require a separator before the key,
    # so "title" does not match inside e.g. "subtitle"
    attr = " " peek_attr();
    if (match(attr, "[ ]" key "='[^']*'") > 0) {
        found = substr(attr, RSTART, RLENGTH);
        match(found, "='[^']*'");
        # skip the leading =' and drop the trailing '
        return substr(found, RSTART + 2, RLENGTH - 3);
    }
    return "";
}

# Advances the global counter `id` by one and returns the new value.
function identifier() {
    id = id + 1;
    return id;
}

# Returns 1 when the tag on top of the stack equals `tag`, else 0.
function at(tag) {
    if (peek() == tag) {
        return 1;
    }
    return 0;
}

# Checks the top of the stack against a comma-separated list of tags.
#
#   tags   - e.g. "ol,ul,li"
#   return - 1 if the top tag is one of them, "" otherwise
function any(tags,   i, n, arr) {
    n = split(tags, arr, ",");
    i = 0;
    while (++i <= n) {
        if (at(arr[i])) return 1;
    }
    return "";
}

# Pops the top entry only if it is `tag`.
# Returns what pop() returned, or "" when nothing was popped.
function pop_at(tag) {
    return at(tag) ? pop() : "";
}

# Pops the top entry only if it is one of the comma-separated `tags`.
# Returns what pop() returned, or "" when nothing was popped.
function pop_any(tags) {
    if (!any(tags)) {
        return "";
    }
    return pop();
}

# Tells whether the top of the stack is a list tag ("ol", "ul" or "li").
# These are written as separate open/close tags by push() and pop().
# Returns 1 or "" (same values as any()).
function container() {
    if (any("ol,ul,li")) {
        return 1;
    }
    return "";
}

# Closes the element on top of the stack and removes it.
#
# Containers (ol/ul/li) were already opened by push(), so only the
# pending buffer and the closing tag are written. Any other element is
# written in one go (open tag, buffer, close tag).
#
# Returns the removed tag, or "" when the stack is empty.
function pop() {

    if (!empty()) {
        if (!container()) {
            print_tag();
        } else {
            print_buf();
            close_tag();
        }
        return unpush();
    }

    return "";
}

# Counts the spaces at the start of the current record ($0) that come
# before the first non-space character.
# Returns -1 when $0 has no non-space character at all (match() fails).
# Overwrites RSTART/RLENGTH.
function spaces() {
    if (match($0, /^[ ]*[^ ]/)) {
        # the match includes the first non-space character
        return RLENGTH - 1;
    }
    return RLENGTH;
}

# Pushes a new element on the stack.
#
#   tag  - element name ("p", "ul", "li", "code", ...)
#   attr - optional attribute string, e.g. "title='sh'"
#
# An open list (ol/ul) on top is closed first unless an "li" is being
# pushed (see pop_list). The indentation of the current record is stored
# with the entry. Containers (ol/ul/li) get their opening tag written
# right away; other elements are written later by pop().
function push(tag, attr) {

    pop_list(tag);

    idx = idx + 1;
    stk_spaces[idx] = spaces();
    stk_attr[idx] = attr;
    stk[idx] = tag;

    if (!container()) {
        return;
    }

    print_buf();
    open_tag();
}

# Closes the list (ol/ul) on top of the stack when the element about to
# be pushed is anything other than a list item.
function pop_list(tag) {
    if (tag == "li") {
        return;
    }
    if (any("ol,ul")) {
        pop();
    }
}

# Removes the top stack entry without writing anything.
# Returns the tag that was on top; slot 0 is never removed.
function unpush(    top) {

    top = peek();
    if (empty()) {
        return top;
    }

    delete stk[idx];
    delete stk_attr[idx];
    delete stk_spaces[idx];
    idx--;

    return top;
}

# Writes the element on top of the stack in one go:
# opening tag, buffered content, closing tag.
function print_tag() {
    open_tag();
    print_buf();
    close_tag();
}

# Writes the opening tag of the element on top of the stack.
#
#   pre/code - delegated to open_pre(), using the `title` attribute
#   br/hr    - bare tag, attributes are ignored
#   others   - tag with its attribute string, if any
function open_tag() {

    if (at("pre") || at("code")) {
        open_pre(peek_value("title"));
        return;
    }

    if (at("br") || at("hr") || !peek_attr()) {
        printf "<%s>\n", peek();
        return;
    }

    printf "<%s %s>\n", peek(), peek_attr();
}

# Writes the closing tag of the element on top of the stack.
# br/hr are empty elements and get no closing tag; pre/code are
# delegated to close_pre().
function close_tag() {

    if (at("br") || at("hr")) {
        return;
    }

    if (at("pre") || at("code")) {
        close_pre();
    } else {
        printf "</%s>\n", peek();
    }
}

# Appends a line to the global text buffer `buf`.
#
# Inside pre/code the line is kept verbatim and lines are joined with
# newlines. Elsewhere the line is trimmed and joined with one space;
# two or more trailing spaces become a <br/> tag.
function buffer(str,    sep) {

    sep = "\n";

    if (!(at("pre") || at("code"))) {
        sep = " ";
        # 2-spaces line break
        if (str ~ /[ ][ ]+$/) {
            str = rtrim(str) make_tag("br");
        }
        str = trim(str);
    }

    buf = (buf == "") ? str : (buf sep str);
}

# Writes the global buffer `buf` and clears it.
#
# Inside pre/code the text is only escaped. Elsewhere the inline
# converters run innermost first: angles, footnotes, images, links,
# reflinks, styles. The order matters.
function print_buf() {

    if (at("pre") || at("code")) {
        buf = escape(buf);
    } else {
        buf = styles(reflinks(links(images(footnotes(angles(buf))))));
    }

    if (buf != "") {
        print buf;
    }

    buf = "";
}

# Returns `str` when it is truthy, otherwise `alternative`.
function coalesce(str, alternative) {
    if (str) {
        return str;
    }
    return alternative;
}

# Writes the opening markup of a code block.
#
#   title - text for the block header; defaults to "&gt;_"
#
# With the global TEST set, a plain <pre><code> is written. Otherwise
# the block gets a header with the title and the buttons, and the <pre>
# carries a fresh id that the buttons refer to.
function open_pre(title,    id) {

    id = identifier();
    title = coalesce(title, "&gt;_");

    if (!TEST) {
        printf "<div class='codeblock'>";
        printf "<div class='codeblock-head'>";
        printf "<span class='codeblock-title'>%s</span>", title;
        printf "<span class='codeblock-buttons'>%s</span>", buttons(id);
        printf "</div>";
        printf "<pre class='codeblock-body' id='%s'>", id;
        printf "<code class='codeblock-code'>";
        return;
    }

    printf "<pre><code>\n";
}

# Writes the closing markup of a code block. Outside TEST mode the
# wrapping <div> opened by open_pre() is closed as well.
function close_pre() {
    printf "</code></pre>\n";
    if (!TEST) {
        printf "</div>\n";
    }
}

# Builds the HTML for the three code-block buttons.
#
#   id     - id of the <pre> element the buttons act on; it is passed
#            to the copy(), collapse() and wordwrap() script functions
#   return - the buttons concatenated in the order copy, collapse,
#            word wrap
#
# All helper variables are locals; the icon variables used to leak
# into global scope.
function buttons(id,    copy, collapse, wordwrap, copy_icon, collapse_icon, wordwrap_icon) {

    copy_icon = "&#x1F4CB;";
    collapse_icon = "&#x2195;";
    wordwrap_icon = "&#x21B5;";

    copy = "<button onclick='copy(" id ")' title='Copy'>" copy_icon "</button>";
    collapse = "<button onclick='collapse(" id ")' title='Collapse'>" collapse_icon "</button>";
    wordwrap = "<button onclick='wordwrap(" id ")' title='Word wrap'>" wordwrap_icon "</button>";

    return copy collapse wordwrap;
}

# Applies every inline style to `buf` and returns the result.
# Order: snippet, formula, asterisk, underscore, deleted, inserted,
# highlighted, superscript, subscript.
function styles(buf) {

    # code-like spans first: their content gets escaped
    buf = formula(snippet(buf));
    # emphasis
    buf = underscore(asterisk(buf));
    # del / ins / mark
    buf = highlighted(inserted(deleted(buf)));
    # sup / sub
    return subscript(superscript(buf));
}

# ``code`` and `code` -> <code>; the double mark is handled first.
function snippet(buf) {
    return apply_style(apply_style(buf, "``", "code"), "`", "code");
}

# $$formula$$ and $formula$ -> <code>; the double mark is handled first.
function formula(buf) {
    return apply_style(apply_style(buf, "$$", "code"), "$", "code");
}

# __text__ -> <strong>, then _text_ -> <em>.
function underscore(buf) {
    return apply_style(apply_style(buf, "__", "strong"), "_", "em");
}

# **text** -> <strong>, then *text* -> <em>.
function asterisk(buf) {
    return apply_style(apply_style(buf, "**", "strong"), "*", "em");
}

# ~~text~~ -> <del>.
function deleted(buf) {
    buf = apply_style(buf, "~~", "del");
    return buf;
}

# ++text++ -> <ins>.
function inserted(buf) {
    buf = apply_style(buf, "++", "ins");
    return buf;
}

# ==text== -> <mark>.
function highlighted(buf) {
    buf = apply_style(buf, "==", "mark");
    return buf;
}

# ^text^ -> <sup>.
function superscript(buf) {
    buf = apply_style(buf, "^", "sup");
    return buf;
}

# ~text~ -> <sub>.
function subscript(buf) {
    buf = apply_style(buf, "~", "sub");
    return buf;
}

# Replaces every span delimited by `mark` with an HTML element.
#
#   buf    - text to process
#   mark   - literal delimiter, e.g. "**" (plain string, not a regex)
#   tag    - element name; when it is "code" the content is escaped
#   return - the converted text
#
# Scanning stops at the first opening mark that has no closing mark or
# that encloses nothing; the rest of the text is returned unchanged.
#
# `len` and `position` are locals; they used to leak into global scope.
function apply_style(buf, mark, tag,    out, found, rstart, rlength, len, position) {

    out = "";
    len = length(mark);

    position = index(buf, mark);

    while (position > 0) {

        # content starts right after the opening mark ...
        rstart = position + len;
        # ... and runs up to the next occurrence of the mark
        rlength = index(substr(buf, rstart), mark) - 1;

        # -1: no closing mark; 0: empty content
        if (rlength <= 0) break;

        found = substr(buf, rstart, rlength);

        if (tag == "code") {
            found = escape(found);
        }

        # text before the opening mark, then the element
        out = out substr(buf, 1, rstart - 1 - len);
        out = out make_tag(tag, found);

        # continue after the closing mark
        buf = substr(buf, rstart + rlength + len);
        position = index(buf, mark);
    }

    out = out buf;

    return out;
}

# Replaces HTML and markdown special characters in `str` with character
# references and returns the result.
#
# "&" must be replaced first, otherwise the ampersands introduced by the
# later replacements would be escaped again. In a gsub() replacement a
# bare "&" stands for the matched text, hence the "\\&" that yields a
# literal ampersand.
function escape(str) {
    # html special characters
    gsub(/[&]/, "\\&amp;", str);
    gsub(/[<]/, "\\&lt;", str);
    gsub(/[>]/, "\\&gt;", str);
    # markdown special characters
    # (escaped so that the inline style converters leave them alone)
    gsub(/[$]/, "\\&#36;", str);
    gsub(/[*]/, "\\&#42;", str);
    gsub(/[+]/, "\\&#43;", str);
    gsub(/[-]/, "\\&#45;", str);
    gsub(/[=]/, "\\&#61;", str);
    gsub(/[\^]/, "\\&#94;", str);
    gsub(/[_]/, "\\&#95;", str);
    gsub(/[`]/, "\\&#96;", str);
    gsub(/[~]/, "\\&#126;", str);
    return str;
}


# Returns the part of `str` that ends `x` characters before position
# `start`, i.e. substr(str, 1, start - x). `x` defaults to 1, so by
# default this is everything before `start`.
function prefix(str, start, x) {
    if (!x) {
        x = 1;
    }
    return substr(str, 1, start - x);
}

# Returns the part of `str` that begins `x` characters after position
# `end`. `x` defaults to 1, so by default this is everything after
# `end`. `start` does not influence the result.
function suffix(str, start, end, x) {
    if (!x) {
        x = 1;
    }
    return substr(str, end + x);
}

# Returns the text between two delimiter positions.
#
#   start, end - positions of the opening and closing delimiters
#   x          - offset added to `start` to reach the content (default 1)
#   y          - amount subtracted from (end - start) to get the content
#                length (default 1)
function extract(str, start, end, x, y) {
    if (!x) x = 1;
    if (!y) y = 1;
    return substr(str, start + x, (end - start) - y);
}

# TODO: change order: tag, attr, text (<tag attr>text</tag>)
#
# Builds an HTML element as a string.
#
#   tag  - element name
#   text - content; when empty a self-closing element is produced
#   attr - optional attribute string, e.g. "href='x'"
function make_tag(tag, text, attr) {

    if (attr) {
        if (text) {
            return "<" tag " " attr ">" text "</" tag ">";
        }
        return "<" tag " " attr "/>";
    }

    if (text) {
        return "<" tag ">" text "</" tag ">";
    }
    return "<" tag "/>";
}

# TODO: change order: href, title, text (<a href title>text</a>)
#
# Builds an <a> element; the title attribute is added only when `title`
# is given.
function make_link(text, href, title) {
    if (!title) {
        return make_tag("a", text, "href='" href "'");
    }
    return make_tag("a", text, "href='" href "' title='" title "'");
}

# TODO: change order and names: href, title, alt (<a href title alt/>)
#
# Builds a self-closing <img> element with `text` as alt and `href` as
# src; the title attribute is added only when `title` is given.
function make_image(text, href, title)  {
    if (!title) {
        return make_tag("img", "", "alt='" text "' src='" href "'");
    }
    return make_tag("img", "", "alt='" text "' src='" href "' title='" title "'");
}

# Builds the in-text marker of a footnote: a superscript "[ref]" that
# links to the anchor "#foot-<ref>".
function make_footnote(ref) {
    # the <sup> element must be closed; it used to be followed by a
    # second opening <sup>, leaving two unclosed elements
    return make_tag("a", "<sup>[" ref "]</sup>", "href='#foot-" ref "'");
}

# TODO: change order: ref, text (<a href="ref">text</a>)
#
# Builds an <a> element showing `text` that points to the in-page
# anchor "#link-<ref>".
function make_reflink(text, ref) {
    return make_tag("a", text, "href='#link-" ref "'");
}

# <ftp...>
# <http...>
# <https...>
# <email@...>
#
# Converts autolinks written between angle brackets into <a> elements
# and registers each one with push_link(). Any other <...> span is
# given back unchanged.
#
#   buf    - text to process
#   return - the converted text
function angles(buf,    start, end, href, out) {

    out = "";
    start = index(buf, "<");

    while (start > 0) {

        # Look for the closing bracket only AFTER the opening one, so
        # that a stray ">" earlier in the text (e.g. "a > b <http://x>")
        # does not stop the scan.
        end = index(substr(buf, start + 1), ">");
        if (end == 0) break;
        end += start;

        href = extract(buf, start, end);

        if (index(href, "http") == 1 || index(href, "ftp") == 1) {
            push_link(id++, href);
            out = out prefix(buf, start);
            out = out make_link(href, href);
        } else if (index(href, "@") > 1) {
            push_link(id++, "mailto:" href);
            out = out prefix(buf, start);
            out = out make_link(href, "mailto:" href);
        } else {
            # do nothing; just give back everything up to ">"
            out = out prefix(buf, end + 1);
        }

        buf = suffix(buf, start, end);
        start = index(buf, "<");
    }

    out = out buf;

    return out;
}

# [text](href)
# [text](href "title")
#
# Converts inline links into <a> elements and registers each one with
# push_link().
#
#   buf    - text to process
#   regex  - unused; kept so existing callers stay valid
#   return - the converted text
#
# The scan is driven by the "](" separator: the link text starts at the
# last "[" before it and the target ends at the first ")" after it.
# This keeps unrelated brackets or parentheses earlier in the text
# (e.g. "(see) [a](u)" or "[1] [a](u)") from breaking the link.
function links(buf, regex,    start, end, mid, t1, t2, temp, text, href, title, out, head, pos) {

    out = "";
    mid = index(buf, "](");

    while (mid > 0) {

        # closing parenthesis: first ")" after the separator
        end = index(substr(buf, mid + 2), ")");
        if (end == 0) break;
        end += mid + 1;

        # opening bracket: last "[" before the separator
        head = substr(buf, 1, mid - 1);
        start = 0;
        while ((pos = index(substr(head, start + 1), "[")) > 0) {
            start += pos;
        }

        if (start == 0) {
            # "](...)" without an opening bracket: give it back as is
            out = out substr(buf, 1, end);
        } else {
            out = out prefix(buf, start);

            text = extract(buf, start, mid);
            href = extract(buf, mid, end, 2, 2);

            # reset, so a title never carries over from a previous link
            title = "";

            # optional "title" in double quotes after the href
            t1 = index(href, "\"");
            t2 = index(substr(href, t1 + 1), "\"") + t1;

            if (0 < t1 && t1 < t2) {
                temp = href;
                href = trim(prefix(temp, t1));
                title = trim(extract(temp, t1, t2));
            }

            out = out make_link(text, href, title);
            push_link(id++, href, title, text);
        }

        buf = substr(buf, end + 1);
        mid = index(buf, "](");
    }

    out = out buf;

    return out;
}

# ![alt](src)
# ![alt](src "title")
#
# Converts inline images into <img> elements.
#
#   buf    - text to process
#   regex  - unused; kept so existing callers stay valid
#   return - the converted text
#
# Each delimiter is searched after the previous one ("![" then "]("
# then ")"), so links or parentheses that come before an image do not
# prevent its conversion.
function images(buf, regex,    start, end, mid, t1, t2, temp, text, href, title, out) {

    out = "";
    start = index(buf, "![");

    while (start > 0) {

        # separator: first "](" after the opening "!["
        mid = index(substr(buf, start + 2), "](");
        if (mid == 0) break;
        mid += start + 1;

        # closing parenthesis: first ")" after the separator
        end = index(substr(buf, mid + 2), ")");
        if (end == 0) break;
        end += mid + 1;

        out = out prefix(buf, start);

        text = extract(buf, start, mid, 2, 2);
        href = extract(buf, mid, end, 2, 2);

        # reset, so a title never carries over from a previous image
        title = "";

        # optional "title" in double quotes after the src
        t1 = index(href, "\"");
        t2 = index(substr(href, t1 + 1), "\"") + t1;

        if (0 < t1 && t1 < t2) {
            temp = href;
            href = trim(prefix(temp, t1));
            title = trim(extract(temp, t1, t2));
        }

        out = out make_image(text, href, title);

        buf = suffix(buf, start, end);
        start = index(buf, "![");
    }

    out = out buf;

    return out;
}

# [^footnote]
#
# Converts footnote references into superscript links.
#
#   buf    - text to process
#   regex  - unused; kept so existing callers stay valid
#   return - the converted text
#
# The closing "]" is searched after the opening "[^", so a "]" earlier
# in the text (e.g. "[x] and[^1]") does not stop the scan.
function footnotes(buf, regex,    start, end, ref, out) {

    out = "";
    start = index(buf, "[^");

    while (start > 0) {

        end = index(substr(buf, start + 2), "]");
        if (end == 0) break;
        end += start + 1;

        out = out prefix(buf, start);

        ref = extract(buf, start, end, 2, 2);
        out = out make_footnote(ref);

        buf = suffix(buf, start, end);
        start = index(buf, "[^");
    }

    out = out buf;

    return out;
}

# [text][ref]
# [text] [ref]
#
# Converts reference-style links into <a> elements that point to
# "#link-<ref>".
#
#   buf    - text to process
#   return - the converted text
#
# A "[...]" that is NOT followed by a second "[...]" is given back
# unchanged. It used to be dropped from the output together with the
# text before it, which also destroyed the "[n]" markers produced by
# the footnote converter.
function reflinks(buf,    start, end, mid1, mid2, out, text, ref) {

    out = "";
    start = index(buf, "[");

    while (start > 0) {

        # end of the text part: first "]" after the opening bracket
        mid1 = index(substr(buf, start + 1), "]");
        if (mid1 == 0) break;
        mid1 += start;

        # start of the ref part: next "[", at most one character away
        # from the "]" (same tolerance as before: "][" or "]?[")
        mid2 = index(substr(buf, mid1 + 1), "[");
        end = 0;
        if (mid2 == 1 || mid2 == 2) {
            mid2 += mid1;
            end = index(substr(buf, mid2 + 1), "]");
            if (end > 0) end += mid2;
        }

        if (end > 0) {
            text = extract(buf, start, mid1);
            ref = extract(buf, mid2, end, 1, 1);
            out = out prefix(buf, start);
            out = out make_reflink(text, ref);
            buf = substr(buf, end + 1);
        } else {
            # not a reference link: give back everything up to the
            # first "]" and keep scanning after it
            out = out substr(buf, 1, mid1);
            buf = substr(buf, mid1 + 1);
        }

        start = index(buf, "[");
    }

    out = out buf;

    return out;
}

# Writes the fixed start of the HTML document: doctype, <head> with an
# empty <title>, the embedded stylesheet, the script used by the
# code-block buttons, and the opening <body> tag.
function print_header() {

    print "<!DOCTYPE html>";
    print "<html>";
    print "<head>";
    print "<title></title>";

    # stylesheet
    print "<style>";
    print "    :root {";
    print "        --gray: #efefef;";
    print "        --black: #444;";
    print "        --dark-gray: #aaaaaa;";
    print "        --light-gray: #fafafa;";
    print "        --dark-blue: #0000ff;";
    print "        --light-blue: #0969da;";
    print "        --light-yellow: #fafaaa;";
    print "    }";
    print "    html {";
    print "        font-size: 16px;";
    print "        max-width: 100%;";
    print "    }";
    print "    body {";
    print "        padding: 1rem;";
    print "        margin: 0 auto;";
    print "        max-width: 50rem;";
    print "        line-height: 1.5rem;";
    print "        font-family: sans-serif;";
    print "        color: var(--black);";
    print "    }";
    print "    p {";
    print "        font-size: 1rem;";
    print "        margin-bottom: 1.3rem;";
    print "    }";
    print "    a, a:visited { color: var(--light-blue); }";
    print "    a:hover, a:focus, a:active { color: var(--dark-blue); }";
    print "    h1 { font-size: 1.7rem; }";
    print "    h2 { font-size: 1.4rem; }";
    print "    h3 { font-size: 1.1rem; }";
    print "    h4 { font-size: 1.1rem; }";
    print "    h5 { font-size: 0.8rem; }";
    print "    h6 { font-size: 0.8rem; }";
    print "    h1, h2 {";
    print "        padding-bottom: 0.5rem;";
    print "        border-bottom: 2px solid var(--gray);";
    print "    }";
    print "    h1, h2, h3, h4, h5, h6 {";
    print "        font-weight: bold;";
    print "        font-style: normal;";
    print "        margin: 1.4rem 0 .5rem;";
    print "    }";
    print "    h3, h5 {";
    print "        font-weight: bold;";
    print "        font-style: normal;";
    print "    }";
    print "    h4, h6 {";
    print "        font-weight: normal;";
    print "        font-style: italic;";
    print "    }";
    # classes used by the markup that open_pre() writes
    print "    div.codeblock {";
    print "        border-radius: .4rem;";
    print "        background-color: var(--gray);";
    print "        border: 1px solid var(--dark-gray);";
    print "    }";
    print "    div.codeblock-head {";
    print "        margin: 0rem 0rem;";
    print "        padding: 0rem 0rem;";
    print "        border-bottom: 1px solid var(--dark-gray);";
    print "    }";
    print "    span.codeblock-title {";
    print "        font-weight: bold;";
    print "        margin: 0rem 0rem;";
    print "        padding: 0rem 1rem;";
    print "    }";
    print "    span.codeblock-buttons {";
    print "        float: right;";
    print "        font-weight: bold;";
    print "        margin: 0rem 0rem;";
    print "        padding: 0rem 1rem;";
    print "    }";
    print "    pre.codeblock-body {";
    print "        overflow-x:auto;";
    print "        margin: 0rem 0rem;";
    print "        padding: 1rem 1rem;";
    print "        line-height: 1.0rem;";
    print "    }";
    print "    code.codeblock-code {";
    print "        font-size: 0.8rem;";
    print "        margin: 0rem 0rem;";
    print "        padding: 0rem 0rem;";
    print "        font-family: monospace;";
    print "    }";
    print "    code {";
    print "        border-radius: .2rem;";
    print "        padding: 0.1rem 0.3rem;";
    print "        font-family: monospace;";
    print "        background-color: var(--gray);";
    print "    }";
    print "    mark {";
    print "        padding: 0.1rem 0.3rem;";
    print "        border-radius: .2rem;";
    print "        background-color: var(--light-yellow);";
    print "    }";
    print "    blockquote {";
    print "        margin: 1.5rem;";
    print "        padding: 1rem;";
    print "        border-radius: .4rem;";
    print "        background-color: var(--light-gray);";
    print "        border: 1px solid var(--dark-gray);";
    print "        border-left: 12px solid var(--dark-gray);";
    print "    }";
    print "    dt { font-weight: bold; }";
    print "    hr { border: 1px solid var(--dark-gray); }";
    print "    img { height: auto; max-width: 100%; }";
    print "    table { border-collapse: collapse; margin-bottom: 1.3rem; }";
    print "    th { padding: .7rem; border-bottom: 1px solid var(--black);}";
    print "    td { padding: .7rem; border-bottom: 1px solid var(--gray);}";
    print "</style>";

    # handlers for the buttons built by buttons(); each one receives
    # the id of the <pre> element it acts on
    print "<script>";
    print "    function copy(id) {";
    print "        var element = document.getElementById(id);";
    print "        navigator.clipboard.writeText(element.textContent);";
    print "    }";
    print "    function wordwrap(id) {";
    print "        var element = document.getElementById(id);";
    print "        if (element.style.whiteSpace != 'pre-wrap') {";
    print "            element.style.whiteSpace = 'pre-wrap';";
    print "        } else {";
    print "            element.style.whiteSpace = 'pre';";
    print "        }";
    print "    }";
    print "    function collapse(id) {";
    print "        var element = document.getElementById(id);";
    print "        if (element.style.display != 'none') {";
    print "            element.style.display = 'none';";
    print "        } else {";
    print "            element.style.display = 'block';";
    print "        }";
    print "    }";
    print "</script>"

    print "</head>";
    print "<body>";
}

# Writes the end of the HTML document: a <footer> listing the links
# registered with push_link() and the entries of footnote_ref[] /
# footnote_text[], followed by the closing </body> and </html>.
#
# A link is shown by its title, or by its href when it has no title.
function print_footer (    i, ref, href, title, text) {

    print "<footer>";

    if (link_count > 0 || footnote_count > 0) print "<hr>";

    if (link_count > 0) {
        print "<h6>LINKS</h6>";
        print "<ol>";
        i = 1;
        while (i <= link_count) {
            ref = link_ref[i];
            href = link_href[i];
            title = (link_title[i] == "") ? href : link_title[i];

            print make_tag("li", title " <a href='" href "' id='link-" ref "'>&#x1F517;</a>");
            i++;
        }
        print "</ol>";
    }

    if (footnote_count > 0) {
        print "<h6>FOOTNOTES</h6>";
        print "<ol>";
        i = 1;
        while (i <= footnote_count) {
            ref = footnote_ref[i];
            text = footnote_text[i];

            print make_tag("li", text " <a href='#foot-" ref "' id='link-" ref "'>&#x1F517;</a>");
            i++;
        }
        print "</ol>";
    }

    print "</footer>";

    print "</body>";
    print "</html>";
}

# Initializes the parser state and writes the document header.
BEGIN {

    # block prefix patterns
    blockquote_prefix = "^[ ]*>[ ]?";
    ul_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[*+-][ ]";
    ol_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[0-9]+\\.[ ]";

    # element stack; slot 0 permanently holds the "root" entry
    stk[0] = "root";
    stk_attr[0] = "";
    stk_spaces[0] = 0;
    idx = 0;

    # pending text of the element being read
    buf = "";

    blank = -1; # prepare to signal blank line

    print_header();
}

# Pops entries until `tag` is on top of the stack or the stack is empty.
function pop_until(tag) {
    while (!(empty() || at(tag))) {
        pop();
    }
}

# Counts the "blockquote" entries currently on the stack (slots 1..idx).
function level_blockquote(   i, n) {
    n = 0;
    i = 0;
    while (++i <= idx) {
        if (stk[i] == "blockquote") n++;
    }
    return n;
}

# Counts the "ul"/"ol" entries from the top of the stack downwards,
# stopping at the first "blockquote" entry.
function level_list(   i, n) {
    n = 0;
    i = idx;
    while (i > 0) {
        if (stk[i] == "ul" || stk[i] == "ol") {
            n++;
        } else if (stk[i] == "blockquote") {
            break;
        }
        i--;
    }
    return n;
}

# Counts how many groups of four spaces `line` starts with.
function count_indent(line) {
    return count_prefix(line, "^[ ][ ][ ][ ]");
}

# Counts how many times the regex `pref` can be stripped from `line`
# in succession. `pref` must not match the empty string, or the loop
# never ends.
function count_prefix(line, pref,    n) {
    for (n = 0; sub(pref, "", line); n++) {
        # each successful sub() removed one more prefix
    }
    return n;
}

# Removes every leading group of four spaces from `line`.
function remove_indent(line) {
    return remove_prefix(line, "^[ ][ ][ ][ ]");
}

# Strips the regex `pref` from `line` repeatedly until it no longer
# matches, and returns what is left.
function remove_prefix(line, pref) {

    # sub() returns 1 for as long as a prefix was removed
    while (sub(pref, "", line)) {
    }

    return line;
}

# Returns the smaller of `x` and `y` (`x` when they are equal).
function min(x, y) {
    if (x <= y) {
        return x;
    }
    return y;
}

# Returns the larger of `x` and `y` (`x` when they are equal).
function max(x, y) {
    if (x >= y) {
        return x;
    }
    return y;
}

# Removes leading spaces and tabs.
function ltrim(s) {
    sub(/^[ \t]+/, "", s);
    return s;
}
# Removes trailing spaces and tabs.
function rtrim(s) {
    sub(/[ \t]+$/, "", s);
    return s;
}
# Removes spaces and tabs from both ends.
function trim(s) {
    s = ltrim(s);
    return rtrim(s);
}

# Turns `str` into a lowercase slug: every character other than an
# ASCII letter or digit becomes "-", and runs of "-" are collapsed.
# Leading and trailing dashes are kept.
function slug(str) {
    gsub(/[^a-zA-Z0-9]/, "-", str);
    gsub(/-+/, "-", str);
    str = tolower(str);
    return str;
}

# Registers a link for the footer: appends its fields to the parallel
# arrays link_ref, link_href, link_title and link_text, and increments
# link_count.
function push_link(ref, href, title, text) {
    link_ref[++link_count] = ref;
    link_href[link_count] = href;
    link_title[link_count] = title;
    link_text[link_count] = text;
}

# undo last push
#
# Removes the top stack entry without writing it, clears the global
# buffer and returns the text the buffer held.
function undo(    tmp) {
    tmp = buf;
    buf = "";
    unpush();
    return tmp;
}

#===========================================
# TABULATION
#===========================================

# A record starting with a tab: the tab is replaced by four spaces so
# that the indentation-based rules below treat it as one indent level.
# No `next`: the record goes on to the following rules.
/^\t/ {
    s = " ";
    # replace only 1st tab
    sub(/^\t/, s s s s, $0);
}

#===========================================
# BLOCKQUOTES
#===========================================

# Strips the leading blockquote marker (optional spaces, ">", optional
# spaces) from the current record.
function unblockquote() {
    sub(/^[ ]*>[ ]*/, "");
}

# one level
#
# A record starting with ">": continues the open blockquote, or closes
# whatever non-root element is open and starts a new blockquote.
/^[ ]*>[ ]*/ {

    if (!at("blockquote")) {
        if (!at("root")) {
            pop();
        }
        push("blockquote");
    }

    unblockquote();
    buffer($0);
    next;
}

#===========================================
# LISTS
#===========================================

# A list item: optional indentation, then a bullet ("*", "+", "-") or a
# number followed by ".", then at least one space and some text.
#
# Uses the globals `str` (item text), `ulol` (list type), `a` (indent
# recorded for the top stack entry) and `b` (indent of this record).
/^([ ]*[*+-][ ]+|[ ]*[0-9]+[.][ ]+).+$/ {

    str = $0; # copy register
    # detect the type of list
    # and strip the marker, leaving only the item text in `str`
    if (str ~ /^[ ]*[*+-][ ]+/) {
        ulol = "ul";
        sub(/^[ ]*[*+-][ ]+/, "", str);
    } else {
        ulol = "ol";
        sub(/^[ ]*[0-9]+[.][ ]+/, "", str);
    }

    # compare spaces
    a = peek_spaces();
    b = spaces();

    # deeper than the open item: start a nested list inside it
    if (b > a) {
        if (at("li")) {
            push(ulol);
            push("li");
            buffer(str);
            next;
        }
    }

    # shallower than the open item: three pops, then a new item.
    # NOTE(review): this appears to assume the stack ends in
    # li > ul|ol > li, i.e. exactly one nesting level is closed; when the
    # open item is not nested, the extra pops reach the root and the new
    # "li" is pushed without an enclosing list -- confirm.
    if (b < a) {
        if (at("li")) {
            pop();
            pop();
            pop();
            push("li");
            buffer(str);
            next;
        }
    }

    # same indentation as the open item: sibling item
    if (at("li")) {
        pop();
        push("li");
        buffer(str);
        next;
    }

    # nothing open: start a new list
    if (at("root")) {
        push(ulol);
        push("li");
        buffer(str);
        next;
    }

    # some other element is open: close it, then start a new list
    if (!at("root")) {
        pop();
        push(ulol);
        push("li");
        buffer(str);
        next;
    }
}

#===========================================
# CODE BLOCKS
#===========================================

# Strips one indent level (four leading spaces) from the current record.
function unindent() {
    sub(/^[ ][ ][ ][ ]/, "");
}

# A code fence. Inside a fenced block it closes the block. Otherwise it
# closes whatever non-root element is open and starts a "code" block
# whose title is the first word after the backticks.
/^```/ {

    if (at("code")) {
        pop();
        next;
    }

    if (!at("root")) {
        pop();
    }

    # dropping the backticks re-splits the record, so $1 is the word
    # that followed them
    sub(/^`+/, "");
    push("code", "title='" $1 "'");
    next;
}

# Inside a fenced code block every record is buffered as is; none of
# the rules below apply.
at("code") {
    buffer($0);
    next;
}

# A record indented by four spaces: continues the open "pre" block, or
# closes whatever non-root element is open and starts a new one.
/^[ ][ ][ ][ ]/ {

    if (!at("pre")) {
        if (!at("root")) {
            pop();
        }
        push("pre");
    }

    unindent();
    buffer($0);
    next;
}

#===========================================
# HEADING
#===========================================

# An ATX heading: one or more "#" followed by at least one space.
# The number of "#" gives the level, capped at 6. Whatever non-root
# element is open is closed first.
/^[\x23]+[ ]+/ {

    # count header level
    match($0, /^[\x23]+/);
    # Keep the level in a variable right away: pop() below can call
    # match() again (open_tag -> peek_value for pre/code blocks), which
    # overwrites RLENGTH and used to yield tags such as <h-1>.
    heading_level = min(RLENGTH, 6);

    # remove all leading hashes
    sub(/^[\x23]+[ ]*/, "", $0);
    # remove all trailing hashes
    sub(/[ ]*[\x23]+$/, "", $0);

    if (!at("root")) {
        pop();
    }

    push("h" heading_level);
    buffer($0);
    next;
}

# A line of "=" right after paragraph text: the open paragraph becomes
# an <h1>. unpush() drops the "p" entry but leaves its text in the
# buffer, so the pop() after push("h1") writes that text as the heading.
/^=+[ ]*$/ && at("p") {
    unpush();
    push("h1");
    pop();
    next;
}

# A line of "-" right after paragraph text: the open paragraph becomes
# an <h2>. unpush() drops the "p" entry but leaves its text in the
# buffer, so the pop() after push("h2") writes that text as the heading.
/^-+[ ]*$/ && at("p") {
    unpush();
    push("h2");
    pop();
    next;
}

#===========================================
# HORIZONTAL RULER
#===========================================

# TODO: fix <hr> between <ul|ol> and <li>

# A line of three or more "*", "_" or "-": closes whatever non-root
# element is open and writes an <hr>.
/^[*_-][*_-][*_-]+[ ]*$/ {

    if (!at("root")) {
        pop();
    }

    # pushed and popped at once: <hr> has no content
    push("hr");
    pop();
    next;
}


#===========================================
# BLANK
#===========================================

# A blank line. Inside an indented code block it is kept as an empty
# line; inside a list item or at the root it is ignored; anywhere else
# it closes the open element.
/^[ ]*$/ {

    blank_flag = 1;

    if (at("pre")) {
        buffer("");
    } else if (!at("li") && !at("root")) {
        pop();
    }

    next;
}

#===========================================
# PARAGRAPH
#===========================================

# Any other non-empty record. It continues the open paragraph or
# heading; otherwise whatever non-root element is open is closed and a
# new paragraph is started.
/^.+$/ {

    if (!at("p") && !any("h1,h2,h3,h4,h5,h6")) {
        if (!at("root")) {
            pop();
        }
        push("p");
    }

    buffer($0);
    next;
}

# Runs only for records that no earlier rule ended with `next`;
# clears the flag that the blank-line rule sets.
{
    blank_flag = 0;
}

#===========================================
# THE END
#===========================================

# End of input: write every element that is still open.
#
# The whole stack is drained, innermost element first. Only the top
# entry used to be flushed, so the text of a trailing blockquote was
# never written and a trailing list was left without its closing
# </ul> or </ol>.
#
# NOTE(review): print_footer() is not called here, so the </body> and
# </html> opened by print_header() are never written -- confirm whether
# that is intended while the file is being refactored.
END {

    while (!empty()) {
        pop();
    }

    # compatible end of file,
    # e.g., `diff`, `ed` etc.
    printf "\n";
}


## notekeeper-html.original.awk
#!/usr/bin/awk -f

#
# Converts markdown to HTML
#
# See:
#
# * https://spec.commonmark.org
# * https://markdown-it.github.io
# * https://www.javatpoint.com/markdown
# * https://www.markdownguide.org/cheat-sheet
# * https://www.markdownguide.org/extended-syntax
# * https://pandoc.org/MANUAL.html#pandocs-markdown
# * https://www.dotcms.com/docs/latest/markdown-syntax
# * https://www.codecademy.com/resources/docs/markdown
# * https://daringfireball.net/projects/markdown/syntax
# * https://www.ecovida.org.br/docs/manual_site/markdown
# * https://quarto.org/docs/authoring/markdown-basics.html
# * https://docs.github.com/en/get-started/writing-on-github
# * https://fuchsia.dev/fuchsia-src/contribute/docs/markdown
# * https://www.ibm.com/docs/en/SSYKAV?topic=train-how-do-use-markdown
# * https://www.knowledgehut.com/blog/web-development/what-is-markdown
# * https://www.ionos.com/digitalguide/websites/web-development/markdown/
# * https://learn.microsoft.com/en-us/contribute/content/markdown-reference
# * https://developer.mozilla.org/en-US/docs/MDN/Writing_guidelines/Howto/Markdown_in_MDN
# * https://confluence.atlassian.com/bitbucketserver/markdown-syntax-guide-776639995.html
# * https://learn.microsoft.com/en-us/azure/devops/project/wiki/markdown-guidance?view=azure-devops
# * https://medium.com/analytics-vidhya/the-ultimate-markdown-guide-for-jupyter-notebook-d5e5abf728fd

# Returns 1 when the top of the stack is "root", "blockquote" or "li",
# and 0 otherwise.
function ready() {
    if (at("root") || at("blockquote")) {
        return 1;
    }
    return at("li");
}

# Returns 1 when the stack index is 0, else 0.
function empty() {
    return (idx == 0) ? 1 : 0;
}

# Returns 1 when the tag on top of the stack equals `tag`, else 0.
function at(tag) {
    if (peek() == tag) {
        return 1;
    }
    return 0;
}

# Returns the tag stored at the top of the stack (`stk[idx]`)
# without removing it.
function peek() {
    return stk[idx];
}

# Returns the attribute string stored with the top stack entry
# (`stk_attr[idx]`).
function peek_attr() {
    return stk_attr[idx];
}

# Pushes an element on the stack and writes its opening tag at once.
#
#   tag    - element name
#   attr   - optional attribute string
#   return - the id assigned to the element (global counter `id`)
#
# br/hr have no content, so they are popped again immediately.
function push(tag, attr) {

    id++;
    idx++;

    stk_attr[idx] = attr;
    stk[idx] = tag;

    open_tag(id);

    if (at("br") || at("hr")) pop();

    return id;
}

# Writes the closing tag of the top element and removes it.
# Returns the removed tag, or "" when the stack is empty.
function pop() {
    if (!empty()) {
        close_tag();
        return unpush();
    }
    return "";
}

# Removes the top stack entry without writing anything.
# Returns the tag that was on top; slot 0 is never removed.
function unpush(    top) {
    top = peek();
    if (empty()) {
        return top;
    }
    delete stk_attr[idx];
    delete stk[idx];
    idx--;
    return top;
}

# Writes the global buffer `buf` and clears it.
#
# Inside pre/code the text is only escaped. Elsewhere the inline
# converters run innermost first: diamonds, footnotes, images, links,
# reflinks, styles. The order matters.
function write() {

    if (at("pre") || at("code")) {
        buf = escape(buf);
    } else {
        buf = styles(reflinks(links(images(footnotes(diamonds(buf))))));
    }

    if (buf != "") {
        print buf;
    }

    buf = "";
}

# Appends `str` to the global buffer `buf`.
#
#   sep - separator placed before `str` when the buffer is not empty;
#         defaults to a newline inside pre/code and to a space elsewhere
#
# Outside pre/code the text is trimmed; a line made of non-space
# characters followed by two or more spaces gets a <br> appended.
function append(str, sep) {

    if (!(at("pre") || at("code"))) {
        # append 2-spaces line break
        if (str ~ /^[^ ]+[ ][ ]+$/) {
            str = rtrim(str) "<br>";
        }
        str = trim(str);
        if (sep == "") sep = " ";
    } else if (sep == "") {
        sep = "\n";
    }

    buf = (buf == "") ? str : (buf sep str);
}

# Flushes the buffer, then writes the opening tag of the element on top
# of the stack.
#
#   id - identifier of the element; only used for pre/code blocks
#
# Sets the globals `tag` and `attr` to the top entry's tag and
# attribute string.
function open_tag(id) {

    write();

    tag = peek();
    attr = peek_attr();

    if (at("pre") || at("code")) {
        open_pre(id, peek_value("title"));
        return;
    }

#    if (at("h1") || at("h2") || at("h3")) {
#        if (!attr) {
#            attr = "id='" id "'";
#        } else {
#            attr = "id='" id "' " attr;
#        }
#    }

    # br/hr never carry attributes
    if (at("br") || at("hr") || !attr) {
        printf "<%s>\n", tag;
        return;
    }

    printf "<%s %s>\n", tag, attr;
}

# Flushes the buffer, then writes the closing tag of the element on top
# of the stack. br/hr get none; pre/code are delegated to close_pre().
function close_tag() {

    write();

    if (at("br") || at("hr")) {
        return;
    }

    if (at("pre") || at("code")) {
        close_pre();
    } else {
        printf "</%s>\n", peek();
    }
}

# Extracts the value of `key` from the attribute string of the top
# stack entry (attributes are written as key='value').
# Returns "" when the key is absent.
# Writes the global `attr` and overwrites RSTART/RLENGTH.
function peek_value(key,    found) {
    # leading space lets the regex require a separator before the key
    attr = " " peek_attr();
    if (match(attr, "[ ]" key "='[^']*'") <= 0) {
        return "";
    }
    found = substr(attr, RSTART, RLENGTH);
    match(found, "='[^']*'");
    # skip the leading =' and drop the trailing '
    return substr(found, RSTART + 2, RLENGTH - 3);
}

# Writes the opening markup of a code block: a <pre> holding a header
# (title plus buttons) and the opening of the body <div>, which carries
# `id`.
function open_pre(id, title) {
    printf "<pre>";
    printf "<div class='pre-head'>";
    printf "<span>%s</span>", title;
    printf "%s", buttons(id);
    printf "</div>";
    printf "<div class='pre-body' id='%s'>", id;
}

# Writes the closing markup of a code block (body <div>, then <pre>).
function close_pre() {
    printf "</div>";
    printf "</pre>";
}

# Builds the HTML for the three code-block buttons.
#
#   id     - id of the element the buttons act on
#   return - the buttons in the order word-wrap, collapse, clipboard
#
# The output is unchanged. The variables are now named after the button
# they hold (`clipboard` and `wordwrap` were swapped), and `collapse` is
# a local instead of leaking into global scope.
function buttons(id,    collapse, clipboard, wordwrap) {
    collapse = "<button onclick='collapse(" id ")' title='Toggle collapse' class='pre-button'>↕</button>";
    wordwrap = "<button onclick='wordwrap(" id ")' title='Toggle word-wrap' class='pre-button'>⏎</button>";
    clipboard = "<button onclick='clipboard(" id ")' title='Copy to clipboard' class='pre-button'>📋</button>";
    return wordwrap collapse clipboard;
}

# TODO: change order: tag, attr, text (<tag attr>text</tag>)
# Builds an HTML element as a string.
#
#   tag:  element name.
#   text: element content; when empty a self-closing tag is produced.
#   attr: attribute string placed after the tag name; optional.
#
# Emptiness is tested with != "" so that a numeric 0 (or a "0" field)
# is kept as content instead of being treated as "no text".
function make(tag, text, attr) {

        if (text != "") {
            if (attr != "") {
                return "<" tag " " attr ">" text "</" tag ">";
            } else {
                return "<" tag ">" text "</" tag ">";
            }
        } else {
            if (attr != "") {
                return "<" tag " " attr "/>";
            } else {
                return "<" tag "/>";
            }
        }
}

# Wraps inline code in <code>: double backticks first, then single ones.
function snippet(buf) {
    return apply_style(apply_style(buf, "``", 2, "code"), "`", 1, "code");
}

# Wraps $$...$$ and then $...$ spans in <code>.
function formula(buf) {
    return apply_style(apply_style(buf, "$$", 2, "code"), "$", 1, "code");
}

# Converts __text__ to <strong> and then _text_ to <em>.
function underscore(buf) {
    return apply_style(apply_style(buf, "__", 2, "strong"), "_", 1, "em");
}

# Converts **text** to <strong> and then *text* to <em>.
function asterisk(buf) {
    return apply_style(apply_style(buf, "**", 2, "strong"), "*", 1, "em");
}

# Converts ~~text~~ to <del>.
function deleted(buf) {
    buf = apply_style(buf, "~~", 2, "del");
    return buf;
}

# Converts ++text++ to <ins>.
function inserted(buf) {
    buf = apply_style(buf, "++", 2, "ins");
    return buf;
}

# Converts ==text== to <mark>.
function highlighted(buf) {
    buf = apply_style(buf, "==", 2, "mark");
    return buf;
}

# Converts ^text^ to <sup>.
function superscript(buf) {
    buf = apply_style(buf, "^", 1, "sup");
    return buf;
}

# Converts ~text~ to <sub>.
function subscript(buf) {
    buf = apply_style(buf, "~", 1, "sub");
    return buf;
}

# Applies every inline style to `buf` and returns the result.
# The order is significant: code spans and formulas go first (their content
# is escaped by apply_style), and two-character marks are handled before
# the one-character marks that share the same symbol (~~ before ~).
function styles(buf) {
    return subscript(superscript(highlighted(inserted(deleted(underscore(asterisk(formula(snippet(buf)))))))));
}

# Replaces every pair of `mark` delimiters in `buf` by an HTML element.
#
#   buf:  text to scan.
#   mark: delimiter string (for example "**").
#   len:  length of `mark`.
#   tag:  element name used for the enclosed text.
#
# Returns the converted text. Scanning stops at the first delimiter that
# has no closing partner or that encloses nothing; the rest of the text is
# then copied unchanged. When `tag` is "code" the enclosed text is passed
# through escape() so later style passes leave it alone.
#
# `position` is now a local; it used to leak into the global scope.
function apply_style(buf, mark, len, tag,    out, found, rstart, rlength, position) {

    out = "";

    position = index(buf, mark);

    while (position > 0) {

        # start of the enclosed text and its length up to the closing mark
        rstart = position + len;
        rlength = index(substr(buf, rstart), mark) - 1;

        if (rlength <= 0) break;

        found = substr(buf, rstart, rlength);

        if (tag == "code") {
            found = escape(found);
        }

        # text before the opening mark, then the new element
        out = out substr(buf, 1, rstart -1 - len);
        out = out make(tag, found);

        # continue after the closing mark
        buf = substr(buf, rstart + rlength + len);
        position = index(buf, mark);
    }

    out = out buf;

    return out;
}

# Returns `str` with HTML special characters and the markdown marker
# characters replaced by character references.
# "&" must be replaced first: every later replacement introduces one.
function escape(str) {
    # html special characters
    gsub(/&/, "\\&amp;", str);
    gsub(/</, "\\&lt;", str);
    gsub(/>/, "\\&gt;", str);
    # markdown special characters
    gsub(/\$/, "\\&#36;", str);
    gsub(/\*/, "\\&#42;", str);
    gsub(/\+/, "\\&#43;", str);
    gsub(/-/, "\\&#45;", str);
    gsub(/=/, "\\&#61;", str);
    gsub(/\^/, "\\&#94;", str);
    gsub(/_/, "\\&#95;", str);
    gsub(/`/, "\\&#96;", str);
    gsub(/~/, "\\&#126;", str);
    return str;
}


# Returns the first (start - x) characters of `str`; x defaults to 1,
# i.e. everything before position `start`.
function prefix(str, start, x) {
    if (!x) x = 1;
    return substr(str, 1, start - x);
}

# Returns the part of `str` that begins at position (end + x); x defaults
# to 1, i.e. everything after position `end`.
function suffix(str, start, end, x) {
    if (!x) x = 1;
    return substr(str, end + x);
}

# Returns the text between positions `start` and `end` of `str`:
# it begins at (start + x) and is (end - start - y) characters long.
# x and y default to 1, which excludes both delimiter characters.
function extract(str, start, end, x, y) {
    if (!x) x = 1;
    if (!y) y = 1;
    return substr(str, start + x, end - start - y);
}

# Returns an <a> element for `text` pointing to `href`; a title attribute
# is added only when `title` is given.
function make_link(text, href, title) {
    return make("a", text, "href='" href "'" (title ? " title='" title "'" : ""));
}

# Returns an <img> element with `text` as alt and `href` as src; a title
# attribute is added only when `title` is given.
function make_image(text, href, title)  {
    return make("img", "", "alt='" text "' src='" href "'" (title ? " title='" title "'" : ""));
}

# Returns the in-text marker of a footnote: a superscript "[footnote]"
# linking to the anchor "#foot-<footnote>".
# The <sup> element is now closed properly (it used to be followed by a
# second opening <sup> instead of </sup>).
function make_footnote(footnote) {
    return make("a", "<sup>[" footnote "]</sup>", "href='#foot-" footnote "'");
}

# Returns an <a> element for `text` pointing to the in-page anchor
# "#link-<ref>".
function make_reflink(text, ref) {
    return make("a", text, sprintf("href='#link-%s'", ref));
}

# <ftp...>
# <http...>
# <https...>
# <email@...>
# Converts autolinks written between angle brackets into <a> elements.
#
# Content starting with "http" or "ftp" becomes a link to itself; content
# with an "@" after its first character becomes a "mailto:" link; anything
# else is copied unchanged. Every converted link is also registered with
# push_link() using the global counter `id`.
#
# The closing ">" is now searched AFTER the opening "<". Before, the first
# ">" of the whole text was used, so a stray ">" ahead of the first "<"
# stopped all processing of the line.
function diamonds(buf,    start, end, href, out) {

    out = "";
    start = index(buf, "<");
    # first ">" to the right of "<"; equals `start` when there is none
    end = (start > 0) ? start + index(substr(buf, start + 1), ">") : 0;

    while (0 < start && start < end) {

        href = extract(buf, start, end);

        if (index(href, "http") == 1 || index(href, "ftp") == 1) {
            push_link(id++, href);
            out = out prefix(buf, start);
            out = out make_link(href, href);
        } else if (index(href, "@") > 1) {
            push_link(id++, "mailto:" href);
            out = out prefix(buf, start);
            out = out make_link(href, "mailto:" href);
        } else {
            # do nothing; just give back
            out = out prefix(buf, end + 1);
        }

        buf = suffix(buf, start, end);
        start = index(buf, "<");
        end = (start > 0) ? start + index(substr(buf, start + 1), ">") : 0;
    }

    out = out buf;

    return out;
}

# [text](href)
# [text](href "title")
# Converts inline links into <a> elements:
#
#   [text](href)
#   [text](href "title")
#
# Every link is also registered with push_link() using the global counter
# `id`. `regex` is unused and kept for interface compatibility.
#
# `title` is now cleared for each link; before, a link without a title
# inherited the title of the previous link on the same line.
function links(buf, regex,    start, end, mid, t1, t2, temp, text, href, title, out) {

    out = "";
    start = index(buf, "[");
    mid = index(buf, "](");
    end = index(buf, ")");

    while (0 < start && start < mid && mid < end) {

        out = out prefix(buf, start);

        text = extract(buf, start, mid);
        href = extract(buf, mid, end, 2, 2);
        title = "";

        # positions of the quotes around an optional title
        t1 = index(href, "\"");
        t2 = index(substr(href, t1 + 1), "\"") + t1;

        if (0 < t1 && t1 < t2) {
            temp = href;
            href = trim(prefix(temp, t1));
            title = trim(extract(temp, t1, t2));
        }

        out = out make_link(text, href, title);
        push_link(id++, href, title, text);

        buf = suffix(buf, start, end);
        start = index(buf, "[");
        mid = index(buf, "](");
        end = index(buf, ")");
    }

    out = out buf;

    return out;
}

# ![alt](src)
# ![alt](src "title")
# Converts inline images into <img> elements:
#
#   ![alt](src)
#   ![alt](src "title")
#
# `regex` is unused and kept for interface compatibility.
#
# `title` is now cleared for each image; before, an image without a title
# inherited the title of the previous image on the same line.
function images(buf, regex,    start, end, mid, t1, t2, temp, text, href, title, out) {

    out = "";
    start = index(buf, "![");
    mid = index(buf, "](");
    end = index(buf, ")");

    while (0 < start && start < mid && mid < end) {

        out = out prefix(buf, start);

        text = extract(buf, start, mid, 2, 2);
        href = extract(buf, mid, end, 2, 2);
        title = "";

        # positions of the quotes around an optional title
        t1 = index(href, "\"");
        t2 = index(substr(href, t1 + 1), "\"") + t1;

        if (0 < t1 && t1 < t2) {
            temp = href;
            href = trim(prefix(temp, t1));
            title = trim(extract(temp, t1, t2));
        }

        out = out make_image(text, href, title);

        buf = suffix(buf, start, end);
        start = index(buf, "![");
        mid = index(buf, "](");
        end = index(buf, ")");
    }

    out = out buf;

    return out;
}

# [^footnote]
# Converts every [^name] marker of `buf` into a footnote link built by
# make_footnote(). Scanning stops as soon as no "[^" is found before the
# first "]"; the remaining text is copied unchanged.
# `regex` is unused.
function footnotes(buf, regex,    start, end, out, footnote) {

    out = "";

    for (;;) {

        start = index(buf, "[^");
        end = index(buf, "]");

        if (start <= 0 || start >= end) break;

        # text before the marker, then the marker itself
        out = out prefix(buf, start) make_footnote(extract(buf, start, end, 2, 2));

        buf = suffix(buf, start, end);
    }

    return out buf;
}

# Returns the smaller of x and y (x when they compare equal).
function min(x, y) {
    if (x <= y) return x;
    return y;
}

# Returns the larger of x and y (x when they compare equal).
function max(x, y) {
    if (x >= y) return x;
    return y;
}

# [text][ref]
# [text] [ref]
# Converts reference-style links into <a> elements pointing to the anchor
# "#link-<ref>":
#
#   [text][ref]
#   [text] [ref]
#
# Two bracket pairs separated by more than one character are copied
# unchanged. A bracket pair that is NOT followed by a second pair is now
# copied unchanged as well; before, it was silently removed from the
# output (for example "[x] done" became " done").
function reflinks(buf,    start, end, mid1, mid2, out, text, ref) {

    out = "";
    start = index(buf, "[");
    mid1 = index(buf, "]");

    while (0 < start && start < mid1) {

        # second pair: first "[" after mid1 and first "]" after that
        mid2 = index(substr(buf, mid1 + 1), "[") + mid1;
        end = index(substr(buf, mid2 + 1), "]") + mid2;

        if (mid1 < mid2 && mid2 < end) {
            if (mid2 - mid1 <= 2) {
                text = extract(buf, start, mid1);
                ref = extract(buf, mid2, end, 1, 1);
                out = out prefix(buf, start);
                out = out make_reflink(text, ref);
            } else {
                out = out prefix(buf, end + 1);
            }
        } else {
            # no complete second pair: give the first pair back untouched
            end = mid1;
            out = out prefix(buf, end + 1);
        }

        buf = suffix(buf, start, end);
        start = index(buf, "[");
        mid1 = index(buf, "]");
    }

    out = out buf;

    return out;
}

# Prints the beginning of the HTML document: doctype, <head> with the
# embedded style sheet and the JavaScript helpers used by the code block
# buttons (clipboard, wordwrap, collapse), and the opening <body> tag.
#
# A charset declaration is now emitted: the generated page contains
# UTF-8 glyphs (the button labels), which browsers may otherwise decode
# with a legacy default encoding.
function print_header() {

    print "<!DOCTYPE html>";
    print "<html>";
    print "<head>";
    print "<meta charset='utf-8'>";
    print "<title></title>";

    print "<style>";
    print "    :root {";
    print "        --gray: #efefef;";
    print "        --black: #444;";
    print "        --dark-gray: #aaaaaa;";
    print "        --light-gray: #fafafa;";
    print "        --dark-blue: #0000ff;";
    print "        --light-blue: #0969da;";
    print "        --light-yellow: #fafaaa;";
    print "    }";
    print "    html {";
    print "        font-size: 16px;";
    print "        max-width: 100%;";
    print "    }";
    print "    body {";
    print "        padding: 1rem;";
    print "        margin: 0 auto;";
    print "        max-width: 50rem;";
    print "        line-height: 1.8;";
    print "        font-family: sans-serif;";
    print "        color: var(--black);";
    print "    }";
    print "    p {";
    print "        font-size: 1rem;";
    print "        margin-bottom: 1.3rem;";
    print "    }";
    print "    a, a:visited { color: var(--light-blue); }";
    print "    a:hover, a:focus, a:active { color: var(--dark-blue); }";
    print "    h1 { font-size: 2.0rem; }";
    print "    h2 { font-size: 1.5rem; }";
    print "    h3 { font-size: 1.2rem; }";
    print "    h4 { font-size: 1.2rem; }";
    print "    h5 { font-size: 0.8rem; }";
    print "    h6 { font-size: 0.8rem; }";
    print "    h1, h2 {";
    print "        padding-bottom: 0.5rem;";
    print "        border-bottom: 2px solid var(--gray);";
    print "    }";
    print "    h1, h2, h3, h4, h5, h6 {";
    print "        line-height: 1.4;";
    print "        font-style: normal;";
    print "        font-weight: bold;";
    print "        margin: 1.4rem 0 .5rem;";
    print "    }";
    print "    h3, h5 {";
    print "        font-weight: bold;";
    print "        font-style: normal;";
    print "    }";
    print "    h4, h6 {";
    print "        font-weight: normal;";
    print "        font-style: italic;";
    print "    }";
    print "    pre {";
    print "        overflow-x:auto;";
    print "        line-height: 1.5;";
    print "        border-radius: .4rem;";
    print "        font-family: monospace;";
    print "        background-color: var(--gray);";
    print "        border: 1px solid var(--dark-gray);";
    print "    }";
    print "    div.pre-head {";
    print "        height: 1.5rem;";
    print "        padding: 1rem;";
    print "        font-weight: bold;";
    print "        padding-top: 0.5rem;";
    print "        padding-bottom: 0.5rem;";
    print "        border-bottom: 1px solid var(--dark-gray);";
    print "    }";
    print "    div.pre-body {";
    print "        padding: 1rem;";
    print "    }";
    print "    button.pre-button {";
    print "        font-size: 100%; float: right;";
    print "    }";
    print "    code {";
    print "        padding: 0.3rem;";
    print "        border-radius: .2rem;";
    print "        font-family: monospace;";
    print "        background-color: var(--gray);";
    print "    }";
    print "    mark {";
    print "        padding: 0.3rem;";
    print "        border-radius: .2rem;";
    print "        background-color: var(--light-yellow);";
    print "    }";
    print "    blockquote {";
    print "        margin: 1.5rem;";
    print "        padding: 1rem;";
    print "        border-radius: .4rem;";
    print "        background-color: var(--light-gray);";
    print "        border: 1px solid var(--dark-gray);";
    print "        border-left: 12px solid var(--dark-gray);";
    print "    }";
    print "    dt { font-weight: bold; }";
    print "    hr { border: 1px solid var(--dark-gray); }";
    print "    img { height: auto; max-width: 100%; }";
    print "    table { border-collapse: collapse; margin-bottom: 1.3rem; }";
    print "    th { padding: .7rem; border-bottom: 1px solid var(--black);}";
    print "    td { padding: .7rem; border-bottom: 1px solid var(--gray);}";
    print "</style>";

    # handlers referenced by the buttons of each code block header
    print "<script>";
    print "    function clipboard(id) {";
    print "        var element = document.getElementById(id);";
    print "        navigator.clipboard.writeText(element.textContent);";
    print "    }";
    print "    function wordwrap(id) {";
    print "        var element = document.getElementById(id);";
    print "        if (element.style.whiteSpace != 'pre-wrap') {";
    print "            element.style.whiteSpace = 'pre-wrap';";
    print "        } else {";
    print "            element.style.whiteSpace = 'pre';";
    print "        }";
    print "    }";
    print "    function collapse(id) {";
    print "        var element = document.getElementById(id);";
    print "        if (element.style.display != 'none') {";
    print "            element.style.display = 'none';";
    print "        } else {";
    print "            element.style.display = 'block';";
    print "        }";
    print "    }";
    print "</script>";

    print "</head>";
    print "<body>";
}

# Prints the end of the HTML document: a <footer> listing the links and
# footnotes collected while parsing (link_* and footnote_* arrays), then
# the closing </body> and </html> tags.
#
# Each listed link carries the anchor id "link-<ref>" targeted by
# make_reflink(). Each listed footnote now carries the anchor id
# "foot-<ref>", which is what the in-text markers built by make_footnote()
# point to; it used to be "link-<ref>", so those markers led nowhere and
# could collide with link anchors.
function print_footer (    i, ref, href, title, text) {

    print "<footer>";

    if (link_count > 0 || footnote_count > 0) {
        print "<hr>";
    }

    if (link_count > 0) {
        print "<h6>LINKS</h6>";
        print "<ol>";
        for (i = 1; i <= link_count; i++) {

            ref = link_ref[i];
            href = link_href[i];
            title = link_title[i];

            # links without a title are listed by their address
            if (title == "") {
                title = href;
            }

            print make("li", title " <a href='" href "' id='link-" ref "'>&#x1F517;</a>");

        }
        print "</ol>";
    }

    if (footnote_count > 0) {
        print "<h6>FOOTNOTES</h6>";
        print "<ol>";
        for (i = 1; i <= footnote_count; i++) {

            ref = footnote_ref[i];
            text = footnote_text[i];

            print make("li", text " <a href='#foot-" ref "' id='foot-" ref "'>&#x1F517;</a>");

        }
        print "</ol>";
    }

    print "</footer>";

    print "</body>";
    print "</html>";
}

BEGIN {

    # text buffer starts empty
    buf = "";

    # element stack: slot 0 always holds the "root" entry
    idx = 0;
    stk[idx] = "root";
    stk_attr[idx] = "";

    # line prefixes of block quotes and of (un)ordered list items;
    # list items may be preceded by any number of 4-space indents
    # plus up to three extra spaces
    blockquote_prefix = "^[ ]*>[ ]?";
    ul_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[*+-][ ]";
    ol_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[0-9]+\\.[ ]";

    # prepare to signal blank line
    blank = -1;

    print_header();
}

# Pops elements until `tag` is on top of the stack or the stack is empty.
function pop_until(tag) {
    while (!(empty() || at(tag))) {
        pop();
    }
}

# Returns how many "blockquote" entries are on the stack (slots idx..1).
function level_blockquote(   i, n) {
    n = 0;
    i = idx;
    while (i > 0) {
        if (stk[i] == "blockquote") n++;
        i--;
    }
    return n;
}

# Returns how many "ul"/"ol" entries are on the stack above the innermost
# "blockquote" (or above the bottom when there is no block quote).
function level_list(   i, n) {
    n = 0;
    i = idx;
    while (i > 0 && stk[i] != "blockquote") {
        if (stk[i] == "ul" || stk[i] == "ol") n++;
        i--;
    }
    return n;
}

# Returns the number of leading 4-space indents of `line`.
function count_indent(line) {
    # strip one indent and count the rest recursively
    return sub(/^[ ][ ][ ][ ]/, "", line) ? 1 + count_indent(line) : 0;
}

# Returns how many times the regex `pref` can be stripped, one after
# another, from the start of `line`.
function count_prefix(line, pref,    n) {
    for (n = 0; sub(pref, "", line); n++) {
        continue;
    }
    return n;
}

# Returns `line` without any of its leading 4-space indents.
function remove_indent(line) {
    while (sub(/^[ ][ ][ ][ ]/, "", line)) {
        continue;
    }
    return line;
}

# Returns `line` after stripping the regex `pref` from its start as many
# times as it matches (for example, all leading quote marks).
function remove_prefix(line, pref) {

    while (sub(pref, "", line)) {
        continue;
    }

    return line;
}

# Returns `s` without leading spaces and tabs.
function ltrim(s) {
    gsub(/^[ \t]+/, "", s);
    return s;
}
# Returns `s` without trailing spaces and tabs.
function rtrim(s) {
    gsub(/[ \t]+$/, "", s);
    return s;
}
# Returns `s` without leading and trailing spaces and tabs.
function trim(s) {
    sub(/^[ \t]+/, "", s);
    sub(/[ \t]+$/, "", s);
    return s;
}

# Returns a lower-case slug of `str`: every run of characters other than
# ASCII letters and digits is replaced by a single "-".
function slug(str) {
    # a run of non-alphanumerics (hyphens included) collapses to one hyphen
    gsub(/[^a-zA-Z0-9]+/, "-", str);
    return tolower(str);
}

#===========================================
# TABULATIONS
#===========================================

{
    gsub("\t", "    ", $0); # replace tabs with 4 spaces
}

#===========================================
# BLANK LINES
#===========================================

# Blank line flag states:
#  0: not signaling blank line
# -1: preparing to signal blank line
#  1: signaling blank line

# The flag set to -1 by a blank line becomes 1 on the next line and 0 on
# the line after that. The order of these two rules matters: the first one
# must not see the value the second one has just produced.
blank == 1 {
    blank = 0;
}

blank == -1 {
    blank = 1;
}

# A blank (empty or spaces-only) line outside a fenced code block
# re-arms the flag and closes the current paragraph and block quote levels.
# Inside a "code" element the line falls through to the following rules.
/^[ ]*$/ {
    if (!at("code")) {
        blank = -1;
        pop_p();
        pop_blockquote();
        next;
    }
}

#===========================================
# BLOCKQUOTE
#===========================================

# Pops block quotes while the current line ($0) has fewer quote marks than
# there are "blockquote" entries on the stack. Does nothing unless a
# "blockquote" is on top of the stack; popping also stops as soon as the
# top of the stack is something else.
#
# lv, cp and n are now locals: they used to overwrite the global variables
# of the same names that the main rules use as loop counters.
function pop_blockquote(    lv, cp, n) {

    if (!at("blockquote")) return;

    lv = level_blockquote();
    cp = count_prefix($0, blockquote_prefix);

    # number of levels that are open but no longer marked on this line
    n = lv - cp;
    while (n-- > 0) {
        if (at("blockquote")) pop();
    }
}

# A line without quote marks closes the block quotes that are on top of
# the stack.
$0 !~ blockquote_prefix {
    pop_blockquote();
}

# A line with quote marks: compare the number of marks (cp) with the
# number of open block quotes (lv), strip the marks from $0, then open or
# close levels to make both numbers agree. There is no `next`: the
# stripped line continues through the following rules.
$0 ~ blockquote_prefix {

    lv = level_blockquote();
    cp = count_prefix($0, blockquote_prefix);

    $0 = remove_prefix($0, blockquote_prefix);

    if (cp > lv) {
        n = cp - lv;
        while (n-- > 0) {
            pop_p();
            push("blockquote");
        }
    } else {
        n = lv - cp;
        while (n-- > 0) {
            pop();
        }
    }

    # a line holding only quote marks closes whatever is open inside
    # the block quote
    if ($0 ~ /^$/) {
        pop_until("blockquote");
    }
}

#===========================================
# LIST ITEMS
#===========================================

# TODO: add more POSIX compatibility as MAWK doesn't support regex quantifiers {x,y}
# See: https://unix.stackexchange.com/questions/506119/how-to-specify-regex-quantifiers-with-mawk

# Pops the top element unless the stack is at a container that can
# receive new blocks (see ready(): root, blockquote or li).
function pop_p() {
    if (ready()) return;
    pop();
}

# Closes the list levels that are deeper than the indentation of the
# current line ($0). Does nothing unless an "li" is on top of the stack.
#
# lv, cp and n are now locals: they used to overwrite the global variables
# of the same names that the main rules use as loop counters.
function pop_list (    lv, cp, n) {

    if (!at("li")) return;

    lv = level_list();
    cp = count_indent($0);

    n = lv - cp;
    while (n-- > 0) {
        # for each level: an element sitting on an "li", the "li" itself,
        # then the list that owns it
        if (stk[idx-1] == "li") pop();
        if (at("li")) pop();
        if (at("ol") || at("ul")) pop();
    }
}

# Returns `line` with one 4-space indent removed for each open list level
# (as reported by level_list()); missing indents are simply skipped.
#
# n is now a local: it used to overwrite the global `n` that the main
# rules use as a loop counter.
function remove_list_indent (line,    n) {

    n = level_list();
    while (n > 0) {
        sub(/^[ ][ ][ ][ ]/, "", line);
        n--;
    }

    return line;
}

# A line that does not start a list item: remove the indentation that
# belongs to the open list levels. When the previous line was blank, the
# list levels deeper than this line's indentation are closed first.
# pop_list() reads $0, so $0 is replaced only after it has run.
$0 !~ ul_prefix && $0 !~ ol_prefix {

    temp = remove_list_indent($0);

    if (blank > 0) {
        pop_list();
    }

    $0 = temp;
}

# Returns the digits at the start of `line` (leading spaces ignored),
# i.e. the number of an ordered list item, or "" when there are none.
function list_start(line) {
    sub(/^[ ]+/, "", line);
    if (match(line, /^[0-9]+/)) {
        return substr(line, RSTART, RLENGTH);
    }
    return "";
}

# Pushes a list item, opening the list `tag` ("ul" or "ol") first unless
# a list is already on top of the stack. A new "ol" receives a start
# attribute when `start` is given.
function push_li(tag, start) {

    if (!at("ul") && !at("ol")) {
        if (tag == "ol" && start != "") {
            push(tag, "start='" start "'");
        } else {
            push(tag);
        }
    }

    push("li");
}

# Handles a line ($0) that starts a list item.
#
#   tag:   "ul" or "ol".
#   pref:  regex of the item prefix, stripped from $0.
#   start: first number of an ordered list; used only when new list levels
#          are opened.
#
# The level of the item (number of 4-space indents + 1) is compared with
# the number of open list levels, and levels are opened or closed before
# the item is pushed and its text appended.
#
# lv, cp and n are now locals: they used to overwrite the global variables
# of the same names that the main rules use as loop counters.
function parse_list_item(tag, pref, start,    lv, cp, n) {

    lv = level_list();
    cp = count_indent($0) + 1;

    $0 = remove_prefix($0, pref);

    if (cp == lv) {

        # same level: close the previous item and open a sibling
        pop_p();
        if (at("li")) pop();
        push_li(tag);
        append($0);

    } else if (cp > lv) {

        # add levels
        n = (cp - 1) - lv;
        while (n-- > 0) {
            push_li(tag);
        }

        push_li(tag, start);
        append($0);

    } else if (cp < lv) {

        # del levels
        n = lv - cp;
        while (n-- > 0) {
            pop_p();
            if (at("li")) pop();
            if (at("ol") || at("ul")) pop();
        }

        if (at("li")) pop();
        push_li(tag);
        append($0);
    }
}

# Unordered list item ("*", "+" or "-" followed by a space).
$0 ~ ul_prefix {
    parse_list_item("ul", ul_prefix);
    next;
}

# Ordered list item (digits, a dot and a space).
$0 ~ ol_prefix {

    # the user specifies
    # the starting number
    start = list_start($0);

    parse_list_item("ol", ol_prefix, start);
    next;
}

#===========================================
# CODE BLOCKS
#===========================================

# Code fence: opens a "code" element when none is open, using the text
# after the backticks as its title attribute; otherwise pops the top
# element.
/^```/ {

    if (!at("code")) {

        sub(/^`+/, "");
        title = $0;

        push("code", "title='" title "'");
        next;
    }

    pop();
    next;
}

# Inside a fenced block every line is appended as is.
at("code") {
    append($0);
    next;
}

# Line indented by 4 spaces: belongs to a "pre" element, which is opened
# on demand; one level of indentation is removed before appending.
/^[ ][ ][ ][ ]/ {

    if (!at("pre")) {
        push("pre");
    }

    sub("^[ ][ ][ ][ ]", "", $0);
    append($0);
    next;
}

#===========================================
# HEADING
#===========================================

# undo last push
# Cancels the last push: empties the text buffer, calls unpush() and
# returns the text that was in the buffer.
function undo(    tmp) {
    tmp = buf;
    unpush();
    buf = "";
    return tmp;
}

# Setext heading: a line of "=" under a paragraph turns that paragraph
# into an <h1>. The paragraph text is recovered with undo().
/^===+/ && at("p") {

    # <h1>
    $0 = undo();
    push("h1");
    append($0);
    pop_p();
    next;
}

# Setext heading: a line of "-" under a paragraph turns that paragraph
# into an <h2>.
/^---+/ && at("p") {

    # <h2>
    $0 = undo();
    push("h2");
    append($0);
    pop_p();
    next;
}

# ATX heading: one or more "#" (written \x23) followed by spaces.
# The heading level is the number of hashes, capped at 6.
/^[\x23]+[ ]+/ {

    # count hashes
    match($0, "\x23+")
    n = RLENGTH > 6 ? 6 : RLENGTH

    # remove leading hashes
    $0 = substr($0, n + 1);

    pop_p();
    push("h" n);
    append($0);
    next;
}

#===========================================
# HORIZONTAL RULER
#===========================================

# Three or more "*", "_" or "-" characters alone on a line (trailing
# spaces allowed) produce an "hr" element.
/^[*_-][*_-][*_-]+[ ]*$/ {
    pop_p();
    push("hr");
    next;
}

#===========================================
# DEFINITION LIST
#===========================================

# TODO: make definition list multi-level like <li>

# A line starting with ":" is a definition.
# After a paragraph, the paragraph becomes the term (<dt>) of a new <dl>
# and the line its first definition (<dd>); after another definition, the
# line becomes one more <dd>. In any other context the line falls through
# to the following rules.
/^:/ {

    dd = substr($0, 2);

    if (at("p")) {
        dt = undo();
        push("dl");
        push("dt");
        append(dt);
        pop_p();
        push("dd");
        append(dd);
        next;
    }
    if (at("dd")) {
        pop_p();
        push("dd");
        append(dd);
        next;
    }
}

#===========================================
# TABLE
#===========================================

# Parses the delimiter row of a table (for example "|:---|:---:|---:|")
# and stores the alignment of each column in the global array
# table_aligns: "left", "center" or "right". Columns without a colon get
# no entry. Indexes match those produced by split() on "|", so the first
# cell has index 2.
#
# The loop index `i` is now a local (it used to overwrite the global `i`
# used by the table rule), and the unreachable debug branch that stored an
# "l:... r:..." value was removed: every match of the pattern contains at
# least one colon at an end.
function set_table_aligns(line,    arr, regex, found, l, r, n, i) {

    delete table_aligns;
    regex = "(:--[-]+:|:--[-]+|--[-]+:)";

    # split() clears arr; cells start from 2
    n = split(line, arr, /\|/);
    for (i = 2; i < n; i++) {

        if (match(arr[i], regex) > 0) {

            found = substr(arr[i], RSTART, RLENGTH);

            # first and last character of the delimiter
            l = substr(found, 1, 1);
            r = substr(found, RLENGTH, 1);

            if (l == ":" && r == ":") {
                table_aligns[i] = "center";
            } else if (l == ":" && r == "-") {
                table_aligns[i] = "left";
            } else if (l == "-" && r == ":") {
                table_aligns[i] = "right";
            }
        }
    }
}

# Table row: a line delimited by "|".
# The first row opens the table and is emitted as header cells (<th>).
# Inside a table, a delimiter row only records the column alignments;
# any other row is emitted as data cells (<td>) styled with the recorded
# alignment of their column.
/^[ ]*\|.*\|[ ]*/ {

    if (!at("table")) {

        push("table");
        push("tr");

        delete arr; # starts from 2
        n = split($0, arr, /\|/);
        for(i = 2; i < n; i++) {
            push("th");
            append(arr[i]);
            pop();
        }
        pop();
        next;
    }

    if (at("table")) {

        # delimiter row, for example |:---|---:|
        if ($0 ~ /^[ ]*\|[ ]*([:]?--[-]+[:]?)[ ]*\|[ ]*/) {
            set_table_aligns($0);
            next;
        }

        push("tr");

        delete arr; # starts from 2
        n = split($0, arr, /\|/);
        for(i = 2; i < n; i++) {

            if (table_aligns[i] != "") {
                push("td", "style='text-align:" table_aligns[i] ";'");
            } else {
                push("td");
            }
            append(arr[i]);
            pop();

        }
        pop();
        next;
    }
}

#===========================================
# FOOTNOTE
#===========================================

# Records a footnote definition for the footer: `ref` is stored as is,
# `text` after the inline styles have been applied to it.
function push_footnote(ref, text) {
    footnote_ref[++footnote_count] = ref;
    footnote_text[footnote_count] = styles(text);
}

# Footnote definition. The line is recorded with push_footnote() and
# produces no output where it appears.
/^[ ]*\[\^[^]]+\][:]/ {

    # [^id]: note
    if (match($0, /\[\^[^]]+\][:]/) > 0) {

        # the match is "[^" id "]:"; drop those 4 delimiter characters
        ref = substr($0, RSTART + 2, RLENGTH - 4);
        text = substr($0, RSTART + RLENGTH);

        push_footnote(ref, text);
    }
    next;
}

#===========================================
# (REFERENCE STYLE) LINK
#===========================================

# TODO: implement all styles: https://gist.github.com/emedinaa/28ed71b450243aba48accd634679f805

# Records a link for the footer in the parallel arrays link_ref,
# link_href, link_title and link_text, indexed from 1 to link_count.
function push_link(ref, href, title, text) {
    link_ref[++link_count] = ref;
    link_href[link_count] = href;
    link_title[link_count] = title;
    link_text[link_count] = text;
}

# Reference link definition. The line is recorded with push_link() and
# produces no output where it appears.
#
# Two fixes over the previous version:
#   * `title` is cleared first. It is a global that is also set by other
#     rules (for example the code fence rule), so a definition without a
#     title used to inherit whatever value was left in it.
#   * the angle brackets around <href> are removed whether or not a title
#     follows.
/^[ ]*\[[^]]+\][:]/ {

    # [ref]: href
    # [ref]: href "title"
    # [ref]: href 'title'
    # [ref]: href (title)
    # [ref]: <href>
    # [ref]: <href> "title"
    # [ref]: <href> 'title'
    # [ref]: <href> (title)
    if (match($0, /\[[^]]+\][:]/) > 0) {

        ref = substr($0, RSTART + 1, RLENGTH - 3);
        href = substr($0, RSTART + RLENGTH);
        title = "";

        if (match(href, "[ ](\"[^\"]*\"|'[^']*'|\\([^\\)]*\\))") > 0) {
            # the match is a space, the opening delimiter, the title and
            # the closing delimiter
            title = substr(href, RSTART + 2, RLENGTH - 3);
            href = substr(href, 1, RSTART - 1);
        }

        # remove '<' '>'.
        if (match(href, "<[^>]+>") > 0) {
            href = substr(href, RSTART + 1, RLENGTH - 2);
        }

        # remove leading spaces
        sub("^[ ]*", "", href);

        push_link(ref, href, title, title);
    }
    next;
}

#===========================================
# PARAGRAPH
#===========================================

# TODO: transform "<li>text" in "<li><p>text", undoing the previous <li>

# Any other non-empty line is paragraph text.
# A "p" element is opened when the stack is at a container (see ready());
# directly inside a list item this happens only when the previous line
# was blank. Otherwise the text continues the element that is open.
/^.+$/ {
    if (ready()) {
        if (at("li")) {
            if (blank == 1) {
                push("p");
            }
        } else {
            push("p");
        }
    }

    append($0);
    next;
}

#===========================================
# THE END
#===========================================

# End of input: close the paragraph, the lists and the block quotes that
# may still be open, then print the footer and the end of the document.
END {

    pop_p();
    pop_list();
    pop_blockquote();

    print_footer();
}


## notekeeper-http-server.sh
#!/bin/sh

#
# Runs the Busybox httpd server.
#
# Usage:
#
#     notekeeper-http-server.sh
#
# Configuration:
#
#     # file .notekeeper/notekeeper.conf
#     busybox.httpd.port=127.0.0.1:9000
#

. "`dirname "$0"`/notekeeper-common.sh";

# Configuration key that holds the listening address, and the value used
# when the key is not configured.
property_port="busybox.httpd.port"
property_port_default="127.0.0.1:9000"

# Prints the address the server must listen on: the value of the
# ${property_port} key in ${WORKING_DIR}/.notekeeper/notekeeper.conf, or
# ${property_port_default} when the file or the key is missing.
#
# The key must now be followed directly by "=" (keys that merely start
# with the same text no longer match), only the first occurrence is used,
# and a missing configuration file no longer produces an error message.
busybox_httpd_port() {
    local conf="${WORKING_DIR}/.notekeeper/notekeeper.conf";
    local port="";
    if [ -r "${conf}" ]; then
        port=`sed -n "s/^${property_port}=//p" "${conf}" | head -n 1`;
    fi;
    if [ -n "${port}" ]; then
        echo "${port}";
    else
        echo "${property_port_default}";
    fi;
}

# Kills every running "busybox httpd" process found in the output of
# `ps aux`. Process ids up to 1024 are never killed.
#
# Each process id is now handled separately: with more than one match the
# single numeric test used to fail and nothing was stopped.
busybox_httpd_stop() {
    local pids=`ps aux | grep 'busybox httpd' | grep -v "grep" | awk '{ print $2 }'`
    local pid
    for pid in $pids; do
        if [ "$pid" -gt 1024 ]; then
            kill -9 "$pid";
        fi;
    done;
}

# Starts the Busybox httpd server on the configured address, serving the
# `.notekeeper/html/` folder of the working directory, and prints the URL.
busybox_httpd_start() {
    local port
    port=$(busybox_httpd_port)
#    busybox httpd -p "$port" -h "$PROGRAM_DIR/www/"
    busybox httpd -p "${port}" -h "${WORKING_DIR}/.notekeeper/html/"
    echo "Listening: http://${port}"
}

# Restarts the server: stops any running instance, then starts a new one.
main() {
    busybox_httpd_stop;
    busybox_httpd_start;
}

main;

# https://datatracker.ietf.org/doc/html/rfc3875
# https://www.vivaolinux.com.br/artigo/Introducao-a-CGI-com-a-RFC-3875
# https://gist.github.com/stokito/a9a2732ffc7982978a16e40e8d063c8f
# https://github.com/Mikepicker/cgiblog
# https://medium.com/@Mikepicker/no-framework-blog-for-fun-and-profit-using-bash-cgi-cbb99cf5366b

## notekeeper-save-html.sh
#!/bin/sh

#
# Saves HTML in `html` folder.
#
# Usage:
#
#     notekeeper-save-html.sh FILE
#

. "`dirname "$0"`/notekeeper-common.sh";

# The file to convert is the first argument; require_file validates it.
file="${1}"
require_file "${file}";

# Converts FILE to HTML with notekeeper-html.awk and writes the result to
# the path given by html_path, creating its folder when needed.
main() {
    local file="${1}"
    local html
    local folder
    html=$(html_path "${file}")
    folder=$(dirname "${html}")
    mkdir -p "${folder}"
    "$PROGRAM_DIR/awk/notekeeper-html.awk" "${file}" > "${html}"
}

main "${file}";

## notekeeper-save-stat.sh
#!/bin/sh

#
# Saves a STAT file in the `data` folder.
#
# Usage:
#
#     notekeeper-save-stat.sh FILE
#

. "`dirname "$0"`/notekeeper-common.sh";

# The file to analyse is the first argument; require_file validates it.
file="${1}"
require_file "${file}";

# Runs notekeeper-stat.awk on FILE (in the C locale) and writes its output
# to the path given by make_stat.
# NOTE(review): `uuid` is computed but not used here -- confirm whether the
# call to path_uuid is still needed.
main() {
    local file="${1}"
    local uuid
    local stat
    uuid=$(path_uuid "${file}");
    stat=$(make_stat "${file}");
    LC_ALL=C "$PROGRAM_DIR/awk/notekeeper-stat.awk" -v WRITETO=/dev/stdout "${file}" > "${stat}"
}

main "${file}";

## notekeeper-stat.awk
#!/usr/bin/awk -f

# Note:
#   * Files encoded using MAC-UTF-8 must be normalized to UTF-8.

# Returns the type of a token: its lower-case form converted by toascii().
function token_type(token)
{
    token = tolower(token);
    return toascii(token);
}

# Classifies the format of a token: "W" (word), "N" (number),
# "P" (punctuation) or "NA".
#
# NOTE:
# Words that contain "accented" characters encoded with MAC-UTF-8 are
# reported as NA. Input files must be normalized to regular UTF-8 first.
function token_format(token)
{
    # Word format: all-letter token with optional hyphens
    if (token ~ /^[[:alpha:]]+([\x27’-]?[[:alpha:]])*$/) return "W";

    # Number format: all-digit token with some optional puncts
    if (token ~ /^[+-]?([[:digit:]][h°%/:,.+-]?)+$/) return "N";

    # Punct format: all-punct token
    if (token ~ /^[[:punct:]]+$/) return "P";

    # None of the above
    return "NA";
}

# Classifies the letter case of a token after converting it with
# toascii(): "S" (start), "L" (lower), "U" (upper), "C" (camel),
# "M" (mixed) or "NA".
#
# NOTE:
# UPPERCASE words with a single character, for example "É", are treated as
# start case words by this function. The author considers it a very
# convenient behavior that helps to identify proper nouns and the beginning
# of sentences, although he admits that it may not be intuitive. The order
# of the tests is important to preserve this behavior.
function token_case(token)
{
    token = toascii(token);

    # Start case: "Word", "Compound-word"
    if (token ~ /^[[:upper:]][[:lower:]]*([\x27’-]([[:alpha:]][[:lower:]]*))*$/) return "S";

    # Lower case: "word", "compound-word"
    if (token ~ /^[[:lower:]]+([\x27’-]([[:lower:]]+))*$/) return "L";

    # Upper case: "WORD", "COMPOUND-WORD"
    if (token ~ /^[[:upper:]]+([\x27’-]([[:upper:]]+))*$/) return "U";

    # Camel case: "compoundWord", "CompoundWord"
    if (token ~ /^[[:alpha:]][[:lower:]]*([[:upper:]][[:lower:]]+)+$/) return "C";

    # Mixed case: "wOrD", "cOmPoUnD-wOrD"
    if (token ~ /^[[:alpha:]]+([\x27’-]([[:alpha:]]+))*$/) return "M";

    # None of the above
    return "NA";
}

# Classifies the numeric mask of a token: "I" (integer), "R" (real),
# "D" (date), "T" (time), "F" (fraction), "P" (percent), "G" (degrees)
# or "NA". The masks are tested in this order and the first match wins.
function token_mask(token)
{
    # Integer mask
    if (token ~ /^[+-]?[0-9]+$/) return "I";

    # Real number
    if (token ~ /^[+-]?[0-9][0-9]?[0-9]?([,.]?[0-9][0-9][0-9])*([,.][0-9]+)?$/) return "R";

    # Date mask
    if (token ~ /^[0-9]([0-9]|[0-9][0-9][0-9])[/.-][0-9][0-9]?[/.-][0-9]([0-9]|[0-9][0-9][0-9])$/) return "D";

    # Time mask
    if (token ~ /^([0-9][0-9]?[:h][0-9][0-9]|[0-9][0-9]?[h])$/) return "T";

    # Fraction mask
    if (token ~ /^[+-]?[0-9]+[/][0-9]+$/) return "F";

    # Percent mask
    if (token ~ /^[+-]?[0-9]+([,.][0-9]+)?%$/) return "P";

    # Degrees mask
    if (token ~ /^[+-]?[0-9]+([,.][0-9]+)?°$/) return "G";

    # None of the above
    return "NA";
}

# Registers one occurrence of `token`: appends it to the global `tokens`
# list, increments its counter, classifies it the first time it is seen
# (type, format, case, mask) and adds its position to the comma-separated
# list kept in `indexes`.
function insert_token(token)
{
    tokens[++idx] = token;
    counters[token]++;

    # classifications are computed once per distinct token
    if (!types[token]) {
        types[token] = token_type(token);
    }
    if (!formats[token]) {
        formats[token] = token_format(token);
    }
    if (!cases[token]) {
        cases[token] = token_case(token);
    }
    if (!masks[token]) {
        masks[token] = token_mask(token);
    }

    if (indexes[token]) {
        indexes[token] = indexes[token] "," idx;
    } else {
        indexes[token] = idx;
    }
}

# Returns `string` converted to ASCII: Latin-1 letters lose their
# diacritics (or are expanded, e.g. "ß" to "ss"), Unicode punctuation is
# replaced by ASCII equivalents, the listed combining marks are removed and
# any remaining non-ASCII character becomes SUB (0x1A).
#
# The lower-case "æ" is now converted to "ae", like "Æ" to "AE"; the
# previous pattern was /ae/, which replaced "ae" by itself and left "æ"
# to be turned into SUB.
function toascii(string) {

    # Unicode Latin-1 Supplement
    gsub(/[ÀÁÂÃÄÅ]/,"A", string);
    gsub(/[ÈÉÊË]/,"E", string);
    gsub(/[ÌÍÎÏ]/,"I", string);
    gsub(/[ÒÓÔÕÖ]/,"O", string);
    gsub(/[ÙÚÛÜ]/,"U", string);
    gsub(/Ý/,"Y", string);
    gsub(/Ç/,"C", string);
    gsub(/Ñ/,"N", string);
    gsub(/Ð/,"D", string);
    gsub(/Ø/,"OE", string);
    gsub(/Þ/,"TH", string);
    gsub(/Æ/,"AE", string);
    gsub(/[àáâãäåª]/,"a", string);
    gsub(/[èéêë]/,"e", string);
    gsub(/[ìíîï]/,"i", string);
    gsub(/[òóôõöº°]/,"o", string);
    gsub(/[ùúûü]/,"u", string);
    gsub(/[ýÿ]/,"y", string);
    gsub(/ç/,"c", string);
    gsub(/ñ/,"n", string);
    gsub(/ð/,"d", string);
    gsub(/ø/,"oe", string);
    gsub(/þ/,"th", string);
    gsub(/æ/,"ae", string);
    gsub(/ß/,"ss", string);

    # Unicode Punctuation
    gsub(/–/,"-", string);
    gsub(/—/,"--", string);
    gsub(/…/,"...", string);
    gsub(/[‘’]/,"\x27", string);
    gsub(/[“”«»]/,"\x22", string);

    # Remove MAC-UTF-8 combining diacritical marks (only those used in Latin-1)
    gsub(/[\xCC\x80\xCC\x81\xCC\x82\xCC\x83\xCC\x88\xCC\x8A\xCC\xA7]/,"", string);

    # Replace non-ASCII with SUB (0x1A)
    gsub(/[^\x00-\x7E]/,"\x1A", string);

    return string;
}

# Returns, as a string, a regex that matches any stopword of the current
# language: "^(word1|word2|...)$", built from the lines of
# <pwd>/../lib/lang/<lang>/stopwords.txt. Blank lines and lines starting
# with "#" are skipped.
#
# When the "stopwords" option is off the regex "^$" is returned, which
# matches only the empty string. It is now returned as a STRING: the
# previous `return /^$/` evaluated the regex constant against $0 and
# returned 0 or 1 instead of a pattern.
# The stopwords file is now closed after it has been read.
function get_stopwords_regex(    file, regex, line) {

    if (!option_value("stopwords")) {
        return "^$";
    }

    file=pwd "/../lib/lang/" lang "/stopwords.txt"

    regex=""
    while((getline line < file) > 0) {

        # skip blank lines and lines started with #
        if (line ~ /^[[:space:]]*$/ || line ~ /^#/) continue;

        regex=regex "|" line;
    }
    close(file);

    # remove leading pipe
    regex=substr(regex,2);

    return "^(" regex ")$"
}

# separates tokens by spaces
#
# Rewrites $0 in place (which makes awk split the fields again). The passes
# run in this order and each one sees the result of the previous one.
function separate_tokens() {
    # pad the line so that the first and last token have a neighbour
    $0=" " $0 " ";
    # byte 0xA0 becomes a plain space (presumably the non-breaking space)
    gsub(/\xA0/, " ");
    # brackets, parentheses, braces and the em dash become tokens of their own
    gsub(/[]()—{}[]/, " & ");
    # detach sentence punctuation that is followed by a space or punctuation
    gsub(/[.,;:!?…][[:space:][:punct:]]/, " &");
    # detach an opening quote: space after a quote preceded by space/punctuation
    gsub(/[[:space:][:punct:]][\x22\x27“”‘’«»]/, "& ");
    # detach a closing quote: space before a quote followed by space/punctuation
    gsub(/[\x22\x27“”‘’«»][[:space:][:punct:]]/, " &");
}

# 123 456 789,01 -> 123456789,01
#
# Joins, in place on $0, a run of digits and a following group of exactly
# three digits separated by one whitespace character. The pair must be
# delimited on both sides by a space or punctuation character; the loop
# repeats until no such pair is left.
function join_numbers(    number) {
    while (match($0, /[[:space:][:punct:]][0-9]+[[:space:]][0-9][0-9][0-9][[:space:][:punct:]]/)) {
        # the matched text without the delimiter on either side
        number = substr($0, RSTART + 1, RLENGTH - 2);
        sub(/[[:space:]]/, "", number);
        # Keep everything up to and including the leading delimiter. The
        # start index must be 1: position 0 is outside awk's 1-based range
        # and some implementations count it against the length, which drops
        # the delimiter and glues the number to the preceding token.
        $0 = substr($0, 1, RSTART) number substr($0, RSTART + RLENGTH - 1);
    }
}

# Fills the global `records` array with one row per distinct token.
#
# Reads the globals `counters`, `tokens`, `types`, `formats`, `cases`,
# `masks`, `indexes` and the field list in `fields`. For every field whose
# value is not 0, records[row, <field key>] receives the matching statistic;
# records[0] holds the number of rows. Exits with status 1 when the counters
# do not add up to the number of tokens.
function generate_records(    tok, hits, share, total, row, col, ncols, name, enabled)
{
    # start of operational checks #
    total = 0;
    for (tok in counters) total += counters[tok];
    if (total != length(tokens)) {
        print "Wrong sum of counts" > "/dev/stderr";
        exit 1;
    }
    # end of operational checks #

    ncols = fields[0];
    row = 0;
    for (tok in counters) {
        row++;
        hits = counters[tok];
        share = hits / length(tokens);

        for (col = 1; col <= ncols; col++) {
            name = fields[col,"key"];
            enabled = fields[col,"value"];
            if (enabled == 0) continue;

            if      (name == "token")   records[row,"token"]   = tok;
            else if (name == "type")    records[row,"type"]    = types[tok];
            else if (name == "count")   records[row,"count"]   = hits;
            else if (name == "ratio")   records[row,"ratio"]   = share;
            else if (name == "format")  records[row,"format"]  = formats[tok];
            else if (name == "case")    records[row,"case"]    = cases[tok];
            else if (name == "mask")    records[row,"mask"]    = masks[tok];
            else if (name == "length")  records[row,"length"]  = length(tok);
            else if (name == "indexes") records[row,"indexes"] = indexes[tok];
        }
    }

    # array length
    records[0] = row;
}

# Writes the global `records` table to the file named by the global `output`
# as tab-separated text: a header line with the upper-cased field keys, then
# one line per record. Fields whose value is 0 are left out.
function print_records(    line, glue, row, col, nrows, ncols)
{
    ncols = fields[0];
    nrows = records[0];

    if (!length(records)) return;

    # header
    line = "";
    glue = "";
    for (col = 1; col <= ncols; col++) {
        if (fields[col,"value"] == 0) continue;
        line = line glue toupper(fields[col,"key"]);
        glue = "\t";
    }
    printf "%s\n", line > output;

    # one line per record
    for (row = 1; row <= nrows; row++) {
        line = "";
        glue = "";
        for (col = 1; col <= ncols; col++) {
            if (fields[col,"value"] == 0) continue;
            line = line glue records[row,fields[col,"key"]];
            glue = "\t";
        }
        printf "%s\n", line > output;
    }
}

# Returns the part of `file` after the last "/" (the whole string when it
# contains no "/", the empty string when it ends with "/").
function basename(file,    parts, n) {
    n = split(file, parts, "/");
    return parts[n];
}

# Returns the directory part of `file`: the path without its last
# "/component". A name without any "/" yields "." and a file directly under
# the root yields "/", so the result can always be used as a directory.
# (Previously these two cases returned the file name itself and "".)
function basedir(file) {
    if (index(file, "/") == 0) return ".";
    sub("/[^/]+$", "", file)
    return (file == "") ? "/" : file
}

# Loads the field list into `fields` and the option list into `options`.
#
# Precedence for each list: the FIELDS / OPTIONS variable when it is set,
# otherwise the "key=value" lines of pwd "/../abw.conf", otherwise the
# built-in defaults of parse_fields() / parse_options().
#
# The configuration file used to be dead code: `fields[0] = 0` made
# length(fields) non-zero, so the fallback branch could never run.
function parse_confs(    file, line, string)
{
    file=pwd "/../abw.conf"

    string=""
    while((getline line < file) > 0) {

        # strip comments
        sub(/#.*$/,"", line);

        # skip invalid lines
        if (line !~ /^[[:space:]]*[[:alnum:]]+[[:space:]]*=[[:space:]]*[[:alnum:]]+[[:space:]]*$/) continue;

        # "key = value" must reach the parser as "key=value"
        gsub(/[[:space:]]+/, "", line);

        if (string == "") string = line;
        else string=string "," line;
    }
    close(file);

    fields[0] = 0; # declare array
    if (FIELDS != "") {
        parse_fields(FIELDS, fields);
    } else {
        parse_fields(string, fields);
        # the file named no field at all: use the defaults
        if (fields[0] == 0) parse_fields("", fields);
    }

    options[0] = 0; # declare array
    if (OPTIONS != "") {
        parse_options(OPTIONS, options);
    } else {
        parse_options(string, options);
        # the file named no option at all: use the defaults
        if (options[0] == 0) parse_options("", options);
    }
}

# Parses a comma-separated field list ("key", "key=value" or "key:value")
# into `fields`. An empty list selects every known field.
function parse_fields(string, fields,    known)
{
    known = "token,type,count,ratio,format,case,mask,length,indexes";
    gsub(":", "=", string);
    string = string ? string : known;
    parse_key_values(string, fields, known);
}

# Parses a comma-separated option list ("key", "key=value" or "key:value")
# into `options`. An empty list selects the default options.
function parse_options(string, options,    known)
{
    known = "ascii=0,lower=0,upper=0,stopwords=1,lang=none,eol=1,asc=none,desc=none";
    gsub(":", "=", string);
    string = string ? string : known;
    parse_key_values(string, options, known);
}

# Parses a comma-separated list of items into `keyvalues`.
#
# Item formats: 'key' or 'key=value' (callers turn ':' into '=' beforehand)
# If the format is 'key', name is 'key' and value is 1
# If the format is 'key=value', name is 'key' and value is 'value'
#
# string:         the list to parse.
# keyvalues:      output array; keyvalues[i,"key"] and keyvalues[i,"value"]
#                 for i = 1..n in input order, and n in keyvalues[0].
# default_string: list in the same format whose keys are the accepted ones;
#                 items with any other (or an empty) key are ignored.
#
# Keys are compared as plain strings. The earlier version interpolated them
# into a regular expression and took the key with substr(item, 0, ...),
# whose start index 0 is outside awk's 1-based range and may cut off the
# last character of the key.
function parse_key_values(string, keyvalues,     default_string, items, i, key, value, splitter, n, defaults, known, count)
{
    # collect the accepted keys
    n = split(default_string, defaults, ",");
    for (i = 1; i <= n; i++) {
        key = defaults[i];
        sub(/=.*$/, "", key);
        known[key] = 1;
    }

    count = 0;
    n = split(string, items, ",");
    for (i = 1; i <= n; i++)
    {
        splitter = index(items[i], "=");
        if (splitter == 0) {
            key = items[i];
            value = 1;
        } else {
            key = substr(items[i], 1, splitter - 1);
            value = substr(items[i], splitter + 1);
        }
        if (!(key in known)) continue;
        count++;
        keyvalues[count,"key"] = key;
        keyvalues[count,"value"] = value;
    }

    # save the array length
    keyvalues[0] = count;
}

# Translates the "asc" / "desc" options into a sort-order name.
#
# Returns "@ind_str_asc" / "@ind_str_desc" when the option value is "token",
# "@val_num_asc" / "@val_num_desc" when it is "count". The last matching
# option wins; without a match the result is an unset (empty) value.
function get_sort_order(    sort_order, o, olength, key, val, order)
{
    order["asc","token"]  = "@ind_str_asc";
    order["asc","count"]  = "@val_num_asc";
    order["desc","token"] = "@ind_str_desc";
    order["desc","count"] = "@val_num_desc";

    olength = options[0];
    for (o = 1; o <= olength; o++) {
        key = options[o,"key"];
        if (key != "asc" && key != "desc") continue;
        val = options[o,"value"];
        if ((key, val) in order) sort_order = order[key, val];
    }
    return sort_order;
}

# Blanks every field of the current record whose lower-cased text matches
# the lower-cased global `stopwords_regex`. NF is not changed.
function remove_stopwords(    i, pattern)
{
    pattern = tolower(stopwords_regex);
    i = 1;
    while (i <= NF) {
        if (tolower($i) ~ pattern) $i = "";
        i++;
    }
}

# Applies the line-level options to $0, in the order the options are listed:
# "ascii", "lower" and "upper" act when their value is 1, "stopwords" removes
# the stopwords when its value is 0. Other options are ignored here.
function transform_line(    o, olength, key, flag)
{
    olength = options[0];
    for (o = 1; o <= olength; o++) {
        key = options[o,"key"];
        if (key != "ascii" && key != "lower" && key != "upper" && key != "stopwords") continue;

        flag = options[o,"value"];
        if (key == "ascii" && flag == 1) $0 = toascii($0);
        if (key == "lower" && flag == 1) $0 = tolower($0);
        if (key == "upper" && flag == 1) $0 = toupper($0);
        if (key == "stopwords" && flag == 0) remove_stopwords();
    }
}

# Returns the value of the first entry of the global `options` list whose
# key equals `key`, or 0 when the option is not listed.
function option_value(key,    o, olength) {
    olength = options[0];
    o = 1;
    while (o <= olength) {
        if (options[o,"key"] == key) return options[o,"value"];
        o++;
    }
    return 0;
}

# Start-up: loads the configuration and caches the values the rules use.
BEGIN {

    # PWD is not set in this file; presumably the caller passes it with -v
    pwd = PWD;
    parse_confs();

    # Option values parsed from text are strings, and every non-empty string
    # (even "0") is true in a boolean test. "+ 0" makes it a number so that
    # "eol=0" really switches the <eol> tokens off.
    eol = option_value("eol") + 0;
    # `lang` must be set before get_stopwords_regex() builds its file name
    lang = option_value("lang");

    sort_order = get_sort_order();
    stopwords_regex = get_stopwords_regex();
}

# Writes the statistics collected for one input file and resets the
# per-file state.
#
# file: name of the input file the collected tokens belong to. Optional;
#       defaults to FILENAME.
#
# The output path is WRITETO with the first ":filedir" and ":filename"
# replaced by the directory and the base name of `file`.
function endfile(file) {
    if (file == "") file = FILENAME;

    output=WRITETO;
    filedir=basedir(file)
    filename=basename(file)
    sub(/:filedir/, filedir, output);
    sub(/:filename/, filename, output);

    generate_records();
    print_records();

    idx = 0;
    delete tokens;
    delete types;
    delete counters;
    delete formats;
    delete cases;
    delete masks;
    delete indexes;
    delete records;
}

# First record of a file: flush the statistics of the previous file, if any.
# awk has already switched FILENAME to the new file when this rule runs, so
# the name of the finished file is kept in `current_file`; using FILENAME
# here would write the previous file's records under the next file's name.
FNR == 1 {
    if (NR > 1) endfile(current_file);
    current_file = FILENAME;
}

# Every non-empty line: normalise it, split it into tokens and count them.
NF {

    join_numbers();
    transform_line();
    separate_tokens();

    for (i = 1; i <= NF; i++) {
        insert_token($i);
    }

    if (eol) insert_token("<eol>");
}

# Flush the statistics of the last file.
END {
    endfile(current_file);
}

## www_cgi-bin_api_date.sh
#!/bin/sh

# CGI endpoint: replies with the current date and time as printed by
# `date -Iseconds`. An empty line separates the header from the body.
printf 'Content-Type: text/json; charset=utf-8\n\n%s\n' "$(date -Iseconds)"

## www_cgi-bin_halloworld.sh
#!/bin/sh

# CGI endpoint: replies with a static "Hello, World!" HTML page.
# The empty line after the Content-Type header ends the header block.
cat <<EOF
Content-Type: text/html; charset=utf-8

<!DOCTYPE html>
<html>
<head>
<title>Hello, World!</title>
</head>
<body>

<p>Hello, World!</p>

</body>
</html>
EOF
	#!/bin/sh

	#
	# Runs the Busybox httpd server.
	#
	# Usage:
	#
	# notekeeper-http-server.sh
	#
	# Configuration:
	#
	# # file .notekeeper/notekeeper.conf
	# busybox.httpd.port=127.0.0.1:9000
	#

	# load the shared helpers that live next to this script
	. "`dirname "$0"`/notekeeper-common.sh";

	# configuration key that holds the listen address, and its fallback value
	property_port="busybox.httpd.port"
	property_port_default="127.0.0.1:9000"

	# Prints the address the server listens on: the value of the
	# `busybox.httpd.port` property in the notekeeper configuration file, or
	# the default when the property (or the file) is missing.
	busybox_httpd_port() {
		local conf="${WORKING_DIR}/.notekeeper/notekeeper.conf"
		local port=""
		if [ -f "${conf}" ]; then
			# The pipe must be a real `|` (an escaped one is handed to grep as a
			# file name). Only the first matching line is used.
			port=`grep -E "^${property_port}=" "${conf}" | head -n 1 | sed "s/^${property_port}=//"`;
		fi;
		if [ -n "${port}" ]; then
			echo "${port}";
		else
			echo "${property_port_default}";
		fi;
	}

	# Kills every running `busybox httpd` process whose PID is above 1024.
	busybox_httpd_stop() {
		local pid
		# The pipes must be real `|` characters. Looping over the PIDs keeps
		# the numeric test valid when more than one server is running.
		for pid in `ps aux | grep 'busybox httpd' | grep -v "grep" | awk '{ print $2 }'`; do
			if [ "$pid" -gt 1024 ]; then
				kill -9 "$pid";
			fi;
		done;
	}

	# Starts the Busybox httpd server on the configured address, serving the
	# `.notekeeper/html/` folder of the working directory.
	busybox_httpd_start() {
		local port
		port=$(busybox_httpd_port)
		# busybox httpd -p "$port" -h "$PROGRAM_DIR/www/"
		busybox httpd -p "${port}" -h "${WORKING_DIR}/.notekeeper/html/"
		echo "Listening: http://${port}"
	}

	# Restarts the server: stops a running instance, then starts a new one.
	main() {
	busybox_httpd_stop;
	busybox_httpd_start;
	}

	main;

	# https://datatracker.ietf.org/doc/html/rfc3875
	# https://www.vivaolinux.com.br/artigo/Introducao-a-CGI-com-a-RFC-3875
	# https://gist.github.com/stokito/a9a2732ffc7982978a16e40e8d063c8f
	# https://github.com/Mikepicker/cgiblog
	# https://medium.com/@Mikepicker/no-framework-blog-for-fun-and-profit-using-bash-cgi-cbb99cf5366b
	#!/bin/sh

	#
	# Saves HTML in `html` folder.
	#
	# Usage:
	#
	# notekeeper-save-html.sh FILE
	#

	# load the shared helpers that live next to this script
	. "$(dirname "$0")/notekeeper-common.sh";

	file="${1}"
	require_file "${file}";

	# Renders FILE with notekeeper-html.awk and stores the result at the path
	# returned by `html_path`, creating the parent folder when needed.
	main() {
		local file="${1}"
		local html
		# Assign on a separate line and quote the substitutions: some shells
		# word-split an unquoted substitution in `local x=...`, and the
		# `mkdir` argument is split by every shell, so a path containing
		# spaces would break.
		html="$(html_path "${file}")"
		mkdir -p "$(dirname "${html}")"
		"$PROGRAM_DIR/awk/notekeeper-html.awk" "${file}" > "${html}"
	}

	main "${file}";
	#!/bin/sh

	#
	# Saves a STAT file in the `data` folder.
	#
	# Usage:
	#
	# apwm-save-stat.sh FILE
	#

	# load the shared helpers that live next to this script
	. "$(dirname "$0")/notekeeper-common.sh";

	file="${1}"
	require_file "${file}";

	# Runs notekeeper-stat.awk on FILE and stores its output at the path
	# returned by `make_stat`.
	main() {
		local file="${1}"
		local uuid
		local stat
		# Assigned on separate lines and quoted: some shells word-split an
		# unquoted command substitution in `local x=...`, which breaks paths
		# that contain spaces.
		# NOTE(review): `uuid` is not used below; the call is kept in case
		# path_uuid has side effects -- confirm and remove if it has none.
		uuid="$(path_uuid "${file}")";
		stat="$(make_stat "${file}")";
		LC_ALL=C "$PROGRAM_DIR/awk/notekeeper-stat.awk" -v WRITETO=/dev/stdout "${file}" > "${stat}"
	}

	main "${file}";
	#!/usr/bin/awk -f

	# Note:
	# * Files encoded using MAC-UTF-8 must be normalized to UTF-8.

	# Returns the type of a token: its lower-cased form passed through
	# toascii().
	function token_type(token,    lowered)
	{
	    lowered = tolower(token);
	    return toascii(lowered);
	}

	# Classifies the overall shape of a token.
	#
	# Returns "W" (word), "N" (number), "P" (punctuation) or "NA".
	#
	# NOTE:
	# Words that contain "accented" characters encoded with MAC-UTF-8 get "NA".
	# The input files must be normalized to regular UTF-8 encoding.
	function token_format(token)
	{
	    # Word format: letters, optionally joined by an apostrophe or hyphen
	    if (token ~ /^[[:alpha:]]+([\x27’-]?[[:alpha:]])*$/) return "W";

	    # Number format: digits with some optional punctuation
	    if (token ~ /^[+-]?([[:digit:]][h°%/:,.+-]?)+$/) return "N";

	    # Punct format: all-punct token
	    if (token ~ /^[[:punct:]]+$/) return "P";

	    # None of the above
	    return "NA";
	}

	# Classifies the letter case of a token (after toascii()).
	#
	# Returns "S" (start case), "L" (lower), "U" (upper), "C" (camel),
	# "M" (mixed) or "NA".
	#
	# NOTE:
	# UPPERCASE words with a single character, for example "É", are treated as
	# start case words by this function. The author considers it a very
	# convenient behavior that helps to identify proper nouns and the beginning
	# of sentences, although he admits that it may not be intuitive. The order
	# of the `if`s is important to preserve this behavior.
	function token_case(token)
	{
	    token = toascii(token);

	    # The "*" after each [[:lower:]] is required: without it only
	    # two-letter words could match, so "Word" fell through to "M" and the
	    # single-letter case described in the NOTE could never be start case.
	    if (token ~ /^[[:upper:]][[:lower:]]*([\x27’-]([[:alpha:]][[:lower:]]*))*$/) {
	        return "S"; # Start case: "Word", "Compound-word"
	    } else if (token ~ /^[[:lower:]]+([\x27’-]([[:lower:]]+))*$/) {
	        return "L"; # Lower case: "word", "compound-word"
	    } else if (token ~ /^[[:upper:]]+([\x27’-]([[:upper:]]+))*$/) {
	        return "U"; # Upper case: "WORD", "COMPOUND-WORD"
	    } else if (token ~ /^[[:alpha:]][[:lower:]]*([[:upper:]][[:lower:]]+)+$/) {
	        return "C"; # Camel case: "compoundWord", "CompoundWord"
	    } else if (token ~ /^[[:alpha:]]+([\x27’-]([[:alpha:]]+))*$/) {
	        return "M"; # Mixed case: "wOrD", "cOmPoUnD-wOrD"
	    } else {
	        return "NA"; # None of the above
	    }
	}

	# Classifies a numeric token by its mask.
	#
	# Returns "I" (integer), "R" (real number), "D" (date), "T" (time),
	# "F" (fraction), "P" (percent), "G" (degrees) or "NA". The checks run
	# in this order and the first match wins.
	#
	# The alternations in the date and time masks must use a plain "|"; an
	# escaped one stands for a literal pipe character, so no date or time
	# could ever match.
	function token_mask(token)
	{
	    if (token ~ /^[+-]?[0-9]+$/) {
	        return "I"; # Integer mask
	    } else if (token ~ /^[+-]?[0-9][0-9]?[0-9]?([,.]?[0-9][0-9][0-9])*([,.][0-9]+)?$/) {
	        return "R"; # Real number
	    } else if (token ~ /^[0-9]([0-9]|[0-9][0-9][0-9])[/.-][0-9][0-9]?[/.-][0-9]([0-9]|[0-9][0-9][0-9])$/) {
	        return "D"; # Date mask
	    } else if (token ~ /^([0-9][0-9]?[:h][0-9][0-9]|[0-9][0-9]?[h])$/) {
	        return "T"; # Time mask
	    } else if (token ~ /^[+-]?[0-9]+[/][0-9]+$/) {
	        return "F"; # Fraction mask
	    } else if (token ~ /^[+-]?[0-9]+([,.][0-9]+)?%$/) {
	        return "P"; # Percent mask
	    } else if (token ~ /^[+-]?[0-9]+([,.][0-9]+)?°$/) {
	        return "G"; # Degrees mask
	    } else {
	        return "NA"; # None of the above
	    }
	}

	# Registers one occurrence of `token`: appends it to `tokens`, bumps its
	# counter, computes its type, format, case and mask while they are unset,
	# and appends the position to the comma-separated list in `indexes`.
	function insert_token(token)
	{
	    tokens[++idx] = token;
	    counters[token]++;

	    if (!types[token])   types[token]   = token_type(token);
	    if (!formats[token]) formats[token] = token_format(token);
	    if (!cases[token])   cases[token]   = token_case(token);
	    if (!masks[token])   masks[token]   = token_mask(token);

	    indexes[token] = indexes[token] ? indexes[token] "," idx : idx;
	}

	# Transliterates Latin-1 letters and common Unicode punctuation to ASCII.
	#
	# string: the text to convert; awk passes scalars by value, so the
	#         caller's variable is left untouched.
	# Returns the converted copy. Whatever is still outside 0x00-0x7E after
	# the substitutions below is replaced with SUB (0x1A).
	function toascii(string) {

	    # Unicode Latin-1 Supplement
	    gsub(/[ÀÁÂÃÄÅ]/,"A", string);
	    gsub(/[ÈÉÊË]/,"E", string);
	    gsub(/[ÌÍÎÏ]/,"I", string);
	    gsub(/[ÒÓÔÕÖ]/,"O", string);
	    gsub(/[ÙÚÛÜ]/,"U", string);
	    gsub(/Ý/,"Y", string);
	    gsub(/Ç/,"C", string);
	    gsub(/Ñ/,"N", string);
	    gsub(/Ð/,"D", string);
	    gsub(/Ø/,"OE", string);
	    gsub(/Þ/,"TH", string);
	    gsub(/Æ/,"AE", string);
	    gsub(/[àáâãäåª]/,"a", string);
	    gsub(/[èéêë]/,"e", string);
	    gsub(/[ìíîï]/,"i", string);
	    gsub(/[òóôõöº°]/,"o", string);
	    gsub(/[ùúûü]/,"u", string);
	    gsub(/[ýÿ]/,"y", string);
	    gsub(/ç/,"c", string);
	    gsub(/ñ/,"n", string);
	    gsub(/ð/,"d", string);
	    gsub(/ø/,"oe", string);
	    gsub(/þ/,"th", string);
	    # lower-case counterpart of the "Æ" rule above; the pattern used to be
	    # /ae/, which replaced "ae" with itself and let "æ" fall through to SUB
	    gsub(/æ/,"ae", string);
	    gsub(/ß/,"ss", string);

	    # Unicode Punctuation
	    gsub(/–/,"-", string);
	    gsub(/—/,"--", string);
	    gsub(/…/,"...", string);
	    gsub(/[‘’]/,"\x27", string);
	    gsub(/[“”«»]/,"\x22", string);

	    # Remove MAC-UTF-8 combining diacritical marks (only those used in Latin-1)
	    # NOTE(review): a bracket expression matches single characters/bytes,
	    # not the two-byte sequences listed here -- confirm under the locale in use.
	    gsub(/[\xCC\x80\xCC\x81\xCC\x82\xCC\x83\xCC\x88\xCC\x8A\xCC\xA7]/,"", string);

	    # Replace non-ASCII with SUB (0x1A)
	    gsub(/[^\x00-\x7E]/,"\x1A", string);

	    return string;
	}

	# Builds the pattern used by remove_stopwords() to recognise stopwords.
	#
	# Reads pwd "/../lib/lang/" lang "/stopwords.txt", one stopword per line;
	# blank lines and lines starting with "#" are skipped.
	# Returns a dynamic-regex string of the form "^(w1|w2|...)$", or "^$"
	# (which matches no non-empty token) when no list is needed or none could
	# be read.
	function get_stopwords_regex(    file, regex, line) {

	    # transform_line() strips stopwords only when the option equals 0, so
	    # a non-zero value means the list is not needed. "+ 0" forces a
	    # numeric test: option values parsed from text are strings and "0"
	    # would be true. The previous `return /^$/` returned the result of
	    # matching $0 against the pattern, not a regular expression.
	    if (option_value("stopwords") + 0 != 0) {
	        return "^$";
	    }

	    file=pwd "/../lib/lang/" lang "/stopwords.txt"

	    regex=""
	    while((getline line < file) > 0) {

	        # skip blank lines and lines started with #
	        # (the operator is a plain "||"; an escaped one is a syntax error)
	        if (line ~ /^[[:space:]]*$/ || line ~ /^#/) continue;

	        regex=regex "|" line;
	    }
	    close(file);

	    # missing or empty list
	    if (regex == "") return "^$";

	    # remove leading pipe
	    regex=substr(regex,2);

	    return "^(" regex ")$"
	}

	# separates tokens by spaces
	#
	# Rewrites $0 in place (which makes awk split the fields again). The passes
	# run in this order and each one sees the result of the previous one.
	function separate_tokens() {
	# pad the line so that the first and last token have a neighbour
	$0=" " $0 " ";
	# byte 0xA0 becomes a plain space (presumably the non-breaking space)
	gsub(/\xA0/, " ");
	# brackets, parentheses, braces and the em dash become tokens of their own
	gsub(/[]()—{}[]/, " & ");
	# detach sentence punctuation that is followed by a space or punctuation
	gsub(/[.,;:!?…][[:space:][:punct:]]/, " &");
	# detach an opening quote: space after a quote preceded by space/punctuation
	gsub(/[[:space:][:punct:]][\x22\x27“”‘’«»]/, "& ");
	# detach a closing quote: space before a quote followed by space/punctuation
	gsub(/[\x22\x27“”‘’«»][[:space:][:punct:]]/, " &");
	}

	# 123 456 789,01 -> 123456789,01
	#
	# Joins, in place on $0, a run of digits and a following group of exactly
	# three digits separated by one whitespace character. The pair must be
	# delimited on both sides by a space or punctuation character; the loop
	# repeats until no such pair is left.
	function join_numbers(    number) {
	    while (match($0, /[[:space:][:punct:]][0-9]+[[:space:]][0-9][0-9][0-9][[:space:][:punct:]]/)) {
	        # the matched text without the delimiter on either side
	        number = substr($0, RSTART + 1, RLENGTH - 2);
	        sub(/[[:space:]]/, "", number);
	        # Keep everything up to and including the leading delimiter. The
	        # start index must be 1: position 0 is outside awk's 1-based range
	        # and some implementations count it against the length, which
	        # drops the delimiter and glues the number to the preceding token.
	        $0 = substr($0, 1, RSTART) number substr($0, RSTART + RLENGTH - 1);
	    }
	}

	# Fills the global `records` array with one row per distinct token.
	#
	# Reads the globals `counters`, `tokens`, `types`, `formats`, `cases`,
	# `masks`, `indexes` and the field list in `fields`. For every field whose
	# value is not 0, records[row, <field key>] receives the matching
	# statistic; records[0] holds the number of rows. Exits with status 1
	# when the counters do not add up to the number of tokens.
	function generate_records(    tok, hits, share, total, row, col, ncols, name, enabled)
	{
	    # start of operational checks #
	    total = 0;
	    for (tok in counters) total += counters[tok];
	    if (total != length(tokens)) {
	        print "Wrong sum of counts" > "/dev/stderr";
	        exit 1;
	    }
	    # end of operational checks #

	    ncols = fields[0];
	    row = 0;
	    for (tok in counters) {
	        row++;
	        hits = counters[tok];
	        share = hits / length(tokens);

	        for (col = 1; col <= ncols; col++) {
	            name = fields[col,"key"];
	            enabled = fields[col,"value"];
	            if (enabled == 0) continue;

	            if      (name == "token")   records[row,"token"]   = tok;
	            else if (name == "type")    records[row,"type"]    = types[tok];
	            else if (name == "count")   records[row,"count"]   = hits;
	            else if (name == "ratio")   records[row,"ratio"]   = share;
	            else if (name == "format")  records[row,"format"]  = formats[tok];
	            else if (name == "case")    records[row,"case"]    = cases[tok];
	            else if (name == "mask")    records[row,"mask"]    = masks[tok];
	            else if (name == "length")  records[row,"length"]  = length(tok);
	            else if (name == "indexes") records[row,"indexes"] = indexes[tok];
	        }
	    }

	    # array length
	    records[0] = row;
	}

	# Writes the global `records` table to the file named by the global
	# `output` as tab-separated text: a header line with the upper-cased field
	# keys, then one line per record. Fields whose value is 0 are left out.
	function print_records(    line, glue, row, col, nrows, ncols)
	{
	    ncols = fields[0];
	    nrows = records[0];

	    if (!length(records)) return;

	    # header
	    line = "";
	    glue = "";
	    for (col = 1; col <= ncols; col++) {
	        if (fields[col,"value"] == 0) continue;
	        line = line glue toupper(fields[col,"key"]);
	        glue = "\t";
	    }
	    printf "%s\n", line > output;

	    # one line per record
	    for (row = 1; row <= nrows; row++) {
	        line = "";
	        glue = "";
	        for (col = 1; col <= ncols; col++) {
	            if (fields[col,"value"] == 0) continue;
	            line = line glue records[row,fields[col,"key"]];
	            glue = "\t";
	        }
	        printf "%s\n", line > output;
	    }
	}

	# Returns the part of `file` after the last "/" (the whole string when it
	# contains no "/", the empty string when it ends with "/").
	function basename(file,    parts, n) {
	    n = split(file, parts, "/");
	    return parts[n];
	}

	# Returns the directory part of `file`: the path without its last
	# "/component". A name without any "/" yields "." and a file directly
	# under the root yields "/", so the result can always be used as a
	# directory. (Previously these cases returned the file name itself and "".)
	function basedir(file) {
	    if (index(file, "/") == 0) return ".";
	    sub("/[^/]+$", "", file)
	    return (file == "") ? "/" : file
	}

	# Loads the field list into `fields` and the option list into `options`.
	#
	# Precedence for each list: the FIELDS / OPTIONS variable when it is set,
	# otherwise the "key=value" lines of pwd "/../abw.conf", otherwise the
	# built-in defaults of parse_fields() / parse_options().
	#
	# The configuration file used to be dead code: `fields[0] = 0` made
	# length(fields) non-zero, so the fallback branch could never run.
	function parse_confs(    file, line, string)
	{
	    file=pwd "/../abw.conf"

	    string=""
	    while((getline line < file) > 0) {

	        # strip comments
	        sub(/#.*$/,"", line);

	        # skip invalid lines; the whitespace around the key, the "=" and
	        # the value is optional, hence the "*" quantifiers
	        if (line !~ /^[[:space:]]*[[:alnum:]]+[[:space:]]*=[[:space:]]*[[:alnum:]]+[[:space:]]*$/) continue;

	        # "key = value" must reach the parser as "key=value"
	        gsub(/[[:space:]]+/, "", line);

	        if (string == "") string = line;
	        else string=string "," line;
	    }
	    close(file);

	    fields[0] = 0; # declare array
	    if (FIELDS != "") {
	        parse_fields(FIELDS, fields);
	    } else {
	        parse_fields(string, fields);
	        # the file named no field at all: use the defaults
	        if (fields[0] == 0) parse_fields("", fields);
	    }

	    options[0] = 0; # declare array
	    if (OPTIONS != "") {
	        parse_options(OPTIONS, options);
	    } else {
	        parse_options(string, options);
	        # the file named no option at all: use the defaults
	        if (options[0] == 0) parse_options("", options);
	    }
	}

	# Parses a comma-separated field list ("key", "key=value" or "key:value")
	# into `fields`. An empty list selects every known field.
	function parse_fields(string, fields,    known)
	{
	    known = "token,type,count,ratio,format,case,mask,length,indexes";
	    gsub(":", "=", string);
	    string = string ? string : known;
	    parse_key_values(string, fields, known);
	}

	# Parses a comma-separated option list ("key", "key=value" or "key:value")
	# into `options`. An empty list selects the default options.
	function parse_options(string, options,    known)
	{
	    known = "ascii=0,lower=0,upper=0,stopwords=1,lang=none,eol=1,asc=none,desc=none";
	    gsub(":", "=", string);
	    string = string ? string : known;
	    parse_key_values(string, options, known);
	}

	# Parses a comma-separated list of items into `keyvalues`.
	#
	# Item formats: 'key' or 'key=value' (callers turn ':' into '=' beforehand)
	# If the format is 'key', name is 'key' and value is 1
	# If the format is 'key=value', name is 'key' and value is 'value'
	#
	# string:         the list to parse.
	# keyvalues:      output array; keyvalues[i,"key"] and keyvalues[i,"value"]
	#                 for i = 1..n in input order, and n in keyvalues[0].
	# default_string: list in the same format whose keys are the accepted
	#                 ones; items with any other (or an empty) key are ignored.
	#
	# Keys are compared as plain strings. The earlier version interpolated
	# them into regular expressions (one of them with an escaped, hence
	# literal, "|") and took the key with substr(item, 0, ...), whose start
	# index 0 is outside awk's 1-based range and may cut off the last
	# character of the key.
	function parse_key_values(string, keyvalues,     default_string, items, i, key, value, splitter, n, defaults, known, count)
	{
	    # collect the accepted keys
	    n = split(default_string, defaults, ",");
	    for (i = 1; i <= n; i++) {
	        key = defaults[i];
	        sub(/=.*$/, "", key);
	        known[key] = 1;
	    }

	    count = 0;
	    n = split(string, items, ",");
	    for (i = 1; i <= n; i++)
	    {
	        splitter = index(items[i], "=");
	        if (splitter == 0) {
	            key = items[i];
	            value = 1;
	        } else {
	            key = substr(items[i], 1, splitter - 1);
	            value = substr(items[i], splitter + 1);
	        }
	        if (!(key in known)) continue;
	        count++;
	        keyvalues[count,"key"] = key;
	        keyvalues[count,"value"] = value;
	    }

	    # save the array length
	    keyvalues[0] = count;
	}

	# Translates the "asc" / "desc" options into a sort-order name.
	#
	# Returns "@ind_str_asc" / "@ind_str_desc" when the option value is
	# "token", "@val_num_asc" / "@val_num_desc" when it is "count". The last
	# matching option wins; without a match the result is an unset value.
	function get_sort_order(    sort_order, o, olength, key, val, order)
	{
	    order["asc","token"]  = "@ind_str_asc";
	    order["asc","count"]  = "@val_num_asc";
	    order["desc","token"] = "@ind_str_desc";
	    order["desc","count"] = "@val_num_desc";

	    olength = options[0];
	    for (o = 1; o <= olength; o++) {
	        key = options[o,"key"];
	        if (key != "asc" && key != "desc") continue;
	        val = options[o,"value"];
	        if ((key, val) in order) sort_order = order[key, val];
	    }
	    return sort_order;
	}

	# Blanks every field of the current record whose lower-cased text matches
	# the lower-cased global `stopwords_regex`. NF is not changed.
	function remove_stopwords(    i, pattern)
	{
	    pattern = tolower(stopwords_regex);
	    i = 1;
	    while (i <= NF) {
	        if (tolower($i) ~ pattern) $i = "";
	        i++;
	    }
	}

	# Applies the line-level options to $0, in the order the options are
	# listed: "ascii", "lower" and "upper" act when their value is 1,
	# "stopwords" removes the stopwords when its value is 0. Other options are
	# ignored here.
	function transform_line(    o, olength, key, flag)
	{
	    olength = options[0];
	    for (o = 1; o <= olength; o++) {
	        key = options[o,"key"];
	        if (key != "ascii" && key != "lower" && key != "upper" && key != "stopwords") continue;

	        flag = options[o,"value"];
	        if (key == "ascii" && flag == 1) $0 = toascii($0);
	        if (key == "lower" && flag == 1) $0 = tolower($0);
	        if (key == "upper" && flag == 1) $0 = toupper($0);
	        if (key == "stopwords" && flag == 0) remove_stopwords();
	    }
	}

	# Returns the value of the first entry of the global `options` list whose
	# key equals `key`, or 0 when the option is not listed.
	function option_value(key,    o, olength) {
	    olength = options[0];
	    o = 1;
	    while (o <= olength) {
	        if (options[o,"key"] == key) return options[o,"value"];
	        o++;
	    }
	    return 0;
	}

	# Start-up: loads the configuration and caches the values the rules use.
	BEGIN {

	    # PWD is not set in this file; presumably the caller passes it with -v
	    pwd = PWD;
	    parse_confs();

	    # Option values parsed from text are strings, and every non-empty
	    # string (even "0") is true in a boolean test. "+ 0" makes it a number
	    # so that "eol=0" really switches the <eol> tokens off.
	    eol = option_value("eol") + 0;
	    # `lang` must be set before get_stopwords_regex() builds its file name
	    lang = option_value("lang");

	    sort_order = get_sort_order();
	    stopwords_regex = get_stopwords_regex();
	}

	# Writes the statistics collected for one input file and resets the
	# per-file state.
	#
	# file: name of the input file the collected tokens belong to. Optional;
	#       defaults to FILENAME.
	#
	# The output path is WRITETO with the first ":filedir" and ":filename"
	# replaced by the directory and the base name of `file`.
	function endfile(file) {
	    if (file == "") file = FILENAME;

	    output=WRITETO;
	    filedir=basedir(file)
	    filename=basename(file)
	    sub(/:filedir/, filedir, output);
	    sub(/:filename/, filename, output);

	    generate_records();
	    print_records();

	    idx = 0;
	    delete tokens;
	    delete types;
	    delete counters;
	    delete formats;
	    delete cases;
	    delete masks;
	    delete indexes;
	    delete records;
	}

	# First record of a file: flush the statistics of the previous file, if
	# any. awk has already switched FILENAME to the new file when this rule
	# runs, so the name of the finished file is kept in `current_file`; using
	# FILENAME here would write the previous file's records under the next
	# file's name.
	FNR == 1 {
	    if (NR > 1) endfile(current_file);
	    current_file = FILENAME;
	}

	# Every non-empty line: normalise it, split it into tokens and count them.
	NF {

	    join_numbers();
	    transform_line();
	    separate_tokens();

	    for (i = 1; i <= NF; i++) {
	        insert_token($i);
	    }

	    if (eol) insert_token("<eol>");
	}

	# Flush the statistics of the last file.
	END {
	    endfile(current_file);
	}
	#!/bin/sh

	# CGI endpoint: replies with the current date and time as printed by
	# `date -Iseconds`. An empty line separates the header from the body.
	# printf replaces the here-document, whose indented "EOF" terminator is
	# not recognised by a plain `<<` redirection.
	printf 'Content-Type: text/json; charset=utf-8\n\n%s\n' "$(date -Iseconds)"
	#!/bin/sh

	# CGI endpoint: replies with a static "Hello, World!" HTML page.
	# `<<-` strips the leading tabs of the here-document, so the indented
	# "EOF" terminator is recognised and the output carries no indentation.
	# The empty line after the Content-Type header ends the header block.
	cat <<-EOF
	Content-Type: text/html; charset=utf-8

	<!DOCTYPE html>
	<html>
	<head>
	<title>Hello, World!</title>
	</head>
	<body>

	<p>Hello, World!</p>

	</body>
	</html>
	EOF