Skip to content

Instantly share code, notes, and snippets.

@fabiolimace
Last active January 3, 2026 02:35
Show Gist options
  • Select an option

  • Save fabiolimace/c2f3478931277e036eff7f1c4c000dc2 to your computer and use it in GitHub Desktop.

Select an option

Save fabiolimace/c2f3478931277e036eff7f1c4c000dc2 to your computer and use it in GitHub Desktop.
Removed from Note Keeper (formerly APKM) on 02 Jan 2026
#!/usr/bin/awk -f
#
#
# NOTE:
#
# This file is being refactored. Only a few things work.
# The original code is safe in `apt-html.original.awk`.
#
#
#
#
# Converts markdown to HTML
#
# See:
#
# * https://spec.commonmark.org
# * https://markdown-it.github.io
# * https://www.javatpoint.com/markdown
# * https://www.markdownguide.org/cheat-sheet
# * https://www.markdownguide.org/extended-syntax
# * https://pandoc.org/MANUAL.html#pandocs-markdown
# * https://www.dotcms.com/docs/latest/markdown-syntax
# * https://www.codecademy.com/resources/docs/markdown
# * https://daringfireball.net/projects/markdown/syntax
# * https://www.ecovida.org.br/docs/manual_site/markdown
# * https://quarto.org/docs/authoring/markdown-basics.html
# * https://docs.github.com/en/get-started/writing-on-github
# * https://fuchsia.dev/fuchsia-src/contribute/docs/markdown
# * https://www.ibm.com/docs/en/SSYKAV?topic=train-how-do-use-markdown
# * https://www.knowledgehut.com/blog/web-development/what-is-markdown
# * https://www.ionos.com/digitalguide/websites/web-development/markdown/
# * https://learn.microsoft.com/en-us/contribute/content/markdown-reference
# * https://developer.mozilla.org/en-US/docs/MDN/Writing_guidelines/Howto/Markdown_in_MDN
# * https://confluence.atlassian.com/bitbucketserver/markdown-syntax-guide-776639995.html
# * https://learn.microsoft.com/en-us/azure/devops/project/wiki/markdown-guidance?view=azure-devops
# * https://medium.com/analytics-vidhya/the-ultimate-markdown-guide-for-jupyter-notebook-d5e5abf728fd
# Returns 1 when the top of the stack is "root", "blockquote" or "li",
# otherwise 0.
function ready() {
    if (at("root")) {
        return 1;
    }
    if (at("blockquote")) {
        return 1;
    }
    return at("li");
}
# Returns 1 when the stack index is 0 (only the base entry remains), else 0.
function empty() {
    return (idx == 0) ? 1 : 0;
}
# Tag name stored at the top of the stack.
function peek(    top) {
    top = stk[idx];
    return top;
}
# Attribute string stored at the top of the stack.
function peek_attr(    top) {
    top = stk_attr[idx];
    return top;
}
# Leading-space count recorded when the top element was pushed.
function peek_spaces(    top) {
    top = stk_spaces[idx];
    return top;
}
# Returns the value of attribute `key` (written as key='value') from the
# attribute string of the element on top of the stack, or "" when the
# attribute is absent.
#
# `attrs` is now a local: the original assigned to an undeclared `attr`,
# which silently created/overwrote a global of that name.
# Note: this function calls match(), so RSTART and RLENGTH are overwritten.
function peek_value(key, found,    attrs) {
    # a leading blank lets the pattern require a separator before the key
    attrs = " " peek_attr();
    if (match(attrs, "[ ]" key "='[^']*'") > 0) {
        found = substr(attrs, RSTART, RLENGTH);
        # isolate the quoted part and strip `='` and the closing quote
        match(found, "='[^']*'");
        return substr(found, RSTART + 2, RLENGTH - 3);
    }
    return "";
}
# Advances the global counter `id` by one and returns the new value.
function identifier() {
    id = id + 1;
    return id;
}
# Returns 1 when the top of the stack is `tag`, otherwise 0.
function at(tag) {
    if (peek() == tag) {
        return 1;
    }
    return 0;
}
# Returns 1 when the top of the stack equals any tag in the
# comma-separated list `tags`; returns "" otherwise.
function any(tags, i, n, arr) {
    n = split(tags, arr, ",");
    i = 0;
    while (++i <= n) {
        if (at(arr[i])) {
            return 1;
        }
    }
    return "";
}
# Pops the top element only when it is `tag`; returns the popped tag or "".
function pop_at(tag) {
    return at(tag) ? pop() : "";
}
# Pops the top element only when it is one of the comma-separated `tags`;
# returns the popped tag or "".
function pop_any(tags) {
    return any(tags) ? pop() : "";
}
# True when the top of the stack is one of the list tags: ol, ul or li.
function container(    kinds) {
    kinds = "ol,ul,li";
    return any(kinds);
}
# Closes the element on top of the stack and removes it.
# List tags (see container()) had their opening tag printed by push(),
# so only the buffer and the closing tag are emitted here; every other
# element is printed in full by print_tag().
# Returns the removed tag, or "" when the stack is empty.
function pop() {
    if (empty()) {
        return "";
    }
    if (!container()) {
        print_tag();
        return unpush();
    }
    print_buf();
    close_tag();
    return unpush();
}
# Counts the spaces in front of the first non-space character of $0.
# When $0 has no non-space character, match() fails and its RLENGTH (-1)
# is returned unchanged. Overwrites RSTART and RLENGTH.
function spaces() {
    match($0, /^[ ]*[^ ]/);
    if (RLENGTH > 0) {
        # the match includes the first non-space character itself
        return RLENGTH - 1;
    }
    return RLENGTH;
}
# Pushes `tag` (with optional attribute string `attr`) on the stack and
# records the indentation of the current line.
# For list tags the pending buffer is flushed and the opening tag is
# printed immediately; other tags are printed when popped.
function push(tag, attr) {
    # an open <ol>/<ul> directly on top is closed unless an <li> follows
    pop_list(tag);
    idx = idx + 1;
    stk_spaces[idx] = spaces();
    stk_attr[idx] = attr;
    stk[idx] = tag;
    if (!container()) {
        return;
    }
    print_buf();
    open_tag();
}
# Pops an <ol>/<ul> sitting on top of the stack when the tag about to
# be pushed is anything other than "li".
function pop_list(tag) {
    if (tag == "li") {
        return;
    }
    if (any("ol,ul")) {
        pop();
    }
}
# Removes the top stack entry without printing anything.
# Returns the tag that was on top; the base entry at index 0 is never removed.
function unpush( tag) {
    tag = peek();
    if (empty()) {
        return tag;
    }
    delete stk[idx];
    delete stk_attr[idx];
    delete stk_spaces[idx];
    idx = idx - 1;
    return tag;
}
# Prints the element on top of the stack in full: opening tag, the
# processed buffer contents, then the closing tag.
function print_tag() {
open_tag();
print_buf();
close_tag();
}
# Prints the opening markup for the element on top of the stack.
# pre/code are delegated to open_pre() using the element's `title`
# attribute; br/hr are printed bare; everything else is printed with its
# attribute string when it has one.
function open_tag(    name, attrs) {
    name = peek();
    if (name == "pre" || name == "code") {
        open_pre(peek_value("title"));
        return;
    }
    attrs = peek_attr();
    if (name == "br" || name == "hr" || !attrs) {
        printf "<%s>\n", name;
        return;
    }
    printf "<%s %s>\n", name, attrs;
}
# Prints the closing markup for the element on top of the stack.
# br/hr print nothing; pre/code are delegated to close_pre().
function close_tag(    name) {
    name = peek();
    if (name == "br" || name == "hr") {
        return;
    }
    if (name == "pre" || name == "code") {
        close_pre();
        return;
    }
    printf "</%s>\n", name;
}
# Appends `str` to the global text buffer `buf`.
# Inside pre/code the lines are joined with newlines and kept verbatim.
# Elsewhere lines are trimmed and joined with one space, and a line
# ending in two or more spaces gets a <br/> appended (hard line break).
# `sep` is always overwritten, so it acts as a local.
function buffer(str, sep) {
    if (at("pre") || at("code")) {
        sep = "\n";
    } else {
        sep = " ";
        if (str ~ /[ ][ ]+$/) {
            str = rtrim(str) make_tag("br");
        }
        str = trim(str);
    }
    buf = (buf == "") ? str : buf sep str;
}
# Converts the buffered text to HTML, prints it when non-empty, and
# clears the buffer.
# pre/code content is only escaped. Other content passes through the
# inline converters in a fixed order: angles, footnotes, images, links,
# reflinks, styles.
function print_buf() {
    if (at("pre") || at("code")) {
        buf = escape(buf);
    } else {
        # innermost call runs first; the order matters
        buf = styles(reflinks(links(images(footnotes(angles(buf))))));
    }
    if (buf != "") {
        print buf;
    }
    buf = "";
}
# Returns `str` when it is truthy, otherwise `alternative`.
function coalesce(str, alternative) {
    if (str) {
        return str;
    }
    return alternative;
}
# Prints the opening markup of a code block.
# `title` is shown in the block header and defaults to "&gt;_".
# `id` is always overwritten with a fresh identifier, so it acts as a local.
# When the global TEST is set only a plain <pre><code> is printed.
function open_pre(title, id) {
    id = identifier();
    if (!title) {
        title = "&gt;_";
    }
    if (TEST) {
        printf "<pre><code>\n";
        return;
    }
    printf "<div class='codeblock'>";
    printf "<div class='codeblock-head'>";
    printf "<span class='codeblock-title'>%s</span><span class='codeblock-buttons'>%s</span>", title, buttons(id);
    printf "</div>";
    printf "<pre class='codeblock-body' id='%s'><code class='codeblock-code'>", id;
}
# Prints the closing markup of a code block; the wrapping </div> is
# only emitted when the global TEST is not set.
function close_pre() {
    printf "</code></pre>\n";
    if (!TEST) {
        printf "</div>\n";
    }
}
# Builds the HTML for the three code-block buttons (copy, collapse,
# word wrap), each wired to the script function of the same name with
# `id` as its argument.
#
# The icon strings are now locals; the original assigned them to
# undeclared names and so created three globals. `style` is unused and
# kept only so the parameter list stays compatible.
# Buttons are returned in the order copy, collapse, word wrap.
function buttons(id, style, copy, collapse, wordwrap,    copy_icon, collapse_icon, wordwrap_icon) {
    copy_icon = "&#x1F4CB;";
    collapse_icon = "&#x2195;";
    wordwrap_icon = "&#x21B5;";
    copy = "<button onclick='copy(" id ")' title='Copy'>" copy_icon "</button>";
    collapse = "<button onclick='collapse(" id ")' title='Collapse'>" collapse_icon "</button>";
    wordwrap = "<button onclick='wordwrap(" id ")' title='Word wrap'>" wordwrap_icon "</button>";
    return copy collapse wordwrap;
}
# Applies every inline style converter to `buf` and returns the result.
# Order (innermost call first): snippet, formula, asterisk, underscore,
# deleted, inserted, highlighted, superscript, subscript.
function styles(buf) {
    buf = underscore(asterisk(formula(snippet(buf))));
    buf = highlighted(inserted(deleted(buf)));
    return subscript(superscript(buf));
}
# ``text`` and `text` become <code>; the double mark is handled first.
function snippet(buf) {
    return apply_style(apply_style(buf, "``", "code"), "`", "code");
}
# $$text$$ and $text$ become <code>; the double mark is handled first.
function formula(buf) {
    return apply_style(apply_style(buf, "$$", "code"), "$", "code");
}
# __text__ becomes <strong>, then _text_ becomes <em>.
function underscore(buf) {
    return apply_style(apply_style(buf, "__", "strong"), "_", "em");
}
# **text** becomes <strong>, then *text* becomes <em>.
function asterisk(buf) {
    return apply_style(apply_style(buf, "**", "strong"), "*", "em");
}
# ~~text~~ becomes <del>.
function deleted(buf) {
    buf = apply_style(buf, "~~", "del");
    return buf;
}
# ++text++ becomes <ins>.
function inserted(buf) {
    buf = apply_style(buf, "++", "ins");
    return buf;
}
# ==text== becomes <mark>.
function highlighted(buf) {
    buf = apply_style(buf, "==", "mark");
    return buf;
}
# ^text^ becomes <sup>.
function superscript(buf) {
    buf = apply_style(buf, "^", "sup");
    return buf;
}
# ~text~ becomes <sub>.
function subscript(buf) {
    buf = apply_style(buf, "~", "sub");
    return buf;
}
# Replaces each `mark`text`mark` span in `buf` with <tag>text</tag> and
# returns the result. Text wrapped in a "code" tag is escaped first so
# later style passes leave it alone.
#
# Scanning stops at the first mark that has no closing partner or that
# encloses an empty span; the remaining text is returned unchanged.
#
# The mark length and scan position are now locals; the original wrote
# them to the undeclared globals `len` and `position`.
function apply_style(buf, mark, tag, out, found, rstart, rlength,    mlen, pos) {
    out = "";
    mlen = length(mark);
    pos = index(buf, mark);
    while (pos > 0) {
        # first character after the opening mark
        rstart = pos + mlen;
        # number of characters before the closing mark
        rlength = index(substr(buf, rstart), mark) - 1;
        if (rlength <= 0) break;
        found = substr(buf, rstart, rlength);
        if (tag == "code") {
            found = escape(found);
        }
        out = out substr(buf, 1, rstart - 1 - mlen);
        out = out make_tag(tag, found);
        # continue after the closing mark
        buf = substr(buf, rstart + rlength + mlen);
        pos = index(buf, mark);
    }
    out = out buf;
    return out;
}
# Returns `str` with HTML special characters replaced by named entities
# and Markdown marker characters replaced by numeric entities, so the
# text is not interpreted again by later passes.
# The ampersand must be replaced first: every later replacement inserts
# an ampersand of its own. "\\&" in a replacement yields a literal "&".
function escape(str) {
# html special characters
gsub(/[&]/, "\\&amp;", str);
gsub(/[<]/, "\\&lt;", str);
gsub(/[>]/, "\\&gt;", str);
# markdown special characters
gsub(/[$]/, "\\&#36;", str);
gsub(/[*]/, "\\&#42;", str);
gsub(/[+]/, "\\&#43;", str);
gsub(/[-]/, "\\&#45;", str);
gsub(/[=]/, "\\&#61;", str);
gsub(/[\^]/, "\\&#94;", str);
gsub(/[_]/, "\\&#95;", str);
gsub(/[`]/, "\\&#96;", str);
gsub(/[~]/, "\\&#126;", str);
return str;
}
# Text of `str` before position `start`; `x` (default 1) is how many
# characters short of `start` the result ends.
function prefix(str, start, x) {
    if (!x) {
        x = 1;
    }
    return substr(str, 1, start - x);
}
# Text of `str` starting `x` (default 1) characters after position `end`.
# `start` is accepted for symmetry with extract(); it cancels out.
function suffix(str, start, end, x) {
    if (!x) {
        x = 1;
    }
    return substr(str, start + (end - start) + x);
}
# Text of `str` between positions `start` and `end`: begins `x`
# characters after `start` and is (end - start) - y characters long.
# Both `x` and `y` default to 1.
function extract(str, start, end, x, y) {
    if (!x) {
        x = 1;
    }
    if (!y) {
        y = 1;
    }
    return substr(str, start + x, (end - start) - y);
}
# TODO: change order: tag, attr, text (<tag attr>text</tag>)
# Builds an HTML element string.
# With truthy `text`: <tag attr>text</tag>; otherwise a self-closed
# <tag attr/>. The attribute part is omitted when `attr` is falsy.
function make_tag(tag, text, attr,    head) {
    head = (attr) ? tag " " attr : tag;
    if (text) {
        return "<" head ">" text "</" tag ">";
    }
    return "<" head "/>";
}
# TODO: change order: href, title, text (<a href title>text</a>)
# Builds <a href='href' title='title'>text</a>; the title attribute is
# left out when `title` is falsy.
function make_link(text, href, title,    attrs) {
    attrs = "href='" href "'";
    if (title) {
        attrs = attrs " title='" title "'";
    }
    return make_tag("a", text, attrs);
}
# TODO: change order and names: src, title, alt (<img src title alt/>)
# Builds <img alt='text' src='href' title='title'/>; the title attribute
# is left out when `title` is falsy.
function make_image(text, href, title,    attrs) {
    attrs = "alt='" text "' src='" href "'";
    if (title) {
        attrs = attrs " title='" title "'";
    }
    return make_tag("img", "", attrs);
}
# Builds the inline footnote marker: a superscript "[ref]" linking to
# the anchor #foot-<ref>.
# Fix: the <sup> element is now closed with </sup>; the original emitted
# a second opening <sup>, leaving both unclosed.
function make_footnote(ref) {
    return make_tag("a", "<sup>[" ref "]</sup>", "href='#foot-" ref "'");
}
# TODO: change order: ref, text (<a href="ref">text</a>)
# Builds a link with `text` as its content, pointing at the in-page
# anchor #link-<ref>.
function make_reflink(text, ref,    target) {
    target = "href='#link-" ref "'";
    return make_tag("a", text, target);
}
# <ftp...>
# <http...>
# <https...>
# <email@...>
# Converts autolinks written in angle brackets:
#   <http...>, <https...>, <ftp...>  -> link to the address itself
#   <name@host>                      -> mailto: link
# Any other <...> span is copied through unchanged. Every converted
# link is also registered with push_link().
#
# Fix: the closing ">" is searched after the opening "<". The original
# took the first ">" of the whole text, so an earlier ">" disabled
# autolinks for the rest of the line.
function angles(buf, start, end, href, out,    close_at) {
    out = "";
    start = index(buf, "<");
    while (start > 0) {
        close_at = index(substr(buf, start + 1), ">");
        if (close_at == 0) {
            break; # unterminated "<": keep the rest verbatim
        }
        end = start + close_at;
        href = extract(buf, start, end);
        if (index(href, "http") == 1 || index(href, "ftp") == 1) {
            push_link(id++, href);
            out = out prefix(buf, start);
            out = out make_link(href, href);
        } else if (index(href, "@") > 1) {
            push_link(id++, "mailto:" href);
            out = out prefix(buf, start);
            out = out make_link(href, "mailto:" href);
        } else {
            # not an autolink: give the span back, brackets included
            out = out prefix(buf, end + 1);
        }
        buf = suffix(buf, start, end);
        start = index(buf, "<");
    }
    out = out buf;
    return out;
}
# [text](href)
# [text](href "title")
# Converts inline links:
#   [text](href)
#   [text](href "title")
# Every converted link is also registered with push_link().
#
# Fixes:
#  * `title` is reset for each link; the original let a title leak into
#    later links that had none.
#  * "](" is searched after "[" and ")" after "](". The original took the
#    first occurrence of each in the whole text, so an earlier ")" or
#    "](" stopped all conversion.
# `regex` is unused and kept only so the parameter list stays compatible.
function links(buf, regex, start, end, mid, t1, t2, temp, text, href, title, out,    rel) {
    out = "";
    start = index(buf, "[");
    while (start > 0) {
        rel = index(substr(buf, start + 1), "](");
        if (rel == 0) break;
        mid = start + rel;
        rel = index(substr(buf, mid + 2), ")");
        if (rel == 0) break;
        end = mid + 1 + rel;
        title = "";
        out = out prefix(buf, start);
        text = extract(buf, start, mid);
        href = extract(buf, mid, end, 2, 2);
        # optional "title" in double quotes after the address
        t1 = index(href, "\"");
        t2 = index(substr(href, t1 + 1), "\"") + t1;
        if (0 < t1 && t1 < t2) {
            temp = href;
            href = trim(prefix(temp, t1));
            title = trim(extract(temp, t1, t2));
        }
        out = out make_link(text, href, title);
        push_link(id++, href, title, text);
        buf = suffix(buf, start, end);
        start = index(buf, "[");
    }
    out = out buf;
    return out;
}
# ![alt](src)
# ![alt](src "title")
# Converts inline images:
#   ![alt](src)
#   ![alt](src "title")
#
# Fixes:
#  * `title` is reset for each image; the original let a title leak
#    into later images that had none.
#  * "](" is searched after "![" and ")" after "](". The original took
#    the first occurrence of each in the whole text, so a plain link or
#    a ")" earlier on the line stopped all image conversion.
# `regex` is unused and kept only so the parameter list stays compatible.
function images(buf, regex, start, end, mid, t1, t2, temp, text, href, title, out,    rel) {
    out = "";
    start = index(buf, "![");
    while (start > 0) {
        rel = index(substr(buf, start + 2), "](");
        if (rel == 0) break;
        mid = start + 1 + rel;
        rel = index(substr(buf, mid + 2), ")");
        if (rel == 0) break;
        end = mid + 1 + rel;
        title = "";
        out = out prefix(buf, start);
        text = extract(buf, start, mid, 2, 2);
        href = extract(buf, mid, end, 2, 2);
        # optional "title" in double quotes after the address
        t1 = index(href, "\"");
        t2 = index(substr(href, t1 + 1), "\"") + t1;
        if (0 < t1 && t1 < t2) {
            temp = href;
            href = trim(prefix(temp, t1));
            title = trim(extract(temp, t1, t2));
        }
        out = out make_image(text, href, title);
        buf = suffix(buf, start, end);
        start = index(buf, "![");
    }
    out = out buf;
    return out;
}
# [^footnote]
# Converts footnote markers [^ref] into superscript links built by
# make_footnote().
#
# Fix: the closing "]" is searched after the opening "[^". The original
# took the first "]" of the whole text, so any earlier bracket (for
# example a link) stopped footnote conversion.
# `regex` is unused and kept only so the parameter list stays compatible.
function footnotes(buf, regex, start, end, ref, out,    rel) {
    out = "";
    start = index(buf, "[^");
    while (start > 0) {
        rel = index(substr(buf, start + 2), "]");
        if (rel == 0) break;
        end = start + 1 + rel;
        out = out prefix(buf, start);
        ref = extract(buf, start, end, 2, 2);
        out = out make_footnote(ref);
        buf = suffix(buf, start, end);
        start = index(buf, "[^");
    }
    out = out buf;
    return out;
}
# [text][ref]
# [text] [ref]
# Converts reference links, where at most one character separates the
# two bracket pairs:
#   [text][ref]
#   [text] [ref]
#
# Fixes:
#  * A bracket pair that is not followed by a second pair is copied to
#    the output. The original appended nothing in that case yet still
#    cut the text from the buffer, so everything up to the "]" was lost.
#  * When the second pair is too far away, only the first pair is given
#    back, so the second pair can still start a reference link.
#  * The first "]" is searched after the first "[".
function reflinks(buf, start, end, mid1, mid2, out, text, ref,    rel) {
    out = "";
    start = index(buf, "[");
    while (start > 0) {
        rel = index(substr(buf, start + 1), "]");
        if (rel == 0) break;
        mid1 = start + rel;
        # when nothing is found, mid2 == mid1 and end == mid2
        mid2 = index(substr(buf, mid1 + 1), "[") + mid1;
        end = index(substr(buf, mid2 + 1), "]") + mid2;
        if (mid1 < mid2 && mid2 < end && mid2 - mid1 <= 2) {
            text = extract(buf, start, mid1);
            ref = extract(buf, mid2, end, 1, 1);
            out = out prefix(buf, start);
            out = out make_reflink(text, ref);
        } else {
            # not a reference link: keep the text through the first "]"
            end = mid1;
            out = out prefix(buf, end + 1);
        }
        buf = suffix(buf, start, end);
        start = index(buf, "[");
    }
    out = out buf;
    return out;
}
# Prints the fixed beginning of the HTML document: doctype, <head> with
# an empty <title>, the embedded stylesheet, the embedded script, and
# the opening <body> tag. Takes no input and reads no state.
function print_header() {
print "<!DOCTYPE html>";
print "<html>";
print "<head>";
print "<title></title>";
print "<style>";
# color palette used by the rules below
print " :root {";
print " --gray: #efefef;";
print " --black: #444;";
print " --dark-gray: #aaaaaa;";
print " --light-gray: #fafafa;";
print " --dark-blue: #0000ff;";
print " --light-blue: #0969da;";
print " --light-yellow: #fafaaa;";
print " }";
# page layout and body text
print " html {";
print " font-size: 16px;";
print " max-width: 100%;";
print " }";
print " body {";
print " padding: 1rem;";
print " margin: 0 auto;";
print " max-width: 50rem;";
print " line-height: 1.5rem;";
print " font-family: sans-serif;";
print " color: var(--black);";
print " }";
print " p {";
print " font-size: 1rem;";
print " margin-bottom: 1.3rem;";
print " }";
print " a, a:visited { color: var(--light-blue); }";
print " a:hover, a:focus, a:active { color: var(--dark-blue); }";
# headings
print " h1 { font-size: 1.7rem; }";
print " h2 { font-size: 1.4rem; }";
print " h3 { font-size: 1.1rem; }";
print " h4 { font-size: 1.1rem; }";
print " h5 { font-size: 0.8rem; }";
print " h6 { font-size: 0.8rem; }";
print " h1, h2 {";
print " padding-bottom: 0.5rem;";
print " border-bottom: 2px solid var(--gray);";
print " }";
print " h1, h2, h3, h4, h5, h6 {";
print " font-weight: bold;";
print " font-style: normal;";
print " margin: 1.4rem 0 .5rem;";
print " }";
print " h3, h5 {";
print " font-weight: bold;";
print " font-style: normal;";
print " }";
print " h4, h6 {";
print " font-weight: normal;";
print " font-style: italic;";
print " }";
# code blocks: these class names are the ones emitted by open_pre()
print " div.codeblock {";
print " border-radius: .4rem;";
print " background-color: var(--gray);";
print " border: 1px solid var(--dark-gray);";
print " }";
print " div.codeblock-head {";
print " margin: 0rem 0rem;";
print " padding: 0rem 0rem;";
print " border-bottom: 1px solid var(--dark-gray);";
print " }";
print " span.codeblock-title {";
print " font-weight: bold;";
print " margin: 0rem 0rem;";
print " padding: 0rem 1rem;";
print " }";
print " span.codeblock-buttons {";
print " float: right;";
print " font-weight: bold;";
print " margin: 0rem 0rem;";
print " padding: 0rem 1rem;";
print " }";
print " pre.codeblock-body {";
print " overflow-x:auto;";
print " margin: 0rem 0rem;";
print " padding: 1rem 1rem;";
print " line-height: 1.0rem;";
print " }";
print " code.codeblock-code {";
print " font-size: 0.8rem;";
print " margin: 0rem 0rem;";
print " padding: 0rem 0rem;";
print " font-family: monospace;";
print " }";
# inline code, highlights, quotes and remaining elements
print " code {";
print " border-radius: .2rem;";
print " padding: 0.1rem 0.3rem;";
print " font-family: monospace;";
print " background-color: var(--gray);";
print " }";
print " mark {";
print " padding: 0.1rem 0.3rem;";
print " border-radius: .2rem;";
print " background-color: var(--light-yellow);";
print " }";
print " blockquote {";
print " margin: 1.5rem;";
print " padding: 1rem;";
print " border-radius: .4rem;";
print " background-color: var(--light-gray);";
print " border: 1px solid var(--dark-gray);";
print " border-left: 12px solid var(--dark-gray);";
print " }";
print " dt { font-weight: bold; }";
print " hr { border: 1px solid var(--dark-gray); }";
print " img { height: auto; max-width: 100%; }";
print " table { border-collapse: collapse; margin-bottom: 1.3rem; }";
print " th { padding: .7rem; border-bottom: 1px solid var(--black);}";
print " td { padding: .7rem; border-bottom: 1px solid var(--gray);}";
print "</style>";
# script functions called by the onclick handlers built in buttons();
# each receives the id of the <pre> element printed by open_pre()
print "<script>";
print " function copy(id) {";
print " var element = document.getElementById(id);";
print " navigator.clipboard.writeText(element.textContent);";
print " }";
print " function wordwrap(id) {";
print " var element = document.getElementById(id);";
print " if (element.style.whiteSpace != 'pre-wrap') {";
print " element.style.whiteSpace = 'pre-wrap';";
print " } else {";
print " element.style.whiteSpace = 'pre';";
print " }";
print " }";
print " function collapse(id) {";
print " var element = document.getElementById(id);";
print " if (element.style.display != 'none') {";
print " element.style.display = 'none';";
print " } else {";
print " element.style.display = 'block';";
print " }";
print " }";
print "</script>"
print "</head>";
print "<body>";
}
# Prints the end of the HTML document: a <footer> holding the numbered
# list of links registered through push_link() and the list of
# footnotes, followed by the closing </body> and </html> tags.
# A list is printed only when its counter (link_count / footnote_count)
# is positive.
#
# Fix: footnote entries now carry id='foot-<ref>', which is the anchor
# that make_footnote() links to. The original used id='link-<ref>', so
# footnote markers pointed nowhere and the ids collided with the link
# list.
function print_footer ( i, ref, href, title, text) {
    print "<footer>";
    if (link_count > 0 || footnote_count > 0) {
        print "<hr>";
    }
    if (link_count > 0) {
        print "<h6>LINKS</h6>";
        print "<ol>";
        for (i = 1; i <= link_count; i++) {
            ref = link_ref[i];
            href = link_href[i];
            title = link_title[i];
            # links without a title are listed by their address
            if (title == "") {
                title = href;
            }
            print make_tag("li", title " <a href='" href "' id='link-" ref "'>&#x1F517;</a>");
        }
        print "</ol>";
    }
    if (footnote_count > 0) {
        print "<h6>FOOTNOTES</h6>";
        print "<ol>";
        for (i = 1; i <= footnote_count; i++) {
            ref = footnote_ref[i];
            text = footnote_text[i];
            print make_tag("li", text " <a href='#foot-" ref "' id='foot-" ref "'>&#x1F517;</a>");
        }
        print "</ol>";
    }
    print "</footer>";
    print "</body>";
    print "</html>";
}
# Initialises the element stack with its base entry ("root" at index 0),
# clears the text buffer, defines the prefix patterns, and prints the
# HTML header before any input is read.
BEGIN {
    # stack base
    idx = 0;
    stk[0] = "root";
    stk_attr[0] = "";
    stk_spaces[0] = 0;
    # text buffer
    buf = "";
    # prefix patterns
    blockquote_prefix = "^[ ]*>[ ]?";
    ul_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[*+-][ ]";
    ol_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[0-9]+\\.[ ]";
    # prepare to signal blank line
    blank = -1;
    print_header();
}
# Pops elements until `tag` is on top of the stack or the stack is empty.
function pop_until(tag) {
    while (1) {
        if (empty() || at(tag)) {
            return;
        }
        pop();
    }
}
# Counts the "blockquote" entries on the stack (indexes 1..idx).
function level_blockquote( i, n) {
    n = 0;
    i = idx;
    while (i > 0) {
        if (stk[i] == "blockquote") {
            n = n + 1;
        }
        i = i - 1;
    }
    return n;
}
# Counts the "ul"/"ol" entries from the top of the stack downwards,
# stopping at the first "blockquote" entry.
function level_list( i, n) {
    n = 0;
    i = idx;
    while (i > 0 && stk[i] != "blockquote") {
        if (stk[i] == "ul" || stk[i] == "ol") {
            n = n + 1;
        }
        i = i - 1;
    }
    return n;
}
# Number of leading four-space groups in `line`.
function count_indent(line) {
    return count_prefix(line, "^[ ][ ][ ][ ]");
}
# Number of times the pattern `pref` can be stripped from `line` in
# succession.
function count_prefix(line, pref, n) {
    for (n = 0; sub(pref, "", line) > 0; n++) {
        # each successful substitution removes one prefix
    }
    return n;
}
# Strips every leading four-space group from `line`.
function remove_indent(line) {
    return remove_prefix(line, "^[ ][ ][ ][ ]");
}
# Strips the pattern `pref` from `line` repeatedly until it no longer
# matches, and returns what is left.
function remove_prefix(line, pref) {
    while (sub(pref, "", line) > 0) {
        # keep stripping
    }
    return line;
}
# Smaller of the two arguments (`x` when equal).
function min(x, y) {
    if (x <= y) {
        return x;
    }
    return y;
}
# Larger of the two arguments (`x` when equal).
function max(x, y) {
    if (x >= y) {
        return x;
    }
    return y;
}
# Removes leading spaces and tabs.
function ltrim(s) {
    sub(/^[ \t]+/, "", s);
    return s;
}
# Removes trailing spaces and tabs.
function rtrim(s) {
    sub(/[ \t]+$/, "", s);
    return s;
}
# Removes spaces and tabs from both ends.
function trim(s) {
    s = ltrim(s);
    return rtrim(s);
}
# Turns `str` into a lowercase slug: every character outside a-z, A-Z
# and 0-9 becomes "-", and runs of "-" collapse to a single one.
function slug(str) {
    gsub(/[^a-zA-Z0-9]/, "-", str);
    gsub(/-+/, "-", str);
    str = tolower(str);
    return str;
}
# Records a link in the global link tables (link_ref, link_href,
# link_title, link_text) under the next index and bumps link_count.
function push_link(ref, href, title, text,    n) {
    n = ++link_count;
    link_text[n] = text;
    link_title[n] = title;
    link_href[n] = href;
    link_ref[n] = ref;
}
# undo last push
# Reverts the last push: removes the top stack entry without printing
# it, clears the text buffer, and returns what the buffer held.
function undo( tmp) {
    unpush();
    tmp = buf;
    buf = "";
    return tmp;
}
#===========================================
# TABULATION
#===========================================
# Lines starting with a tab: replace that first tab with four copies of
# `s` so the space-based rules below can handle the line. No `next`
# here, so processing continues with the following rules.
# NOTE(review): `s` is a global, since rules have no local variables.
/^\t/ {
s = " ";
# replace only 1st tab
sub(/^\t/, s s s s, $0);
}
#===========================================
# BLOCKQUOTES
#===========================================
# Strips the leading quote marker (optional spaces, ">", optional
# spaces) from the current record.
function unblockquote() {
    sub(/^[ ]*>[ ]*/, "");
}
# one level
# Blockquote line (one level only). Continues an open blockquote or
# starts a new one; any other open element is closed first. The marker
# is stripped and the rest of the line is buffered.
/^[ ]*>[ ]*/ {
    if (!at("blockquote")) {
        if (!at("root")) {
            pop();
        }
        push("blockquote");
    }
    unblockquote();
    buffer($0);
    next;
}
#===========================================
# LISTS
#===========================================
# List item line: "* ", "+ ", "- " (unordered) or "N. " (ordered),
# optionally indented, followed by text.
# The indentation of the line (b) is compared with the indentation
# stored for the element on top of the stack (a):
#   inside an <li>, deeper   -> open a nested list and a new item
#   inside an <li>, shallower-> pop three times, then open a new item
#   inside an <li>, same     -> close the item and open a new one
#   otherwise                -> close any open non-root element, then
#                               open a list and its first item
/^([ ]*[*+-][ ]+|[ ]*[0-9]+[.][ ]+).+$/ {
    str = $0; # copy register
    # detect the type of list and strip the marker
    if (str ~ /^[ ]*[*+-][ ]+/) {
        ulol = "ul";
        sub(/^[ ]*[*+-][ ]+/, "", str);
    } else {
        ulol = "ol";
        sub(/^[ ]*[0-9]+[.][ ]+/, "", str);
    }
    # compare spaces
    a = peek_spaces();
    b = spaces();
    if (at("li")) {
        if (b > a) {
            push(ulol);
        } else if (b < a) {
            pop();
            pop();
            pop();
        } else {
            pop();
        }
    } else {
        if (!at("root")) {
            pop();
        }
        push(ulol);
    }
    push("li");
    buffer(str);
    next;
}
#===========================================
# CODE BLOCKS
#===========================================
# Strips one four-space indent from the start of the current record.
function unindent() {
    sub(/^[ ][ ][ ][ ]/, "");
}
# Code fence line. Inside a fenced block it closes the block; otherwise
# it closes any open non-root element and opens a "code" element whose
# title attribute is the first word after the backticks.
/^```/ {
    if (at("code")) {
        pop();
        next;
    }
    if (!at("root")) {
        pop();
    }
    # dropping the backticks re-splits the record, so $1 is the first word
    sub(/^`+/, "");
    push("code", "title='" $1 "'");
    next;
}
# Inside a fenced code block every line is buffered verbatim; no later
# rule sees it. The closing fence is caught by the rule above.
at("code") {
buffer($0);
next;
}
# Line indented by four spaces: part of an indented code block ("pre").
# Continues an open block or starts a new one after closing any other
# open element; one indent level is removed before buffering.
/^[ ][ ][ ][ ]/ {
    if (!at("pre")) {
        if (!at("root")) {
            pop();
        }
        push("pre");
    }
    unindent();
    buffer($0);
    next;
}
#===========================================
# HEADING
#===========================================
# ATX heading: one or more hashes followed by spaces. The number of
# hashes (capped at 6) selects h1..h6; leading and trailing hashes are
# removed from the heading text.
#
# Fix: the level is stored before pop() is called. Popping an indented
# code block ("pre") goes through peek_value(), which calls match() and
# overwrites RLENGTH, so the original could build a tag such as "h-1".
/^[\x23]+[ ]+/ {
    # count header level
    match($0, /^[\x23]+/);
    heading_level = min(RLENGTH, 6);
    # remove all leading hashes
    sub(/^[\x23]+[ ]*/, "", $0);
    # remove all trailing hashes
    sub(/[ ]*[\x23]+$/, "", $0);
    if (!at("root")) {
        pop();
    }
    push("h" heading_level);
    buffer($0);
    next;
}
# Setext headings: a line of "=" (h1) or "-" (h2) directly under an
# open paragraph. The paragraph entry is removed without printing, so
# its buffered text is printed inside the heading tag instead.
/^=+[ ]*$/ && at("p") {
unpush();
push("h1");
pop();
next;
}
# Same as above for a line of dashes. This rule comes before the
# horizontal-ruler rule, so dashes under a paragraph make a heading.
/^-+[ ]*$/ && at("p") {
unpush();
push("h2");
pop();
next;
}
#===========================================
# HORIZONTAL RULER
#===========================================
# TODO: fix <hr> between <ul|ol> and <li>
# Horizontal ruler: three or more of "*", "_" or "-" alone on a line.
# Any open non-root element is closed first; the <hr> is pushed and
# popped at once so it is printed immediately.
/^[*_-][*_-][*_-]+[ ]*$/ {
    if (!at("root")) {
        pop();
    }
    push("hr");
    pop();
    next;
}
#===========================================
# BLANK
#===========================================
# Blank line. Sets blank_flag, then:
#   inside an indented code block -> an empty line is kept in the block
#   inside a list item or at root -> nothing happens
#   anywhere else                 -> the open element is closed
/^[ ]*$/ {
    blank_flag = 1;
    if (at("pre")) {
        buffer("");
    } else if (!at("li") && !at("root")) {
        pop();
    }
    next;
}
#===========================================
# PARAGRAPH
#===========================================
# Any other non-empty line. It continues an open paragraph or heading;
# otherwise any open non-root element is closed and a new paragraph is
# started with this line.
/^.+$/ {
    if (at("p") || any("h1,h2,h3,h4,h5,h6")) {
        buffer($0);
        next;
    }
    if (!at("root")) {
        pop();
    }
    push("p");
    buffer($0);
    next;
}
# Clears the blank-line flag for records that reach this point.
# NOTE(review): both the blank rule and the paragraph rule above end in
# `next`, so this looks reachable only for records matching neither
# pattern - confirm whether the flag is ever reset in practice.
{
blank_flag = 0;
}
#===========================================
# THE END
#===========================================
# End of input: close every element still open and finish the document.
#
# Fixes:
#  * All open elements are popped. The original popped at most one
#    p / li / pre|code / heading, so a blockquote open at end of file
#    was never printed and <ul>/<ol> containers were never closed.
#  * print_footer() is called. print_header() opens <html> and <body>
#    in BEGIN but nothing closed them, and the link list that
#    reference links point to was never written.
END {
    while (!empty()) {
        pop();
    }
    print_footer();
    # compatible end of file,
    # e.g., `diff`, `ed` etc.
    printf "\n";
}
#!/usr/bin/awk -f
#
# Converts markdown to HTML
#
# See:
#
# * https://spec.commonmark.org
# * https://markdown-it.github.io
# * https://www.javatpoint.com/markdown
# * https://www.markdownguide.org/cheat-sheet
# * https://www.markdownguide.org/extended-syntax
# * https://pandoc.org/MANUAL.html#pandocs-markdown
# * https://www.dotcms.com/docs/latest/markdown-syntax
# * https://www.codecademy.com/resources/docs/markdown
# * https://daringfireball.net/projects/markdown/syntax
# * https://www.ecovida.org.br/docs/manual_site/markdown
# * https://quarto.org/docs/authoring/markdown-basics.html
# * https://docs.github.com/en/get-started/writing-on-github
# * https://fuchsia.dev/fuchsia-src/contribute/docs/markdown
# * https://www.ibm.com/docs/en/SSYKAV?topic=train-how-do-use-markdown
# * https://www.knowledgehut.com/blog/web-development/what-is-markdown
# * https://www.ionos.com/digitalguide/websites/web-development/markdown/
# * https://learn.microsoft.com/en-us/contribute/content/markdown-reference
# * https://developer.mozilla.org/en-US/docs/MDN/Writing_guidelines/Howto/Markdown_in_MDN
# * https://confluence.atlassian.com/bitbucketserver/markdown-syntax-guide-776639995.html
# * https://learn.microsoft.com/en-us/azure/devops/project/wiki/markdown-guidance?view=azure-devops
# * https://medium.com/analytics-vidhya/the-ultimate-markdown-guide-for-jupyter-notebook-d5e5abf728fd
# Returns 1 when the top of the stack is "root", "blockquote" or "li",
# otherwise 0.
function ready() {
    if (at("root")) {
        return 1;
    }
    if (at("blockquote")) {
        return 1;
    }
    return at("li");
}
# Returns 1 when the stack index is 0, else 0.
function empty() {
    return (idx == 0) ? 1 : 0;
}
# Returns 1 when the top of the stack is `tag`, otherwise 0.
function at(tag) {
    if (peek() == tag) {
        return 1;
    }
    return 0;
}
# Tag name stored at the top of the stack.
function peek(    top) {
    top = stk[idx];
    return top;
}
# Attribute string stored at the top of the stack.
function peek_attr(    top) {
    top = stk_attr[idx];
    return top;
}
# Pushes `tag` (with optional attribute string `attr`) on the stack and
# prints its opening tag at once; open_tag() flushes pending text first.
# <br> and <hr> are popped again immediately.
# Returns the value of the global counter `id` when the function ends.
function push(tag, attr) {
    id = id + 1;
    idx = idx + 1;
    stk_attr[idx] = attr;
    stk[idx] = tag;
    open_tag(id);
    # close <br> and <hr>
    if (at("br") || at("hr")) {
        pop();
    }
    return id;
}
# Prints the closing tag of the top element and removes it from the
# stack. Returns the removed tag, or "" when the stack is empty.
function pop() {
    if (!empty()) {
        close_tag();
        return unpush();
    }
    return "";
}
# Removes the top stack entry without printing anything and returns
# the tag that was on top. The entry at index 0 is never removed.
function unpush( tag) {
    tag = peek();
    if (empty()) {
        return tag;
    }
    delete stk_attr[idx];
    delete stk[idx];
    idx = idx - 1;
    return tag;
}
# Converts the buffered text to HTML, prints it when non-empty, and
# clears the buffer.
# pre/code content is only escaped. Other content passes through the
# inline converters in a fixed order: diamonds, footnotes, images,
# links, reflinks, styles.
function write() {
    if (at("pre") || at("code")) {
        buf = escape(buf);
    } else {
        # innermost call runs first; the order matters
        buf = styles(reflinks(links(images(footnotes(diamonds(buf))))));
    }
    if (buf != "") {
        print buf;
    }
    buf = "";
}
# Appends `str` to the global text buffer `buf`, joined with `sep`.
# When `sep` is empty it defaults to a newline inside pre/code and to a
# single space elsewhere. Outside pre/code the line is trimmed, and a
# line ending in two or more spaces gets a <br> appended.
#
# Fix: the hard-break test now matches any line whose text is followed
# by two or more trailing spaces. The original pattern
# /^[^ ]+[ ][ ]+$/ required the whole line to contain no other space,
# so only single-word lines ever produced a <br>.
function append(str, sep) {
    if (at("pre") || at("code")) {
        if (sep == "") sep = "\n";
    } else {
        if (sep == "") sep = " ";
        # append 2-spaces line break
        if (str ~ /[^ ][ ][ ]+$/) {
            str = rtrim(str) "<br>";
        }
        str = trim(str);
    }
    if (buf == "") {
        buf = str;
    } else {
        buf = buf sep str;
    }
}
# Flushes pending text, then prints the opening markup for the element
# on top of the stack.
# pre/code are delegated to open_pre() with `id` and the element's
# `title` attribute; br/hr are printed bare; everything else is printed
# with its attribute string when it has one.
# Note: `tag` and `attr` are globals, as in the rest of this script.
# (An earlier draft gave h1-h3 an id attribute; that code was disabled.)
function open_tag(id) {
    write();
    tag = peek();
    attr = peek_attr();
    if (tag == "pre" || tag == "code") {
        open_pre(id, peek_value("title"));
        return;
    }
    if (tag == "br" || tag == "hr" || !attr) {
        printf "<%s>\n", tag;
        return;
    }
    printf "<%s %s>\n", tag, attr;
}
# Flushes pending text, then prints the closing markup for the element
# on top of the stack. br/hr print nothing (they have no closing tag);
# pre/code are delegated to close_pre().
function close_tag(    name) {
    write();
    name = peek();
    if (name == "br" || name == "hr") {
        return;
    }
    if (name == "pre" || name == "code") {
        close_pre();
        return;
    }
    printf "</%s>\n", name;
}
# Returns the value of attribute `key` (written as key='value') from the
# attribute string of the element on top of the stack, or "" when the
# attribute is absent.
#
# `attrs` is now a local: the original assigned to the undeclared name
# `attr` and so overwrote the global of that name.
# Note: this function calls match(), so RSTART and RLENGTH are overwritten.
function peek_value(key, found,    attrs) {
    # a leading blank lets the pattern require a separator before the key
    attrs = " " peek_attr();
    if (match(attrs, "[ ]" key "='[^']*'") > 0) {
        found = substr(attrs, RSTART, RLENGTH);
        # isolate the quoted part and strip `='` and the closing quote
        match(found, "='[^']*'");
        return substr(found, RSTART + 2, RLENGTH - 3);
    }
    return "";
}
# Prints the opening markup of a code block: a <pre> with a header
# (title and buttons) and a body <div> carrying `id`.
function open_pre(id, title) {
    printf "<pre><div class='pre-head'>";
    printf "<span>%s</span>%s", title, buttons(id);
    printf "</div><div class='pre-body' id='%s'>", id;
}
# Prints the closing markup of a code block (body <div>, then <pre>).
function close_pre() {
    printf "</div></pre>";
}
# Builds the HTML for the three code-block buttons, each calling the
# script function named in its onclick handler with `id`.
# Output order: word-wrap, collapse, copy-to-clipboard.
#
# Fixes (the returned string is unchanged):
#  * The variables `clipboard` and `wordwrap` held each other's button;
#    each name now matches its content.
#  * `collapse` is declared local instead of leaking as a global.
# `style` is unused and kept only so the parameter list stays compatible.
function buttons(id, style, clipboard, wordwrap,    collapse) {
    collapse = "<button onclick='collapse(" id ")' title='Toggle collapse' class='pre-button'>↕</button>";
    wordwrap = "<button onclick='wordwrap(" id ")' title='Toggle word-wrap' class='pre-button'>⏎</button>";
    clipboard = "<button onclick='clipboard(" id ")' title='Copy to clipboard' class='pre-button'>📋</button>";
    return wordwrap collapse clipboard;
}
# TODO: change order: tag, attr, text (<tag attr>text</tag>)
# Builds an HTML element string.
# With truthy `text`: <tag attr>text</tag>; otherwise a self-closed
# <tag attr/>. The attribute part is omitted when `attr` is falsy.
function make(tag, text, attr,    head) {
    head = (attr) ? tag " " attr : tag;
    if (text) {
        return "<" head ">" text "</" tag ">";
    }
    return "<" head "/>";
}
# ``text`` and `text` become <code>; the double mark is handled first.
function snippet(buf) {
    return apply_style(apply_style(buf, "``", 2, "code"), "`", 1, "code");
}
# $$text$$ and $text$ become <code>; the double mark is handled first.
function formula(buf) {
    return apply_style(apply_style(buf, "$$", 2, "code"), "$", 1, "code");
}
# __text__ becomes <strong>, then _text_ becomes <em>.
function underscore(buf) {
    return apply_style(apply_style(buf, "__", 2, "strong"), "_", 1, "em");
}
# **text** becomes <strong>, then *text* becomes <em>.
function asterisk(buf) {
    return apply_style(apply_style(buf, "**", 2, "strong"), "*", 1, "em");
}
# ~~text~~ becomes <del>.
function deleted(buf) {
    buf = apply_style(buf, "~~", 2, "del");
    return buf;
}
# ++text++ becomes <ins>.
function inserted(buf) {
    buf = apply_style(buf, "++", 2, "ins");
    return buf;
}
# ==text== becomes <mark>.
function highlighted(buf) {
    buf = apply_style(buf, "==", 2, "mark");
    return buf;
}
# ^text^ becomes <sup>.
function superscript(buf) {
    buf = apply_style(buf, "^", 1, "sup");
    return buf;
}
# ~text~ becomes <sub>.
function subscript(buf) {
    buf = apply_style(buf, "~", 1, "sub");
    return buf;
}
# Applies every inline style converter to `buf` and returns the result.
# Order (innermost call first): snippet, formula, asterisk, underscore,
# deleted, inserted, highlighted, superscript, subscript.
function styles(buf) {
    buf = underscore(asterisk(formula(snippet(buf))));
    buf = highlighted(inserted(deleted(buf)));
    return subscript(superscript(buf));
}
# Replaces each `mark`text`mark` span in `buf` with <tag>text</tag> and
# returns the result. `len` is the length of `mark`. Text wrapped in a
# "code" tag is escaped first so later style passes leave it alone.
#
# Scanning stops at the first mark that has no closing partner or that
# encloses an empty span; the remaining text is returned unchanged.
#
# The scan position is now a local; the original wrote it to the
# undeclared global `position`.
function apply_style(buf, mark, len, tag, out, found, rstart, rlength,    pos) {
    out = "";
    pos = index(buf, mark);
    while (pos > 0) {
        # first character after the opening mark
        rstart = pos + len;
        # number of characters before the closing mark
        rlength = index(substr(buf, rstart), mark) - 1;
        if (rlength <= 0) break;
        found = substr(buf, rstart, rlength);
        if (tag == "code") {
            found = escape(found);
        }
        out = out substr(buf, 1, rstart - 1 - len);
        out = out make(tag, found);
        # continue after the closing mark
        buf = substr(buf, rstart + rlength + len);
        pos = index(buf, mark);
    }
    out = out buf;
    return out;
}
# Returns `str` with HTML special characters replaced by named entities
# and Markdown marker characters replaced by numeric entities, so the
# text is not interpreted again by later passes.
# The ampersand must be replaced first: every later replacement inserts
# an ampersand of its own. "\\&" in a replacement yields a literal "&".
function escape(str) {
# html special characters
gsub(/[&]/, "\\&amp;", str);
gsub(/[<]/, "\\&lt;", str);
gsub(/[>]/, "\\&gt;", str);
# markdown special characters
gsub(/[$]/, "\\&#36;", str);
gsub(/[*]/, "\\&#42;", str);
gsub(/[+]/, "\\&#43;", str);
gsub(/[-]/, "\\&#45;", str);
gsub(/[=]/, "\\&#61;", str);
gsub(/[\^]/, "\\&#94;", str);
gsub(/[_]/, "\\&#95;", str);
gsub(/[`]/, "\\&#96;", str);
gsub(/[~]/, "\\&#126;", str);
return str;
}
# Text of `str` before position `start`; `x` (default 1) is how many
# characters short of `start` the result ends.
function prefix(str, start, x) {
    if (!x) {
        x = 1;
    }
    return substr(str, 1, start - x);
}
# Text of `str` starting `x` (default 1) characters after position `end`.
# `start` is accepted for symmetry with extract(); it cancels out.
function suffix(str, start, end, x) {
    if (!x) {
        x = 1;
    }
    return substr(str, start + (end - start) + x);
}
# Text of `str` between positions `start` and `end`: begins `x`
# characters after `start` and is (end - start) - y characters long.
# Both `x` and `y` default to 1.
function extract(str, start, end, x, y) {
    if (!x) {
        x = 1;
    }
    if (!y) {
        y = 1;
    }
    return substr(str, start + x, (end - start) - y);
}
# Builds <a href='href' title='title'>text</a>; the title attribute is
# left out when `title` is falsy.
function make_link(text, href, title,    attrs) {
    attrs = "href='" href "'";
    if (title) {
        attrs = attrs " title='" title "'";
    }
    return make("a", text, attrs);
}
# Builds <img alt='text' src='href' title='title'/>; the title attribute
# is left out when `title` is falsy.
function make_image(text, href, title,    attrs) {
    attrs = "alt='" text "' src='" href "'";
    if (title) {
        attrs = attrs " title='" title "'";
    }
    return make("img", "", attrs);
}
# Builds the inline footnote marker: a superscript "[footnote]" linking
# to the anchor #foot-<footnote>.
# Fix: the <sup> element is now closed with </sup>; the original emitted
# a second opening <sup>, leaving both unclosed.
function make_footnote(footnote) {
    return make("a", "<sup>[" footnote "]</sup>", "href='#foot-" footnote "'");
}
# Builds a link with `text` as its content, pointing at the in-page
# anchor #link-<ref>.
function make_reflink(text, ref,    target) {
    target = "href='#link-" ref "'";
    return make("a", text, target);
}
# <ftp...>
# <http...>
# <https...>
# <email@...>
# Converts autolinks written in angle brackets:
#   <http...>, <https...>, <ftp...>  -> link to the address itself
#   <name@host>                      -> mailto: link
# Any other <...> span is copied through unchanged. Every converted
# link is also registered with push_link().
#
# Fix: the closing ">" is searched after the opening "<". The original
# took the first ">" of the whole text, so an earlier ">" disabled
# autolinks for the rest of the line.
function diamonds(buf, start, end, href, out,    close_at) {
    out = "";
    start = index(buf, "<");
    while (start > 0) {
        close_at = index(substr(buf, start + 1), ">");
        if (close_at == 0) {
            break; # unterminated "<": keep the rest verbatim
        }
        end = start + close_at;
        href = extract(buf, start, end);
        if (index(href, "http") == 1 || index(href, "ftp") == 1) {
            push_link(id++, href);
            out = out prefix(buf, start);
            out = out make_link(href, href);
        } else if (index(href, "@") > 1) {
            push_link(id++, "mailto:" href);
            out = out prefix(buf, start);
            out = out make_link(href, "mailto:" href);
        } else {
            # not an autolink: give the span back, brackets included
            out = out prefix(buf, end + 1);
        }
        buf = suffix(buf, start, end);
        start = index(buf, "<");
    }
    out = out buf;
    return out;
}
# [text](href)
# [text](href "title")
# Converts inline links:
#   [text](href)
#   [text](href "title")
# Every converted link is also registered with push_link().
#
# Fixes:
#  * `title` is reset for each link; the original let a title leak into
#    later links that had none.
#  * "](" is searched after "[" and ")" after "](". The original took the
#    first occurrence of each in the whole text, so an earlier ")" or
#    "](" stopped all conversion.
# `regex` is unused and kept only so the parameter list stays compatible.
function links(buf, regex, start, end, mid, t1, t2, temp, text, href, title, out,    rel) {
    out = "";
    start = index(buf, "[");
    while (start > 0) {
        rel = index(substr(buf, start + 1), "](");
        if (rel == 0) break;
        mid = start + rel;
        rel = index(substr(buf, mid + 2), ")");
        if (rel == 0) break;
        end = mid + 1 + rel;
        title = "";
        out = out prefix(buf, start);
        text = extract(buf, start, mid);
        href = extract(buf, mid, end, 2, 2);
        # optional "title" in double quotes after the address
        t1 = index(href, "\"");
        t2 = index(substr(href, t1 + 1), "\"") + t1;
        if (0 < t1 && t1 < t2) {
            temp = href;
            href = trim(prefix(temp, t1));
            title = trim(extract(temp, t1, t2));
        }
        out = out make_link(text, href, title);
        push_link(id++, href, title, text);
        buf = suffix(buf, start, end);
        start = index(buf, "[");
    }
    out = out buf;
    return out;
}
# ![alt](src)
# ![alt](src "title")
# Converts inline images:
#   ![alt](src)
#   ![alt](src "title")
#
# Fixes:
#  * `title` is reset for each image; the original let a title leak
#    into later images that had none.
#  * "](" is searched after "![" and ")" after "](". The original took
#    the first occurrence of each in the whole text, so a plain link or
#    a ")" earlier on the line stopped all image conversion.
# `regex` is unused and kept only so the parameter list stays compatible.
function images(buf, regex, start, end, mid, t1, t2, temp, text, href, title, out,    rel) {
    out = "";
    start = index(buf, "![");
    while (start > 0) {
        rel = index(substr(buf, start + 2), "](");
        if (rel == 0) break;
        mid = start + 1 + rel;
        rel = index(substr(buf, mid + 2), ")");
        if (rel == 0) break;
        end = mid + 1 + rel;
        title = "";
        out = out prefix(buf, start);
        text = extract(buf, start, mid, 2, 2);
        href = extract(buf, mid, end, 2, 2);
        # optional "title" in double quotes after the address
        t1 = index(href, "\"");
        t2 = index(substr(href, t1 + 1), "\"") + t1;
        if (0 < t1 && t1 < t2) {
            temp = href;
            href = trim(prefix(temp, t1));
            title = trim(extract(temp, t1, t2));
        }
        out = out make_image(text, href, title);
        buf = suffix(buf, start, end);
        start = index(buf, "![");
    }
    out = out buf;
    return out;
}
# [^footnote]
# Converts footnote markers [^ref] into superscript links built by
# make_footnote().
#
# Fix: the closing "]" is searched after the opening "[^". The original
# took the first "]" of the whole text, so any earlier bracket (for
# example a link) stopped footnote conversion.
# `regex` is unused and kept only so the parameter list stays compatible.
function footnotes(buf, regex, start, end, out, footnote,    rel) {
    out = "";
    start = index(buf, "[^");
    while (start > 0) {
        rel = index(substr(buf, start + 2), "]");
        if (rel == 0) break;
        end = start + 1 + rel;
        out = out prefix(buf, start);
        footnote = extract(buf, start, end, 2, 2);
        out = out make_footnote(footnote);
        buf = suffix(buf, start, end);
        start = index(buf, "[^");
    }
    out = out buf;
    return out;
}
# Smaller of the two arguments (`x` when equal).
function min(x, y) {
    if (x <= y) {
        return x;
    }
    return y;
}
# Larger of the two arguments (`x` when equal).
function max(x, y) {
    if (x >= y) {
        return x;
    }
    return y;
}
# [text][ref]
# [text] [ref]
#
# Replaces reference-style links in buf with the markup returned by
# make_reflink(text, ref). Parameters after `buf` are local variables.
#
# start/mid1 delimit the first bracket pair; mid2/end delimit the next
# bracket pair found after mid1. The two pairs are treated as one
# reference link only when at most one character separates them
# (mid2 - mid1 <= 2).
function reflinks(buf, start, end, mid1, mid2, out, text, ref) {
out = "";
start = index(buf, "[");
mid1 = index(buf, "]");
while (0 < start && start < mid1) {
# index() returns 0 when nothing is found, so mid2 == mid1 (and then
# end == mid2) means "no further bracket pair".
mid2 = index(substr(buf, mid1 + 1), "[") + mid1;
end = index(substr(buf, mid2 + 1), "]") + mid2;
if (mid1 < mid2 && mid2 < end) {
if (mid2 - mid1 <= 2) {
text = extract(buf, start, mid1);
ref = extract(buf, mid2, end, 1, 1);
out = out prefix(buf, start);
out = out make_reflink(text, ref);
} else {
# the pairs are too far apart: copy the text through unchanged
out = out prefix(buf, end + 1);
}
}
# NOTE(review): when the condition above is false nothing is appended to
# `out` before buf is advanced; whether that drops text depends on what
# suffix() returns -- confirm against its definition.
buf = suffix(buf, start, end);
start = index(buf, "[");
mid1 = index(buf, "]");
}
out = out buf;
return out;
}
# Prints the fixed head of the HTML document: doctype, an empty <title>,
# the embedded stylesheet, the helper scripts and the opening <body> tag.
# Takes no arguments and reads no state; called once from BEGIN.
function print_header() {
print "<!DOCTYPE html>";
print "<html>";
print "<head>";
print "<title></title>";
print "<style>";
# colour palette, referenced below through var(--name)
print " :root {";
print " --gray: #efefef;";
print " --black: #444;";
print " --dark-gray: #aaaaaa;";
print " --light-gray: #fafafa;";
print " --dark-blue: #0000ff;";
print " --light-blue: #0969da;";
print " --light-yellow: #fafaaa;";
print " }";
print " html {";
print " font-size: 16px;";
print " max-width: 100%;";
print " }";
print " body {";
print " padding: 1rem;";
print " margin: 0 auto;";
print " max-width: 50rem;";
print " line-height: 1.8;";
print " font-family: sans-serif;";
print " color: var(--black);";
print " }";
print " p {";
print " font-size: 1rem;";
print " margin-bottom: 1.3rem;";
print " }";
print " a, a:visited { color: var(--light-blue); }";
print " a:hover, a:focus, a:active { color: var(--dark-blue); }";
# headings
print " h1 { font-size: 2.0rem; }";
print " h2 { font-size: 1.5rem; }";
print " h3 { font-size: 1.2rem; }";
print " h4 { font-size: 1.2rem; }";
print " h5 { font-size: 0.8rem; }";
print " h6 { font-size: 0.8rem; }";
print " h1, h2 {";
print " padding-bottom: 0.5rem;";
print " border-bottom: 2px solid var(--gray);";
print " }";
print " h1, h2, h3, h4, h5, h6 {";
print " line-height: 1.4;";
print " font-style: normal;";
print " font-weight: bold;";
print " margin: 1.4rem 0 .5rem;";
print " }";
print " h3, h5 {";
print " font-weight: bold;";
print " font-style: normal;";
print " }";
print " h4, h6 {";
print " font-weight: normal;";
print " font-style: italic;";
print " }";
# code blocks and their optional head/body/button parts
print " pre {";
print " overflow-x:auto;";
print " line-height: 1.5;";
print " border-radius: .4rem;";
print " font-family: monospace;";
print " background-color: var(--gray);";
print " border: 1px solid var(--dark-gray);";
print " }";
print " div.pre-head {";
print " height: 1.5rem;";
print " padding: 1rem;";
print " font-weight: bold;";
print " padding-top: 0.5rem;";
print " padding-bottom: 0.5rem;";
print " border-bottom: 1px solid var(--dark-gray);";
print " }";
print " div.pre-body {";
print " padding: 1rem;";
print " }";
print " button.pre-button {";
print " font-size: 100%; float: right;";
print " }";
# inline elements
print " code {";
print " padding: 0.3rem;";
print " border-radius: .2rem;";
print " font-family: monospace;";
print " background-color: var(--gray);";
print " }";
print " mark {";
print " padding: 0.3rem;";
print " border-radius: .2rem;";
print " background-color: var(--light-yellow);";
print " }";
print " blockquote {";
print " margin: 1.5rem;";
print " padding: 1rem;";
print " border-radius: .4rem;";
print " background-color: var(--light-gray);";
print " border: 1px solid var(--dark-gray);";
print " border-left: 12px solid var(--dark-gray);";
print " }";
print " dt { font-weight: bold; }";
print " hr { border: 1px solid var(--dark-gray); }";
print " img { height: auto; max-width: 100%; }";
print " table { border-collapse: collapse; margin-bottom: 1.3rem; }";
print " th { padding: .7rem; border-bottom: 1px solid var(--black);}";
print " td { padding: .7rem; border-bottom: 1px solid var(--gray);}";
print "</style>";
# JavaScript helpers; each one looks its target element up by id
print "<script>";
print " function clipboard(id) {";
print " var element = document.getElementById(id);";
print " navigator.clipboard.writeText(element.textContent);";
print " }";
print " function wordwrap(id) {";
print " var element = document.getElementById(id);";
print " if (element.style.whiteSpace != 'pre-wrap') {";
print " element.style.whiteSpace = 'pre-wrap';";
print " } else {";
print " element.style.whiteSpace = 'pre';";
print " }";
print " }";
print " function collapse(id) {";
print " var element = document.getElementById(id);";
print " if (element.style.display != 'none') {";
print " element.style.display = 'none';";
print " } else {";
print " element.style.display = 'block';";
print " }";
print " }";
print "</script>"
print "</head>";
print "<body>";
}
# Prints the document footer: the collected links and footnotes (if any)
# as ordered lists, followed by the closing </body> and </html> tags.
# All parameters are local variables.
function print_footer (    i, ref, href, title, text) {
    print "<footer>";
    if (link_count > 0 || footnote_count > 0) {
        print "<hr>";
    }
    if (link_count > 0) {
        print "<h6>LINKS</h6>";
        print "<ol>";
        i = 1;
        while (i <= link_count) {
            ref = link_ref[i];
            href = link_href[i];
            # fall back to the target itself when the link has no title
            title = (link_title[i] == "") ? href : link_title[i];
            print make("li", title " <a href='" href "' id='link-" ref "'>&#x1F517;</a>");
            i++;
        }
        print "</ol>";
    }
    if (footnote_count > 0) {
        print "<h6>FOOTNOTES</h6>";
        print "<ol>";
        i = 1;
        while (i <= footnote_count) {
            ref = footnote_ref[i];
            text = footnote_text[i];
            print make("li", text " <a href='#foot-" ref "' id='link-" ref "'>&#x1F517;</a>");
            i++;
        }
        print "</ol>";
    }
    print "</footer>";
    print "</body>";
    print "</html>";
}
# Initialises the parser state and emits the HTML head before any input
# is read.
BEGIN {
    # line prefixes recognised as blockquote / unordered / ordered items
    blockquote_prefix = "^[ ]*>[ ]?";
    ul_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[*+-][ ]";
    ol_prefix = "^([ ][ ][ ][ ])*([ ]|[ ][ ]|[ ][ ][ ])?[0-9]+\\.[ ]";

    # element stack: slot 0 is the permanent "root" element
    idx = 0;
    stk[0] = "root";
    stk_attr[0] = "";
    buf = "";

    blank = -1; # prepare to signal blank line

    print_header();
}
# Pops elements until the stack is empty or `tag` is the current element.
function pop_until(tag) {
    while (!(empty() || at(tag))) {
        pop();
    }
}
# Counts the "blockquote" entries in stack slots 1..idx.
# Both parameters are local variables.
function level_blockquote(    i, n) {
    n = 0;
    for (i = 1; i <= idx; i++) {
        if (stk[i] == "blockquote") n++;
    }
    return n;
}
# Counts the "ul"/"ol" entries from the top of the stack downwards,
# stopping at the nearest "blockquote" (or at slot 1).
# Both parameters are local variables.
function level_list(    i, n) {
    n = 0;
    i = idx;
    while (i > 0 && stk[i] != "blockquote") {
        if (stk[i] == "ul" || stk[i] == "ol") n++;
        i--;
    }
    return n;
}
# Returns how many groups of four leading spaces `line` starts with.
# `n` is a local variable.
function count_indent(line,    n) {
    n = 0;
    while (sub(/^[ ][ ][ ][ ]/, "", line)) {
        n++;
    }
    return n;
}
# Returns how many times the regex `pref` can be stripped, one after the
# other, from `line`. `n` is a local variable.
function count_prefix(line, pref,    n) {
    for (n = 0; sub(pref, "", line); n++) {
        # each pass removes one more occurrence
    }
    return n;
}
# Strips every leading group of four spaces from `line` and returns it.
function remove_indent(line) {
    while (sub(/^[ ][ ][ ][ ]/, "", line)) {
        # keep stripping
    }
    return line;
}
# Repeatedly strips the regex `pref` from `line` until it no longer
# matches, then returns what is left.
function remove_prefix(line, pref) {
    while (sub(pref, "", line)) {
        # keep stripping
    }
    return line;
}
# Returns s without its leading spaces and tabs.
function ltrim(s) {
    sub(/^[ \t]+/, "", s);
    return s;
}
# Returns s without its trailing spaces and tabs.
function rtrim(s) {
    sub(/[ \t]+$/, "", s);
    return s;
}
# Returns s without leading and trailing spaces and tabs.
function trim(s) {
    sub(/^[ \t]+/, "", s);
    sub(/[ \t]+$/, "", s);
    return s;
}
# Builds a lower-case slug: every character other than an ASCII letter
# or digit becomes "-", and runs of "-" are collapsed to a single one.
function slug(str) {
    gsub(/[^a-zA-Z0-9]/, "-", str);
    gsub(/-+/, "-", str);
    str = tolower(str);
    return str;
}
#===========================================
# TABULATIONS
#===========================================
# Runs for every record: expands each tab to four spaces so that the
# indentation rules, which count groups of four leading spaces, treat a
# tab-indented line the same as a space-indented one.
{
    gsub("\t", "    ", $0); # replace tabs with 4 spaces
}
#===========================================
# BLANK LINES
#===========================================
# Blank line flag states:
# 0: not signaling blank line
# -1: preparing to signal blank line
# 1: signaling blank line
#
# The rules below run in this order for every record. The flag is
# advanced first (1 -> 0, or -1 -> 1) and only then may the blank-line
# rule set it back to -1. As a result `blank` is 1 only while the record
# right after a blank line is being processed (and for the first record,
# because BEGIN seeds the flag with -1).
blank == 1 {
blank = 0;
}
blank == -1 {
blank = 1;
}
# An empty or spaces-only line outside a fenced code block arms the flag,
# closes the current paragraph-like element and open blockquotes, and is
# otherwise discarded. Inside a code block it falls through to later rules.
/^[ ]*$/ {
if (!at("code")) {
blank = -1;
pop_p();
pop_blockquote();
next;
}
}
#===========================================
# BLOCKQUOTE
#===========================================
# Closes blockquotes that the current record no longer belongs to: the
# number of open blockquotes minus the number of quote prefixes on $0.
# Does nothing unless the current element is a blockquote.
# Note: lv, cp and n are global variables.
function pop_blockquote() {
    if (at("blockquote")) {
        lv = level_blockquote();
        cp = count_prefix($0, blockquote_prefix);
        for (n = lv - cp; n-- > 0; ) {
            if (at("blockquote")) pop();
        }
    }
}
# A record without a quote prefix closes the open blockquotes
# (pop_blockquote() only acts when a blockquote is the current element).
$0 !~ blockquote_prefix {
pop_blockquote();
}
# A record with one or more quote prefixes: strip them from $0 and bring
# the number of open blockquotes (lv) in line with the number of
# prefixes (cp). The record then continues through the later rules.
# Note: lv, cp and n are global variables.
$0 ~ blockquote_prefix {
lv = level_blockquote();
cp = count_prefix($0, blockquote_prefix);
$0 = remove_prefix($0, blockquote_prefix);
if (cp > lv) {
# deeper than before: open the missing levels
n = cp - lv;
while (n-- > 0) {
pop_p();
push("blockquote");
}
} else {
# shallower (or equal): pop one element per level of difference
n = lv - cp;
while (n-- > 0) {
pop();
}
}
# nothing left after the prefixes: unwind to the enclosing blockquote
if ($0 ~ /^$/) {
pop_until("blockquote");
}
}
#===========================================
# LIST ITEMS
#===========================================
# TODO: add more POSIX compatibility as MAWK doesn't support regex quantifiers {x,y}
# See: https://unix.stackexchange.com/questions/506119/how-to-specify-regex-quantifiers-with-mawk
# Pops the current element unless the parser is in a ready() state.
function pop_p() {
    if (ready()) {
        return;
    }
    pop();
}
# Closes the list levels that are deeper than the indentation of $0.
# Does nothing unless the current element is a list item.
# Note: lv, cp and n are global variables.
function pop_list () {
    if (at("li")) {
        lv = level_list();
        cp = count_indent($0);
        for (n = lv - cp; n-- > 0; ) {
            if (stk[idx-1] == "li") pop();
            if (at("li")) pop();
            if (at("ol") || at("ul")) pop();
        }
    }
}
# Strips one group of four leading spaces from `line` for every open
# list level and returns the result.
# Note: n is a global variable.
function remove_list_indent (line) {
    for (n = level_list(); n > 0; n--) {
        sub(/^[ ][ ][ ][ ]/, "", line);
    }
    return line;
}
# A record that is not a list item: remove the indentation that belongs
# to the open list levels. When the record follows a blank line, the
# deeper list levels are closed first.
$0 !~ ul_prefix && $0 !~ ol_prefix {
# computed before pop_list() changes the list level
temp = remove_list_indent($0);
if (blank > 0) {
# pop_list() measures the indentation of the still unmodified $0
pop_list();
}
$0 = temp;
}
# Returns the digits that open `line` once leading spaces are ignored,
# or the empty string when the line does not begin with a digit.
function list_start(line) {
    sub("^[ ]+", "", line);
    if (match(line, "^[0-9]+")) {
        return substr(line, RSTART, RLENGTH);
    }
    return "";
}
# Opens a list item. The list container `tag` is opened first unless the
# current element already is a list ("ul" or "ol"); for an ordered list
# with a non-empty `start`, the container gets a start='...' attribute.
function push_li(tag, start) {
    if (!at("ul") && !at("ol")) {
        if (tag == "ol" && start != "") {
            push(tag, "start='" start "'");
        } else {
            push(tag);
        }
    }
    push("li");
}
# Handles one list-item record.
#
# Parameters:
#   tag   - "ul" or "ol"
#   pref  - regex matching the item marker (ul_prefix or ol_prefix)
#   start - starting number taken from the record (may be empty)
#
# The target level of the item is its number of four-space indents plus
# one (cp); it is compared with the number of currently open lists (lv).
# Note: lv, cp and n are global variables, and $0 is rewritten to the
# item text without its marker.
function parse_list_item(tag, pref, start) {
lv = level_list();
cp = count_indent($0) + 1;
$0 = remove_prefix($0, pref);
if (cp == lv) {
# same level: close the previous item and open a sibling
pop_p();
if (at("li")) pop();
push_li(tag);
append($0);
} else if (cp > lv) {
# add levels
n = (cp - 1) - lv;
while (n-- > 0) {
push_li(tag);
}
# only this branch forwards the starting number
push_li(tag, start);
append($0);
} else if (cp < lv) {
# del levels
n = lv - cp;
while (n-- > 0) {
pop_p();
if (at("li")) pop();
if (at("ol") || at("ul")) pop();
}
if (at("li")) pop();
push_li(tag);
append($0);
}
}
# Unordered list item: handled entirely here, later rules are skipped.
$0 ~ ul_prefix {
parse_list_item("ul", ul_prefix);
next;
}
# Ordered list item: handled entirely here, later rules are skipped.
# Note: `start` is a global variable.
$0 ~ ol_prefix {
# the user specifies
# the starting number
start = list_start($0);
parse_list_item("ol", ol_prefix, start);
next;
}
#===========================================
# CODE BLOCKS
#===========================================
# A fence line toggles a fenced code block: outside one it opens a "code"
# element whose title attribute is the text that follows the backticks;
# inside one it closes the block.
# Note: `title` is a global variable.
/^```/ {
if (!at("code")) {
sub(/^`+/, "");
title = $0;
push("code", "title='" title "'");
next;
}
pop();
next;
}
# Every record inside a fenced code block is appended as it is.
at("code") {
append($0);
next;
}
# A record indented by four spaces is preformatted text: open a "pre"
# element if needed and append the record without that indentation.
/^[ ][ ][ ][ ]/ {
if (!at("pre")) {
push("pre");
}
sub("^[ ][ ][ ][ ]", "", $0);
append($0);
next;
}
#===========================================
# HEADING
#===========================================
# Undoes the last push: empties the global buffer `buf`, calls unpush()
# and returns the text the buffer held. `saved` is a local variable.
function undo(    saved) {
    saved = buf;
    buf = "";
    unpush();
    return saved;
}
# Setext heading, level 1: a line of three or more "=" right after
# paragraph text turns that text into an <h1>.
/^===+/ && at("p") {
# <h1>
$0 = undo();
push("h1");
append($0);
pop_p();
next;
}
# Setext heading, level 2: same as above with three or more "-".
/^---+/ && at("p") {
# <h2>
$0 = undo();
push("h2");
append($0);
pop_p();
next;
}
# ATX heading: one or more "#" (written \x23) followed by spaces.
# Note: `n` is a global variable.
/^[\x23]+[ ]+/ {
# count hashes
match($0, "\x23+")
# the level is capped at 6
n = RLENGTH > 6 ? 6 : RLENGTH
# remove leading hashes
# (only n of them: with more than six hashes the extra ones stay in the text)
$0 = substr($0, n + 1);
pop_p();
push("h" n);
append($0);
next;
}
#===========================================
# HORIZONTAL RULER
#===========================================
# Horizontal ruler: a line made only of three or more "*", "_" or "-"
# characters (they may be mixed), optionally followed by spaces.
/^[*_-][*_-][*_-]+[ ]*$/ {
pop_p();
push("hr");
next;
}
#===========================================
# DEFINITION LIST
#===========================================
# TODO: make definition list multi-level like <li>
#
# A record starting with ":" is a definition. Right after paragraph text,
# that text becomes the term (<dt>) of a new <dl> and the record its first
# definition (<dd>); right after another definition it adds one more <dd>.
# In any other context the record falls through to the later rules.
# Note: `dd` and `dt` are global variables.
/^:/ {
# the definition text is everything after the colon
dd = substr($0, 2);
if (at("p")) {
dt = undo();
push("dl");
push("dt");
append(dt);
pop_p();
push("dd");
append(dd);
next;
}
if (at("dd")) {
pop_p();
push("dd");
append(dd);
next;
}
}
#===========================================
# TABLE
#===========================================
# Reads the delimiter row of a table and stores the alignment of each
# column in the global array table_aligns, which is cleared first.
#
# Parameters:
#   line - the delimiter row, e.g. "| :--- | :---: | ---: |"
# All remaining parameters are local variables.
#
# table_aligns is indexed like the result of split(line, arr, "|"), so the
# first column is index 2. Cells without a colon get no entry.
function set_table_aligns(line, arr, regex, found, l, r, n, i) {
    delete table_aligns;
    regex = "(:--[-]+:|:--[-]+|--[-]+:)";
    delete arr; # starts from 2
    n = split(line, arr, /\|/);
    # `i` is a local here so that the caller's loop index is not clobbered
    for (i = 2; i < n; i++) {
        if (match(arr[i], regex) > 0) {
            found = substr(arr[i], RSTART, RLENGTH);
            l = substr(found, 1, 1);
            r = substr(found, RLENGTH, 1);
            # every alternative of the regex has a colon on at least one
            # side, so these three cases are exhaustive
            if (l == ":" && r == ":") {
                table_aligns[i] = "center";
            } else if (l == ":") {
                table_aligns[i] = "left";
            } else {
                table_aligns[i] = "right";
            }
        }
    }
}
# Table row: a record that starts with "|" and contains a second "|".
# The first such record opens the table and becomes the header row;
# a delimiter row only records the column alignments; every other row
# becomes a <tr> of <td> cells.
# Note: `arr`, `n` and `i` are global variables.
/^[ ]*\|.*\|[ ]*/ {
if (!at("table")) {
push("table");
push("tr");
delete arr; # starts from 2
n = split($0, arr, /\|/);
# arr[1] and arr[n] hold the text outside the outer pipes and are skipped
for(i = 2; i < n; i++) {
push("th");
append(arr[i]);
pop();
}
pop();
next;
}
if (at("table")) {
# delimiter row such as | --- | :---: |
if ($0 ~ /^[ ]*\|[ ]*([:]?--[-]+[:]?)[ ]*\|[ ]*/) {
set_table_aligns($0);
next;
}
push("tr");
delete arr; # starts from 2
n = split($0, arr, /\|/);
for(i = 2; i < n; i++) {
if (table_aligns[i] != "") {
push("td", "style='text-align:" table_aligns[i] ";'");
} else {
push("td");
}
append(arr[i]);
pop();
}
pop();
next;
}
}
#===========================================
# FOOTNOTE
#===========================================
# Appends a footnote to the global footnote_ref/footnote_text arrays;
# the text is stored after being processed by styles().
function push_footnote(ref, text) {
    footnote_ref[++footnote_count] = ref;
    footnote_text[footnote_count] = styles(text);
}
# Footnote definition: the record is stored for the footer and produces
# no output of its own.
# Note: `ref` and `text` are global variables.
/^[ ]*\[\^[^]]+\][:]/ {
# [^id]: note
if (match($0, /\[\^[^]]+\][:]/) > 0) {
# the match is "[^" id "]:", so the id starts 2 characters in and is
# 4 characters shorter than the match
ref = substr($0, RSTART + 2, RLENGTH - 4);
text = substr($0, RSTART + RLENGTH);
push_footnote(ref, text);
}
next;
}
#===========================================
# (REFERENCE STYLE) LINK
#===========================================
# TODO: implement all styles: https://gist.github.com/emedinaa/28ed71b450243aba48accd634679f805
# Appends a link to the global link_ref/link_href/link_title/link_text
# arrays and increments link_count. `n` is a local variable.
function push_link(ref, href, title, text,    n) {
    n = ++link_count;
    link_ref[n] = ref;
    link_href[n] = href;
    link_title[n] = title;
    link_text[n] = text;
}
# Link reference definition: the record is stored for the footer and
# produces no output of its own. Supported forms:
#
# [ref]: href
# [ref]: href "title"
# [ref]: href 'title'
# [ref]: href (title)
# [ref]: <href> "title"
# [ref]: <href> 'title'
# [ref]: <href> (title)
#
# Note: `ref`, `href` and `title` are global variables.
/^[ ]*\[[^]]+\][:]/ {
    if (match($0, /\[[^]]+\][:]/) > 0) {
        ref = substr($0, RSTART + 1, RLENGTH - 3);
        href = substr($0, RSTART + RLENGTH);
        # `title` is a global that other rules assign too; clear it so a
        # definition without a title does not reuse a stale value.
        title = "";
        if (match(href, "[ ](\"[^\"]*\"|'[^']*'|\\([^\\)]*\\))") > 0) {
            # the match is: space, opening delimiter, title, closing delimiter
            title = substr(href, RSTART + 2, RLENGTH - 3);
            href = substr(href, 1, RSTART - 1)
        }
        # remove '<' '>', whether or not a title was given
        if (match(href, "<[^>]+>") > 0) {
            href = substr(href, RSTART + 1, RLENGTH - 2);
        }
        # remove leading and trailing spaces
        sub("^[ ]*", "", href);
        sub("[ ]*$", "", href);
        push_link(ref, href, title, title);
    }
    next;
}
#===========================================
# PARAGRAPH
#===========================================
# TODO: transform "<li>text" in "<li><p>text", undoing the previous <li>
# Any non-empty record that no earlier rule consumed is paragraph text.
# In a ready() state a <p> is opened first, except directly inside a list
# item, where a <p> is opened only when the record follows a blank line.
/^.+$/ {
if (ready()) {
if (at("li")) {
if (blank == 1) {
push("p");
}
} else {
push("p");
}
}
append($0);
next;
}
#===========================================
# THE END
#===========================================
# End of input: close what is still open (paragraph-like element, lists,
# blockquotes) and print the footer with the collected links and
# footnotes. pop_list() and pop_blockquote() read $0 at this point.
END {
pop_p();
pop_list();
pop_blockquote();
print_footer();
}
#!/bin/sh
#
# Runs the Busybox httpd server.
#
# Usage:
#
# notekeeper-http-server.sh
#
# Configuration:
#
# # file .notekeeper/notekeeper.conf
# busybox.httpd.port=127.0.0.1:9000
#
# Load the shared helpers that live next to this script.
. "$(dirname "$0")/notekeeper-common.sh"

# Configuration key holding the listen address, and its fallback value.
property_port='busybox.httpd.port'
property_port_default='127.0.0.1:9000'
# Prints the address the server must listen on.
#
# The value is read from "${WORKING_DIR}/.notekeeper/notekeeper.conf",
# from the first line that starts with "${property_port}="; when the
# file is missing or has no such line, "${property_port_default}" is
# printed instead.
busybox_httpd_port() {
    local conf="${WORKING_DIR}/.notekeeper/notekeeper.conf"
    local port=""
    if [ -r "${conf}" ]; then
        # The key is compared as a literal string (its dots are not regex
        # wildcards), the "=" is part of the comparison, and only the
        # first matching line is used.
        port=`awk -v key="${property_port}=" 'index($0, key) == 1 { print substr($0, length(key) + 1); exit }' "${conf}"`
    fi
    if [ -n "${port}" ]; then
        echo "${port}"
    else
        echo "${property_port_default}"
    fi
}
# Kills every running "busybox httpd" process whose PID is above 1024.
#
# Each PID reported by ps is handled on its own, so more than one running
# server no longer breaks the numeric test.
busybox_httpd_stop() {
    local pid
    for pid in `ps aux | grep 'busybox httpd' | grep -v "grep" | awk '{ print $2 }'`; do
        # skip anything that is not a plain number
        case "${pid}" in
            ''|*[!0-9]*) continue ;;
        esac
        if [ "${pid}" -gt 1024 ]; then
            kill -9 "${pid}"
        fi
    done
}
# Starts busybox httpd on the configured address, serving the generated
# HTML folder, and prints the URL it listens on.
busybox_httpd_start() {
    local port=$(busybox_httpd_port)
    # busybox httpd -p "$port" -h "$PROGRAM_DIR/www/"
    busybox httpd -p "${port}" -h "${WORKING_DIR}/.notekeeper/html/"
    echo "Listening: http://${port}"
}
# Restarts the server: stop a running instance, then start a new one.
main() {
    busybox_httpd_stop
    busybox_httpd_start
}

main
# https://datatracker.ietf.org/doc/html/rfc3875
# https://www.vivaolinux.com.br/artigo/Introducao-a-CGI-com-a-RFC-3875
# https://gist.github.com/stokito/a9a2732ffc7982978a16e40e8d063c8f
# https://github.com/Mikepicker/cgiblog
# https://medium.com/@Mikepicker/no-framework-blog-for-fun-and-profit-using-bash-cgi-cbb99cf5366b
#!/bin/sh
#
# Saves HTML in `html` folder.
#
# Usage:
#
# notekeeper-save-html.sh FILE
#
# Load the shared helpers that live next to this script.
. "$(dirname "$0")/notekeeper-common.sh"

file="${1}"
require_file "${file}"

# Converts FILE to HTML with notekeeper-html.awk and writes the result to
# the path given by html_path, creating the parent folder if needed.
main() {
    local file="${1}"
    local html=$(html_path "${file}")
    mkdir -p "$(dirname "${html}")"
    "$PROGRAM_DIR/awk/notekeeper-html.awk" "${file}" > "${html}"
}

main "${file}"
#!/bin/sh
#
# Saves a STAT file in the `data` folder.
#
# Usage:
#
# apwm-save-stat.sh FILE
#
# Load the shared helpers that live next to this script.
. "$(dirname "$0")/notekeeper-common.sh"

file="${1}"
require_file "${file}"

# Runs notekeeper-stat.awk on FILE in the C locale and writes its output
# to the path returned by make_stat.
main() {
    local file="${1}"
    local uuid=$(path_uuid "${file}")
    local stat=$(make_stat "${file}")
    LC_ALL=C "$PROGRAM_DIR/awk/notekeeper-stat.awk" -v WRITETO=/dev/stdout "${file}" > "${stat}"
}

main "${file}"
#!/usr/bin/awk -f
# Note:
# * Files encoded using MAC-UTF-8 must be normalized to UTF-8.
# Returns the type of a token: the token in lower case, passed through
# toascii().
function token_type(token)
{
    token = tolower(token);
    return toascii(token);
}
# Classifies the format of a token:
#   "W"  word: letters, optionally joined by apostrophes or hyphens
#   "N"  number: digits, each optionally followed by one of h ° % / : , . + -
#   "P"  punctuation only
#   "NA" none of the above
#
# NOTE:
# Words that contain "accented" characters encoded with MAC-UTF-8 get "NA".
# You must normalize the input files to regular UTF-8 encoding.
function token_format(token)
{
    if (token ~ /^[[:alpha:]]+([\x27’-]?[[:alpha:]])*$/) return "W";
    if (token ~ /^[+-]?([[:digit:]][h°%/:,.+-]?)+$/) return "N";
    if (token ~ /^[[:punct:]]+$/) return "P";
    return "NA";
}
# Classifies the letter case of a token after passing it through toascii():
#   "S"  start case: "Word", "Compound-word"
#   "L"  lower case: "word", "compound-word"
#   "U"  upper case: "WORD", "COMPOUND-WORD"
#   "C"  camel case: "compoundWord", "CompoundWord"
#   "M"  mixed case: "wOrD", "cOmPoUnD-wOrD"
#   "NA" none of the above
#
# NOTE:
# UPPERCASE words with a single character, for example "É", are treated as
# start case words by this function. The author considers it a convenient
# behavior that helps to identify proper nouns and the beginning of
# sentences, although it may not be intuitive. The order of the tests is
# important to preserve this behavior.
function token_case(token)
{
    token = toascii(token);
    if (token ~ /^[[:upper:]][[:lower:]]*([\x27’-]([[:alpha:]][[:lower:]]*))*$/) return "S";
    if (token ~ /^[[:lower:]]+([\x27’-]([[:lower:]]+))*$/) return "L";
    if (token ~ /^[[:upper:]]+([\x27’-]([[:upper:]]+))*$/) return "U";
    if (token ~ /^[[:alpha:]][[:lower:]]*([[:upper:]][[:lower:]]+)+$/) return "C";
    if (token ~ /^[[:alpha:]]+([\x27’-]([[:alpha:]]+))*$/) return "M";
    return "NA";
}
# Classifies a numeric token by its shape:
#   "I" integer, "R" real number, "D" date, "T" time,
#   "F" fraction, "P" percent, "G" degrees, "NA" none of the above.
# The tests run in this order and the first match wins.
function token_mask(token)
{
    if (token ~ /^[+-]?[0-9]+$/) return "I";
    if (token ~ /^[+-]?[0-9][0-9]?[0-9]?([,.]?[0-9][0-9][0-9])*([,.][0-9]+)?$/) return "R";
    if (token ~ /^[0-9]([0-9]|[0-9][0-9][0-9])[/.-][0-9][0-9]?[/.-][0-9]([0-9]|[0-9][0-9][0-9])$/) return "D";
    if (token ~ /^([0-9][0-9]?[:h][0-9][0-9]|[0-9][0-9]?[h])$/) return "T";
    if (token ~ /^[+-]?[0-9]+[/][0-9]+$/) return "F";
    if (token ~ /^[+-]?[0-9]+([,.][0-9]+)?%$/) return "P";
    if (token ~ /^[+-]?[0-9]+([,.][0-9]+)?°$/) return "G";
    return "NA";
}
# Records one occurrence of `token`: stores it at the next position of the
# global `tokens` array, counts it, fills in its type/format/case/mask
# while those entries are still empty, and appends its position to the
# comma-separated list in `indexes`.
function insert_token(token)
{
    tokens[++idx] = token;
    counters[token]++;

    if (!types[token]) {
        types[token] = token_type(token);
    }
    if (!formats[token]) {
        formats[token] = token_format(token);
    }
    if (!cases[token]) {
        cases[token] = token_case(token);
    }
    if (!masks[token]) {
        masks[token] = token_mask(token);
    }

    indexes[token] = (!indexes[token]) ? idx : (indexes[token] "," idx);
}
# Transliterates `string` to plain ASCII and returns the result.
#
# Latin-1 letters lose their diacritics (ligatures become two letters),
# a few Unicode punctuation marks get an ASCII equivalent, decomposed
# ("MAC-UTF-8") combining marks are removed, and every remaining
# non-ASCII byte/character is replaced with SUB (0x1A).
#
# Alternations are used instead of bracket expressions on purpose: in a
# byte-oriented locale (for example LC_ALL=C) a bracket expression that
# contains multibyte characters turns into a set of single bytes, so one
# accented letter would be replaced twice. An alternation of whole
# characters works the same in byte and in UTF-8 locales.
function toascii(string) {
    # Unicode Latin-1 Supplement
    gsub(/À|Á|Â|Ã|Ä|Å/, "A", string);
    gsub(/È|É|Ê|Ë/, "E", string);
    gsub(/Ì|Í|Î|Ï/, "I", string);
    gsub(/Ò|Ó|Ô|Õ|Ö/, "O", string);
    gsub(/Ù|Ú|Û|Ü/, "U", string);
    gsub(/Ý/, "Y", string);
    gsub(/Ç/, "C", string);
    gsub(/Ñ/, "N", string);
    gsub(/Ð/, "D", string);
    gsub(/Ø/, "OE", string);
    gsub(/Þ/, "TH", string);
    gsub(/Æ/, "AE", string);
    gsub(/à|á|â|ã|ä|å|ª/, "a", string);
    gsub(/è|é|ê|ë/, "e", string);
    gsub(/ì|í|î|ï/, "i", string);
    gsub(/ò|ó|ô|õ|ö|º|°/, "o", string);
    gsub(/ù|ú|û|ü/, "u", string);
    gsub(/ý|ÿ/, "y", string);
    gsub(/ç/, "c", string);
    gsub(/ñ/, "n", string);
    gsub(/ð/, "d", string);
    gsub(/ø/, "oe", string);
    gsub(/þ/, "th", string);
    # lower-case counterpart of the "Æ" rule above
    gsub(/æ/, "ae", string);
    gsub(/ß/, "ss", string);
    # Unicode Punctuation
    gsub(/–/, "-", string);
    gsub(/—/, "--", string);
    gsub(/…/, "...", string);
    gsub(/‘|’/, "\x27", string);
    gsub(/“|”|«|»/, "\x22", string);
    # Remove MAC-UTF-8 combining diacritical marks (only those used in Latin-1).
    # Each mark is the two-byte sequence 0xCC 0x?? and is matched as a whole.
    gsub("\xCC\x80|\xCC\x81|\xCC\x82|\xCC\x83|\xCC\x88|\xCC\x8A|\xCC\xA7", "", string);
    # Replace non-ASCII with SUB (0x1A)
    gsub(/[^\x00-\x7E]/,"\x1A", string);
    return string;
}
# Builds the regular expression (as a string) that matches a stop word.
#
# The words are read, one per line, from
#   pwd "/../lib/lang/" lang "/stopwords.txt"
# skipping blank lines and lines that start with "#". The result has the
# form "^(word1|word2|...)$". All parameters are local variables.
#
# When option_value("stopwords") is false, or when no word could be read,
# the function returns "^$", which only matches an empty token.
# NOTE(review): values parsed from "key=value" are strings, so the early
# return presumably only triggers when the option is absent -- confirm.
function get_stopwords_regex(    file, regex, line) {
    if (!option_value("stopwords")) {
        # A bare /^$/ here would evaluate to the 0/1 result of matching $0,
        # not to a regex, so the pattern is returned as a string.
        return "^$";
    }
    file=pwd "/../lib/lang/" lang "/stopwords.txt"
    regex=""
    while((getline line < file) > 0) {
        # skip blank lines and lines started with #
        if (line ~ /^[[:space:]]*$/ || line ~ /^#/) continue;
        regex=regex "|" line;
    }
    close(file);
    if (regex == "") {
        return "^$";
    }
    # remove leading pipe
    regex=substr(regex,2);
    return "^(" regex ")$"
}
# Rewrites $0 so that tokens are separated by spaces: the record is
# padded with one space on each side, non-breaking-space bytes become
# spaces, and brackets, sentence punctuation and quotation marks are
# split from the text they touch.
function separate_tokens() {
    $0 = " " $0 " ";
    gsub(/\xA0/, " ", $0);
    # brackets, braces, parentheses and the em dash stand alone
    gsub(/[]()—{}[]/, " & ", $0);
    # punctuation followed by a space or another punctuation mark
    gsub(/[.,;:!?…][[:space:][:punct:]]/, " &", $0);
    # opening, then closing quotation marks
    gsub(/[[:space:][:punct:]][\x22\x27“”‘’«»]/, "& ", $0);
    gsub(/[\x22\x27“”‘’«»][[:space:][:punct:]]/, " &", $0);
}
# 123 456 789,01 -> 123456789,01
#
# Joins, in $0, groups of digits that are separated by a single space
# when the second group has exactly three digits and the whole number is
# delimited by a space or punctuation on both sides.
# `number` is a local variable.
function join_numbers(    number) {
    while (match($0, /[[:space:][:punct:]][0-9]+[[:space:]][0-9][0-9][0-9][[:space:][:punct:]]/)) {
        # the match includes one delimiter on each side of the number
        number = substr($0, RSTART + 1, RLENGTH - 2);
        sub(/[[:space:]]/, "", number);
        # Keep everything up to and including the leading delimiter. The
        # start position must be 1: with a start of 0 some awk
        # implementations return one character less, which would drop
        # the delimiter.
        $0 = substr($0, 1, RSTART) number substr($0, RSTART + RLENGTH - 1);
    }
}
# Fills the global `records` array with one record per distinct token,
# holding the columns enabled in the global `fields` array. records[0]
# receives the number of records. All parameters are local variables.
#
# Before anything else the sum of all counters is checked against the
# number of tokens; on a mismatch the program exits with status 1.
function generate_records(    token, count, ratio, sum, sep, r, f, flength, key, val)
{
    # start of operational checks #
    sum = 0;
    for (token in counters) sum += counters[token];
    if (sum != length(tokens)) {
        print "Wrong sum of counts" > "/dev/stderr";
        exit 1;
    }
    # end of operational checks #

    r = 0;
    for (token in counters) {
        r++;
        flength = fields[0];
        count = counters[token];
        ratio = count / length(tokens);
        for (f = 1; f <= flength; f++) {
            key = fields[f,"key"];
            val = fields[f,"value"];
            # a field whose value is 0 is switched off
            if (val == 0) continue;
            if (key == "token") records[r,key] = token;
            else if (key == "type") records[r,key] = types[token];
            else if (key == "count") records[r,key] = count;
            else if (key == "ratio") records[r,key] = ratio;
            else if (key == "format") records[r,key] = formats[token];
            else if (key == "case") records[r,key] = cases[token];
            else if (key == "mask") records[r,key] = masks[token];
            else if (key == "length") records[r,key] = length(token);
            else if (key == "indexes") records[r,key] = indexes[token];
        }
    }
    # array length
    records[0] = r;
}
# Writes the records to the file named by the global `output` as
# tab-separated lines: first a header with the upper-cased names of the
# enabled fields, then one line per record. Nothing is written when the
# `records` array is empty. All parameters are local variables.
function print_records(    sep, r, f, rlength, flength)
{
    flength = fields[0];
    rlength = records[0];
    if (!length(records)) return;

    # header line
    sep = "";
    for (f = 1; f <= flength; f++) {
        if (fields[f,"value"] == 0) continue;
        printf "%s%s", sep, toupper(fields[f,"key"]) > output;
        sep = "\t";
    }
    printf "\n" > output;

    # one line per record
    for (r = 1; r <= rlength; r++) {
        sep = "";
        for (f = 1; f <= flength; f++) {
            if (fields[f,"value"] == 0) continue;
            printf "%s%s", sep, records[r,fields[f,"key"]] > output;
            sep = "\t";
        }
        printf "\n" > output;
    }
}
# Returns `file` without everything up to and including its last "/".
function basename(file) {
    sub("^.*/", "", file);
    return file;
}
# Returns the directory part of `file`: the path without its last "/"
# and the name that follows it.
#
# A name without any "/" yields "." (the current directory) instead of
# the file name itself, and a file directly under the root yields "/"
# instead of an empty string.
function basedir(file) {
    if (index(file, "/") == 0) {
        return ".";
    }
    sub("/[^/]+$", "", file);
    return (file == "") ? "/" : file;
}
# Loads the field and option settings into the global arrays `fields`
# and `options`. All parameters are local variables.
#
# Precedence, for fields and options independently:
#   1. the FIELDS / OPTIONS variable, when it is not empty;
#   2. the "key=value" lines of the file pwd "/../abw.conf";
#   3. the built-in defaults of parse_fields() / parse_options().
function parse_confs(    file, line, string)
{
    file=pwd "/../abw.conf"
    string=""
    while((getline line < file) > 0) {
        # skip comments
        gsub(/#.*$/,"", line);
        # skip invalid lines
        if (line !~ /^[[:space:]]*[[:alnum:]]+[[:space:]]*=[[:space:]]*[[:alnum:]]+[[:space:]]*$/) continue;
        # "key = value" -> "key=value": the parsers do not trim spaces
        gsub(/[[:space:]]+/, "", line);
        if (!string) string = line;
        else string=string "," line;
    }
    close(file);

    # The conf string is passed in directly when the variable is empty:
    # the parsers fall back to their defaults on an empty string, so a
    # later "nothing parsed" test could never select the conf file.
    fields[0] = 0; # declare array
    parse_fields((FIELDS != "") ? FIELDS : string, fields);
    if (fields[0] == 0) {
        # nothing usable (e.g. the conf file only has options): defaults
        parse_fields("", fields);
    }

    options[0] = 0; # declare array
    parse_options((OPTIONS != "") ? OPTIONS : string, options);
    if (options[0] == 0) {
        # nothing usable (e.g. the conf file only has fields): defaults
        parse_options("", options);
    }
}
# Parses a comma-separated list of fields ("key", "key=value" or
# "key:value") into the `fields` array; an empty list selects every
# known field. `default_string` is a local variable.
function parse_fields(string, fields, default_string)
{
    default_string = "token,type,count,ratio,format,case,mask,length,indexes";
    gsub(":", "=", string);
    if (!string) {
        string = default_string;
    }
    parse_key_values(string, fields, default_string);
}
# Parses a comma-separated list of options ("key", "key=value" or
# "key:value") into the `options` array; an empty list selects the
# default options. `default_string` is a local variable.
function parse_options(string, options, default_string)
{
    default_string = "ascii=0,lower=0,upper=0,stopwords=1,lang=none,eol=1,asc=none,desc=none";
    gsub(":", "=", string);
    if (!string) {
        string = default_string;
    }
    parse_key_values(string, options, default_string);
}
# Option formats: 'key' or 'key=value'
# If the format is 'key', name is 'key' and value is '1'
# If the format is 'key=value', name is 'key' and value is 'value'
#
# Parameters:
#   string         - comma-separated items to parse
#   keyvalues      - output array: keyvalues[i,"key"], keyvalues[i,"value"]
#                    for item i, and keyvalues[0] with the number of items
#   default_string - list of known keys; items whose key does not occur
#                    in it as a whole word are dropped
# The remaining parameters are local variables.
function parse_key_values(string, keyvalues, default_string,    items, i, key, value, splitter)
{
    # first pass: remove the items with unknown keys from `string`
    split(string, items, ",");
    for (i in items)
    {
        gsub(/=.*$/, "", items[i]);
        if (default_string !~ "\\<" items[i] "\\>") {
            gsub("\\<" items[i] "\\>(=[^,]*)?", "", string);
        }
    }
    # tidy up the commas left behind by the removals
    gsub(",+", ",", string);
    gsub("^,|,$", "", string);

    # second pass: split what is left into keys and values
    split(string, items, ",");
    for (i in items)
    {
        splitter = index(items[i], "=");
        if (splitter == 0) {
            key = items[i];
            value = 1;
        } else {
            # The start position must be 1: with a start of 0 some awk
            # implementations return one character less, which would
            # cut the last character off the key.
            key = substr(items[i], 1, splitter - 1);
            value = substr(items[i], splitter + 1);
        }
        keyvalues[i,"key"] = key;
        keyvalues[i,"value"] = value;
    }
    # save the array length
    keyvalues[0] = length(items);
}
# Derives a sort-order name from the "asc" and "desc" options: the value
# "token" selects ordering by index string and "count" ordering by
# numeric value. When several options apply the last one wins; when none
# does, the result is empty. All parameters are local variables.
function get_sort_order(    sort_order, o, olength, key, val)
{
    olength = options[0];
    for (o = 1; o <= olength; o++) {
        key = options[o,"key"];
        if (key != "asc" && key != "desc") continue;
        val = options[o,"value"];
        if (key == "asc") {
            if (val == "token") sort_order = "@ind_str_asc";
            if (val == "count") sort_order = "@val_num_asc";
        } else {
            if (val == "token") sort_order = "@ind_str_desc";
            if (val == "count") sort_order = "@val_num_desc";
        }
    }
    return sort_order;
}
# Empties every field of the current record that matches the global
# stopwords_regex; both sides are compared in lower case.
# `i` is a local variable.
function remove_stopwords(    i)
{
    i = 0;
    while (++i <= NF) {
        if (tolower($i) ~ tolower(stopwords_regex)) {
            $i = "";
        }
    }
}
# Applies the configured transformations to $0, in the order the options
# are stored: "ascii", "lower" and "upper" act when their value is 1,
# while "stopwords" removes the stop words when its value is 0.
# All parameters are local variables.
function transform_line(    o, olength, key)
{
    olength = options[0];
    for (o = 1; o <= olength; o++) {
        key = options[o,"key"];
        if (key == "stopwords") {
            if (options[o,"value"] == 0) remove_stopwords();
            continue;
        }
        if (key != "ascii" && key != "lower" && key != "upper") continue;
        if (options[o,"value"] != 1) continue;
        if (key == "ascii") {
            $0 = toascii($0);
        } else if (key == "lower") {
            $0 = tolower($0);
        } else {
            $0 = toupper($0);
        }
    }
}
# Returns the value of the first option named `key`, or 0 when there is
# no such option. `o` and `olength` are local variables.
function option_value(key,    o, olength) {
    olength = options[0];
    o = 1;
    while (o <= olength) {
        if (options[o,"key"] == key) {
            return options[o,"value"];
        }
        o++;
    }
    return 0;
}
# Program set-up: load the configuration, then derive the globals that
# the record rules use.
BEGIN {
# PWD is read as an awk variable here, not from ENVIRON.
# NOTE(review): presumably it is meant to be passed with -v PWD=... -- confirm.
pwd = PWD;
parse_confs();
eol = option_value("eol");
# `lang` must be set before get_stopwords_regex(), which builds the
# stop-word file path from it
lang = option_value("lang");
sort_order = get_sort_order();
stopwords_regex = get_stopwords_regex();
}
# Writes the statistics of the input file that has just ended, then
# resets the per-file state. `name` is a local variable.
#
# The output path is WRITETO with ":filedir" and ":filename" replaced by
# the directory and the base name of that input file. The name comes
# from the global `stat_file`, recorded while the file was being read:
# when this function runs at the first record of the next file, FILENAME
# already names the next file.
function endfile(    name) {
    name = (stat_file != "") ? stat_file : FILENAME;
    output=WRITETO;
    filedir=basedir(name)
    filename=basename(name)
    sub(/:filedir/, filedir, output);
    sub(/:filename/, filename, output);
    generate_records();
    print_records();
    idx = 0;
    delete tokens;
    delete types;
    delete counters;
    delete formats;
    delete cases;
    delete masks;
    delete indexes;
    delete records;
}
# First record of a file: flush the previous file (if there was one)
# before any token of the new file is counted, then remember the name of
# the file now being read.
FNR == 1 {
    if (NR > 1) {
        endfile();
    }
    stat_file = FILENAME;
}
# Every non-blank record: normalise it and count its tokens.
NF {
    join_numbers();
    transform_line();
    separate_tokens();
    for (i = 1; i <= NF; i++) {
        insert_token($i);
    }
    if (eol) insert_token("<eol>");
}
# Flush the last file.
END {
    endfile();
}
#!/bin/sh
# CGI script: replies with the current date and time in ISO 8601 format.
# The empty line after the header is required: it is what separates the
# response header from the body.
cat <<EOF
Content-Type: text/json; charset=utf-8

$(date -Iseconds)
EOF
#!/bin/sh
# CGI script: replies with a static "Hello, World!" HTML page.
# The empty line after the header is required: it is what separates the
# response header from the body.
cat <<EOF
Content-Type: text/html; charset=utf-8

<!DOCTYPE html>
<html>
<head>
<title>Hello, World!</title>
</head>
<body>
<p>Hello, World!</p>
</body>
</html>
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment