#!/bin/sh
# See https://spec.commonmark.org for reference
#######################################
# Convert a Markdown file to HTML with a single awk program.
# Handles paragraphs, ATX and Setext headers, horizontal rules, nested
# ordered/unordered lists, indented code blocks, simple pipe tables,
# images, links, auto links and strong/em/code inline markup.
# HTML special characters in the input are escaped unless the awk variable
# esc equals "false"; the program itself never assigns esc.
# Arguments: $1 - path of the Markdown file to convert
# Outputs:   writes the generated HTML to stdout
# Returns:   exit status of awk
#######################################
md2html() {
  awk '
  # Print the block collected so far (if any) wrapped in its tag, then
  # start a new block of type nb. An empty nb starts a paragraph.
  function newblock(nb) {
    if (text) print "<" block ">" text "</" block ">"
    text = ""
    block = nb ? nb : "p"
  }

  # Replace every match of the toggle regex tgl in $0 with an opening or a
  # closing inl tag. inline[1..ni] is the stack of inline tags currently
  # open; it is kept across input lines and drained in END.
  function subinline(tgl, inl) {
    while (match($0, tgl)) {
      if (inline[ni] == inl)
        ni -= sub(tgl, "</" inl ">")
      else if (sub(tgl, "<" inl ">"))
        inline[++ni] = inl
    }
  }

  # Build an anchor for href with link text lnk. The result is used as the
  # replacement argument of sub(), so each & is protected with a backslash.
  function dolink(href, lnk) {
    # Undo escaped html in uris
    gsub(/&amp;/, "\\&", href)
    gsub(/&lt;/, "<", href)
    gsub(/&gt;/, ">", href)
    # & can be tricky, and not standard:
    gsub(/&/, "\\\\\\&", href)
    gsub(/&/, "\\\\\\&", lnk)
    return "<a href=\"" href "\">" lnk "</a>"
  }

  BEGIN { ni = 0; nl = 0; text = ""; block = "p" }

  # Escape html (& must be handled first so entities are not re-escaped)
  esc != "false" { gsub("&", "\\&amp;"); gsub("<", "\\&lt;"); gsub(">", "\\&gt;") }

  # Horizontal rules (_ is not in markdown). Only when no text is pending,
  # otherwise a dash line is a Setext underline handled further down.
  /^[ ]*([-*_] ?)+[ ]*$/ && text == "" { print "<hr>"; next }

  # Tables (not in markdown)
  # Syntax:
  #     Right Align|     Center Align     |Left Align
  /([ \t]\|)|(\|[ \t])/ {
    if (block != "table") newblock("table")
    nc = split($0, cells, "|")
    $0 = "<tr>\n"
    for (i = 1; i <= nc; i++) {
      # Blanks before the cell text push it right; blanks on both sides
      # center it; no leading blanks leaves it left aligned.
      align = "left"
      if (sub(/^[ \t]+/, "", cells[i])) {
        if (sub(/[ \t]+$/, "", cells[i]))
          align = "center"
        else
          align = "right"
      }
      sub(/[ \t]+$/, "", cells[i])
      $0 = $0 "<td align=\"" align "\">" cells[i] "</td>\n"
    }
    # The row is left in $0 so the rules below append it to the table text.
    $0 = $0 "</tr>"
  }

  # Ordered and unordered (possibly nested) lists
  /^[ \t]*([*+-]|(([0-9]+[\.-]?)+))[ \t]/ {
    newblock("li")
    # Nesting level is one plus the number of leading blanks.
    nnl = 1
    while (match($0, /^[ \t]/)) { sub(/^[ \t]/, ""); nnl++ }
    while (nl > nnl) print "</" list[nl--] ">"
    while (nl < nnl) {
      list[++nl] = "ol"
      if (match($0, /^[*+-]/)) list[nl] = "ul"
      print "<" list[nl] ">"
    }
    sub(/^([*+-]|(([0-9]+[\.-]?)+))[ \t]/, "")
  }

  # Multi line list items: drop the indentation of continuation lines so
  # they are not mistaken for code blocks.
  block == "li" { sub(/^( +|\t+)/, "") }

  # Code blocks (four spaces or one tab of indentation)
  /^(    |\t)/ {
    if (block != "code") newblock("code")
    sub(/^(    |\t)/, "")
    text = text $0 "\n"
    next
  }

  # Paragraph: a blank line ends the current block and all open lists
  /^$/ { newblock(); while (nl > 0) print "</" list[nl--] ">" }

  # Setext-style Headers (plus h3 with underscores): the underline turns
  # the pending text into a header.
  /^=+$/ { block = "h" 1; next }
  /^-+$/ { block = "h" 2; next }
  /^_+$/ { block = "h" 3; next }

  # Atx-style headers
  /^#/ {
    newblock()
    match($0, /^#+/)
    n = RLENGTH
    if (n > 6) n = 6
    # Remove the whole marker and the blanks after it, whatever its length.
    sub(/^#+[ \t]*/, "")
    # Concatenate with "" so a header such as "0" stays a non-empty string.
    text = $0 ""
    block = "h" n
    next
  }

  {
    # Images
    while (match($0, /!\[[^\]]+\]\([^\)]+\)/)) {
      split(substr($0, RSTART, RLENGTH), a, /(!\[)|\)|(\]\()/)
      sub(/!\[[^\]]+\]\([^\)]+\)/, "<img src=\"" a[3] "\" alt=\"" a[2] "\">")
    }
    # Links
    while (match($0, /\[[^\]]+\]\([^\)]+\)/)) {
      split(substr($0, RSTART, RLENGTH), a, /[\[\)]|(\]\()/)
      sub(/\[[^\]]+\]\([^\)]+\)/, dolink(a[3], a[2]))
    }
    # Auto links (uri matching is poor)
    na = split($0, a, /(^\()|[ ]|([,\.\)]([ ]|$))/)
    for (i = 1; i <= na; i++)
      if (match(a[i], /^(((https?|ftp|file|news|irc):\/\/)|(mailto:)).+$/))
        sub(a[i], dolink(a[i], a[i]))
    # Inline
    subinline("(\\*\\*)|(__)", "strong")
    subinline("\\*", "em")
    subinline("`", "code")
    # Lines of the same block are joined with a single space.
    text = text (text ? " " : "") $0
  }

  END {
    # Close inline tags left open, flush the last block, close open lists.
    while (ni > 0) text = text "</" inline[ni--] ">"
    newblock()
    while (nl > 0) print "</" list[nl--] ">"
  }' "$1"
}
# Script entry point: convert the file named by the first argument.
md2html "${1}"