#!/usr/bin/awk -f
# md2html.awk
#
# Converts a subset of Markdown (plus a simple table extension) to HTML.
#
# Usage:
#     md2html file.md > file.html
# Options:
#     -v esc=false    do not escape html in the input
#
# Global state shared by the rules below:
#     text    accumulated content of the block currently being built
#     block   tag name of that block ("p", "li", "code", "table", "h1".."h6")
#     inline  stack of currently open inline tags, ni is its depth
#     list    stack of currently open list tags ("ol"/"ul"), nl is its depth

# Flush the pending block (if it has any text) as <block>text</block> and
# start a new one.  nblock is the tag of the new block; it defaults to "p".
function newblock(nblock) {
    if (text)
        print "<" block ">" text "</" block ">"
    text = ""
    block = nblock ? nblock : "p"
}

# Replace every occurrence of the toggle regex tgl in $0 with an opening or
# closing <inl> tag.  A toggle closes the tag when inl is on top of the
# inline stack, otherwise it opens it and pushes inl.  The stack persists
# across input lines, so an inline span may cover several lines.
function subinline(tgl, inl) {
    while (match($0, tgl)) {
        if (inline[ni] == inl)
            ni -= sub(tgl, "</" inl ">")
        else if (sub(tgl, "<" inl ">"))
            inline[++ni] = inl
    }
}

# Build an anchor for href with link text lnk.  The result is meant to be
# used as the replacement argument of sub(), hence the & escaping below.
function dolink(href, lnk) {
    # Undo escaped html in uris
    gsub(/&amp;/, "\\&", href)
    gsub(/&lt;/, "<", href)
    gsub(/&gt;/, ">", href)
    # & can be tricky, and not standard: turn every & into \& so that
    # sub() inserts a literal & instead of the matched text.
    gsub(/&/, "\\\\\\&", href)
    gsub(/&/, "\\\\\\&", lnk)
    return "<a href=\"" href "\">" lnk "</a>"
}

# Build an image tag for src with alternative text alt.  Like dolink(), the
# result is a sub() replacement, so & is escaped; an unescaped & would
# re-insert the matched image syntax and the caller's loop would not end.
function doimg(src, alt) {
    gsub(/&/, "\\\\\\&", src)
    gsub(/&/, "\\\\\\&", alt)
    return "<img src=\"" src "\" alt=\"" alt "\" />"
}

# Return s with every regular expression metacharacter backslash-escaped,
# so that the result matches s literally when used as a dynamic regex.
# Built character by character to avoid any dependence on how sub()/gsub()
# treat backslashes in the replacement string.
function escre(s,    out, i, c) {
    out = ""
    for (i = 1; i <= length(s); i++) {
        c = substr(s, i, 1)
        if (index("\\.^$*+?(){}|[]", c))
            out = out "\\" c
        else
            out = out c
    }
    return out
}

BEGIN {
    ni = 0      # inlines
    nl = 0      # nested lists
    text = ""
    block = "p"
}

# Escape html (skipped when run with -v esc=false)
esc != "false" {
    gsub("&", "\\&amp;")
    gsub("<", "\\&lt;")
    gsub(">", "\\&gt;")
}

# Horizontal rules (_ is not in markdown).  Only recognised when no text is
# pending, so that a line of dashes under text is handled by the underlined
# header rule further down.
/^[ \t]*([-*_] ?)+[ \t]*$/ && text == "" {
    print "<hr>"
    next
}

# Tables (not in markdown)
# Syntax:
#     Right Align| Center Align |Left Align
# The padding around a cell selects its alignment: leading and trailing
# blanks centre it, leading blanks only right-align it, otherwise it is
# left-aligned.  $0 is rewritten to one <tr> row and falls through to the
# generic rule at the end, which appends it to the table block.
/([ \t]\|)|(\|[ \t])/ {
    if (block != "table")
        newblock("table")
    nc = split($0, cells, "|")
    $0 = "<tr>\n"
    for (i = 1; i <= nc; i++) {
        align = "left"
        if (sub(/^[ \t]+/, "", cells[i])) {
            if (sub(/[ \t]+$/, "", cells[i]))
                align = "center"
            else
                align = "right"
        }
        sub(/[ \t]+$/, "", cells[i])
        $0 = $0 "<td align=\"" align "\">" cells[i] "</td>\n"
    }
    $0 = $0 "</tr>"
}

# Ordered and unordered (possibly nested) lists.  Every leading blank adds
# one nesting level; lists are closed or opened until the stack depth nl
# equals the level of this item, then the item marker is removed.
/^[ \t]*([*+-]|(([0-9]+[\.-]?)+))[ \t]/ {
    newblock("li")
    nnl = 1
    while (match($0, /^[ \t]/)) {
        sub(/^[ \t]/, "")
        nnl++
    }
    while (nl > nnl)
        print "</" list[nl--] ">"
    while (nl < nnl) {
        list[++nl] = "ol"
        if (match($0, /^[*+-]/))
            list[nl] = "ul"
        print "<" list[nl] ">"
    }
    sub(/^([*+-]|(([0-9]+[\.-]?)+))[ \t]/, "")
}

# Multi line list items: drop the indentation of continuation lines so the
# code block rule below does not pick them up.
block == "li" {
    sub(/^( +|\t+)/, "")
}

# Code blocks: lines indented by four spaces or one tab
/^(    |\t)/ {
    if (block != "code")
        newblock("code")
    sub(/^(    |\t)/, "")
    text = text $0 "\n"
    next
}

# Paragraphs: an empty line ends the current block and closes all lists
/^$/ {
    newblock()
    while (nl > 0)
        print "</" list[nl--] ">"
}

# Headers: the number of leading # characters (at most 6) is the level
/^#/ {
    newblock()
    match($0, /#+/)
    n = RLENGTH
    if (n > 6)
        n = 6
    text = substr($0, RLENGTH + 1)
    block = "h" n
    next
}

# Alternate headers (underlined): turn the pending block into a header
/^=+$/ {
    block = "h" 1
    next
}

/^-+$/ {
    block = "h" 2
    next
}

# Everything else: expand images, links and inline markup, then append the
# line to the pending block.
{
    # Images (before links, because image syntax contains link syntax)
    while (match($0, /!\[[^\]]+\]\([^\)]+\)/)) {
        split(substr($0, RSTART, RLENGTH), a, /(!\[)|\)|(\]\()/)
        sub(/!\[[^\]]+\]\([^\)]+\)/, doimg(a[3], a[2]))
    }
    # Links
    while (match($0, /\[[^\]]+\]\([^\)]+\)/)) {
        split(substr($0, RSTART, RLENGTH), a, /[\[\)]|(\]\()/)
        sub(/\[[^\]]+\]\([^\)]+\)/, dolink(a[3], a[2]))
    }
    # Auto links (uri matching is poor).  The uri is escaped before being
    # used as a pattern: characters such as ? + ( are common in uris and
    # would otherwise be interpreted as regex operators.
    na = split($0, a, /(^\()|[ \t]|([,\.\)]([ \t]|$))/)
    for (i = 1; i <= na; i++)
        if (match(a[i], /^(((https?|ftp|file|news|irc):\/\/)|(mailto:)).+$/))
            sub(escre(a[i]), dolink(a[i], a[i]))
    # Inline
    subinline("(\\*\\*)|(__)", "strong")
    subinline("\\*", "em")
    subinline("`", "code")
    text = text (text ? " " : "") $0
}

# Close whatever is still open: inline tags, the pending block, then lists
END {
    while (ni > 0)
        text = text "</" inline[ni--] ">"
    newblock()
    while (nl > 0)
        print "</" list[nl--] ">"
}