#!/usr/bin/awk -f
# md2html.awk
# by: Jesus Galan (yiyus), May 2009
#
# Converts a subset of Markdown (plus a simple table extension) to HTML.
#
# Usage:
#   md2html file.md > file.html
# Options:
#   -v esc=false    do not escape HTML special characters in the input
#
# Global state:
#   text    accumulated content of the block currently being built
#   block   tag name of the current block ("p", "li", "code", "table", "h1"...)
#   inline  stack of currently open inline tags; ni is its depth
#   list    stack of currently open list tags ("ol"/"ul"); nl is its depth

# Flush the pending block, if any, wrapped in its tag, and start a new one.
#   nblock  tag name of the block to start; defaults to "p" when omitted
function newblock(nblock){
    if(text)
        print "<" block ">" text "</" block ">";
    text = "";
    block = nblock ? nblock : "p";
}

# Replace every occurrence of a toggle marker in $0 with alternating
# opening and closing tags.
#   tgl  regular expression (as a string) matching the toggle marker
#   inl  tag name to emit (e.g. "em")
# An occurrence closes the tag when `inl` is on top of the inline stack;
# otherwise it opens it and pushes it on the stack.
function subinline(tgl, inl){
    while(match($0, tgl)){
        if(inline[ni] == inl)
            ni -= sub(tgl, "</" inl ">");
        else if(sub(tgl, "<" inl ">"))
            inline[++ni] = inl;
    }
}

# Build an anchor element suitable for use as a sub()/gsub() replacement.
#   href  link target (possibly HTML-escaped by the escaping rule)
#   lnk   link text
# Returns the <a> element with every "&" preceded by a backslash, so that
# the replacement does not expand "&" to the matched text.
function dolink(href, lnk){
    # Undo escaped html in uris
    gsub(/&amp;/, "\\&", href);
    gsub(/&lt;/, "<", href);
    gsub(/&gt;/, ">", href);
    # & can be tricky, and not standard:
    gsub(/&/, "\\\\\\&", href);
    gsub(/&/, "\\\\\\&", lnk);
    return "<a href=\"" href "\">" lnk "</a>";
}

# Close open lists until at most `depth` levels remain open.
function closelists(depth){
    while(nl > depth){
        print "</" list[nl] ">";
        nl--;
    }
}

BEGIN {
    ni = 0;     # inlines
    nl = 0;     # nested lists
    text = "";
    block = "p";
}

# Escape html
esc != "false" {
    # "\\&" yields a literal "&" in the replacement ("&" alone would mean
    # "the matched text").
    gsub("&", "\\&amp;");
    gsub("<", "\\&lt;");
    gsub(">", "\\&gt;");
}

# Horizontal rules (_ is not in markdown)
# Only when no text is pending; otherwise a line of dashes or underscores
# is handled by the Setex header rules below.
/^[ \t]*([-*_] ?)+[ \t]*$/ && text == "" {
    print "<hr>";
    next;
}

# Tables (not in markdown)
# Syntax:
#   Right Align|     Center Align     |Left Align
# Each line containing a "|" adjacent to a space becomes one table row.
/([ ]\|)|(\|[ ])/ {
    if(block != "table")
        newblock("table");
    nc = split($0, cells, "|");
    $0 = "<tr>\n";
    for(i = 1; i <= nc; i++){
        # Leading padding only -> right; padding on both sides -> center;
        # no leading padding -> left.
        align = "left";
        if(sub(/^[ ]+/, "", cells[i])){
            if(sub(/[ ]+$/, "", cells[i]))
                align = "center";
            else
                align = "right";
        }
        sub(/[ ]+$/, "", cells[i]);
        $0 = $0 "<td align=\"" align "\">" cells[i] "</td>\n";
    }
    # The rebuilt row falls through to the inline rule, which appends it
    # to the table block's text.
    $0 = $0 "</tr>";
}

# Ordered and unordered (possibly nested) lists
/^[ \t]*([*+-]|(([0-9]+[\.-]?)+))[ \t]/ {
    newblock("li");
    # Nesting level is one plus the number of leading blanks.
    nnl = 1;
    while(match($0, /^[ \t]/)){
        sub(/^[ \t]/, "");
        nnl++;
    }
    closelists(nnl);
    while(nl < nnl){
        list[++nl] = "ol";
        if(match($0, /^[*+-]/))
            list[nl] = "ul";
        print "<" list[nl] ">";
    }
    # Drop the bullet or number, leaving only the item text.
    sub(/^([*+-]|(([0-9]+[\.-]?)+))[ \t]/, "");
}

# Multi line list items
# Strip the leading run of spaces or tabs so that continuation lines are
# not mistaken for code blocks.
block == "li" {
    sub(/^( +|\t+)/, "");
}

# Code blocks
# Lines indented by four spaces or a tab are copied verbatim (no inline
# processing), one per output line.
/^(    |\t)/ {
    if(block != "code")
        newblock("code");
    sub(/^(    |\t)/, "");
    text = text $0 "\n";
    next;
}

# Paragraph
# A blank line ends the current block and closes all open lists.
/^$/ {
    newblock();
    closelists(0);
}

# Setex-style Headers
# (Plus h3 with underscores.)
# The underline retags the text accumulated so far; it is emitted on the
# next newblock().
/^=+$/ {
    block = "h" 1;
    next;
}

/^-+$/ {
    block = "h" 2;
    next;
}

/^_+$/ {
    block = "h" 3;
    next;
}

# Atx-style headers
/^#/ {
    newblock();
    match($0, /#+/);
    n = RLENGTH;
    if(n > 6)
        n = 6;      # HTML defines h1..h6 only
    text = substr($0, RLENGTH + 1);
    block = "h" n;
    next;
}

# Inline markup; runs for every line not consumed by a `next` above.
{
    # Images: ![alt](src)
    while(match($0, /!\[[^\]]+\]\([^\)]+\)/)){
        # Splitting on "![", "](" and ")" gives a[2] = alt, a[3] = src.
        split(substr($0, RSTART, RLENGTH), a, /(!\[)|\)|(\]\()/);
        sub(/!\[[^\]]+\]\([^\)]+\)/, "<img src=\"" a[3] "\" alt=\"" a[2] "\" />");
    }
    # Links: [text](href)
    while(match($0, /\[[^\]]+\]\([^\)]+\)/)){
        # Splitting on "[", "](" and ")" gives a[2] = text, a[3] = href.
        split(substr($0, RSTART, RLENGTH), a, /[\[\)]|(\]\()/);
        sub(/\[[^\]]+\]\([^\)]+\)/, dolink(a[3], a[2]));
    }
    # Auto links (uri matching is poor)
    # NOTE: a[i] is used as a regular expression by sub(), so URIs that
    # contain regex metacharacters (e.g. "?", "+") may not be replaced
    # correctly.
    na = split($0, a, /(^\()|[ \t]|([,\.\)]([ \t]|$))/);
    for(i = 1; i <= na; i++)
        if(match(a[i], /^(((https?|ftp|file|news|irc):\/\/)|(mailto:)).+$/))
            sub(a[i], dolink(a[i], a[i]));
    # Inline
    # "**"/"__" must be handled before the single "*".
    subinline("(\\*\\*)|(__)", "strong");
    subinline("\\*", "em");
    subinline("`", "code");
    # Join the lines of a block with single spaces.
    text = text (text ? " " : "") $0;
}

END {
    # Close inline tags left open, flush the last block, close open lists.
    while(ni > 0){
        text = text "</" inline[ni] ">";
        ni--;
    }
    newblock();
    closelists(0);
}