#!/bin/awk -f
#
# by: Jesus Galan (yiyus) 2009
#
# Usage: md2html.awk file.md > file.html
# See: http://4l77.com/src/md2html.awk

# Escape text for literal inclusion in HTML.
#
# Replaces every "&" with "&amp;" and every "<" with "&lt;" and returns the
# result.  "&" is handled first so the entities produced for "<" are not
# escaped a second time.  In a gsub() replacement a bare "&" stands for the
# matched text, hence the "\\&" that yields a literal ampersand.
function eschtml(t) {
	gsub("&", "\\&amp;", t);
	gsub("<", "\\&lt;", t);
	return t;
}

# Emit one line of output.
#
# While nr is non-zero the line is appended to the otext buffer (preceded by
# a newline) instead of being printed; otherwise it goes straight to
# standard output.
function oprint(t) {
	if(nr != 0) {
		otext = otext "\n" t;
		return;
	}
	print t;
}

# Resolve buffered placeholders for reference id.
#
# nextil() writes "<<id" followed by a closing double quote wherever it meets
# a reference that has no definition yet, and counts them in nr.  This
# replaces each such placeholder in otext with ref[id], decrementing nr per
# replacement.  Once nr reaches zero the buffered output is printed and the
# buffer is cleared.
#
# The replacement is done with index()/substr() rather than sub() so that
# regex metacharacters in id and "&" or "\" in ref[id] are taken literally.
# The closing quote is part of the search key so that id "foo" does not
# match the placeholder of id "foobar".
#
# mark and pos are local variables; callers pass only id.
function subref(id,    mark, pos) {
	mark = "<<" id "\"";
	while(nr > 0 && (pos = index(otext, mark)) > 0) {
		otext = substr(otext, 1, pos - 1) ref[id] "\"" \
		    substr(otext, pos + length(mark));
		nr--;
	}
	if(nr == 0 && otext) {
		print otext;
		otext = "";
	}
}

# Convert the inline Markdown in t to HTML and return the result.
#
# Each call finds the first special token in t (a backtick, "<", "&", "[",
# "![", "*", "_", "\" or "-"), copies the text before it, translates the
# token and recurses on the remainder.
#
# Globals used:
#   ilcode, ilcode2  inside a `code` span / the span was opened with ``
#   ns, stag[]       stack of currently open emphasis markers
#   ref[]            link reference definitions seen so far
#   nr               number of "<<id" placeholders emitted for references
#                    that are not defined yet (see subref())
#
# Everything after t in the parameter list is a local variable; callers pass
# only t.  Keeping them local stops recursive calls (e.g. for link text)
# from overwriting the caller's state.
function nextil(t,    t1, tag, t2, url, linktext, alt, title, id, r, pt2, ntag, n) {
	if(!match(t, /[`<&\[*_\\-]|(\!\[)/))
		return t;
	t1 = substr(t, 1, RSTART - 1);
	tag = substr(t, RSTART, RLENGTH);
	t2 = substr(t, RSTART + RLENGTH);
	# Inside a code span everything but a backtick is literal text.
	if(ilcode && tag != "`")
		return eschtml(t1 tag) nextil(t2);
	# Backslash escaping
	if(tag == "\\"){
		if(match(t2, /^[\\`*_{}\[\]()#+\-\.!]/)){
			tag = substr(t2, 1, 1);
			t2 = substr(t2, 2);
		}
		return t1 tag nextil(t2);
	}
	# Dashes: "--" becomes an em dash, a single "-" is kept.
	if(tag == "-"){
		if(sub(/^-/, "", t2))
			tag = "&#8212;";
		return t1 tag nextil(t2);
	}
	# Inline Code
	if(tag == "`"){
		if(sub(/^`/, "", t2)){
			# "``" with no second "``" after it is emitted as a
			# right double quotation mark.
			if(!match(t2, /``/))
				return t1 "&#8221;" nextil(t2);
			ilcode2 = !ilcode2;
		}
		else if(ilcode2)
			# A single backtick inside a ``...`` span is literal.
			return t1 tag nextil(t2);
		tag = "<code>";
		if(ilcode){
			t1 = eschtml(t1);
			tag = "</code>";
		}
		ilcode = !ilcode;
		return t1 tag nextil(t2);
	}
	if(tag == "<"){
		# Autolinks
		if(match(t2, /^[^ \t]+[\.@][^ \t]+>/)){
			url = eschtml(substr(t2, 1, RLENGTH - 1));
			t2 = substr(t2, RLENGTH + 1);
			linktext = url;
			if(match(url, /@/) && !match(url, /^mailto:/))
				url = "mailto:" url;
			return t1 "<a href=\"" url "\">" linktext "</a>" nextil(t2);
		}
		# Html tags are passed through unchanged.
		if(match(t2, /^[A-Za-z\/!][^>]*>/)){
			tag = tag substr(t2, RSTART, RLENGTH);
			t2 = substr(t2, RLENGTH + 1);
			return t1 tag nextil(t2);
		}
		return t1 "&lt;" nextil(t2);
	}
	# Html special entities are passed through; a bare "&" is escaped.
	if(tag == "&"){
		if(match(t2, /^#?[A-Za-z0-9]+;/)){
			tag = tag substr(t2, RSTART, RLENGTH);
			t2 = substr(t2, RLENGTH + 1);
			return t1 tag nextil(t2);
		}
		return t1 "&amp;" nextil(t2);
	}
	# Images
	if(tag == "!["){
		if(!match(t2, /(\[.*\])|(\(.*\))/))
			return t1 tag nextil(t2);
		match(t2, /^[^\]]*/);
		alt = substr(t2, 1, RLENGTH);
		t2 = substr(t2, RLENGTH + 2);
		if(match(t2, /^\(/)){
			# Inline
			sub(/^\(/, "", t2);
			match(t2, /^[^\)]+/);
			url = substr(t2, 1, RLENGTH);
			t2 = substr(t2, RLENGTH + 2);
			title = "";
			if(match(url, /[ \t]+\".*\"[ \t]*$/)) {
				title = substr(url, RSTART, RLENGTH);
				url = substr(url, 1, RSTART - 1);
				match(title, /\".*\"/);
				title = " title=\"" eschtml(substr(title, RSTART + 1, RLENGTH - 2)) "\"";
			}
			# Strip <...> around the URL before escaping, otherwise
			# the leading "<" is already "&lt;" and never matches.
			if(match(url, /^<.*>$/))
				url = substr(url, 2, RLENGTH - 2);
			url = eschtml(url);
			return t1 "<img src=\"" url "\" alt=\"" alt "\"" title " />" nextil(t2);
		}
		else{
			# Referenced
			sub(/^ ?\[/, "", t2);
			id = alt;
			if(match(t2, /^[^\]]+/))
				id = substr(t2, 1, RLENGTH);
			# When the match above fails RLENGTH is -1 and t2 is
			# left as it is.
			t2 = substr(t2, RLENGTH + 2);
			if(ref[id])
				r = ref[id];
			else{
				# Not defined yet: leave a placeholder for subref().
				r = "<<" id;
				nr++;
			}
			return t1 "<img src=\"" r "\" alt=\"" alt "\" />" nextil(t2);
		}
	}
	# Links
	if(tag == "["){
		if(!match(t2, /(\[.*\])|(\(.*\))/))
			return t1 tag nextil(t2);
		match(t2, /^[^\]]*(\[[^\]]*\][^\]]*)*/);
		linktext = substr(t2, 1, RLENGTH);
		t2 = substr(t2, RLENGTH + 2);
		if(match(t2, /^\(/)){
			# Inline
			match(t2, /^[^\)]+(\([^\)]+\)[^\)]*)*/);
			url = substr(t2, 2, RLENGTH - 1);
			pt2 = substr(t2, RLENGTH + 2);
			title = "";
			if(match(url, /[ \t]+\".*\"[ \t]*$/)) {
				title = substr(url, RSTART, RLENGTH);
				url = substr(url, 1, RSTART - 1);
				match(title, /\".*\"/);
				title = " title=\"" substr(title, RSTART + 1, RLENGTH - 2) "\"";
			}
			if(match(url, /^<.*>$/))
				url = substr(url, 2, RLENGTH - 2);
			url = eschtml(url);
			return t1 "<a href=\"" url "\"" title ">" nextil(linktext) "</a>" nextil(pt2);
		}
		else{
			# Referenced
			sub(/^ ?\[/, "", t2);
			id = linktext;
			if(match(t2, /^[^\]]+/))
				id = substr(t2, 1, RLENGTH);
			t2 = substr(t2, RLENGTH + 2);
			if(ref[id])
				r = ref[id];
			else{
				# Not defined yet: leave a placeholder for subref().
				r = "<<" id;
				nr++;
			}
			pt2 = t2;
			# The opening tag must not be self-closing.
			return t1 "<a href=\"" r "\">" nextil(linktext) "</a>" nextil(pt2);
		}
	}
	# Emphasis
	if(match(tag, /[*_]/)){
		ntag = tag;
		# Compare characters instead of building the regex "^" tag:
		# "^*" is not a portable regular expression.
		if(substr(t2, 1, 1) == tag){
			t2 = substr(t2, 2);
			# With a single marker open and a third marker
			# following, close the single one first and put the
			# consumed character back; otherwise it is a double
			# (strong) marker.
			if(stag[ns] == tag && substr(t2, 1, 1) == tag)
				t2 = tag t2;
			else
				ntag = tag tag;
		}
		n = length(ntag);
		tag = (n == 2) ? "strong" : "em";
		# A marker with a space on both sides is literal text.
		if(match(t1, / $/) && match(t2, /^ /))
			return t1 ntag nextil(t2);
		if(stag[ns] == ntag){
			tag = "/" tag;
			ns--;
		}
		else
			stag[++ns] = ntag;
		tag = "<" tag ">";
		return t1 tag nextil(t2);
	}
}

# Convert the inline markup of one text block to HTML.
#
# Clears the per-block inline state read by nextil() (the code span flags
# and the depth of the emphasis stack) and returns nextil(t).
function inline(t) {
	ns = 0;
	ilcode = ilcode2 = 0;

	return nextil(t);
}

# Flush the accumulated paragraph text.
#
# If the global text holds anything other than spaces it is run through
# inline() and written with oprint(), wrapped in <tag>...</tag> unless tag
# is empty.  text is cleared in every case.
function printp(tag) {
	if(text !~ /^[ ]*$/) {
		text = inline(text);
		oprint((tag == "") ? text : ("<" tag ">" text "</" tag ">"));
	}
	text = "";
}

# Initial parser state: all flags and counters zero, both text buffers
# empty, and "p" as the tag for the next paragraph.
BEGIN {
	blank = code = hr = html = 0;
	nl = nr = 0;
	otext = text = "";
	par = "p";
}

# References
#
# A line of the form  [id]: url "optional title"  defines a link reference.
# The stored value is the escaped URL, followed when a title is present by
# the text that closes the href attribute and opens a title attribute, so
# that it can be dropped between the quotes emitted by nextil().  Any
# placeholders already buffered for this id are resolved through subref().
#
# The id is cut out with substr() rather than being interpolated into a
# regular expression, so ids containing regex metacharacters work.
!code && /^ *\[[^\]]*\]:[ \t]+/ {
	sub(/^ *\[/, "");
	match($0, /\]/);
	id = substr($0, 1, RSTART - 1);
	$0 = substr($0, RSTART + 1);
	sub(/^:[ \t]+/, "");
	title = "";
	if(match($0, /\".*\"$/))
		title = "\" title=\"" substr($0, RSTART + 1, RLENGTH - 2);
	sub(/[ \t]+\".*\"$/, "");
	url = eschtml($0);
	ref[id] = url title;

	subref(id);
	next;
}

# html
#
# A line that starts with a block-level HTML tag (or a comment) is copied
# through untouched.  Unless the same line also closes the element, html is
# set so that the following lines are passed through as well; hr marks an
# <hr tag that is still open at the end of the line.
!html && /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/ {
	if(code){
		oprint("</pre></code>");
		# Reset the flag, as every other place that closes a code
		# block does; otherwise it would be closed a second time.
		code = 0;
	}
	# With no paragraph pending, enclosing blockquotes end here.
	for(; !text && block[nl] == "blockquote"; nl--)
		oprint("</blockquote>");
	match($0, /^<(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
isindex|menu|noframes|noscript|ol|p|pre|table|ul|!--)/);
	htag = substr($0, 2, RLENGTH - 1);
	if(!match($0, "(<\\/" htag ">)|((^<hr ?\\/?)|(--)>$)"))
		html = 1;
	if(html && match($0, /^<hr/))
		hr = 1;
	oprint($0);
	next;
}

# Inside a raw HTML block every line is copied through unchanged.  The block
# ends with a line that starts with a closing block-level tag, that ends a
# comment, or - while an <hr tag is open - that ends in ">".
html {
	if(/(^<\/(address|blockquote|center|dir|div|dl|fieldset|form|h[1-6r]|\
isindex|menu|noframes|noscript|ol|p|pre|table|ul).*)|(--)>$/ ||
	    (hr && />$/))
		hr = html = 0;
	oprint($0);
	next;
}

# List and quote blocks

# Remove indentation
#
# Strip one level of prefix per open block: four spaces or a tab for a list,
# ">" with an optional space for a blockquote.  nnl ends up as the number of
# open blocks this line still belongs to.
{
	for(nnl = 0; nnl < nl; nnl++)
		if((block[nnl + 1] ~ /[ou]l/ && !sub(/^(    |\t)/, "")) ||
		   (block[nnl + 1] == "blockquote" && !sub(/^> ?/, "")))
			break;
}
# A line that continues a pending paragraph without a blank line in between,
# and that is not a new list item, stays in the innermost open block.
nnl < nl && !blank && text && ! /^ ? ? ?([*+-]|([0-9]+\.)+)( +|\t)/ { nnl = nl; }

# Quote blocks
#
# Every remaining "> " prefix opens one more blockquote level.
{
	while(sub(/^> /, "")) {
		nnl++;
		nblock[nnl] = "blockquote";
	}
}

# Horizontal rules
#
# Three or more "-", "*" or "_" characters, optionally separated by spaces,
# on a line that does not continue a paragraph.  The rule itself is written
# later, once the open blocks have been closed (nnl = 0).
{ hr = 0; }
(blank || (!text && !code)) && /^ ? ? ?([-*_][ ]*)([-*_][ ]*)([-*_][ ]*)+$/ {
	hr = 1;
	nnl = 0;
	blank = 0;
	if(code) {
		code = 0;
		oprint("</pre></code>");
	}
}

# List items
#
# An empty line inside a list is only remembered; what it means depends on
# the line that follows.
block[nl] ~ /[ou]l/ && /^$/ {
	blank = 1;
	next;
}
{ newli = 0; }
# Unordered item: "*", "+" or "-" followed by spaces or a tab.
!hr && (nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?[*+-]( +|\t)/ {
	sub(/^ ? ? ?[*+-]( +|\t)/, "");
	nnl++;
	nblock[nnl] = "ul";
	newli = 1;
}
# Ordered item: digits and a period followed by spaces or a tab.
(nnl != nl || !text || block[nl] ~ /[ou]l/) && /^ ? ? ?([0-9]+\.)+( +|\t)/ {
	sub(/^ ? ? ?([0-9]+\.)+( +|\t)/, "");
	nnl++;
	nblock[nnl] = "ol";
	newli = 1;
}
# A new item flushes the previous item's text.  After a blank line the
# previous item is wrapped in <p>.  A sibling item of the same list type
# closes the old <li> and opens the next one.
newli {
	if(blank && nnl == nl && !par)
		par = "p";
	blank = 0;
	printp(par);
	if(nnl == nl && block[nl] == nblock[nl])
		oprint("</li><li>");
}
# First non-empty line after a blank one: flush the pending text as a
# paragraph and go back to paragraph mode.
blank && ! /^$/ {
	if(match(block[nnl], /[ou]l/) && !par)
		par = "p";
	printp(par);
	par = "p";
	blank = 0;
}

# Close old blocks and open new ones
#
# nl is the depth of the blocks currently open (block[1..nl]); nnl is the
# depth this line asks for (new levels are described by nblock[]).
#
# The nesting changes: end any code block, flush the pending text, and pick
# the paragraph tag for what follows (none directly inside a list item).
nnl != nl || nblock[nl] != block[nl] {
	if(code) {
		oprint("</pre></code>");
		code = 0;
	}
	printp(par);
	if(nnl > nl)
		b = nblock[nnl];
	else
		b = block[nnl];
	if(b ~ /[ou]l/)
		par = "";
	else
		par = "p";
}
# Close the blocks this line is no longer part of.
# NOTE(review): pblock is not assigned anywhere in the visible source, so
# pblock[nl] is empty and the level equal to nnl is closed as well whenever
# it holds a block; it is then reopened by the rule below.  Confirm that
# this is intended before changing it.
nnl < nl || (nnl == nl && nblock[nl] != block[nl]) {
	while(nl > nnl || (nnl == nl && pblock[nl] != block[nl])) {
		if(block[nl] ~ /[ou]l/)
			oprint("</li>");
		oprint("</" block[nl] ">");
		nl--;
	}
}
# Open the new blocks, each list starting with its first item.
nnl > nl {
	while(nl < nnl) {
		nl++;
		block[nl] = nblock[nl];
		oprint("<" block[nl] ">");
		if(block[nl] ~ /[ou]l/)
			oprint("<li>");
	}
}
# A horizontal rule detected earlier is written now that the block
# structure is up to date.
hr {
	oprint("<hr>");
	next;
}

# Code blocks
#
# Empty lines inside a code block are counted in codeblank and written only
# if more code follows, so that blank lines between code lines are kept and
# trailing ones are dropped.  blank is still set because the rules above
# look at it; they also clear it before the next code line gets here, which
# is why a separate counter is needed.
code && /^$/ {
	codeblank++;
	blank = 1;
	next;
}
# A line indented by four spaces or a tab, with no paragraph pending, is a
# line of code.
!text && sub(/^(    |\t)/, "") {
	if(code)
		for(; codeblank > 0; codeblank--)
			oprint("");
	# Discard any count left over from a block that was closed elsewhere.
	codeblank = 0;
	blank = 0;
	if(!code)
		oprint("<code><pre>");
	code = 1;
	$0 = eschtml($0);
	oprint($0);
	next;
}
# Any other line ends the code block.
code {
	oprint("</pre></code>");
	code = 0;
}

# Setext-style headers: a line of "=" or "-" under pending text turns that
# text into a level 1 or level 2 header.
text && /^=+$/ {
	printp("h1");
	next;
}
text && /^-+$/ {
	printp("h2");
	next;
}

# Atx-style headers: each leading "#" (six at most) raises the level and
# takes one trailing "#" with it.  The remaining line is collected as text
# with the header tag as its paragraph tag.
/^#+/ && (!newli || par=="p" || /^##/) {
	n = 0;
	while(n < 6 && sub(/^# */, "")) {
		sub(/#$/, "");
		n++;
	}
	par = "h" n;
}

# Paragraph: an empty line ends the pending block of text.
/^$/ {
	printp(par);
	par = "p";
	next;
}

# Add text: join the line to the pending text with a single space.  The
# explicit concatenation keeps text a string even for a line such as "0".
{
	if(text)
		text = text " " $0;
	else
		text = "" $0;
}

# End of input: close an open code block, flush the last paragraph, close
# every block that is still open, drop the placeholders of references that
# were never defined and print whatever is left in the buffer.
END {
	if(code) {
		oprint("</pre></code>");
		code = 0;
	}
	printp(par);
	while(nl > 0) {
		if(block[nl] ~ /[ou]l/)
			oprint("</li>");
		oprint("</" block[nl] ">");
		nl--;
	}
	gsub(/<<[^\"]*/, "", otext);
	print(otext);
}