-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- -*- coding: raw-text-unix -*-
-- escripts.lua - convert ascii files (e-scripts) to html.
-- This is part of blogme3.
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2013jan25
-- <http://anggtwu.net/blogme3/escripts.lua>
-- <http://anggtwu.net/blogme3/escripts.lua.html>
-- License: GPL.
--
-- BUG: UTF-8 characters are not converted correctly (yet).
--
--
-- Here's a very brief explanation of what this file does:
-- When we run this,
--
--   lua51 blogme3.lua -o foo.html -a2html foo
--
-- blogme3.lua processes the options according to the entries in the
-- table "_O"; "-o foo.html" sets the output file, and "-a2html foo"
-- says to read the file "foo", process it with the default htmlizer,
-- add an HTML header and a footer, and write the result to the output
-- file. The default htmlizer is the function "htmlizelines", defined
-- below. It applies "htmlizeline" on each line, and "htmlizeline"
-- handles these cases:
--
--   * Glyphs, non-ascii characters, and characters that need to be
--     sgml-quoted. Examples: "&<>*«»áº×".
--     See: (find-blogme3 "charset.lua")
--
--   * Sexp hyperlinks. See:
--       (find-eevarticlesection "hyperlinks")
--       (find-eevarticlesection "shorter-hyperlinks")
--       (find-eevarticlesection "e-scripts")
--       (find-blogme3 "elisp.lua")
--       (find-blogme3 "angglisp.lua")
--
--   * Anchors, and "to" links pointing to anchors. An example:
--       «here»  (to "there")
--       «there» (to "here")
--     See: (find-eevarticlesection "anchors")
--
--   * Urls, and a few special strings: |&, $S/, <<'%%%'.
-- (find-blogme3 "options.lua" "basic-options")
-- (find-blogme3 "options.lua" "htmlizefile")
-- (find-blogme3 "escripts.lua" "htmlizelines")
-- (find-blogme3grep "grep -nH -e htmlizer *")
-- (find-blogme3grep "grep -nH -e htmlizefile *")
-- (find-es "lua5" "sheadsymbol-roberto")
-- (find-angg "LUA/lua50init.lua")
-- (find-angg "LUA/lua50init.lua" "loadlpeg")
-- (find-lpegw3m "doc.html" "function anywhere (p)")
-- (find-blogmefile "blogme2-outer.lua" "entities and quoting (Q)")
-- (find-anggfile "TH/Generate")
-- (find-anggfile "TH/Generate" "txt2html")
-- (ascstr 33 126)
-- !"#$%&'()*+,-./ :;<=>?@ [\]^_` {|}~
-- 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz

-- «.anchor»		(to "anchor")
-- «.assexplink»	(to "assexplink")
-- «.htmlizeline»	(to "htmlizeline")
-- «.htmlizelines»	(to "htmlizelines")

-- Debug hook: a no-op unless the caller has already installed a DBG.
DBG = DBG or function () end

-- Q sgml-quotes a string: each character matched by Q_re is replaced
-- by its entry in Q_table. The table must map to the entity forms -
-- an identity table would make Q a no-op and let raw "&", "<", ">"
-- through into the generated html.
-- "translatechars" is defined elsewhere (presumably in lua50init.lua
-- - TODO confirm).
-- (find-blogme3file "anggdefs.lua" "Q_table =")
Q_re    = "([&<>])"
Q_table = { ["&"]="&amp;", ["<"]="&lt;", [">"]="&gt;" }
Q = function (text) return translatechars(text, Q_re, Q_table) end



--------[ Basics ]--------

loadlpeg()
Eos  = lpeg.P(-1)                  -- end of subject
Eol  = #(lpeg.S "\n") + Eos        -- at a newline (not consumed) or at Eos
Pos  = lpeg.Cp()                   -- captures the current position
AlphaNumeric = lpeg.R("AZ", "az", "09")
-- OptU8 = lpeg.P("\195")^-1

--------[ Anchor ]--------

-- «anchor»  (to ".anchor")
-- An anchor is a name between the bytes 171 and 187 (the latin-1
-- guillemets); the name is returned as a capture.
AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~")
Anchor     = "\171" * lpeg.C(AnchorChar^1) * "\187"
-- Anchor  = OptU8*"\171" * lpeg.C(AnchorChar^1) * OptU8*"\187"

--------[ Url ]--------

-- "https" must be tried before "http": lpeg's "+" is an ordered choice.
UrlProtocol   = lpeg.P("https") + lpeg.P("http") + lpeg.P("ftp")
UrlDomainChar = lpeg.R("az", "09") + lpeg.S("-")
UrlDomain     = UrlDomainChar^1 * ("." * UrlDomainChar^1)^0
UrlPathChar   = AlphaNumeric + lpeg.S("!#$%&()*+,-./:;=?@[]^_{|}~")
UrlPath       = UrlPathChar^0
-- Note that the "/" after the domain is mandatory.
Url           = UrlProtocol * "://" * UrlDomain * "/" * UrlPath

--------[ SString, SNumber, SSymbol ]--------

SStringChar    = 1 - lpeg.S "\"\n"
-- NOTE(review): SStringBsl is defined but not used by SString, so a
-- backslash-escaped double quote ends the string - confirm whether
-- that is intended.
SStringBsl     = "\\" * (1 - lpeg.S "\n")
SString        = "\"" * SStringChar^0 * "\""
SNumber        = lpeg.P"-"^-1 * lpeg.R"09"^1
SNonSymbolChar = lpeg.S "\"#'(),.[\\]`"
SSymbolChar    = lpeg.R "!~" - SNonSymbolChar - lpeg.S "{}"
SSymbol        = SSymbolChar^1

--------[ SHeadSymbol, SSexpLink ]--------

-- SHeadSymbol is like SSymbol but more strict.
-- Its logic is: if SSymbol matches at pos, then check if the symbol
-- (as a string) has an entry in the table _E; if yes, then return its
-- ending position. lpeg.P is used to convert that function - that has
-- signature subj,pos|->endposornil - into a pattern.
--
-- The table _E contains the "code to htmlize elisp hyperlinks"...
-- Its entries - one for each htmlizable elisp hyperlink function -
-- are functions with signatures like
-- "all,funname,qarg1,qarg2|->html", like this:
--
--   _E["to"] = function (all, funname, qarg1, qarg2)
--       return href("#"..dequote(qarg1), Q(all))
--     end
--
-- SexpLink matches sexps that are lists formed by a SHeadSymbol
-- followed by zero or more atoms. This covers most of the kinds of
-- hyperlink sexps that I would like to htmlize, but not all.
-- For example, this is not recognized:
--   (find-iconbookpage (+ 22 143))

_E = _E or {}

-- A function pattern: succeeds (returning the end position) only when
-- an SSymbol matches at pos AND that symbol is a key of _E; otherwise
-- it returns nil/false and the match fails.
SHeadSymbol = lpeg.P(function (subj, pos)
    local e = lpeg.match(SSymbol, subj, pos)
    local symbol = e and string.sub(subj, pos, e - 1)
    return symbol and _E[symbol] and e
  end)

SAtom    = SString + SNumber + SSymbol
SSpace   = lpeg.S " \t"
SSpaces  = SSpace^1
-- Captures: the head symbol, then one capture per atom.
SexpLink = "(" * lpeg.C(SHeadSymbol) * (SSpaces * lpeg.C(SAtom))^0 * ")"

--------[ Translators ]--------

-- Strip the first and the last character of sstr (the double quotes
-- of a string matched by SString). Returns nil/false unchanged.
dequote = function (sstr) return sstr and string.sub(sstr, 2, -2) end

-- Wrap text in a link to target; when target is nil/false return text
-- unchanged. Neither argument is quoted here - callers pass text that
-- is already html (see the _E["to"] example, which passes Q(all)).
-- "format" is defined elsewhere (presumably string.format - TODO confirm).
href = function (target, text)
    if target then return format("<a href=\"%s\">%s</a>", target, text) end
    return text
  end

-- (find-blogme3 "anggdefs.lua" "asurl_hack")
-- (find-THgrep "grep -nH -e gsub *.blogme | grep angg")
-- asurl_hack lets other files rewrite the link target; the default is
-- "id", defined elsewhere (presumably the identity function - TODO confirm).
asurl_hack = asurl_hack or id

-- Turn a url into a link to itself. Both the href and the visible
-- text are sgml-quoted with Q: UrlPathChar accepts "&", and an
-- unquoted "&copy=1" in a query string would be rendered as an entity.
asurl = function (url)
    return format("<a href=\"%s\">%s</a>", Q(asurl_hack(url)), Q(url))
  end

-- Turn an anchor name into an <a name=...> target, shown in green
-- between guillemets. The guillemets are written as entities so that
-- the output does not depend on the encoding of this file or of the
-- generated page. The name needs no quoting: AnchorChar excludes
-- "&", "<" and ">".
asanchor = function (anchor)
    return format("<a name=\"%s\"><font color=\"green\"><i>&laquo;</i>" ..
                  "%s<i>&raquo;</i></font></a>",
                  anchor, anchor)
  end

-- «assexplink»  (to ".assexplink")
-- Used by the lpeg pattern `SexpLink' above.
-- This is very old - from 2007, I think.
-- The table _E is defined here:
--   (find-blogme3 "angglisp.lua")
--
-- Htmlize one sexp hyperlink: "all" is the whole sexp, "funname" its
-- head symbol, "qarg1" and "qarg2" its first two atoms (still quoted,
-- if they are strings). Dispatches to _E[funname]; when there is no
-- such entry the sexp is just sgml-quoted.
assexplink = function (all, funname, qarg1, qarg2)
    if _E[funname] then
      return _E[funname](all, funname, qarg1, qarg2)
    else
      return Q(all)
    end
  end

UrlT      = lpeg.C(Url)      / asurl
SexpLinkT = lpeg.C(SexpLink) / assexplink
AnchorT   = Anchor           / asanchor

--------[ Some extra translators ]--------

-- In an lpeg "pattern / string" replacement "%" is the escape
-- character, so a literal "%" has to be written as "%%".
-- The visible text is written with entities ("&amp;", "&lt;") because
-- these replacement strings go into the html as they are.
PipeAmpT      = lpeg.P("|&")  /
   '<a href="http://anggtwu.net/e/bash.e.html#pipe_stdout_stderr">|&amp;</a>'
-- PipeSnarfDirT = lpeg.P("$S/") /
--    '<a href="http://anggtwu.net/eev-article.html#local-copies">$S/</a>'
PipeSnarfDirT = lpeg.P("$S/") /
   '<a href="http://anggtwu.net/eev-intros/find-psne-intro.html">$S/</a>'
HereDocT      = lpeg.P("<<'%%%'") /
   "<a href=\"http://en.wikipedia.org/wiki/Here-document\">&lt;&lt;'%%%%%%'</a>"

--------[ Parsing and translating lines ]--------

-- Old definition, superseded by the one below:
-- SpecialT = UrlT + AnchorT + SexpLinkT * Eol
--
-- "*" binds tighter than "+", so the Eol applies to SexpLinkT only:
-- a sexp hyperlink is only recognized at the end of a line.
SpecialT = UrlT + AnchorT + PipeAmpT + PipeSnarfDirT + HereDocT
         + SexpLinkT * Eol

EtcChar        = 1 - lpeg.S "\n"
-- EtcSpecial skips EtcChars until a SpecialT matches. It produces:
-- the position where the skipped text starts, the position where the
-- special starts, and the html produced by the special's translator.
EtcSpecial     = Pos * lpeg.P { [1] = Pos * SpecialT + EtcChar * lpeg.V(1) }
EtcSpecials    = EtcSpecial^0
-- ...and after the last special: the start and the end positions of
-- the rest of the line.
EtcSpecialsEtc = EtcSpecials * Pos * EtcChar^0 * Pos

-- «htmlizeline»  (to ".htmlizeline")
-- Htmlize a single line. The captures of EtcSpecialsEtc are collected
-- in a table and handed to the function returned by sbeconcat(str, Q)
-- - defined elsewhere; presumably it applies Q to the stretches of
-- str between the positions and concatenates everything (TODO confirm).
htmlizeline = function (str) DBG("e208");
    return lpeg.match(lpeg.Ct(EtcSpecialsEtc) / sbeconcat(str, Q), str)
  end

-- Old definition, superseded by the one below:
-- htmlizelines = function (bigstr) DBG("e210");
--     return table.concat(map(htmlizeline, splitlines(bigstr)), "\n")
--   end

-- «htmlizelines»  (to ".htmlizelines")
-- 2008jan03: new definition:
-- Htmlize every non-empty line of bigstr; the newlines (and so the
-- empty lines) are not matched by the gsub and are kept as they are.
-- The parentheses drop gsub's second result (the number of matches).
htmlizelines = function (bigstr) DBG("e216");
    return (bigstr:gsub("[^\n]+", htmlizeline))
  end

--------[ Functions for tests: M and M2 ]--------

-- M and M2 match a pattern against s and print the result with PPP
-- (defined elsewhere). The default subjects are esubj and esubj2.
esubj  = [[(find-angg ".emacs" "foo")]]
esubj2 = [[(find-ongg ".emacs" "foo")]]
M  = function (pat, s) PPP("lpeg.match:")(lpeg.match(pat, s or esubj));  print() end
M2 = function (pat, s) PPP("lpeg.match:")(lpeg.match(pat, s or esubj2)); print() end

--[[
-- Tests
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "escripts.lua"
str = "\195\171Hello\195\187"
str = "\171Hello\187"
= AnchorT:match(str)

--]]