-- -*- coding: raw-text-unix -*- -- escripts.lua - convert ascii files (e-scripts) to html. -- This is part of blogme3. -- Author: Eduardo Ochs -- Version: 2007apr16 -- -- -- License: GPL. -- -- Here's a very brief explanation of what this file does: -- When we run this, -- -- lua51 blogme3.lua -o foo.html -a2html foo -- -- blogme3.lua processes the options according to the entries in the -- table "_O"; "-o foo.html" sets the output file, and "-a2html foo" -- says to read the file "foo", process it with the default htmlizer, -- add an HTML header and a footer, and write the result to the output -- file. The default htmlizer is the function "htmlizelines", defined -- below. It applies "htmlizeline" on each line, and "htmlizeline" -- handles these cases: -- -- * Glyphs, non-ascii characters, and characters that need to be -- sgml-quoted. Examples: "&<>«»αΊΧ". -- See: (find-blogme3 "charset.lua") -- -- * Sexp hyperlinks. See: -- (find-eevarticlesection "hyperlinks") -- (find-eevarticlesection "shorter-hyperlinks") -- (find-eevarticlesection "e-scripts") -- (find-blogme3 "elisp.lua") -- (find-blogme3 "angglisp.lua") -- -- * Anchors, and "to" links pointing to anchors. An example: -- «here» (to "there") -- «there» (to "here") -- See: (find-eevarticlesection "anchors") -- -- * Urls, and a few special strings: |&, $S/, <<'%%%'. 
-- (find-blogme3 "options.lua" "basic-options")
-- (find-blogme3 "options.lua" "htmlizefile")
-- (find-blogme3 "escripts.lua" "htmlizelines")
-- (find-blogme3grep "grep -nH -e htmlizer *")
-- (find-blogme3grep "grep -nH -e htmlizefile *")
-- (find-es "lua5" "sheadsymbol-roberto")
-- (find-angg "LUA/lua50init.lua")
-- (find-angg "LUA/lua50init.lua" "loadlpeg")
-- (find-lpegw3m "doc.html" "function anywhere (p)")
-- (find-blogmefile "blogme2-outer.lua" "entities and quoting (Q)")
-- (find-anggfile "TH/Generate")
-- (find-anggfile "TH/Generate" "txt2html")
-- (ascstr 33 126)
-- !"#$%&'()*+,-./ :;<=>?@ [\]^_` {|}~
-- 0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz

-- (find-blogme3file "defs.lua" "Q_table =")
-- Q(text): sgml-quote the characters "&", "<" and ">" in text.
-- Q_re selects the characters to quote and Q_table gives the entity
-- for each of them. Every entry must map to an entity: an entry that
-- maps a character to itself would leave the output unquoted.
-- "translatechars" is defined elsewhere (presumably in lua50init.lua -
-- TODO confirm); it receives the text, the pattern and the table.
Q_re    = "([&<>])"
Q_table = { ["&"]="&amp;", ["<"]="&lt;", [">"]="&gt;" }
Q = function (text) return translatechars(text, Q_re, Q_table) end



--------[ Basics ]--------

loadlpeg()

-- Eos: succeeds only at the end of the subject.
-- Eol: succeeds, without consuming anything, right before a newline
--      or at the end of the subject.
-- Pos: a position capture.
Eos = lpeg.P(-1)
Eol = #(lpeg.S "\n") + Eos
Pos = lpeg.Cp()
AlphaNumeric = lpeg.R("AZ", "az", "09")



--------[ Anchor ]--------

-- An anchor is a non-empty run of AnchorChars between the bytes 171
-- and 187; the text between the delimiters is captured.
AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~")
Anchor     = "\171" * lpeg.C(AnchorChar^1) * "\187"



--------[ Url ]--------

-- "https" must come before "http": lpeg's ordered choice commits to
-- the first alternative that matches, so with "http" first the "s" of
-- "https://" would make the following "://" fail and the url would
-- not be recognized at all.
-- A Url needs a "/" after the domain; the path may be empty.
UrlProtocol   = lpeg.P("https") + lpeg.P("http") + lpeg.P("ftp") + lpeg.P("shttp")
UrlDomainChar = lpeg.R("az", "09") + lpeg.S("-")
UrlDomain     = UrlDomainChar^1 * ("." * UrlDomainChar^1)^0
UrlPathChar   = AlphaNumeric + lpeg.S("!#$%&()*+,-./:;=?@[]^_{|}~")
UrlPath       = UrlPathChar^0
Url           = UrlProtocol * "://" * UrlDomain * "/" * UrlPath



--------[ SString, SNumber, SSymbol ]--------

-- SString: a double quote, any characters other than double quotes
--          and newlines, and a closing double quote.
-- SNumber: an optional "-" followed by one or more digits.
-- SSymbol: one or more characters in the range "!".."~" that are
--          neither SNonSymbolChars nor braces.
-- NOTE(review): SStringBsl is defined but not used by SString, so a
-- backslash-escaped double quote ends the string - confirm that this
-- is intended.
SStringChar    = 1 - lpeg.S "\"\n"
SStringBsl     = "\\" * (1 - lpeg.S "\n")
SString        = "\"" * SStringChar^0 * "\""
SNumber        = lpeg.P"-"^-1 * lpeg.R"09"^1
SNonSymbolChar = lpeg.S "\"#'(),.[\\]`"
SSymbolChar    = lpeg.R "!~" - SNonSymbolChar - lpeg.S "{}"
SSymbol        = SSymbolChar^1



--------[ SHeadSymbol, SSexpLink ]--------

-- SHeadSymbol is like SSymbol but more strict.
-- Its logic is: if SSymbol matches at pos, then check if the symbol
-- (as a string) has an entry in the table _E; if yes, then return its
-- ending position. lpeg.P is used to convert that function - that has
-- signature subj,pos|->endposornil - into a pattern.
--
-- The table _E contains the "code to htmlize elisp hyperlinks"...
-- Its entries - one for each htmlizable elisp hyperlink function -
-- are functions with signatures like
-- "all,funname,qarg1,qarg2|->html", like this:
--
--   _E["to"] = function (all, funname, qarg1, qarg2)
--       return href("#"..dequote(qarg1), Q(all))
--     end
--
-- SexpLink matches sexps that are lists formed by a SHeadSymbol
-- followed by zero or more atoms. This covers most of the kinds of
-- hyperlink sexps that I would like to htmlize, but not all. For
-- example, this is not recognized:
--   (find-iconbookpage (+ 22 143))

_E = _E or {}

SHeadSymbol = lpeg.P(function (subj, pos)
    local e = lpeg.match(SSymbol, subj, pos)
    local symbol = e and string.sub(subj, pos, e - 1)
    return symbol and _E[symbol] and e
  end)

SAtom    = SString + SNumber + SSymbol
SSpace   = lpeg.S " \t"
SSpaces  = SSpace^1
SexpLink = "(" * lpeg.C(SHeadSymbol) * (SSpaces * lpeg.C(SAtom))^0 * ")"



--------[ Translators ]--------

-- The translators return html that is inserted in the output as it
-- is, so each of them has to quote whatever needs quoting (see the
-- Q(all) fallback in assexplink).

-- dequote: drop the first and the last characters (the double quotes
-- of an SString). Returns nil (or false) when sstr is nil (or false).
dequote = function (sstr) return sstr and string.sub(sstr, 2, -2) end

-- href: wrap text in a link to target; when target is nil or false
-- return text unchanged. text must be already quoted.
href = function (target, text)
    if target then return format('<a href="%s">%s</a>', target, text) end
    return text
  end

-- asurl: make an url a link to itself. UrlPathChar accepts "&", so
-- the url is quoted before being inserted in the html. The local
-- keeps a single value in case Q returns more than one.
asurl = function (url)
    local qurl = Q(url)
    return format('<a href="%s">%s</a>', qurl, qurl)
  end

-- asanchor: turn an anchor into a named html anchor between guillemet
-- entities. The entities are spelled in ascii so that the output does
-- not depend on the encoding in which this file is saved.
asanchor = function (anchor)
    return format('&laquo;<a name="%s">' ..
                  '%s</a>&raquo;', anchor, anchor)
  end

-- assexplink: delegate to the _E entry for funname, or just quote the
-- sexp when there is no such entry. Atoms after the second one are
-- not passed to the _E entry.
assexplink = function (all, funname, qarg1, qarg2)
    if _E[funname] then
      return _E[funname](all, funname, qarg1, qarg2)
    else
      return Q(all)
    end
  end

UrlT      = lpeg.C(Url)      / asurl
SexpLinkT = lpeg.C(SexpLink) / assexplink
AnchorT   = Anchor           / asanchor



--------[ Some extra translators ]--------

-- In an lpeg string replacement "%%" stands for a single "%", so the
-- six "%"s in HereDocT produce the three "%"s of the matched text.
-- NOTE(review): PipeSnarfDirT replaces "$S/" by itself; presumably a
-- placeholder for some html that is not visible here - confirm.
PipeAmpT      = lpeg.P("|&")      / '|&amp;'
PipeSnarfDirT = lpeg.P("$S/")     / '$S/'
HereDocT      = lpeg.P("<<'%%%'") / "&lt;&lt;'%%%%%%'"



--------[ Parsing and translating lines ]--------

-- "*" binds tighter than "+", so only a sexp link has to be followed
-- by an end of line; the other specials can occur anywhere.
SpecialT = UrlT + AnchorT + PipeAmpT + PipeSnarfDirT + HereDocT
         + SexpLinkT * Eol

-- EtcSpecial skips non-newline characters until SpecialT matches, and
-- produces three captures: the position where the skipping started,
-- the position where the special starts, and the translation of the
-- special. EtcSpecialsEtc is any number of those followed by the
-- positions of the start and of the end of the rest of the line.
EtcChar        = 1 - lpeg.S "\n"
EtcSpecial     = Pos * lpeg.P { [1] = Pos * SpecialT + EtcChar * lpeg.V(1) }
EtcSpecials    = EtcSpecial^0
EtcSpecialsEtc = EtcSpecials * Pos * EtcChar^0 * Pos

-- htmlizeline: htmlize a single line. The captures of EtcSpecialsEtc
-- are collected in a table and handed to the function returned by
-- sbeconcat(str, Q) ("sbeconcat" is defined elsewhere; presumably it
-- applies Q to the stretches between the specials and concatenates
-- everything - TODO confirm).
htmlizeline = function (str)
    return lpeg.match(lpeg.Ct(EtcSpecialsEtc) / sbeconcat(str, Q), str)
  end

-- htmlizelines: htmlize each non-empty line of bigstr in place and
-- leave the newlines alone. The parentheses discard the substitution
-- count returned by gsub.
-- (2008jan03: this definition replaced an older one based on "map"
-- and "splitlines".)
htmlizelines = function (bigstr)
    return (bigstr:gsub("[^\n]+", htmlizeline))
  end



--------[ Functions for tests: M and M2 ]--------

-- M and M2 print the results of matching pat against s. When s is
-- omitted M uses esubj and M2 uses esubj2.
esubj  = [[(find-angg ".emacs" "foo")]]
esubj2 = [[(find-ongg ".emacs" "foo")]]
M  = function (pat, s) PPP("lpeg.match:")(lpeg.match(pat, s or esubj));  print() end
M2 = function (pat, s) PPP("lpeg.match:")(lpeg.match(pat, s or esubj2)); print() end