-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- brackets.lua - the core of blogme3.
-- This is part of blogme3.
-- Note: the code that htmlizes elisp hyperlinks is in another file:
--   (find-blogme3 "elisp.lua")
--
-- See: (find-angg "LUA/lua50init.lua" "Blogme")
--
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2011jan10
-- <http://anggtwu.net/blogme3/brackets.lua>
-- <http://anggtwu.net/blogme3/brackets.lua.html>
-- License: GPL.

-- «.brackstructure»  (to "brackstructure")
-- «.myconcat»        (to "myconcat")
-- «.mygather»        (to "mygather")
-- «.parsers»         (to "parsers")
-- «.parsers_»        (to "parsers_")
-- «.readvword»       (to "readvword")
-- «.evalblock»       (to "evalblock")

-- 2007mar21, Edrx
-- This is part of blogme3.
-- http://anggtwu.net/blogme3/brackets.lua.html
-- (find-a2ps (buffer-file-name))



-- «brackstructure»  (to ".brackstructure")
--------[ Bracket Structure ]--------

-- bracketstructure(subj): scan the string subj for "[" and "]" and
-- match them up. Returns three tables, in this order:
--   n2pos:   n2pos[n] is the position of the n-th bracket in subj;
--   pos2pos: pos2pos[p] is the position of the bracket matching the
--            bracket at position p (in both directions);
--   pos2n:   pos2n[p] is the ordinal n of the bracket at position p.
-- Raises an error when a "]" has no matching "[", or when some "["
-- is left unclosed at the end of subj.
-- (The length operator "#" is used instead of table.getn, which is
-- deprecated in Lua 5.1 and no longer exists in Lua 5.2.)
bracketstructure = function (subj)
    local pos2n, n2pos, pos2pos = {}, {}, {}
    local opens = {}   -- stack: positions of the "["s not yet closed
    local n = 0
    for pos, bracket in string.gmatch(subj, "()([%[%]])") do
      n = n + 1
      pos2n[pos] = n
      n2pos[n] = pos
      if bracket == "[" then
        opens[#opens + 1] = pos
        pos2pos[pos] = "?"   -- placeholder, overwritten when the "]" is found
      elseif #opens > 0 then
        local openpos = table.remove(opens)
        pos2pos[openpos] = pos
        pos2pos[pos] = openpos
      else
        error("Extra closing bracket at pos " .. pos)
      end
    end
    if #opens > 0 then
      error("Extra opening bracket at pos " .. opens[#opens])
    end
    return n2pos, pos2pos, pos2n
  end

-- setsubjto(str): make str the current subject. Sets the globals
-- subj, pos (to 1), n2pos, pos2pos and pos2n.
-- The bracket structure is computed before any global is touched, so
-- that when str has unbalanced brackets the error leaves the previous
-- subject intact (instead of a new subj with stale bracket tables).
setsubjto = function (str)
    local newn2pos, newpos2pos, newpos2n = bracketstructure(str)
    subj = str
    pos = 1
    n2pos, pos2pos, pos2n = newn2pos, newpos2pos, newpos2n
  end

-- withsubj(str, fun): run fun() with str as the current subject, then
-- put back the previous subj, pos, n2pos, pos2pos and pos2n, and
-- return whatever fun returned.
-- "pack" is not defined in this file - presumably it comes from
-- lua50init.lua; table.pack and table.unpack are used as fallbacks
-- where the Lua 5.1 globals are missing.
-- NOTE: if fun raises an error the previous subject is not restored.
withsubj = function (str, fun)
    local subj_, pos_, n2pos_, pos2pos_, pos2n_ =
      subj, pos, n2pos, pos2pos, pos2n
    setsubjto(str)
    local results = (pack or table.pack)(fun())
    subj, pos, n2pos, pos2pos, pos2n =
      subj_, pos_, n2pos_, pos2pos_, pos2n_
    return (unpack or table.unpack)(results)
  end



--------[ Some tools ]--------

-- «myconcat»  (to ".myconcat")
-- myconcat(T): join the items of the array T.
--   two or more items: their concatenation, as a string;
--   exactly one item:  that item itself, unchanged (so a number stays
--                      a number);
--   no items:          nil.
myconcat = function (T)
    local count = #T
    if count > 1 then return table.concat(T, "") end
    if count == 1 then return T[1] end
  end

-- «mygather»  (to ".mygather")
-- mygather(f): call f() repeatedly, collecting its results in an
-- array, until f returns nil or false; return the array (possibly
-- empty).
mygather = function (f)
    local T = {}
    while true do
      local item = f()
      if not item then return T end
      T[#T + 1] = item
    end
  end



--------[ Parsers and Evaluators ]--------
-- This is the most twisted part of blogme3...
-- That's because it involves some recursions.
-- The problem: evaluating a "block" usually involves "reading vwords",
-- where a vword is the "value of a (big) word"; and to obtain the
-- value of a big word we need to evaluate all the blocks in it.
--
-- Something like "[print foo[+ 22 33]bar]" is a "block", and parsing
-- it with parseblock() just returns the position just after the "["
-- (as a number!) and advances pos past the "]"... But that's just the
-- "syntactical level", and that's the easy part; above that there's a
-- "semantical level", where blocks can have "values", obtained by
-- evaluation. To understand how evaluation works we need to
-- understand a function, "evalblock(start)" - the argument "start" is
-- the position just after a "[", as a number -, and two tables, _A
-- and _B, whose keys are strings and whose values are functions:
--
--   _A["print"] is the "argument parser" for "print";
--   _B["print"] is the "blogme code" for "print".
--
-- They are similar to Lua's "_G": _G["print"] is the "Lua code" for
-- "print".
--
-- So: if we run "evalblock(start)" after the "parseblock()" then
-- blogme tries to execute the "print": it first parses "print", then
-- uses the function in _A["print"] to parse the argument list, then
-- runs the code in _B["print"] with those arguments:
-- _B["print"]("foo55bar").
--
-- _A["print"] knows that the arguments are a series of "vwords" -
-- "values of (big) words". A similar idea is that of "qwords" -
-- "quoted (big) words". Parsing "foo[+ 22 33]bar" as a qword (by
-- calling readqword() with pos at the "f") would return this, as a
-- string: "foo[+ 22 33]bar"; but parsing "foo[+ 22 33]bar" as a vword
-- (by calling readvword() with pos at the "f") involves evaluating
-- the blocks along the way - and "[+ 22 33]" evaluates to 55 (a
-- number), and myconcat {"foo", 55, "bar"} returns "foo55bar".
--
-- _A["print"] is set to `readvargs' - a function that returns a
-- variable number of results. In "[HREF http://foo/bar Foo bar]" the
-- function in _A["HREF"] is `readvvrest', that returns exactly two
-- results: first a vword, then a "vrest" - and "vrests" are like
-- vwords, but whitespace chars are treated as regular chars, not as
-- separators; the result of running readvrest() with pos at the "F"
-- is "Foo bar".
--
-- "readvrest" returns the "rest of the arguments" as a string;
-- "readvlist" returns it as an array of vwords; and "readvargs" is
-- like "readvlist" but varargs-ish - it returns the vwords that it
-- can read as several values, like in "return v1, v2, v3". (Note: the
-- choice of terms is not very good - "list" could become "array", and
-- maybe "args" should become "list"...)
--
-- "Parsers" return what they matched - the basic ones return the
-- matched text, and parseblock returns a position, as a number;
-- "readers" return "values", that are usually strings. Readers are
-- divided into two classes: "quoters", that don't call evalblock and
-- always return strings, and "evaluators", that call evalblock on
-- blocks; all readers use myconcat and mygather to build their
-- results.
--
-- "Parsers" return nil - and don't advance pos - when they "fail";
-- that is, when they can't parse what they expected.
--
-- The word and rest readers come in three layers:
--   * names ending in "__" (readqword__, readvrest__, ...) are the
--     low-level versions: they don't skip whitespace, and they return
--     nil when there is nothing to read;
--   * names ending in "_" (readqword_, readvrest_, ...) call
--     parsespaces first and then the "__" version - so they may
--     advance pos past some whitespace and then fail, and when that
--     happens pos is not returned to before the whitespace;
--   * names without a "_" suffix (readqword, readvrest, ...) are like
--     the "_" versions, but return the empty string instead of nil.
-- The block readers (readqblock_, readvblock_) and the
-- "read?charsor?block_" helpers never skip whitespace.
--
-- Char classes:   Basic parsers:   Quoters:        Evaluators:
--   wordchar        parsewchars                     evalblock
--   regularchar     parserchars    readqblock      readvblock
--   spacechar       parsespaces    readqword       readvword
--                   parseblock     readqrest       readvrest
--                                  readqqrest      readvvrest
--                                  readqqqrest     readvvvrest
--                                  readqlist       readvlist
--                                  readqqlist      readvvlist
--                                  readqqqlist     readvvvlist
--                                  readqargs       readvargs

-- (find-blogmefile "blogme2-inner.lua" "-- run_head:")
-- (find-blogmefile "blogme2-middle.lua")

-- «parsers»  (to ".parsers")

-- parsebypattern(pat): match the pattern pat against the global subj,
-- starting at the global pos. pat must have two captures: the text to
-- return and a position capture "()" marking where parsing resumes.
-- On success sets pos to that position and returns the text; on
-- failure returns nothing and leaves pos alone.
parsebypattern = function (pat)
    local capture, newpos = string.match(subj, pat, pos)
    if newpos then
      pos = newpos
      return capture
    end
  end

-- Basic parsers. Note that our notion of "space" includes "\n";
-- see: (find-blogme3file "miniforth3.lua")
--   parsespaces: one or more spaces, tabs or newlines;
--   parsewchars: one or more "word chars" (not whitespace, not brackets);
--   parserchars: one or more "regular chars" (anything but brackets).
parsespaces = function () return parsebypattern("^([ \t\n]+)()") end
parsewchars = function () return parsebypattern("^([^ \t\n%[%]]+)()") end
parserchars = function () return parsebypattern("^([^%[%]]+)()") end

-- parseblock(): if pos is at a "[" then move pos to just after the
-- matching "]" and return the position just after the "[";
-- otherwise return nothing and leave pos alone.
parseblock = function ()
    local closepos = pos2pos[pos]
    if closepos and pos < closepos then
      local inside = pos + 1
      pos = closepos + 1
      return inside
    end
  end

-- endofblockp = function ()
--     return (pos2pos[pos] and pos2pos[pos] < pos) or pos == strlen(subj)
--   end



-- «parsers_»  (to ".parsers_")

-- readvblock_(): parse a block at pos and return its value (with a
-- nil or false value replaced by ""); nil if pos is not at a block.
readvblock_ = function ()
    local blockstart = parseblock()
    if blockstart then return evalblock(blockstart) or "" end
  end

-- readqblock_(): parse a block at pos and return its source text,
-- brackets included; nil if pos is not at a block.
readqblock_ = function ()
    local blockstart = parseblock()
    if blockstart then
      -- blockstart - 1 is the "[", and pos - 1 is the matching "]".
      return string.sub(subj, blockstart - 1, pos - 1)
    end
  end

-- One "piece" of a word (w) or of a rest (r): either a run of chars
-- or a whole block, quoted (q) or evaluated (v).
readwcharsorqblock_ = function () return parsewchars() or readqblock_() end
readwcharsorvblock_ = function () return parsewchars() or readvblock_() end
readrcharsorqblock_ = function () return parserchars() or readqblock_() end
readrcharsorvblock_ = function () return parserchars() or readvblock_() end

-- Low-level readers: gather pieces and join them; nil when no piece
-- could be read.
readqword__ = function () return myconcat(mygather(readwcharsorqblock_)) end
readvword__ = function () return myconcat(mygather(readwcharsorvblock_)) end
readqrest__ = function () return myconcat(mygather(readrcharsorqblock_)) end
readvrest__ = function () return myconcat(mygather(readrcharsorvblock_)) end

-- The same, but skipping leading whitespace first.
readqword_ = function () parsespaces(); return readqword__() end
readvword_ = function () parsespaces(); return readvword__() end
readqrest_ = function () parsespaces(); return readqrest__() end
readvrest_ = function () parsespaces(); return readvrest__() end

-- «readvword»  (to ".readvword")

-- High-level readers: like the "_" versions, but "" instead of nil.
readqword = function () return readqword_() or "" end
readvword = function () return readvword_() or "" end
readqrest = function () return readqrest_() or "" end
readvrest = function () return readvrest_() or "" end

-- "list": all the remaining words, as an array.
readqlist = function () return mygather(readqword_) end
readvlist = function () return mygather(readvword_) end

-- "args": all the remaining words, as multiple values.
-- (unpack became table.unpack in Lua 5.2.)
readqargs = function () return (unpack or table.unpack)(readqlist()) end
readvargs = function () return (unpack or table.unpack)(readvlist()) end

-- One word for each extra "q" (or "v"), then the rest as a string.
readqqrest    = function () return readqword(), readqrest() end
readqqqrest   = function () return readqword(), readqqrest() end
readqqqqrest  = function () return readqword(), readqqqrest() end
readqqqqqrest = function () return readqword(), readqqqqrest() end

readvvrest    = function () return readvword(), readvrest() end
readvvvrest   = function () return readvword(), readvvrest() end
readvvvvrest  = function () return readvword(), readvvvrest() end
readvvvvvrest = function () return readvword(), readvvvvrest() end

-- One word for each extra "q" (or "v"), then the rest as an array.
readqqlist    = function () return readqword(), readqlist() end
readqqqlist   = function () return readqword(), readqqlist() end
readqqqqlist  = function () return readqword(), readqqqlist() end
readqqqqqlist = function () return readqword(), readqqqqlist() end

readvvlist    = function () return readvword(), readvlist() end
readvvvlist   = function () return readvword(), readvvlist() end
readvvvvlist  = function () return readvword(), readvvvlist() end
readvvvvvlist = function () return readvword(), readvvvvlist() end

-- Rename "gather" -> "collect"?



_A = {}    -- arglist parser functions for blogme words
_B = {}    -- like _F, but for blogme words

-- «evalblock»  (to ".evalblock")

-- evalblock(start): evaluate the block whose contents begin at
-- position start (the value returned by parseblock). Reads the head
-- word, parses the arguments with _A[word], calls _B[word] - or, if
-- there is none, _G[word] - on them, puts pos back where it was
-- before the call, and returns the (first) result.
-- Raises an error if the block has no head word, if the word is not
-- in _A, or if it is in neither _B nor _G.
evalblock = function (start)
    local oldpos = pos
    pos = start
    parsespaces()
    local head = parsewchars()
    -- The original code kept the head word in the global "word"; it is
    -- still set here in case code outside this file reads it.
    word = head
    -- PPP("word:")(word)
    if head == nil then
      -- Happens for blocks like "[]" or "[ [x]]"; without this check the
      -- messages below would fail with "attempt to concatenate a nil value".
      error("Block without a head word at pos " .. tostring(start))
    end
    local argsfun = _A[head] or error("Not in _A: " .. head)
    local headfun = _B[head] or _G[head]
                    or error("Not in _B or _G: " .. head)
    local result = headfun(argsfun())
    pos = oldpos
    return result
  end

-- doblogme(str): evaluate str as a "vrest" and return the result.
doblogme = function (str)
    return withsubj(str, readvrest)
  end

_A["PP"] = readvlist
_B["PP"] = PP

-- [lua <expression>]: the value of a Lua expression.
-- WARNING: this runs arbitrary Lua code taken from the text being
-- processed - only use it on trusted input.
_A["lua"] = readqrest
_B["lua"] = function (str)
    local compile = loadstring or load   -- loadstring is gone in Lua 5.2+
    return assert(compile("return " .. str))()
  end



-- Local Variables:
-- coding: raw-text-unix
-- End: