-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- brackets.lua - the core of blogme3.
-- This is part of blogme3.
-- Note: the code that htmlizes elisp hyperlinks is in another file:
-- (find-blogme3 "elisp.lua")
--
-- See: (find-angg "LUA/lua50init.lua" "Blogme")
--
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2011jan10
-- <http://anggtwu.net/blogme3/brackets.lua>
-- <http://anggtwu.net/blogme3/brackets.lua.html>
-- License: GPL.
-- «.brackstructure» (to "brackstructure")
-- «.myconcat» (to "myconcat")
-- «.mygather» (to "mygather")
-- «.parsers» (to "parsers")
-- «.parsers_» (to "parsers_")
-- «.readvword» (to "readvword")
-- «.evalblock» (to "evalblock")
-- 2007mar21, Edrx
-- This is part of blogme3.
-- http://anggtwu.net/blogme3/brackets.lua.html
-- (find-a2ps (buffer-file-name))
-- «brackstructure» (to ".brackstructure")
--------[ Bracket Structure ]--------
-- bracketstructure(subj): find every "[" and "]" in the string `subj`
-- and pair them up.
-- Returns three tables:
--   n2pos[n]     = position of the n-th bracket (both kinds are counted),
--   pos2pos[pos] = position of the bracket matching the one at `pos`,
--   pos2n[pos]   = the ordinal n of the bracket at `pos`.
-- Raises an error if a "]" has no pending "[", or if some "[" is still
-- open when the end of `subj` is reached.
-- Uses the length operator `#` instead of table.getn, which is
-- deprecated in Lua 5.1 and absent from later versions.
bracketstructure = function (subj)
  local pos2n, n2pos, pos2pos = {}, {}, {}
  local opens = {}          -- stack: positions of the "["s not yet closed
  local n = 0
  for pos, bracket in string.gmatch(subj, "()([%[%]])") do
    n = n + 1
    pos2n[pos] = n
    n2pos[n] = pos
    if bracket == "[" then
      opens[#opens + 1] = pos
      pos2pos[pos] = "?"    -- placeholder, overwritten when the "]" is found
    else -- bracket == "]"
      if #opens == 0 then
        error("Extra closing bracket at pos " .. pos)
      end
      local openpos = table.remove(opens)
      pos2pos[openpos] = pos
      pos2pos[pos] = openpos
    end
  end
  if #opens > 0 then
    -- Report the innermost "[" that was left open.
    error("Extra opening bracket at pos " .. opens[#opens])
  end
  return n2pos, pos2pos, pos2n
end
-- setsubjto(str): make `str` the current subject.
-- Sets the globals `subj` (the string being parsed) and `pos` (reset to
-- 1), then stores the three tables returned by bracketstructure in the
-- globals `n2pos`, `pos2pos` and `pos2n`.
setsubjto = function (str)
  subj, pos = str, 1
  n2pos, pos2pos, pos2n = bracketstructure(str)
end
-- withsubj(str, fun): run fun() with `str` as the current subject.
-- The five parser globals (subj, pos, n2pos, pos2pos, pos2n) are saved,
-- replaced through setsubjto(str), and put back after fun() returns;
-- whatever fun() returned is then returned to the caller.
-- NOTE(review): if fun() raises an error the saved globals are not put
-- back - confirm that callers do not rely on them afterwards.
withsubj = function (str, fun)
  local saved_subj, saved_pos = subj, pos
  local saved_n2pos, saved_pos2pos, saved_pos2n = n2pos, pos2pos, pos2n
  setsubjto(str)
  local results = pack(fun())
  subj, pos = saved_subj, saved_pos
  n2pos, pos2pos, pos2n = saved_n2pos, saved_pos2pos, saved_pos2n
  return unpack(results)
end
--------[ Some tools ]--------
-- «myconcat» (to ".myconcat")
-- myconcat(T): join the items of the array T.
--   * more than one item: returns table.concat(T, ""), a string;
--   * exactly one item: returns that item unchanged (so a lone number
--     stays a number);
--   * no items: returns nothing.
-- Uses the length operator `#` instead of table.getn, which is
-- deprecated in Lua 5.1 and absent from later versions.
myconcat = function (T)
  local len = #T
  if len > 1 then return table.concat(T, "") end
  if len == 1 then return T[1] end
end
-- «mygather» (to ".mygather")
-- mygather(f): call f() repeatedly and collect its results in an array.
-- Collection stops at the first call that returns nil or false; that
-- value is not stored. Only the first result of each call is used.
mygather = function (f)
  local gathered = {}
  local item = f()
  while item do
    table.insert(gathered, item)
    item = f()
  end
  return gathered
end
--------[ Parsers and Evaluators ]--------
-- This is the most twisted part of blogme3...
-- That's because it involves some recursions.
-- The problem: evaluating a "block" usually involves "reading vwords",
-- where a vword is the "value of a (big) word"; and to obtain the
-- value of a big word we need to evaluate all the blocks in it.
--
-- Something like "[print foo[+ 22 33]bar]" is a "block", and parsing
-- it with parseblock() just returns the position right after the "["
-- (as a number!) and advances pos past the "]"... But that's just the
-- "syntactical level", and that's the easy part; above that there's a
-- "semantical level", where blocks can have "values", obtained by
-- evaluation. To understand how evaluation works we need to
-- understand a function, "evalblock(start)" - the argument "start" is
-- the position right after a "[", as a number -, and two tables, _A and _B,
-- whose keys are strings and whose values are functions:
--
-- _A["print"] is the "argument parser" for "print";
-- _B["print"] is the "blogme code" for "print".
--
-- They are similar to Lua's "_G": _G["print"] is the "Lua code" for
-- "print".
--
-- So: if we run "evalblock(start)" after the "parseblock()" then
-- blogme tries to execute the "print": it first parses "print", then
-- uses the function in _A["print"] to parse the argument list, then
-- runs the code in _B["print"] with those arguments:
-- _B["print"]("foo55bar").
--
-- _A["print"] knows that the arguments are a series of "vwords" -
-- "values of (big) words". A similar idea is that of "qwords" -
-- "quoted (big) words". Parsing "foo[+ 22 33]bar" as a qword (by
-- calling readqword() with pos at the "f") would return this, as a
-- string: "foo[+ 22 33]bar"; but parsing "foo[+ 22 33]bar" as a vword
-- (by calling readvword() with pos at the "f") involves evaluating the
-- blocks in the way - and "[+ 22 33]" evaluates to 55 (a number), and
-- myconcat {"foo", 55, "bar"} returns "foo55bar".
--
-- _A["print"] is set to `readvargs' - a function that returns a
-- variable number of results. In "[HREF http://foo/bar Foo bar]" the
-- function in _A["HREF"] is `readvvrest', that returns exactly two
-- results: first a vword, then a "vrest" - and "vrests" are like
-- vwords, but whitespace chars are treated as regular chars, not as
-- separators; the result of running readvrest() with pos at the "F"
-- is "Foo bar".
--
-- "readvrest" returns the "rest of the arguments" as a string;
-- "readvlist" returns it as an array of vwords; and "readvargs" is
-- like "readvlist" but varargs-ish - it returns the vwords that it
-- can read as several values, like in "return v1, v2, v3". (Note: the
-- choice of terms is not very good - "list" could become "array", and
-- maybe "args" should become "list"...)
--
-- "Parsers" return positions, as numbers; "readers" return "values",
-- that are usually strings. Readers are divided into two classes:
-- "quoters", that don't call evalblock and always return strings, and
-- "evaluators", that call evalblock on blocks; all readers use
-- myconcat and mygather to build their results.
--
-- "Parsers" return nil - and don't advance pos - when they "fail";
-- that is, when they can't parse what they expected. "Readers" return
-- the empty string.
--
-- Readers whose names have the suffix "_" (meaning "low-level") don't
-- advance pos when they fail; readers without the "_" in their names
-- are higher-level versions that call "parsespaces" at some places -
-- high-level readers may advance pos past some whitespace then fail,
-- and when that happens pos is not returned to before the whitespace.
--
-- Char classes:   Basic parsers:   Quoters:      Evaluators:
--   wordchar        parsewchars                    evalblock
--   regularchar     parserchars      readqblock    readvblock
--   spacechar       parsespaces      readqword     readvword
--                   parseblock       readqrest     readvrest
--                                    readqqrest    readvvrest
--                                    readqqqrest   readvvvrest
--                                    readqlist     readvlist
--                                    readqqlist    readvvlist
--                                    readqqqlist   readvvvlist
--                                    readqargs     readvargs
-- (find-blogmefile "blogme2-inner.lua" "-- run_head:")
-- (find-blogmefile "blogme2-middle.lua")
-- «parsers» (to ".parsers")
-- parsebypattern(pat): try to match `pat` against the global `subj`,
-- starting at the global `pos`. The pattern is expected to yield two
-- captures: the matched text and the position after it. On success
-- `pos` is moved to that second capture and the first is returned; on
-- failure `pos` is left alone and nothing is returned.
parsebypattern = function (pat)
  local captured, after = string.match(subj, pat, pos)
  if not after then return end
  pos = after
  return captured
end
-- Basic parsers built on parsebypattern. Each returns the run of
-- characters it consumed, or nothing when there is no such run at pos.

-- parsespaces: a run of spaces, tabs and newlines.
-- Ours includes "\n"; see: (find-blogme3file "miniforth3.lua")
parsespaces = function ()
  return parsebypattern("^([ \t\n]+)()")
end

-- parsewchars: a run of "word chars" - anything except whitespace and
-- square brackets.
parsewchars = function ()
  return parsebypattern("^([^ \t\n%[%]]+)()")
end

-- parserchars: a run of "regular chars" - anything except square
-- brackets (whitespace is accepted).
parserchars = function ()
  return parsebypattern("^([^%[%]]+)()")
end
-- parseblock(): if the global `pos` is at a bracket whose partner (as
-- recorded in the global `pos2pos`) lies further to the right, skip the
-- whole bracketed block: `pos` is moved to just after the partner and
-- the position just after the opening bracket is returned.
-- Otherwise `pos` is untouched and nothing is returned.
parseblock = function ()
  local partner = pos2pos[pos]
  if not (partner and pos < partner) then return end
  local inside = pos + 1
  pos = partner + 1
  return inside
end
-- endofblockp = function ()
-- return (pos2pos[pos] and pos2pos[pos] < pos) or pos == strlen(subj)
-- end
-- «parsers_» (to ".parsers_")
-- readvblock_(): read a block at pos and return its value.
-- When parseblock() finds a block, the block is evaluated with
-- evalblock; a nil/false result is replaced by "". When there is no
-- block at pos, nothing is returned.
readvblock_ = function ()
  local inside = parseblock()
  if not inside then return end
  local value = evalblock(inside)
  if value then return value end
  return ""
end
-- readqblock_(): read a block at pos and return its source text,
-- brackets included, without evaluating anything.
-- When there is no block at pos, nothing is returned.
readqblock_ = function ()
  local inside = parseblock()
  if not inside then return end
  -- `inside` is one past the opening bracket and, after parseblock,
  -- `pos` is one past the closing bracket.
  local first, last = inside - 1, pos - 1
  return string.sub(subj, first, last)
end
-- One-step readers: each tries a run of chars first and, failing that,
-- a block. "w" = word chars, "r" = regular chars; "q" = the block is
-- returned quoted (as text), "v" = the block is evaluated.
-- Each returns a single value, which is nil when both attempts fail.

readwcharsorqblock_ = function ()
  return parsewchars() or readqblock_()
end

readwcharsorvblock_ = function ()
  return parsewchars() or readvblock_()
end

readrcharsorqblock_ = function ()
  return parserchars() or readqblock_()
end

readrcharsorvblock_ = function ()
  return parserchars() or readvblock_()
end
-- Lowest-level word/rest readers: gather pieces with the matching
-- one-step reader until it fails, then join them with myconcat.
-- No whitespace is skipped here.

readqword__ = function ()
  local pieces = mygather(readwcharsorqblock_)
  return myconcat(pieces)
end

readvword__ = function ()
  local pieces = mygather(readwcharsorvblock_)
  return myconcat(pieces)
end

readqrest__ = function ()
  local pieces = mygather(readrcharsorqblock_)
  return myconcat(pieces)
end

readvrest__ = function ()
  local pieces = mygather(readrcharsorvblock_)
  return myconcat(pieces)
end
-- Low-level readers that first skip whitespace with parsespaces() and
-- then delegate to the corresponding "__" reader, returning whatever it
-- returns.

readqword_ = function ()
  parsespaces()
  return readqword__()
end

readvword_ = function ()
  parsespaces()
  return readvword__()
end

readqrest_ = function ()
  parsespaces()
  return readqrest__()
end

readvrest_ = function ()
  parsespaces()
  return readvrest__()
end
-- «readvword» (to ".readvword")
-- High-level readers.
-- readqword/readvword/readqrest/readvrest: like their "_" versions, but
-- a nil/false result is replaced by the empty string.

readqword = function ()
  local word = readqword_()
  if word then return word end
  return ""
end

readvword = function ()
  local word = readvword_()
  if word then return word end
  return ""
end

readqrest = function ()
  local rest = readqrest_()
  if rest then return rest end
  return ""
end

readvrest = function ()
  local rest = readvrest_()
  if rest then return rest end
  return ""
end

-- readqlist/readvlist: gather words with mygather until the word reader
-- fails; the result is an array.
readqlist = function ()
  return mygather(readqword_)
end

readvlist = function ()
  return mygather(readvword_)
end

-- readqargs/readvargs: the same words, but returned as multiple values.
readqargs = function ()
  return unpack(readqlist())
end

readvargs = function ()
  return unpack(readvlist())
end
-- Readers for "k words, then the rest". Each extra "q" (or "v") in the
-- name reads one more leading word; the last result is the rest as
-- read by readqrest (or readvrest). The word is read before the
-- remainder, and all results are returned as multiple values.

readqqrest = function ()
  local word = readqword()
  return word, readqrest()
end

readqqqrest = function ()
  local word = readqword()
  return word, readqqrest()
end

readqqqqrest = function ()
  local word = readqword()
  return word, readqqqrest()
end

readqqqqqrest = function ()
  local word = readqword()
  return word, readqqqqrest()
end

readvvrest = function ()
  local word = readvword()
  return word, readvrest()
end

readvvvrest = function ()
  local word = readvword()
  return word, readvvrest()
end

readvvvvrest = function ()
  local word = readvword()
  return word, readvvvrest()
end

readvvvvvrest = function ()
  local word = readvword()
  return word, readvvvvrest()
end
-- Readers for "k words, then a list". Each extra "q" (or "v") in the
-- name reads one more leading word; the last result is the array built
-- by readqlist (or readvlist). The word is read before the remainder,
-- and all results are returned as multiple values.

readqqlist = function ()
  local word = readqword()
  return word, readqlist()
end

readqqqlist = function ()
  local word = readqword()
  return word, readqqlist()
end

readqqqqlist = function ()
  local word = readqword()
  return word, readqqqlist()
end

readqqqqqlist = function ()
  local word = readqword()
  return word, readqqqqlist()
end

readvvlist = function ()
  local word = readvword()
  return word, readvlist()
end

readvvvlist = function ()
  local word = readvword()
  return word, readvvlist()
end

readvvvvlist = function ()
  local word = readvword()
  return word, readvvvlist()
end

readvvvvvlist = function ()
  local word = readvword()
  return word, readvvvvlist()
end
-- Rename "gather" -> "collect"?
-- _A maps a blogme word (a string) to its argument parser: evalblock
-- calls _A[word]() to read the arguments of a block headed by `word`.
_A = {} -- arglist parser functions for blogme words
-- _B maps a blogme word to the function that implements it; evalblock
-- looks a word up in _B first and falls back to _G.
-- NOTE(review): "_F" below is not defined in this file - presumably a
-- table from an earlier blogme version; confirm, or reword as "_G".
_B = {} -- like _F, but for blogme words
-- «evalblock» (to ".evalblock")
-- evalblock(start): evaluate the block whose contents begin at position
-- `start` of the current subject (the value that parseblock returns).
-- Steps:
--   1. remember the current `pos` and jump to `start`;
--   2. skip whitespace and read the head word with parsewchars();
--   3. read the arguments with _A[word]();
--   4. call _B[word] (or, failing that, _G[word]) on those arguments;
--   5. put `pos` back and return the first result of that call.
-- Raises an error when the block has no head word, when the word has no
-- entry in _A, or when it has no entry in either _B or _G.
-- NOTE(review): `word` is assigned as a global, exactly as before; other
-- code may read it - confirm before turning it into a local.
evalblock = function (start)
  local oldpos = pos
  pos = start
  parsespaces()
  word = parsewchars()
  -- PPP("word:")(word)
  if not word then
    -- Without this check the concatenations below would fail with an
    -- unrelated "attempt to concatenate a nil value" message.
    error("No head word in block whose contents start at pos "
          .. tostring(start))
  end
  local argsfun = _A[word] or error("Not in _A: " .. word)
  local headfun = _B[word] or _G[word] or error("Not in _B or _G: " .. word)
  local result = headfun(argsfun())
  pos = oldpos
  return result
end
-- doblogme(str): evaluate the blogme source in `str`.
-- Runs readvrest with `str` as the subject (through withsubj) and
-- returns what withsubj returns.
doblogme = function (str)
  return withsubj(str, readvrest)
end
-- Two predefined blogme words.

-- "PP": the arguments are read as an array of vwords and handed to the
-- global PP (defined outside this file).
_A.PP = readvlist
_B.PP = PP

-- "lua": the rest of the block is taken verbatim (not evaluated as
-- blogme), prefixed with "return ", compiled with loadstring and run;
-- the results of the chunk are returned. A compilation error is raised
-- through assert.
-- NOTE(review): this executes arbitrary Lua code found in the input.
_A.lua = readqrest
_B.lua = function (str)
  local chunk = assert(loadstring("return "..str))
  return chunk()
end
-- Local Variables:
-- coding: raw-text-unix
-- End: