-- blogme4/eval.lua (htmlized)
--
-- Warning: this is an htmlized version!
-- The original is here, and
-- the conversion rules are here.
-- eval.lua: functions for parsing and evaluating blogme code.
-- This file:
--   http://angg.twu.net/blogme4/eval.lua.html
--   http://angg.twu.net/blogme4/eval.lua
--            (find-blogme4file "eval.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2011feb17
-- License: GPL3
--
-- «.control-flow»	(to "control-flow")
-- «.parse_pattern»		(to "parse_pattern")
-- «.tlongwords»		(to "tlongwords")
-- «.qlongwords»		(to "qlongwords")
-- «.tlongword_mapconcat»	(to "tlongword_mapconcat")
-- «.longwords»			(to "longwords")
-- «.readvword»			(to "readvword")
-- «.readvrest»			(to "readvrest")
-- «.readqword»			(to "readqword")
-- «.readqrest»			(to "readqrest")
-- «._A»			(to "_A")
-- «.with_subj»			(to "with_subj")
-- «.blogme_eval»		(to "blogme_eval")

-- «.test-tlongword»		(to "test-tlongword")
-- «.test-parse»		(to "test-parse")
-- «.test-read»			(to "test-read")



-- «control-flow»  (to ".control-flow")
-- The difficult part of evaluation is dealing with the [] "blocks".
-- The execution flow involves this very nasty recursion:
--
--        blogme_eval(subj)
--            v
--        with_subj(subj, readverylongword)
--            v
--        readverylongword()  <--------------------------\
--            v                                          |
--        parse_verylongword(blogme_evalblock)           |
--            v                                          |
--    /-> tlongword_mapconcat(blogme_evalblock, T, "")   |
--    |       :                                          |
--    |       v                                          |
--    |   blogme_evalblock(s, e)                         |
--    |       v                                          |
--    |   with_pos_endpos(s+1, e-1, blogme__eval)        |
--    |       v                                          |
--    |   blogme__eval()                                 |
--    |       v                                          |
--    |   _A[argp]()                                     |
--    |       :                                          |
--    |       v                                          |
--    |   readvvvrest()                                  |
--    |      | |        \                                |
--    |      v v         v                               |
--    |   readvword()      readvrest()                   |
--    |       v	       	        |     			 |
--    |   readlongword()	\------------------------/
--    |       v
--    \-- parse_longword(blogme_evalblock)



-- «parse_pattern»  (to ".parse_pattern")
-- Conventions for the "parse*" functions
-- ======================================
-- They operate on these four globals:
--   subj   (read-only),
--   pos    (advanced when parsing succeeds, unchanged when not),
--   oldpos (gets the old value of pos on success, garbage on failure),
--   result (discussed below; relevant on success, garbage on failure),
-- and they return true if they succeed, nil if they fail.
-- They are all called either "parse__blah" or "parse_blah".
-- The "parse__blah" functions just store "oldpos" in "result".
-- The "parse_blah" functions are more complex, and they produce less
-- trivial "result"s. In most cases - mainly the cases implemented
-- with "parse_pattern" - their results are the substring of subj
-- between oldpos and pos; the cases involving "longwords" will be
-- described below.
--
-- The "blah" in the names of the "parse__blah" and "parse_blah"
-- functions indicates what those functions try to parse, and
-- (sometimes) how the result is calculated from the parsed region:
--   "spaces": one or more whitespace chars
--   "block":  a region enclosed in balanced "[]"s
--   "wchars": one or more "word chars", i.e., which are neither
--     whitespace nor "[]"s
--   "rchars": one or more "regular chars", i.e., those which are
--     not "[]"s.
--
-- Try to match the Lua pattern "pat" against the global "subj", starting
-- at the global "pos". The pattern is expected to produce two captures:
-- the value to be stored in "result", and the position right after the
-- parsed region.
-- On success: "oldpos" gets the starting position, "result" gets the
-- first capture, "pos" gets the second capture, and true is returned.
-- On failure: "pos" stays where it was, "oldpos" gets that same position,
-- "result" becomes nil, and nothing is returned.
parse_pattern = function (pat)
  local start = pos
  local captured, after = subj:match(pat, start)
  oldpos, result = start, captured
  if after then
    pos = after
    return true
  end
  pos = start
end
-- Pattern-based parsers. Each one is a thin wrapper around parse_pattern.
-- The "parse__blah" variants capture the starting position as their
-- result; the "parse_blah" variants capture the matched text.

-- "spaces": one or more blanks, tabs or newlines.
parse__spaces = function ()
  return parse_pattern("^()[ \t\n]+()")
end
parse_spaces = function ()
  return parse_pattern("^([ \t\n]+)()")
end

-- "wchars": one or more chars that are neither whitespace nor "[" / "]".
parse__wchars = function ()
  return parse_pattern("^()[^ \t\n%[%]]+()")
end
parse_wchars = function ()
  return parse_pattern("^([^ \t\n%[%]]+)()")
end

-- "rchars": one or more chars that are not "[" / "]".
parse__rchars = function ()
  return parse_pattern("^()[^%[%]]+()")
end
parse_rchars = function ()
  return parse_pattern("^([^%[%]]+)()")
end

-- "block": a region enclosed in balanced "[]"s (slow).
parse__block = function ()
  return parse_pattern("^()%b[]()")
end
parse_block = function ()
  return parse_pattern("^(%b[])()")
end



-- «tlongwords»  (to ".tlongwords")
-- A "longword" is something of the form "(wchars | block)+", and a
-- "verylongword" is something of the form "(rchars | block)+". A
-- string like "a[+ 1 2][+ 3 4]b c[+ 5 6]d" is two longwords,
-- separated by a space, and is a single verylongword; verylongwords
-- end at "]"s or the end of the string, while longwords can also end
-- at whitespace.

-- Usually we want the "value" of a longword/verylongword; the "value"
-- is calculated by replacing each "[]" in the {very}longword by its
-- result - for example, the "value" of "a[+ 1 2][+ 3 4]b" is "a37b".
-- To calculate these "values" we need a nasty recursion, so here we
-- start with something simpler.
--
-- A "tlongword" is an array of strings and {begpos, endpos} pairs.
-- For example, the value "as a tlongword" of the string
--        --          11111111112222
--        -- 12345678901234567890123
--   subj = "e[+ 1 [+ 2 3]]f[+ 4 5]"
-- is:
--   {"e", {2, 15}, "f", {16, 22}}
-- Note that:
--   subj:sub(2, 15) == "[+ 1 [+ 2 3]]"
--
-- Parse a sequence "(xchars | block)+" starting at the global "pos",
-- where "xchars" is whatever the given parser "parse_xchars" accepts
-- (in this file: parse_wchars or parse_rchars).
-- The global "result" becomes a table holding, in order, the strings
-- parsed by parse_xchars and a {begpos, endpos} pair for each block;
-- "oldpos" becomes the position where parsing started.
-- Returns true when at least one piece was parsed, false otherwise.
parse__xcharsandblocks = function (parse_xchars)
  local start  = pos
  local pieces = {}
  -- Consume every block found at the current position, recording a
  -- {begpos, endpos} pair for each of them.
  local collect_blocks = function ()
    while parse__block() do
      pieces[#pieces + 1] = {oldpos, pos}
    end
  end
  collect_blocks()
  while parse_xchars() do
    pieces[#pieces + 1] = result     -- the string parsed by parse_xchars
    collect_blocks()
  end
  result = pieces
  oldpos = start
  return #pieces > 0
end
-- Parse a longword, "(wchars | block)+", as a tlongword: the pieces are
-- collected by parse__xcharsandblocks using parse_wchars for the
-- non-block parts.
parse_tlongword = function ()
  local word_chars = parse_wchars
  return parse__xcharsandblocks(word_chars)
end

-- Parse a verylongword, "(rchars | block)+", as a tlongword: same as
-- above, but using parse_rchars, so whitespace does not end the word.
parse_tverylongword = function ()
  local regular_chars = parse_rchars
  return parse__xcharsandblocks(regular_chars)
end


-- «qlongwords»  (to ".qlongwords")
-- Quoted longwords.
-- These are used by blogme words like "#" and "lua:".
-- Quoted longword, position variant: parse a longword without evaluating
-- its blocks and store its starting position in "result".
-- Returns true on success, nothing on failure.
parse__qlongword = function ()
  if not parse_tlongword() then return end
  result = oldpos
  return true
end

-- Quoted longword, text variant: parse a longword without evaluating its
-- blocks and store the raw text of the parsed region in "result".
-- Returns true on success, nothing on failure.
parse_qlongword = function ()
  if not parse_tlongword() then return end
  local first, last = oldpos, pos - 1
  result = subj:sub(first, last)
  return true
end
-- Quoted verylongword, position variant: when there is anything left
-- between "pos" and "endpos", consume all of it and store the starting
-- position in "result". Returns true on success, nothing otherwise.
parse__qverylongword = function ()
  local start, stop = pos, endpos
  if start < stop then
    oldpos = start
    pos    = stop
    result = start
    return true
  end
end

-- Quoted verylongword, text variant: same as above, but "result" gets
-- the raw text from "pos" up to (not including) "endpos".
parse_qverylongword = function ()
  local start, stop = pos, endpos
  if start < stop then
    local text = subj:sub(start, stop - 1)
    oldpos, pos, result = start, stop, text
    return true
  end
end



-- «tlongword_mapconcat»  (to ".tlongword_mapconcat")
-- This is the function that we use to evaluate tlongwords.
-- The function "f" is usually "blogme_evalblock", so this ends up
-- being recursive.
-- See: (find-blogme4 "eval.lua" "blogme_eval")
--      (find-elnode "Mapping Functions" "Function: mapconcat")
-- In blogme3 I implemented a special behavior for tlongwords of
-- length 1 - I skipped the concatenation step. This should be done
-- here too, I think (for HLIST and friends?).
-- 
-- Evaluate a tlongword.
--   f:   called as f(begpos, endpos) for each {begpos, endpos} pair in T;
--        a nil/false return value is replaced by "".
--   T:   a table of strings and {begpos, endpos} pairs; it is modified
--        in place - each pair is overwritten by the value f gave for it.
--   sep: separator handed to table.concat.
-- Returns nil when T is nil/false; the single element itself (without
-- any concatenation) when T has exactly one element; otherwise the
-- concatenation of all elements.
tlongword_mapconcat = function (f, T, sep)
  if not T then return nil end
  for i = 1, #T do
    local item = T[i]
    if type(item) == "table" then
      local begpos, endpos_ = item[1], item[2]
      T[i] = f(begpos, endpos_) or ""
    end
  end
  if #T == 1 then
    return T[1]              -- lone element: returned as is
  end
  return table.concat(T, sep)
end



-- «longwords»  (to ".longwords")
-- These functions are similar to the ones that return tlongwords, but
-- here we run tlongword_mapconcat to return the "values" of these
-- tlongwords.
-- (I think that they destroy endpos... is that important?)
-- Parse a longword and store its "value" in "result": the tlongword
-- produced by parse_tlongword is handed to tlongword_mapconcat, with
-- "eval_block" as the function applied to each block and "" as separator.
-- Returns true on success, nil on failure.
parse_longword = function (eval_block)
  if parse_tlongword() then
    local pieces = result
    result = tlongword_mapconcat(eval_block, pieces, "")
    return true
  end
  return nil
end

-- Same as parse_longword, but for a verylongword (parse_tverylongword).
parse_verylongword = function (eval_block)
  if parse_tverylongword() then
    local pieces = result
    result = tlongword_mapconcat(eval_block, pieces, "")
    return true
  end
  return nil
end



-- «readvword»  (to ".readvword")
-- «readvrest»  (to ".readvrest")
-- The "read*" functions are high-level functions used to parse
-- arguments for blogme "calls"; they follow conventions that are
-- quite different from the "parse*" functions.
-- For example, the argparser for "HREF" has to parse a longword
-- and a verylongword; in the evaluation process for
--   "[HREF http://foo/ bar plic]"
-- we get:
--   HREF(readvvrest())
-- that becomes:
--   HREF("http://foo/", "bar plic")
-- but
--   "[HREF http://foo/]"
-- becomes:
--   HREF("http://foo/", "")
--
-- Shorthands: a "vword" is the value of a longword; a "vrest" (used
-- to obtain the "rest of the arguments", as &rest in Lisp) is the
-- value of a verylongword. Additional "v"s in the prefix mean vwords;
-- for example, a "vvvrest" is a vword, then another vword, then a
-- vrest.
--
-- Remember that the "parse*" functions returned a flag, and stored
-- the "result" of the parsed region in the global variable "result".
-- The "read*" functions return their "results" straight away, and in
-- the case of failure (i.e., of parsing nothing) they return the
-- empty string. Also, they parse (and discard) spaces before each
-- vword and vrest.

-- Read the value of a longword at the current position, evaluating its
-- blocks with blogme_evalblock. Returns "" when nothing could be parsed.
readlongword = function ()
  if not parse_longword(blogme_evalblock) then return "" end
  return result
end

-- Read the value of a verylongword at the current position, evaluating
-- its blocks with blogme_evalblock. Returns "" when nothing could be
-- parsed.
readverylongword = function ()
  if not parse_verylongword(blogme_evalblock) then return "" end
  return result
end
-- A "vword" is the value of a longword, a "vrest" the value of a
-- verylongword; both skip (and discard) leading whitespace first.
readvword = function ()
  parse__spaces()
  return readlongword()
end
readvrest = function ()
  parse__spaces()
  return readverylongword()
end

-- Each extra "v" reads one more vword before the rest. The leading vword
-- is read into a local first, which spells out the left-to-right order
-- in which the arguments are consumed from the subject.
readvvrest = function ()
  local first = readvword()
  return first, readvrest()
end
readvvvrest = function ()
  local first = readvword()
  return first, readvvrest()
end
readvvvvrest = function ()
  local first = readvword()
  return first, readvvvrest()
end
readvvvvvrest = function ()
  local first = readvword()
  return first, readvvvvrest()
end

-- «readqword»  (to ".readqword")
-- «readqrest»  (to ".readqrest")
-- Read a quoted longword (the raw text, blocks left unevaluated) at the
-- current position. Returns "" when nothing could be parsed.
readqlongword = function ()
  if not parse_qlongword() then return "" end
  return result
end

-- Read a quoted verylongword (the raw text, blocks left unevaluated) at
-- the current position. Returns "" when nothing could be parsed.
readqverylongword = function ()
  if not parse_qverylongword() then return "" end
  return result
end
-- A "qword" is a quoted longword, a "qrest" a quoted verylongword; both
-- skip (and discard) leading whitespace first.
readqword = function ()
  parse__spaces()
  return readqlongword()
end
readqrest = function ()
  parse__spaces()
  return readqverylongword()
end

-- Each extra "q" reads one more qword before the rest. The leading qword
-- is read into a local first, which spells out the left-to-right order
-- in which the arguments are consumed from the subject.
readqqrest = function ()
  local first = readqword()
  return first, readqrest()
end
readqqqrest = function ()
  local first = readqword()
  return first, readqqrest()
end
readqqqqrest = function ()
  local first = readqword()
  return first, readqqqrest()
end
readqqqqqrest = function ()
  local first = readqword()
  return first, readqqqqrest()
end

-- «_A»  (to "._A")
-- (find-blogme3 "definers.lua" "_AA")
-- (find-blogme3 "brackets.lua" "readvword")
-- (find-blogme3 "anggdefs.lua" "basic-special-words" "lua:")
-- Table of argument parsers, indexed by argparser code (a string).
-- blogme__eval resolves a word's "argp" code by looking here first and
-- then in _G. An already existing _A table (and its entries) is kept.
_A = _A or {}
-- "0": NOTE(review): "nop" is not defined in this file - presumably a
-- do-nothing function defined elsewhere. If it is nil when this line
-- runs, _A["0"] ends up nil. Confirm.
_A["0"] = nop
-- "1".."5": evaluated arguments - (n-1) vwords followed by one vrest.
_A["1"] = readvrest
_A["2"] = readvvrest
_A["3"] = readvvvrest
_A["4"] = readvvvvrest
_A["5"] = readvvvvvrest
-- "1Q".."5Q": quoted arguments - (n-1) qwords followed by one qrest.
_A["1Q"] = readqrest
_A["2Q"] = readqqrest
_A["3Q"] = readqqqrest
_A["4Q"] = readqqqqrest
_A["5Q"] = readqqqqqrest




-- «with_subj»  (to ".with_subj")
-- Run f with the globals "pos" and "endpos" temporarily set to pos_ and
-- endpos_. f is called as f(pos_, endpos_); afterwards the previous
-- values of "pos" and "endpos" are put back.
-- Returns the first value returned by f (any extra values are dropped).
-- If f raises an error the globals are not restored.
with_pos_endpos = function (pos_, endpos_, f)
  local saved_pos, saved_endpos = pos, endpos
  pos, endpos = pos_, endpos_
  local r = f(pos_, endpos_)
  pos, endpos = saved_pos, saved_endpos
  return r
end
-- Run f on a new subject: the globals are temporarily set to
-- subj = subj_, pos = 1 and endpos = #subj_+1 (one past the last char),
-- f is called as f(1, #subj_+1), and afterwards the previous values of
-- "subj", "pos" and "endpos" are put back.
-- Returns the first value returned by f (any extra values are dropped).
-- If f raises an error the globals are not restored.
with_subj = function (subj_, f)
  local past_end = #subj_ + 1
  local saved_subj, saved_pos, saved_endpos = subj, pos, endpos
  subj, pos, endpos = subj_, 1, past_end
  local r = f(1, past_end)
  subj, pos, endpos = saved_subj, saved_pos, saved_endpos
  return r
end



-- «blogme_eval»  (to ".blogme_eval")
-- "blogme__eval" (with a double "__") is a very low-level function,
--   that does the heavy work for both "blogme_evalblock" and
--   "blogme_eval". It takes as its "input" the global variables subj,
--   pos and endpos, parses a word, and then returns the result of
--   wordf(argpf()). Here is a typical example of how it runs. If:
--     subj = "ab [HREF http://foo/ bar] cd"
--     pos  =      5
--     endpos =                        25
--   then "word" is "HREF", "argp" is "2", and the result of argpf()
--   is the sequence "http://foo/", "bar"; then blogme__eval will
--   return the result of HREF("http://foo/", "bar").
--   Note that blogme__eval uses the table _B of blogmewords and
--   the table _A of argparser codes. See:
--     (find-blogme4 "def.lua" "BlogmeWord")
--
-- "blogme_evalblock" is used to run a blogme "call" inside "[]s" (as
--   in the example above).
--
-- "blogme_eval" is used to evaluate all the blogme calls inside a
--   string, replacing each one by its result; for example,
--     blogme_eval "ab [HREF http://foo/ bar] cd"
--   returns
--     "ab " .. HREF("http://foo/", "bar") .. " cd".
--
-- Low-level evaluator for one blogme "call".
-- Reads from the globals subj/pos: skips whitespace, parses a word (the
-- wchars at the current position), looks it up in the table _B of
-- blogmewords, runs the word's argument parser to read the arguments
-- from the subject, and returns whatever the word's function returns
-- for them, i.e., wordf(argpf()).
-- The argument parser "bword.argp" can be either:
--   * a string code, resolved through _A[code] and then _G[code], or
--   * any other value, which is used directly as the parser function.
-- Raises an error when there is no word at the current position, when
-- the word is not in _B, or when no argument parser can be found.
blogme__eval = function ()
    parse__spaces()
    if not parse_wchars() then error("Empty word!") end
    local word  = result
    local bword = _B[word] or error("Unknown blogme word: "..word)
    local wordf = bword.fun
    local argp  = bword.argp
    -- Resolve the parser explicitly. A string code with no entry in _A or
    -- _G must be reported as an unknown parser; it must never fall back
    -- to the string itself, which would only fail later with an obscure
    -- "attempt to call a string value".
    local argpf = argp
    if type(argp) == "string" then
      argpf = _A[argp] or _G[argp]
    end
    if not argpf then
      -- tostring: argp may be nil here, and nil cannot be concatenated.
      error("Unknown arglist parser: "..tostring(argp))
    end
    return wordf(argpf())
  end
-- Evaluate the blogme "call" inside a "[]" block. The arguments s and e
-- are narrowed by one position on each side (s+1 and e-1), to skip the
-- '[' and the ']', and blogme__eval is run with pos/endpos set to them.
blogme_evalblock = function (s, e)
  local inner_pos    = s + 1
  local inner_endpos = e - 1
  return with_pos_endpos(inner_pos, inner_endpos, blogme__eval)
end

-- Evaluate all the blogme calls inside the string subj_: the whole
-- string is read as a verylongword, with subj_ as the subject.
blogme_eval = function (subj_)
  local reader = readverylongword
  return with_subj(subj_, reader)
end






-- dump-to: tests
--[===[
-- «test-tlongword»  (to ".test-tlongword")
-- (find-blogme4 "argparsers.lua")
-- (find-blogme4 "brackets.lua")
-- (find-blogme4 "def.lua")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
userocks()
ee_dofile "~/blogme4/blogme4-all.lua"
-- These tests should make clear what a tlongword is.
--     /-----------------------------------------------------------\
--     |            /-----\        /-----------------\/-------\    |
str = "[HREF http://[+ 1 2]/  bar  [* [+ 1 2] [+ 3 4]][* 10 10]plic]"
--     ^     ^      ^     ^        ^                 ^^
--     1     7      14    20  ^24  29               4748      ^56  ^61
angf = function (s, e) return "<"..subj:sub(s, e-1)..">" end
test = function (p, f, ...)
    subj, pos = str, p
    PP(f(...), oldpos, pos, result)
  end
test(7, parse_tlongword)
--> <true> 7      22  {"http://", {14, 21}, "/"}
--  bool   oldpos pos result
test(24, parse_tverylongword)
--> <true> 24     61  {"bar  ", {29, 48}, {48, 57}, "plic"}
--  bool   oldpos pos result

test(24, parse_tverylongword)                 -- reset "result"
PP(tlongword_mapconcat(angf, result, ".."))   -- this changes "result"
--> "bar  ..<[* [+ 1 2] [+ 3 4]]>..<[* 10 10]>..plic"
PP(result)                     
--> {1="bar  ", 2="<[* [+ 1 2] [+ 3 4]]>", 3="<[* 10 10]>", 4="plic"}

test(7,  parse_tlongword)
--> <true> 7 22 {1="http://", 2={1=14, 2=21}, 3="/"}
test(7,  parse_longword,     angf)
--  <true> 7 22 "http://<[+ 1 2]>/"
test(24, parse_verylongword, angf)
--> <true> 24 61 "bar  <[* [+ 1 2] [+ 3 4]]><[* 10 10]>plic"

def [[ HREF   2 url,str   "<a href=\"$url\">$str</a>" ]]
def [[ *      2 a,b       a*b                         ]]
def [[ +      2 a,b       a+b                         ]]
--     /-----------------------------------------------------------\
--     |            /-----\        /-----------------\/-------\    |
str = "[HREF http://[+ 1 2]/  bar  [* [+ 1 2] [+ 3 4]][* 10 10]plic]"
--     ^     ^      ^     ^        ^                 ^^
--     1     7      14    20  ^24  29               4748      ^56  ^61
subj = str
= blogme_evalblock(14, 20)
= blogme_evalblock(29, 48)
= blogme_evalblock( 1, 61)
= blogme_eval(str)



-- «test-parse»  (to ".test-parse")
-- «test-read»  (to ".test-read")
-- High-level tests.
-- "be" tests evaluating a string using blogme_eval,
-- "tp" tests a "parse_*" word,
-- "tr" tests a "read*" word.

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
   -- (find-blogme4 "blogme4-all.lua")
ee_dofile "~/blogme4/blogme4-all.lua"
def [[ pp 1  body "<"..body..">", print("<"..body..">") ]]
def [[ pq 1Q body "<"..body..">", print("<"..body..">") ]]
comp = function (f, g) return function (...) return f(g(...)) end end
be = function (str) PP(blogme_eval(str)) end
tp = function (f) return function (subj) PP(with_subj(subj, f), result) end end
tr = function (f) return function (subj) with_subj(subj, comp(PP, f)) end end
tp(parse_spaces)        [==[ ab cd ef ]==]
tp(parse_rchars)        [==[ ab cd ef gh ]==]
tp(parse_rchars)        [==[ ab cd [pp ef] gh ]==]
tp(parse_qverylongword) [==[ ab cd [pp ef] gh ]==]
tp(parse_tverylongword) [==[ ab cd [pp ef] gh ]==]
tp(parse_tlongword)     [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse__qlongword)    [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse_qlongword)     [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tp(parse_qlongword)    [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readlongword)        [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readverylongword)    [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvword)          [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvrest)          [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvrest)         [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvrest)        [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvvrest)       [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readvvvvvrest)      [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqlongword)       [==[ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqlongword)      [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqverylongword)  [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqword)          [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqrest)          [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqrest)         [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqrest)        [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqqrest)       [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
tr(readqqqqqrest)      [==[ ab[ pp cd ]ef gh [pp ij] kl ]==]
be                     [==[ ab[ pp cd ]ef [pq [pp gh] ij] kl ]==]

--]===]









-- Local Variables:
-- coding:             raw-text-unix
-- ee-anchor-format:   "«%s»"
-- End: