-- Warning: this is an htmlized version!
-- The original is here, and the conversion rules are here.
-- This file:
--   http://angg.twu.net/LUA/Re.lua.html
--   http://angg.twu.net/LUA/Re.lua
--           (find-angg "LUA/Re.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>

-- Introduction
-- ============
-- I wrote the class Re because I found lpeg.re quite clumsy to use
-- "in natura". Most of the time when I was developing and debugging
-- patterns I would need to change a part of the pattern many times
-- while keeping the rest fixed, and I would have to print the result
-- of "match"ing those patterns against some test strings... and often
-- I would have to use a fixed set of definitions, a special printing
-- function, and a preprocessor.
--
-- The class Re lets me create objects like this one,
--
--   rtt = Re { print   = PP,
--              preproc = preproc_u,
--              defs    = { u = und },
--              grammar = ' Number <- { [0-9]+ ( "." [0-9]+ )? } ',
--            }
--
-- and use them like this:
--
--   rtt:c ' top <- { Number "->" Number } -> u ' '23->45'
--
-- I can put lots of calls like these in test blocks, and indicate
-- their outputs in comments. See the tests - and note that the first
-- test blocks are demos/tutorials for lpeg.re in e-script form.

-- Old comments:
--
-- Classes for testing and documenting lpeg.re.
-- This is currently a mess, and far from being standalone code.
-- At this moment this needs to be in ~/LUA/, and needs my init file.
-- See the instructions here:
--   (find-angg "LUA/README.e")
-- See: (find-angg "LUA/lpeg-minitut.lua")
--      (find-es "lua-intro" "lpeg-quickref")
--      (find-es "lpeg" "re-quickref")
-- The tests need the class Tos.
-- «.Re»            (to "Re")
-- «.Re-tests»      (to "Re-tests")
-- «.grammars»      (to "grammars")
-- «.und»           (to "und")
-- «.preproc_u»     (to "preproc_u")
-- «.arit1»         (to "arit1")
-- «.arit2»         (to "arit2")
-- «.arit2-output»  (to "arit2-output")
-- «.right»         (to "right")

-- Keep the module in the global `re` explicitly instead of relying on
-- a side effect of require.
-- NOTE(review): recent LPeg releases reportedly install the global
-- `re` only under Lua 5.1 - confirm against the installed re.lua.
re = require "re"
require "Rect"



-- «Re»  (to ".Re")
-- Also here:
-- (find-angg "LUA/lua50init.lua" "Re")
--
-- An object of the class Re bundles what is needed to try out an
-- lpeg.re pattern. Fields (defaults live in Re.__index):
--   grammar  string appended to every pattern given to :compile()
--   defs     table of definitions handed to re.compile
--   preproc  function applied to the pattern string before compiling
--   print    optional; if set, :test() sends the results of the match
--            to it instead of returning them
-- The default `defs` table is stored in Re.__index, so it is shared by
-- every Re object that does not supply its own `defs`; the assignments
-- to Re.__index.defs.u and Re.__index.defs.f further down add entries
-- to that shared table.
-- Class, mytostringv, copy and PP are defined outside this file.
--
Re = Class {
  type       = "Re",
  __tostring = function (r) return mytostringv(r) end,
  -- Calling the object, as in r(subj, init), is the same as r:test(subj, init).
  __call     = function (r, subj, init) return r:test(subj, init) end,
  --
  __index = {
    grammar = "",
    defs    = {},
    preproc = function (res0) return res0 end,
    --
    -- Every call to r:compile(str) overwrites
    -- the fields r.res0, r.res, and r.rec of r.
    --   r.res0: the pattern as given (nil when it is identical to r.res)
    --   r.res:  the preprocessed pattern followed by r.grammar
    --   r.rec:  the result of re.compile(r.res, r.defs)
    -- Returns r, so calls can be chained.
    compile = function (r, res0)
      local res = r.preproc(res0) .. r.grammar
      r.res0 = res0
      r.res  = res
      if res == res0 then r.res0 = nil end
      r.rec = re.compile(r.res, r.defs)
      return r
    end,
    -- Match the compiled pattern against subj, starting at init.
    -- Raises an explicit error when nothing has been compiled yet.
    match = function (r, subj, init)
      assert(r.rec, "Re: no compiled pattern; call :c(str) or :compile(str) first")
      return r.rec:match(subj, init)
    end,
    -- With r.print set: pass the results of the match to r.print and
    -- return nothing. Otherwise: return the results of the match.
    test = function (r, subj, init)
      if r.print then
        r.print(r:match(subj, init))
      else
        return r:match(subj, init)
      end
    end,
    --
    -- p:  print the full pattern that was compiled (r.res).
    -- c:  short for r:compile(...).
    -- cc: run :compile(...) on copy(r) instead of on r itself.
    p  = function (r, ...) print(r.res) end,
    c  = function (r, ...) return r:compile(...) end,
    cc = function (r, ...) return copy(r):compile(...) end,
  },
}

rt0 = Re { }
rt  = Re { print = print }
rtp = Re { print = PP }



-- «Re-tests»  (to ".Re-tests")
-- See: (find-es "lpeg" "re-quickref")
--
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
= rtp:c ' { "a" { "b" } } '
= rtp:c ' { "a" { "b" } } '.res
  rtp:c ' { "a" { "b" } } ' :p()
  rtp:c ' { "a" { "b" } } ' :test "ab"
  rtp:c ' { "a" { "b" } } ' "ab"
  rt0:c ' { "a" { "b" } } ' "ab"
= rt0:c ' { "a" { "b" } } ' "ab"

-- {}            position capture
-- { p }         simple capture
-- {: p :}       anonymous group capture
-- {:name: p :}  named group capture
-- {~ p ~}       substitution capture
-- {| p |}       table capture
-- p -> 'string' string capture
-- p -> "string" string capture
-- p -> num      numbered capture
--
rtp:c ' "a" "b" '                      'abc'  --> 3
rtp:c ' "a" "b" {} '                   'abc'  --> 3
rtp:c ' {} "a" "b" {} '                'abc'  --> 1 3
rtp:c ' { "a" "b" } '                  'abc'  --> "ab"
rtp:c ' {| "a" "b" |} '                'abc'  --> {}
rtp:c ' {| { "a" "b" } |} '            'abc'  --> {1="ab"}
rtp:c ' {| { "a" } { "b" } |} '        'abc'  --> {1="a", 2="b"}
rtp:c ' {| { "a" } {: "b" :} |} '      'abc'  --> {1="a", 2="b"}
rtp:c ' {| { "a" } {:foo: "b" :} |} '  'abc'  --> {1="a", "foo"="b"}
rtp:c ' {:foo: "a" :} '                'abc'  --> 2
rtp:c ' {| {:foo: "a" :} |} '          'abc'  --> {"foo"="a"}
rtp:c ' { {:foo: "a" :} } '            'abc'  --> "a"
rtp:c ' { { "a" } "b" } '              'abc'  --> "ab" "a"
rtp:c ' { { "a" } { "b" } } '          'abc'  --> "ab" "a" "b"
rtp:c ' { { "a" } {| "b" |} } '        'abc'  --> "ab" "a" "b"
rtp:c ' { { "a" } {| {"b"}|} } '       'abc'  --> "ab" "a" {1="b"}
rtp:c ' { { "a" } {| {"b"}|} } '       'abc'  --> "ab" "a" {1="b"}
rtp:c ' {| {:foo: "a" :} |} '          'abc'  --> "a"
rtp:c ' {| {:foo: ""->"bar" :} |} '    'abc'  --> {"foo"="bar"}
rtp:c [[ {| { "a" } {:B: "b" :} { "c" } {:D: "d" :} |} ]] 'abcd'
                                              --> {1="a", 2="c", "B"="b", "D"="d"}
rtp:c ' {~ "a"->"AA" "b" "c"->"CC" ~} '               'abc'   --> "AAbCC"
rtp:c ' ( ""->"a" ""->"b" ""->"c" ""->"d" ) -> 3 '    'abcd'  --> "c"

-- p -> name   function/query/string capture equivalent to p / defs[name]
-- p => name   match-time capture equivalent to lpeg.Cmt(p, defs[name])
-- p ~> name   fold capture equivalent to lpeg.Cf(p, defs[name])

-- (find-es "lpeg" "re-quickref")
-- "string"      literal string
-- [class]       character class
-- p *           zero or more repetitions
-- p +           one or more repetitions
-- name          non terminal
-- p1 p2         concatenation
-- p1 / p2       ordered choice
-- ( p )         grouping
-- { p }         simple capture
-- (name <- p)+  grammar
-- p -> name     function/query/string capture equivalent to p / defs[name]

--]==]



-- «grammars»  (to ".grammars")
--
-- rtq_grammar is meant to be appended to a user pattern (see the field
-- `grammar` of Re), hence the leading newlines. The "--" line inside
-- the string is a comment in lpeg.re syntax and must stay on a line of
-- its own, otherwise it would swallow the rules after it.
rtq_grammar = "\n\n" .. [=[
  number     <- { [-+]? [0-9]+ ( "." [0-9]+ )? }
  quotedlit  <- { '"' [^"]* '"' }
  dquotedlit <- { "'" [^']* "'" }
  word       <- { [!-~]+ }
  --
  Number     <- {| ""->"Number" number |}
  QuotedLit  <- {| ""->"QuotedLit" (quotedlit / dquotedlit) |}
]=]

rtq = Re { print = PP, grammar = rtq_grammar }

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
rtq:c ' top <- Number ' :p()
rtq:c ' top <- Number '                '42'
rtq:c ' top <- ""->"Foo" Number '      '42foo'
rtq:c ' top <- {| ""->"Foo" Number |} ' '42foo'

--]]



-- «preproc_u»  (to ".preproc_u")
--
-- und(A) copies the field A.o to A[0] (modifying A in place) and
-- returns UndTree.from(A). UndTree is not defined in this file; see
-- the link in the "und" section below.
und = function (A) A[0] = A.o; return UndTree.from(A) end

-- Make "-> u" available in patterns. The wrapper looks `und` up at
-- call time, so a later redefinition of `und` is picked up.
Re.__index.defs.u = function (A) return und(A) end

-- preproc_u0(cstr) rewrites one balanced "{...}" chunk:
--   {.u.NAME.BODY}  becomes  {| {:o: ""->"NAME" :} BODY' |} -> u
--   {.u.BODY}       becomes  {| BODY' |} -> u
--   {BODY}          becomes  {BODY'}
-- where BODY' is preproc_u(BODY). The third case keeps ordinary
-- captures as they are while still expanding any "{.u. ...}" nested
-- inside them. NAME is a run of printable non-space characters.
-- Returns nil when cstr is not of the form "{...}".
preproc_u0 = function (cstr)
  local named_fmt = '{| {:o: ""->"%s" :} %s |} -> u '
  local plain_fmt = '{| %s |} -> u '
  local label, body = cstr:match("^{%.u%.([!-~]+)%.(.+)}$")
  if label then
    return string.format(named_fmt, label, preproc_u(body))
  end
  body = cstr:match("^{%.u%.(.+)}$")
  if body then
    return string.format(plain_fmt, preproc_u(body))
  end
  -- Not a ".u." block: gsub does not rescan a match that is left
  -- untouched, so recurse into the interior explicitly.
  local inner = cstr:match("^{(.*)}$")
  if inner then
    return "{" .. preproc_u(inner) .. "}"
  end
end

-- preproc_u(str) applies preproc_u0 to every outermost balanced
-- "{...}" in str and returns the resulting string (only the string,
-- not the substitution count of gsub).
preproc_u = function (str)
  return (str:gsub("(%b{})", preproc_u0))
end

rtu = Re { preproc = preproc_u }

-- «und»  (to ".und")
-- See: (find-angg "LUA/Rect.lua" "UndTree-tests")
--
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
= und {o=22, 33, 44}
= und {33, 44}
= und {o=22, 33, {44, 55}}
= und {o=22, 33, {o=66, 44, 55}}
= und {o=22, 33, und {o=66, 44, 55}}
PPPV(und {o=22, 33,     {o=66, 44, 55}})
PPPV(und {o=22, 33, und {o=66, 44, 55}})

= preproc_u 'foo {.u.bletch } bar'
= preproc_u 'foo {.u. bletch} bar'
= preproc_u 'foo {.u.pip. bletch} bar'
= preproc_u 'foo { {.u.pip. bletch} } bar'
rtu:c ' {.u. "a" "b" } ' :p()
rtu:c ' {.u.foo. "a" "b" } ' :p()
rtu:c ' {.u.foo. "a" "b" } ' 'ab'
rtu:c ' {.u.foo. { "a" } { "b" } } ' 'ab'
rtu:c ' {.u. { "a" } { "b" } } ' 'ab'
rtu:c ' {.u. {.u. { "a" } { "b" } } { "c" } } ' 'abc'
rtu:c ' {.u.foo. {.u.bar. { "a" } { "b" } } { "c" } } ' 'abc'

--]]



-- «arit1»  (to ".arit1")
-- Adapted from: (find-angg "LUA/lpeg-minitut.lua")
-- See: (find-angg "LUA/Rect.lua" "SynTree-tests")
--
-- syntreeg(a1, op, a2, op, a3, ...) builds a SynTree whose entry [0]
-- is the second argument and whose entries 1, 2, 3, ... are the
-- arguments in the odd positions. Only the first operator is kept;
-- the arguments in the other even positions are ignored.
-- SynTree is not defined in this file.
syntreeg = function (...)
  local A = {...}
  local S = SynTree {[0] = A[2]}
  for i = 1, #A, 2 do table.insert(S, A[i]) end
  return S
end

-- "pat -> f" runs packcaptures on the captures of pat.
-- packcaptures (...) runs packcaptures0(...).
-- packcaptures0(...) can run either
--   packcaptures_pars(...) or
--   packcaptures_tree(...); use
-- usepars() and usetree() to select which.
--
-- When there is exactly one capture it is returned unchanged, without
-- calling packcaptures0. The count comes from select("#", ...), so a
-- single capture that is false or nil is also passed through.
packcaptures = function (...)
  if select("#", ...) == 1 then return (...) end
  return packcaptures0(...)
end
Re.__index.defs.f = packcaptures

-- Join the captures with spaces and wrap the result in parentheses.
packcaptures_pars = function (...)
  return "(" .. table.concat({...}, " ") .. ")"
end
-- Build a SynTree from the captures with syntreeg.
packcaptures_tree = function (...)
  return syntreeg(...)
end
usepars = function () packcaptures0 = packcaptures_pars end
usetree = function () packcaptures0 = packcaptures_tree end
usetree()

test_arit1 = [[
  e  <- e3
  e3 <- (e2 ({"+"} e2)*) -> f
  e2 <- (e1 ({"*"} e1)*) -> f
  e1 <- (e0 ({"^"} e0)*) -> f
  e0 <- "(" e3 ")" / {[0-9]+}
]]

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
= syntreeg(1, "+", 2)
= syntreeg(1, "+", 2, "*", 3)
= syntreeg(1, "+", 2, "*", syntreeg(1, "/", 2))
= syntreeg(1, "+", 2, "*", syntreeg(1, "/", syntreeg(4)))
usepars(); rt :c(test_arit1) "1*2+3^4*5^6+7^8"
usetree(); rtu:c(test_arit1) "1*2+3^4*5^6+7^8"

--]]



-- «arit2»  (to ".arit2")
--
-- The same arithmetic grammar written with the "{.u.NAME. ...}"
-- syntax; it has to be compiled through preproc_u (e.g. with rtu).
test_arit2 = [[
  top    <- e3
  number <- {[0-9]+}
  e3 <- {.u.+. e2 ({"+"} e2)* }
  e2 <- {.u.*. e1 ({"*"} e1)* }
  e1 <- {.u.^. e0 ({"^"} e0)* }
  e0 <- {.u.(). {"("} e3 {")"} } / {.u.n. number }
]]

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
rtu:c(test_arit2) :p()
rtu:c(test_arit2) '11+20'
rtu:c(test_arit2) '1*2+3^4*5^6+7^8'
rtu:c(test_arit2) '1+(2*3^4+5)'

--]]

-- «arit2-output»  (to ".arit2-output")
-- NOTE(review): the column layout below was reconstructed from the
-- widths of the underlines; the horizontal placement of the labels
-- under each underline is a guess - compare with a real run.
--[[
> = rtu:c(test_arit2) '1+(2*3^4+5)'
1  +  (  2  *  3  ^  4  +  5  )
-        -     -     -     -
n        n     n     n     n
-        -     ╰─────╯     -
^        ^     ^           ^
-        ╰───────────╯     -
*        *                 *
         ╰─────────────────╯
         +
      ╰───────────────────────╯
      ()
      ╰───────────────────────╯
      ^
      ╰───────────────────────╯
      *
╰─────────────────────────────╯
+
>
--]]



-- «right»  (to ".right")
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
test_right = [[
  top <- AS
  A   <- {"a"}
  AT  <- A (! A)
  AS  <- {.u. A (AT / AS) }
]]
= rtu:c(test_right) 'aaaa'

test_left = [[
  top <- AS
  A   <- {"a"}
  AI  <- A
  AS  <- {.u. A (AT / AS) }
]]
= rtu:c(test_right) 'aaaa'

--]==]



--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
gram = [=[
  stuff   <- {~ ( wordsf / special / . ) * ~}
  wordsf  <- {~ ("" -> "\textsf{") word ("" -> "}")~}
  word    <- {~ ([A-Za-z0-9]+ / ('_' -> '\_')) + ~}
  special <- ( '[' / ']' / '.' ) -> specials
]=]
specials = {
  ["["] = "<<",
  ["]"] = ">>",
  ["."] = "\\,",
}
rtg = Re { print = PP, grammar = gram, defs = {specials = specials} }
rtg:c 'top <- word'   'foo_bar0plic bletch'
rtg:c 'top <- wordsf' 'foo_bar0plic bletch'
rtg:c 'top <- stuff'  'foo_bar0plic bletch'
rtg:c 'top <- stuff'  'foo_bar0plic.[bletch]'

--]==]



-- Local Variables:
-- coding: utf-8-unix
-- End: