|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
-- This file:
-- http://angg.twu.net/LUA/Re.lua.html
-- http://angg.twu.net/LUA/Re.lua
-- (find-angg "LUA/Re.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
-- Introduction
-- ============
-- I wrote the class Re because I found lpeg.re quite clumsy to use
-- "in natura". Most of the time when I was developing and debugging
-- patterns I would need to change a part of the pattern many times while
-- keeping the rest fixed, and I would have to print the result of
-- "match"ing those patterns against some test strings... and often I
-- would have to use a fixed set of definitions, a special printing
-- function, and a preprocessor.
--
-- The class Re lets me create objects like this one,
--
-- rtt = Re { print = PP,
-- preproc = preproc_u,
-- defs = { u = und },
-- grammar = ' Number <- { [0-9]+ ( "." [0-9]+ )? } ',
-- }
--
-- and use them like this:
--
-- rtt:c ' top <- { Number "->" Number } -> u ' '23->45'
--
-- I can put lots of calls like these in test blocks, and indicate
-- their outputs in comments. See the tests - and note that the first
-- test blocks are demos/tutorials for lpeg.re in e-script form.
-- Old comments:
--
-- Classes for testing and documenting lpeg.re.
-- This is currently a mess, and far from being standalone code.
-- At this moment this needs to be in ~/LUA/, and needs my init file.
-- See the instructions here:
-- (find-angg "LUA/README.e")
-- See: (find-angg "LUA/lpeg-minitut.lua")
-- (find-es "lua-intro" "lpeg-quickref")
-- (find-es "lpeg" "re-quickref")
-- The tests need the class Tos.
-- «.Re» (to "Re")
-- «.Re-tests» (to "Re-tests")
-- «.grammars» (to "grammars")
-- «.und» (to "und")
-- «.preproc_u» (to "preproc_u")
-- «.arit1» (to "arit1")
-- «.arit2» (to "arit2")
-- «.arit2-output» (to "arit2-output")
-- «.right» (to "right")
require "re"
require "Rect"
-- «Re» (to ".Re")
-- Also here:
-- (find-angg "LUA/lua50init.lua" "Re")
--
-- Class Re: a small wrapper around lpeg.re.
--
-- Fields that every Re object inherits through __index:
--   grammar  text appended after the preprocessed pattern (default "")
--   defs     table handed to re.compile as its definitions
--            (this default table is shared by all objects that do
--            not override it)
--   preproc  function applied to the pattern text before compiling
--            (the default returns its argument unchanged)
--
-- Calling an object, r(subj, init), is the same as r:test(subj, init).
do
  local methods = {
    grammar = "",
    defs    = {},
  }

  function methods.preproc(src)
    return src
  end

  -- r:compile(src) overwrites the fields r.res0, r.res and r.rec:
  --   r.res   the text that was really compiled,
  --   r.res0  the original text src, or nil when it is equal to r.res,
  --   r.rec   the compiled pattern.
  -- Returns r, so that calls can be chained.
  function methods:compile(src)
    local full = self.preproc(src) .. self.grammar
    self.res = full
    if full ~= src then
      self.res0 = src
    else
      self.res0 = nil
    end
    self.rec = re.compile(self.res, self.defs)
    return self
  end

  -- r:match(subj, init) matches the compiled pattern against subj.
  function methods:match(subj, init)
    return self.rec:match(subj, init)
  end

  -- r:test(subj, init): when r.print is set, pass the results of the
  -- match to it and return nothing; otherwise return the results.
  function methods:test(subj, init)
    local printer = self.print
    if not printer then
      return self:match(subj, init)
    end
    printer(self:match(subj, init))
  end

  -- r:p() prints the text that was compiled last.
  function methods:p(...)
    print(self.res)
  end

  -- r:c(...) is a short name for r:compile(...).
  function methods:c(...)
    return self:compile(...)
  end

  -- r:cc(...) compiles in a copy of r made with copy(), so r itself
  -- is not changed by the call.
  function methods:cc(...)
    return copy(self):compile(...)
  end

  Re = Class {
    type       = "Re",
    __tostring = function (self) return mytostringv(self) end,
    __call     = function (self, subj, init) return self:test(subj, init) end,
    __index    = methods,
  }
end
-- Three ready-made Re objects. They differ only in the "print" field,
-- which r:test() consults:
--   rt0  has no print field, so r:test() returns the match results,
--   rt   hands the match results to print,
--   rtp  hands the match results to PP.
rt0 = Re({})
rt  = Re({ print = print })
rtp = Re({ print = PP })
-- «Re-tests» (to ".Re-tests")
-- See: (find-es "lpeg" "re-quickref")
--
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
= rtp:c ' { "a" { "b" } } '
= rtp:c ' { "a" { "b" } } '.res
rtp:c ' { "a" { "b" } } ' :p()
rtp:c ' { "a" { "b" } } ' :test "ab"
rtp:c ' { "a" { "b" } } ' "ab"
rt0:c ' { "a" { "b" } } ' "ab"
= rt0:c ' { "a" { "b" } } ' "ab"
-- {} position capture
-- { p } simple capture
-- {: p :} anonymous group capture
-- {:name: p :} named group capture
-- {~ p ~} substitution capture
-- {| p |} table capture
-- p -> 'string' string capture
-- p -> "string" string capture
-- p -> num numbered capture
--
rtp:c ' "a" "b" ' 'abc' --> 3
rtp:c ' "a" "b" {} ' 'abc' --> 3
rtp:c ' {} "a" "b" {} ' 'abc' --> 1 3
rtp:c ' { "a" "b" } ' 'abc' --> "ab"
rtp:c ' {| "a" "b" |} ' 'abc' --> {}
rtp:c ' {| { "a" "b" } |} ' 'abc' --> {1="ab"}
rtp:c ' {| { "a" } { "b" } |} ' 'abc' --> {1="a", 2="b"}
rtp:c ' {| { "a" } {: "b" :} |} ' 'abc' --> {1="a", 2="b"}
rtp:c ' {| { "a" } {:foo: "b" :} |} ' 'abc' --> {1="a", "foo"="b"}
rtp:c ' {:foo: "a" :} ' 'abc' --> 2
rtp:c ' {| {:foo: "a" :} |} ' 'abc' --> {"foo"="a"}
rtp:c ' { {:foo: "a" :} } ' 'abc' --> "a"
rtp:c ' { { "a" } "b" } ' 'abc' --> "ab" "a"
rtp:c ' { { "a" } { "b" } } ' 'abc' --> "ab" "a" "b"
rtp:c ' { { "a" } {| "b" |} } ' 'abc' --> "ab" "a" {}
rtp:c ' { { "a" } {| {"b"}|} } ' 'abc' --> "ab" "a" {1="b"}
rtp:c ' { { "a" } {| {"b"}|} } ' 'abc' --> "ab" "a" {1="b"}
rtp:c ' {| {:foo: "a" :} |} ' 'abc' --> {"foo"="a"}
rtp:c ' {| {:foo: ""->"bar" :} |} ' 'abc' --> {"foo"="bar"}
rtp:c [[ {| { "a" } {:B: "b" :}
{ "c" } {:D: "d" :} |} ]] 'abcd' --> {1="a", 2="c", "B"="b", "D"="d"}
rtp:c ' {~ "a"->"AA" "b" "c"->"AA" ~} ' 'abc' --> "AAbAA"
rtp:c ' ( ""->"a" ""->"b" ""->"c" ""->"d" ) -> 3 ' 'abcd' --> "c"
-- p -> name function/query/string capture equivalent to p / defs[name]
-- p => name match-time capture equivalent to lpeg.Cmt(p, defs[name])
-- p ~> name fold capture equivalent to lpeg.Cf(p, defs[name])
-- (find-es "lpeg" "re-quickref")
-- "string" literal string
-- [class] character class
-- p * zero or more repetitions
-- p + one or more repetitions
-- name non terminal
-- p1 p2 concatenation
-- p1 / p2 ordered choice
-- ( p ) grouping
-- { p } simple capture
-- (name <- p)+ grammar
-- p -> name function/query/string capture equivalent to p / defs[name]
--]==]
-- «grammars» (to ".grammars")
--
-- rtq_grammar: a set of lpeg.re rules for numbers, quoted literals and
-- words. r:compile() concatenates the "grammar" field right after the
-- (preprocessed) pattern given by the caller, so the callers only have
-- to supply the first rule - see the tests below, that use
-- ' top <- Number '. The string starts with two newlines, presumably
-- to keep these rules apart from the caller's text.
-- Note the rule names: "quotedlit" matches a literal between double
-- quotes and "dquotedlit" a literal between single quotes.
rtq_grammar = "\n\n" .. [=[
number <- { [-+]? [0-9]+ ( "." [0-9]+ )? }
quotedlit <- { '"' [^"]* '"' }
dquotedlit <- { "'" [^']* "'" }
word <- { [!-~]+ }
--
Number <- {| ""->"Number" number |}
QuotedLit <- {| ""->"QuotedLit" (quotedlit / dquotedlit) |}
]=]
-- rtq: an Re object that appends rtq_grammar to every pattern that it
-- compiles and that shows the results of its tests with PP.
rtq = Re { print = PP, grammar = rtq_grammar }
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
rtq:c ' top <- Number ' :p()
rtq:c ' top <- Number ' '42'
rtq:c ' top <- ""->"Foo" Number ' '42foo'
rtq:c ' top <- {| ""->"Foo" Number |} ' '42foo'
--]]
-- «preproc_u» (to ".preproc_u")
--
-- und(A): store the field A.o in A[0] (modifying A in place) and
-- return the result of UndTree.from(A).
function und(A)
  A[0] = A.o
  return UndTree.from(A)
end

-- Make "u" available in the default defs table of the class Re, so
-- that patterns can say "-> u". The wrapper looks up the global "und"
-- each time it is called.
function Re.__index.defs.u(A)
  return und(A)
end
-- preproc_u(str) expands the ".u." groups of an lpeg.re source string:
--
--   {.u. body }       -->  {| body |} -> u
--   {.u.NAME. body }  -->  {| {:o: ""->"NAME" :} body |} -> u
--
-- NAME is a non-empty run of printable non-space characters ([!-~]+)
-- that comes immediately after "{.u." and is followed by a ".".
-- The expansion is recursive: ".u." groups are expanded wherever they
-- occur, both inside other ".u." groups and inside ordinary groups
-- like { ... }, {| ... |} and {: ... :}.
-- Groups are found with the Lua pattern "%b{}", so the braces in str
-- have to be balanced - including any braces inside quoted literals.
-- Returns the expanded string (a single value).
--
-- preproc_u0(cstr) is the helper that receives each balanced "{...}"
-- group found by preproc_u and returns its replacement. It returns
-- nil (meaning "keep the text as it is" for gsub) only when cstr is
-- not of the form "{...}".
--
preproc_u0 = function (cstr)
    local fmt2 = '{| {:o: ""->"%s" :} %s |} -> u '
    local fmt1 = '{| %s |} -> u '
    -- Named form first: "{.u.NAME. body}".
    local name, body = cstr:match("^{%.u%.([!-~]+)%.(.+)}$")
    if name then return string.format(fmt2, name, preproc_u(body)) end
    -- Anonymous form: "{.u. body}".
    body = cstr:match("^{%.u%.(.+)}$")
    if body then return string.format(fmt1, preproc_u(body)) end
    -- Ordinary group: keep its braces, but expand the ".u." groups
    -- nested in it. Without this step gsub would skip over the whole
    -- group and the nested ".u." groups would reach re.compile
    -- unexpanded.
    local inner = cstr:match("^{(.*)}$")
    if inner then return "{" .. preproc_u(inner) .. "}" end
  end
preproc_u = function (str)
    -- The parentheses discard the second result of gsub (the count).
    return (str:gsub("(%b{})", preproc_u0))
  end
-- rtu: an Re object that runs preproc_u on every pattern before
-- compiling it. It has no print field, so its tests return the
-- results of the match.
rtu = Re({ preproc = preproc_u })
-- «und» (to ".und")
-- See: (find-angg "LUA/Rect.lua" "UndTree-tests")
--
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
= und {o=22, 33, 44}
= und {33, 44}
= und {o=22, 33, {44, 55}}
= und {o=22, 33, {o=66, 44, 55}}
= und {o=22, 33, und {o=66, 44, 55}}
PPPV(und {o=22, 33, {o=66, 44, 55}})
PPPV(und {o=22, 33, und {o=66, 44, 55}})
= preproc_u 'foo {.u.bletch } bar'
= preproc_u 'foo {.u. bletch} bar'
= preproc_u 'foo {.u.pip. bletch} bar'
rtu:c ' {.u. "a" "b" } ' :p()
rtu:c ' {.u.foo. "a" "b" } ' :p()
rtu:c ' {.u.foo. "a" "b" } ' 'ab'
rtu:c ' {.u.foo. { "a" } { "b" } } ' 'ab'
rtu:c ' {.u. { "a" } { "b" } } ' 'ab'
rtu:c ' {.u. {.u. { "a" } { "b" } } { "c" } } ' 'abc'
rtu:c ' {.u.foo. {.u.bar. { "a" } { "b" } } { "c" } } ' 'abc'
--]]
-- «arit1» (to ".arit1")
-- Adapted from: (find-angg "LUA/lpeg-minitut.lua")
-- See: (find-angg "LUA/Rect.lua" "SynTree-tests")
--
-- syntreeg(a1, op, a2, op, a3, ...) builds a SynTree from an
-- alternating list of operands and operators: the second argument
-- becomes the entry [0] of the tree and the arguments in the odd
-- positions (1, 3, 5, ...) are appended as entries 1, 2, 3, ...
-- The other arguments in even positions are ignored.
function syntreeg(...)
  local items = {...}
  local count = #items
  local tree  = SynTree({[0] = items[2]})
  local pos   = 1
  while pos <= count do
    table.insert(tree, items[pos])
    pos = pos + 2
  end
  return tree
end
-- "pat -> f" runs packcaptures on the captures of pat.
-- packcaptures (...) runs packcaptures0(...).
-- packcaptures0(...) can run either
-- packcaptures_pars(...) or
-- packcaptures_tree(...); use
-- usepars() and usetree() to select which.
--
-- packcaptures(...): when there is exactly one argument and it is
-- neither nil nor false, return it unchanged; in all other cases
-- return the first result of packcaptures0(...).
-- Always returns exactly one value.
function packcaptures(...)
  local captured = {...}
  if #captured == 1 and captured[1] then
    return captured[1]
  end
  -- The parentheses keep only the first result of packcaptures0.
  return (packcaptures0(...))
end
-- Make "f" available in the default defs table of the class Re, so
-- that patterns can say "-> f". This stores the function that
-- "packcaptures" holds at this point; reassigning the global
-- packcaptures later does not change defs.f.
Re.__index.defs.f = packcaptures
-- The two possible values for packcaptures0:
--   packcaptures_pars(...) joins its arguments with spaces and wraps
--                          the result in parentheses, as a string;
--   packcaptures_tree(...) passes its arguments on to syntreeg.
function packcaptures_pars(...)
  local pieces = {...}
  return "(" .. table.concat(pieces, " ") .. ")"
end

function packcaptures_tree(...)
  return syntreeg(...)
end

-- usepars() and usetree() choose which of the two packcaptures0 is.
function usepars()
  packcaptures0 = packcaptures_pars
end

function usetree()
  packcaptures0 = packcaptures_tree
end

-- The default is the tree version.
usetree()
-- test_arit1: an lpeg.re grammar for arithmetic expressions made of
-- unsigned integers, parentheses, and the operators "+", "*" and "^".
-- Each of the levels e3, e2 and e1 captures its operands and its
-- operators and passes all of them to defs.f with "-> f".
-- The tests below compile it with rt and with rtu.
test_arit1 = [[
e <- e3
e3 <- (e2 ({"+"} e2)*) -> f
e2 <- (e1 ({"*"} e1)*) -> f
e1 <- (e0 ({"^"} e0)*) -> f
e0 <- "(" e3 ")" / {[0-9]+}
]]
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
= syntreeg(1, "+", 2)
= syntreeg(1, "+", 2, "*", 3)
= syntreeg(1, "+", 2, "*", syntreeg(1, "/", 2))
= syntreeg(1, "+", 2, "*", syntreeg(1, "/", syntreeg(4)))
usepars(); rt :c(test_arit1) "1*2+3^4*5^6+7^8"
usetree(); rtu:c(test_arit1) "1*2+3^4*5^6+7^8"
--]]
-- «arit2» (to ".arit2")
--
-- test_arit2: the same kind of arithmetic grammar as test_arit1, but
-- written with "{.u.NAME. ... }" groups instead of "-> f". That
-- notation is not lpeg.re syntax: it has to be expanded by preproc_u,
-- so this grammar must be compiled by an Re object whose preproc is
-- preproc_u (the tests below use rtu).
test_arit2 = [[
top <- e3
number <- {[0-9]+}
e3 <- {.u.+. e2 ({"+"} e2)* }
e2 <- {.u.*. e1 ({"*"} e1)* }
e1 <- {.u.^. e0 ({"^"} e0)* }
e0 <- {.u.(). {"("} e3 {")"} } / {.u.n. number }
]]
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
rtu:c(test_arit2) :p()
rtu:c(test_arit2) '11+20'
rtu:c(test_arit2) '1*2+3^4*5^6+7^8'
rtu:c(test_arit2) '1+(2*3^4+5)'
--]]
-- «arit2-output» (to ".arit2-output")
--[[
> = rtu:c(test_arit2) '1+(2*3^4+5)'
1 + ( 2 * 3 ^ 4 + 5 )
- - - - -
n n n n n
- - ╰─────╯ -
^ ^ ^ ^
- ╰───────────╯ -
* * *
╰─────────────────╯
+
╰───────────────────────╯
()
╰───────────────────────╯
^
╰───────────────────────╯
*
╰─────────────────────────────╯
+
>
--]]
-- «right» (to ".right")
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
test_right = [[
top <- AS
A <- {"a"}
AT <- A (! A)
AS <- {.u. A (AT / AS) }
]]
= rtu:c(test_right) 'aaaa'
test_left = [[
top <- AS
A <- {"a"}
AI <- A
AS <- {.u. A (AT / AS) }
]]
= rtu:c(test_right) 'aaaa'
--]==]
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"
gram = [=[
stuff <- {~ ( wordsf / special / . ) * ~}
wordsf <- {~ ("" -> "\textsf{") word ("" -> "}")~}
word <- {~ ([A-Za-z0-9]+ / ('_' -> '\_')) + ~}
special <- ( '[' / ']' / '.' ) -> specials
]=]
specials = {
["["] = "<<",
["]"] = ">>",
["."] = "\\,",
}
rtg = Re { print = PP, grammar = gram, defs = {specials = specials} }
rtg:c 'top <- word' 'foo_bar0plic bletch'
rtg:c 'top <- wordsf' 'foo_bar0plic bletch'
rtg:c 'top <- stuff' 'foo_bar0plic bletch'
rtg:c 'top <- stuff' 'foo_bar0plic.[bletch]'
--]==]
-- Local Variables:
-- coding: utf-8-unix
-- End: