Warning: this is an htmlized version!
The original is here, and
the conversion rules are here.
-- This file:
--   http://angg.twu.net/LUA/Re.lua.html
--   http://angg.twu.net/LUA/Re.lua
--           (find-angg "LUA/Re.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>

-- Introduction
-- ============
-- I wrote the class Re because I found lpeg.re quite clumsy to use
-- "in natura". Most of the time when I was developing and debugging
-- patterns I would need to change a part of the pattern many while keeping
-- the rest fixed, and I would have to print the result of "match"ing
-- those patterns agains some test strings... and often I would have
-- to use a fixed set of definitions, a special printing function, and
-- a proprocessor.
--
-- The class Re lets me create objects like this one,
--
--   rtt = Re { print = PP,
--              preproc = preproc_u,
--              defs = { u = und },
--              grammar = ' Number <- { [0-9]+ ( "." [0-9]+ )? } ',
--            }
--
-- and use them like this:
--
--   rtt:c ' top <- { Number "->" Number } -> u ' '23->45'
--
-- I can put lots of calls like these in test blocks, and indicate
-- their outputs in comments. See the tests - and note that the first
-- test blocks are demos/tutorials for lpeg.re in e-script form.

-- Old comments:
--
-- Classes for testing and documenting lpeg.re.
-- This is currently a mess, and far from being standalone code.
-- At this moment this needs to be in ~/LUA/, and needs my init file.
-- See the instructions here:
--   (find-angg "LUA/README.e")


-- See: (find-angg "LUA/lpeg-minitut.lua")
--      (find-es "lua-intro" "lpeg-quickref")
--      (find-es "lpeg" "re-quickref")
-- The tests need the class Tos.

-- «.Re»			(to "Re")
-- «.Re-tests»			(to "Re-tests")
-- «.grammars»			(to "grammars")
-- «.und»			(to "und")
-- «.preproc_u»			(to "preproc_u")
-- «.arit1»			(to "arit1")
-- «.arit2»			(to "arit2")
-- «.arit2-output»		(to "arit2-output")
-- «.right»			(to "right")



require "re"
require "Rect"


-- «Re»  (to ".Re")
-- Also here:
-- (find-angg "LUA/lua50init.lua" "Re")
--
Re = Class {
  type = "Re",
  __tostring = function (r) return mytostringv(r) end,
  __call = function (r, subj, init) return r:test(subj, init) end,
  --
  __index    = {
    grammar = "",
    defs    = {},
    preproc = function (res0) return res0 end,
    --
    -- Every call to r:compile(str) overwrites
    -- the fields r.res0, r.res, and r.rec of r.
    compile = function (r, res0)
        local res = r.preproc(res0) .. r.grammar
        r.res0 = res0
        r.res  = res
        if res == res0 then r.res0 = nil end
        r.rec = re.compile(r.res, r.defs)
        return r
      end,
    match = function (r, subj, init)
        return r.rec:match(subj, init)
      end,
    test = function (r, subj, init)
        if    r.print
        then (r.print)(r:match(subj, init))
        else return    r:match(subj, init)
        end
      end,
    --
    p = function (r, ...) print(r.res) end,
    c = function (r, ...) return r:compile(...) end,
    cc = function (r, ...) return copy(r):compile(...) end,
  },
}

rt0 = Re { }
rt  = Re { print = print }
rtp = Re { print = PP }



-- «Re-tests»  (to ".Re-tests")
-- See: (find-es "lpeg" "re-quickref")
--
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"

= rtp:c ' { "a" { "b" } } '
= rtp:c ' { "a" { "b" } } '.res
  rtp:c ' { "a" { "b" } } ' :p()
  rtp:c ' { "a" { "b" } } ' :test "ab" 
  rtp:c ' { "a" { "b" } } '       "ab" 

  rt0:c ' { "a" { "b" } } '       "ab" 
= rt0:c ' { "a" { "b" } } '       "ab" 

-- {}               position capture
-- { p }            simple capture
-- {: p :}          anonymous group capture
-- {:name: p :}     named group capture
-- {~ p ~}          substitution capture
-- {| p |}          table capture
-- p -> 'string'    string capture
-- p -> "string"    string capture
-- p -> num         numbered capture
--
rtp:c  '      "a"          "b"        '  'abc'   --> 3
rtp:c  '      "a"          "b"    {}  '  'abc'   --> 3
rtp:c  ' {}   "a"          "b"    {}  '  'abc'   --> 1 3
rtp:c  ' {    "a"          "b"     }  '  'abc'   --> "ab"
rtp:c  ' {|   "a"          "b"    |}  '  'abc'   --> {}
rtp:c  ' {| { "a"          "b"  } |}  '  'abc'   --> {1="ab"}
rtp:c  ' {| { "a" } {      "b"  } |}  '  'abc'   --> {1="a", 2="b"}
rtp:c  ' {| { "a" } {:     "b" :} |}  '  'abc'   --> {1="a", 2="b"}
rtp:c  ' {| { "a" } {:foo: "b" :} |}  '  'abc'   --> {1="a", "foo"="b"}
rtp:c  '            {:foo: "a" :}     '  'abc'   --> 2
rtp:c  ' {|         {:foo: "a" :} |}  '  'abc'   --> {"foo"="a"}
rtp:c  ' {          {:foo: "a" :}  }  '  'abc'   --> "a"

rtp:c  ' {  { "a" }        "b"     }  '  'abc'   --> "ab" "a"
rtp:c  ' {  { "a" } {      "b"  }  }  '  'abc'   --> "ab" "a" "b"
rtp:c  ' {  { "a" } {|     "b" |}  }  '  'abc'   --> "ab" "a" "b"
rtp:c  ' {  { "a" } {|    {"b"}|}  }  '  'abc'   --> "ab" "a" {1="b"}
rtp:c  ' {  { "a" } {|    {"b"}|}  }  '  'abc'   --> "ab" "a" {1="b"}

rtp:c  ' {| {:foo: "a"       :}   |}  '  'abc'   --> "a"
rtp:c  ' {| {:foo: ""->"bar" :}   |}  '  'abc'   --> {"foo"="bar"}

rtp:c [[ {| { "a" } {:B: "b" :}
            { "c" } {:D: "d" :} |} ]]   'abcd'   --> {1="a", 2="c", "B"="b", "D"="d"}

rtp:c  ' {~ "a"->"AA" "b" "c"->"AA" ~} ' 'abc'   --> "AAbCC"

rtp:c  ' ( ""->"a" ""->"b" ""->"c" ""->"d" ) -> 3 ' 'abcd'   --> "c"

-- p -> name        function/query/string capture equivalent to p / defs[name]
-- p => name        match-time capture equivalent to lpeg.Cmt(p, defs[name])
-- p ~> name        fold capture equivalent to lpeg.Cf(p, defs[name])

-- (find-es "lpeg" "re-quickref")
-- "string"         literal string
-- [class]          character class
-- p *              zero or more repetitions
-- p +              one or more repetitions
--  name             non terminal
-- p1 p2            concatenation
-- p1 / p2          ordered choice
--  ( p )            grouping
-- { p }            simple capture
-- (name <- p)+     grammar
-- p -> name        function/query/string capture equivalent to p / defs[name]


--]==]




-- «grammars»  (to ".grammars")
--
rtq_grammar = "\n\n" .. [=[
  number     <- { [-+]? [0-9]+ ( "." [0-9]+ )? }
  quotedlit  <- { '"' [^"]* '"' }
  dquotedlit <- { "'" [^']* "'" }
  word       <- { [!-~]+ }
  --
  Number     <- {| ""->"Number"     number                  |}
  QuotedLit  <- {| ""->"QuotedLit" (quotedlit / dquotedlit) |}
  ]=]

rtq = Re { print = PP, grammar = rtq_grammar }

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"

rtq:c ' top <-              Number    ' :p()
rtq:c ' top <-              Number    ' '42'
rtq:c ' top <-    ""->"Foo" Number    ' '42foo'
rtq:c ' top <- {| ""->"Foo" Number |} ' '42foo'

--]]






-- «preproc_u»  (to ".preproc_u")
--
und = function (A) A[0] = A.o; return UndTree.from(A) end
Re.__index.defs.u = function (A) return und(A) end

preproc_u0 = function (cstr)
    local fmt2 = '{| {:o: ""->"%s" :} %s |} -> u '
    local fmt1 =                  '{| %s |} -> u '
    local a,b  = cstr:match("^{%.u%.([!-~]+)%.(.+)}$")
    if a then return format(fmt2, a, preproc_u(b)) end
    local   b  = cstr:match("^{%.u%.(.+)}$")
    if b then return format(fmt1,    preproc_u(b)) end
  end
preproc_u = function (str)
    return (str:gsub("(%b{})", preproc_u0))
  end

rtu = Re { preproc = preproc_u }

-- «und»  (to ".und")
-- See: (find-angg "LUA/Rect.lua" "UndTree-tests")
--
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"

= und {o=22, 33, 44}
= und       {33, 44}
= und {o=22, 33,           {44, 55}}
= und {o=22, 33,     {o=66, 44, 55}}
= und {o=22, 33, und {o=66, 44, 55}}

PPPV(und {o=22, 33,     {o=66, 44, 55}})
PPPV(und {o=22, 33, und {o=66, 44, 55}})

= preproc_u 'foo {.u.bletch     } bar'
= preproc_u 'foo {.u.     bletch} bar'
= preproc_u 'foo {.u.pip. bletch} bar'

rtu:c ' {.u.                "a"     "b"             } ' :p()
rtu:c ' {.u.foo.            "a"     "b"             } ' :p()
rtu:c ' {.u.foo.            "a"     "b"             } ' 'ab'
rtu:c ' {.u.foo.          { "a" } { "b" }           } ' 'ab'
rtu:c ' {.u.              { "a" } { "b" }           } ' 'ab'
rtu:c ' {.u.     {.u.     { "a" } { "b" } } { "c" } } ' 'abc'
rtu:c ' {.u.foo. {.u.bar. { "a" } { "b" } } { "c" } } ' 'abc'

--]]





-- «arit1»  (to ".arit1")
-- Adapted from: (find-angg "LUA/lpeg-minitut.lua")
--          See: (find-angg "LUA/Rect.lua" "SynTree-tests")
--
syntreeg = function (...)
    local A = {...}
    local S = SynTree {[0] = A[2]}
    for i=1,#A,2 do table.insert(S, A[i]) end
    return S
  end

-- "pat -> f" runs packcaptures on the captures of pat.
-- packcaptures (...) runs packcaptures0(...).
-- packcaptures0(...) can run either
--   packcaptures_pars(...) or
--   packcaptures_tree(...); use
-- usepars() and usetree() to select which.
--
packcaptures = function (...)
    local A={...}
    return #A==1 and A[1] or packcaptures0(...)
  end
Re.__index.defs.f = packcaptures

packcaptures_pars = function (...) return "("..table.concat({...}, " ")..")" end
packcaptures_tree = function (...) return syntreeg(...) end
usepars = function () packcaptures0 = packcaptures_pars end
usetree = function () packcaptures0 = packcaptures_tree end

usetree()

test_arit1 = [[
  e  <- e3
  e3 <- (e2 ({"+"} e2)*) -> f
  e2 <- (e1 ({"*"} e1)*) -> f
  e1 <- (e0 ({"^"} e0)*) -> f
  e0 <- "(" e3 ")" / {[0-9]+}
]]

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"

= syntreeg(1, "+", 2)
= syntreeg(1, "+", 2, "*", 3)
= syntreeg(1, "+", 2, "*", syntreeg(1, "/", 2))
= syntreeg(1, "+", 2, "*", syntreeg(1, "/", syntreeg(4)))

usepars(); rt :c(test_arit1) "1*2+3^4*5^6+7^8"
usetree(); rtu:c(test_arit1) "1*2+3^4*5^6+7^8"

--]]



-- «arit2»  (to ".arit2")
--
test_arit2 = [[
  top <- e3
  number <- {[0-9]+}
  e3 <- {.u.+.  e2 ({"+"} e2)* }
  e2 <- {.u.*.  e1 ({"*"} e1)* }
  e1 <- {.u.^.  e0 ({"^"} e0)* }
  e0 <- {.u.(). {"("} e3 {")"} } / {.u.n. number }
]]

--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"

rtu:c(test_arit2) :p()
rtu:c(test_arit2) '11+20'
rtu:c(test_arit2) '1*2+3^4*5^6+7^8'
rtu:c(test_arit2) '1+(2*3^4+5)'

--]]

-- «arit2-output»  (to ".arit2-output")
--[[

> = rtu:c(test_arit2) '1+(2*3^4+5)'
1  +  (  2  *  3  ^  4  +  5  )
-        -     -     -     -
n        n     n     n     n
-        -     ╰─────╯     -
^        ^     ^           ^
-        ╰───────────╯     -
*        *                 *
         ╰─────────────────╯
         +
      ╰───────────────────────╯
      ()
      ╰───────────────────────╯
      ^
      ╰───────────────────────╯
      *
╰─────────────────────────────╯
+
> 

--]]




-- «right»  (to ".right")

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"

test_right = [[
  top <- AS
    A <- {"a"}
   AT <- A (! A)
   AS <- {.u. A (AT / AS) }
]]

= rtu:c(test_right) 'aaaa'

test_left = [[
  top <- AS
    A <- {"a"}
   AI <- A
   AS <- {.u. A (AT / AS) }
]]

= rtu:c(test_right) 'aaaa'

--]==]



--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re.lua"

gram = [=[
  stuff   <- {~ ( wordsf / special / . ) * ~}
  wordsf  <- {~ ("" -> "\textsf{") word ("" -> "}")~}
  word    <- {~  ([A-Za-z0-9]+ / ('_' -> '\_')) +  ~}
  special <- ( '[' / ']' / '.' ) -> specials
]=]

specials = {
  ["["] = "<<",
  ["]"] = ">>",
  ["."] = "\\,",
}

rtg = Re { print = PP, grammar = gram, defs = {specials = specials} }
rtg:c 'top <- word'   'foo_bar0plic bletch'
rtg:c 'top <- wordsf' 'foo_bar0plic bletch'
rtg:c 'top <- stuff'  'foo_bar0plic bletch'
rtg:c 'top <- stuff'  'foo_bar0plic.[bletch]'


--]==]






-- Local Variables:
-- coding:  utf-8-unix
-- End: