|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
-- This file:
-- http://anggtwu.net/LUA/Re2.lua.html
-- http://anggtwu.net/LUA/Re2.lua
-- (find-angg "LUA/Re2.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
--
-- This file implements a :pm(...) "method" for re.lua that lets us
-- compare the syntax of Lua patterns, lpeg, and re. See this test
-- block:
-- (find-angg "LUA/ELpeg1.lua" "lpeg.pm-tests")
--
-- The long story
-- ==============
-- In 2012 I had a project that needed a "precedence parser" that
-- could parse arithmetical expressions. At that point I (still)
-- couldn't wrap my mind around "pure" lpeg, so I tried to learn
-- re.lua instead, and I wrote this class:
-- (find-angg "LUA/Re.lua")
-- (find-angg "LUA/lua50init.lua" "Re")
-- (find-angg "LUA/lua50init.lua" "re_expand_INFIX")
-- (find-angg "LUA/lua50init.lua" "math-grammar")
-- that allowed me to use a preprocessor on patterns for re.lua. Using
-- that was very clumsy, though.
--
-- In 2022 I tried to learn lpegrex. It was much more powerful that
-- re.lua, but after a while I realized that it had the same defects
-- as re.lua:
--
-- 1. it didn't have some features that I needed,
-- 2. it didn't let us explore the lpeg patterns that it generated
-- from its input string,
-- 3. I couldn't modify parts of its code from a REPL,
-- 4. it was hard to explore, hack, and extend (IMO),
-- 5. its docs weren't very clear (IMO),
-- 6. its developer was too busy to help me =(.
--
-- In 2023 I wrote ELpeg1.lua, that was an attempt to write a
-- hacker-friendly version of (the back-end parts of) re.lua and
-- lpegrex.lua, without the front-end part that parses a grammar given
-- as a string. In dec/2023 I mentioned ELpeg1.lua in my presentation
-- at the EmacsConf2023, and I wrote :pm(...) methods for re.lua and
-- lpegrex.lua to let me compare their input languages. See:
-- (find-angg "LUA/LpegRex3.lua")
--
-- (defun r2 () (interactive) (find-angg "LUA/Re2.lua"))
-- (defun r1 () (interactive) (find-angg "LUA/Re.lua"))
-- (defun rq () (interactive) (find-es "lpeg" "re-quickref"))
-- (find-lpegremanual "")
-- (find-lpegremanual "#ex")
-- «.Re» (to "Re")
-- «.Re-tests» (to "Re-tests")
-- «.basic-1» (to "basic-1")
re = require "re"
-- ____
-- | _ \ ___
-- | |_) / _ \
-- | _ < __/
-- |_| \_\___|
--
-- «Re» (to ".Re")
Re = Class {
type = "Re",
from = function (str) return Re {str=str} end,
__index = {
defs = {},
compile = function (r) return re.compile(r.str, r.defs) end,
find = function (r,subj,init) return re.find(subj, r:compile(), init) end,
match = function (r,subj) return re.match(subj, r:compile()) end,
gsub = function (r,subj,repl) return re.gsub(subj, r:compile(), repl) end,
pm = function (r,subj) return PP(r:match(subj)) end,
},
}
rre = Re.from
-- «Re-tests» (to ".Re-tests")
-- See: (find-angg "LUA/Re.lua" "Re-tests")
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re2.lua"
print(re.find ("the number 423 is odd", "[0-9]+")) --> 12 14
print(re.match("the number 423 is odd", "({%a+} / .)*")) --> the number is odd
print(re.match("the number 423 is odd", "s <- {%d+} / . s")) --> 423
print(re.gsub ("hello World", "[aeiou]", ".")) --> h.ll. W.rld
= Re.from "[0-9]+" :find "the number 423 is odd"
= Re.from "({%a+} / .)*" :match "the number 423 is odd"
= Re.from "s <- {%d+} / . s" :match "the number 423 is odd"
= Re.from "[aeiou]" :gsub ("hello World", ".")
Re.from "({%a+} / .)*" :pm "the number 423 is odd"
Re.from "s <- {%d+} / . s" :pm "the number 423 is odd"
rre "({%a+} / .)*" :pm "the number 423 is odd"
rre "s <- {%d+} / . s" :pm "the number 423 is odd"
rre "[io]" :pm "i42"
rre "{[io]}" :pm "i42"
rre "{[io]} {[0-9]+}" :pm "i42"
rre "'(' {[io]} {[0-9]+} ')'" :pm "(i42) 2+3;"
rre "{ '(' {[io]} {[0-9]+} ')' }" :pm "(i42) 2+3;"
rre "{ '(' {[io]} {[0-9]+} ')' } {.*}" :pm "(i42) 2+3;"
rre "{| '(' {[io]} {[0-9]+} ')' |} {.*}" :pm "(i42) 2+3;"
rre "{| '(' {:a: [io]:} {:b: [0-9]+:} ')' |} {.*}" :pm "(i42) 2+3;"
--]==]
-- (find-lpegremanual "")
-- ( p ) grouping
-- 'string' literal string
-- "string" literal string
-- [class] character class
-- . any character
-- {} position capture
-- { p } simple capture
-- {: p :} anonymous group capture
-- {:name: p :} named group capture
-- {~ p ~} substitution capture
-- {| p |} table capture
-- p ? optional match
-- p * zero or more repetitions
-- p + one or more repetitions
-- p^num exactly n repetitions
-- p^+num at least n repetitions
-- p^-num at most n repetitions
-- p -> 'string' string capture
-- p -> "string" string capture
-- p -> num numbered capture
--
-- «basic-1» (to ".basic-1")
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Re2.lua"
string.pm = function (spat,subj) PP(subj:match(spat)) end
( ".b(.)(d)" ) :pm "abcd"
("(.b(.)(d))") :pm "abcd"
rre " . {.} 'c' {'d'} " :pm "abcd"
rre " { . {.} 'c' {'d'} } " :pm "abcd"
rre " . {.} 'c' {'d'} {} " :pm "abcd"
rre " { . {.} 'c' {'d'} {} } " :pm "abcd"
rre " { 'a' { 'b' } } " :pm "ab"
rre " 'a' 'b' " :pm "abc" --> 3
rre " 'a' 'b' {} " :pm "abc" --> 3
rre " {} 'a' 'b' {} " :pm "abc" --> 1 3
rre " { 'a' 'b' } " :pm "abc" --> 'ab'
rre " {| 'a' 'b' |} " :pm "abc" --> {}
rre " {| { 'a' 'b' } |} " :pm "abc" --> {1='ab'}
rre " {| { 'a' } { 'b' } |} " :pm "abc" --> {1='a', 2='b'}
rre " {| { 'a' } {: 'b' :} |} " :pm "abc" --> {1='a', 2='b'}
rre " {| { 'a' } {:foo: 'b' :} |} " :pm "abc" --> {1='a', 'foo'='b'}
rre " {:foo: 'a' :} " :pm "abc" --> 2
rre " {| {:foo: 'a' :} |} " :pm "abc" --> {'foo'='a'}
rre " { {:foo: 'a' :} } " :pm "abc" --> 'a'
rre " { { 'a' } 'b' } " :pm "abc" --> 'ab' 'a'
rre " { { 'a' } { 'b' } } " :pm "abc" --> 'ab' 'a' 'b'
rre " { { 'a' } {| 'b' |} } " :pm "abc" --> 'ab' 'a' 'b'
rre " { { 'a' } {| {'b'}|} } " :pm "abc" --> 'ab' 'a' {1='b'}
rre " { { 'a' } {| {'b'}|} } " :pm "abc" --> 'ab' 'a' {1='b'}
rre " {| {:foo: 'a' :} |} " :pm "abc" --> 'a'
rre " {| {:foo: ''->'bar' :} |} " :pm "abc" --> {'foo'='bar'}
rre [[ {| { 'a' } {:B: 'b' :}
{ 'c' } {:D: 'd' :} |} ]] :pm "abcd" --> {1='a', 2='c', 'B'='b', 'D'='d'}
rre " {~ 'a'->'AA' 'b' 'c'->'AA' ~} " :pm "abc" --> 'AAbCC'
rre " ( ''->'a' ''->'b' ''->'c' ''->'d' ) -> 3 " :pm "abcd" --> 'c'
--]==]
-- %name pattern defs[name] or a pre-defined pattern
-- name non terminal
-- <name> non terminal
-- Local Variables:
-- coding: utf-8-unix
-- End: