-- LUA/Interpret.lua (htmlized)
--
-- Warning: this is an htmlized version!
-- The original is here, and
-- the conversion rules are here.
-- This file:
--   http://angg.twu.net/LUA/Interpret.lua.html
--   http://angg.twu.net/LUA/Interpret.lua
--           (find-angg "LUA/Interpret.lua")
-- Author: Eduardo Ochs <eduardoochs@gmail.com>
--
-- (defun e () (interactive) (find-angg "LUA/Interpret.lua"))
-- (find-angg "SRF/srfx.lua" "tokenisation")
--
-- This is obsolete.
-- The new version is here:
--   (find-angg "SRF/srfa.lua")



-- «.re-grammar»		(to "re-grammar")
-- «.Interpreter»		(to "Interpreter")
-- «.Interpreter-tests»		(to "Interpreter-tests")


--                                                            
--  _ __ ___    __ _ _ __ __ _ _ __ ___  _ __ ___   __ _ _ __ 
-- | '__/ _ \  / _` | '__/ _` | '_ ` _ \| '_ ` _ \ / _` | '__|
-- | | |  __/ | (_| | | | (_| | | | | | | | | | | | (_| | |   
-- |_|  \___|  \__, |_|  \__,_|_| |_| |_|_| |_| |_|\__,_|_|   
--             |___/                                          
--
-- «re-grammar»  (to ".re-grammar")
-- A parser for srfx tokens written as a grammar in lpeg.re

re = require "re"

-- Grammar (lpeg.re syntax) describing a single srfx token.
--
-- Helper rules:
--   s    one space;  c  one non-space character
--   eol  end of subject;  eow  "end of word": next char is a space, or
--        end of subject (a pure lookahead, consumes nothing)
--   b/e  position captures stored in the fields `b` and `e`
--
-- The "...0" rules store the token text in the field `body` (numbers also
-- get a `base` field); the "...1" rules add the field `kind`.
-- `token` skips leading spaces and returns one table with the fields
-- b, kind, body, (base,) e.
--
-- Order of the alternatives in `number1` matters: PEG choice is ordered
-- and committed.  With `decnumber0` first, the input "0x23" matched the
-- leading "0" as a decimal, the following `eow` failed, and the whole
-- `number1 eow` branch was abandoned without ever trying `hexnumber0`,
-- so hex literals were reported as plain words.  The prefixed forms
-- ("0x...", "o...") are therefore tried before the decimal form.
--
-- `word1` and `rest1` are defined but not referenced by `token`.
srf_gram = [=[
  s   <- ' '
  eol <- !.
  eow <- &s / !.
  c   <- [^ ]

  b   <- {:b: {} :}
  e   <- {:e: {} :}

  dqliteral0 <-  '"' {:body: { [^"]* } :} '"'
  sqliteral0 <-  "'" {:body: { [^']* } :} "'"
  decnumber0 <-      {:body: { [-+]? [0-9]+ ('.' [0-9]+)? } :} {:base: "" -> "10" :}
  hexnumber0 <- "0x" {:body: { [0-9A-Fa-f]+ }               :} {:base: "" -> "16" :}
  octnumber0 <-  "o" {:body: { [0-7]+ }                     :} {:base: "" ->  "8" :}
  knownword0 <-      {:body: { c+ } => known :}
  word0      <-      {:body: { c+ }          :}

  strlit1 <- {:kind: "" -> "strlit" :} (dqliteral0 / sqliteral0)
  number1 <- {:kind: "" -> "numlit" :} (hexnumber0 / octnumber0 / decnumber0)
  kword1  <- {:kind: "" -> "word"   :} knownword0
  word1   <- {:kind: "" -> "word"   :} word0
  rest1   <- {:kind: "" -> "word"   :} word0

  token <- {| s* b (strlit1 eow / number1 eow / kword1 eow) e |}

]=]

-- Callbacks referenced by name from srf_gram (the `=> known` match-time
-- hook in `knownword0`).  `known` is a stub here: it accepts every
-- candidate word by always returning true.
srf_defs = {}
srf_defs.known = function (_subj, _pos, _body)
  return true
end

-- Grammar objects built from srf_gram/srf_defs.  `Re` and `PP` are not
-- defined in this file; presumably `Re` wraps lpeg.re compilation and
-- `:cc` compiles the grammar with the given top rule -- TODO confirm.
--
-- srf_reP additionally carries `print = PP`; srf_re0 does not.
srf_reP = Re({
  grammar = srf_gram,
  defs    = srf_defs,
  print   = PP,
})
srf_re0 = Re({
  grammar = srf_gram,
  defs    = srf_defs,
})

-- NOTE(review): all three parsers are compiled from the same top rule,
-- so parseword/parserest currently behave exactly like parsetoken.
srf_parsetoken = srf_re0:cc("top <- token")
srf_parseword  = srf_re0:cc("top <- token")
srf_parserest  = srf_re0:cc("top <- token")



--  ___       _                           _            
-- |_ _|_ __ | |_ ___ _ __ _ __  _ __ ___| |_ ___ _ __ 
--  | || '_ \| __/ _ \ '__| '_ \| '__/ _ \ __/ _ \ '__|
--  | || | | | ||  __/ |  | |_) | | |  __/ ||  __/ |   
-- |___|_| |_|\__\___|_|  | .__/|_|  \___|\__\___|_|   
--                        |_|                          
--
-- «Interpreter»  (to ".Interpreter")

-- Interpreter: a cursor over one line of text.  An instance holds
--   subj             the line being parsed,
--   pos              the position where the next parse starts,
--   lastparsedtoken  the table describing the most recently parsed token
--                    (set by setlastparsedtoken; absent before any parse).
-- `Class` is defined elsewhere, not in this file.
Interpreter = Class {
  type = "Interpreter",

  -- Interpreter.new(line [, pos]): start parsing `line` at `pos`
  -- (position 1 when `pos` is omitted).
  new = function (line, pos)
    local fields = {subj = line, pos = pos or 1}
    return Interpreter(fields)
  end,

  __index = {
    --
    -- Low-level parsers that expect patterns.

    -- Remember `o` as the last parsed token, move the cursor to its end
    -- position (`o.e`), and return `o`.
    setlastparsedtoken = function (terp, o)
      terp.lastparsedtoken = o
      terp.pos = o.e
      return o
    end,

    -- Match the Lua string pattern `pat` against the subject, starting
    -- at the cursor.  `pat` must yield three captures: begin position,
    -- body, end position.  On success the token table (tagged with
    -- `kind`) is recorded and returned; on failure nothing is returned
    -- and the cursor does not move.
    parsepat = function (terp, pat, kind)
      local first, text, last = terp.subj:match(pat, terp.pos)
      if not first then return end
      return terp:setlastparsedtoken({b = first, kind = kind, body = text, e = last})
    end,

    -- Same idea for an lpeg pattern: its match result is taken as the
    -- token table itself.  Returns nothing when the match fails.
    parselpegpat = function (terp, lpegpat)
      local tok = lpegpat:match(terp.subj, terp.pos)
      if not tok then return end
      return terp:setlastparsedtoken(tok)
    end,

    --
    -- High-level parsers that use predefined patterns.

    -- Parse one whitespace-delimited word: skip leading whitespace, then
    -- capture the start position, a run of non-space characters, and the
    -- end position.
    parseword = function (terp)
      return terp:parsepat("%s*()(%S+)()", "word")
    end,

    -- Parse one token with the pattern stored in srf_parsetoken.rec
    -- (presumably the compiled lpeg pattern -- TODO confirm).
    parsetoken = function (terp)
      local compiled = srf_parsetoken.rec
      return terp:parselpegpat(compiled)
    end,

    --
    -- Functions for testing and debugging.

    -- Build a line as wide as the subject, blank except for a marker
    -- over the span b..e-1 (defaulting to the last parsed token).  The
    -- marker is a copy of that span when `usecopy` is true-ish, and a
    -- run of "^" otherwise.  Uses the non-standard string method
    -- `replace`, which is defined elsewhere.
    carets = function (terp, usecopy, b, e)
      if not b then b = terp.lastparsedtoken.b end
      if not e then e = terp.lastparsedtoken.e end
      local marker
      if usecopy then
        marker = terp.subj:sub(b, e-1)
      else
        marker = ("^"):rep(e-b)
      end
      local blanks = (" "):rep(#terp.subj)
      return blanks:replace(b-1, marker)
    end,

    -- Short description of the last token: its kind, body (in double
    -- quotes) and base, each rendered as " field:value"; fields that
    -- are missing contribute nothing.
    tokenshort = function (terp)
      local tok = terp.lastparsedtoken
      local function describe(field, q)
        local value = tok[field]
        if not value then return "" end
        local quote = q or ""
        return " "..field..":"..quote..value..quote
      end
      return describe("kind")..describe("body", '"')..describe("base")
    end,

    -- Print the subject, then call terp[method] (default "parsetoken")
    -- repeatedly until it returns a false value, printing each parsed
    -- token under the subject.  With `verbose`, also dump each token
    -- table via PP followed by a blank line.
    test = function (terp, verbose, method)
      if not method then method = "parsetoken" end
      print(terp.subj)
      while true do
        local tok = terp[method](terp)
        if not tok then break end
        print(terp:carets("usecopy")..terp:tokenshort())
        if verbose then
          PP(tok)
          print()
        end
      end
    end,
  },
}

-- «Interpreter-tests»  (to ".Interpreter-tests")
--
--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "Interpret.lua"

line = [=[ : 5* 5 * ; 'foo bar' + 0x23 ]=]
Interpreter.new(line):test()
Interpreter.new(line):test(nil, "parseword")
Interpreter.new(line):test("verbose")

line = [=[ : 5* 5 * ; 'foo bar' .. 0x23 ]=]
terp = Interpreter.new(line)
terp:parsetoken()
= terp.subj.."\n"..terp:carets()
= terp.subj.."\n"..terp:carets("useword")

--]==]