-- This file:
-- http://anggtwu.net/blogme3/htmlize-utf8.lua.html
-- http://anggtwu.net/blogme3/htmlize-utf8.lua
-- (find-angg "blogme3/htmlize-utf8.lua")
-- Author: Eduardo Ochs
--
-- Until sep/2021 Blogme3 was only able to parse and htmlize files in
-- utf-8 by converting them to unibyte first...
-- Based on:
-- (find-blogme3 "escripts.lua" "anchor")
-- (find-blogme3 "charset.lua" "sgmlify")
loadlpeg()
-- SpecialChar
--
-- Maps each special character to the text that replaces it in the
-- HTML output.
-- "&", "<" and ">" are HTML metacharacters: they must be emitted as
-- entities, otherwise text copied from the input would be read as
-- markup by the browser.
-- "«" and "»" are copied through unchanged.
-- NOTE(review): the "" key is kept from the original table; a pattern
-- that matches exactly one special character never yields the empty
-- string, so this entry looks unreachable -- confirm whether a control
-- character was lost from the key.
SpecialChars_table = {
  ["&"] = "&amp;",
  ["<"] = "&lt;",
  [">"] = "&gt;",
  [""] = "*",
  ["«"] = "«",
  ["»"] = "»",
}
-- SpecialChar0 only recognizes one special character ("&", "<", ">",
-- "«" or "»"); SpecialChar recognizes the same characters and replaces
-- the match by its entry in SpecialChars_table.
SpecialChar0 = lpeg.S("&<>") + lpeg.P("«") + lpeg.P("»")
SpecialChar  = SpecialChar0 / SpecialChars_table
-- SpecialSeq
--
-- Maps each special multi-character sequence to the text that replaces
-- it in the HTML output.
-- These sequences are emitted as a unit, so the per-character
-- translation never sees the "&" and "<" inside them: the values here
-- must already be HTML-escaped.
SpecialSeq_table = {
  ["•"] = "•",
  ["|&"] = "|&amp;",
  ["$S/"] = "$S/",
  ["<<'%%%'"] = "&lt;&lt;'%%%'",
}
-- SpecialSeq0 only recognizes one of the special sequences; SpecialSeq
-- recognizes the same sequences and replaces the match by its entry in
-- SpecialSeq_table.
SpecialSeq0 = lpeg.P("|&") + lpeg.P("•") + lpeg.P("$S/") + lpeg.P("<<'%%%'")
SpecialSeq  = SpecialSeq0 / SpecialSeq_table
-- Anchor
--
-- An anchor is a non-empty run of AnchorChars between "«" and "»".
-- Anchor0 captures the text between the delimiters.
AlphaNumeric = lpeg.R("AZ", "az", "09")
AnchorChar = AlphaNumeric + lpeg.S("!#$%()*+,-./:;=?@^_{|}~")
Anchor0 = ("«" * lpeg.C(AnchorChar^1) * "»")
-- Emit the anchor text as a named HTML anchor, keeping the "«" "»"
-- delimiters visible. The previous replacement, '«%1»', reproduced the
-- input verbatim, so no anchor was ever generated.
-- AnchorChar admits none of '"', "&", "<", ">", so the capture can be
-- placed in the attribute and in the element body without escaping.
Anchor = Anchor0 / '«<a name="%1">%1</a>»'
-- Url
--
-- A URL is: protocol, "://", a domain made of dot-separated runs of
-- lowercase letters, digits and "-", a mandatory "/", and then a
-- possibly empty path.
-- "https" is tried before "http" so that the longer protocol wins.
UrlProtocol = lpeg.P("https") + lpeg.P("http") + lpeg.P("ftp")
UrlDomainChar = lpeg.R("az", "09") + lpeg.S("-")
UrlDomain = UrlDomainChar^1 * ("." * UrlDomainChar^1)^0
UrlPathChar = AlphaNumeric + lpeg.S("!#$%&()*+,-./:;=?@[]^_{|}~")
UrlPath = UrlPathChar^0
Url0 = UrlProtocol * "://" * UrlDomain * "/" * UrlPath
-- Turn the matched URL into a hyperlink. UrlPathChar admits "&" (but
-- none of '"', "<", ">"), so "&" is the only character that has to be
-- escaped before the URL is written into the HTML. The previous
-- replacement, '%1', copied the URL verbatim: "&" went out unescaped
-- and no link was produced.
Url = lpeg.C(Url0) / function (url)
    local escaped = url:gsub("&", "&amp;")
    return '<a href="' .. escaped .. '">' .. escaped .. '</a>'
  end
-- HtmlizeLeft
--
do
  local P, C, Ct = lpeg.P, lpeg.C, lpeg.Ct

  -- SpecialLeft translates one special item; SpecialLeft0 only
  -- recognizes where a special item starts, without captures.
  SpecialLeft  = Anchor + SpecialSeq + SpecialChar + Url
  SpecialLeft0 = SpecialSeq0 + SpecialChar0 + Url0

  -- One byte at a position where no special item starts, and a
  -- captured non-empty run of such bytes.
  NonSpecialLeftChar  = P(1) - SpecialLeft0
  NonSpecialLeftChars = C(NonSpecialLeftChar^1)

  -- Collect plain runs and translated special items into a table, then
  -- join them into a single string.
  HtmlizeLeft0 = Ct((NonSpecialLeftChars + SpecialLeft)^0)
  HtmlizeLeft  = HtmlizeLeft0 / table.concat
end
-- HtmlizeMiddle
--
do
  local P, C, Ct = lpeg.P, lpeg.C, lpeg.Ct

  -- Here only single special characters are translated.
  SpecialMiddle = SpecialChar

  -- One byte that does not start a special character, and a captured
  -- non-empty run of such bytes.
  NonSpecialMiddleChar  = P(1) - SpecialMiddle
  NonSpecialMiddleChars = C(NonSpecialMiddleChar^1)

  -- Collect plain runs and translated characters into a table, then
  -- join them into a single string.
  HtmlizeMiddle0 = Ct((NonSpecialMiddleChars + SpecialMiddle)^0)
  HtmlizeMiddle  = HtmlizeMiddle0 / table.concat
end
-- Sample inputs used by the interactive tests in the comment block
-- below: an anchor, a second sample string, and a URL.
teststr1, teststr2, teststr3 = "«foo»", "", "http://foo.bar/"
--[[
(eepitch-lua51)
(eepitch-kill)
(eepitch-lua51)
dofile "htmlize-utf8.lua"
= SpecialLeft:match(teststr1)
= SpecialLeft:match(teststr2)
= SpecialLeft:match(teststr3)
= SpecialLeft:match "<<'%%%'"
= SpecialLeft:match "|&"
= NonSpecialLeftChars:match "bla_http://foo.bar/"
= NonSpecialLeftChars:match "bla_&"
= NonSpecialLeftChars:match "http://foo.bar/"
= NonSpecialLeftChars:match "&"
= NonSpecialLeftChars:match "abc"
= HtmlizeLeft:match "ab&cd<> http://foo.bar/ !http://"
--]]
-- (find-blogme3 "anggdefs.lua" "headers")
-- (find-THfile "test-utf8.blogme")
-- (find-blogme3 "options.lua" "htmlizefile")
-- (find-blogme3 "options.lua" "basic-options-sandwich")
-- (find-blogme3 "sandwiches.lua")
-- (find-blogme3 "sandwiches-defs.lua" "use_sand_htmlizeline")
-- (find-blogme3grep "grep --color=auto -nH --null -e htmlizer *.lua")
-- (find-blogme3grep "grep --color=auto -nH --null -e htmlizelines *.lua")
-- (find-blogme3 "escripts.lua" "htmlizeline")
--[[
(eepitch-lua51)
(eepitch-kill)
(eepitch-lua51)
loadblogme3()
-- (find-fline "~/LATEX/dednat6/eoo-unicode.lua")
fname = "~/LATEX/dednat6/eoo-unicode.lua"
outfname = "/tmp/out.html"
(find-sh0 "rm -fv /tmp/out.html")
htmlizefile_utf8(fname, outfname)
-- (find-fline "/tmp/out.html")
-- file:///tmp/out.html
require "sandwiches-defs"
require "htmlize-utf8"
html_dtd = dtd_transitional .. dtd_encoding_utf8
fname = "~/blogme3/htmlize-utf8.lua"
-- (find-fline "~/LATEX/dednat6/eoo-unicode.lua")
fname = "~/LATEX/dednat6/eoo-unicode.lua"
outfname = "/tmp/out.html"
fcontents = ee_readfile(fname)
fnamestem = fnamenondirectory(fname)
warning = htmlization_warning(fname)
head = htmlization_head(fname)
= head
= warning
hl_utf8 = HtmlizeLine {
left = function (hl, str) return HtmlizeLeft :match(str) end,
plain = function (hl, str) return HtmlizeMiddle:match(str) end,
}
htmlizeline = function (linestr) return (hl_utf8:line(linestr)) end
htmlizer = htmlizelines
body = BODY(warning .. PRE(htmlizer(fcontents)))
writefile(outfname, HTML(head .. body))
writefile(outfname, html_dtd .. HTML(head .. body))
-- (find-fline "/tmp/out.html")
-- file:///tmp/out.html
--]]
-- Local Variables:
-- coding: utf-8-unix
-- End: