Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
#!/usr/bin/env lua
-- This file:
--   http://angg.twu.net/fbcache/urls.lua
--   http://angg.twu.net/fbcache/urls.lua.html
--   (find-angg "fbcache/urls.lua")
-- 2014sep07
-- (defun u () (interactive) (find-angg-upload-links "fbcache/" "urls.lua"))
-- (defun c (id) (interactive "sId: ") (find-fbcacheluafile id))
-- (defun pru () (interactive) (find-angg "fbcache/urls.lua" "prubyks"))
--
-- (find-lua51manualw3m "")
-- (find-books "__comp/__comp.el" "ierusalimschy")
-- (find-pil2page 8 "Contents")
-- (find-pil2text 8 "Contents")

-- «.basic»		(to "basic")
-- «.curl»		(to "curl")
-- «.pypp»		(to "pypp")
-- «.pypp-cache»	(to "pypp-cache")
-- «.lua-cache»		(to "lua-cache")
-- «.translations0»	(to "translations0")
-- «.urls-by-kind»	(to "urls-by-kind")
-- «.prubyks»		(to "prubyks")
-- «.testids»		(to "testids")
-- «.all-fb-urls»	(to "all-fb-urls")
-- «.url-to-ids»	(to "url-to-ids")
-- «.wget»		(to "wget")
-- «.examples»		(to "examples")

-- Dependencies:
--   rocks (for penlight)
--   posix               (find-es "lua5" "lua-posix-wheezy")
--   lpeg: re.lua        (find-es "lua-intro" "lpeg-re-1")
--   penlight: pretty, wrap (find-es "lua5" "penlight")
--
-- NOTE(review): this file also calls helpers that are not defined here
-- (ee_expand, ee_readfile, ee_writefile, expr, sorted, split, mapconcat,
-- shallowcopy, getoutput, userocks, Set); presumably they come from the
-- author's LUA_INIT file - confirm before running standalone.

-- ======================================================================
-- Basic
-- ======================================================================
--
-- «basic»  (to ".basic")
-- Some functions copied from my LUA_INIT file.
-- (This is an attempt to make this script self-contained).
-- (find-angg "LUA/lua50init.lua")

format = string.format
write  = io.write

-- printf(fmt, ...): write format(fmt, ...) to stdout, without a newline.
printf = function (...) write(format(...)) end

-- readfile(fname): return the whole contents of the file `fname`.
-- Raises an error (via assert) if the file cannot be opened.
readfile = function (fname)
    local f = assert(io.open(fname, "r"))
    local bigstr = f:read("*a")
    f:close()
    return bigstr
  end

-- writefile(fname, bigstr): overwrite the file `fname` with `bigstr`.
-- Raises an error if the file cannot be opened or written.
writefile = function (fname, bigstr)
    local f = assert(io.open(fname, "w+"))
    local ok, err = f:write(bigstr)
    f:close()
    assert(ok, err)     -- write errors used to be silently ignored
  end

-- readfile_or_nil(fname): like readfile, but returns nil instead of
-- raising an error when the file cannot be read.
readfile_or_nil = function (fname)
    local ok,contents = pcall(function () return readfile(fname) end)
    if ok then return contents end
  end

-- file_exists(fname): true-ish (the file contents) if `fname` can be
-- read, nil otherwise. Note that this reads the whole file.
file_exists = function (fname) return readfile_or_nil(fname) end -- quick hack

-- cfmt(fmt): "curried format" - returns a function that applies `fmt`
-- to its arguments.
cfmt = function (fmt) return function (...) return format(fmt, ...) end end

-- ee_fmt(fmt): like cfmt, but passes `fmt` through ee_expand first.
ee_fmt = function (fmt) return cfmt(ee_expand(fmt)) end

require "re"
require "posix"
userocks()    -- (find-angg "LUA/lua50init.lua" "userocks")
pretty = require 'pl.pretty'   -- (find-es "lua5" "pl.pretty")

-- pp0(o): return `o` pretty-printed as a string.
-- pp(o):  print `o` pretty-printed.
pp0 = function (o) return pretty.write(o) end
pp  = function (o) print(pretty.write(o)) end

-- ee_readfile_pp(fname): read a file and evaluate its contents with
-- `expr`.
-- NOTE(review): this evaluates file contents as code; only use it on
-- files written by ee_writefile_pp.
ee_readfile_pp  = function (fname) return expr(ee_readfile(fname)) end

-- ee_writefile_pp(fname, o): write `o`, pretty-printed, to `fname`.
ee_writefile_pp = function (fname, o) ee_writefile(fname, pp0(o)) end

-- (find-es "lua5" "pl.pretty-fix")
-- longquote(str): return `str` wrapped in a Lua long-bracket literal,
-- "[==[\n...]==]", using the smallest number of "="s that does not
-- occur right after a "]" inside `str` (so the literal cannot be
-- closed prematurely).
longquote = function (str)
    local used = {}        -- used[n+1] is true when "]" .. n "="s occurs
    for eqs in str:gmatch("%](=*)") do used[#eqs+1] = true end
    local n = 0
    while used[n+1] do n = n + 1 end
    local eqs = string.rep("=", n)
    return '['..eqs..'[\n'..str..']'..eqs..']'
  end

-- (find-es "lua5" "pl.text.wrap")
wrap = (require "pl.text").wrap

-- wraps(bigstr): wrap each non-empty line of `bigstr` with pl.text.wrap;
-- each wrapped line gets a "\n" appended.
wraps = function (bigstr)
    local f = function (li) return table.concat(wrap(li), "\n").."\n" end
    return (bigstr:gsub("([^\n]+)", f))
  end

-- (find-angg "LUA/lua50init.lua" "ee_ls")
-- no_dots(L): remove the entries "." and ".." from the list L, in place,
-- and return L. Iterates downwards because it removes while iterating.
no_dots = function (L)
    for i=#L,1,-1 do
      if L[i]=="." or L[i]==".." then table.remove(L, i) end
    end
    return L
  end

-- ee_ls(dir): the first result of posix.dir on the expanded `dir`.
ee_ls         = function (dir) return (posix.dir(ee_expand(dir))) end
-- ee_ls_no_dots(dir): same, without "." and "..", passed through `sorted`.
ee_ls_no_dots = function (dir) return sorted(no_dots(ee_ls(dir))) end

require "re"
repat0 = ""
redefs = { concat = table.concat }

-- recomp(pat): compile the `re` pattern pat..repat0 using `redefs`.
recomp = function (pat) return re.compile(pat..repat0, redefs) end
-- retest(p)(s): print the result of re.match(s, recomp(p)).
retest = function (p) return function (s) print(re.match(s, recomp(p))) end end


-- ======================================================================
-- curl
-- ======================================================================
--
-- «curl»  (to ".curl")
-- (find-angg ".pythonrc.py")
-- https://developers.facebook.com/tools/explorer
--
-- NOTE(review): the access token used to be available only as the
-- hard-coded literal below. It can now be overridden with the
-- environment variable FB_ACCESS_TOKEN; the literal is kept as a
-- fallback so existing uses keep working. Consider removing it from
-- the source.
token = os.getenv("FB_ACCESS_TOKEN") or "CAACEdEose0cBAGL96HTCZBl3DgHTwZAWcxYr9lGr5sQ06A2IZAGZBbqhotSWeK9sLrQGWC0ovqjGqr2jxhjtEvScfWfw7YZBn9ZArgkBFuQOHRMZCYetsWqZBUlGdh1XwHe2EO9u0T18RQVDvcEjhJK2uDUSQ0bD0k17FV2cZCoZC4BFPkarDWkh62cROVF2BbqP80CiPb7CFi4otcUxiLPD22FeYVWE4k414ZD"

-- fbcurl(query): run "curl -s" on the Graph API (v2.2) URL for `query`,
-- with the access token appended, and return the result of getoutput.
-- The token is appended with "&" if `query` already has a "?", and with
-- "?" otherwise.
fbcurl = function (query)
    local c = query:match"%?" and "&" or "?"
    local url = "https://graph.facebook.com/v2.2/".. query .. c ..
                "access_token=" .. token
    -- Escape single quotes so that the URL cannot break out of the
    -- single-quoted shell word.
    local cmd = format("curl -s '%s'", (url:gsub("'", "'\\''")))
    return getoutput(cmd)
  end

-- testid(id): print the first 500 bytes of fbcurl(id).
testid = function (id) print(fbcurl(id):sub(1,500)) end

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
pp(curl_ls())
pp(pypp_ls())
for _,id in ipairs(pypp_ls()) do
  json = fbcurl(id)
  print(id..": "..#json.." bytes")
  ee_writefile(id_to_curlf(id), json)
end

-- (find-fline "~/fbcache/cache_by_id/")
-- (find-fline "~/fbcache/cache_by_id_lua/")
-- (find-fline "~/fbcache/cache_by_id_curl/")

--]==]

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
= lpeg.version()
retest [=[ cs <- { c* } c <- { [a-z] } ]=] "bop"

--]==]


-- ======================================================================
-- pypp
-- ======================================================================
--
-- «pypp»  (to ".pypp")
-- A "pypp object" is "Python pretty-printed object", i.e., a string
-- produced by Python's "pprint.pprint". We have to handle these
-- objects because we use facebook-sdk (in Python) to talk to
-- Facebook; facebook-sdk returns dict objects, that we then save into
-- files.
--
-- The main function here is pypp_parse, that converts pypp objects
-- into Lua tables.
--
-- Note that fetching information from Facebook is very slow and
-- error-prone - Facebook posts can be deleted or set to restricted,
-- our access token may expire, our internet connection may be
-- failing, and so on - so we want to keep a cache with the returned
-- objects, as pypp objects (one for each Facebook id). Reading pypp
-- objects is also relatively slow (up to 1/100 second for each), so
-- we want to keep a cache of "luapp objects" for speed; a luapp
-- object is a string containing a pretty-printed version of a Lua
-- table.
-- (find-angg "fbcache/p.py" "basic")
-- (find-es "python" "facebook-sdk")
-- (find-es "lua-intro" "lpeg-re-infix-1")
-- file:///usr/share/doc/lua-lpeg-dev/lpeg.html
-- file:///usr/share/doc/lua-lpeg-dev/re.html#ex
-- http://www.inf.puc-rio.br/~roberto/lpeg/
-- http://www.inf.puc-rio.br/~roberto/lpeg/re.html
-- (find-file "~/fbcache/cache_by_id/141539566016667")

-- The `re` grammar for pypp objects. The "list" rule starts with a
-- position capture "{}": without it an empty list "[]" makes no
-- captures, and LPeg then passes the whole match to `tolist`, which
-- used to turn "[]" into {"[]"}. `tolist` drops that first argument.
pypp_grammar0 = [=[
  objrest     <- obj {} [%s]* {.*}
  obj         <- bool / num / str / table / list
  objp        <- obj colon obj
  table       <- ("{"    (objp (comma objp)*)* "}") -> totable
  list        <- ("[" {} (obj  (comma obj )*)* "]") -> tolist
  colon       <- [%s]* ":" [%s]*
  comma       <- [%s]* "," [%s]*
  bool        <- ("True" / "False") -> tobool
  num         <- ("-"? [0-9.]+) -> tonum
  str         <- (ustr / str0)
  ustr        <- "u" str0
  str0        <- ("'" (stritem / {'"'})* -> concat "'")
               / ('"' (stritem / {"'"})* -> concat '"')
  stritem     <- strcnormals / strcc / strcx / strcu / strcU / strcother
  strcnormals <- {[^'"\]+}
  strcc       <- "\" {['"\]}
  strcx       <- "\x" ({[0-9a-f][0-9a-f]} -> hextoc)
  strcu       <- { "\u" [0-9a-f][0-9a-f][0-9a-f][0-9a-f] }
  strcU       <- { "\U" [%x][%x][%x][%x][%x][%x][%x][%x] }
  strcother   <- "\" . -> otherc
]=]

-- The semantic actions used by pypp_grammar0.
pypp_defs = {
  tobool  = function (s) return s == "True" end,
  tonum   = function (s) return tonumber(s) end,
  tostr   = function (s) return s end,
  -- The first argument is the position capture from the "list" rule.
  tolist  = function (pos, ...) return {...} end,
  -- Receives key1, val1, key2, val2, ... For an empty dict it receives
  -- the whole match "{}" as a single argument, and the loop runs zero
  -- times.
  totable = function (...)
      local L, T = {...}, {}
      for i=1,#L-1,2 do T[L[i]] = L[i+1] end
      return T
    end,
  hextoc  = function (cc) return string.char(tonumber(cc, 16)) end,
  concat  = function (...) return table.concat {...} end,
  -- Backslash escapes other than \\ \' \" \x \u \U: translate \n, \r
  -- and \t; print any other escape and return it unchanged.
  otherc  = function (c)
      if c == "n" then return "\n" end
      if c == "r" then return "\r" end
      if c == "t" then return "\t" end
      print("\\"..c)
      return "\\"..c
    end,
}

pypp_grammar = re.compile(pypp_grammar0, pypp_defs)

-- pypp_parse(bigstr, pos): match the grammar against `bigstr` starting
-- at `pos`; returns the captures of "objrest" (the object, the
-- position after it, and the rest of the string).
pypp_parse = function (bigstr, pos) return pypp_grammar:match(bigstr, pos) end
-- pypp_test(bigstr, pos): pretty-print the first result of pypp_parse.
pypp_test  = function (bigstr, pos) pp(pypp_parse(bigstr, pos)) end


-- ======================================================================
-- pypp cache
-- ======================================================================
--
-- «pypp-cache»  (to ".pypp-cache")
--
-- Conversions:
--            readfile         parse
--   id ---> pyppf ----------> pypp -------> obj ---> sexp
--                                           obj ---> date
--                                           obj ---> txt
--
-- (find-fline "~/fbcache/cache_by_id/")
-- (find-fline "~/fbcache/cache_by_id_lua/")

pypp_dir  = "~/fbcache/cache_by_id/"
curl_dir  = "~/fbcache/cache_by_id_curl/"
luapp_dir = "~/fbcache/cache_by_id_lua/"

-- id -> file name in each of the three caches ("~" is expanded by
-- ee_fmt when this file is loaded).
id_to_pyppf  = ee_fmt "~/fbcache/cache_by_id/%s"
id_to_curlf  = ee_fmt "~/fbcache/cache_by_id_curl/%s"
id_to_luappf = ee_fmt "~/fbcache/cache_by_id_lua/%s"

-- Lists of the entries in each cache directory.
pypp_ls  = function () return ee_ls_no_dots(pypp_dir)  end
curl_ls  = function () return ee_ls_no_dots(curl_dir)  end
luapp_ls = function () return ee_ls_no_dots(luapp_dir) end

-- pypp_fname(id, e): the (unexpanded) file name for `id` in pypp_dir,
-- with the optional suffix `e`.
pypp_fname    = function (id, e) return pypp_dir..id..(e or "") end
-- pypp_readfile(id, e): read that file. The suffix `e` used to be
-- ignored here; it is now passed on to pypp_fname.
pypp_readfile = function (id, e) return ee_readfile(pypp_fname(id, e)) end
-- pypp_id_obj(id, p): read and parse the pypp file for `id`.
pypp_id_obj   = function (id, p) return pypp_parse(pypp_readfile(id), p) end
-- pypp_id_sexp(id): a (find-fline ...) sexp pointing to the luapp file.
pypp_id_sexp  = function (id) return format('(find-fline "%s%s")', luapp_dir, id) end

-- obj_date(o): o.created_time or o.updated_time; nil/false if `o` is
-- nil/false or has neither field.
obj_date = function (o) return o and (o.created_time or o.updated_time) end

-- pypp_obj_txt(o): render the object `o` as text: From, Date, URL, a
-- sexp pointing to its luapp file, and a wrapped body taken from
-- o.caption, o.name or o.message. Missing fields get placeholders.
-- Raises an error if `o` is nil.
pypp_obj_txt = function (o)
    local fmt = "From: $FROM\nDate: $DATE\n$URL\n $SEXP\n\n$BODY"
    local T = {
      FROM = o.from and o.from.name or "(no FROM)",
      DATE = obj_date(o) or "(no DATE)",
      URL  = o.link or "(no URL)",
      SEXP = pypp_id_sexp(o.id),
      BODY = wraps(o.caption or o.name or o.message or "(no NAME or MESSAGE)")
    }
    local f = function (name) return T[name] or error(name.." is nil") end
    return (fmt:gsub("%$([A-Z]+)", f))
  end

-- pypp_id_txt(id): pypp_obj_txt of the object parsed from the pypp cache.
pypp_id_txt = function (id) return pypp_obj_txt(pypp_id_obj(id)) end


-- ======================================================================
-- Lua cache
-- ======================================================================
--
-- «lua-cache»  (to ".lua-cache")

id_objs = {}     -- id -> object; filled by read_lua_cache

-- write_lua_cache(verbose): for each id in the pypp cache, parse its
-- pypp file and write the result, pretty-printed, to the luapp cache.
write_lua_cache = function (verbose)
    for _,id in ipairs(pypp_ls()) do
      if verbose then print(id) end
      -- local o = pypp_readfile(id)
      local o = pypp_parse(ee_readfile(id_to_pyppf(id)))
      ee_writefile_pp(id_to_luappf(id), o)
    end
  end

test_lua_cache = function () read_lua_cache("verbose") end

-- read_lua_cache(verbose): set the globals `ids` (the list of ids in
-- the luapp cache) and `id_objs` (id -> object read from that cache).
read_lua_cache = function (verbose)
    ids = luapp_ls()
    id_objs = {}
    for _,id in ipairs(ids) do
      if verbose then print(id) end
      id_objs[id] = ee_readfile_pp(id_to_luappf(id))
    end
  end

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
write_lua_cache("verbose")
test_lua_cache()
read_lua_cache()

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
read_lua_cache()
= table.concat(ids, "\n")
pp(id_objs[ids[1]])

-- (find-fline "~/fbcache/")
-- (find-fline "~/fbcache/cache_by_id/")
-- (find-fline "~/fbcache/cache_by_id_lua/")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
for _,id in ipairs(pypp_ls()) do
  print(id)
  print(pypp_id_txt(id))
  print("\n--------------\n")
end

hugestr = mapconcat(pypp_id_txt, pypp_ls(), "\n--------------\n")
writefile("huge.txt", hugestr)

# (find-fline "~/fbcache/cache_by_id/")
# (find-fline "~/fbcache/huge.txt")
# (find-sh "grep ^From: ~/fbcache/huge.txt")
# (find-sh "grep ^From: ~/fbcache/huge.txt | sort")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
cd ~/fbcache/
Scp-np huge.txt $TWUP/fbcache/
Scp-np huge.txt $TWUS/fbcache/
# http://angg.twu.net/fbcache/huge.txt

--]==]


-- ======================================================================
-- translations
-- ======================================================================
--
-- «translations0»  (to ".translations0")
-- Idea: Facebook URLs are split into "parts" by fb_url_to_iparts,
-- like this: ...
--   /permalink.php?story_fbid=N&set=OR   pesfs_L_L      0
--   /W/photos/W/WR                       photos_L_L_L   3
-- (find-angg "fbcache/urls.lua" "translations0" " /W/photos/W/WR ")
--   /W/media_set?set=OR                  media0_L_L     0
-- Python version: (find-angg "fbcache/p.py" "translations0")
--
-- Each line below has three whitespace-separated fields: a URL
-- template (the part after "https://www.facebook.com"), a file-name
-- template, and an "etc" field. The leading digits [1-9] of "etc" are
-- used by url_to_ids as indices into the parts captured from the URL.

fb_translations0 = [[
  /events/N/permalink/N/R            evpe_L_L            21_ok
  /events/N/N/R                      ev2_L_L             21_ok
  /events/N/R                        ev1_L               1_ok
  /groups/N/permalink/N/R            grpe_L_L            21_ok
  /groups/W/permalink/N/R            grpe_L_L            0_nothingworks
  /groups/N/NR                       groups_L_L          1_whydoesnt2work
  /groups/W/NR                       groups_L_L          2_ok
  /groups/NR                         groups_L            1_ok
  /groups/WR                         groups_L            0_how_to_convert_name->id?
  /W/media_set?set=a.N.N.NR          media4_L_L_L        0_nothingworks
  /media/set/?set=OR                 media_L             0_nothingworks
  /notes/W/W/NR                      notes_L_L_L         0_3shouldworkbutdeprecated
  /notes/N/R                         notes_L             0_1shouldworkbutdeprecated
  /pages/W/NR                        pages_L_L           2_ok
  /permalink.php?story_fbid=N&id=NR  pesfi_L_L           12_ok
  /photo.php?fbid=N&set=t.NR         phofst_L_t.L        0_nothingworks
  /photo.php?fbid=N&set=a.N.N.NR     phofsa_L_a.L.L.L    0_nothingworks
  /photo.php?fbid=N&set=OR           phofs_L_L           1
  /N/photos/t.N/N/R                  photos_L_t.L_L      13
  /N/photos/gm.N/N/R                 photos_L_gm.L_L     123
  /N/photos/a.N.N.N/N/R              photos_L_a.L.L.L_L  1245
  /photo.php?v=N&set=OR              photovs_L_L         1
  /photo.php?v=NR                    photov_L            1
  /W/posts/NR                        posts_L_L           2
  /video.php?v=NR                    video_L             1
]]

-- fb_chars_to_tbl(str): parse a string of "C:replacement" items
-- (C is one non-space character) into the table { C = replacement }.
fb_chars_to_tbl = function (str)
    local tbl = {}
    for char,rgx in str:gmatch("(%S):(%S*)") do tbl[char] = rgx end
    return tbl
  end

-- How each template character is translated into a Lua pattern...
fb_char_to_re  = fb_chars_to_tbl(
  " W:([^/]+) N:([0-9]+) O:([^/&?]+) R:(.*) L:([^_]+) .:%. ?:%? ")
-- ...and into a `format` string ("R" becomes the empty string).
fb_char_to_fmt = fb_chars_to_tbl(
  " W:%s N:%s O:%s R: L:%s .:. ?:? ")

-- fb_abc_to_tr(a, b, c): convert one line of fb_translations0 into a
-- table { u = {orig, re, fmt}, f = {orig, re, fmt}, etc = c }, where
-- `u` is built from "https://www.facebook.com"..a and `f` from b.
fb_abc_to_tr = function (a, b, c)
    local f_re  = function (str) return str:gsub("[WNORL?.]", fb_char_to_re)  end
    local f_fmt = function (str) return str:gsub("[WNORL?.]", fb_char_to_fmt) end
    local f = function (str)
        return { orig = str,
                 re   = "^"..f_re(str).."$",
                 fmt  = (f_fmt(str)),
               }
      end
    local ha = "https://www.facebook.com"..a    -- was an accidental global
    return { u = f(ha), f = f(b), etc = c }
  end

fb_translations1 = {}
for a,b,c in fb_translations0:gmatch("(%S+)[ \t]+(%S+)[ \t]+(%S+)") do
  table.insert(fb_translations1, fb_abc_to_tr(a, b, c))
end

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
pp(fb_translations1)

--]==]

-- fb_url_to_iparts(url): find the first entry of fb_translations1
-- whose URL pattern matches `url`; return its index and a table with
-- (up to) the first five captures. Returns nothing if none matches.
fb_url_to_iparts = function (url)
    for i,tr in ipairs(fb_translations1) do
      -- local a,b,c,d,e = url:match(tr[1][1])
      local a,b,c,d,e = url:match(tr.u.re)
      if a then return i,{a,b,c,d,e} end
    end
  end

-- fb_u1toc = function (url1)
--     for i,entry in ipairs(fb_translations) do
--       local a,b,c,d,e = url1:match(entry.upat)
--       if a then return format(entry.cfmt, a, b, c, d, e) end
--     end
--   end
-- fb_ctou1 = function (cfname)
--     for i,entry in ipairs(fb_translations) do
--       local a,b,c,d,e = cfname:match(entry.cpat)
--       if a then return format(entry.ufmt, a, b, c, d, e) end
--     end
--   end


-- ======================================================================
-- urls by kind
-- ======================================================================
--
-- «urls-by-kind»  (to ".urls-by-kind")

-- fb_urls_by_kind(bigstr): classify each whitespace-separated URL in
-- `bigstr` by the index of the translation that matches it (0 when
-- none matches). Sets and returns the global `urls_by_kind`, a table
-- with one list of URLs for each index in 0..#fb_translations1.
fb_urls_by_kind = function (bigstr)
    urls_by_kind = {}
    for i=0,#fb_translations1 do urls_by_kind[i] = {} end
    for url in bigstr:gmatch("(%S+)") do
      local i,parts = fb_url_to_iparts(url)
      table.insert(urls_by_kind[i or 0], url)
    end
    return urls_by_kind
  end

-- ubyk = fb_urls_by_kind(readfile("/tmp/o0"))

-- prubyk(kind, n): print a hyperlink to the translation for `kind`
-- (when kind > 0) and then the URLs in ubyk[kind], stopping after `n`
-- of them when `n` is given. Uses the global `ubyk`, which must have
-- been set by the caller (see the test blocks).
prubyk = function (kind, n)
    if kind > 0 then
      local tr = fb_translations1[kind]
      -- sub(25) drops the 24-char prefix "https://www.facebook.com".
      printf(' (find-angg "fbcache/urls.lua" "translations0" " %s " "%s")\n',
             tr.u.orig:sub(25), tr.etc)
    end
    for i,url in ipairs(ubyk[kind]) do
      print(kind.." "..url)
      if i == n then break end
    end
  end

-- prubyks(n, a, b): run prubyk(i, n) for the kinds i in a..b
-- (defaults: 1 and #fb_translations1).
prubyks = function (n, a, b)
    for i=(a or 1),(b or #fb_translations1) do print(); prubyk(i, n) end
  end

--[==[
-- «prubyks»  (to ".prubyks")
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
* (eek "M-o <<wrap>> M-o")
dofile "urls.lua"
ubyk = fb_urls_by_kind(readfile("all-fb-urls.lst"))
prubyks(4, 17)
prubyks(4)

# «testids»  (to ".testids")
* (eepitch-python)
* (eepitch-kill)
* (eepitch-python)
print "https://www.facebook.com/events/1433035430297888/permalink/1433056046962493/"
testid("1433035430297888")
testid("1433056046962493")
testid("531549197")

# (find-angg "fbcache/urls.lua" "translations0" " /W/media_set?set=a.N.N.NR ")
# https://www.facebook.com/alexsander.lepletier/media_set?set=a.10151933894649198.1073741851.531549197
# (find-fbc-links "a.10151933894649198.1073741851.531549197")
o = ggo( "531549197")
o = ggo( "1073741851")
o = ggo( "1073741851.531549197")
o = ggo( "10151933894649198")
o = ggo( "10151933894649198.1073741851")
o = ggo( "10151933894649198.1073741851.531549197")
o = ggo("a.10151933894649198")
o = ggo("a.10151933894649198.1073741851")
o = ggo("a.10151933894649198.1073741851.531549197")

# (find-angg "fbcache/urls.lua" "translations0" " /media/set/?set=OR " "0")
# https://www.facebook.com/media/set/?set=a.10202222226170686.1073741844.1040842293
# (find-fbc-links "a.10202222226170686.1073741844.1040842293")
o = ggo( "1040842293")
o = ggo( "1073741844")
o = ggo( "1073741844.1040842293")
o = ggo( "10202222226170686")
o = ggo( "10202222226170686.1073741844")
o = ggo( "10202222226170686.1073741844.1040842293")
o = ggo("a.10202222226170686")
o = ggo("a.10202222226170686.1073741844")
o = ggo("a.10202222226170686.1073741844.1040842293")

# (find-angg "fbcache/urls.lua" "translations0" " /photo.php?fbid=N&set=t.NR ")
# https://www.facebook.com/photo.php?fbid=10151256245188625&set=t.100002233344951
o = ggo("10151256245188625")
o = ggo("t.100002233344951")
o = ggo( "100002233344951")

# (find-angg "fbcache/urls.lua" "translations0" " /photo.php?fbid=N&set=a.N.N.NR ")
# https://www.facebook.com/photo.php?fbid=10151900172318321&set=a.10150633280098321.376945.508548320
o = ggo( "508548320")
o = ggo( "376945")
o = ggo( "376945.508548320")
o = ggo( "10150633280098321")
o = ggo( "10150633280098321.376945")
o = ggo( "10150633280098321.376945.508548320")
o = ggo("a.10150633280098321")
o = ggo("a.10150633280098321.376945")
o = ggo("a.10150633280098321.376945.508548320")

* (eepitch-python)
* (eepitch-kill)
* (eepitch-python)
* (find-3ee '(eepitch-lua51) '(eepitch-python))
# https://www.facebook.com/events/1397247553878845/permalink/1412951502308450/
o = ggo("1397247553878845")
o = ggo("1412951502308450")
o = ggo("267999310067525")
o = ggo("ReflexivaTransparente/permalink/267999310067525")
o = ggo("ReflexivaTransparente.267999310067525")
o = ggo("105487286280461/240851649410690")
type(o)
o.keys()
o['description']

o = ggo("1412951502308450")

--]==]

-- The header printed by all_ids before the Python commands it
-- generates.
pyhead = [=[
* (eepitch-python)
* (eepitch-kill)
* (eepitch-python)
execfile("p.py", globals())
]=]

-- all_ids(bigstr, f): for each whitespace-separated URL in `bigstr`,
-- find its ids with url_to_ids. Without `f`, print `pyhead`, then each
-- URL as a "# " comment followed by one 'pp(fi("<id>"))' line per id.
-- With `f`, call f(id) for each id and print nothing else. Sets the
-- global `url_ids` to the list of all ids found, and returns it.
all_ids = function (bigstr, f)
    if not f then print(pyhead) end
    url_ids = {}
    for url in bigstr:gmatch("(%S+)") do
      if not f then print("# "..url) end
      for _,id in ipairs(url_to_ids(url) or {}) do
        table.insert(url_ids, id)
        if f then f(id) else printf('pp(fi("%s"))\n', id) end
      end
    end
    return url_ids
  end

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
* (eek "M-o <<wrap>> M-o")
dofile "urls.lua"
all_ids(ee_readfile "/tmp/o1")
print(pyhead)
all_ids(ee_readfile "/tmp/o1", function (id) print('fc("'..id..'")') end)
all_ids(ee_readfile "/tmp/o0", function (id) print('fc("'..id..'")') end)
*
* (eek "M-o M-< 2*<down>")

* (eepitch-python)
* (eepitch-kill)
* (eepitch-python)
execfile("p.py", globals())
execfile("/tmp/o2", globals())

-- (find-fline "/tmp/o1")
-- (find-file "~/fbcache/cache_by_id/")

--]==]

-- digits = etc:match("^([1-9]*)")
-- if #digits > 0 then
--   local parts = {a, b, c, d, e}
--   return parts[0 + digits[1]]
-- end


-- ======================================================================
-- All FB urls
-- ======================================================================
--
-- «all-fb-urls»  (to ".all-fb-urls")

--[==[
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
cat ~/TODO \
    ~/ORG/index.org \
    ~/TH/ee.blogme \
    ~/TH/2014-xs.blogme \
    ~/TH/links-sobre-gaza.blogme \
  | grep https://www.facebook.com/ \
  | tr ' ' '\n' \
  | grep https://www.facebook.com/ \
  | sort | uniq \
  | tee /tmp/o0
laf /tmp/o0 ~/fbcache/all-fb-urls.lst
cp -v /tmp/o0 ~/fbcache/all-fb-urls.lst

# (find-fline "/tmp/o0")
# (find-fline "~/fbcache/all-fb-urls.lst")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
* (eek "M-o <<wrap>> M-o")
dofile "urls.lua"
ubyk = fb_urls_by_kind(readfile("all-fb-urls.lst"))
prubyks(4)
prubyk(0)

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
ubyk = fb_urls_by_kind(readfile("/tmp/o0"))
for i=0,#ubyk do print(i,#ubyk[i]) end
prubyks(4)
pp(fb_translations1[1])
prubyk(0)
prubyk(0, 10)
prubyk(1)
prubyk(1, 10)

--]==]


-- ======================================================================
-- url to ids
-- ======================================================================
--
-- «url-to-ids»  (to ".url-to-ids")

-- url_to_ids(url): return the list of ids in `url`: the parts selected
-- by the leading digits of the "etc" field of the translation that
-- matches `url`. Returns nil if no translation matches or if its "etc"
-- field does not start with a digit in 1..9.
url_to_ids = function (url)
    local ids = {}
    local i,parts = fb_url_to_iparts(url)
    if i then
      local digits = fb_translations1[i].etc:match("^[1-9]+")
      if digits then
        for d in digits:gmatch(".") do
          local id = parts[0 + d]
          table.insert(ids, id)
        end
        return ids
      end
    end
  end

fb_urls      = {}
fb_urls_file = "~/fbcache/all-fb-urls.lst"
-- read_fb_urls(): set the global `fb_urls` to the result of `split` on
-- the contents of fb_urls_file.
read_fb_urls = function () fb_urls = split(ee_readfile(fb_urls_file)) end
-- (find-fline "~/fbcache/all-fb-urls.lst")

id_urls = {}     -- id -> list of the URLs that mention that id
-- read_id_urls(verbose): add each URL in `fb_urls` to the lists in
-- `id_urls` for all its ids.
read_id_urls = function (verbose)
    for _,url in ipairs(fb_urls) do
      if verbose then print(url) end
      for _,id in ipairs(url_to_ids(url) or {}) do
        if verbose then print(" "..id) end
        id_urls[id] = id_urls[id] or {}
        table.insert(id_urls[id], url)
      end
    end
  end

-- Text for one id: 0 = the object only; 1 = its URLs, then the object;
-- 2 = the id between guillemets (bytes 171 and 187), then 1.
id_obj_txt0 = function (id) return pypp_obj_txt(id_objs[id]) end
id_obj_txt1 = function (id)
    local urls = mapconcat(cfmt "%s\n", id_urls[id] or {})
    return urls..id_obj_txt0(id)
  end
id_obj_txt2 = function (id) return cfmt "\171%s\187\n" (id) .. id_obj_txt1(id) end
-- id_objs_txt(ids): the texts for all `ids`, with separators.
id_objs_txt = function (ids)
    return mapconcat(id_obj_txt2, ids, "\n\n---------------\n")
  end

-- id_date(id): the date of the cached object for `id`, or " "..id when
-- the object is missing or has no date.
id_date = function (id)
    local o = id_objs[id]
    local date = obj_date(o) or " "..id
    return date
  end

-- ids_sorted_by_date(): a copy of the global `ids` sorted by id_date.
-- The comparator must be a strict "<": with "<=" table.sort can raise
-- "invalid order function for sorting".
ids_sorted_by_date = function ()
    local lt = function (a, b) return id_date(a) < id_date(b) end
    return sorted(shallowcopy(ids), lt)
  end

raw_text_unix = "\n\n\n"..[[
-- Local Variables:
-- coding: raw-text-unix
-- End:
]]

-- write_huge_txt(): write the texts for all ids, sorted by date, to
-- "huge.txt", followed by the raw_text_unix trailer.
write_huge_txt = function ()
    local ids = ids_sorted_by_date()
    ee_writefile("huge.txt", id_objs_txt(ids)..raw_text_unix)
  end

-- all_fb_urls0    = function () return ee_readfile("~/fbcache/all-fb-urls.lst") end
-- all_fb_urls1    = function () return split(all_fb_urls0()) end
-- gen_all_fb_urls = function () return all_fb_urls0():gmatch("%S+") end

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
read_fb_urls()
all_fb_ids = Set.new()    -- (find-angg "LUA/lua50init.lua" "Set")
for _,url in ipairs(fb_urls) do
  for _,id in ipairs(url_to_ids(url) or {}) do
    all_fb_ids._[id] = id
  end
end
= all_fb_ids
python_ids = Set.from(pypp_ls())
= python_ids
missing_ids = all_fb_ids - python_ids
= missing_ids
= missing_ids:ksc()
= pyhead..mapconcat(cfmt 'pp(fi("%s"))\n', missing_ids:ks())

--]==]

--[==[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
read_fb_urls()
read_id_urls()
read_lua_cache()
write_huge_txt()
-- (find-fline "huge.txt")
-- (find-angg "fbcache/huge.txt")
-- (find-angg "fbcache/huge.txt" "10152622815229198")
pp(id_urls["701795806545331"])

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
laf huge.txt
lua5.1 ~/blogme3/blogme3.lua -o huge.txt.html -a2html huge.txt
cd ~/fbcache/
Scp -v huge.txt huge.txt.html edrx@angg.twu.net:/home/edrx/slow_html/fbcache/
Scp -v huge.txt huge.txt.html edrx@angg.twu.net:/home/edrx/public_html/fbcache/
http://angg.twu.net/fbcache/huge.txt.html

= id_obj_txt2(ids[1])
= id_obj_txt2(ids[1])..id_obj_txt2(ids[2])
= cfmt "\187" "a"
= cfmt "\171%s\187" "a"

-- for _,id in ipairs(ids) do print(id, id_date(id)) end
for _,id in ipairs(ids_sorted_by_date()) do print(id.." "..id_date(id)) end

= id_date "701795806545331"
= mapconcat(cfmt " %s\n", id_urls["701795806545331"] or {})

id = "701795806545331"
pp(id_objs[id])
= id_obj_txt(id)
pp(id_urls[id])
=
= id_obj_txt0(id)

id = "10151841953998086"

pypp_id_txt = pypp_obj_txtfunction (o)

all_fb_urls = all_fb_urls1()
url = "https://www.facebook.com/usauncut/photos/a.190167221017767.44131.186219261412563/823206274380522/?type=1"
url = "https://www.facebook.com/video.php?v=1009188935762835"
pp(url_to_ids(url))

id_urls = {}
for url in
= #all_fb_urls

get_id_urls = function
id_urls = {}
for url in
  id_urls[id] = id_urls[id] or {}
  table.insert(id_urls[id], url)

pp(url_to_ids)
gen_all_fb_urls

--]==]


-- ======================================================================
-- wget
-- ======================================================================
--
-- «wget»  (to ".wget")

-- fb_shquote(str): `str` as a single-quoted POSIX shell word.
local fb_shquote = function (str)
    return "'"..(tostring(str):gsub("'", "'\\''")).."'"
  end

-- fb_linesplit0(line): split `line` at its first "http://" or
-- "https://" into: the text before it, the protocol, the site (chars
-- in "!".."~" except "/"), the path (chars in "!".."~"), and the rest.
-- Returns nil if `line` contains no URL.
fb_linesplit0 = function (line)
    local pat = "^(.-)(https?://)([!-.0-~]*)([!-~]*)(.*)$"
    local pre,prot,site,path,rest = line:match(pat)
    return pre,prot,site,path,rest
  end

-- fb_goodpre(pre): true-ish if `pre` is empty or ends in space or tab.
fb_goodpre = function (pre)
    return pre and (pre == "" or pre:sub(-1):match"[ \t]")
  end

-- fb_u1toc(url1): convert the path part of a Facebook URL into a cache
-- file name, using the first entry of fb_translations1 whose URL
-- pattern matches "https://www.facebook.com"..url1. Returns nil if no
-- entry matches.
-- NOTE(review): the callers below used this function, but it existed
-- only as the commented-out code in the "translations" section (written
-- for an older `fb_translations` table), so they failed with a call to
-- a nil value. This is a reconstruction on top of fb_translations1 -
-- confirm that this is the intended mapping.
fb_u1toc = function (url1)
    local url = "https://www.facebook.com"..url1
    for _,tr in ipairs(fb_translations1) do
      local a,b,c,d,e = url:match(tr.u.re)
      -- The last capture is the "rest" (R); tr.f.fmt has fewer "%s"s
      -- than there are captures, and `format` ignores the extra ones.
      if a then return format(tr.f.fmt, a, b, c, d, e) end
    end
  end

-- fb_linesplit1(line): like fb_linesplit0, but only for lines with a
-- "good" prefix and a site containing "facebook"; returns the cache
-- file name as a sixth value. Returns nothing otherwise.
fb_linesplit1 = function (line)
    local pre,prot,site,path,rest = fb_linesplit0(line)
    if fb_goodpre(pre) and site:match "facebook" then
      local cfname = fb_u1toc(path)
      return pre,prot,site,path,rest,cfname
    end
  end

-- fb_linesplit(line): print the cache file name for the Facebook URL
-- in `line`, if any.
fb_linesplit = function (line)
    local pre,prot,site,path,rest = fb_linesplit0(line)
    if fb_goodpre(pre) and site:match "facebook" then
      local cfname = fb_u1toc(path)
      if cfname then print(cfname) end
      -- if not cfname then print(path) end
    end
  end

-- myerror(fmt, ...): print a formatted message and raise an error.
-- (An earlier definition, which was immediately overridden, is kept
-- here as a comment.)
-- myerror = function (fmt, ...) printf(fmt.."\n", ...); os.exit(2) end
myerror = function (fmt, ...) printf(fmt.."\n", ...); error() end

-- fb_utof(url): the cache file name for `url`; calls myerror if `url`
-- is not a recognized Facebook URL.
fb_utof = function (url)
    local pre,prot,site,path,rest,fname = fb_linesplit1(url)
    -- This used to format the undefined global `line`.
    if not fname then myerror("No FB URL in: %s", url) end
    return fname
  end

fb_user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) "..
                "Gecko/20070310 Iceweasel/2.0.0.3 (Debian-2.0.0.3-1)"

-- fb_wget(fname, url): run wget on `url`, with fb_user_agent as its
-- user agent, writing to `fname`. All three values are shell-quoted
-- (the file name used to be unquoted and the URL unescaped).
fb_wget = function (fname, url)
    local A = {UA    = fb_shquote(fb_user_agent),
               FNAME = fb_shquote(fname),
               URL   = fb_shquote(url)}
    local a = function (str) return (str:gsub("%$([A-Z]+)", A)) end
    local cmd = a "wget --progress=dot -U $UA -O $FNAME $URL"
    getoutput(cmd)
  end

-- fb_wget_if_needed(url): run fb_wget unless "<cache file name>.wget"
-- can already be read.
fb_wget_if_needed = function (url)
    local fname = fb_utof(url)..".wget"
    if not file_exists(fname) then fb_wget(fname, url) end
  end

-- fb_cache(url): the contents of the file named by fb_utof(url).
fb_cache = function (url) return readfile(fb_utof(url)) end

-- (find-ydb "ydb" "dooptions")
-- dooptions(optionname, a, b): process one command-line option:
--   -utof URL    print the cache file name for URL
--   -utot URL    print the text for the first id in URL
--   -itot ID     print the text for ID (experimental)
--   -win  URL    run wget on URL if needed
--   -wins FILE   run "-win" on each line of FILE, ignoring errors
-- Any other option (including none) does nothing.
dooptions = function (optionname, a, b)
    if optionname == "-utof" then
      local pre,prot,site,path,rest,cfname = fb_linesplit1(a)
      if cfname then print(cfname) else myerror("No FB URL in: %s", a) end
    elseif optionname == "-utot" then
      local url = a
      -- url_to_ids returns nil for unrecognized URLs; this used to
      -- fail with "attempt to index a nil value".
      local ids = url_to_ids(url)
      if not (ids and ids[1]) then myerror("No FB ids in: %s", url) end
      local id = ids[1]
      local o = ee_readfile_pp(id_to_luappf(id))
      id_urls[id] = {url}
      id_objs[id] = o
      print(id_obj_txt1(id)..raw_text_unix)
    elseif optionname == "-itot" then    -- experimental
      local id = a
      local o = ee_readfile_pp(id_to_luappf(id))
      id_urls[id] = {}
      id_objs[id] = o
      print(id_obj_txt1(id)..raw_text_unix)
    elseif optionname == "-win" then
      fb_wget_if_needed(a)
    elseif optionname == "-wins" then
      local bigstr = readfile(a)
      for url in bigstr:gmatch "([^\n]+)" do
        print(url)
        -- Best effort: a failure on one URL must not stop the others.
        pcall(function () fb_wget_if_needed(url) end)
      end
    end
  end

dooptions(...)
-- myerror "Not implemented"

--[[
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
cd /tmp/
U=https://www.facebook.com/gustavo.gindre/posts/515590031857632
lua5.1 ~/fbcache/urls.lua -utot $U

--]]

--[[
-- (find-sh "grep facebook ~/TODO | sort")
-- (find-sh "grep facebook ~/TODO | grep photo.php | sort")
-- (find-sh "grep facebook ~/TODO | grep posts | sort")
-- (find-sh "grep facebook ~/TODO | grep -v photo.php | grep -v posts | sort")

# «examples»  (to ".examples")
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
export LUA_INIT=
lua51
dofile "urls.lua"
dooptions("-utof", "https://www.facebook.com/christian.fischgold/posts/713430788717068")
dooptions("-ftou", "posts_FIPRJ_487608268040090.wget")
dooptions("-wins", "urls.lst")

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
-- (find-fline "~/fbcache/urls.lst")
bigstr = ee_readfile "~/fbcache/urls.lst"
for li in splitlines(bigstr) do
end

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
mkdir ~/fbcache/tmp/
cd    ~/fbcache/tmp/
URL=https://www.facebook.com/sergio.martins.984991/posts/10152616093738086

# Convert a URL to a filename:
~/fbcache/urls.lua -utof $URL
# outputs
#   posts_sergio.martins.984991_10152616093738086

# Run wget if needed:
~/fbcache/urls.lua -win $URL
# as the file "posts_sergio.martins.984991_10152616093738086.wget"
# does not exist yet, this runs wget on the URL with output to
# "posts_sergio.martins.984991_10152616093738086.wget".

# Run wget if needed:
~/fbcache/urls.lua -win $URL
# We are running this a second time -
# "posts_sergio.martins.984991_10152616093738086.wget" exists, so this
# does nothing.

~/fbcache/urls.lua -wins ../urls.lst

# Bad news: running this yields no output!
python ~/usrc/code/python/edu/fbtxt.py pos*

# (find-fline "~/usrc/code/python/edu/")
# (find-fline "~/usrc/code/python/edu/fbtxt.py")
# (find-fline "~/fbcache/tmp/")
# (find-fline "~/usrc/")
# (find-oilercodefile "python/edu/")

* (eepitch-shell2)
* (eepitch-kill)
* (eepitch-shell2)
cd ~/fbcache/tmp/
tar -cvzf /tmp/posts-wget.tgz *
laf /tmp/posts-wget.tgz

# Old stuff:

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "urls.lua"
bigstr = getoutput "grep facebook ~/TODO | sort"
bigstr:gsub("([^\n]+)", fb_linesplit)

fb_linesplit2 = function (line)
    local pre,prot,site,path,rest = fb_linesplit0(line)
    if fb_goodpre(pre) and site:match "facebook" then
      local cfname = fb_u1toc(path)
      if cfname then
        print(cfname)
        print(prot..site..path)
        print()
      end
    end
  end
bigstr:gsub("([^\n]+)", fb_linesplit2)

bigstr = [=[
https://www.facebook.com/sergio.martins.984991/posts/10152616093738086
https://www.facebook.com/jornalanovademocracia/photos/a.288492381220437.66632.187051701364506/679809862088685/
https://www.facebook.com/permalink.php?story_fbid=921476867869306&id=347772661906399
https://www.facebook.com/photo.php?fbid=10201336092313990&set=a.1569106477271.73917.1523735650
]=]
bigstr:gsub("([^\n]+)", fb_linesplit)

bigstr:gsub("([^\n]+)", linesplit1)
bigstr:gsub("([^\n]+)", linesplit2)
= bigstr:gsub("([^\n]+)", url_to_cache)
PP(fbdictionary)

= #bigstr
-- = bigstr:gsub("([^\n]+)", "_%1_")
-- = bigstr:gsub("([^\n]+)", linesplit1)

s = "/photo.php?fbid=664059120317353&set=a.140340059355931.27893.100001398127964&type=1"
= s:match "^/photo%.php%?fbid=([0-9]+)&set=([^/&?]+)(.*)"

A = {}
bigstr:gsub("([^\n]+)", function (li) A[#A+1] = linesplit1(li) end)
table.sort(A)
print(table.concat(A, "\n"))

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)

* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
u="/W/photos/W/WR"
T = {W="([^/]+)", R="(.*)"}
= u:gsub("[WR]", T)

local P = t"
  N:([0-9]+)
  O:([^/&?]+)
  R:(.*)
  L:([^_]+)
  .:%.
  ?:%?
"

# Tau's translation to Python:
# (find-faceutilsfile "url.py")
# (find-angg "faceutils/README")

* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)

--]]

-- Local Variables:
-- coding: raw-text-unix
-- End: