Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
#!/usr/bin/env lua5.1
-- redirect.lua: discover where a URL redirects to.
-- Used by: (find-angg ".emacs" "ee-read-redirect")
--     See: (find-angg "LUA/lua50init.lua" "url_split")
--
-- This file:
--   http://angg.twu.net/LUA/redirect.lua
--   http://angg.twu.net/LUA/redirect.lua.html
--   (find-angg "LUA/redirect.lua")
--
-- By: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2015nov19
-- Public domain.
--
-- Two strategies are tried, in this order (see `redirect` below):
--   1. redirect_substring: the target is embedded in the URL's own query
--      string (Facebook "l.php", Google "url", YouTube "attribution_link").
--   2. redirect_wget: run wget and read the "Location:" header it prints.
--
-- The functions are deliberately global, as in the original file.
-- NOTE(review): url_split, getoutput, splitlines, format and ee_dofile are
-- not defined here; presumably they come from lua50init.lua -- confirm.

require "re"   -- (find-es "lua5" "lpeg-re")

-- 2024feb21:
-- (find-blogme3 "cruft-jan2024.lua")
ee_dofile "~/blogme3/cruft-jan2024.lua"

-- Return the query parameter `name` of the split URL `u`, or nil when the
-- URL has no such parameter (or no parsed query at all).
-- NOTE(review): assumes url_split stores the parsed query as a table in
-- u.q -- confirm against lua50init.lua.
local query_param = function (u, name)
    return u.q and u.q[name]
  end

-- YouTube: ".../attribution_link?...&u=<path>" -> "https://www.youtube.com<path>".
-- Returns nil when the "u" parameter is missing, instead of raising a
-- "concatenate a nil value" error.
youtube_redirect0 = function (u)
    local target = query_param(u, "u")
    return target and ("https://www.youtube.com" .. target)
  end
youtube_redirect = function (u)
    return u.host:match "youtu"
       and u.path == "attribution_link"
       and youtube_redirect0(u)
  end

-- Google: ".../url?...&url=<target>" -> <target>.
google_redirect0 = function (u)
    return query_param(u, "url")
  end
google_redirect = function (u)
    return u.host:match "google"
       and u.path == "url"
       and google_redirect0(u)
  end

-- Facebook: ".../l.php?u=<target>&..." -> <target>.
facebook_redirect0 = function (u)
    return query_param(u, "u")
  end
facebook_redirect = function (u)
    return (u.host == "l.facebook.com" or u.host == "www.facebook.com")
       and u.path == "l.php"
       and facebook_redirect0(u)
  end

-- Try the known "target inside the query string" services on a split URL.
-- Returns the target, or a false value when none of them applies.
redirect_substring_0 = function (u)
    return facebook_redirect(u)
        or google_redirect(u)
        or youtube_redirect(u)
  end

-- Same as redirect_substring_0, but starting from the URL as a string.
-- Returns a false value when url_split cannot split `url`.
redirect_substring = function (url)
    local u = url_split(url)
    return u and redirect_substring_0(u)
  end

-- Build the shell command that asks wget for the redirection target.
-- The pipeline keeps only the "Location:" lines of wget's output and
-- prints their second field.
redirect_wget_0 = function (url)
    local options = "--max-redirect=1 -O /dev/null"
    local fmt = "wget %s '%s' 2>&1 | grep '^Location:' | awk '{print $2}'"
    -- The URL is spliced into a single-quoted shell word; a "'" inside it
    -- would end the quoting (broken command or shell injection), so each
    -- one is rewritten as '\'' (close quote, escaped quote, reopen quote).
    local quoted = url:gsub("'", "'\\''")
    return format(fmt, options, quoted)
  end

-- Run the wget pipeline and return its raw output.
redirect_wget_1 = function (url)
    return getoutput(redirect_wget_0(url))
  end

-- Return the last line of the wget pipeline output that starts with
-- "http", or nil when there is none (e.g. the URL does not redirect).
redirect_wget = function (url)
    local lines = splitlines(redirect_wget_1(url))
    for i=#lines,1,-1 do
      if lines[i]:match("^http") then return lines[i] end
    end
    return nil
  end

-- Main entry point: the redirection target of `url`, or nil.
redirect = function (url)
    return redirect_substring(url) or redirect_wget(url)
  end

-- process script arguments
local url = ...
if url then
  -- Invoked as a script with a URL argument: print where it redirects to,
  -- and print nothing when no redirection was found.
  local red = redirect(url)
  if red then print(red) end
end

-- UrlSplit: split a URL into scheme/host/path/query/anchor and decode its
-- query string.
-- NOTE(review): Class, VTable, tos_VTable and split are not defined in this
-- file; presumably they come from lua50init.lua / cruft-jan2024.lua.
--
-- Fields of an instance `us`:
--   us.url      the original URL string
--   us.parts    set by makeparts: table with the named captures of
--               makeparts_re, or nil when the URL does not match
--   us.pquery   set by makepquery: table mapping each query key to its
--               percent-decoded value
--   us.pqueryu  set by makepquery: shorthand for us.pquery.u
UrlSplit = Class {
  type = "UrlSplit",
  new  = function (url) return UrlSplit {url=url} end,
  __tostring = tos_VTable,
  __index = {
    -- scheme://host[/path][?query][#anchor]
    makeparts_re = re.compile [=[
      {| {:scheme: [a-z]+ :} "://"
         {:host: [^/]+ :}
         ( "/" {:path:   [^?#]* :} ) ?
         ( "?" {:query:  [^#]*  :} ) ?
         ( "#" {:anchor: [^#]*  :} ) ?
      |}
    ]=],
    -- Parse us.url into us.parts. Returns `us`, so calls can be chained.
    makeparts = function (us)
        us.parts = us.makeparts_re:match(us.url)
        if us.parts then us.parts = VTable(us.parts) end
        return us
      end,
    --
    -- Replace every "%hh" (two hex digits) in `str` by the byte it encodes.
    percent_decode = function (us, str)
        local f = function (hh) return string.char(tonumber(hh, 16)) end
        return (str:gsub("%%(%x%x)", f))
      end,
    -- Split us.parts.query at "&" into key=value pairs and store the
    -- decoded values in us.pquery. Does nothing when there is no parsed
    -- query. Returns `us`, so calls can be chained.
    makepquery = function (us)
        if us.parts and us.parts.query then
          local Q = VTable {}
          for _,kv in ipairs(split(us.parts.query, "([^&]+)")) do
            local k,v = kv:match("([^=]*)=(.*)")
            -- A component without "=" (e.g. "?email%2Fufi%2Fclick&a=b")
            -- makes the match fail; treat it as a key with an empty value
            -- instead of crashing on percent_decode(nil) and Q[nil].
            if not k then k,v = kv,"" end
            Q[k] = us:percent_decode(v)
          end
          us.pquery = Q
          us.pqueryu = us.pquery.u
        end
        return us
      end,
  },
}

--[[
 * (eepitch-lua51)
 * (eepitch-kill)
 * (eepitch-lua51)
dofile "redirect.lua"
this_line = "https://l.facebook.com/l.php?u=https%3A%2F%2Fbit.ly%2F2UKH8vh%3Ffbclid%3DIwAR0K53JzvTaZTUMdODNURV91z5xUven39M9Yohq0QdaqcTV4FF3HiXaEQWA&h=AT1kBgqF6oV63KWgO291BhGSI4stbLWRRkiz8fWcvkL2rdMgxTqty49IKt7bxQpsnEivkDpSiRtRsO1LeqHN-4ZD0TCxD0se9ET6no23iPdqIiOeDTvtUDnvkw&__tn__=-UK-y-R&c[0]=AT0KTZRUVyf-k0SklQBTr1YR7pxqH04UaxUFV7didzZFifa91qghZiNn5s2jk2CuQ9skT-9geGppP8hc4wlkqcN7dSqZQNNO71y0kdCdY7L1jq_JbFfe7e--LPMwerz4eDOzUa11aMEKRb5pryxlP48sHs7Ipqiy0XpdyjZ3NEuvQPPj9Fw5s0oab6TyTrP_LF0wW9Q0gqYOFhwlI97o3sSq4I7KJviNX6tU"
us = UrlSplit.new(this_line)
= us:makeparts()
= us
= us.parts
= us.parts.query
= us:makepquery()
= us.pquery
= us.pquery.u
= us.pqueryu
us = UrlSplit.new(this_line):makeparts():makepquery()
= us.pqueryu

fname = "~/TODO"
fblines = {}
for _,li in ipairs(splitlines(ee_readfile(fname))) do
  if li:match("l.facebook.com/l.php") then
    us = UrlSplit.new(li):makeparts():makepquery()
    if us.pqueryu then
      -- print(us.pqueryu)
      table.insert(fblines, us.pqueryu)
    else
      print(li)
    end
  end
end
PPV(fblines) url = fblines[4] = url us = UrlSplit.new(url):makeparts():makepquery() = us pquerykeys = function (url) local us = UrlSplit.new(url):makeparts():makepquery() return keys(us.pquery) end url = fblines[4] PPV(pquerykeys(url)) s = SetL.new() for _,url in ipairs(fblines) do local ks = pquerykeys(url) PP(ks) for _,k in ipairs(ks) do s:add(k) end end = s = s:ksc() trivialkeys = [=[ ab_channel ad-keywords ad_id ad_name adset_id adset_name campaign_id cmpid fbclid utm_campaign utm_content utm_medium utm_source utm_term ]=] trivialkeys = split(trivialkeys) trivialkeys = SetL.fromarray(trivialkeys) = trivialkeys:ksc() = (s - trivialkeys):ksc() --]] -- Tests --[[ * (eepitch-lua51) * (eepitch-kill) * (eepitch-lua51) dofile "redirect.lua" r = function (url) print(redirect(url)) end rr = function (url) printf("%s", redirect_wget_1(url)) end -- Tests: redirect_substring r "https://l.facebook.com/l.php?u=http%3A%2F%2Fwww.pragmatismopolitico.com.br%2F2015%2F11%2Fdocumentario-por-dentro-do-estado-islamico.html&h=GAQE_HFwE" r "https://l.facebook.com/l.php?u=http%3A%2F%2Fyoutu.be%2FKtmV7-f_j7Y&h=KAQGSKy0E" r "https://l.facebook.com/l.php?u=https%3A%2F%2Ftruthout.org%2Farticles%2Ffacebook-has-begun-purging-accounts-tied-to-anti-fascist-groups" r "https://www.google.com.br/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CB4QyCkwAGoVChMIztmKrZibyQIVDIGQCh3OZAO6&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DXGAy4KObjNE&usg=AFQjCNE-zWEymngBPEdEX0oSmWTdV98LAQ&sig2=vY1WNmt5yDI9hiMHW0Ljew&bvm=bv.107763241,d.Y2I" r "https://www.google.com.br/url?sa=t&rct=j&q=&esrc=s&source=web&cd=2&cad=rja&uact=8&ved=0CCMQtwIwAWoVChMIztmKrZibyQIVDIGQCh3OZAO6&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DeNHDG-2w1xo&usg=AFQjCNFPl7l-ErtRSjsjWNtDJTGNcvxeag&sig2=ODOPfOK07fF5ELfBtA1UuA&bvm=bv.107763241,d.Y2I" r "https://www.youtube.com/attribution_link?a=xjhRozk_ia8&u=%2Fwatch%3Fv%3DRVcpuKIinf8%26feature%3Dplayer_embedded" -- Tests: redirect_wget rr "http://dlvr.it/ChmKzf" r 
"http://dlvr.it/ChmKzf" rr "http://glo.bo/1H9s38Y" r "http://glo.bo/1H9s38Y" rr "http://bit.ly/1T0OuyD" r "http://bit.ly/1T0OuyD" -- Tests: redirect_wget & paywall (I don't know ho to handle this) rr "http://nyti.ms/1MUWKPQ" r "http://nyti.ms/1MUWKPQ" -- Test: urls that do not redirect rr "http://angg.twu.net/" r "http://angg.twu.net/" -- Not yet: -- https://www.google.com/amp/s/www1.folha.uol.com.br/amp/cotidiano/2019/12/neopentecostais-armados-atormentam-minorias-religiosas-brasileiras.shtml -> --> http://archive.md/https://www1.folha.uol.com.br/cotidiano/2019/12/neopentecostais-armados-atormentam-minorias-religiosas-brasileiras.shtml -- http://l.facebook.com/l/LAQEv5dNkAQFsqBezog5tCQDEj__EmkC8hhzMY7bRYpjazA/sipse.com/mexico/tuve-gran-aceptacion-en-el-congreso-primera-regidora-transgenero-195653.html -- http://googleweblight.com/?lite_url=http%3A%2F%2Fm.sao-paulo.estadao.com.br%2Fnoticias%2Fgeral%2Cbabas-top-de-linha-sao-nova-opcao-nas-ferias-imp-%2C812746&ei=dBcCR7Yz&lc=pt-BR&s=1&m=909&host=www.google.com.br&ts=1457997896&sig=APY536xScDN3rxLgJO_BhWT0u5fs9ZfhhQ -- https://www.facebook.com/n/?email%2Fufi%2Fclick&action=like&target=737381759731108&hash=AVIqbInFZXOsHhu4&aref=1458151249562098&medium=email&mid=52e4026f80d98G5ad25c62G52e2e4b4249f2G96G7627&bcode=1.1458229199.Abmx19fFxp2BMAd3&n_m=eduardoochs%40gmail.com&lloc=email_ufi_like&sig_t=1458229199&sig=AVIDlVC5hDJ4MQHB -- -> https://www.facebook.com/groups/692407277561890/permalink/737381759731108/ -- http://www.nytimes.com/glogin?URI=http%3A%2F%2Fwww.nytimes.com%2F2011%2F11%2F26%2Fworld%2Famericas%2Fsilas-malafaia-tv-evangelist-rises-in-brazils-culture-wars.html%3F_r%3D1 -> ? 
-- hhttps://www.youtube.com/attribution_link?a=mOXlHogL-aA&u=%2Fwatch%3Fv%3DiMLFa4Xcw4w%26feature%3Dshare --> https://www.youtube.com/watch?v=iMLFa4Xcw4w&feature=share -- https://www.google.com/url?q=http://fossil-scm.org/home/doc/trunk/www/quickstart.wiki&sa=D&source=hangouts&ust=1580427302732000&usg=AFQjCNF--1cdmwtLMiZMfiUV4ZLfPc91dA --]] --[[ * (eepitch-shell) * (eepitch-kill) * (eepitch-shell) ~/LUA/redirect.lua foobar ~/LUA/redirect.lua http://glo.bo/1H9s38Y --]] -- Local Variables: -- coding: raw-text-unix -- End: