|
Warning: this is an htmlized version!
The original is here, and the conversion rules are here. |
#!/usr/bin/env lua5.1
-- redirect.lua: discover where a URL redirects to.
-- Used by: (find-angg ".emacs" "ee-read-redirect")
-- See: (find-angg "LUA/lua50init.lua" "url_split")
--
-- This file:
-- http://angg.twu.net/LUA/redirect.lua
-- http://angg.twu.net/LUA/redirect.lua.html
-- (find-angg "LUA/redirect.lua")
--
-- By: Eduardo Ochs <eduardoochs@gmail.com>
-- Version: 2015nov19
-- Public domain.
require "re" -- (find-es "lua5" "lpeg-re")
-- 2024feb21:
-- (find-blogme3 "cruft-jan2024.lua")
ee_dofile "~/blogme3/cruft-jan2024.lua"
-- youtube_redirect0: rebuild the target of a YouTube "attribution_link" URL.
--   u: a split URL; its query parameters are read from the table `u.q`.
-- The "u" query parameter is appended to "https://www.youtube.com".
-- Returns the resulting URL, or nil when there is no "u" parameter (or no
-- `u.q` table at all), so that callers can fall through to another
-- strategy instead of dying on a nil concatenation.
youtube_redirect0 = function (u)
  local target = u.q and u.q.u
  if not target then return nil end
  return "https://www.youtube.com" .. target
end
-- youtube_redirect: handle YouTube "attribution_link" URLs.
--   u: a split URL with `host` and `path` fields.
-- Returns nil when the host does not contain "youtu", false when the path
-- is not "attribution_link", and otherwise the (single) result of
-- youtube_redirect0(u).
function youtube_redirect (u)
  local host_hit = u.host:match("youtu")
  if not host_hit then
    return host_hit
  end
  if u.path ~= "attribution_link" then
    return false
  end
  -- Parentheses keep the result truncated to one value, as `and` did.
  return (youtube_redirect0(u))
end
-- google_redirect0: return the "url" query parameter of a split URL
-- (read from the table `u.q`), or nil when it is absent.
function google_redirect0 (u)
  local query = u.q
  return query.url
end
-- google_redirect: handle Google "/url?...&url=..." redirector links.
--   u: a split URL with `host` and `path` fields.
-- Returns nil when the host does not contain "google", false when the path
-- is not "url", and otherwise the (single) result of google_redirect0(u).
function google_redirect (u)
  local host_hit = u.host:match("google")
  if not host_hit then
    return host_hit
  end
  if u.path ~= "url" then
    return false
  end
  -- Parentheses keep the result truncated to one value, as `and` did.
  return (google_redirect0(u))
end
-- facebook_redirect0: return the "u" query parameter of a split URL
-- (read from the table `u.q`), or nil when it is absent.
function facebook_redirect0 (u)
  local query = u.q
  return query.u
end
-- facebook_redirect: handle Facebook "l.php?u=..." outbound links.
--   u: a split URL with `host` and `path` fields.
-- Returns false unless the host is exactly "l.facebook.com" or
-- "www.facebook.com" and the path is "l.php"; otherwise returns the
-- (single) result of facebook_redirect0(u).
function facebook_redirect (u)
  local host_ok = (u.host == "l.facebook.com" or u.host == "www.facebook.com")
  if not host_ok then
    return false
  end
  if u.path ~= "l.php" then
    return false
  end
  -- Parentheses keep the result truncated to one value, as `and` did.
  return (facebook_redirect0(u))
end
-- redirect_substring_0: try each site-specific extractor on a split URL,
-- in the order facebook, google, youtube, and return the first truthy
-- result. When none succeeds, the (falsy) youtube result is returned.
function redirect_substring_0 (u)
  local target = facebook_redirect(u)
  if target then return target end
  target = google_redirect(u)
  if target then return target end
  return (youtube_redirect(u))
end
-- redirect_substring: find a redirect target embedded in the URL itself.
-- The URL string is split with url_split (defined outside this file); if
-- that yields a falsy value it is returned as-is, otherwise the split URL
-- is handed to redirect_substring_0 and its single result is returned.
function redirect_substring (url)
  local parts = url_split(url)
  if not parts then
    return parts
  end
  return (redirect_substring_0(parts))
end
-- redirect_wget_0: build the shell command that asks wget where `url`
-- redirects to.
--   url: the URL string to probe.
-- Returns a command line that runs wget with at most one redirect,
-- discards the downloaded body, and pipes wget's messages through grep/awk
-- so that only the second field of each "Location:" line is printed.
--
-- The URL is embedded between single quotes. URLs come from untrusted
-- sources, so every single quote inside the URL is rewritten as '\''
-- (close the quoted string, emit an escaped quote, reopen it); without
-- this a quote in the URL would end the quoted argument and let the rest
-- of the URL run as shell code.
redirect_wget_0 = function (url)
  local options = "--max-redirect=1 -O /dev/null"
  local fmt = "wget %s '%s' 2>&1 | grep '^Location:' | awk '{print $2}'"
  -- The outer parentheses drop gsub's second result (the match count).
  local quoted = (url:gsub("'", "'\\''"))
  return string.format(fmt, options, quoted)
end
-- redirect_wget_1: run the command built by redirect_wget_0 for `url`
-- through getoutput (defined outside this file) and return whatever
-- getoutput returns.
function redirect_wget_1 (url)
  local command = redirect_wget_0(url)
  return getoutput(command)
end
-- redirect_wget: discover the redirect target of `url` by running wget.
-- The output of redirect_wget_1 is split into lines (splitlines is defined
-- outside this file); the last line that starts with "http" is returned,
-- or nil when no line does.
function redirect_wget (url)
  local lines = splitlines(redirect_wget_1(url))
  -- Scan from the end: the first hit is the last "http..." line.
  for i = #lines, 1, -1 do
    local line = lines[i]
    if line:match("^http") then
      return line
    end
  end
  return nil
end
-- redirect: main entry point. First try to extract the target from the
-- URL text itself (redirect_substring); if that gives nothing truthy,
-- fall back to asking wget (redirect_wget) and return its single result.
function redirect (url)
  local target = redirect_substring(url)
  if target then
    return target
  end
  return (redirect_wget(url))
end
-- Command-line entry point: when the chunk receives a first argument,
-- treat it as a URL and print where it redirects to. Nothing is printed
-- when there is no argument or no redirect target was found.
local url = ...
do
  local target = url and redirect(url)
  if target then
    print(target)
  end
end
-- UrlSplit: a small class that splits a URL into parts and decodes its
-- query string. Class, VTable and tos_VTable are defined outside this
-- file. Typical use, with each step returning the object for chaining:
--   us = UrlSplit.new(url):makeparts():makepquery()
-- Fields set along the way:
--   us.url      the original URL string
--   us.parts    table with scheme, host, path, query, anchor (makeparts)
--   us.pquery   table mapping query keys to percent-decoded values
--   us.pqueryu  shortcut for us.pquery.u
UrlSplit = Class {
  type    = "UrlSplit",
  new     = function (url) return UrlSplit {url=url} end,
  __tostring = tos_VTable,
  __index = {
    -- scheme "://" host, then optional "/path", "?query" and "#anchor".
    makeparts_re = re.compile [=[
      {| {:scheme: [a-z]+ :} "://"
      {:host: [^/]+ :}
      ( "/" {:path: [^?#]* :} ) ?
      ( "?" {:query: [^#]* :} ) ?
      ( "#" {:anchor: [^#]* :} ) ?
      |}
    ]=],
    -- Match us.url against makeparts_re and store the result in us.parts
    -- (wrapped in a VTable on success, nil on failure). Returns us.
    makeparts = function (us)
      us.parts = us.makeparts_re:match(us.url)
      if us.parts then us.parts = VTable(us.parts) end
      return us
    end,
    --
    -- Replace every "%HH" (two hex digits) in str by the byte it encodes.
    -- The outer parentheses drop gsub's match count.
    percent_decode = function (us, str)
      local f = function (hh) return string.char(tonumber(hh, 16)) end
      return (str:gsub("%%(%x%x)", f))
    end,
    -- Split us.parts.query at "&" into key=value pieces and store the
    -- percent-decoded values in us.pquery (keys are stored undecoded).
    -- Does nothing when there are no parts or no query. Returns us.
    makepquery = function (us)
      if us.parts and us.parts.query then
        local Q = VTable {}
        for _,kv in ipairs(split(us.parts.query, "([^&]+)")) do
          local k,v = kv:match("([^=]*)=(.*)")
          -- A piece without "=" (e.g. "?flag&x=1") yields no captures;
          -- keep it as a key with an empty value instead of crashing on
          -- a nil key / nil string.
          if k == nil then k, v = kv, "" end
          Q[k] = us:percent_decode(v)
        end
        us.pquery = Q
        us.pqueryu = us.pquery.u
      end
      return us
    end,
  },
}
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "redirect.lua"
this_line = "https://l.facebook.com/l.php?u=https%3A%2F%2Fbit.ly%2F2UKH8vh%3Ffbclid%3DIwAR0K53JzvTaZTUMdODNURV91z5xUven39M9Yohq0QdaqcTV4FF3HiXaEQWA&h=AT1kBgqF6oV63KWgO291BhGSI4stbLWRRkiz8fWcvkL2rdMgxTqty49IKt7bxQpsnEivkDpSiRtRsO1LeqHN-4ZD0TCxD0se9ET6no23iPdqIiOeDTvtUDnvkw&__tn__=-UK-y-R&c[0]=AT0KTZRUVyf-k0SklQBTr1YR7pxqH04UaxUFV7didzZFifa91qghZiNn5s2jk2CuQ9skT-9geGppP8hc4wlkqcN7dSqZQNNO71y0kdCdY7L1jq_JbFfe7e--LPMwerz4eDOzUa11aMEKRb5pryxlP48sHs7Ipqiy0XpdyjZ3NEuvQPPj9Fw5s0oab6TyTrP_LF0wW9Q0gqYOFhwlI97o3sSq4I7KJviNX6tU"
us = UrlSplit.new(this_line)
= us:makeparts()
= us
= us.parts
= us.parts.query
= us:makepquery()
= us.pquery
= us.pquery.u
= us.pqueryu
us = UrlSplit.new(this_line):makeparts():makepquery()
= us.pqueryu
fname = "~/TODO"
fblines = {}
for _,li in ipairs(splitlines(ee_readfile(fname))) do
if li:match("l.facebook.com/l.php") then
us = UrlSplit.new(li):makeparts():makepquery()
if us.pqueryu
then -- print(us.pqueryu)
table.insert(fblines, us.pqueryu)
else print(li)
end
end
end
PPV(fblines)
url = fblines[4]
= url
us = UrlSplit.new(url):makeparts():makepquery()
= us
pquerykeys = function (url)
local us = UrlSplit.new(url):makeparts():makepquery()
return keys(us.pquery)
end
url = fblines[4]
PPV(pquerykeys(url))
s = SetL.new()
for _,url in ipairs(fblines) do
local ks = pquerykeys(url)
PP(ks)
for _,k in ipairs(ks) do s:add(k) end
end
= s
= s:ksc()
trivialkeys = [=[
ab_channel ad-keywords ad_id ad_name adset_id adset_name campaign_id
cmpid fbclid utm_campaign utm_content utm_medium utm_source utm_term
]=]
trivialkeys = split(trivialkeys)
trivialkeys = SetL.fromarray(trivialkeys)
= trivialkeys:ksc()
= (s - trivialkeys):ksc()
--]]
-- Tests
--[[
* (eepitch-lua51)
* (eepitch-kill)
* (eepitch-lua51)
dofile "redirect.lua"
r = function (url) print(redirect(url)) end
rr = function (url) printf("%s", redirect_wget_1(url)) end
-- Tests: redirect_substring
r "https://l.facebook.com/l.php?u=http%3A%2F%2Fwww.pragmatismopolitico.com.br%2F2015%2F11%2Fdocumentario-por-dentro-do-estado-islamico.html&h=GAQE_HFwE"
r "https://l.facebook.com/l.php?u=http%3A%2F%2Fyoutu.be%2FKtmV7-f_j7Y&h=KAQGSKy0E"
r "https://l.facebook.com/l.php?u=https%3A%2F%2Ftruthout.org%2Farticles%2Ffacebook-has-begun-purging-accounts-tied-to-anti-fascist-groups"
r "https://www.google.com.br/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&uact=8&ved=0CB4QyCkwAGoVChMIztmKrZibyQIVDIGQCh3OZAO6&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DXGAy4KObjNE&usg=AFQjCNE-zWEymngBPEdEX0oSmWTdV98LAQ&sig2=vY1WNmt5yDI9hiMHW0Ljew&bvm=bv.107763241,d.Y2I"
r "https://www.google.com.br/url?sa=t&rct=j&q=&esrc=s&source=web&cd=2&cad=rja&uact=8&ved=0CCMQtwIwAWoVChMIztmKrZibyQIVDIGQCh3OZAO6&url=https%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DeNHDG-2w1xo&usg=AFQjCNFPl7l-ErtRSjsjWNtDJTGNcvxeag&sig2=ODOPfOK07fF5ELfBtA1UuA&bvm=bv.107763241,d.Y2I"
r "https://www.youtube.com/attribution_link?a=xjhRozk_ia8&u=%2Fwatch%3Fv%3DRVcpuKIinf8%26feature%3Dplayer_embedded"
-- Tests: redirect_wget
rr "http://dlvr.it/ChmKzf"
r "http://dlvr.it/ChmKzf"
rr "http://glo.bo/1H9s38Y"
r "http://glo.bo/1H9s38Y"
rr "http://bit.ly/1T0OuyD"
r "http://bit.ly/1T0OuyD"
-- Tests: redirect_wget & paywall (I don't know how to handle this)
rr "http://nyti.ms/1MUWKPQ"
r "http://nyti.ms/1MUWKPQ"
-- Test: urls that do not redirect
rr "http://angg.twu.net/"
r "http://angg.twu.net/"
-- Not yet:
-- https://www.google.com/amp/s/www1.folha.uol.com.br/amp/cotidiano/2019/12/neopentecostais-armados-atormentam-minorias-religiosas-brasileiras.shtml ->
--> http://archive.md/https://www1.folha.uol.com.br/cotidiano/2019/12/neopentecostais-armados-atormentam-minorias-religiosas-brasileiras.shtml
-- http://l.facebook.com/l/LAQEv5dNkAQFsqBezog5tCQDEj__EmkC8hhzMY7bRYpjazA/sipse.com/mexico/tuve-gran-aceptacion-en-el-congreso-primera-regidora-transgenero-195653.html
-- http://googleweblight.com/?lite_url=http%3A%2F%2Fm.sao-paulo.estadao.com.br%2Fnoticias%2Fgeral%2Cbabas-top-de-linha-sao-nova-opcao-nas-ferias-imp-%2C812746&ei=dBcCR7Yz&lc=pt-BR&s=1&m=909&host=www.google.com.br&ts=1457997896&sig=APY536xScDN3rxLgJO_BhWT0u5fs9ZfhhQ
-- https://www.facebook.com/n/?email%2Fufi%2Fclick&action=like&target=737381759731108&hash=AVIqbInFZXOsHhu4&aref=1458151249562098&medium=email&mid=52e4026f80d98G5ad25c62G52e2e4b4249f2G96G7627&bcode=1.1458229199.Abmx19fFxp2BMAd3&n_m=eduardoochs%40gmail.com&lloc=email_ufi_like&sig_t=1458229199&sig=AVIDlVC5hDJ4MQHB
-- -> https://www.facebook.com/groups/692407277561890/permalink/737381759731108/
-- http://www.nytimes.com/glogin?URI=http%3A%2F%2Fwww.nytimes.com%2F2011%2F11%2F26%2Fworld%2Famericas%2Fsilas-malafaia-tv-evangelist-rises-in-brazils-culture-wars.html%3F_r%3D1 -> ?
-- hhttps://www.youtube.com/attribution_link?a=mOXlHogL-aA&u=%2Fwatch%3Fv%3DiMLFa4Xcw4w%26feature%3Dshare
--> https://www.youtube.com/watch?v=iMLFa4Xcw4w&feature=share
-- https://www.google.com/url?q=http://fossil-scm.org/home/doc/trunk/www/quickstart.wiki&sa=D&source=hangouts&ust=1580427302732000&usg=AFQjCNF--1cdmwtLMiZMfiUV4ZLfPc91dA
--]]
--[[
* (eepitch-shell)
* (eepitch-kill)
* (eepitch-shell)
~/LUA/redirect.lua foobar
~/LUA/redirect.lua http://glo.bo/1H9s38Y
--]]
-- Local Variables:
-- coding: raw-text-unix
-- End: