-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patharchive-links.lua
More file actions
executable file
·99 lines (78 loc) · 2.77 KB
/
archive-links.lua
File metadata and controls
executable file
·99 lines (78 loc) · 2.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env luajit
-- archive-links.lua: takes a JSON file of links and opens archiving
-- services for each one in order to preserve them.
math.randomseed(os.time())
-- Prepend the script's own "lib" directory to the module search path.
-- arg[0] is matched for either a POSIX ("/") or Windows ("\") directory
-- prefix; package.config:sub(1, 1) is the platform's path separator.
package.path = (arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "lib" .. package.config:sub(1, 1) .. "?.lua;" .. package.path
local utility = require "utility"
-- NOTE(review): utility.require presumably wraps require with friendlier
-- error reporting for missing modules -- confirm against lib/utility.lua.
local argparse = utility.require("argparse")
local json = utility.require("dkjson")
local parser = argparse():description("Takes a JSON file of links and uses archiving services to preserve them."):help_max_width(80)
parser:argument("json", "The JSON file."):args(1)
local options = parser:parse()
-- Decode the whole JSON file into a table keyed by URL; utility.open
-- presumably handles opening/closing the file around the callback.
local links = utility.open(options.json, "r", function(file)
return json.decode(file:read("*all"))
end)
-- Lua patterns (magic characters %-escaped) matching URLs which are
-- themselves archives; such links are flagged and never re-submitted.
local blacklist = {
"://web%.archive%.org/web/",
"://archive%.is/",
"://preservetube%.com/watch%?v%=",
"://ghostarchive%.org/archive/",
}
local concurrency, iteration = 4, 0 -- used to slow rate of opening links
-- links is keyed by URL (hash part only), so #links is unusable; count by
-- iterating instead.
local link_count = 0
for _ in pairs(links) do
link_count = link_count + 1
end
local day = os.date("%Y-%m-%d") -- don't repeat archives on the same day
local urls_tried = {} -- URLs to try in Internet Archive after main loop
-- Main pass: for each link, open archive.is (and PreserveTube for YouTube
-- watch URLs) in the user's browser, pausing every `concurrency` links so
-- tabs aren't opened faster than the services can accept them.
for url, value in pairs(links) do
  -- Wrapped in a local function so `return` acts as a multi-level
  -- "continue" for the skip conditions below.
  local function quick_archive()
    if not value then
      return
    end
    -- Upgrade legacy non-table entries to the table form used for metadata.
    if type(value) ~= "table" then
      value = {}
      links[url] = value
    end
    if value.blacklisted or value.disabled then
      return
    end
    -- Never submit the same link more than once per calendar day.
    if value.last_attempt == day then
      return
    end
    for _, fragment in ipairs(blacklist) do
      if url:find(fragment) then
        print(url:enquote() .. " cannot be archived, marking it as such.")
        value.blacklisted = true
        return
      end
    end
    -- Plain-text search (4th argument true): "?" and "." are Lua pattern
    -- magic characters, so the previous unescaped pattern treated "h?" as
    -- an optional "h" and could never match a real watch URL's literal "?".
    if url:find("://youtube.com/watch?v=", 1, true) then
      value.preservetube_export = true
    end
    if value.preservetube_export then
      os.execute("open " .. ("https://preservetube.com/save?url=" .. url):enquote())
    else
      urls_tried[#urls_tried + 1] = url -- queued for the Internet Archive pass after this loop
    end
    os.execute("open " .. ("https://archive.is/submit/?url=" .. url):enquote())
    value.last_attempt = day
    iteration = iteration + 1
    -- Batch boundary: wait for the user before opening the next group.
    if iteration % concurrency == 0 then
      print(iteration .. "/" .. link_count .. " opened. Press enter to continue.")
      io.read("*line")
    end
  end
  quick_archive()
end
-- The Internet Archive submissions below are slow and best-effort, so
-- persist the updated link metadata to disk before starting them.
utility.open(options.json, "w", function(f)
  local serialized = json.encode(links, { indent = true })
  f:write(serialized .. "\n")
end)
-- Second pass: hand each queued URL to the Internet Archive via spn.sh.
-- This runs after saving because it is much slower than the browser-based
-- submissions above and its outcome is not tracked.
for index, url in ipairs(urls_tried) do
  print("Archiving " .. index .. "/" .. #urls_tried .. "...")
  local command = "spn.sh -qns -f ./data "
  -- NOTE(review): access_key is never assigned anywhere in this script, so
  -- this branch is currently dead -- presumably a hook for a manually-set
  -- API key; confirm intent before removing.
  if access_key then
    command = command .. "-a " .. access_key:enquote() .. " "
  end
  -- Quote the URL like the other os.execute calls do: query strings carry
  -- shell metacharacters ("&", "?") that would otherwise split the command
  -- or background it mid-URL.
  os.execute(command .. url:enquote())
end