-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstorage-statistics.lua
More file actions
executable file
·282 lines (239 loc) · 8.55 KB
/
storage-statistics.lua
File metadata and controls
executable file
·282 lines (239 loc) · 8.55 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#!/usr/bin/env luajit
-- Primarily written by ChatGPT using GPT-3.5, with fixes/mods by Tangent Fox.
-- Do whatever the hell you want with it.
-- Previously only tested on Windows.
-- Now should work on any system, even if LuaFileSystem isn't installed!
-- Directory separator for this platform, taken from the first character of
-- package.config ("\\" is treated as Windows below, anything else as Unix-like).
local path_separator = package.config:sub(1, 1)

-- Prefer the real LuaFileSystem. When it cannot be loaded, build a minimal
-- stand-in that offers only the two functions this script uses.
local success, lfs = pcall(require, "lfs")
if not success then
  math.randomseed(os.time())

  -- Per-platform temp directory and directory-listing command.
  local system
  if path_separator == "\\" then
    system = {
      temp = "C:\\Windows\\Temp\\",
      list = "dir /w /b",
    }
  else
    system = {
      temp = "/tmp/",
      list = "ls -1a",
    }
  end

  lfs = {
    --- List the entries of a directory by shelling out.
    -- The listing is redirected into a randomly named temporary file, read
    -- back, and the temporary file is deleted again.
    -- NOTE(review): `path` is only wrapped in double quotes before being
    -- handed to the shell; names containing `"` or other shell
    -- metacharacters are not escaped.
    -- @param path directory to list
    -- @return iterator function yielding one entry name per call
    dir = function(path)
      local file_name = system.temp .. math.random()
      os.execute(system.list .. " \"" .. path .. "\" > \"" .. file_name .. "\"")
      local file = io.open(file_name, "r")
      if not file then
        -- The listing could not be written or read back: behave like an
        -- empty directory instead of crashing on a nil file handle.
        os.remove(file_name)
        return function() return nil end
      end
      local output = file:read("*all")
      file:close()
      -- os.remove works on every platform; the shell command `rm` does not
      -- exist on Windows and left the temporary files behind there.
      os.remove(file_name)
      return output:gmatch("[^\r\n]+")
    end,

    --- Guess whether a path is a file or a directory.
    -- A path that opens and reads normally is a file. A path whose read
    -- fails with "Is a directory", or that cannot be opened at all, is
    -- reported as a directory.
    -- NOTE(review): an unreadable or missing file is therefore also reported
    -- as a directory - confirm this is acceptable for the caller.
    -- @param path path to inspect
    -- @return table with a single `mode` field: "file" or "directory"
    attributes = function(path)
      local file = io.open(path, "r")
      if file then
        -- read() defaults to reading a whole line, so read 1 byte instead
        local _, error_message = file:read(1)
        file:close()
        if error_message == "Is a directory" then
          return { mode = "directory" }
        end
        return { mode = "file" }
      else
        return { mode = "directory" }
      end
    end,
  }
end
--- Report the size of a file in bytes.
-- The file is opened in binary mode and the position of its end is returned.
-- @param filepath path of the file to measure
-- @return size in bytes, or nil when the file cannot be opened
function get_filesize(filepath)
  local handle = io.open(filepath, "rb")
  if not handle then
    return nil
  end
  local byte_count = handle:seek("end")
  handle:close()
  return byte_count
end
--- Walk a directory tree and collect the size of every file in it.
-- Each file's path and size is printed as it is found; files whose size
-- cannot be read are reported and left out of the totals.
-- @param path directory to scan
-- @return total size in bytes, number of files counted, list of the individual sizes
function traverse_directory(path)
  local sizes = {}
  local byte_total, file_count = 0, 0

  for name in lfs.dir(path) do
    -- skip the self and parent entries so the walk only ever descends
    if not (name == "." or name == "..") then
      local child = path .. path_separator .. name
      local info = lfs.attributes(child)
      local mode = info and info.mode

      if mode == "file" then
        local size = get_filesize(child)
        if size then
          print(child, size, "bytes")
          sizes[#sizes + 1] = size
          byte_total = byte_total + size
          file_count = file_count + 1
        else
          print(child, "File not found or inaccessible")
        end
      elseif mode == "directory" then
        local sub_bytes, sub_count, sub_sizes = traverse_directory(child)
        byte_total = byte_total + sub_bytes
        file_count = file_count + sub_count
        -- append the sub-directory's sizes from last to first
        for i = #sub_sizes, 1, -1 do
          sizes[#sizes + 1] = sub_sizes[i]
        end
      end
    end
  end

  return byte_total, file_count, sizes
end
--- Pick evenly spaced percentiles (0th through 100th) from a list of numbers.
-- Percentile i is the value at rank ceil(#data * (i - 1) / (num_percentiles - 1))
-- of the sorted data, with rank 0 treated as rank 1.
-- The input list is not modified; a sorted copy is used.
-- @param data list of numbers
-- @param num_percentiles how many percentiles to return; with 1, the single
--   result is the 0th percentile (the minimum)
-- @return list of num_percentiles values (empty when data is empty)
function calculate_percentiles(data, num_percentiles)
  -- sort a copy so the caller's list keeps its order
  local sorted = {}
  for i = 1, #data do
    sorted[i] = data[i]
  end
  table.sort(sorted)

  local count = #sorted
  local result = {}
  -- a single percentile would make the step 0/0; use one step instead
  local steps = math.max(num_percentiles - 1, 1)
  for i = 1, num_percentiles do
    -- multiply before dividing: whole-number ranks stay exact, so rounding
    -- noise cannot push ceil() one rank too high
    local index = math.ceil(count * (i - 1) / steps)
    if index == 0 then index = 1 end
    result[i] = sorted[index]
  end
  return result
end
--- Print a list of evenly spaced percentiles, one per line, in order.
-- Entry i of n is labelled as percentile (i - 1) / (n - 1) * 100; the entry
-- at exactly 50 is additionally marked as the median.
-- @param percentiles list of values, lowest percentile first
function print_percentiles(percentiles)
  -- a lone entry would give 0/0 as its label; treat it as the 0th percentile
  local steps = math.max(#percentiles - 1, 1)
  -- ipairs guarantees ascending order, which pairs does not
  for i, value in ipairs(percentiles) do
    local p = (i - 1) / steps * 100
    if p == 50 then
      print(p .. "th percentile (median):", value, "bytes")
    else
      print(p .. "th percentile:", value, "bytes")
    end
  end
end
--- Find the most frequently occurring value(s) in a list.
-- @param data list of values
-- @return sorted list of the modes (empty when the highest frequency is 1,
--   i.e. every value is unique), and the highest frequency seen
function calculate_mode(data)
  -- tally how often each value occurs
  local counts = {}
  for _, item in ipairs(data) do
    counts[item] = (counts[item] or 0) + 1
  end

  -- find the largest tally
  local highest = 0
  for _, count in pairs(counts) do
    if count > highest then
      highest = count
    end
  end

  local winners = {}
  if highest ~= 1 then
    for item, count in pairs(counts) do
      if count == highest then
        winners[#winners + 1] = item
      end
    end
    table.sort(winners)
  end
  return winners, highest
end
--- Print the outcome of calculate_mode.
-- Reports no mode, one mode, or a numbered list of several modes, followed
-- by the frequency.
-- @param modes list of mode values
-- @param max_freq frequency to print after the modes
function print_mode_results(modes, max_freq)
  local found = #modes
  if found > 1 then
    print("Multiple modes:")
    for rank, size in ipairs(modes) do
      print("Mode " .. rank .. ":", size, "bytes")
    end
  elseif found == 1 then
    print("Mode:", modes[1], "bytes")
  else
    print("No mode found.")
  end
  print("Frequency:", max_freq)
end
--- Compute the sample standard deviation of a list of numbers.
-- The variance is divided by n - 1, so at least two values are needed.
-- @param data list of numbers
-- @return the standard deviation, or 0 when there are fewer than two values
function calculate_standard_deviation(data)
  local n = #data
  -- With n == 1 the divisor n - 1 is zero and the result would be NaN, so
  -- the guard has to cover single-element lists as well as empty ones.
  if n < 2 then
    return 0
  end

  -- Calculate mean
  local sum = 0
  for _, value in ipairs(data) do
    sum = sum + value
  end
  local mean = sum / n

  -- Calculate sum of squared deviations from the mean
  local sum_of_squared_deviations = 0
  for _, value in ipairs(data) do
    local deviation = value - mean
    sum_of_squared_deviations = sum_of_squared_deviations + deviation^2
  end

  local variance = sum_of_squared_deviations / (n - 1)
  return math.sqrt(variance)
end
--- Bucket a list of numbers into equally wide bins between its minimum and maximum.
-- @param data list of numbers
-- @param num_bins number of bins to create
-- @return list of bins, each `{bin_start, bin_end, count}`; empty when data
--   is empty or num_bins is less than 1
function calculate_histogram(data, num_bins)
  local histogram = {}
  local n = #data
  if n == 0 or num_bins < 1 then
    return histogram
  end

  -- Scan for the extremes rather than math.min(unpack(data)): unpack pushes
  -- every element onto the stack and fails on large lists.
  local min_value, max_value = data[1], data[1]
  for i = 2, n do
    local value = data[i]
    if value < min_value then min_value = value end
    if value > max_value then max_value = value end
  end

  local bin_width = (max_value - min_value) / num_bins
  for i = 1, num_bins do
    local bin_start = min_value + (i - 1) * bin_width
    histogram[i] = { bin_start, bin_start + bin_width, 0 }
  end

  for i = 1, n do
    local bin_index
    if bin_width > 0 then
      bin_index = math.floor((data[i] - min_value) / bin_width) + 1
      -- the largest value sits exactly on the upper edge and would land in
      -- a non-existent bin num_bins + 1; count it in the last bin
      if bin_index > num_bins then
        bin_index = num_bins
      end
    else
      -- every value is identical, so the bins have zero width; count
      -- everything in the last bin instead of dividing by zero
      bin_index = num_bins
    end
    histogram[bin_index][3] = histogram[bin_index][3] + 1
  end

  return histogram
end
--- Print each histogram bin as a "start - end:" range followed by its file count.
-- @param histogram list of `{bin_start, bin_end, count}` entries
function print_histogram(histogram)
  for _, entry in ipairs(histogram) do
    local range_label = string.format("%.2f - %.2f:", entry[1], entry[2])
    print(range_label, entry[3], "files")
  end
end
--- Print a histogram as fixed-width text bars with logarithmic scaling.
-- Each line shows a bar inside square brackets, padded with spaces to
-- graph_width, followed by the bin range and its file count. Bar length is
-- log(count + 1) relative to the fullest bin's log(count + 1).
-- @param histogram list of `{bin_start, bin_end, count}` entries
-- @param graph_width number of characters between the brackets
function print_histogram_graphical(histogram, graph_width)
  -- Find the maximum count to determine the scale
  local max_count = 0
  for _, bin in ipairs(histogram) do
    if bin[3] > max_count then
      max_count = bin[3]
    end
  end
  local max_log_scaled = math.log(max_count + 1) -- Add 1 to avoid log(0)

  for _, bin in ipairs(histogram) do
    local bin_start, bin_end, count = bin[1], bin[2], bin[3]
    local scaled_width = 0
    -- When every bin is empty the scale is log(1) == 0; keep the bars empty
    -- instead of dividing by zero.
    if max_log_scaled > 0 then
      local log_scaled_count = math.log(count + 1) -- Add 1 to avoid log(0)
      scaled_width = math.floor((log_scaled_count / max_log_scaled) * graph_width)
    end
    local bar = string.rep("#", scaled_width)
    local empty_spaces = string.rep(" ", graph_width - scaled_width) -- pad for alignment
    print(string.format("[%s%s] %.2f - %.2f: %d files", bar, empty_spaces, bin_start, bin_end, count))
  end
end
-- Entry point: scan a directory tree and print summary statistics about the
-- sizes of the files in it.
-- The directory is taken from the first command-line argument when one is
-- given; otherwise the current directory is scanned.
local root_directory = (arg and arg[1]) or "."
local total_size, total_files, total_file_sizes = traverse_directory(root_directory)

if total_files > 0 then
  print("")
  print(total_files, "files found.")

  local average_size = total_size / total_files
  print("Average (mean) file size:", average_size, "bytes")

  local standard_deviation = calculate_standard_deviation(total_file_sizes)
  print("Standard deviation:", standard_deviation)

  local mode_results, max_freq = calculate_mode(total_file_sizes)
  print_mode_results(mode_results, max_freq)

  -- Square Root Rule: use ceil(sqrt(number of files)) histogram bins
  local num_bins = math.ceil(math.sqrt(total_files))
  local histogram_results = calculate_histogram(total_file_sizes, num_bins)
  print_histogram_graphical(histogram_results, 40)

  -- 11 percentiles: 0th, 10th, ... 100th
  local percentiles = calculate_percentiles(total_file_sizes, 11)
  print_percentiles(percentiles)
else
  print("No files found.")
end