-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathtextalyze.rb
More file actions
104 lines (81 loc) · 2.1 KB
/
textalyze.rb
File metadata and controls
104 lines (81 loc) · 2.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Analyzes the characters of +text+ and returns a printable report.
#
# The text is passed through +sanitize+ and split with +chars_in+ before
# any statistics are computed.
#
# text    - the String to analyze.
# options - a Hash; its :format key selects the report:
#           :count     - raw count per character (via +format_counts+)
#           :frequency - relative frequency per character (via
#                        +format_frequencies+); used when :format is absent.
#
# Returns whatever the selected formatter returns.
# Raises RuntimeError when :format is neither :count nor :frequency.
def textalyze(text, options = {})
  report_type = options.fetch(:format, :frequency)
  characters = chars_in(sanitize(text))

  case report_type
  when :count
    format_counts(item_counts(characters))
  when :frequency
    format_frequencies(frequencies(characters))
  else
    raise "Format #{report_type} not recognized."
  end
end
# Computes the relative frequency of every distinct item in +array+.
#
# array - the collection of items to measure.
#
# Returns a Hash mapping each item to its share of the total (count divided
# by the number of items), rounded to four decimal places.
def frequencies(array)
  total = array.count.to_f

  item_counts(array).each_with_object({}) do |(item, occurrences), result|
    result[item] = (occurrences / total).round(4)
  end
end
# Tallies how many times each distinct item occurs in +array+.
#
# array - the collection of items to count.
#
# Returns a Hash of item => count. The Hash is created with a default of 0,
# so looking up an item that never occurred yields 0.
def item_counts(array)
  array.each_with_object(Hash.new(0)) { |item, tally| tally[item] += 1 }
end
# Splits +string+ into its individual characters.
#
# Returns an Array of one-character Strings, in order.
def chars_in(string)
  string.each_char.to_a
end
# Normalizes +string+ for analysis: lowercases it and removes every
# character that is not an ASCII letter (a-z) or digit (0-9).
#
# Returns a new String; the argument is not modified.
def sanitize(string)
  lowered = string.downcase
  # A leading "^" negates the set, so this deletes everything outside it.
  lowered.delete("^a-z0-9")
end
# Orders the entries of +stats+ by the string form of each item.
#
# stats - a collection of [item, stat] pairs (for example a Hash).
#
# Returns an Array of [item, stat] pairs sorted by item.to_s.
def sorted(stats)
  stats.sort_by do |item, _value|
    item.to_s
  end
end
# Renders a fractional frequency as a percentage string.
#
# freq - a number where 1.0 means 100%.
#
# Returns the percentage rounded to two decimals, right-aligned in a field
# at least five characters wide, followed by "%" (e.g. 0.5 => " 50.0%").
def to_percent(freq)
  percentage = (freq * 100).round(2)
  format("%5s%%", percentage)
end
# Determines how many columns are available for output.
#
# Uses `tput cols` when the tput command is available. Falls back to 80
# columns when tput is missing or does not report a positive number.
#
# Returns a positive Integer.
def screen_width
  default_width = 80
  return default_width unless system("which tput 1>/dev/null 2>&1")

  # tput's own stderr is left untouched here: only the lookup above is
  # silenced, the width query itself runs exactly as before.
  columns = `tput cols`.to_i

  # When tput prints nothing usable, to_i yields 0; a zero width would make
  # every histogram bar length negative, so use the default instead.
  columns > 0 ? columns : default_width
end
# Builds one histogram bar out of "#" characters.
#
# percent - fraction (0.0..1.0) of the available space the bar should fill.
# width   - total number of columns on the line.
# offset  - number of columns already used by the label before the bar.
#
# Returns a String of "#" characters whose length is percent of the space
# left after the label, truncated to a whole number. Returns "" when the
# label already fills or exceeds the line.
def histogram_bar(percent, width, offset)
  bar_length = (percent * (width - offset)).to_i

  # String#* raises ArgumentError for a negative count, which happens when
  # offset is larger than width (e.g. a very narrow terminal).
  "#" * [bar_length, 0].max
end
# Renders raw per-item counts as text, one item per line.
#
# counts - a Hash of item => count.
#
# Returns a String with one "<item.inspect> - <count>" line per entry, in
# the order produced by +sorted+, joined by newlines.
def format_counts(counts)
  lines = sorted(counts).map { |item, count| "#{item.inspect} - #{count}" }
  lines.join("\n")
end
# Renders per-item frequencies as a text histogram, one item per line.
#
# frequencies - a Hash of item => frequency (fraction of the total).
#
# Each line has the form "<item> [<percent>] <bar>". Bars are scaled so the
# most frequent item fills the space left on the line after its label.
#
# Returns the lines joined by newlines, or "" when there are no entries.
def format_frequencies(frequencies)
  return "" if frequencies.empty?

  max = frequencies.values.max
  # Look the width up once: screen_width shells out to external commands,
  # so calling it for every row is wasted work.
  width = screen_width

  sorted(frequencies).map do |item, freq|
    item_info = "#{item} [#{to_percent(freq)}] "
    percent_of_screen = freq / max
    item_info + histogram_bar(percent_of_screen, width, item_info.length)
  end.join("\n")
end
# Command-line entry point. Runs only when this file is executed directly,
# not when it is loaded by another file.
#
# Usage: ruby <this file> <path to text file>
if __FILE__ == $PROGRAM_NAME
  if ARGV.empty?
    puts "Please supply a text file to analyze."
    puts ""
    puts "Example:"
    puts "$ ruby #{__FILE__} ./sample_data/moby-dick.txt"
    exit 1
  end

  source_file = ARGV.first

  begin
    text = File.read(source_file)
  rescue SystemCallError => e
    # A missing or unreadable file raises an Errno::* error; report it as a
    # short message on stderr with exit status 1 instead of a backtrace.
    abort "Could not read #{source_file}: #{e.message}"
  end

  puts "The counts for #{source_file} are..."
  puts textalyze(text, :format => :frequency)
end