-
Notifications
You must be signed in to change notification settings - Fork 11
/
data_generator.rb
101 lines (89 loc) · 2.46 KB
/
data_generator.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# encoding: utf-8
require 'csv'
require 'json'
module Datanames
module Data
# CSV file read by extract_data; its columns are year, gender, name, quantity.
# NOTE(review): root_path is defined outside this file - presumably it resolves
# the given segments against the project root; confirm.
DATA_FILE = root_path('data/nombre_nacim_por_anio_y_sexo.csv')
# Maximum number of names kept per gender in each year's ranking.
TOP_NAMES_PER_YEAR_SIZE = 10
#
# Reads DATA_FILE and builds the two data sets consumed by export_data.
#
# Returns a two-element Array [names, years]:
#
# * names - Hash of formatted name => Array of { quantity:, year: }, one
#   entry per year in which the name appears, in order of first appearance.
#   Quantities of all rows that normalize to the same name are summed
#   (both genders together).
# * years - Hash of year => { f: [...], m: [...] }. Each list holds at most
#   TOP_NAMES_PER_YEAR_SIZE entries of { name:, quantity: }, sorted by
#   ascending quantity.
#
# Rows that normalize to the same name are merged per year and gender BEFORE
# ranking, so a name appears at most once in a list and carries its full
# total, consistent with how the names data set is aggregated.
#
# Raises RuntimeError when a row's gender is neither 'Femenino' nor
# 'Masculino'.
#
def self.extract_data
  # name => { year => { quantity:, year: } }; keyed by year so merging a row
  # is a hash lookup instead of a linear scan.
  entries_by_name = Hash.new { |h, k| h[k] = {} }
  # year => { f: { name => total }, m: { name => total } }
  totals_by_year = Hash.new { |h, k| h[k] = { f: Hash.new(0), m: Hash.new(0) } }

  # CSV columns
  # 0: Year
  # 1: Gender
  # 2: Name
  # 3: Quantity
  CSV.foreach(DATA_FILE) do |row|
    name = format_name(row[2])
    year = row[0].to_i
    quantity = row[3].to_i
    gender = case row[1]
             when 'Femenino' then :f
             when 'Masculino' then :m
             else raise "Invalid gender: #{row[1].inspect}"
             end

    entry = (entries_by_name[name][year] ||= { quantity: 0, year: year })
    entry[:quantity] += quantity
    totals_by_year[year][gender][name] += quantity
  end

  names = Hash.new { |h, k| h[k] = [] }
  entries_by_name.each { |name, per_year| names[name] = per_year.values }

  years = Hash.new { |h, k| h[k] = { f: [], m: [] } }
  totals_by_year.each do |year, by_gender|
    by_gender.each do |gender, totals|
      # Rank ascending by total. On equal totals the name seen first in the
      # file sorts later, so it is the one kept when the list is truncated
      # and the result does not depend on an unstable sort.
      ranked = totals.each_with_index.sort_by { |(_, total), index| [total, -index] }
      years[year][gender] = ranked.last(TOP_NAMES_PER_YEAR_SIZE).map do |(name, total), _|
        { name: name, quantity: total }
      end
    end
  end

  [names, years]
end
#
# Writes the extracted data as JSON files: one "<name>.json" per name under
# root_path('public', 'names') and one "<year>.json" per year under
# root_path('public', 'years'). Existing files are overwritten.
#
# NOTE(review): the target directories are not created here - presumably
# they already exist; confirm.
#
def self.export_data
  by_name, by_year = extract_data

  names_dir = root_path('public', 'names')
  by_name.each_pair do |key, entries|
    File.write(File.join(names_dir, "#{key}.json"), JSON.generate(entries))
  end

  years_dir = root_path('public', 'years')
  by_year.each_pair do |key, ranking|
    File.write(File.join(years_dir, "#{key}.json"), JSON.generate(ranking))
  end
end
#
# Normalizes a raw name into the lowercase ASCII key that extract_data uses
# to group rows and export_data uses as a file name.
#
# Steps, in order: trim and downcase; transliterate Spanish accented vowels
# and n-tilde (either case) to plain letters; cut the name at the first
# particle (" de", " del", " de la", " de las", " de los"); turn any other
# non-alphanumeric run into a space; join words with "_"; trim stray "_".
#
# name - String to normalize (must respond to #strip).
#
# Returns the normalized String, e.g. "María de los Ángeles" => "maria".
#
def self.format_name(name)
  # Both cases are listed on purpose: String#downcase is Unicode-aware on
  # Ruby >= 2.4 (so only lowercase accents remain at this point) and
  # ASCII-only before that (so uppercase accents survive). Matching just one
  # case let accented letters fall through to the "non-alphanumeric" rule
  # below and become "_".
  plain = name.strip.downcase.tr('ÁÉÍÓÚÑáéíóúñ', 'aeiounaeioun')

  replacements = [
    # NOTE(review): the particle pattern has no trailing word boundary, so
    # " de" also matches the start of a following word ("juan denis" ends up
    # as "juan_nis"). Left unchanged because existing keys may depend on it.
    [/( de los| del| de las| de la| de)(\s.*)?/, " "],
    [/[^\sa-zA-Z\d]+/, " "],
    [/\s+/, "_"],
    [/^_+/, ""],
    [/_+$/, ""]
  ]

  replacements.inject(plain) { |str, (from, to)| str.gsub(from, to) }
end
end
end