-
-
Notifications
You must be signed in to change notification settings - Fork 9
/
lost-metal.rb
85 lines (70 loc) · 2.35 KB
/
lost-metal.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# frozen_string_literal: true
require 'uri'
require 'date'
require 'fileutils'
require 'nokogiri'
require_relative './methods'
# Local cache directory for the downloaded excerpt pages.
FileUtils.mkdir_p 'lost-metal'

# URL prefix shared by every excerpt post listed below.
BASE = 'https://www.tor.com/2022/'

# Excerpt posts in publication order, as paths relative to BASE.
links = %w[
  09/19/read-the-lost-metal-by-brandon-sanderson-prologue-and-chapters-1-2/
  09/26/read-the-lost-metal-by-brandon-sanderson-chapters-3-4/
  10/03/read-the-lost-metal-by-brandon-sanderson-chapters-5-8/
  10/10/read-the-lost-metal-by-brandon-sanderson-chapter-nine/
  10/17/read-the-lost-metal-by-brandon-sanderson-chapters-ten-and-eleven/
  10/24/read-the-lost-metal-by-brandon-sanderson-chapters-twelve-and-thirteen/
  10/31/read-the-lost-metal-by-brandon-sanderson-chapters-fourteen-and-fifteen/
  11/07/read-the-lost-metal-by-brandon-sanderson-chapters-sixteen-through-eighteen/
  11/14/read-the-lost-metal-by-brandon-sanderson-chapter-nineteen/
]
# Chapter-heading counter consumed by the HTML assembly pass further down;
# 0 is rendered there as the prologue.
counter = 0

# Fetch every excerpt page into lost-metal/<n>.html (n is 1-based and follows
# the order of `links`), skipping pages that are already cached on disk.
links.each.with_index(1) do |link, episode|
  url = BASE + link
  target = "lost-metal/#{episode}.html"
  puts "Download #{url}"
  next if File.exist?(target)

  # Argument-list form of system: no shell is involved, so neither the URL nor
  # the path needs quoting. system returns false/nil when wget fails or is
  # missing.
  next if system('wget', '--no-clobber', url, '--output-document', target, '-o', '/dev/null')

  # A failed wget run can leave an empty or partial output file behind, which
  # the File.exist? check above would accept as a valid cache entry on the
  # next run. Remove it and stop rather than build a book with a hole in it.
  FileUtils.rm_f(target)
  abort "Failed to download #{url}"
end
# Now we have all the files
# Stitch the cached pages into one HTML string, in the order of `links`.
# For each page only the children of `.entry-content` are considered.
html = +'' # unary plus: mutable even under frozen_string_literal
links.each_with_index do |link, index|
  # Block form of File.open closes the handle as soon as Nokogiri has parsed it.
  doc = File.open("lost-metal/#{index + 1}.html") { |file| Nokogiri::HTML(file) }
  page = doc.css('.entry-content')

  # start  - flips on at the first node carrying the 'ebook-link-wrapper' class
  # ending - flips on at the first 'frontmatter' node seen after start
  start = ending = false
  page.children.each do |e|
    # Drop the page's own headings/rules and nodes whose text is a single
    # space. Deliberately no `next` here: the start/ending markers below are
    # still evaluated for these nodes.
    e.remove if %w[h1 h2 h3 h4 hr].include?(e.name)
    e.remove if e.text == ' '

    if e.name == 'p'
      e.children.each do |ee|
        next unless ee.name == 'img'

        # Each image directly inside a paragraph gets a generated heading in
        # front of that paragraph: the first one overall is the prologue,
        # later ones are numbered chapters. `counter` runs across all pages.
        heading = counter.zero? ? '<h1>Prologue</h1>' : "<hr><h1> Chapter #{counter}</h1>"
        e.add_previous_sibling heading
        counter += 1

        # Strip attributes from the image, leaving the rest (e.g. src) as is.
        %w[srcset class loading sizes data-recalc-dims].each { |attr| ee.delete(attr) }
      end
    end

    # NOTE(review): `class?` is not defined in this file; presumably a helper
    # added to Nokogiri nodes by ./methods - confirm there.
    start = true if e.class?('ebook-link-wrapper')
    ending = true if e.class?('frontmatter') && start

    # Keep only what lies strictly between the two markers.
    e.remove if !start || ending || e.class?('ebook-link-wrapper')
  end

  html << page.inner_html
  html << "<p>Visit <a href='#{BASE + link}'>tor.com</a> for discussion.</p>"
end
# Write the assembled markup to books/lost-metal.html, then hand over to
# `generate`.
FileUtils.mkdir_p('books') # no-op when the directory already exists
File.write('books/lost-metal.html', html)
puts '[html] Generated HTML file'
# NOTE(review): `generate` is not defined in this file; presumably provided by
# ./methods - confirm what it does with the 'lost-metal' name and :all there.
generate('lost-metal', :all)