From 0e5206b6617f29a2621ff89ba2d099f448a6d158 Mon Sep 17 00:00:00 2001 From: JT Archie Date: Mon, 2 Oct 2023 13:02:23 -0600 Subject: [PATCH] add parallel --- lib/follow_the_snow/csv_builder.rb | 4 +++- lib/follow_the_snow/scrape/wikipedia.rb | 14 +++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/follow_the_snow/csv_builder.rb b/lib/follow_the_snow/csv_builder.rb index b9d44c46c5..7f4a0cfea4 100644 --- a/lib/follow_the_snow/csv_builder.rb +++ b/lib/follow_the_snow/csv_builder.rb @@ -21,11 +21,13 @@ def build! file.sync = true file.puts 'name,closed,lat,lng,city,state,country,url' - wikipedia.map do |resort| + Parallel.each(wikipedia.resorts, in_threads: 3) do |resort| logger = @logger.child(resort: resort.name) address = geo.to_address(lat: resort.lat, lng: resort.lng, logger: logger) metadata = OpenStruct.new(closed: false) + sleep(rand(0.0..1.0)) + file.puts([ resort.name, metadata.closed, diff --git a/lib/follow_the_snow/scrape/wikipedia.rb b/lib/follow_the_snow/scrape/wikipedia.rb index 35da17ec84..2600c18526 100644 --- a/lib/follow_the_snow/scrape/wikipedia.rb +++ b/lib/follow_the_snow/scrape/wikipedia.rb @@ -1,22 +1,18 @@ # frozen_string_literal: true +require 'geo/coord' require 'http' require 'nokogiri' require 'ostruct' -require 'geo/coord' +require 'parallel' module FollowTheSnow module Scrape Wikipedia = Struct.new(:url, :logger, keyword_init: true) do - include Enumerable - - def each(&block) - resorts(&block) - end - def resorts doc = Nokogiri::HTML(HTTP.follow.timeout(10).get(url).to_s) - doc.css('#mw-content-text ul > li > a:first-child').map do |link| + links = doc.css('#mw-content-text ul > li > a:first-child') + Parallel.map(links, in_threads: 4) do |link| href = link['href'] next if href =~ /Template|Category|Comparison|List|Former/i @@ -49,7 +45,7 @@ def resorts lng: geo.lng, url: url }) - yield(resort) if block_given? + sleep(rand(0.0..1.0)) resort end.compact end