-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
TDLR-2550 - new rake task to update Collections, adding finding aid link and call number.
- Loading branch information
Showing 2 changed files with 149 additions and 6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
# frozen_string_literal: true | ||
require 'active_fedora' | ||
|
||
namespace :tufts do
  desc "Add Archives@Tufts Finding Aid URL and Call Number to collections."

  # Reads a three-column CSV file (finding aid link, collection number, collection title) and,
  # for every row whose title matches exactly one Collection, sets that collection's
  # call_number and finding_aid to the values from the row.
  #
  # Usage: bundle exec rake tufts:add_collection_fa_cn collection_info.csv
  #
  # Rows that are malformed, or that share a finding aid link, collection number or title with
  # another row, are skipped and reported. All per-row error messages are printed together
  # after processing has finished. Problems that prevent any processing (no Fedora connection,
  # wrong arguments, unreadable file, bad header line) print a message and exit with status 1.
  task add_collection_fa_cn: :environment do
    debug = false        # When true, echo every parsed CSV line.
    save_updates = true  # When false, report what would change without saving anything.

    # Can we connect to Fedora? This also causes this task's output to come after all the deprecation warnings.
    begin
      Collection.where(title: "Foobar").first
    rescue StandardError => ex
      puts("\nError when connecting to Fedora: #{ex}.")
      exit(1)
    end

    puts("") # Leave a blank line after all the deprecation warnings.

    if ARGV.size != 2
      puts("example usage: bundle exec rake tufts:add_collection_fa_cn collection_info.csv")
      exit(1)
    end

    filename = ARGV[1]

    # File.readlines closes the file when it is done and, unlike Kernel#open, never treats a
    # name beginning with "|" as a command to run. "bom|utf-8" drops a leading byte-order mark
    # so that it cannot end up glued to the first column name.
    begin
      csv_lines = File.readlines(filename, encoding: "bom|utf-8")
    rescue SystemCallError, IOError => ex
      puts("Error reading #{filename}: #{ex}.")
      exit(1)
    end

    # An empty file has no first line; to_s turns that into an empty header that fails the check below.
    column_names = csv_lines.first.to_s.strip.split(",")
    expected_column_names = ["finding aid link", "collection number", "collection title"]

    if column_names != expected_column_names
      puts("The first line of #{filename} should contain three comma-separated column names: finding aid link,collection number,collection title.")
      exit(1)
    end

    puts("line 1: #{column_names}") if debug

    # These hash tables are used to check for duplicates. The key is the value from the corresponding
    # column of the CSV file, and the value is the line number within the CSV file.
    finding_aid_links = {}
    call_numbers = {}
    collection_titles = {}
    lines = {}   # line number => values of a row that is still eligible to be applied
    errors = []

    # Data starts on line 2 of the file; line 1 is the header.
    csv_lines.drop(1).each.with_index(2) do |line, line_number|
      begin
        row_values = line.strip.split(',', 3) # split the line into three values on the first two commas

        if row_values.length != 3 || row_values[2].blank?
          errors.append("Line #{line_number} of #{filename} should contain three comma-separated values.")
          next
        end

        # For the third value, remove leading/trailing quotes and replace two consecutive quotes with one quote.
        row_values[2] = row_values[2].delete_prefix('"').delete_suffix('"').gsub('""', '"')

        puts("line #{line_number}: #{row_values}") if debug

        finding_aid_link, call_number, collection_title = row_values

        # Check each column for a previous line with the same value. A value seen for the first
        # time is recorded; a repeated value disqualifies both this line and the earlier one.
        unique = true
        [
          [finding_aid_links, finding_aid_link, "finding aid links"],
          [call_numbers, call_number, "collection numbers"],
          [collection_titles, collection_title, "collection titles"]
        ].each do |seen, value, description|
          earlier_line_number = seen[value]

          if earlier_line_number.nil?
            seen[value] = line_number
          else
            unique = false
            lines.delete(earlier_line_number)
            errors.append("Lines #{earlier_line_number} and #{line_number} of #{filename} contain duplicate #{description}: #{value}.")
          end
        end

        if unique
          lines[line_number] = { finding_aid_link: finding_aid_link, call_number: call_number, collection_title: collection_title }
        end
      rescue StandardError => ex
        errors.append("Check line #{line_number} of #{filename} for errors: #{ex}.")
      end
    end

    # Apply the surviving rows in file order.
    lines.keys.sort.each do |line_number|
      begin
        line = lines[line_number]
        collection_title = line[:collection_title]
        collections = Collection.where(title: collection_title)

        if collections.nil? || collections.first.nil?
          errors.append("The collection #{collection_title} on line #{line_number} of #{filename} is not found in MIRA.")
        elsif collections.length > 1
          # An ambiguous title is reported rather than guessing which collection was meant.
          errors.append("The title #{collection_title} on line #{line_number} of #{filename} matches #{collections.length} collections.")
        else
          collection = collections.first
          old_call_number = collection[:call_number].first
          old_finding_aid_link = collection[:finding_aid].first
          new_call_number = line[:call_number]
          new_finding_aid_link = line[:finding_aid_link]
          update_call_number = new_call_number != old_call_number
          update_finding_aid_link = new_finding_aid_link != old_finding_aid_link

          if update_call_number || update_finding_aid_link
            puts("Updating collection #{collection_title} old call number: #{old_call_number} old finding aid link: #{old_finding_aid_link} new call number: #{new_call_number} new finding aid link: #{new_finding_aid_link}.")

            # Only the fields that actually differ are assigned.
            collection[:call_number] = [new_call_number] if update_call_number
            collection[:finding_aid] = [new_finding_aid_link] if update_finding_aid_link
            collection.save! if save_updates
          else
            puts("         collection #{collection_title} has call number #{old_call_number} and finding aid link #{old_finding_aid_link}; no need to update.")
          end
        end
      rescue StandardError => ex
        errors.append("Error updating line #{line_number} of #{filename}: #{ex}.")
      end
    end

    # Output all the error messages after all the processing has been done.
    puts("") unless errors.empty?
    errors.each { |error| puts(error) }
  end
end