Skip to content

Commit

Permalink
TDLR-2550 - new rake task to update Collections, adding finding aid l…
Browse files Browse the repository at this point in the history
…ink and call number.
  • Loading branch information
bgoodmon committed Mar 29, 2024
1 parent 5f69b6a commit 34354e4
Show file tree
Hide file tree
Showing 2 changed files with 149 additions and 6 deletions.
16 changes: 10 additions & 6 deletions app/lib/tufts/contribute_collections.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# frozen_string_literal: true
module Tufts
# Create and maintain the Collection objects required by the Contribute controller
# Note that the rake task that uses this class is called at the initialization of a new TDL environment.
# It runs in production only once (e.g. Fedora 3 -> 4 migration), but it's useful in development environments
# since they are recreated more frequently.

class ContributeCollections
attr_reader :seed_data

Expand Down Expand Up @@ -79,37 +83,37 @@ def collection_for_work_type(work_type)

SEED_DATA = [
{
title: "Tufts Published Scholarship, 1987-2014",
title: "Tufts Published Scholarship",
call_number: "PB",
finding_aid: "https://archives.tufts.edu/repositories/2/resources/100",
work_types: [GenericDeposit, GenericTischDeposit, GisPoster, UndergradSummerScholar, FacultyScholarship]
},
{
title: "Fletcher School Records, 1923 -- 2016",
title: "Fletcher School Records",
call_number: "UA015",
finding_aid: "https://archives.tufts.edu/repositories/2/resources/120",
work_types: [CapstoneProject]
},
{
title: "Cummings School of Veterinary Medicine records, 1969-2012",
title: "Cummings School of Veterinary Medicine Records",
call_number: "UA041",
finding_aid: "https://archives.tufts.edu/repositories/2/resources/4",
work_types: [CummingsThesis]
},
{
title: "Undergraduate honors theses, 1929-2015",
title: "Senior Honors Theses",
call_number: "UA005",
finding_aid: "https://archives.tufts.edu/repositories/2/resources/123",
work_types: [HonorsThesis]
},
{
title: "Public Health and Professional Degree Programs Records, 1990 -- 2011",
title: "Public Health and Professional Degree Programs Records",
call_number: "UA187",
finding_aid: "https://archives.tufts.edu/repositories/2/resources/253",
work_types: [PublicHealth]
},
{
title: "Department of Education records, 2007-02-01-2014",
title: "Department of Education Records",
call_number: "UA071",
finding_aid: "https://archives.tufts.edu/repositories/2/resources/9",
work_types: [QualifyingPaper]
Expand Down
139 changes: 139 additions & 0 deletions lib/tasks/add_collection_fa_cn.rake
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# frozen_string_literal: true
require 'active_fedora'

namespace :tufts do
  desc "Add Archives@Tufts Finding Aid URL and Call Number to collections."

  # Usage: bundle exec rake tufts:add_collection_fa_cn collection_info.csv
  #
  # The CSV file must begin with the header line
  #   finding aid link,collection number,collection title
  # and every following line must hold those three values. Only the third
  # value (the title) may contain commas or be wrapped in double quotes.
  #
  # Lines that share a finding aid link, collection number, or title with
  # another line are reported and skipped. Every remaining line is matched to
  # exactly one Collection by title, and that collection's call number and
  # finding aid link are updated when they differ from the CSV values.
  #
  # Fatal problems (no Fedora connection, bad arguments, unreadable file, bad
  # header) print a message and exit with status 1. Per-line problems are
  # collected and printed together once all processing has finished.
  task add_collection_fa_cn: :environment do
    debug = false       # set to true to echo every parsed CSV line
    save_updates = true # set to false to report changes without saving them

    # Can we connect to Fedora? This also causes this task's output to come after all the deprecation warnings.
    begin
      Collection.where(title: "Foobar")&.first
    rescue StandardError => ex
      puts("\nError when connecting to Fedora: #{ex}.")
      exit(1)
    end

    puts("") # Leave a blank line after all the deprecation warnings.

    unless ARGV.size == 2
      puts("example usage: bundle exec rake tufts:add_collection_fa_cn collection_info.csv")
      exit(1)
    end

    filename = ARGV[1]

    # File.readlines (rather than Kernel#open) never interprets a leading "|"
    # as a command to run, and it closes the file as soon as it has been read.
    begin
      csv_lines = File.readlines(filename)
    rescue SystemCallError => ex
      puts("Could not read #{filename}: #{ex}.")
      exit(1)
    end

    # An empty file yields no column names and is rejected by the header check.
    column_names = csv_lines.first.to_s.strip.split(",")

    unless column_names == ["finding aid link", "collection number", "collection title"]
      puts("The first line of #{filename} should contain three comma-separated column names: finding aid link,collection number,collection title.")
      exit(1)
    end

    puts("line 1: #{column_names}") if debug

    # Each CSV column that must be unique, with the wording used when reporting duplicates.
    duplicate_labels = {
      finding_aid_link: "finding aid links",
      call_number: "collection numbers",
      collection_title: "collection titles"
    }

    # Used to check for duplicates: for each column, maps a value to the CSV
    # line number on which that value was first seen.
    first_seen = duplicate_labels.transform_values { {} }
    lines = {}  # line number => parsed row, for rows that passed validation
    errors = []

    # Data starts on line 2 of the file; line 1 is the header.
    csv_lines.drop(1).each.with_index(2) do |line, line_number|
      row_values = line.strip.split(',', 3) # split the line into three values on the first two commas

      unless row_values.length == 3 && !row_values[2].blank?
        errors << "Line #{line_number} of #{filename} should contain three comma-separated values."
        next
      end

      row = {
        finding_aid_link: row_values[0],
        call_number: row_values[1],
        # Remove leading/trailing quotes and replace two consecutive quotes with one quote.
        collection_title: row_values[2].delete_prefix('"').delete_suffix('"').gsub('""', '"')
      }

      puts("line #{line_number}: #{row.values}") if debug

      # Check for previous lines with duplicate values. A duplicate disqualifies
      # both the earlier line (removed here) and the current one (never added).
      unique = true
      duplicate_labels.each do |field, label|
        value = row[field]
        earlier_line_number = first_seen[field][value]

        if earlier_line_number.nil?
          first_seen[field][value] = line_number
        else
          unique = false
          lines.delete(earlier_line_number)
          errors << "Lines #{earlier_line_number} and #{line_number} of #{filename} contain duplicate #{label}: #{value}."
        end
      end

      lines[line_number] = row if unique
    rescue StandardError => ex
      errors << "Check line #{line_number} of #{filename} for errors: #{ex}."
    end

    # Rows were inserted in ascending line order and a Hash keeps insertion
    # order, so this visits the surviving rows in file order.
    lines.each do |line_number, line|
      collection_title = line[:collection_title]
      collections = Collection.where(title: collection_title)

      if collections.nil? || collections.first.nil?
        errors << "The collection #{collection_title} on line #{line_number} of #{filename} is not found in MIRA."
        next
      end

      if collections.length > 1
        errors << "The title #{collection_title} on line #{line_number} of #{filename} matches #{collections.length} collections."
        next
      end

      collection = collections.first
      old_call_number = collection[:call_number].first
      old_finding_aid_link = collection[:finding_aid].first
      new_call_number = line[:call_number]
      new_finding_aid_link = line[:finding_aid_link]
      update_call_number = new_call_number != old_call_number
      update_finding_aid_link = new_finding_aid_link != old_finding_aid_link

      if update_call_number || update_finding_aid_link
        puts("Updating collection #{collection_title} old call number: #{old_call_number} old finding aid link: #{old_finding_aid_link} new call number: #{new_call_number} new finding aid link: #{new_finding_aid_link}.")

        collection[:call_number] = [new_call_number] if update_call_number
        collection[:finding_aid] = [new_finding_aid_link] if update_finding_aid_link
        collection.save! if save_updates
      else
        puts(" collection #{collection_title} has call number #{old_call_number} and finding aid link #{old_finding_aid_link}; no need to update.")
      end
    rescue StandardError => ex
      errors << "Error updating line #{line_number} of #{filename}: #{ex}."
    end

    # Output all the error messages after all the processing has been done.
    unless errors.empty?
      puts("")
      errors.each { |error| puts(error) }
    end
  end
end

0 comments on commit 34354e4

Please sign in to comment.