TDLR-2550 - new rake task to update Collections, adding finding aid link and call number.
TuftsUniversity · Mar 29, 2024 · 34354e4 · 34354e4
1 parent 5f69b6a
commit 34354e4
Show file tree

Hide file tree

Showing 2 changed files with 149 additions and 6 deletions.
diff --git a/app/lib/tufts/contribute_collections.rb b/app/lib/tufts/contribute_collections.rb
@@ -1,6 +1,10 @@
 # frozen_string_literal: true
 module Tufts
   # Create and maintain the Collection objects required by the Contribute controller
+  # Note that the rake task that uses this class is called at the initialization of a new TDL environment.
+  # It runs in production only once (e.g. Fedora 3 -> 4 migration), but it's useful in development environments
+  # since they are recreated more frequently.
+
   class ContributeCollections
     attr_reader :seed_data
 
@@ -79,37 +83,37 @@ def collection_for_work_type(work_type)
 
     SEED_DATA = [
       {
-        title: "Tufts Published Scholarship, 1987-2014",
+        title: "Tufts Published Scholarship",
         call_number: "PB",
         finding_aid: "https://archives.tufts.edu/repositories/2/resources/100",
         work_types: [GenericDeposit, GenericTischDeposit, GisPoster, UndergradSummerScholar, FacultyScholarship]
       },
       {
-        title: "Fletcher School Records, 1923 -- 2016",
+        title: "Fletcher School Records",
         call_number: "UA015",
         finding_aid: "https://archives.tufts.edu/repositories/2/resources/120",
         work_types: [CapstoneProject]
       },
       {
-        title: "Cummings School of Veterinary Medicine records, 1969-2012",
+        title: "Cummings School of Veterinary Medicine Records",
         call_number: "UA041",
         finding_aid: "https://archives.tufts.edu/repositories/2/resources/4",
         work_types: [CummingsThesis]
       },
       {
-        title: "Undergraduate honors theses, 1929-2015",
+        title: "Senior Honors Theses",
         call_number: "UA005",
         finding_aid: "https://archives.tufts.edu/repositories/2/resources/123",
         work_types: [HonorsThesis]
       },
       {
-        title: "Public Health and Professional Degree Programs Records, 1990 -- 2011",
+        title: "Public Health and Professional Degree Programs Records",
         call_number: "UA187",
         finding_aid: "https://archives.tufts.edu/repositories/2/resources/253",
         work_types: [PublicHealth]
       },
       {
-        title: "Department of Education records, 2007-02-01-2014",
+        title: "Department of Education Records",
         call_number: "UA071",
         finding_aid: "https://archives.tufts.edu/repositories/2/resources/9",
         work_types: [QualifyingPaper]

diff --git a/lib/tasks/add_collection_fa_cn.rake b/lib/tasks/add_collection_fa_cn.rake
@@ -0,0 +1,139 @@
+# frozen_string_literal: true
+require 'active_fedora'
+
+namespace :tufts do
+  desc "Add Archives@Tufts Finding Aid URL and Call Number to collections."
+
+  task add_collection_fa_cn: :environment do
+    debug        = false
+    save_updates = true
+
+    # Can we connect to Fedora?  This also causes this task's output to come after all the deprecation warnings.
+    begin
+      collection = Collection.where(title: "Foobar")
+      foobar = collection.first unless collection.nil?
+    rescue StandardError => ex
+      puts("\nError when connecting to Fedora:  #{ex}.")
+      exit
+    end
+
+    puts("")  # Leave a blank line after all the deprecation warnings.
+
+    unless ARGV.size == 2
+      puts("example usage: bundle exec rake tufts:add_collection_fa_cn collection_info.csv")
+      exit
+    end
+
+    filename = ARGV[1]
+    csv_file = open(filename)
+    column_names = csv_file.first.strip.split(",")
+
+    unless column_names.length() == 3  && column_names[0] == "finding aid link" && column_names[1] == "collection number" && column_names[2] == "collection title"
+      puts("The first line of #{filename} should contain three comma-separated column names: finding aid link,collection number,collection title.")
+      exit
+    end
+
+    puts("line 1: #{column_names}") if debug
+
+    # These hash tables are used to check for duplicates.  The key is the value from the corresponding column of the CSV file, and the value is the line number within the CSV file.
+    finding_aid_links = Hash.new
+    call_numbers      = Hash.new
+    collection_titles = Hash.new
+    lines             = Hash.new
+    errors            = Array.new
+
+    csv_file.each.with_index(2) do |line, line_number|
+      begin
+        row_values = line.strip.split(',', 3)  # split the line into three values on the first two commas
+
+        unless row_values.length() == 3 && !row_values[2].blank?
+          errors.append("Line #{line_number} of #{filename} should contain three comma-separated values.")
+          next
+        end
+
+        # For the third value, remove leading/trailing quotes and replace two consecutive quotes with one quote.
+        row_values[2] = row_values[2].delete_prefix('"').delete_suffix('"').gsub('""', '"')
+
+        puts("line #{line_number}:  #{row_values}") if debug
+
+        finding_aid_link = row_values[0]
+        call_number      = row_values[1]
+        collection_title = row_values[2]
+
+        # Check for previous lines with duplicate values.
+        dup_finding_aid_link_line_number = finding_aid_links[finding_aid_link]
+        dup_call_number_line_number      = call_numbers[call_number]
+        dup_collection_title_line_number = collection_titles[collection_title]
+
+        if dup_finding_aid_link_line_number.nil?
+          finding_aid_links[finding_aid_link] = line_number
+        else
+          lines.delete(dup_finding_aid_link_line_number)
+          errors.append("Lines #{dup_finding_aid_link_line_number} and #{line_number} of #{filename} contain duplicate finding aid links: #{finding_aid_link}.")
+        end
+
+        if dup_call_number_line_number.nil?
+          call_numbers[call_number] = line_number
+        else
+          lines.delete(dup_call_number_line_number)
+          errors.append("Lines #{dup_call_number_line_number} and #{line_number} of #{filename} contain duplicate collection numbers: #{call_number}.")
+        end
+
+        if dup_collection_title_line_number.nil?
+          collection_titles[collection_title] = line_number
+        else
+          lines.delete(dup_collection_title_line_number)
+          errors.append("Lines #{dup_collection_title_line_number} and #{line_number} of #{filename} contain duplicate collection titles: #{collection_title}.")
+        end
+
+        if dup_finding_aid_link_line_number.nil? && dup_call_number_line_number.nil? && dup_collection_title_line_number.nil?
+          lines[line_number] = {finding_aid_link: finding_aid_link, call_number: call_number, collection_title: collection_title}
+        end
+      rescue StandardError => ex
+        errors.append("Check line #{line_number} of #{filename} for errors: #{ex}.")
+      end
+    end
+
+    line_numbers = lines.keys.sort
+
+    line_numbers.each do |line_number|
+      begin
+        line = lines[line_number]
+        collection_title = line[:collection_title]
+        collections = Collection.where(title: collection_title)
+
+        if collections.nil? || collections.first.nil?
+          errors.append("The collection #{line[:collection_title]} on line #{line_number} of #{filename} is not found in MIRA.")
+        elsif collections.length > 1
+          errors.append("The title #{line[:collection_title]} on line #{line_number} of #{filename} matches #{collections.length} collections.")
+        else
+          collection              = collections.first
+          old_call_number         = collection[:call_number].first
+          old_finding_aid_link    = collection[:finding_aid].first
+          new_call_number         = line[:call_number]
+          new_finding_aid_link    = line[:finding_aid_link]
+          update_call_number      = new_call_number      != old_call_number
+          update_finding_aid_link = new_finding_aid_link != old_finding_aid_link
+
+          if  update_call_number || update_finding_aid_link
+            puts("Updating collection #{collection_title}  old call number: #{old_call_number}  old finding aid link: #{old_finding_aid_link}  new call number: #{new_call_number}  new finding aid link: #{new_finding_aid_link}.")
+
+            collection[:call_number] = [new_call_number]      if update_call_number
+            collection[:finding_aid] = [new_finding_aid_link] if update_finding_aid_link
+            collection.save!                                  if save_updates
+          else
+            puts("         collection #{collection_title} has call number #{old_call_number} and finding aid link #{old_finding_aid_link};  no need to update.")
+          end
+        end
+      rescue StandardError => ex
+        errors.append("Error updating line #{line_number} of #{filename}: #{ex}.")
+      end
+    end
+
+    # Output all the error messages after all the processing has been done.
+    puts("") unless errors.length() == 0
+    errors.each do |error|
+      puts(error)
+    end
+  end
+end