Skip to content

Commit

Permalink
Fixes to allow compatibility with Tenejo (#5)
Browse files Browse the repository at this point in the history
These are changes/fixes required to get the
importer UI working with Tenejo.

After these are merged all specs will be able to
pass on Tenejo with the mounted engine.
  • Loading branch information
little9 authored and bess committed Jul 29, 2019
1 parent d939be9 commit 286183e
Show file tree
Hide file tree
Showing 6 changed files with 196 additions and 3 deletions.
31 changes: 31 additions & 0 deletions app/importers/modular_importer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# frozen_string_literal: true
require 'zizia'

# Runs a Zizia CSV import for a single CsvImport record.
#
# The manifest CSV attached to the CsvImport is parsed with Zizia::CsvParser
# and each record is handed to Zizia::HyraxRecordImporter.
class ModularImporter
  # Field name passed to the record importer as its deduplication field.
  DEDUPLICATION_FIELD = 'identifier'

  # @param csv_import [#manifest, #fedora_collection_id, #user_id, #id]
  #   the import record; its manifest's underlying file path is read from disk.
  def initialize(csv_import)
    @csv_import = csv_import
    @csv_file = csv_import.manifest.file.file.to_s
    @collection_id = csv_import.fedora_collection_id
    @user_id = csv_import.user_id
    @user_email = User.find(csv_import.user_id).email
  end

  # Import every record described by the manifest CSV.
  #
  # @raise [RuntimeError] if the manifest file does not exist on disk
  # @return [nil]
  def import
    raise "Cannot find expected input file #{@csv_file}" unless File.exist?(@csv_file)

    attrs = {
      collection_id: @collection_id,
      depositor_id: @user_id,
      batch_id: @csv_import.id,
      deduplication_field: DEDUPLICATION_FIELD
    }

    # Block form closes the file handle even when the import raises,
    # so a failed import no longer leaks an open descriptor.
    File.open(@csv_file) do |file|
      Zizia.config.default_info_stream << "event: start_import, batch_id: #{@csv_import.id}, collection_id: #{@collection_id}, user: #{@user_email}"
      Zizia::Importer.new(parser: Zizia::CsvParser.new(file: file), record_importer: Zizia::HyraxRecordImporter.new(attributes: attrs)).import
    end

    # Preserve the previous return value (File#close returned nil).
    nil
  end
end
1 change: 0 additions & 1 deletion app/jobs/zizia/start_csv_import_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ def perform(csv_import_id)
log_stream << "Starting import with batch ID: #{csv_import_id}"
importer = ModularImporter.new(csv_import)
importer.import
endcs
end
end
end
163 changes: 163 additions & 0 deletions app/uploaders/zizia/csv_manifest_validator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# frozen_string_literal: true

# Validate a CSV file.
#
# Don't put expensive validations in this class.
# This is meant to be used for running a few quick
# validations before starting a CSV-based import.
# It will be called during the HTTP request/response,
# so long-running validations will make the page load
# slowly for the user. Any validations that are slow
# should be run in background jobs during the import
# instead of here.
module Zizia
  # Quick, in-request validation of an uploaded CSV manifest.
  #
  # Call #validate, then read #errors (problems that should block the import)
  # and #warnings (problems the user should know about but that do not block).
  class CsvManifestValidator
    # @param manifest_uploader [CsvManifestUploader] The manifest that's mounted to a CsvImport record. See carrierwave gem documentation. This is basically a wrapper for the CSV file.
    def initialize(manifest_uploader)
      @csv_file = manifest_uploader.file
      @errors = []
      @warnings = []
    end

    # Errors and warnings for the CSV file.
    attr_reader :errors, :warnings
    attr_reader :csv_file

    # Parse the CSV and run every check, appending messages to #errors and
    # #warnings. Stops after parsing if the file could not be read.
    def validate
      parse_csv
      return unless @rows

      missing_headers
      duplicate_headers
      unrecognized_headers
      missing_values
      invalid_license
      invalid_resource_type
      invalid_rights_statement
    end

    # One record per row
    #
    # @return [Integer, nil] number of data rows, or nil when the CSV has not
    #   been (successfully) parsed
    def record_count
      return nil unless @rows
      # Don't include the header row; an empty file has zero records, not -1.
      [@rows.size - 1, 0].max
    end

    # @return [String] delimiter used to split multi-valued cells; falls back
    #   to the metadata mapper's delimiter unless one was assigned
    def delimiter
      @delimiter ||= default_delimiter
    end
    attr_writer :delimiter

    private

    def default_delimiter
      Zizia::HyraxBasicMetadataMapper.new.delimiter
    end

    # Headers the importer knows how to handle (lower-cased).
    def valid_headers
      ['title', 'files', 'representative media',
       'thumbnail', 'rendering', 'depositor',
       'date_uploaded', 'date_modified', 'label',
       'relative_path', 'import url', 'resource type',
       'creator', 'contributor', 'abstract or summary',
       'keyword', 'license', 'rights statement',
       'publisher', 'date created', 'subject',
       'language', 'identifier', 'location',
       'related url', 'bibliographic_citation',
       'source', 'visibility']
    end

    # Read the whole CSV into memory and normalize the header row.
    # On any failure a single error is recorded and @rows is left nil so that
    # #validate stops instead of running checks against half-parsed state.
    def parse_csv
      rows = CSV.read(csv_file.path)
      @headers = rows.first || []
      # A blank header cell is parsed as nil; treat it as an empty name
      # rather than raising.
      @transformed_headers = @headers.map { |header| header.to_s.downcase.strip }
      @rows = rows
    rescue StandardError
      @rows = nil
      @errors << 'We are unable to read this CSV file.'
    end

    def missing_headers
      required_headers.each do |header|
        next if @transformed_headers.include?(header)
        @errors << "Missing required column: \"#{header.titleize}\". Your spreadsheet must have this column."
      end
    end

    # Columns that must be present and non-blank in every row.
    def required_headers
      ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files']
    end

    def duplicate_headers
      # After sorting, duplicates are adjacent; report each name once.
      duplicates = @transformed_headers.sort.each_cons(2).select { |a, b| a == b }.map(&:first).uniq
      duplicates.each do |header|
        @errors << "Duplicate column names: You can have only one \"#{header.titleize}\" column."
      end
    end

    # Warn the user if we find any unexpected headers.
    def unrecognized_headers
      extra_headers = @transformed_headers - valid_headers
      extra_headers.each do |header|
        @warnings << "The field name \"#{header}\" is not supported. This field will be ignored, and the metadata for this field will not be imported."
      end
    end

    # Report every blank cell in a required column that is present.
    # Absent required columns are reported by #missing_headers instead.
    def missing_values
      # Keep each header paired with its own column index so the message
      # names the right field even when other required columns are absent.
      required_columns = required_headers.each_with_object({}) do |header, columns|
        column_number = @transformed_headers.find_index(header)
        columns[header] = column_number if column_number
      end

      @rows.each_with_index do |row, i|
        required_columns.each do |header, column_number|
          next unless row[column_number].blank?
          @errors << "Missing required metadata in row #{i + 1}: \"#{header.titleize}\" field cannot be blank"
        end
      end
    end

    # Only allow valid license values expected by Hyrax.
    # Otherwise the app throws an error when it displays the work.
    def invalid_license
      validate_values('license', :valid_licenses)
    end

    def invalid_resource_type
      validate_values('resource type', :valid_resource_types)
    end

    def invalid_rights_statement
      validate_values('rights statement', :valid_rights_statements)
    end

    # Ids of the active terms in the Hyrax license authority (memoized).
    def valid_licenses
      @valid_license_ids ||= Hyrax::LicenseService.new.authority.all.select { |license| license[:active] }.map { |license| license[:id] }
    end

    # Ids of the active terms in the 'resource_types' local authority (memoized).
    def valid_resource_types
      @valid_resource_type_ids ||= Qa::Authorities::Local.subauthority_for('resource_types').all.select { |term| term[:active] }.map { |term| term[:id] }
    end

    # Ids of the active terms in the 'rights_statements' local authority (memoized).
    def valid_rights_statements
      @valid_rights_statement_ids ||= Qa::Authorities::Local.subauthority_for('rights_statements').all.select { |term| term[:active] }.map { |term| term[:id] }
    end

    # Make sure this column contains only valid values
    #
    # @param header_name [String] normalized (lower-case) column name
    # @param valid_values_method [Symbol] name of the private method that
    #   returns the list of allowed values for the column
    def validate_values(header_name, valid_values_method)
      column_number = @transformed_headers.find_index(header_name)
      return unless column_number

      @rows.each_with_index do |row, i|
        next if i.zero? # Skip the header row
        cell = row[column_number]
        next unless cell

        # Looked up lazily so the authority is only consulted when a row
        # actually has a value; the lookup methods memoize their result.
        valid_values = send(valid_values_method)

        cell.split(delimiter).reject { |value| valid_values.include?(value) }.each do |value|
          @errors << "Invalid #{header_name.titleize} in row #{i + 1}: #{value}"
        end
      end
    end
  end
end
2 changes: 1 addition & 1 deletion app/views/zizia/csv_imports/show.html.erb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<div class="col-md-6">
<div class='well'>
<p> Your records will be imported in the background. To check the current status, please check the background job status page. </p><br />
<div class="text-center"> <%= link_to 'Background Job Status', sidekiq_web_path, class: "btn btn-primary btn-lg" %> </div>
<div class="text-center"> <%= link_to 'Background Job Status', '/sidekiq', class: "btn btn-primary btn-lg" %> </div>
</div>
</div>
</div>
Expand Down
2 changes: 1 addition & 1 deletion lib/zizia/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Zizia
VERSION = '2.0.0.alpha.01'
VERSION = '2.1.0.alpha.01'
end
Binary file added spec/fixtures/dog.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 286183e

Please sign in to comment.