-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixes to allow compatibility with Tenejo (#5)
These are changes/fixes required to get the importer UI working with Tenejo. After these are merged, all specs will be able to pass on Tenejo with the mounted engine.
- Loading branch information
Showing
6 changed files
with
196 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# frozen_string_literal: true | ||
require 'zizia' | ||
|
||
# Drives a Zizia CSV import for a single csv_import record.
#
# NOTE(review): csv_import is presumably a Zizia::CsvImport model with a
# CarrierWave-mounted manifest -- confirm against the caller.
class ModularImporter
  # Passed to the record importer as :deduplication_field.
  DEDUPLICATION_FIELD = 'identifier'

  # @param csv_import [#manifest, #fedora_collection_id, #user_id, #id]
  #   the import record; its manifest supplies the path of the CSV file
  def initialize(csv_import)
    @csv_import = csv_import
    @csv_file = csv_import.manifest.file.file.to_s
    @collection_id = csv_import.fedora_collection_id
    @user_id = csv_import.user_id
    @user_email = User.find(csv_import.user_id).email
  end

  # Logs a start_import event and runs the Zizia importer over the CSV file.
  #
  # @return [nil]
  # @raise [RuntimeError] when the CSV file does not exist on disk
  def import
    raise "Cannot find expected input file #{@csv_file}" unless File.exist?(@csv_file)

    attrs = {
      collection_id: @collection_id,
      depositor_id: @user_id,
      batch_id: @csv_import.id,
      deduplication_field: DEDUPLICATION_FIELD
    }

    # Block form guarantees the handle is closed even when the import raises.
    File.open(@csv_file) do |file|
      Zizia.config.default_info_stream << "event: start_import, batch_id: #{@csv_import.id}, collection_id: #{@collection_id}, user: #{@user_email}"
      Zizia::Importer.new(parser: Zizia::CsvParser.new(file: file), record_importer: Zizia::HyraxRecordImporter.new(attributes: attrs)).import
    end

    # Keep the historical return value (the result of File#close).
    nil
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
# frozen_string_literal: true | ||
|
||
# Validate a CSV file. | ||
# | ||
# Don't put expensive validations in this class. | ||
# This is meant to be used for running a few quick | ||
# validations before starting a CSV-based import. | ||
# It will be called during the HTTP request/response, | ||
# so long-running validations will make the page load | ||
# slowly for the user. Any validations that are slow | ||
# should be run in background jobs during the import | ||
# instead of here. | ||
module Zizia
  # Runs a handful of cheap checks over a CSV manifest and collects the
  # findings in #errors and #warnings. Call #validate first; the other
  # readers report on whatever that run found.
  class CsvManifestValidator
    # @param manifest_uploader [CsvManifestUploader] The manifest that's mounted to a CsvImport record. See carrierwave gem documentation. This is basically a wrapper for the CSV file.
    def initialize(manifest_uploader)
      @csv_file = manifest_uploader.file
      @errors = []
      @warnings = []
    end

    # Errors and warnings for the CSV file.
    attr_reader :errors, :warnings
    attr_reader :csv_file

    # Parses the file and runs every check. When the file cannot be read, a
    # single error is recorded and the remaining checks are skipped.
    def validate
      parse_csv
      return unless @rows

      missing_headers
      duplicate_headers
      unrecognized_headers
      missing_values
      invalid_license
      invalid_resource_type
      invalid_rights_statement
    end

    # One record per row
    # @return [Integer, nil] nil until a file has been parsed successfully
    def record_count
      return nil unless @rows
      @rows.size - 1 # Don't include the header row
    end

    # Separator used to split multi-valued cells. Defaults to the delimiter
    # of Zizia::HyraxBasicMetadataMapper unless one was assigned.
    def delimiter
      @delimiter ||= default_delimiter
    end
    attr_writer :delimiter

    private

    def default_delimiter
      Zizia::HyraxBasicMetadataMapper.new.delimiter
    end

    # Every column name the importer recognises (lower-cased).
    def valid_headers
      ['title', 'files', 'representative media',
       'thumbnail', 'rendering', 'depositor',
       'date_uploaded', 'date_modified', 'label',
       'relative_path', 'import url', 'resource type',
       'creator', 'contributor', 'abstract or summary',
       'keyword', 'license', 'rights statement',
       'publisher', 'date created', 'subject',
       'language', 'identifier', 'location',
       'related url', 'bibliographic_citation',
       'source', 'visibility']
    end

    # Reads the whole file into @rows and normalises the header row.
    def parse_csv
      @rows = CSV.read(csv_file.path)
      @headers = @rows.first || []
      # An empty header cell is parsed as nil; to_s keeps it from raising so
      # the remaining checks can still run.
      @transformed_headers = @headers.map { |header| header.to_s.downcase.strip }
    rescue StandardError
      # Leave no half-parsed state behind, so #validate stops after this error.
      @rows = nil
      @errors << 'We are unable to read this CSV file.'
    end

    def missing_headers
      required_headers.each do |header|
        next if @transformed_headers.include?(header)
        @errors << "Missing required column: \"#{header.titleize}\". Your spreadsheet must have this column."
      end
    end

    # Columns that must exist and must be filled in on every row.
    def required_headers
      ['title', 'creator', 'keyword', 'rights statement', 'visibility', 'files']
    end

    # Reports each column name that occurs more than once, in sorted order.
    # Blank header cells are skipped: they are not column names.
    def duplicate_headers
      counts = Hash.new(0)
      @transformed_headers.each { |header| counts[header] += 1 unless header.empty? }

      counts.select { |_header, count| count > 1 }.keys.sort.each do |header|
        @errors << "Duplicate column names: You can have only one \"#{header.titleize}\" column."
      end
    end

    # Warn the user if we find any unexpected headers.
    def unrecognized_headers
      extra_headers = @transformed_headers - valid_headers
      extra_headers.each do |header|
        @warnings << "The field name \"#{header}\" is not supported. This field will be ignored, and the metadata for this field will not be imported."
      end
    end

    # Reports blank cells in required columns. Required columns that are
    # absent altogether are left to #missing_headers.
    def missing_values
      # Keep each header paired with its own column index, so that an absent
      # required column cannot shift the names used in the messages.
      required_columns = required_headers.each_with_object({}) do |header, columns|
        column_number = @transformed_headers.find_index(header)
        columns[header] = column_number if column_number
      end

      @rows.each_with_index do |row, i|
        required_columns.each do |header, column_number|
          next unless row[column_number].blank?
          @errors << "Missing required metadata in row #{i + 1}: \"#{header.titleize}\" field cannot be blank"
        end
      end
    end

    # Only allow valid license values expected by Hyrax.
    # Otherwise the app throws an error when it displays the work.
    def invalid_license
      validate_values('license', :valid_licenses)
    end

    def invalid_resource_type
      validate_values('resource type', :valid_resource_types)
    end

    def invalid_rights_statement
      validate_values('rights statement', :valid_rights_statements)
    end

    # Ids of the active terms in the Hyrax license authority (memoized).
    def valid_licenses
      @valid_license_ids ||= Hyrax::LicenseService.new.authority.all.select { |license| license[:active] }.map { |license| license[:id] }
    end

    # Ids of the active terms in the 'resource_types' authority (memoized).
    def valid_resource_types
      @valid_resource_type_ids ||= Qa::Authorities::Local.subauthority_for('resource_types').all.select { |term| term[:active] }.map { |term| term[:id] }
    end

    # Ids of the active terms in the 'rights_statements' authority (memoized).
    def valid_rights_statements
      @valid_rights_statement_ids ||= Qa::Authorities::Local.subauthority_for('rights_statements').all.select { |term| term[:active] }.map { |term| term[:id] }
    end

    # Make sure this column contains only valid values
    #
    # @param header_name [String] lower-cased column name to check
    # @param valid_values_method [Symbol] private method returning the allowed values
    def validate_values(header_name, valid_values_method)
      column_number = @transformed_headers.find_index(header_name)
      return unless column_number

      valid_values = nil # resolved on first use, then shared by all rows
      @rows.each_with_index do |row, i|
        next if i.zero? # Skip the header row
        next unless row[column_number]

        valid_values ||= send(valid_values_method)
        values = row[column_number].split(delimiter)

        values.reject { |value| valid_values.include?(value) }.each do |value|
          @errors << "Invalid #{header_name.titleize} in row #{i + 1}: #{value}"
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
# frozen_string_literal: true | ||
|
||
module Zizia
  # Gem version. The stale '2.0.0.alpha.01' assignment is dropped so the
  # constant is defined exactly once.
  VERSION = '2.1.0.alpha.01'
end
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.