From 4ea556f91666f8f7762f1a23ecf2af9950825319 Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Wed, 7 Jun 2017 09:55:27 +0200 Subject: [PATCH 1/9] Added support for PDF/A-1b --- .gitignore | 1 + lib/prawn/document.rb | 3 +- lib/prawn/vera_pdf.rb | 55 +++++++++++++++++++++++++++++++++ manual/contents.rb | 7 ++++- prawn.gemspec | 1 + spec/manual_spec.rb | 2 +- spec/prawn/document_spec.rb | 16 +++++++--- spec/prawn/pdfa_1b_spec.rb | 11 +++++++ spec/prawn/pdfa_1b_spec_impl.rb | 31 +++++++++++++++++++ spec/prawn/stamp_spec.rb | 2 +- 10 files changed, 120 insertions(+), 9 deletions(-) create mode 100644 lib/prawn/vera_pdf.rb create mode 100644 spec/prawn/pdfa_1b_spec.rb create mode 100644 spec/prawn/pdfa_1b_spec_impl.rb diff --git a/.gitignore b/.gitignore index c5df388ed..1f831f30a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ drop_to_console.rb /bin .DS_Store *.pdf +/.byebug_history diff --git a/lib/prawn/document.rb b/lib/prawn/document.rb index 247be8eba..9ebc1f38e 100644 --- a/lib/prawn/document.rb +++ b/lib/prawn/document.rb @@ -66,7 +66,8 @@ class Document :page_size, :page_layout, :margin, :left_margin, :right_margin, :top_margin, :bottom_margin, :skip_page_creation, :compress, :background, :info, - :text_formatter, :print_scaling + :text_formatter, :print_scaling, + :trailer, :enable_pdfa_1b ].freeze # Any module added to this array will be included into instances of diff --git a/lib/prawn/vera_pdf.rb b/lib/prawn/vera_pdf.rb new file mode 100644 index 000000000..4cfe9cd13 --- /dev/null +++ b/lib/prawn/vera_pdf.rb @@ -0,0 +1,55 @@ +require 'nokogiri' +require 'open3' + +module Prawn + module VeraPdf + VERA_PDF_EXECUTABLE = 'verapdf'.freeze + VERA_PDF_COMMAND = "#{VERA_PDF_EXECUTABLE} --flavour 1b --format xml".freeze + + def which(cmd) + exts = ENV['PATHEXT'] ? 
ENV['PATHEXT'].split(';') : [''] + ENV['PATH'].split(File::PATH_SEPARATOR).each do |path| + exts.each do |ext| + exe = File.join(path, "#{cmd}#{ext}") + return exe if File.executable?(exe) && !File.directory?(exe) + end + end + return nil + end + + def vera_pdf_available? + which VERA_PDF_EXECUTABLE + end + + def valid_pdfa_1b?(pdf_data) + stdout, stderr, status = Open3.capture3(VERA_PDF_COMMAND, stdin_data: pdf_data) + raise Exception, "VeraPDF could not be run. #{stderr}" unless status.success? + + reported_as_compliant? stdout.lines[4..-1].join + end + + def reported_as_compliant?(xml_data) + xml_doc = Nokogiri::XML xml_data + raise Exception, 'The veraPDF xml report was not well formed.' unless xml_doc.errors.empty? + + xml_doc.remove_namespaces! + validation_result = xml_doc.xpath('/processorResult/validationResult') + assertions = validation_result.xpath('assertions/assertion') + assertions.each do |assertion| + message = assertion.at_xpath('message').content + clause = assertion.at_xpath('ruleId').attribute('clause').content + test = assertion.at_xpath('ruleId').attribute('testNumber').content + context = assertion.at_xpath('location/context').content + url = 'https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Part-1-rules' + url_anchor = "rule-#{clause.delete('.')}-#{test}" + puts + puts 'PDF/A-1b VIOLATION' + puts " Message: #{message}" + puts " Context: #{context}" + puts " Details: #{url}##{url_anchor}" + puts + end + validation_result.attribute('isCompliant').content == 'true' + end + end +end diff --git a/manual/contents.rb b/manual/contents.rb index 3a1a59da7..26d407676 100644 --- a/manual/contents.rb +++ b/manual/contents.rb @@ -6,9 +6,14 @@ def prawn_manual_document old_default_external_encoding = Encoding.default_external Encoding.default_external = Encoding::UTF_8 + # We need to use a fixed trailer ID, otherwise the test for + # unintended manual changes will always trigger because of + # a random trailer ID. 
+ trailer_id = PDF::Core::ByteString.new('PrawnPrawnPrawnP') Prawn::ManualBuilder::Example.new( skip_page_creation: true, - page_size: 'FOLIO' + page_size: 'FOLIO', + trailer: { ID: [trailer_id, trailer_id] } ) do load_page '', 'cover' load_page '', 'how_to_read_this_manual' diff --git a/prawn.gemspec b/prawn.gemspec index a1bbd080d..52764b701 100644 --- a/prawn.gemspec +++ b/prawn.gemspec @@ -46,6 +46,7 @@ Gem::Specification.new do |spec| spec.add_development_dependency('pdf-reader', ['~> 1.4', '>= 1.4.1']) spec.add_development_dependency('rubocop', '~> 0.47.1') spec.add_development_dependency('rubocop-rspec', '~> 1.10') + spec.add_development_dependency('nokogiri', '~> 1.7') spec.homepage = 'http://prawnpdf.org' spec.description = < { + normal: "#{Prawn::DATADIR}/fonts/DejaVuSans.ttf" + } + ) + pdf.font 'DejaVuSans' do + pdf.text_box 'Some text', at: [100, 100] + end + expect(valid_pdfa_1b?(pdf.render)).to be true + end + + it 'document with some image' do + pdf.image "#{Prawn::DATADIR}/images/pigs.jpg" + expect(valid_pdfa_1b?(pdf.render)).to be true + end + end +end diff --git a/spec/prawn/stamp_spec.rb b/spec/prawn/stamp_spec.rb index 8a6add1b6..fdbe6350a 100644 --- a/spec/prawn/stamp_spec.rb +++ b/spec/prawn/stamp_spec.rb @@ -95,7 +95,7 @@ next unless obj =~ %r{/Type /Page$} # The page object must contain the annotation reference # to render a clickable link - expect(obj).to match(%r{^/Annots \[\d \d .\]$}) + expect(obj).to match(%r{^/Annots \[\d+ \d .\]$}) end end From eb9b6675e780d6358e9e302861aac000f93ac353 Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Wed, 12 Jul 2017 14:38:48 +0200 Subject: [PATCH 2/9] Moved some ignores from local ignore file to global ignore file. 
--- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1f831f30a..c5df388ed 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,3 @@ drop_to_console.rb /bin .DS_Store *.pdf -/.byebug_history From 82960354306d8c5896e2e87d7833d119eb472064 Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Thu, 13 Jul 2017 11:12:11 +0200 Subject: [PATCH 3/9] Made trailer ID deterministic --- lib/prawn/document.rb | 4 ++-- manual/contents.rb | 7 +------ spec/manual_spec.rb | 2 +- spec/prawn/document_spec.rb | 9 +-------- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/lib/prawn/document.rb b/lib/prawn/document.rb index 9ebc1f38e..74e2e8328 100644 --- a/lib/prawn/document.rb +++ b/lib/prawn/document.rb @@ -67,7 +67,7 @@ class Document :right_margin, :top_margin, :bottom_margin, :skip_page_creation, :compress, :background, :info, :text_formatter, :print_scaling, - :trailer, :enable_pdfa_1b + :enable_pdfa_1b ].freeze # Any module added to this array will be included into instances of @@ -384,7 +384,7 @@ def render(*a, &b) # pdf.render_file "foo.pdf" # def render_file(filename) - File.open(filename, 'wb') { |f| render(f) } + File.open(filename, 'rb+') { |f| render(f) } end # The bounds method returns the current bounding box you are currently in, diff --git a/manual/contents.rb b/manual/contents.rb index 26d407676..3a1a59da7 100644 --- a/manual/contents.rb +++ b/manual/contents.rb @@ -6,14 +6,9 @@ def prawn_manual_document old_default_external_encoding = Encoding.default_external Encoding.default_external = Encoding::UTF_8 - # We need to use a fixed trailer ID, otherwise the test for - # unintended manual changes will always trigger because of - # a random trailer ID. 
- trailer_id = PDF::Core::ByteString.new('PrawnPrawnPrawnP') Prawn::ManualBuilder::Example.new( skip_page_creation: true, - page_size: 'FOLIO', - trailer: { ID: [trailer_id, trailer_id] } + page_size: 'FOLIO' ) do load_page '', 'cover' load_page '', 'how_to_read_this_manual' diff --git a/spec/manual_spec.rb b/spec/manual_spec.rb index 5387ed261..e4d1fac86 100644 --- a/spec/manual_spec.rb +++ b/spec/manual_spec.rb @@ -5,7 +5,7 @@ MANUAL_HASH = case RUBY_ENGINE when 'ruby' - 'c7202f015e36d02ac36dac38d88bb78a4dd439ec6d23268ebddaa15a8bcf7e790f203fd3e92d9c1b92c1a2806a03d7f5706c1550da29f281d25bb5540568445e' + 'ac6e99bf22dd31c21c95295ae7b956a610ce98afcbae2e23df1648e0128b3be8728342d6d9622c73ff3702506e8b00d43557d19a77e7ebc313e8133155568efd' when 'jruby' 'd2eb71ea3ddc35acb185de671a6fa48862ebad5727ce372e3a742f45d31447765c4004fbe5fbfdc1f5a32903ac87182c75e6abe021ab003c8af6e6cc33e0d01e' end diff --git a/spec/prawn/document_spec.rb b/spec/prawn/document_spec.rb index ceac24c7c..4b2529d22 100644 --- a/spec/prawn/document_spec.rb +++ b/spec/prawn/document_spec.rb @@ -454,14 +454,7 @@ def self.format(string) end it 'is idempotent' do - # We need to overwrite the trailer ID, otherwise each render - # pass will generate a new random ID and the documents would - # not match. - trailer_id = PDF::Core::ByteString.new(SecureRandom.random_bytes(16)) - pdf = described_class.new(trailer: { - ID: [trailer_id, trailer_id] - }) - + pdf = described_class.new contents = pdf.render contents2 = pdf.render expect(contents2).to eq(contents) From bd70bcd44211807a7e844e4e9dcbf6b0af044655 Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Thu, 13 Jul 2017 13:45:49 +0200 Subject: [PATCH 4/9] Trailer ID generation does not use seek any more. 
--- lib/prawn/document.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/prawn/document.rb b/lib/prawn/document.rb index 74e2e8328..c3d1b55b7 100644 --- a/lib/prawn/document.rb +++ b/lib/prawn/document.rb @@ -384,7 +384,7 @@ def render(*a, &b) # pdf.render_file "foo.pdf" # def render_file(filename) - File.open(filename, 'rb+') { |f| render(f) } + File.open(filename, 'wb') { |f| render(f) } end # The bounds method returns the current bounding box you are currently in, From d575d6e5716d3d9f5adeb8808ef256191c72c1bc Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Fri, 14 Jul 2017 11:33:13 +0200 Subject: [PATCH 5/9] Replaced nokogiri with REXML for parsing the veraPDF report. --- prawn.gemspec | 1 - spec/prawn/pdfa_1b_spec_impl.rb | 2 +- {lib => spec}/prawn/vera_pdf.rb | 21 ++++++++------------- 3 files changed, 9 insertions(+), 15 deletions(-) rename {lib => spec}/prawn/vera_pdf.rb (63%) diff --git a/prawn.gemspec b/prawn.gemspec index 52764b701..a1bbd080d 100644 --- a/prawn.gemspec +++ b/prawn.gemspec @@ -46,7 +46,6 @@ Gem::Specification.new do |spec| spec.add_development_dependency('pdf-reader', ['~> 1.4', '>= 1.4.1']) spec.add_development_dependency('rubocop', '~> 0.47.1') spec.add_development_dependency('rubocop-rspec', '~> 1.10') - spec.add_development_dependency('nokogiri', '~> 1.7') spec.homepage = 'http://prawnpdf.org' spec.description = < Date: Fri, 14 Jul 2017 12:19:20 +0200 Subject: [PATCH 6/9] Moved veraPDF helpers into the extensions directory. 
--- spec/extensions/vera_pdf_helpers.rb | 48 +++++++++++++++++++++++++++ spec/prawn/pdfa_1b_spec.rb | 3 +- spec/prawn/pdfa_1b_spec_impl.rb | 3 +- spec/prawn/vera_pdf.rb | 50 ----------------------------- 4 files changed, 50 insertions(+), 54 deletions(-) create mode 100644 spec/extensions/vera_pdf_helpers.rb delete mode 100644 spec/prawn/vera_pdf.rb diff --git a/spec/extensions/vera_pdf_helpers.rb b/spec/extensions/vera_pdf_helpers.rb new file mode 100644 index 000000000..74a85aefc --- /dev/null +++ b/spec/extensions/vera_pdf_helpers.rb @@ -0,0 +1,48 @@ +require 'rexml/document' +require 'open3' + +module VeraPdfHelpers + VERA_PDF_EXECUTABLE = 'verapdf'.freeze + VERA_PDF_COMMAND = "#{VERA_PDF_EXECUTABLE} --flavour 1b --format xml".freeze + + def which(cmd) + exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : [''] + ENV['PATH'].split(File::PATH_SEPARATOR).each do |path| + exts.each do |ext| + exe = File.join(path, "#{cmd}#{ext}") + return exe if File.executable?(exe) && !File.directory?(exe) + end + end + return nil + end + + def vera_pdf_available? + which VERA_PDF_EXECUTABLE + end + + def valid_pdfa_1b?(pdf_data) + stdout, stderr, status = Open3.capture3(VERA_PDF_COMMAND, stdin_data: pdf_data) + raise Exception, "VeraPDF could not be run. #{stderr}" unless status.success? + + reported_as_compliant? 
stdout.lines[4..-1].join + end + + def reported_as_compliant?(xml_data) + xml_doc = REXML::Document.new xml_data + xml_doc.elements.each('/processorResult/validationResult/ns2:assertions/ns2:assertion') do |element| + message = element.elements.to_a('ns2:message').first.text + clause = element.elements.to_a('ns2:ruleId').first.attributes['clause'] + test = element.elements.to_a('ns2:ruleId').first.attributes['testNumber'] + context = element.elements.to_a('ns2:location/ns2:context').first.text + url = 'https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Part-1-rules' + url_anchor = "rule-#{clause.delete('.')}-#{test}" + puts + puts 'PDF/A-1b VIOLATION' + puts " Message: #{message}" + puts " Context: #{context}" + puts " Details: #{url}##{url_anchor}" + puts + end + xml_doc.elements.to_a('/processorResult/validationResult').first.attributes['isCompliant'] == 'true' + end +end diff --git a/spec/prawn/pdfa_1b_spec.rb b/spec/prawn/pdfa_1b_spec.rb index d68730012..580ce85bf 100644 --- a/spec/prawn/pdfa_1b_spec.rb +++ b/spec/prawn/pdfa_1b_spec.rb @@ -1,7 +1,6 @@ require 'spec_helper' -require 'prawn/vera_pdf' -include Prawn::VeraPdf +include VeraPdfHelpers if vera_pdf_available? 
require_relative 'pdfa_1b_spec_impl' diff --git a/spec/prawn/pdfa_1b_spec_impl.rb b/spec/prawn/pdfa_1b_spec_impl.rb index f7fc8bd12..049e369ab 100644 --- a/spec/prawn/pdfa_1b_spec_impl.rb +++ b/spec/prawn/pdfa_1b_spec_impl.rb @@ -1,8 +1,7 @@ require 'spec_helper' -require_relative 'vera_pdf' describe Prawn::Document do - include Prawn::VeraPdf + include VeraPdfHelpers let(:pdf) { described_class.new(enable_pdfa_1b: true) } diff --git a/spec/prawn/vera_pdf.rb b/spec/prawn/vera_pdf.rb deleted file mode 100644 index 2b65fcaee..000000000 --- a/spec/prawn/vera_pdf.rb +++ /dev/null @@ -1,50 +0,0 @@ -require 'rexml/document' -require 'open3' - -module Prawn - module VeraPdf - VERA_PDF_EXECUTABLE = 'verapdf'.freeze - VERA_PDF_COMMAND = "#{VERA_PDF_EXECUTABLE} --flavour 1b --format xml".freeze - - def which(cmd) - exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : [''] - ENV['PATH'].split(File::PATH_SEPARATOR).each do |path| - exts.each do |ext| - exe = File.join(path, "#{cmd}#{ext}") - return exe if File.executable?(exe) && !File.directory?(exe) - end - end - return nil - end - - def vera_pdf_available? - which VERA_PDF_EXECUTABLE - end - - def valid_pdfa_1b?(pdf_data) - stdout, stderr, status = Open3.capture3(VERA_PDF_COMMAND, stdin_data: pdf_data) - raise Exception, "VeraPDF could not be run. #{stderr}" unless status.success? - - reported_as_compliant? 
stdout.lines[4..-1].join - end - - def reported_as_compliant?(xml_data) - xml_doc = REXML::Document.new xml_data - xml_doc.elements.each('/processorResult/validationResult/ns2:assertions/ns2:assertion') do |element| - message = element.elements.to_a('ns2:message').first.text - clause = element.elements.to_a('ns2:ruleId').first.attributes['clause'] - test = element.elements.to_a('ns2:ruleId').first.attributes['testNumber'] - context = element.elements.to_a('ns2:location/ns2:context').first.text - url = 'https://github.com/veraPDF/veraPDF-validation-profiles/wiki/PDFA-Part-1-rules' - url_anchor = "rule-#{clause.delete('.')}-#{test}" - puts - puts 'PDF/A-1b VIOLATION' - puts " Message: #{message}" - puts " Context: #{context}" - puts " Details: #{url}##{url_anchor}" - puts - end - xml_doc.elements.to_a('/processorResult/validationResult').first.attributes['isCompliant'] == 'true' - end - end -end From 38ae30cd6c46af7698ba30cbd71b340e55ba19f9 Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Fri, 14 Jul 2017 15:29:51 +0200 Subject: [PATCH 7/9] XMP metadata is only added to PDF/A-1b compliant documents --- spec/manual_spec.rb | 2 +- spec/prawn/document_spec.rb | 9 +++++---- spec/prawn/stamp_spec.rb | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/spec/manual_spec.rb b/spec/manual_spec.rb index e4d1fac86..000546936 100644 --- a/spec/manual_spec.rb +++ b/spec/manual_spec.rb @@ -5,7 +5,7 @@ MANUAL_HASH = case RUBY_ENGINE when 'ruby' - 'ac6e99bf22dd31c21c95295ae7b956a610ce98afcbae2e23df1648e0128b3be8728342d6d9622c73ff3702506e8b00d43557d19a77e7ebc313e8133155568efd' + '9b2fde84364fdde4d879c4a29072bab38dfb89e3397923ae7a5a430e96fe7fb3c5dd59962152f47940b854a2d0806c555010258e03e8d462ae2feace8637c653' when 'jruby' 'd2eb71ea3ddc35acb185de671a6fa48862ebad5727ce372e3a742f45d31447765c4004fbe5fbfdc1f5a32903ac87182c75e6abe021ab003c8af6e6cc33e0d01e' end diff --git a/spec/prawn/document_spec.rb b/spec/prawn/document_spec.rb index 4b2529d22..f65e171e5 100644 --- 
a/spec/prawn/document_spec.rb +++ b/spec/prawn/document_spec.rb @@ -455,6 +455,7 @@ def self.format(string) it 'is idempotent' do pdf = described_class.new + contents = pdf.render contents2 = pdf.render expect(contents2).to eq(contents) @@ -507,7 +508,7 @@ def self.format(string) end describe 'content stream characteristics' do - it 'has 2 content streams for a single page PDF' do + it 'has 1 single content stream for a single page PDF' do pdf = described_class.new pdf.text 'James' output = StringIO.new(pdf.render) @@ -515,10 +516,10 @@ def self.format(string) streams = hash.values.select { |obj| obj.is_a?(PDF::Reader::Stream) } - expect(streams.size).to eq(2) + expect(streams.size).to eq(1) end - it 'has 2 content streams for a single page PDF, even if go_to_page '\ + it 'has 1 single content stream for a single page PDF, even if go_to_page '\ 'is used' do pdf = described_class.new pdf.text 'James' @@ -529,7 +530,7 @@ def self.format(string) streams = hash.values.select { |obj| obj.is_a?(PDF::Reader::Stream) } - expect(streams.size).to eq(2) + expect(streams.size).to eq(1) end end diff --git a/spec/prawn/stamp_spec.rb b/spec/prawn/stamp_spec.rb index fdbe6350a..8a6add1b6 100644 --- a/spec/prawn/stamp_spec.rb +++ b/spec/prawn/stamp_spec.rb @@ -95,7 +95,7 @@ next unless obj =~ %r{/Type /Page$} # The page object must contain the annotation reference # to render a clickable link - expect(obj).to match(%r{^/Annots \[\d+ \d .\]$}) + expect(obj).to match(%r{^/Annots \[\d \d .\]$}) end end From 4dc6db8616f5f4be620e43a4bb2f0b526618f554 Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Sat, 15 Jul 2017 15:11:40 +0200 Subject: [PATCH 8/9] Updated Travis CI config to include veraPDF --- .travis.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.travis.yml b/.travis.yml index bc519ec61..41efafdc0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,16 @@ language: ruby +env: + - VERAPDF_VERSION=1.6.3 + - VERAPDF_VERSION_SHORT=1.6 before_install: - gem update 
--system - gem install bundler - bundle --version + - wget http://software.verapdf.org/rel/${VERAPDF_VERSION_SHORT}/verapdf-${VERAPDF_VERSION}-installer.zip + - unzip verapdf-${VERAPDF_VERSION}-installer.zip + - java -jar -DINSTALL_PATH=${PWD}/verapdf verapdf-${VERAPDF_VERSION}/verapdf-izpack-installer-${VERAPDF_VERSION}.jar -options-system + - export PATH=$PATH:$PWD/verapdf + - verapdf --version rvm: - 2.1.0-p0 - 2.1.10 From d9295726dd1170d038c6a02be13e2192327b56ae Mon Sep 17 00:00:00 2001 From: Steffen Rauh Date: Sat, 15 Jul 2017 15:15:17 +0200 Subject: [PATCH 9/9] Fixed environment variables for Travis CI --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 41efafdc0..a4ad149e7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,6 @@ language: ruby env: - - VERAPDF_VERSION=1.6.3 - - VERAPDF_VERSION_SHORT=1.6 + - VERAPDF_VERSION=1.6.3 VERAPDF_VERSION_SHORT=1.6 before_install: - gem update --system - gem install bundler