Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add support for PDF/A-1b #34

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Binary file added data/sRGB2014.icc
Binary file not shown.
1 change: 1 addition & 0 deletions lib/pdf/core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
require_relative 'core/outline_item'
require_relative 'core/renderer'
require_relative 'core/text'
require_relative 'core/xmp_metadata'

module PDF
module Core
Expand Down
13 changes: 4 additions & 9 deletions lib/pdf/core/document_state.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,10 @@ class DocumentState #:nodoc:
def initialize(options)
normalize_metadata(options)

@store =
if options[:print_scaling]
PDF::Core::ObjectStore.new(
info: options[:info],
print_scaling: options[:print_scaling]
)
else
PDF::Core::ObjectStore.new(info: options[:info])
end
store_params = options.select do |key|
%i[info print_scaling enable_pdfa_1b].include?(key)
end
@store = PDF::Core::ObjectStore.new(store_params)

@version = 1.3
@pages = []
Expand Down
36 changes: 36 additions & 0 deletions lib/pdf/core/object_store.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,20 @@ def initialize(opts = {})

@info ||= ref(opts[:info] || {}).identifier
@root ||= ref(Type: :Catalog).identifier

if opts[:enable_pdfa_1b]
# PDF/A-1b requirement: XMP metadata
@xmp_metadata ||= ref(Type: :Metadata, Subtype: :XML).identifier
root.data[:Metadata] = xmp_metadata
xmp_metadata_content = XmpMetadata.new(opts[:info] || {})
xmp_metadata_content.enable_pdfa_1b = true
xmp_metadata.stream = Stream.new
xmp_metadata.stream << xmp_metadata_content.render
Comment on lines +29 to +30
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

xmp_metadata is a reference. It's smart enough to create a stream when needed so this can be a bit simpler:

Suggested change
xmp_metadata.stream = Stream.new
xmp_metadata.stream << xmp_metadata_content.render
xmp_metadata << xmp_metadata_content.render


# PDF/A-1b requirement: OutputIntent with ICC profile stream
initialize_output_intent
end

if opts[:print_scaling] == :none
root.data[:ViewerPreferences] = { PrintScaling: :None }
end
Expand All @@ -39,6 +53,10 @@ def root
@objects[@root]
end

# Looks up the object stored under the identifier held in @xmp_metadata.
# Yields whatever @objects returns for that key (the identifier is only
# assigned in the constructor when :enable_pdfa_1b is passed).
def xmp_metadata
  identifier = @xmp_metadata
  @objects[identifier]
end

# Returns the :Pages entry from the data of the root (catalog) object.
def pages
  catalog = root
  catalog.data[:Pages]
end
Expand Down Expand Up @@ -96,6 +114,24 @@ def object_id_for_page(k)
flat_page_ids = get_page_objects(pages).flatten
flat_page_ids[k]
end

private

def initialize_output_intent
icc_profile_name = 'sRGB2014.icc'.freeze

icc_profile_stream = ref(N: 3)
icc_profile_stream.stream = Stream.new
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not needed.

icc_profile_stream << File.binread(File.join(File.dirname(__FILE__), '..', '..', '..', 'data', icc_profile_name))

root.data[:OutputIntents] = [{
Type: :OutputIntent,
S: :GTS_PDFA1,
OutputConditionIdentifier: LiteralString.new('Custom'),
Info: LiteralString.new(File.basename(icc_profile_name, '.*')),
DestOutputProfile: icc_profile_stream
}]
end
end
end
end
28 changes: 24 additions & 4 deletions lib/pdf/core/renderer.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'stringio'
require 'digest'

module PDF
module Core
Expand Down Expand Up @@ -162,9 +163,20 @@ def render(output = StringIO.new)
finalize_all_page_contents

render_header(output)
render_body(output)

# We render the body into a temporary buffer to avoid
# two render passes for output and hashing.
# We must have the same offset as in output, otherwise
# the refs won't match.
body_output = StringIO.new(' ' * output.tell)
body_output.set_encoding(::Encoding::ASCII_8BIT)
body_output.seek(output.tell)
render_body(body_output)
body_output = body_output.string[output.tell..-1]
output.write(body_output)

render_xref(output)
render_trailer(output)
render_trailer(output, hash_body(body_output))
if output.instance_of?(StringIO)
str = output.string
str.force_encoding(::Encoding::ASCII_8BIT)
Expand Down Expand Up @@ -200,6 +212,12 @@ def render_body(output)
state.render_body(output)
end

# Computes the raw (binary, 16-byte) MD5 digest of the rendered body data.
# The result is used to build the trailer ID.
#
def hash_body(body_data)
  md5 = Digest::MD5.new
  md5.update(body_data)
  md5.digest
end

# Write out the PDF Cross Reference Table, as per spec 3.4.3
#
def render_xref(output)
Expand All @@ -215,11 +233,13 @@ def render_xref(output)

# Write out the PDF Trailer, as per spec 3.4.4
#
def render_trailer(output)
def render_trailer(output, body_hash)
trailer_id = PDF::Core::ByteString.new(body_hash)
trailer_hash = {
Size: state.store.size + 1,
Root: state.store.root,
Info: state.store.info
Info: state.store.info,
ID: [trailer_id, trailer_id] # PDF/A-1b requirement
}
trailer_hash.merge!(state.trailer) if state.trailer

Expand Down
95 changes: 95 additions & 0 deletions lib/pdf/core/xmp_metadata.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
module PDF
  module Core
    # Renders an XMP metadata packet (RDF/XML wrapped in xpacket processing
    # instructions) from the entries of a document information dictionary.
    #
    # Only sections that have at least one value set are emitted. When
    # +enable_pdfa_1b+ is truthy, the PDF/A identification section
    # (part 1, conformance B) is emitted as well.
    class XmpMetadata
      # Characters replaced by entity references in element content.
      # '>' is escaped too, so that a literal "]]>" inside a value cannot
      # make the packet ill-formed.
      XML_ESCAPES = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' }.freeze

      attr_accessor :enable_pdfa_1b

      # These attributes must all be synchronized with their counterparts in the
      # document information dictionary to be PDF/A-1b compliant.
      attr_accessor :dc_title, :dc_creator, :dc_description,
                    :pdf_keywords, :xmp_creator_tool, :pdf_producer,
                    :xmp_create_date, :xmp_modify_date

      # @param options [Hash] document information dictionary entries
      #   (:Title, :Author, :Subject, :Keywords, :Creator, :Producer,
      #   :CreationDate, :ModDate). Missing or falsy entries are ignored.
      #   The date entries must respond to +strftime+.
      def initialize(options = {})
        # Convert options for the document information dictionary to their
        # counterparts in XMP.
        @dc_title = options[:Title] if options[:Title]
        @dc_creator = options[:Author] if options[:Author]
        @dc_description = options[:Subject] if options[:Subject]
        @pdf_keywords = options[:Keywords] if options[:Keywords]
        @xmp_creator_tool = options[:Creator] if options[:Creator]
        @pdf_producer = options[:Producer] if options[:Producer]
        @xmp_create_date = options[:CreationDate] if options[:CreationDate]
        @xmp_modify_date = options[:ModDate] if options[:ModDate]
      end

      # Builds the complete XMP packet.
      #
      # The parts are collected in an array and joined, so no string literal
      # is mutated (safe under frozen string literals).
      #
      # @return [String] the packet, from the xpacket begin instruction to the
      #   xpacket end instruction
      def render
        parts = [
          "<?xpacket begin=\"\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n",
          "<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n"
        ]
        parts << render_pdfaid if @enable_pdfa_1b
        parts << render_xmp if @xmp_creator_tool || @xmp_create_date || @xmp_modify_date
        parts << render_pdf if @pdf_keywords || @pdf_producer
        parts << render_dc if @dc_title || @dc_creator || @dc_description
        parts << "</rdf:RDF>\n"
        parts << '<?xpacket end="r"?>'
        parts.join
      end

      private

      # PDF/A identification section: part 1, conformance level B.
      def render_pdfaid
        " <rdf:Description xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\" rdf:about=\"\">\n" \
          " <pdfaid:part>1</pdfaid:part>\n" \
          " <pdfaid:conformance>B</pdfaid:conformance>\n" \
          " </rdf:Description>\n"
      end

      # XMP basic section: creator tool and creation/modification timestamps.
      def render_xmp
        parts = [" <rdf:Description xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\" rdf:about=\"\">\n"]
        parts << simple_element('xmp:CreatorTool', @xmp_creator_tool) if @xmp_creator_tool
        parts << simple_element('xmp:CreateDate', to_xmp_timestamp(@xmp_create_date)) if @xmp_create_date
        parts << simple_element('xmp:ModifyDate', to_xmp_timestamp(@xmp_modify_date)) if @xmp_modify_date
        parts << " </rdf:Description>\n"
        parts.join
      end

      # Adobe PDF section: keywords and producer.
      def render_pdf
        parts = [" <rdf:Description xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\" rdf:about=\"\">\n"]
        parts << simple_element('pdf:Keywords', @pdf_keywords) if @pdf_keywords
        parts << simple_element('pdf:Producer', @pdf_producer) if @pdf_producer
        parts << " </rdf:Description>\n"
        parts.join
      end

      # Dublin Core section: title and description are language alternatives
      # (rdf:Alt with an x-default entry), creator is an ordered list (rdf:Seq).
      def render_dc
        parts = [" <rdf:Description xmlns:dc=\"http://purl.org/dc/elements/1.1/\" rdf:about=\"\">\n"]
        parts << container_element('dc:title', 'rdf:Alt', @dc_title, ' xml:lang="x-default"') if @dc_title
        parts << container_element('dc:creator', 'rdf:Seq', @dc_creator) if @dc_creator
        if @dc_description
          parts << container_element('dc:description', 'rdf:Alt', @dc_description, ' xml:lang="x-default"')
        end
        parts << " </rdf:Description>\n"
        parts.join
      end

      # One-line element whose content is the escaped +value+.
      def simple_element(name, value)
        " <#{name}>#{xml_char_data(value)}</#{name}>\n"
      end

      # Element wrapping an RDF container that holds a single rdf:li item.
      # +li_attributes+ is inserted verbatim after "rdf:li" (including its
      # leading space), so it must already be well-formed.
      def container_element(name, container, value, li_attributes = '')
        " <#{name}>\n" \
          " <#{container}>\n" \
          " <rdf:li#{li_attributes}>#{xml_char_data(value)}</rdf:li>\n" \
          " </#{container}>\n" \
          " </#{name}>\n"
      end

      # Formats +time+ as a date-time with a numeric UTC offset, e.g.
      # "2020-01-02T03:04:05+00:00".
      def to_xmp_timestamp(time)
        time.strftime('%Y-%m-%dT%H:%M:%S%:z')
      end

      # Escapes +string+ for use as XML element content. The value is
      # converted with +to_s+ first, so non-String values (numbers, symbols)
      # do not raise.
      def xml_char_data(string)
        string.to_s.gsub(/[&<>]/, XML_ESCAPES)
      end
    end
  end
end
1 change: 1 addition & 0 deletions pdf-core.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Gem::Specification.new do |spec|
spec.files = Dir.glob('lib/**/**/*') +
%w[COPYING GPLv2 GPLv3 LICENSE] +
%w[Gemfile Rakefile] +
['data/sRGB2014.icc'] +
['pdf-core.gemspec']
spec.require_path = 'lib'
spec.required_ruby_version = '>= 1.9.3'
Expand Down
Loading