Skip to content

Commit

Permalink
Merge pull request #1 from eLocal/fix_relationship_cache
Browse files Browse the repository at this point in the history
Fix Relationship.instances cache
  • Loading branch information
MatthewTFarley authored Aug 6, 2019
2 parents c593a08 + 5ec94a3 commit b150674
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 48 deletions.
9 changes: 6 additions & 3 deletions lib/axlsx/package.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,13 @@ def workbook=(workbook) DataTypeValidator.validate :Package_workbook, Workbook,
# File.open('example_streamed.xlsx', 'w') { |f| f.write(s.read) }
def serialize(output, confirm_valid=false)
return false unless !confirm_valid || self.validate.empty?
Relationship.clear_cached_instances
Relationship.initialize_ids_cache
Zip::OutputStream.open(output) do |zip|
write_parts(zip)
end
true
ensure
Relationship.clear_ids_cache
end


Expand All @@ -113,11 +115,13 @@ def serialize(output, confirm_valid=false)
# @return [StringIO|Boolean] False if confirm_valid and validation errors exist. rewound string IO if not.
def to_stream(confirm_valid=false)
return false unless !confirm_valid || self.validate.empty?
Relationship.clear_cached_instances
Relationship.initialize_ids_cache
zip = write_parts(Zip::OutputStream.new(StringIO.new, true))
stream = zip.close_buffer
stream.rewind
stream
ensure
Relationship.clear_ids_cache
end

# Encrypt the package into a CFB using the password provided
Expand Down Expand Up @@ -351,4 +355,3 @@ def relationships
end
end
end

81 changes: 41 additions & 40 deletions lib/axlsx/rels/relationship.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,51 @@ module Axlsx
# A relationship defines a reference between package parts.
# @note Packages automatically manage relationships.
class Relationship

class << self
# Keeps track of all instances of this class.
# Keeps track of relationship ids in use.
# @return [Array]
def instances
@instances ||= []
def ids_cache
Thread.current[:axlsx_relationship_ids_cache] ||= {}
end
# Clear cached instances.
#

# Initialize cached ids.
#
# This should be called before serializing a package (see {Package#serialize} and
# {Package#to_stream}) to make sure that serialization is idempotent (i.e.
# {Package#to_stream}) to make sure that serialization is idempotent (i.e.
# Relationship instances are generated with the same IDs everytime the package
# is serialized).
#
# Also, calling this avoids memory leaks (cached instances lingering around
# forever).
def clear_cached_instances
@instances = []
def initialize_ids_cache
Thread.current[:axlsx_relationship_ids_cache] = {}
end

# Clear cached ids.
#
# This should be called after serializing a package (see {Package#serialize} and
# {Package#to_stream}) to free the memory allocated for cache.
#
# Also, calling this avoids memory leaks (cached ids lingering around
# forever).
def clear_ids_cache
Thread.current[:axlsx_relationship_ids_cache] = nil
end
# Generate and return a unique id (eg. `rId123`) Used for setting {#Id}.

# Generate and return a unique id (eg. `rId123`) Used for setting {#Id}.
#
# The generated id depends on the number of cached instances, so using
# {clear_cached_instances} will automatically reset the generated ids, too.
# The generated id depends on the number of previously cached ids, so using
# {clear_ids_cache} will automatically reset the generated ids, too.
# @return [String]
def next_free_id
"rId#{@instances.size + 1}"
"rId#{ids_cache.size + 1}"
end
end

# The id of the relationship (eg. "rId123"). Most instances get their own unique id.
# The id of the relationship (eg. "rId123"). Most instances get their own unique id.
# However, some instances need to share the same id – see {#should_use_same_id_as?}
# for details.
# @return [String]
attr_reader :Id

# The location of the relationship target
# @return [String]
attr_reader :Target
Expand Down Expand Up @@ -69,8 +77,8 @@ def next_free_id
# The source object the relations belongs to (e.g. a hyperlink, drawing, ...). Needed when
# looking up the relationship for a specific object (see {Relationships#for}).
attr_reader :source_obj
# Initializes a new relationship.

# Initializes a new relationship.
# @param [Object] source_obj see {#source_obj}
# @param [String] type The type of the relationship
# @param [String] target The target for the relationship
Expand All @@ -80,12 +88,7 @@ def initialize(source_obj, type, target, options={})
self.Target=target
self.Type=type
self.TargetMode = options[:target_mode] if options[:target_mode]
@Id = if (existing = self.class.instances.find{ |i| should_use_same_id_as?(i) })
existing.Id
else
self.class.next_free_id
end
self.class.instances << self
@Id = (self.class.ids_cache[ids_cache_key] ||= self.class.next_free_id)
end

# @see Target
Expand All @@ -105,25 +108,23 @@ def to_xml_string(str = '')
str << (h.map { |key, value| '' << key.to_s << '="' << Axlsx::coder.encode(value.to_s) << '"'}.join(' '))
str << '/>'
end
# Whether this relationship should use the same id as `other`.

# A key that determines whether this relationship should use already generated id.
#
# Instances designating the same relationship need to use the same id. We can not simply
# compare the {#Target} attribute, though: `foo/bar.xml`, `../foo/bar.xml`,
# `../../foo/bar.xml` etc. are all different but probably mean the same file (this
# compare the {#Target} attribute, though: `foo/bar.xml`, `../foo/bar.xml`,
# `../../foo/bar.xml` etc. are all different but probably mean the same file (this
# is especially an issue for relationships in the context of pivot tables). So lets
# just ignore this attribute for now (except when {#TargetMode} is set to `:External` –
# then {#Target} will be an absolute URL and thus can safely be compared).
#
# @todo Implement comparison of {#Target} based on normalized path names.
# @param other [Relationship]
def should_use_same_id_as?(other)
result = self.source_obj == other.source_obj && self.Type == other.Type && self.TargetMode == other.TargetMode
if self.TargetMode == :External
result &&= self.Target == other.Target
end
result
# @return [Array]
def ids_cache_key
key = [source_obj, self.Type, self.TargetMode]
key << self.Target if self.TargetMode == :External
key
end

end
end
18 changes: 13 additions & 5 deletions test/rels/tc_relationship.rb
Original file line number Diff line number Diff line change
@@ -1,30 +1,38 @@
require 'tc_helper.rb'

class TestRelationships < Test::Unit::TestCase

def test_instances_with_different_attributes_have_unique_ids
rel_1 = Axlsx::Relationship.new(Object.new, Axlsx::WORKSHEET_R, 'target')
rel_2 = Axlsx::Relationship.new(Object.new, Axlsx::COMMENT_R, 'foobar')
assert_not_equal rel_1.Id, rel_2.Id
end

def test_instances_with_same_attributes_share_id
source_obj = Object.new
instance = Axlsx::Relationship.new(source_obj, Axlsx::WORKSHEET_R, 'target')
assert_equal instance.Id, Axlsx::Relationship.new(source_obj, Axlsx::WORKSHEET_R, 'target').Id
end


def test_ids_cache_is_thread_safe
cache1, cache2 = nil
t1 = Thread.new { cache1 = Axlsx::Relationship.ids_cache }
t2 = Thread.new { cache2 = Axlsx::Relationship.ids_cache }
[t1, t2].each(&:join)
assert_not_same(cache1, cache2)
end

def test_target_is_only_considered_for_same_attributes_check_if_target_mode_is_external
source_obj = Object.new
rel_1 = Axlsx::Relationship.new(source_obj, Axlsx::WORKSHEET_R, 'target')
rel_2 = Axlsx::Relationship.new(source_obj, Axlsx::WORKSHEET_R, '../target')
assert_equal rel_1.Id, rel_2.Id

rel_3 = Axlsx::Relationship.new(source_obj, Axlsx::HYPERLINK_R, 'target', :target_mode => :External)
rel_4 = Axlsx::Relationship.new(source_obj, Axlsx::HYPERLINK_R, '../target', :target_mode => :External)
assert_not_equal rel_3.Id, rel_4.Id
end

def test_type
assert_raise(ArgumentError) { Axlsx::Relationship.new nil, 'type', 'target' }
assert_nothing_raised { Axlsx::Relationship.new nil, Axlsx::WORKSHEET_R, 'target' }
Expand Down
2 changes: 2 additions & 0 deletions test/tc_package.rb
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,8 @@ def test_to_stream
# this is just a roundabout guess for a package as it is build now
# in testing.
assert(stream.size > 80000)
# Cached ids should be cleared
assert(Axlsx::Relationship.ids_cache.empty?)
end

def test_encrypt
Expand Down

0 comments on commit b150674

Please sign in to comment.