Skip to content

Commit

Permalink
read compat for many hdf5 files (#388)
Browse files Browse the repository at this point in the history
* read compat for many hdf5 files

* support for more types

* allow nonlinear group chunk structure

* old syntax compat

* basic JLD support

* h5 testfiles

* some support for netcdf

* big endian numbers

* compat for a few more files

* some chunking support

* add test and bugfix

* opaque datatype and fixed length string fixes

* sequential filters

* iostream bug fixes

* padded compound types

* some type info

* changelog + version bump

Co-authored-by: Jonas Isensee <[email protected]>
  • Loading branch information
JonasIsensee and Jonas Isensee authored Oct 1, 2022
1 parent 35baacc commit b5c09ef
Show file tree
Hide file tree
Showing 41 changed files with 2,464 additions and 337 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## 0.4.24
- read-only support for `JLD.jl` files
- read-only support for many HDF5 files. Most test files of HDF5.jl are covered
- read Opaque bit fields
- read some other string encodings
- read big endian numbers
- read typical chunking formats

## 0.4.23
- Support for `const` fields in mutable structs

Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "JLD2"
uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
version = "0.4.23"
version = "0.4.24"

[deps]
FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
Expand Down
87 changes: 65 additions & 22 deletions src/JLD2.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ InternalError() = InternalError("")
# but define our own to avoid type piracy
# Package-owned read/write entry points. By default they forward straight to
# `Base.write` / `Base.read`; owning the functions lets this package add
# methods of its own without pirating Base.
function jlwrite(io, x)
    return Base.write(io, x)
end

function jlread(io, x)
    return Base.read(io, x)
end

# Read `n` consecutive values of type `T` from `io`, one `jlread` call per
# element, and return them as a `Vector{T}`.
function jlread(io::IO, ::Type{T}, n::Integer) where {T}
    items = T[]
    for _ in 1:n
        push!(items, jlread(io, T))
    end
    return items
end

# Package-owned counterparts of `Base.sizeof`, `Base.unsafe_store!` and
# `Base.unsafe_load`. Each one simply forwards to the Base function of the
# same name.
function jlsizeof(x)
    return Base.sizeof(x)
end

function jlunsafe_store!(p, x)
    return Base.unsafe_store!(p, x)
end

function jlunsafe_load(p)
    return Base.unsafe_load(p)
end
Expand All @@ -49,13 +51,13 @@ include("Lookup3.jl")
include("mmapio.jl")
include("bufferedio.jl")
include("misc.jl")
include("superblock.jl")

"""
RelOffset
Represents an HDF5 relative offset. This differs from a file offset (used elsewhere) in
that it is relative to the superblock base address. In practice, this means that
`FILE_HEADER_LENGTH `has been subtracted. `fileoffset` and `h5offset` convert between
that it is relative to the superblock base address. `fileoffset` and `h5offset` convert between
`RelOffsets` and file offsets.
"""
struct RelOffset
Expand All @@ -64,6 +66,7 @@ end
define_packed(RelOffset)
# Two offsets compare equal exactly when they are identical (`===`).
Base.:(==)(x::RelOffset, y::RelOffset) = x === y

# Hash through the wrapped `offset`. The two-argument form is the one Julia
# asks new types to implement: it threads the seed `h` through, so a
# `RelOffset` nested inside a tuple or another struct hashes consistently with
# `==`. The one-argument `hash(x)` keeps working via Base's generic fallback
# and yields the same value as `hash(x.offset)`.
Base.hash(x::RelOffset, h::UInt) = hash(x.offset, h)

# Shift an offset by the integer `y`, returning a new `RelOffset`.
Base.:(+)(x::RelOffset, y::Integer) = RelOffset(UInt64(x.offset + y))

const UNDEFINED_ADDRESS = RelOffset(0xffffffffffffffff)
const NULL_REFERENCE = RelOffset(0)
Expand Down Expand Up @@ -105,11 +108,27 @@ Supertype of all HDF5 datatypes.
"""
abstract type H5Datatype end

"""
SharedDatatype
Reference to a shared datatype message (stored elsewhere in a file).
"""
struct SharedDatatype <: H5Datatype
# Relative offset locating the referenced datatype message; presumably the
# address of the object header that holds it — TODO confirm against readers.
header_offset::RelOffset
end

"""
CommittedDatatype
Reference to a shared datatype message (stored elsewhere in a file).
These are stored in the `_types` group and indexed.
"""
struct CommittedDatatype <: H5Datatype
# Relative offset locating the referenced datatype message; presumably the
# address of the object header that holds it — TODO confirm against readers.
header_offset::RelOffset
# Index of this datatype among the indexed entries of the `_types` group
# (numbering scheme is not visible in this section of the file).
index::Int
end


"""
ReadRepresentation{T,ODR}
Expand All @@ -126,6 +145,21 @@ read as type `S`.
"""
struct CustomSerialization{T,S} end


"""
    Filter

Description of a single filter: a numeric `id`, a `flags` word, a
human-readable `name`, and a list of `client_data` values passed along with
it. Presumably mirrors one entry of an HDF5 filter pipeline message
(TODO confirm against the code that parses it).
"""
struct Filter
    id::UInt16
    flags::UInt16
    name::String
    client_data::Vector{UInt32}
end

"""
    FilterPipeline

A list of `Filter`s. `FilterPipeline()` builds a pipeline with no filters, and
`iscompressed` reports whether a pipeline contains any filter at all.
"""
struct FilterPipeline
    filters::Vector{Filter}
end

# Convenience constructor: a pipeline that holds no filters.
function FilterPipeline()
    return FilterPipeline(Filter[])
end

# A pipeline counts as compressed as soon as it holds at least one filter.
function iscompressed(fp::FilterPipeline)
    return length(fp.filters) > 0
end

"""
Group(file)
Expand Down Expand Up @@ -166,6 +200,8 @@ mutable struct Group{T}
written_links)
end



"""
JLDFile{T<:IO}
Expand Down Expand Up @@ -193,14 +229,15 @@ mutable struct JLDFile{T<:IO}
root_group_offset::RelOffset
root_group::Group{JLDFile{T}}
types_group::Group{JLDFile{T}}
base_address::UInt64

function JLDFile{T}(io::IO, path::AbstractString, writable::Bool, written::Bool,
compress,#::Union{Bool,Symbol},
mmaparrays::Bool) where T
f = new(io, path, writable, written, compress, mmaparrays, 1,
OrderedDict{RelOffset,CommittedDatatype}(), H5Datatype[],
JLDWriteSession(), Dict{String,Any}(), IdDict(), IdDict(), Dict{RelOffset,WeakRef}(),
Int64(FILE_HEADER_LENGTH + jlsizeof(Superblock)), Dict{RelOffset,GlobalHeap}(),
DATA_START, Dict{RelOffset,GlobalHeap}(),
GlobalHeap(0, 0, 0, Int64[]), Dict{RelOffset,Group{JLDFile{T}}}(), UNDEFINED_ADDRESS)
finalizer(jld_finalizer, f)
f
Expand All @@ -214,14 +251,14 @@ JLDFile(io::IO, path::AbstractString, writable::Bool, written::Bool, compress, m
Converts an offset `x` relative to the superblock of file `f` to an absolute offset.
"""
fileoffset(f::JLDFile, x::RelOffset) = Int64(x.offset + FILE_HEADER_LENGTH)
fileoffset(f::JLDFile, x::RelOffset) = Int64(x.offset + f.base_address)

"""
h5offset(f::JLDFile, x::RelOffset)
h5offset(f::JLDFile, x::Integer)
Converts an absolute file offset `x` to an offset relative to the superblock of file `f`.
"""
h5offset(f::JLDFile, x::Int64) = RelOffset(x - FILE_HEADER_LENGTH)
h5offset(f::JLDFile, x::Integer) = RelOffset(UInt64(x - f.base_address))

#
# File
Expand Down Expand Up @@ -306,6 +343,7 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
unlock(OPEN_FILES_LOCK)
end
if f.written
f.base_address = 512
if f isa JLDFile{MmapIO}
f.root_group = Group{JLDFile{MmapIO}}(f)
f.types_group = Group{JLDFile{MmapIO}}(f)
Expand All @@ -321,13 +359,21 @@ function jldopen(fname::AbstractString, wr::Bool, create::Bool, truncate::Bool,
end

function load_file_metadata!(f)
verify_file_header(f)

seek(f.io, FILE_HEADER_LENGTH)
superblock = jlread(f.io, Superblock)
superblock = find_superblock(f)
f.end_of_data = superblock.end_of_file_address
f.base_address = superblock.base_address
f.root_group_offset = superblock.root_group_object_header_address
f.root_group = load_group(f, superblock.root_group_object_header_address)
if superblock.version >= 2
verify_file_header(f)
else
@warn "This file was not written with JLD2. Some things may not work."
if f.writable
close(f)
throw(UnsupportedVersionException("This file can not be edited by JLD2. Please open in read-only mode."))
end
end
try
f.root_group = load_group(f, f.root_group_offset)

if haskey(f.root_group.written_links, "_types")
types_group_offset = f.root_group.written_links["_types"]::RelOffset
Expand All @@ -340,6 +386,11 @@ function load_file_metadata!(f)
else
f.types_group = Group{typeof(f)}(f)
end
catch e
show(e)
f.types_group = Group{typeof(f)}(f)

end
nothing
end

Expand Down Expand Up @@ -442,15 +493,7 @@ function Base.close(f::JLDFile)
f.root_group_offset = res
end

# Write JLD2 header
seek(io, 0)
jlwrite(io, FILE_HEADER)

# Write superblock
seek(io, FILE_HEADER_LENGTH)
jlwrite(io, Superblock(0, FILE_HEADER_LENGTH, UNDEFINED_ADDRESS,
f.end_of_data, f.root_group_offset))

write_file_header(f)
truncate_and_close(io, f.end_of_data)
else
close(io)
Expand Down Expand Up @@ -520,14 +563,14 @@ printtoc(io::IO, f::JLDFile; numlines = typemax(Int64)) =



include("superblock.jl")
include("object_headers.jl")
include("groups.jl")
include("dataspaces.jl")
include("attributes.jl")
include("datatypes.jl")
include("datasets.jl")
include("global_heaps.jl")
include("fractal_heaps.jl")

include("data/type_defs.jl")
include("data/specialcased_types.jl")
Expand All @@ -548,4 +591,4 @@ if ccall(:jl_generating_output, Cint, ()) == 1 # if we're precompiling the pac
include("precompile.jl")
end

end # module
end
Loading

2 comments on commit b5c09ef

@JonasIsensee
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator register()

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/69359

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the GitHub interface, or via:

git tag -a v0.4.24 -m "<description of version>" b5c09ef8fcf36597d62b0d6dbbfd4cefb8fa5c12
git push origin v0.4.24

Please sign in to comment.