diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml index 06dc278e..25b89549 100644 --- a/.github/workflows/Downgrade.yml +++ b/.github/workflows/Downgrade.yml @@ -5,7 +5,7 @@ on: paths-ignore: - 'docs/**' push: - branches: [main, master, dev] + branches: [main, master] paths-ignore: - 'docs/**' concurrency: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7383441c..392a42ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,7 +3,7 @@ on: pull_request: branches: [main, master, dev] push: - branches: [main, master, dev] + branches: [main, master] tags: '*' concurrency: group: ${{ github.workflow }}-${{ github.ref }} diff --git a/.github/workflows/stale_preview_removal.yml b/.github/workflows/stale_preview_removal.yml index 67fa01bf..61d01fd0 100644 --- a/.github/workflows/stale_preview_removal.yml +++ b/.github/workflows/stale_preview_removal.yml @@ -2,7 +2,7 @@ name: Doc Preview Cleanup on: schedule: - - cron: "0 0 * * *" + - cron: "0 0 1 * *" jobs: doc-preview-cleanup: @@ -17,8 +17,8 @@ jobs: shell: julia {0} run: | using Pkg - pkg"activate --temp" - pkg"add HTTP JSON3" + Pkg.activate(; temp=true) + Pkg.add(["HTTP", "JSON3"]) using HTTP using JSON3 @@ -43,8 +43,7 @@ jobs: end return prs end - prs = all_prs() - open_within_threshold = map(x -> x.number, filter(prs) do pr + open_within_threshold = map(x -> x.number, filter(all_prs()) do pr time = DateTime(pr.updated_at[1:19], ISODateTimeFormat) return pr.state == "open" && Dates.days(now() - time) <= retention_days end) @@ -54,7 +53,7 @@ jobs: if isempty(stale_previews) @info "No stale previews" - exit(1) + exit(0) end for pr in stale_previews diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ebd5eb2..6cddef6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## 0.5.0 + - Improved encoding of committed datatypes. + This fixes longstanding issues but new files will not be loaded correctly with JLD2 versions prior to `v0.5.0`. 
+ Existing files remain readable. + - JLD2 now warns when storing structures containing `<: Function` objects. + ## 0.4.53 - Experimental: Slicing and inplace updating of array datasets - updated CI workflows diff --git a/Project.toml b/Project.toml index 981b946d..e6595265 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "JLD2" uuid = "033835bb-8acc-5ee8-8aae-3f567f8a3819" -version = "0.4.53" +version = "0.5.0" [deps] FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" diff --git a/src/committed_datatype_introspection.jl b/src/committed_datatype_introspection.jl index b3d6ea96..4554e2b7 100644 --- a/src/committed_datatype_introspection.jl +++ b/src/committed_datatype_introspection.jl @@ -31,11 +31,11 @@ function stringify_committed_datatype(f, cdt; showfields=false) return julia_type_str, written_type_str, String[] end - field_datatypes = read_field_datatypes(f, attrs) + field_datatypes = read_field_datatypes(f, dt, attrs) field_strs = String[] do_report = false - for i = 1:length(dt.names) - if !isempty(field_datatypes) && (ref = field_datatypes[i]) != NULL_REFERENCE + for (i, key) in enumerate(keys(field_datatypes)) + if (ref = field_datatypes[string(key)]) != NULL_REFERENCE fieldtype = stringify_committed_datatype(f, f.datatype_locations[ref])[1] do_report = true else diff --git a/src/data/reconstructing_datatypes.jl b/src/data/reconstructing_datatypes.jl index f55036a7..861d00a8 100644 --- a/src/data/reconstructing_datatypes.jl +++ b/src/data/reconstructing_datatypes.jl @@ -1,20 +1,22 @@ -function read_field_datatypes(f::JLDFile, attrs::Vector{ReadAttribute}) +function read_field_datatypes(f::JLDFile, dt::CompoundDatatype, attrs::Vector{ReadAttribute}) + offsets = nothing + namevec = nothing for attr in attrs - if attr.name == :field_datatypes - return read_attr_data(f, attr, ReferenceDatatype(), - ReadRepresentation{RelOffset,RelOffset}()) + if attr.name == :field_types + offsets = read_attr_data(f, attr, ReferenceDatatype(), + 
ReadRepresentation{RelOffset,RelOffset}()) + elseif attr.name == :field_names + namevec = read_attr_data(f, attr, H5TYPE_VLEN_UTF8, + ReadRepresentation{String,Vlen{String}}()) + elseif attr.name == :field_datatypes + # Legacy: Files written before JLD2 v0.4.54 + offsets = read_attr_data(f, attr, ReferenceDatatype(), + ReadRepresentation{RelOffset,RelOffset}()) end end - RelOffset[] -end - -function check_empty(attrs::Vector{ReadAttribute}) - for attr in attrs - if attr.name == :empty - return true - end - end - false + isnothing(namevec) && (namevec = string.(dt.names)) + isnothing(offsets) && (offsets = fill(NULL_REFERENCE, length(namevec))) + OrderedDict{String, RelOffset}(namevec .=> offsets) end """ @@ -124,29 +126,28 @@ end function constructrr(::JLDFile, T::DataType, dt::BasicDatatype, attrs::Vector{ReadAttribute}) dt.class == DT_OPAQUE || throw(UnsupportedFeatureException()) if sizeof(T) == dt.size && isempty(T.types) - (ReadRepresentation{T,T}(), true) + return (ReadRepresentation{T,T}(), true) + end + empty = any(a->a.name==:empty, attrs) + if empty + !hasdata(T) && return (ReadRepresentation{T,nothing}(), true) + @warn("$T has $(sizeof(T)*8) bytes, but written type was empty; reconstructing") + elseif isempty(T.types) + @warn("primitive type $T has $(sizeof(T)*8) bits, but written type has $(dt.size*8) bits; reconstructing") else - empty = check_empty(attrs) - if empty - if !hasdata(T) - (ReadRepresentation{T,nothing}(), true) - else - @warn("$T has $(T.size*8) bytes, but written type was empty; reconstructing") - reconstruct_bitstype(T.name.name, dt.size, empty) - end - else - if isempty(T.types) - @warn("primitive type $T has $(T.size*8) bits, but written type has $(dt.size*8) bits; reconstructing") - else - @warn("$T is a non-primitive type, but written type is a primitive type with $(dt.size*8) bits; reconstructing") - end - reconstruct_bitstype(T.name.name, dt.size, empty) - end + @warn("$T is a non-primitive type, but written type is a primitive 
type with $(dt.size*8) bits; reconstructing") end + reconstruct_bitstype(T.name.name, dt.size, empty) end struct TypeMappingException <: Exception end + +unpack_odr(::OnDiskRepresentation{Offsets,JLTypes,H5Types}) where {Offsets,JLTypes,H5Types} = + (Offsets, JLTypes.parameters, H5Types.parameters) + + + """ constructrr(f::JLDFile, T::DataType, dt::CompoundType, attrs::Vector{ReadAttribute}, hard_failure::Bool=false) @@ -161,19 +162,13 @@ will have a matching memory layout without first inspecting the memory layout. function constructrr(f::JLDFile, T::DataType, dt::CompoundDatatype, attrs::Vector{ReadAttribute}, hard_failure::Bool=false) - field_datatypes = read_field_datatypes(f, attrs) - + field_datatypes = read_field_datatypes(f, dt, attrs) # If read type is not a leaf type, reconstruct if !isconcretetype(T) @warn("read type $T is not a leaf type in workspace; reconstructing") return reconstruct_compound(f, string(T), dt, field_datatypes) end - - # Map names in dt to their indices - dtnames = Dict{Symbol,Int}() - for i = 1:length(dt.names) - dtnames[dt.names[i]] = i - end + dtnames = Dict{Symbol,Int}(sym => n for (n,sym) in enumerate(dt.names)) mapped = falses(length(dt.names)) offsets = Vector{Int}(undef, length(T.types)) @@ -197,7 +192,7 @@ function constructrr(f::JLDFile, T::DataType, dt::CompoundDatatype, end dtindex = dtnames[fn[i]] - if !isempty(field_datatypes) && (ref = field_datatypes[dtindex]) != NULL_REFERENCE + if (ref = field_datatypes[string(fn[i])]) != NULL_REFERENCE dtrr = jltype(f, f.datatype_locations[ref]) else dtrr = jltype(f, dt.members[dtindex]) @@ -230,7 +225,6 @@ function constructrr(f::JLDFile, T::DataType, dt::CompoundDatatype, # for this field !(odrs[i] isa OnDiskRepresentation) && !(odrs[i] <: CustomSerialization) - mapped[dtindex] = true end end @@ -242,49 +236,27 @@ function constructrr(f::JLDFile, T::DataType, dt::CompoundDatatype, "\n\nData in these fields will not be accessible") end - if samelayout - (ReadRepresentation{T,T}(), 
true) - else - wodr = odr(T) - # This should theoretically be moved inside the if statement, but then it returns - # the wrong result due to a bug in type inference on 0.6 - typeof_wodr = typeof(wodr) - offsets = (offsets...,) - if wodr isa OnDiskRepresentation - odr_offsets = typeof_wodr.parameters[1] - odr_types = typeof_wodr.parameters[2].parameters - odr_h5types = typeof_wodr.parameters[3].parameters - tequal = length(odr_types) == length(types) - if tequal - for i = 1:length(types) - if !(odr_types[i] <: types[i]) || odr_h5types[i] != odrs[i] - tequal = false - break - end - end - if tequal && odr_offsets == offsets - # This should not be necessary, but type inference mistakenly changes - # the value of wodr here - wodr = typeof_wodr() - return (ReadRepresentation{T,wodr}(), true) - end - end + samelayout && return (ReadRepresentation{T,T}(), true) + offsets = (offsets...,) + if (wodr = odr(T)) isa OnDiskRepresentation + odr_offsets, odr_types, odr_h5types = unpack_odr(wodr) + tequal = length(odr_types) == length(types) + for i = 1:length(types) + tequal || break + tequal &= odr_types[i] <: types[i] + tequal &= odr_h5types[i] == odrs[i] end - return (ReadRepresentation{T,OnDiskRepresentation{offsets, Tuple{types...}, Tuple{odrs...}, offsets[end]+odr_sizeof(odrs[end])}()}(), false) + tequal &= odr_offsets == offsets + tequal && return (ReadRepresentation{T,wodr}(), true) end + return (ReadRepresentation{T,OnDiskRepresentation{offsets, Tuple{types...}, Tuple{odrs...}, offsets[end]+odr_sizeof(odrs[end])}()}(), false) end function constructrr(f::JLDFile, u::Upgrade, dt::CompoundDatatype, attrs::Vector{ReadAttribute}, hard_failure::Bool=false) - field_datatypes = read_field_datatypes(f, attrs) - - - rodr = reconstruct_odr(f, dt, field_datatypes) - types = typeof(rodr).parameters[2].parameters - + rodr = reconstruct_odr(f, dt, read_field_datatypes(f, dt, attrs)) T2 = NamedTuple{tuple(dt.names...), typeof(rodr).parameters[2]} - return (ReadRepresentation{u.target, 
CustomSerialization{T2, rodr}}(), false) end @@ -298,7 +270,7 @@ function constructrr(f::JLDFile, T::UnionAll, dt::CompoundDatatype, attrs::Vector{ReadAttribute}, hard_failure::Bool=false) @warn("read type $T is not a leaf type in workspace; reconstructing") - return reconstruct_compound(f, string(T), dt, read_field_datatypes(f, attrs)) + return reconstruct_compound(f, string(T), dt, read_field_datatypes(f, dt, attrs)) end # Find types in modules @@ -311,14 +283,10 @@ function _resolve_type_singlemodule(::ReadRepresentation{T,DataTypeODR()}, params) where T for part in parts sym = Symbol(part) - if !isa(m, Module) || !isdefined(m, sym) - return nothing - end + (!isa(m, Module) || !isdefined(m, sym)) && return nothing m = getfield(m, sym) end - if !isa(m, DataType) && !isa(m, UnionAll) - return nothing - end + (!isa(m, DataType) && !isa(m, UnionAll)) && return nothing return m end @@ -343,11 +311,9 @@ function _resolve_type(rr::ReadRepresentation{T,DataTypeODR()}, mypath, hasparams, params) - if !isnothing(resolution_attempt) - return resolution_attempt - end + !isnothing(resolution_attempt) && return resolution_attempt end - return hasparams ? UnknownType{Symbol(mypath), Tuple{params...}} : UnknownType{Symbol(mypath), Tuple{}} + return UnknownType{Symbol(mypath), Tuple{ifelse(hasparams,params,())...}} end @@ -363,7 +329,7 @@ function types_from_refs(f::JLDFile, ptr::Ptr) nulldt = CommittedDatatype(UNDEFINED_ADDRESS, 0) cdt = get(f.datatype_locations, ref, nulldt) res = cdt !== nulldt ? 
eltype(jltype(f, cdt)) : load_dataset(f, ref) - unknown_params = unknown_params || isunknowntype(res) || isreconstructed(res) + unknown_params |= isunknowntype(res) || isreconstructed(res) res end for ref in refs] return params, unknown_params @@ -487,7 +453,7 @@ end function constructrr(f::JLDFile, unk::Type{<:UnknownType}, dt::BasicDatatype, attrs::Vector{ReadAttribute}) @warn("type $(typestring(unk)) does not exist in workspace; reconstructing") - reconstruct_bitstype(shorttypestring(unk), dt.size, check_empty(attrs)) + reconstruct_bitstype(shorttypestring(unk), dt.size, any(a->a.name==:empty, attrs)) end """ @@ -511,15 +477,7 @@ function typestring(UT)# ::Type{<:UnknownType} print(tn, T) if !isempty(params) write(tn, '{') - for i = 1:length(params) - x = params[i] - i != 1 && write(tn, ',') - if isunknowntype(x) - write(tn, typestring(x)) - else - print(tn, x) - end - end + join(tn, (isunknowntype(x) ? typestring(x) : x for x in params), ',') write(tn, '}') end String(take!(tn)) @@ -547,15 +505,7 @@ function shorttypestring(UT) #::Type{<:UnknownType} print(tn, T isa Symbol ? split(string(T),'.')[end] : T) if !isempty(params) write(tn, '{') - for i = 1:length(params) - x = params[i] - i != 1 && write(tn, ',') - if isunknowntype(x) - write(tn, shorttypestring(x)) - else - print(tn, x) - end - end + join(tn, (isunknowntype(x) ? 
shorttypestring(x) : x for x in params), ',') write(tn, '}') end String(take!(tn)) @@ -572,7 +522,7 @@ behead(@nospecialize T) = T function constructrr(f::JLDFile, unk::Type{UnknownType{T,P}}, dt::CompoundDatatype, attrs::Vector{ReadAttribute}) where {T,P} - field_datatypes = read_field_datatypes(f, attrs) + field_datatypes = read_field_datatypes(f, dt, attrs) if T isa DataType if T === Tuple # For a tuple with unknown fields, we should reconstruct the fields @@ -583,7 +533,6 @@ function constructrr(f::JLDFile, unk::Type{UnknownType{T,P}}, dt::CompoundDataty else @warn("read type $(typestring(unk)) was parametrized, but type " * "$(T) in workspace is not; reconstructing") - end elseif T isa UnionAll body = behead(T) @@ -631,46 +580,45 @@ end # Reconstruct the ODR of a type from the CompoundDatatype and field_datatypes # attribute function reconstruct_odr(f::JLDFile, dt::CompoundDatatype, - field_datatypes::Vector{RelOffset}) + field_datatypes::OrderedDict{String,RelOffset}) # Get the type and ODR information for each field - types = Vector{Any}(undef, length(dt.names)) - h5types = Vector{Any}(undef, length(dt.names)) - for i = 1:length(dt.names) - if !isempty(field_datatypes) && (ref = field_datatypes[i]) != NULL_REFERENCE - dtrr = jltype(f, f.datatype_locations[ref]) - else + types = [] + h5types = [] + offsets = Int[] + offset = 0 + for (k,typeref) in field_datatypes + i = findfirst(==(Symbol(k)), dt.names) + if typeref != NULL_REFERENCE + dtrr = jltype(f, f.datatype_locations[typeref]) + odr_sizeof(dtrr) != 0 && @warn "Field $k has non-zero size in file, this should not happen" + elseif !isnothing(i) + offset = dt.offsets[i] dtrr = jltype(f, dt.members[i]) + else + throw(InternalError("Field $k not found in datatype")) end - types[i], h5types[i] = typeof(dtrr).parameters + push!(types, typeof(dtrr).parameters[1]) + push!(h5types, typeof(dtrr).parameters[2]) + push!(offsets, offset) + offset += odr_sizeof(dtrr) end - return OnDiskRepresentation{(dt.offsets...,), 
Tuple{types...}, Tuple{h5types...},dt.size}() + OnDiskRepresentation{(offsets...,), Tuple{types...}, Tuple{h5types...},dt.size}() end # Reconstruct type that is a "lost cause": either we were not able to resolve # the name, or the workspace type has additional fields, or cannot convert # fields to workspace types function reconstruct_compound(f::JLDFile, T::String, dt::H5Datatype, - field_datatypes::Union{Vector{RelOffset},Nothing}) + field_datatypes::OrderedDict{String,RelOffset}) rodr = reconstruct_odr(f, dt, field_datatypes) - types = typeof(rodr).parameters[2].parameters - odrs = typeof(rodr).parameters[3].parameters - - fullyinit = true - for i = 1:length(types) - if jlconvert_canbeuninitialized(ReadRepresentation{types[i], odrs[i]}()) - fullyinit = false - break - end - end - if fullyinit - return (ReadRepresentation{ReconstructedStatic{Symbol(T),tuple(dt.names...),Tuple{types...}}, OnDiskRepresentation{(0,), Tuple{NamedTuple{tuple(dt.names...),Tuple{types...}}}, Tuple{rodr}, dt.size}()}(), false) + _, types, odrs = unpack_odr(rodr) + if !any(jlconvert_canbeuninitialized(ReadRepresentation{types[i], odrs[i]}()) for i = 1:length(types)) + rt = ReconstructedStatic{Symbol(T), tuple(dt.names...), Tuple{types...}} + odr = OnDiskRepresentation{(0,), Tuple{NamedTuple{tuple(dt.names...),Tuple{types...}}}, Tuple{rodr}, dt.size}() + return (ReadRepresentation{rt, odr}(), false) end - - # Now reconstruct the type T = ReconstructedMutable{Symbol(T), tuple(dt.names...), Tuple{types...}} - - rr = ReadRepresentation{T, rodr}() - return rr, false + return ReadRepresentation{T, rodr}(), false end # At present, we write Union{} as an object of Core.TypeofBottom. 
The method above @@ -680,19 +628,12 @@ jlconvert(::ReadRepresentation{Core.TypeofBottom,nothing}, f::JLDFile, ptr::Ptr, header_offset::RelOffset) = Union{} function jlconvert(::ReadRepresentation{T, S}, f::JLDFile, ptr::Ptr, header_offset::RelOffset) where {T<:ReconstructedMutable, S} - offsets = typeof(S).parameters[1] - types = typeof(S).parameters[2].parameters - odrs = typeof(S).parameters[3].parameters - + offsets, types, odrs = unpack_odr(S) res = Vector{Any}(undef, length(types)) for i = 1:length(types) - offset = offsets[i] - rtype = types[i] - odr = odrs[i] - - rr = ReadRepresentation{rtype,odr}() - if !(jlconvert_canbeuninitialized(rr)) || jlconvert_isinitialized(rr, ptr+offset) - res[i] = jlconvert(rr, f, ptr+offset, NULL_REFERENCE) + rr = ReadRepresentation{types[i],odrs[i]}() + if !(jlconvert_canbeuninitialized(rr)) || jlconvert_isinitialized(rr, ptr+offsets[i]) + res[i] = jlconvert(rr, f, ptr+offsets[i], NULL_REFERENCE) end end return T(res) @@ -703,104 +644,68 @@ end header_offset::RelOffset) where {T,S} isa(S, DataType) && return :(convert(T, jlunsafe_load(pconvert(Ptr{S}, ptr)))) @assert isa(S, OnDiskRepresentation) + offsets, types, odrs = unpack_odr(S) + fn = T === Tuple ? 
[Symbol(i) for i = 1:length(types)] : fieldnames(T) - offsets = typeof(S).parameters[1] - types = typeof(S).parameters[2].parameters - odrs = typeof(S).parameters[3].parameters - - blk = Expr(:block) - args = blk.args - # Separate treatment for mutable structs for better readability if ismutabletype(T) - push!(args, quote + blk = quote obj = $(Expr(:new, T)) track_weakref_if_untracked!(f, header_offset, obj) - end) - fn = fieldnames(T) + end for i = 1:length(types) - offset = offsets[i] rtype = types[i] - odr = odrs[i] - - rr = ReadRepresentation{rtype,odr}() - - fni = QuoteNode(fn[i]) + rr = ReadRepresentation{rtype,odrs[i]}() ttype = T.types[i] - if odr === nothing + if isnothing(odrs[i]) # Type is not stored or single instance - newi = Expr(:new, ttype) - # use jl_set_nth_field instead of setfield! since the former also works for const fields - # in mutable structs. - push!(args, :(ccall(:jl_set_nth_field, Nothing, (Any, Csize_t, Any), obj, ($i)-1, $newi))) + fieldval = Expr(:new, ttype) else - loadfield = quote - fieldval = rconvert($ttype, jlconvert($rr, f, ptr+$offset, NULL_REFERENCE)::$rtype)::$ttype - ccall(:jl_set_nth_field, Nothing, (Any, Csize_t, Any), obj, ($i)-1, fieldval) - end - if jlconvert_canbeuninitialized(rr) - push!(args, :(jlconvert_isinitialized($rr, ptr+$offset) && $(loadfield))) - else - push!(args, loadfield) - end + fieldval = :(rconvert($ttype, jlconvert($rr, f, ptr+$(offsets[i]), NULL_REFERENCE)::$rtype)::$ttype) end + # use jl_set_nth_field instead of setfield! since the former also works for const fields + # in mutable structs. 
+ setfield = :(ccall(:jl_set_nth_field, Nothing, (Any, Csize_t, Any), obj, ($i)-1, $fieldval)) + if jlconvert_canbeuninitialized(rr) + setfield = :(jlconvert_isinitialized($rr, ptr+$(offsets[i])) && $(setfield)) + end + push!(blk.args, setfield) end - - push!(args, (:obj)) + push!(blk.args, (:obj)) return blk end - if isbitstype(T) - # For bits types, we should always inline, because otherwise we'll just - # pass a lot of crap around in registers - push!(args, Expr(:meta, :inline)) - end - fsyms = [] - fn = T === Tuple ? [Symbol(i) for i = 1:length(types)] : fieldnames(T) + blk = Expr(:block) + fsyms = Symbol[] for i = 1:length(types) - offset = offsets[i] + ptr = :(ptr + $(offsets[i])) rtype = types[i] - odr = odrs[i] - fsym = Symbol("field_", fn[i]) push!(fsyms, fsym) + rr = ReadRepresentation{rtype,odrs[i]}() - rr = ReadRepresentation{rtype,odr}() - - if odr === nothing - # Type is not stored or single instance - if T.types[i] == Union{} - # This cannot be defined - @assert !ismutabletype(T) - push!(args, Expr(:return, Expr(:new, T, fsyms[1:i-1]...))) - return blk - else - newi = Expr(:new, T.types[i]) - push!(args, :($fsym = $newi)) - end - else - if jlconvert_canbeuninitialized(rr) - push!(args, quote - if !jlconvert_isinitialized($rr, ptr+$offset) - $(if T <: Tuple || i <= ninitialized(T) - # Reference must always be initialized - :(throw(UndefinedFieldException(T,$(QuoteNode(fn[i]))))) - else - Expr(:return, Expr(:new, T, fsyms[1:i-1]...)) - end) - end - end) - end - if T === Tuple - # Special case for reconstructed tuples, where we don't know the - # field types in advance - push!(args, :($fsym = jlconvert($rr, f, ptr+$offset, NULL_REFERENCE)::$rtype)) - else - ttype = T.types[i] - push!(args, :($fsym = rconvert($ttype, jlconvert($rr, f, ptr+$offset, NULL_REFERENCE)::$rtype)::$ttype)) - end + if isnothing(odrs[i]) + push!(blk.args, :($fsym = $(Expr(:new, rtype)))) + continue end + if jlconvert_canbeuninitialized(rr) + push!(blk.args, quote + if 
!jlconvert_isinitialized($rr, $ptr) + $(if T <: Tuple || i <= ninitialized(T) + # Reference must always be initialized + :(throw(UndefinedFieldException(T,$(QuoteNode(fn[i]))))) + else + Expr(:return, Expr(:new, T, fsyms[1:i-1]...)) + end) + end + end) + end + fieldval = :(jlconvert($rr, f, $ptr, NULL_REFERENCE)::$rtype) + if T !== Tuple + # Convert to the known workspace field type; reconstructed tuples + # skip this because their field types are not known in advance + fieldval = :(rconvert($(T.types[i]), $fieldval)::$(T.types[i])) + end + push!(blk.args, :($fsym = $fieldval)) end - - push!(args, T <: Tuple ? Expr(:tuple, fsyms...) : Expr(:new, T, fsyms...)) - + push!(blk.args, T <: Tuple ? Expr(:tuple, fsyms...) : Expr(:new, T, fsyms...)) blk end diff --git a/src/data/writing_datatypes.jl b/src/data/writing_datatypes.jl index bf8ba472..68570202 100644 --- a/src/data/writing_datatypes.jl +++ b/src/data/writing_datatypes.jl @@ -45,7 +45,6 @@ end # Gets the size of an on-disk representation function odr_sizeof(::OnDiskRepresentation{Offsets,JLTypes,H5Types,Size}) where {Offsets,JLTypes,H5Types,Size} - #Offsets[end]+odr_sizeof(H5Types.parameters[end]) Size end @@ -151,26 +150,44 @@ h5type(f::JLDFile, @nospecialize(x)) = h5type(f, writeas(typeof(x)), x) offsets = Int[] h5names = Symbol[] members = H5Datatype[] - fieldtypes = RelOffset[] - hasfieldtype = false - + field_names = String[] + field_types = RelOffset[] + anynondefault = false offset = 0 for i = 1:length(types) fieldty = types[i] fieldwrittenas = writeas(fieldty) - !hasfielddata(fieldwrittenas) && continue dtype = h5fieldtype(f, fieldwrittenas, fieldty, Val{i <= ninitialized(writtenas)}) - dtype === nothing && continue - push!(h5names, names[i]) + if isnothing(dtype) + # this is the function body of h5type(f, fieldwrittenas, x) but an instance x of fieldty is unavailable here + cdt = get(f.jlh5type, fieldty, nothing) + dtype = if !isnothing(cdt) + cdt + elseif !hasdata(fieldwrittenas) + commit(f, OpaqueDatatype(1), fieldwrittenas, fieldty, 
WrittenAttribute(f, :empty, UInt8(1))) + elseif isempty(fieldwrittenas.types) # bitstype + commit(f, OpaqueDatatype(sizeof(fieldwrittenas)), fieldwrittenas, fieldty) + else + commit_compound(f, fieldnames(fieldwrittenas), fieldwrittenas, fieldty) + end + push!(field_names, string(names[i])) + push!(field_types, dtype.header_offset) + anynondefault = true + continue + end + if isa(dtype, CommittedDatatype) # HDF5 cannot store relationships among committed # datatypes. We store these separately in an attribute. - push!(fieldtypes, dtype.header_offset) + type_offset = dtype.header_offset dtype = f.datatypes[dtype.index] - hasfieldtype = true + anynondefault = true else - push!(fieldtypes, NULL_REFERENCE) + type_offset = NULL_REFERENCE end + push!(field_names, string(names[i])) + push!(field_types, type_offset) + push!(h5names, names[i]) push!(members, dtype) push!(offsets, offset) offset += dtype.size::UInt32 @@ -178,12 +195,16 @@ h5type(f::JLDFile, @nospecialize(x)) = h5type(f, writeas(typeof(x)), x) @assert offset != 0 compound = CompoundDatatype(offset, h5names, offsets, members) - if hasfieldtype - fieldtypeattr = WrittenAttribute(:field_datatypes, - WriteDataspace(f, fieldtypes, DataType), - ReferenceDatatype(), - fieldtypes) - commit(f, compound, writtenas, readas, fieldtypeattr)::CommittedDatatype + if anynondefault + commit(f, compound, writtenas, readas, + WrittenAttribute(:field_names, + WriteDataspace(f, field_names, Vlen{String}), + h5type(f, field_names), + field_names), + WrittenAttribute(:field_types, + WriteDataspace(f, field_types, DataType), + ReferenceDatatype(), + field_types)) else commit(f, compound, writtenas, readas)::CommittedDatatype end @@ -606,6 +627,9 @@ end # fieldodr, but actually encoding the data for things that odr stores # as references @nospecializeinfer function odr(@nospecialize(T::Type)) + if T <: Function + @warn LazyString("Attempting to store ", T, ".\n Function types cannot be properly stored in JLD2 files.\n Loading may 
yield unexpected results.") + end if !hasdata(T) # A pointer singleton or ghost. We need to write something, but we'll # just write a single byte. diff --git a/src/datalayouts.jl b/src/datalayouts.jl index 50db3fc6..5c8b2323 100644 --- a/src/datalayouts.jl +++ b/src/datalayouts.jl @@ -65,12 +65,8 @@ function FilterPipeline(msg_::Hmessage) name_length = (version == 2 && id < 255) ? zero(UInt16) : jlread(io, UInt16) flags = jlread(io, UInt16) nclient_vals = jlread(io, UInt16) - if iszero(name_length) - name = "" - else - name = read_bytestring(io) - skip(io, 8-mod1(sizeof(name), 8)-1) - end + name = iszero(name_length) ? "" : read_bytestring(io) + skip(io, max(0, 8-mod1(name_length, 8)-1)) client_data = jlread(io, UInt32, nclient_vals) (version == 1 && isodd(nclient_vals)) && skip(io, 4) Filter(id, flags, name, client_data) diff --git a/src/datasets.jl b/src/datasets.jl index 92d50072..4d981df7 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -102,6 +102,8 @@ end read_scalar(f, rr, header_offset) elseif dataspace.dataspace_type == DS_V1 read_array(f, dataspace, rr, layout, filters, header_offset, attributes) + elseif dataspace.dataspace_type == DS_NULL + read_empty(f, rr, dataspace, attributes, header_offset) else throw(UnsupportedFeatureException()) end @@ -138,9 +140,7 @@ function read_data(f::JLDFile, end end elseif dataspace.dataspace_type == DS_NULL - return read_empty(ReadRepresentation{Union{},nothing}(), f, - attributes[find_dimensions_attr(attributes)], - header_offset) + return read_empty(f, ReadRepresentation{Union{},nothing}(), dataspace, attributes, header_offset) elseif dataspace.dataspace_type == DS_V1 return read_array(f, dataspace, ReadRepresentation{Any,RelOffset}(), layout, FilterPipeline(), header_offset, attributes) @@ -157,13 +157,11 @@ function read_data(f::JLDFile, dataspace, header_offset, layout, filters = read_dataspace iscompressed(filters) && throw(UnsupportedFeatureException()) dataspace.dataspace_type == DS_NULL || 
throw(UnsupportedFeatureException()) - - dimensions_attr_index = find_dimensions_attr(attributes) - if dimensions_attr_index == 0 + if !any(a->a.name==:dimensions, attributes) jlconvert(rr, f, Ptr{Cvoid}(0), header_offset) else # dimensions attribute => array of empty type - read_empty(rr, f, attributes[dimensions_attr_index], header_offset) + read_empty(f, rr, dataspace, attributes, header_offset) end end @@ -178,18 +176,10 @@ function find_dimensions_attr(attributes::Vector{ReadAttribute}) dimensions_attr_index end -function read_empty(rr::ReadRepresentation{T}, f::JLDFile, - dimensions_attr::ReadAttribute, header_offset::RelOffset) where T - dimensions_attr.datatype.class == DT_FIXED_POINT || throw(UnsupportedFeatureException()) - - io = f.io - seek(io, dimensions_attr.dataspace.dimensions_offset) - ndims = Int(jlread(io, Length)) - - dimensions_attr.datatype == h5fieldtype(f, Int64, Int64, Val{true}) || throw(UnsupportedFeatureException()) - - seek(io, dimensions_attr.data_offset) - v = construct_array(io, T, ndims) +function read_empty(f::JLDFile, rr::ReadRepresentation{T}, dataspace, attributes, header_offset::RelOffset) where T + ndims, offset = get_ndims_offset(f, dataspace, attributes) + seek(f.io, offset) + v = construct_array(f.io, T, Int(ndims)) if isconcretetype(T) for i = 1:length(v) @inbounds v[i] = jlconvert(rr, f, Ptr{Cvoid}(0), header_offset) @@ -210,6 +200,7 @@ function get_ndims_offset(f::JLDFile, dataspace::ReadDataspace, attributes::Abst if x.name == :dimensions (x.dataspace.dataspace_type == DS_SIMPLE && x.dataspace.dimensionality == 1) || throw(InvalidDataException()) + x.datatype == h5fieldtype(f, Int64, Int64, Val{true}) || throw(UnsupportedFeatureException()) seek(f.io, x.dataspace.dimensions_offset) ndims = UInt8(jlread(f.io, Length)) offset = x.data_offset diff --git a/src/dataspaces.jl b/src/dataspaces.jl index 738abb3c..6bbd74a2 100644 --- a/src/dataspaces.jl +++ b/src/dataspaces.jl @@ -34,7 +34,7 @@ struct DataspaceStart end 
define_packed(DataspaceStart) -const EMPTY_DIMENSIONS = Int[] +const EMPTY_DIMENSIONS = Int64[] # Pass-through for custom serializations # Need a bunch of methods to avoid ambiguity diff --git a/src/datatypes.jl b/src/datatypes.jl index 90f77d93..9bb03392 100644 --- a/src/datatypes.jl +++ b/src/datatypes.jl @@ -50,53 +50,29 @@ function Base.:(==)(dt1::BasicDatatype, dt2::BasicDatatype) ret end -# Replace a symbol or expression in an AST with a new one -function replace_expr(x, from, to) - ex = copy(x) - for i = 1:length(ex.args) - x = ex.args[i] - if x == from - ex.args[i] = to - elseif isa(x, Expr) - ex.args[i] = replace_expr(x, from, to) - end - end - ex -end - -# Macro to read an entire datatype from a file, avoiding dynamic -# dispatch for all but variable length types -macro read_datatype(io, datatype_class, datatype, then) - esc(quote - if $datatype_class%16== DT_FIXED_POINT - $(replace_expr(then, datatype, :(jlread($io, FixedPointDatatype)))) - elseif $datatype_class%16 == DT_FLOATING_POINT - $(replace_expr(then, datatype, :(jlread($io, FloatingPointDatatype)))) - elseif $datatype_class%16 in (DT_STRING, DT_OPAQUE, DT_REFERENCE) - $(replace_expr(then, datatype, :(jlread($io, BasicDatatype)))) - elseif $datatype_class%16 == DT_COMPOUND - $(replace_expr(then, datatype, :(jlread($io, CompoundDatatype)))) - elseif $datatype_class%16 == DT_VARIABLE_LENGTH - $(replace_expr(then, datatype, :(jlread($io, VariableLengthDatatype)))) - elseif $datatype_class%16 == DT_BITFIELD - $(replace_expr(then, datatype, :(jlread($io, BitFieldDatatype)))) - elseif $datatype_class%16 == DT_TIME - throw(UnsupportedFeatureException("Time datatype (rarely used) not supported")) - elseif $datatype_class%16 == DT_ARRAY - $(replace_expr(then, datatype, :(jlread($io, ArrayDatatype)))) - elseif $datatype_class%16 == DT_ENUMERATED - $(replace_expr(then, datatype, :(jlread($io, EnumerationDatatype)))) - else - throw(UnsupportedFeatureException("invalid datatype class $datatype_class")) - end 
- end) -end - function jlread(io::IO, ::Type{H5Datatype}) - datatype_class = jlread(io, UInt8) + dtclass = jlread(io, UInt8) % 16 seek(io, position(io)-1) - @read_datatype io datatype_class dt begin - dt + if dtclass == DT_FIXED_POINT + jlread(io, FixedPointDatatype) + elseif dtclass == DT_FLOATING_POINT + jlread(io, FloatingPointDatatype) + elseif dtclass in (DT_STRING, DT_OPAQUE, DT_REFERENCE) + jlread(io, BasicDatatype) + elseif dtclass == DT_COMPOUND + jlread(io, CompoundDatatype) + elseif dtclass == DT_VARIABLE_LENGTH + jlread(io, VariableLengthDatatype) + elseif dtclass == DT_BITFIELD + jlread(io, BitFieldDatatype) + elseif dtclass == DT_TIME + throw(UnsupportedFeatureException("Time datatype (rarely used) not supported")) + elseif dtclass == DT_ARRAY + jlread(io, ArrayDatatype) + elseif dtclass == DT_ENUMERATED + jlread(io, EnumerationDatatype) + else + throw(UnsupportedFeatureException("invalid datatype class $dtclass")) end end @@ -176,7 +152,8 @@ struct CompoundDatatype <: H5Datatype end function jltype(f::JLDFile, dt::CompoundDatatype) - odr = reconstruct_odr(f, dt, RelOffset[]) + field_datatypes = OrderedDict{String, RelOffset}(string.(dt.names) .=> fill(NULL_REFERENCE, length(dt.names))) + odr = reconstruct_odr(f, dt, field_datatypes) T = NamedTuple{tuple(dt.names...), typeof(odr).parameters[2]} return ReadRepresentation{T, odr}() end @@ -254,11 +231,7 @@ function jlread(io::IO, ::Type{CompoundDatatype}) end # Member type message - datatype_class = jlread(io, UInt8) - skip(io, -1) - @read_datatype io datatype_class member begin - members[i] = member - end + members[i] = jlread(io, H5Datatype) end CompoundDatatype(dt.size, names, offsets, members) @@ -294,14 +267,11 @@ end function jlread(io::IO, ::Type{VariableLengthDatatype}) dtype = jlread(io, BasicDatatype) - datatype_class = jlread(io, UInt8) - skip(io, -1) - @read_datatype io datatype_class dt begin - VariableLengthDatatype(dtype.class, dtype.bitfield1, dtype.bitfield2, dtype.bitfield3, 
dtype.size, dt) - end + dt = jlread(io, H5Datatype) + VariableLengthDatatype(dtype.class, dtype.bitfield1, dtype.bitfield2, dtype.bitfield3, dtype.size, dt) end -jlsizeof(dt::CommittedDatatype) = 2 + jlsizeof(RelOffset) +jlsizeof(::CommittedDatatype) = 2 + jlsizeof(RelOffset) function jlwrite(io::IO, dt::CommittedDatatype) jlwrite(io, UInt8(3)) @@ -379,12 +349,8 @@ function jlread(io::IO, ::Type{ArrayDatatype}) # unsupported permutation index skip(io, 4*dimensionality) end - - datatype_class = jlread(io, UInt8) - skip(io, -1) - @read_datatype io datatype_class base_type begin - ArrayDatatype(dt.class, 0x0, 0x0, 0x0, dimensionality, dims, base_type) - end + base_type = jlread(io, H5Datatype) + ArrayDatatype(dt.class, 0x0, 0x0, 0x0, dimensionality, dims, base_type) end struct ArrayPlaceHolder{T, D} end diff --git a/src/explicit_datasets.jl b/src/explicit_datasets.jl index 1616a516..d61a2704 100644 --- a/src/explicit_datasets.jl +++ b/src/explicit_datasets.jl @@ -518,17 +518,20 @@ struct ArrayDataset{T, N, ODR, io} <: AbstractArray{T, N} dims::NTuple{N, Int} data_address::Int64 rr::ReadRepresentation{T, ODR} + writable::Bool end function ArrayDataset(dset::Dataset) isarraydataset(dset) || throw(ArgumentError("Dataset is not an array")) iscompressed(dset.filters) && throw(UnsupportedFeatureException("Compressed datasets are not supported.")) f = dset.parent.f dt = dset.datatype + writable = f.writable && (dset.layout.layout_class == LcContiguous) return ArrayDataset( f, dset, Int.(reverse(dset.dataspace.dimensions)), fileoffset(f, dset.layout.data_address), - jltype(f, !(f.plain) && dt isa SharedDatatype ? get(f.datatype_locations, dt.header_offset, dt) : dt) + jltype(f, !(f.plain) && dt isa SharedDatatype ? 
get(f.datatype_locations, dt.header_offset, dt) : dt), + writable ) end @@ -556,6 +559,7 @@ end function Base.setindex!(A::ArrayDataset{T,N,ODR}, v, i::Integer) where {T,N,ODR} @boundscheck checkbounds(A, i) A.f.writable || throw(ArgumentError("Cannot edit in read-only mode")) + A.writable || throw(ArgumentError("Dataset cannot be edited")) seek(A.f.io, A.data_address + (i-1)*odr_sizeof(A.rr)) write_data(A.f.io, A.f, v, T, datamode(ODR), JLDWriteSession()) return v diff --git a/src/file_header.jl b/src/file_header.jl index 7e3cb863..5d33d3f8 100644 --- a/src/file_header.jl +++ b/src/file_header.jl @@ -1,11 +1,11 @@ # Currently we specify a 512 byte header const FILE_HEADER_LENGTH = 512 -const FORMAT_VERSION = v"0.1.1" +const FORMAT_VERSION = v"0.2.0" # Range of file format versions that can be read # Publish patch release relaxing upper version bound # if the imminent major release is not breaking -const COMPATIBLE_VERSIONS = (lower=v"0.1", upper=v"0.2") +const COMPATIBLE_VERSIONS = (lower=v"0.1", upper=v"0.3") const REQUIRED_FILE_HEADER = "HDF5-based Julia Data Format, version " const FILE_HEADER = "$(REQUIRED_FILE_HEADER)$(FORMAT_VERSION)\x00 (Julia $(VERSION) $(sizeof(Int)*8)-bit $(htol(1) == 1 ? 
"LE" : "BE"))\x00" @assert length(FILE_HEADER) <= FILE_HEADER_LENGTH diff --git a/test/loadsave.jl b/test/loadsave.jl index 7dcb3afc..ec87d90b 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -757,3 +757,32 @@ end end end end + + +@testset "Missing Types in Tuples" begin + cd(mktempdir()) do + eval(:(module ModuleWithFunction + fun(x) = x+1 + end)) + if VERSION ≥ v"1.7" + @test_warn( + contains("Function types cannot"), + eval(:(save_object("test.jld2", (1, ModuleWithFunction.fun, 2)))) + ) + else + eval(:(save_object("test.jld2", (1, ModuleWithFunction.fun, 2)))) + end + obj = load_object("test.jld2") + @test length(obj) == 3 + @test obj[1] == 1 + @test obj[2] == eval(:(ModuleWithFunction.fun)) + @test obj[3] == 2 + + eval(:(module ModuleWithFunction end)) + obj = load_object("test.jld2") + @test length(obj) == 3 + @test obj[1] == 1 + @test JLD2.isreconstructed(obj[2]) + @test obj[3] == 2 + end +end \ No newline at end of file