-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
11 changed files
with
629 additions
and
547 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,22 +1,24 @@ | ||
name = "ScientificTypes" | ||
uuid = "321657f4-b219-11e9-178b-2701a2544e81" | ||
authors = ["Anthony D. Blaom <[email protected]>"] | ||
version = "0.2.5" | ||
version = "0.2.6" | ||
This comment has been minimized.
Sorry, something went wrong. |
||
|
||
[deps] | ||
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" | ||
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f" | ||
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" | ||
|
||
[compat] | ||
CategoricalArrays = "^0.7" | ||
CategoricalArrays = "^0.7.3" | ||
ColorTypes = "^0.8" | ||
Tables = "^0.2" | ||
julia = "1" | ||
|
||
[extras] | ||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" | ||
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" | ||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | ||
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | ||
|
||
[targets] | ||
test = ["Random", "Test"] | ||
test = ["Random", "Test", "CSV", "DataFrames"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
# Fetch column `name` from the column-accessible object `X` and, when `types`
# holds an entry for that column, return the column coerced to `types[name]`.
# Columns without an entry are returned untouched. Any keyword arguments are
# forwarded verbatim to the column-level `coerce`.
function _coerce_col(X, name, types; args...)
    col = getproperty(X, name)

    # Nothing requested for this column: hand it back as is.
    haskey(types, name) || return col

    # HACK: a `LazyArrays.ApplyArray` column is converted to a `Vector` before
    # coercion, see issue #49.
    col = is_type(col, :LazyArrays, :ApplyArray) ? convert(Vector, col) : col
    return coerce(col, types[name]; args...)
end
|
||
""" | ||
coerce(X, col1=>scitype1, col2=>scitype2, ... ; verbosity=1) | ||
coerce(X, d::AbstractDict; verbosity=1) | ||
Return a copy of the table `X` with the scitypes of the specified | ||
columns coerced to those specified, or to missing-value versions of | ||
these scitypes, with warnings issued (for positive `verbosity`). | ||
Alternatively, the specifications can be wrapped in a dictionary. | ||
### Example | ||
```julia | ||
using CategoricalArrays, DataFrames, Tables | ||
X = DataFrame(name=["Siri", "Robo", "Alexa", "Cortana"], | ||
height=[152, missing, 148, 163], | ||
rating=[1, 5, 2, 1]) | ||
coerce(X, :name=>Multiclass, :height=>Continuous, :rating=>OrderedFactor) | ||
See also [`scitype`](@ref), [`schema`](@ref). | ||
``` | ||
""" | ||
function coerce(X, pairs::Pair{Symbol}...; verbosity=1) | ||
trait(X) == :table || | ||
error("Non-tabular data encountered or Tables pkg not loaded.") | ||
names = Tables.schema(X).names | ||
dpairs = Dict(pairs) | ||
X_ct = Tables.columntable(X) | ||
ct_new = (_coerce_col(X_ct, col, dpairs; verbosity=verbosity) for col in names) | ||
return Tables.materializer(X)(NamedTuple{names}(ct_new)) | ||
end | ||
# Dictionary form: unpack the `column => scitype` entries of `types` and defer
# to the method taking pairs. Any `AbstractDict` is accepted, matching the
# `coerce(X, d::AbstractDict; verbosity=1)` signature given in the docstring.
coerce(X, types::AbstractDict; kw_args...) = coerce(X, types...; kw_args...)
|
||
|
||
""" | ||
coerce!(X, ...) | ||
Same as [`coerce`](@ref) except it does the modification in place provided `X` | ||
supports in-place modification (at the moment, only the DataFrame! does). | ||
An error is thrown otherwise. The arguments are the same as `coerce`. | ||
""" | ||
function coerce!(X, args...; kwargs...) | ||
# DataFrame --> coerce_dataframe! (see convention) | ||
is_type(X, :DataFrames, :DataFrame) && return coerce_df!(X, args...; kwargs...) | ||
# Everything else | ||
throw(ArgumentError("In place coercion not supported for $(typeof(X)). Try `coerce` instead.")) | ||
end | ||
# Dictionary form: unpack the `column => scitype` entries of `types` and defer
# to the method taking pairs. Keyword arguments must be forwarded after a `;`;
# splatting them after a comma would pass them on as extra positional
# `name => value` pairs, i.e. as if they were column specifications.
coerce!(X, types::AbstractDict; kwargs...) = coerce!(X, types...; kwargs...)
|
||
# In-place counterpart of the table method of `coerce`, called by `coerce!`
# for DataFrames. Every column of `df` that is named in `pairs` is replaced by
# its coerced version; pairs naming a column absent from `df` are ignored.
# `verbosity` is forwarded to the column-level `coerce`. Returns `df`.
function coerce_df!(df, pairs::Pair{Symbol}...; verbosity=1)
    names = Tables.schema(df).names
    types = Dict(pairs)
    for name in names
        haskey(types, name) || continue
        # For DataFrames >= 0.19 one could write `df[!, name] = ...`, but property
        # assignment is the form that also works on older versions. `df.name = x`
        # is just `setproperty!(df, :name, x)`, so call it directly rather than
        # building and `eval`-ing an expression for each column.
        col = getproperty(df, name)
        setproperty!(df, name, coerce(col, types[name]; verbosity=verbosity))
    end
    return df
end
Oops, something went wrong.
1 comment
on commit f114943
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Registration pull request created: JuliaRegistries/General/5855
After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.
This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:
git tag -a v0.2.6 -m "<description of version>" f1149435c4c9b75417e04319ef32ece194d751d3
git push origin v0.2.6
@JuliaRegistrator register
Release note:
- `coerce!(df, ...)` (#50)
- `elscitype` functionality which complements that of `scitype_union` (#59)