Skip to content

Commit

Permalink
Refactoring: Remove transforms (#38)
Browse files Browse the repository at this point in the history
* Refactoring: Remove transforms

* Update README
  • Loading branch information
eliascarv authored Oct 17, 2023
1 parent afe8d1f commit f0f445c
Show file tree
Hide file tree
Showing 18 changed files with 90 additions and 492 deletions.
4 changes: 0 additions & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ version = "1.1.0"

[deps]
AxisArrays = "39de3d68-74b9-583c-8d2d-e117c070f3a9"
DataScienceTraits = "6cb2f572-2d2b-4ba6-bdb3-e710fa044d6c"
Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b"
Expand All @@ -15,17 +14,14 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91"
TableTransforms = "0d432bfd-3ee1-4ac1-886a-39f05cc69a3e"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
AxisArrays = "0.4"
DataScienceTraits = "0.1"
Distances = "0.10"
Distributions = "0.25"
FillArrays = "1.6"
StaticArrays = "1.6"
StatsBase = "0.34"
TableTransforms = "1.16"
Tables = "1.11"
julia = "1.9"
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,9 @@ julia> c == cₒ
false
```

### Transforms
### Log-ratio transformations

Currently, the following transforms are implemented:
Currently, the following log-ratio transformations are implemented:

```julia
julia> alr(c)
Expand All @@ -167,8 +167,8 @@ julia> ilr(c)

and their inverses `alrinv`, `clrinv` and `ilrinv`.

The package also defines transforms for tables following to the
[TableTransforms.jl](https://github.com/JuliaML/TableTransforms.jl) interface, including `Closure`, `Remainder`, `ALR`, `CLR`, `ILR`.
The transforms for tables are defined in the [TableTransforms.jl](https://github.com/JuliaML/TableTransforms.jl)
package: `Closure`, `Remainder`, `ALR`, `CLR`, `ILR`.
These transforms are functors that can be used as follows:

```julia
Expand Down
31 changes: 9 additions & 22 deletions src/CoDa.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,36 +5,27 @@
module CoDa

using Tables
using TableTransforms
using Distributions
using StatsBase
using StaticArrays
using Statistics
using Distributions
using LinearAlgebra
using StaticArrays
using FillArrays
using AxisArrays
using Statistics
using Random
using Printf

import Tables
import DataScienceTraits as DST
import TableTransforms: FeatureTransform
import TableTransforms: StatelessFeatureTransform
import TableTransforms: SciTypeAssertion
import TableTransforms: assertions, isrevertible
import TableTransforms: applyfeat, revertfeat
import TableTransforms: preprocess, reapply
import Distances: Metric, result_type
import Base: +, -, *, /, ==
import Base: zero, adjoint, inv
import Distances: Metric, result_type
import Statistics: mean, var, std
import LinearAlgebra: norm, ⋅
import LinearAlgebra: norm, dot
import Random: rand

include("compositions.jl")
include("codaarrays.jl")
include("distances.jl")
include("transforms.jl")
include("logratio.jl")
include("covariances.jl")
include("matrices.jl")

Expand All @@ -44,6 +35,7 @@ export
parts,
components,
norm,
dot,
⋅,
smooth,
𝒞,
Expand All @@ -59,13 +51,7 @@ export
Aitchison,
aitchison,

# transforms
Closure,
Remainder,
LogRatio,
ALR,
CLR,
ILR,
# log-ratio
alr,
alrinv,
clr,
Expand All @@ -90,4 +76,5 @@ export
G,
HMatrix,
H

end
21 changes: 11 additions & 10 deletions src/codaarrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,25 +40,26 @@ Parts in compositional `array`.
parts(::CoDaArray{D,PARTS}) where {D,PARTS} = PARTS

"""
compose(table, cols; keepcols=true, as=:coda)
compose(table, colnames; keepcols=true, as=:coda)
Convert columns `cols` of `table` into parts of a
Convert columns `colnames` of `table` into parts of a
composition and save the result in a [`CoDaArray`](@ref).
If `keepcols` is set to `true`, then save the result `as`
a column in a new table with all other columns preserved.
"""
function compose(table, cols=Tables.columnnames(table); keepcols=true, as=:coda)
function compose(table, colnames=Tables.columnnames(Tables.columns(table)); keepcols=true, as=:coda)
cols = Tables.columns(table)
names = Tables.columnnames(cols)
scols = (nm => Tables.getcolumn(cols, nm) for nm in colnames)
# construct compositional array from selected columns
coda = table |> Select(cols) |> CoDaArray
coda = (; scols...) |> CoDaArray

# different types of return
if keepcols
other = setdiff(Tables.columnnames(table), cols)
osel = table |> Select(other)
ocol = [o => Tables.getcolumn(osel, o) for o in other]
other = setdiff(names, colnames)
ocols = (nm => Tables.getcolumn(cols, nm) for nm in other)
# preserve input table type
𝒯 = Tables.materializer(table)
𝒯((; ocol..., as => coda))
(; ocols..., as => coda) |> Tables.materializer(table)
else
coda
end
Expand All @@ -76,4 +77,4 @@ Tables.rows(array::CoDaArray) = array
# implement row interface for Composition
Tables.getcolumn(c::Composition, i::Int) = getfield(c, :data)[i]
Tables.getcolumn(c::Composition, n::Symbol) = getfield(c, :data)[n]
Tables.columnnames(c::Composition{D,PARTS}) where {D,PARTS} = PARTS
Tables.columnnames(::Composition{D,PARTS}) where {D,PARTS} = PARTS
12 changes: 4 additions & 8 deletions src/compositions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ julia> Composition((:a, :b), (0.2, 0.8))
```
When the names of the parts are not specified, the
constructor uses default names `part1`, `part2`,
..., `partD`:
constructor uses default names `w1`, `w2`, ..., `wD`:
```
julia> Composition(0.1, 0.8)
Expand All @@ -40,7 +39,7 @@ Composition(; data...) = Composition((; data...))

Composition(parts::NTuple, comps) = Composition((; zip(parts, Tuple(comps))...))

Composition(comps) = Composition(ntuple(i -> Symbol("w$i"), length(comps)), comps)
Composition(comps) = Composition(ntuple(i -> Symbol(:w, i), length(comps)), comps)

Composition(comp::Real, comps...) = Composition((comp, comps...))

Expand Down Expand Up @@ -81,7 +80,7 @@ zero(T::Type{<:Composition{D}}) where {D} = Composition(parts(T), ntuple(i -> 1

function ==(c₁::Composition, c₂::Composition)
  # Two compositions are equal when their part names match exactly and
  # their components agree approximately (`≈`) after each component
  # collection has been passed through `𝒞`.
  parts(c₁) == parts(c₂) && 𝒞(components(c₁)) ≈ 𝒞(components(c₂))
end

⋅(c₁::Composition{D}, c₂::Composition{D}) where {D} = begin
function dot(c₁::Composition{D}, c₂::Composition{D}) where {D}
  # Inner product of two compositions with the same number of parts `D`:
  # accumulate the products of pairwise log-ratios over every pair of
  # parts `(i, j)` with `i > j`, then divide the total by `D`.
  u = components(c₁)
  v = components(c₂)
  total = sum(log(u[i] / u[j]) * log(v[i] / v[j]) for j in 1:D for i in (j + 1):D)
  total / D
end
Expand Down Expand Up @@ -147,7 +146,4 @@ end
# IO METHODS
# -----------

function Base.show(io::IO, c::Composition)
w = [(@sprintf "%.03f" w) for w in components(c)]
show(io, join(w, " : "))
end
function Base.show(io::IO, c::Composition)
  # Format every component with three decimal places and write them to
  # `io` separated by " : ".
  formatted = (@sprintf("%.03f", w) for w in components(c))
  join(io, formatted, " : ")
end
33 changes: 19 additions & 14 deletions src/covariances.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ Return the variation matrix `Τ` of the `table` such that:
"""
function variation(table)
X = Tables.matrix(table)
n = Tables.columnnames(table) |> collect
D = size(X, 2)
L = log.(X .+ eps())

Expand All @@ -27,7 +26,9 @@ function variation(table)
end
end

AxisArray(T, row=n, col=n)
cols = Tables.columns(table)
names = Tables.columnnames(cols) |> collect
AxisArray(T, row=names, col=names)
end

"""
Expand All @@ -38,12 +39,14 @@ Return the log-ratio covariance matrix `Σ` of the `table` such that:
- `Σ[i,j] = cov(log(x[i]/x[D]), log(x[j]/x[D]))` for `i, j = 1, ..., d`
"""
function alrcov(table)
alrtable = table |> ALR()

Σ = cov(Tables.matrix(alrtable), dims=1)
X = Tables.matrix(table)
Y = mapslices(alr ∘ Composition, X, dims=2)
Σ = cov(Y, dims=1)

vars = Tables.columnnames(alrtable) |> collect
AxisArray(Σ, row=vars, col=vars)
cols = Tables.columns(table)
names = Tables.columnnames(cols) |> collect
names = names[begin:(end - 1)]
AxisArray(Σ, row=names, col=names)
end

"""
Expand All @@ -55,12 +58,13 @@ Return the centered log-ratio covariance matrix `Γ` of the `table` such that:
where `g(x)` is the geometric mean.
"""
function clrcov(table)
clrtable = table |> CLR()

Γ = cov(Tables.matrix(clrtable), dims=1)
X = Tables.matrix(table)
Y = mapslices(clr ∘ Composition, X, dims=2)
Γ = cov(Y, dims=1)

vars = Tables.columnnames(clrtable) |> collect
AxisArray(Γ, row=vars, col=vars)
cols = Tables.columns(table)
names = Tables.columnnames(cols) |> collect
AxisArray(Γ, row=names, col=names)
end

"""
Expand All @@ -87,6 +91,7 @@ function lrarray(table)
A[i, i] = 0.0
end

vars = Tables.columnnames(table) |> collect
AxisArray(A, row=vars, col=vars)
cols = Tables.columns(table)
names = Tables.columnnames(cols) |> collect
AxisArray(A, row=names, col=names)
end
7 changes: 7 additions & 0 deletions src/logratio.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENCE in the project root.
# ------------------------------------------------------------------

include("logratio/alr.jl")
include("logratio/clr.jl")
include("logratio/ilr.jl")
22 changes: 22 additions & 0 deletions src/logratio/alr.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# ------------------------------------------------------------------
# Licensed under the MIT License. See LICENCE in the project root.
# ------------------------------------------------------------------

"""
    alr(c)

Additive log-ratio transformation of composition `c`.

Every component is shifted by `eps()` before the ratios are taken.
The result is an `SVector` with `D - 1` entries, where entry `i` is
`log(w[i] / w[D])`, i.e. the last part serves as the denominator.
"""
function alr(c::Composition{D}) where {D}
  shifted = components(c) .+ eps()
  ref = shifted[D]
  SVector(ntuple(k -> log(shifted[k] / ref), D - 1))
end

"""
    alrinv(x)

Inverse alr transformation of coordinates `x`.

The coordinates are exponentiated, a trailing `one(T)` is appended,
and the resulting vector is passed through `𝒞` before being wrapped
in a `Composition`.
"""
function alrinv(x::SVector{D,T}) where {D,T<:Real}
  expanded = [exp.(x); SVector(one(T))]
  Composition(𝒞(expanded))
end

function alrinv(x::AbstractVector)
  # Fallback for generic vectors: convert to a static vector of the
  # same length and call `alrinv` again on the result.
  n = length(x)
  alrinv(SVector{n}(x))
end
35 changes: 0 additions & 35 deletions src/transforms/clr.jl → src/logratio/clr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
# Licensed under the MIT License. See LICENCE in the project root.
# ------------------------------------------------------------------

# -------------
# COMPOSITIONS
# -------------

"""
clr(c)
Expand All @@ -25,34 +21,3 @@ Inverse clr transformation of coordinates `x`.
function clrinv(x::SVector{D,T}) where {D,T<:Real}
  # Exponentiate the coordinates, pass them through `𝒞`, and wrap the
  # result in a `Composition`.
  weights = exp.(x)
  Composition(𝒞(weights))
end

function clrinv(x::AbstractVector)
  # Fallback for generic vectors: convert to a static vector of the
  # same length and call `clrinv` again on the result.
  n = length(x)
  clrinv(SVector{n}(x))
end

# -------
# TABLES
# -------

"""
CLR()
Centered log-ratio transform following the
[TableTransforms.jl](https://github.com/JuliaML/TableTransforms.jl)
interface.
"""
struct CLR <: LogRatio end

refvar(::CLR, vars) = last(vars)

newvars(::CLR, n) = collect(n)

oldvars(::CLR, vars, rvar) = collect(vars)

function applymatrix(::CLR, X)
μ = geomean.(eachrow(X))
L = log.(X .+ eps())
l = log.(μ .+ eps())
L .- l
end

function revertmatrix(::CLR, Y)
E = exp.(Y)
mapslices(𝒞, E, dims=2)
end
38 changes: 0 additions & 38 deletions src/transforms/ilr.jl → src/logratio/ilr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,6 @@
# Licensed under the MIT License. See LICENCE in the project root.
# ------------------------------------------------------------------

# -------------
# COMPOSITIONS
# -------------

"""
ilr(c)
Expand Down Expand Up @@ -54,37 +50,3 @@ function ilrinv(x::SVector{D}) where {D}
end

function ilrinv(x::AbstractVector)
  # Fallback for generic vectors: convert to a static vector of the
  # same length and call `ilrinv` again on the result.
  n = length(x)
  ilrinv(SVector{n}(x))
end

# -------
# TABLES
# -------

"""
ILR([refvar])
Isometric log-ratio transform following the
[TableTransforms.jl](https://github.com/JuliaML/TableTransforms.jl)
interface.
Optionally, specify the reference variable `refvar` for the ratios.
Default to the last column of the input table.
"""
struct ILR <: LogRatio
refvar::Union{Symbol,Nothing}
end

ILR() = ILR(nothing)

refvar(transform::ILR, vars) = isnothing(transform.refvar) ? last(vars) : transform.refvar

newvars(::ILR, n) = collect(n)[begin:(end - 1)]

oldvars(::ILR, vars, rvar) = [collect(vars); rvar]

applymatrix(::ILR, X) = mapslices(ilr ∘ Composition, X, dims=2)

function revertmatrix(::ILR, Y)
D = size(Y, 2)
f = components ∘ ilrinv ∘ SVector{D}
mapslices(f, Y, dims=2)
end
Loading

0 comments on commit f0f445c

Please sign in to comment.