Skip to content

Commit

Permalink
Merge pull request #159 from JuliaAI/dev
Browse files Browse the repository at this point in the history
For a 2.2.2 release
  • Loading branch information
ablaom authored Sep 7, 2021
2 parents 26f10e8 + 39e7aab commit 81dddff
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ScientificTypes"
uuid = "321657f4-b219-11e9-178b-2701a2544e81"
authors = ["Anthony D. Blaom <[email protected]>"]
version = "2.2.1"
version = "2.2.2"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Expand Down
15 changes: 14 additions & 1 deletion src/convention/scitype.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,14 @@ ST.scitype(::Distributions.Distribution{F,S}) where {F,S} =

# Text analysis - EXPERIMENTAL

# This would be less of a hack if some of #155 were adopted.

# `type2scitype(T)` maps a machine *type* `T` (not an instance) to a scientific
# type, so that the scitype of a container can be computed from its element
# type alone.
#
# - Generic fallback: defer to `ST.Scitype` under the default convention.
# - Vector types: recurse on the element type.
# - Homogeneous tuple types (`NTuple{N,T}`): recurse on the element type and
#   keep the tuple length `N`.
#
# All methods dispatch on `Type{...}` because callers pass element *types*
# (see the `Document` method of `ST.scitype`, which calls `type2scitype(T)`).
type2scitype(T::Type) = ST.Scitype(T, DefaultConvention())
type2scitype(::Type{<:AbstractVector{T}}) where T =
    AbstractVector{type2scitype(T)}
# Previously this method dispatched on a tuple instance and used the invalid
# curly form `type2scitype{T}`, so it could never be reached for a type and
# would have thrown if called; it also dropped the length parameter `N`.
# NOTE(review): for the empty tuple type `Tuple{}` the parameter `T` is
# unbound; confirm whether that case needs its own method.
type2scitype(::Type{<:NTuple{N,T}}) where {N,T} =
    NTuple{N,type2scitype(T)}
# Type aliases for n-grams, used below as dictionary key types:
# - `PlainNGram{N}`: a tuple of `N` elements, each some kind of `AbstractString`.
# - `TaggedNGram{N}`: a tuple of `N` elements, each some kind of
#   `CorpusLoaders.TaggedWord`.
# Omitting `{N}` (plain `PlainNGram` / `TaggedNGram`) leaves the length free.
# NOTE(review): neighbouring methods refer to `TaggedWord` unqualified;
# presumably this is the same type as `CorpusLoaders.TaggedWord` — confirm.
const PlainNGram{N} = NTuple{N,<:AbstractString}
const TaggedNGram{N} = NTuple{N,<:CorpusLoaders.TaggedWord}
# A single tagged word has scitype `Annotated{Textual}` under the default
# convention.
ST.scitype(::TaggedWord, ::DefaultConvention) = Annotated{Textual}
# A `Document` whose type parameter is a vector with element type `T` has
# scitype `Annotated{AbstractVector{S}}`, where `S = type2scitype(T)` is
# computed from the element *type* alone, without inspecting the elements.
ST.scitype(::Document{<:AbstractVector{T}}, ::DefaultConvention) where T =
Annotated{AbstractVector{type2scitype(T)}}
Expand All @@ -80,7 +85,15 @@ ST.scitype(::AbstractDict{<:TaggedWord,<:Integer},
::DefaultConvention) = Multiset{Annotated{Textual}}
ST.scitype(::AbstractDict{<:Union{TaggedWord,AbstractString},<:Integer},
::DefaultConvention) =
Multiset{Annotated{Textual}}
Multiset{Union{Textual,Annotated{Textual}}}
# Bags of n-grams: dictionaries whose keys are tuples of words.
#
# The two `where N` methods apply when the key type fixes a single tuple
# length `N`; the result is then `Multiset{NTuple{N,...}}`. The two methods
# without `N` apply when the key type admits tuples of differing lengths,
# giving `Multiset{NTuple{<:Any,...}}`.
#
# NOTE(review): unlike the other `ST.scitype` methods in this section, these
# take no `::DefaultConvention` argument and place no constraint on the
# dictionary's value type — confirm both omissions are intentional.
ST.scitype(::AbstractDict{<:PlainNGram{N}}) where N =
Multiset{NTuple{N,Textual}}
ST.scitype(::AbstractDict{<:TaggedNGram{N}}) where N =
Multiset{NTuple{N,Annotated{Textual}}}
ST.scitype(::AbstractDict{<:PlainNGram}) =
Multiset{NTuple{<:Any,Textual}}
ST.scitype(::AbstractDict{<:TaggedNGram}) =
Multiset{NTuple{<:Any,Annotated{Textual}}}

# Scitype for fast array broadcasting

Expand Down
39 changes: 35 additions & 4 deletions test/scitypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,43 @@ end
@test scitype(bag_of_words) == Multiset{Textual}
bag_of_tagged_words = Dict(tagged_word => 5)
@test scitype(bag_of_tagged_words) == Multiset{Annotated{Textual}}
@test scitype(Document("kadsfkj", "My Document")) == Unknown
@test scitype(Document([tagged_word, tagged_word2], "My Other Doc")) ==
@test scitype(Document("My Document", "kadsfkj")) == Unknown
@test scitype(Document([tagged_word, tagged_word2])) ==
Annotated{AbstractVector{Annotated{Textual}}}
@test scitype(Document("My Other Doc", [tagged_word, tagged_word2])) ==
Annotated{AbstractVector{Annotated{Textual}}}
nested_tokens = [["dog", "cat"], ["bird", "cat"]]
@test scitype(Document(nested_tokens), "Essay Number 1") ==
Annotated{AbstractVector{AbstractVector{Textual}}}
@test scitype(Document("Essay Number 1", nested_tokens)) ==
Annotated{AbstractVector{AbstractVector{Textual}}}

@test scitype(Dict(("cat", "in") => 3)) == Multiset{Tuple{Textual,Textual}}
bag_of_words = Dict("cat in" => 1,
"the hat" => 1,
"the" => 2,
"cat" => 1,
"hat" => 1,
"in the" => 1,
"in" => 1,
"the cat" => 1)
bag_of_ngrams =
Dict(Tuple(String.(split(k))) => v for (k, v) in bag_of_words)
# Dict{Tuple{String, Vararg{String, N} where N}, Int64} with 8 entries:
# ("cat",) => 1
# ("cat", "in") => 1
# ("in",) => 1
# ("the", "hat") => 1
# ("the",) => 2
# ("hat",) => 1
# ("in", "the") => 1
# ("the", "cat") => 1
@test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Textual}}

@test scitype(Dict((tagged_word, tagged_word2) => 3)) ==
Multiset{Tuple{Annotated{Textual},Annotated{Textual}}}
bag_of_ngrams = Dict((tagged_word, tagged_word2) => 3,
(tagged_word,) => 7)
@test scitype(bag_of_ngrams) == Multiset{NTuple{<:Any,Annotated{Textual}}}

end

@testset "Autotype+tight" begin
Expand Down

0 comments on commit 81dddff

Please sign in to comment.