From 222be7023e489be2dc69cc1f7e46606e50ffdee0 Mon Sep 17 00:00:00 2001
From: vogu66 <24728720+vogu66@users.noreply.github.com>
Date: Mon, 3 Oct 2022 19:13:34 +0200
Subject: [PATCH] Allowing reading for BD Accuri C6 and Attune files (#13)

* fix BD Accuri skipping offsets for analysis segment

* first try of support for floats

* Added tests for floating point files and Accuri files

* Replaced file for FCS2.0 and fix test so it recognizes the exception
[fixing tests for rebase]

* respond to comments at https://github.com/tlnagy/FCSFiles.jl/pull/13#pullrequestreview-1127558551

* remove extra files and dependencies
---
 src/parse.jl      | 30 +++++++++++++++++++++++++-----
 test/Project.toml |  1 -
 test/runtests.jl  | 16 +++++++++++++++-
 3 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/src/parse.jl b/src/parse.jl
index 5c625e8..6e22f22 100644
--- a/src/parse.jl
+++ b/src/parse.jl
@@ -9,11 +9,24 @@ function parse_header(io)
     seek(io, 10)
     # start, end positions of TEXT, DATA, and ANALYSIS sections
     offsets = Array{Int64}(undef, 6)
+
     for i in 1:6
         # offsets are encoded as ASCII strings
         raw_str = Array{UInt8}(undef, 8)
         read!(io, raw_str)
         offsets_str = String(raw_str)
+
+        # the last two offsets are for the ANALYSIS segment
+        # the ANALYSIS segment is optional, although its offset fields
+        # should always be present in the header
+        # (FCS 3.1 ref at https://isac-net.org/page/Data-Standards)
+        # some cytometers (BD Accuri) leave these two fields blank
+        # other cytometers write "0" there, so treat a blank field as "0"
+        # see github discussion:
+        # https://github.com/tlnagy/FCSFiles.jl/pull/13#discussion_r985251676
+        if isempty(lstrip(offsets_str)) && i>4
+            offsets_str="0"
+        end
         offsets[i] = parse(Int, strip(join(offsets_str)))
     end
 
@@ -23,7 +36,7 @@ function parse_header(io)
         offsets[3] = parse(Int64, text_mappings["\$BEGINDATA"])
         offsets[4] = parse(Int64, text_mappings["\$ENDDATA"])
     end
-    offsets
+    return offsets
 end
 
 
@@ -58,10 +71,17 @@ function parse_data(io,
                     end_data::Int,
                     text_mappings::Dict{String, String})
     seek(io, start_data)
-    # Add support for data types other than float
-    (text_mappings["\$DATATYPE"] != "F") && error("Non float32 support not implemented yet. Please see github issues for this project.")
 
-    flat_data = Array{Float32}(undef, (end_data - start_data + 1) ÷ 4)
+    # data type in FCS3.1 can be I (integer), F (float32), D (float64), A (ASCII)
+    if text_mappings["\$DATATYPE"] == "I"
+        dtype = Int32
+    elseif text_mappings["\$DATATYPE"] == "F"
+        dtype = Float32
+    else
+        error("Only float and integer data types are implemented for now, the required .fcs file is using another number encoding.")
+    end
+
+    flat_data = Array{dtype}(undef, (end_data - start_data + 1) ÷ 4)
     read!(io, flat_data)
     endian_func = get_endian_func(text_mappings)
     map!(endian_func, flat_data, flat_data)
@@ -71,7 +91,7 @@ function parse_data(io,
     # data should be in multiples of `n_params` for list mode
     (mod(length(flat_data), n_params) != 0) && error("FCS file is corrupt. DATA and TEXT sections don't match.")
 
-    data = Dict{String, Vector{Float32}}()
+    data = Dict{String, Vector{dtype}}()
 
     for i in 1:n_params
         data[text_mappings["\$P$(i)N"]] = flat_data[i:n_params:end]
diff --git a/test/Project.toml b/test/Project.toml
index ce23579..e071e2f 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,4 +1,3 @@
 [deps]
 FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
-HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/runtests.jl b/test/runtests.jl
index af616fa..a1cde72 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,6 +1,6 @@
 using FCSFiles
 using FileIO
-using Test, HTTP
+using Test
 
 project_root = isfile("runtests.jl") ? abspath("..") : abspath(".")
 testdata_dir = joinpath(project_root, "test", "fcsexamples")
@@ -27,4 +27,18 @@ end
         @test length(flowrun.data) == 50
         @test length(flowrun.params) == 268
     end
+
+    @testset "Loading float-encoded file" begin
+        flowrun = load(joinpath(testdata_dir, "Applied Biosystems - Attune.fcs"))
+
+        @test length(flowrun["SSC-A"]) == 22188
+        @test flowrun["FSC-A"][2] == 244982.11f0
+    end
+
+    @testset "Loading Accuri file" begin
+        flowrun = load(joinpath(testdata_dir, "Accuri - C6.fcs"))
+        @test length(flowrun["SSC-A"]) == 63273
+        @test flowrun["SSC-A"][2] == 370971
+
+    end
 end