diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 938cde6cddf..3499c7ad89b 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -138,8 +138,8 @@ struct orcdec_state_s { * This class is used to address a special case, where the first run spans two adjacent row groups * and its length is greater than the maximum length allowed to be consumed. This limit is imposed * by the decoder when processing the SECONDARY stream. This class shall be instantiated in the - * shared memory. As an optimization, the actual cache is a local variable and does not reside in - * the shared memory. + * shared memory, and be used to cache the DATA stream with a decoded data type of `int64_t`. As an + * optimization, the actual cache is a local variable and does not reside in the shared memory. */ class run_cache_manager { private: @@ -187,7 +187,7 @@ class run_cache_manager { * @brief Adjust the maximum length allowed to be consumed when the length of the first run is * greater than it. * - * @param[in] max_length The maximum length allowed to be consumed. + * @param[in] max_length The maximum length allowed to be consumed for the DATA stream. * @return A new maximum length. */ __device__ uint32_t adjust_max_length(uint32_t max_length) @@ -203,7 +203,7 @@ class run_cache_manager { * @brief Copy the excess data from the intermediate buffer for the DATA stream to the cache. * * @param[in] src Intermediate buffer for the DATA stream. - * @param[out] cache Local variable serving as the cache. + * @param[out] cache Local variable serving as the cache for the DATA stream. */ __device__ void write_to_cache(int64_t* src, int64_t& cache) { @@ -235,7 +235,7 @@ class run_cache_manager { * * @param[in,out] dst Intermediate buffer for the DATA stream. * @param[in,out] rle Run length decoder state object. - * @param[in] cache Local variable serving as the cache. + * @param[in] cache Local variable serving as the cache for the DATA stream. */ __device__ void read_from_cache(int64_t* dst, orc_rlev2_state_s* rle, int64_t cache) { diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.timestampDesynced.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.snappy.RLEv2.orc similarity index 100% rename from python/cudf/cudf/tests/data/orc/TestOrcFile.timestampDesynced.orc rename to python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.snappy.RLEv2.orc diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.timestampDesyncedSnappy.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.uncompressed.RLEv2.orc similarity index 93% rename from python/cudf/cudf/tests/data/orc/TestOrcFile.timestampDesyncedSnappy.orc rename to python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.uncompressed.RLEv2.orc index a0ea4fbbfc2..8a7969cdbbb 100644 Binary files a/python/cudf/cudf/tests/data/orc/TestOrcFile.timestampDesyncedSnappy.orc and b/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.uncompressed.RLEv2.orc differ diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 47c067e1c0b..c2be65bce74 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -1975,8 +1975,8 @@ def test_row_group_alignment(datadir): @pytest.mark.parametrize( "inputfile", [ - "TestOrcFile.timestampDesynced.orc", - "TestOrcFile.timestampDesyncedSnappy.orc", + "TestOrcFile.timestamp.desynced.uncompressed.RLEv2.orc", + "TestOrcFile.timestamp.desynced.snappy.RLEv2.orc", ], ) def test_orc_reader_desynced_timestamp(datadir, inputfile):