From 769ee4032b5eaa66ee7586daa6f11494235c5607 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 22 Aug 2024 16:24:40 +0200 Subject: [PATCH 01/21] test: loosen message on test_2757 due to upstream Cython changes (#3224) --- tests/test_2757_attrs_metadata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_2757_attrs_metadata.py b/tests/test_2757_attrs_metadata.py index ed5ddf8639..de04074860 100644 --- a/tests/test_2757_attrs_metadata.py +++ b/tests/test_2757_attrs_metadata.py @@ -42,7 +42,8 @@ def test_serialise_with_nonserialisable_attrs(array_pickler): attrs = {**SOME_ATTRS, "non_transient_key": lambda: None} array = ak.Array([1, 2, 3], attrs=attrs) with pytest.raises( - (AttributeError, array_pickler.PicklingError), match=r"Can't pickle" + (AttributeError, array_pickler.PicklingError), + match=r"(pickle|local object)", ): array_pickler.loads(array_pickler.dumps(array)) From 81c48fce08bd5f919ff349266adcd404ab6297e5 Mon Sep 17 00:00:00 2001 From: maxymnaumchyk <70752300+maxymnaumchyk@users.noreply.github.com> Date: Tue, 27 Aug 2024 18:26:06 +0300 Subject: [PATCH 02/21] feat: to TensorFlow RaggedTensor (#3210) * feat: to TensorFlow RaggedTensor * style: pre-commit fixes * fix the tensorflow library import * style: pre-commit fixes * update exception * added some tests for different data types conversions * change end of line formats * add tensorflow library to the test-full-requirements * add tensorflow library to the test-full-requirements * change tensorflow library version * change tensorflow library version * change tensorflow library version * add a new github actions test for ml libraries * delete tensorflow from full test requirements * update requirements-test-ml.txt * update requirements-test-full.txt * update requirements-test-ml.txt * update the docstring for the main function * add a new function from_raggedtensor * delete import of tensorflow library * minor changes * fix the tests names Co-authored-by: Ianna Osborne * Apply suggestions from code review Co-authored-by: Jim Pivarski --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Ianna Osborne Co-authored-by: Jim Pivarski --- .github/workflows/test.yml | 4 + requirements-test-ml.txt | 6 + src/awkward/operations/__init__.py | 2 + .../operations/ak_from_raggedtensor.py | 67 ++++++++++ src/awkward/operations/ak_to_raggedtensor.py | 84 ++++++++++++ ..._3210_to_raggedtensor_from_raggedtensor.py | 122 ++++++++++++++++++ 6 files changed, 285 insertions(+) create mode 100644 requirements-test-ml.txt create mode 100644 src/awkward/operations/ak_from_raggedtensor.py create mode 100644 src/awkward/operations/ak_to_raggedtensor.py create mode 100644 tests/test_3210_to_raggedtensor_from_raggedtensor.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ea52274645..5ef43543af 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -65,6 +65,10 @@ jobs: python-architecture: x64 runs-on: ubuntu-latest dependencies-kind: pypy + - python-version: '3.11' + python-architecture: x64 + runs-on: ubuntu-latest + dependencies-kind: ml runs-on: ${{ matrix.runs-on }} diff --git a/requirements-test-ml.txt b/requirements-test-ml.txt new file mode 100644 index 0000000000..d715854439 --- /dev/null +++ b/requirements-test-ml.txt @@ -0,0 +1,6 @@ +fsspec>=2022.11.0;sys_platform != "win32" +pytest>=6 +pytest-cov +pytest-xdist +tensorflow >= 2.12 +torch >= 2.4.0 diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py index d0cee81508..6d4a84c565 100644 --- a/src/awkward/operations/__init__.py +++ b/src/awkward/operations/__init__.py @@ -44,6 +44,7 @@ from awkward.operations.ak_from_json import * from awkward.operations.ak_from_numpy import * from awkward.operations.ak_from_parquet import * +from awkward.operations.ak_from_raggedtensor import * from awkward.operations.ak_from_rdataframe import * from awkward.operations.ak_from_regular import * from awkward.operations.ak_full_like import * @@ -97,6 +98,7 @@ from awkward.operations.ak_to_parquet import * from awkward.operations.ak_to_parquet_dataset import * from awkward.operations.ak_to_parquet_row_groups import * +from awkward.operations.ak_to_raggedtensor import * from awkward.operations.ak_to_rdataframe import * from awkward.operations.ak_to_regular import * from awkward.operations.ak_transform import * diff --git a/src/awkward/operations/ak_from_raggedtensor.py b/src/awkward/operations/ak_from_raggedtensor.py new file mode 100644 index 0000000000..ba20382caf --- /dev/null +++ b/src/awkward/operations/ak_from_raggedtensor.py @@ -0,0 +1,67 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import awkward as ak +from awkward._dispatch import high_level_function + +__all__ = ("from_raggedtensor",) + + +@high_level_function() +def from_raggedtensor(array): + """ + Args: + array: (`tensorflow.RaggedTensor`): + RaggedTensor to convert into an Awkward Array. + + Converts a TensorFlow RaggedTensor into an Awkward Array. + + If `array` contains any other data types the function raises an error. + """ + + # Dispatch + yield (array,) + + # Implementation + return _impl(array) + + +def _impl(array): + try: + # get the flat values + content = array.flat_values.numpy() + except AttributeError as err: + raise TypeError( + """only RaggedTensor can be converted to awkward array""" + ) from err + # convert them to ak.contents right away + content = ak.contents.NumpyArray(content) + + # get the offsets + offsets_arr = [] + for splits in array.nested_row_splits: + split = splits.numpy() + # convert to ak.index + offset = ak.index.Index64(split) + offsets_arr.append(offset) + + # if a tensor has one *ragged dimension* + if len(offsets_arr) == 1: + result = ak.contents.ListOffsetArray(offsets_arr[0], content) + return ak.Array(result) + + # if a tensor has multiple *ragged dimensions* + return ak.Array(_recursive_call(content, offsets_arr, 0)) + + +def _recursive_call(content, offsets_arr, count): + if count == len(offsets_arr) - 2: + return ak.contents.ListOffsetArray( + offsets_arr[count], + ak.contents.ListOffsetArray(offsets_arr[count + 1], content), + ) + else: + return ak.contents.ListOffsetArray( + offsets_arr[count], _recursive_call(content, offsets_arr, count) + ) diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py new file mode 100644 index 0000000000..16c15dcd47 --- /dev/null +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -0,0 +1,84 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import awkward as ak +from awkward._dispatch import high_level_function + +__all__ = ("to_raggedtensor",) + + +@high_level_function() +def to_raggedtensor(array): + """ + Args: + array: Array-like data. May be a high level #ak.Array, + or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray, + #ak.contents.RegularArray, #ak.contents.NumpyArray + + Converts `array` (only ListOffsetArray, ListArray, RegularArray and NumpyArray data types supported) + into a ragged tensor, if possible. + + If `array` contains any other data types (RecordArray for example) the function raises an error. + """ + + # Dispatch + yield (array,) + + # Implementation + return _impl(array) + + +def _impl(array): + try: + import tensorflow as tf + except ImportError as err: + raise ImportError( + """to use ak.to_raggedtensor, you must install the 'tensorflow' package with: + + pip install tensorflow +or + conda install tensorflow""" + ) from err + + # unwrap the awkward array if it was made with ak.Array function + # also transforms a python list to awkward array + array = ak.to_layout(array, allow_record=False) + + if isinstance(array, ak.contents.numpyarray.NumpyArray): + return tf.RaggedTensor.from_row_splits( + values=array.data, row_splits=[0, array.__len__()] + ) + else: + flat_values, nested_row_splits = _recursive_call(array, ()) + + return tf.RaggedTensor.from_nested_row_splits(flat_values, nested_row_splits) + + +def _recursive_call(layout, offsets_arr): + try: + # change all the possible layout types to ListOffsetArray + if isinstance(layout, ak.contents.listarray.ListArray): + layout = layout.to_ListOffsetArray64() + elif isinstance(layout, ak.contents.regulararray.RegularArray): + layout = layout.to_ListOffsetArray64() + elif not isinstance( + layout, + ( + ak.contents.listoffsetarray.ListOffsetArray, + ak.contents.numpyarray.NumpyArray, + ), + ): + raise TypeError( + "Only arrays containing variable-length lists (var *) or" + " regular-length lists (# *) of numbers can be converted into a TensorFlow RaggedTensor" + ) + + # recursively gather all of the offsets of an array + offsets_arr += (layout.offsets.data,) + + except AttributeError: + # at the last iteration form a ragged tensor from the + # accumulated offsets and flattened values of the array + return layout.data, offsets_arr + return _recursive_call(layout.content, offsets_arr) diff --git a/tests/test_3210_to_raggedtensor_from_raggedtensor.py b/tests/test_3210_to_raggedtensor_from_raggedtensor.py new file mode 100644 index 0000000000..d250910c09 --- /dev/null +++ b/tests/test_3210_to_raggedtensor_from_raggedtensor.py @@ -0,0 +1,122 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import numpy as np +import pytest + +import awkward as ak + +to_raggedtensor = ak.operations.to_raggedtensor +from_raggedtensor = ak.operations.from_raggedtensor + +tf = pytest.importorskip("tensorflow") + +content = ak.contents.NumpyArray( + np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]) +) +starts1 = ak.index.Index64(np.array([0, 3, 3, 5, 6])) +stops1 = ak.index.Index64(np.array([3, 3, 5, 6, 9])) +starts2 = ak.index.Index64(np.array([0, 3])) +stops2 = ak.index.Index64(np.array([3, 5])) + +array = np.arange(2 * 3 * 5).reshape(2, 3, 5) +content2 = ak.contents.NumpyArray(array.reshape(-1)) +inneroffsets = ak.index.Index64(np.array([0, 5, 10, 15, 20, 25, 30])) +outeroffsets = ak.index.Index64(np.array([0, 3, 6])) + + +def test_convert_to_raggedtensor(): + # a test for ListArray -> RaggedTensor + array1 = ak.contents.ListArray(starts1, stops1, content) + assert to_raggedtensor(array1).to_list() == [ + [1.1, 2.2, 3.3], + [], + [4.4, 5.5], + [6.6], + [7.7, 8.8, 9.9], + ] + + # a test for awkward.highlevel.Array -> RaggedTensor + array2 = ak.Array(array1) + assert to_raggedtensor(array2).to_list() == [ + [1.1, 2.2, 3.3], + [], + [4.4, 5.5], + [6.6], + [7.7, 8.8, 9.9], + ] + + # a test for NumpyArray -> RaggedTensor + array3 = content + assert to_raggedtensor(array3).to_list() == [ + [1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9] + ] + + # a test for RegularArray -> RaggedTensor + array4 = ak.contents.RegularArray(content, size=2) + assert to_raggedtensor(array4).to_list() == [ + [1.1, 2.2], + [3.3, 4.4], + [5.5, 6.6], + [7.7, 8.8], + ] + + # try a single line awkward array + array5 = ak.Array([3, 1, 4, 1, 9, 2, 6]) + assert to_raggedtensor(array5).to_list() == [[3, 1, 4, 1, 9, 2, 6]] + + # try a multiple ragged array + array6 = ak.Array([[[1.1, 2.2], [3.3]], [], [[4.4, 5.5]]]) + assert to_raggedtensor(array6).to_list() == [[[1.1, 2.2], [3.3]], [], [[4.4, 5.5]]] + + # try a listoffset array inside a listoffset array + array7 = ak.contents.ListOffsetArray( + outeroffsets, ak.contents.ListOffsetArray(inneroffsets, content2) + ) + assert to_raggedtensor(array7).to_list() == [ + [[0, 1, 2, 3, 4], [5, 6, 7, 8, 9], [10, 11, 12, 13, 14]], + [[15, 16, 17, 18, 19], [20, 21, 22, 23, 24], [25, 26, 27, 28, 29]], + ] + + # try a list array inside a list array + + array8 = ak.contents.ListArray( + starts2, stops2, ak.contents.ListArray(starts1, stops1, content) + ) + assert to_raggedtensor(array8).to_list() == [ + [[1.1, 2.2, 3.3], [], [4.4, 5.5]], + [[6.6], [7.7, 8.8, 9.9]], + ] + + # try just a python list + array9 = [3, 1, 4, 1, 9, 2, 6] + assert to_raggedtensor(array9).to_list() == [[3, 1, 4, 1, 9, 2, 6]] + + +np_array1 = np.array([1.1, 2.2, 3.3, 4.4, 5.5], dtype=np.float32) + +offsets1 = ak.index.Index64(np.array([0, 2, 3, 3, 5])) +content1 = ak.contents.NumpyArray(np_array1) + + +def test_convert_from_raggedtensor(): + tf_array1 = tf.RaggedTensor.from_row_splits( + values=[1.1, 2.2, 3.3, 4.4, 5.5], row_splits=[0, 2, 3, 3, 5] + ) + + ak_array1 = ak.contents.ListOffsetArray(offsets1, content1) + result1 = ak.to_layout(from_raggedtensor(tf_array1), allow_record=False) + assert (result1.content.data == np_array1).all() + assert (result1.offsets.data == [0, 2, 3, 3, 5]).all() + assert from_raggedtensor(tf_array1).to_list() == ak_array1.to_list() + + tf_array2 = tf.RaggedTensor.from_nested_row_splits( + flat_values=[3, 1, 4, 1, 5, 9, 2, 6], + nested_row_splits=([0, 3, 3, 5], [0, 4, 4, 7, 8, 8]), + ) + assert from_raggedtensor(tf_array2).to_list() == [ + [[3, 1, 4, 1], [], [5, 9, 2]], + [], + [[6], []], + ] From c30c14eaad07c8733a8718ad687e5848b8833950 Mon Sep 17 00:00:00 2001 From: Topher Cawlfield <4094385+tcawlfield@users.noreply.github.com> Date: Wed, 28 Aug 2024 11:33:55 -0600 Subject: [PATCH 03/21] fix: add `ak.array_equal` to NEP18 overrides, documentation, and add one more test (#3225) * Fixing missed changes with ak.array_equal * Now overrides numpy.array_equal * Adding link to toctree * Unit test checks numpy override * np.array_equal no longer supports any numpy =? awkward * Fixing issue with ak_almost_equal and backend checks Using ensure_same_backend now, which raises an exception instead of returning False. This is a new behavior for ak.almost_equal. This also removes a shortcut in the code, and we get a NotImplementedError on a typetracer backend. Test_2678 updated to match this expectation. --- docs/reference/toctree.txt | 3 ++- src/awkward/operations/ak_almost_equal.py | 18 +++++++++--------- src/awkward/operations/ak_array_equal.py | 1 + tests/test_1105_ak_aray_equal.py | 7 +++++++ tests/test_2305_nep_18_lazy_conversion.py | 5 ++++- tests/test_2678_same_backend.py | 9 +++++---- 6 files changed, 28 insertions(+), 15 deletions(-) diff --git a/docs/reference/toctree.txt b/docs/reference/toctree.txt index 126e0a2b9c..4b6ae1154d 100644 --- a/docs/reference/toctree.txt +++ b/docs/reference/toctree.txt @@ -299,11 +299,12 @@ generated/ak.backend .. toctree:: - :caption: Approximation + :caption: Approximation and comparison generated/ak.round generated/ak.isclose generated/ak.almost_equal + generated/ak.array_equal .. toctree:: :caption: NumPy compatibility diff --git a/src/awkward/operations/ak_almost_equal.py b/src/awkward/operations/ak_almost_equal.py index 78461de65c..66f67e4d8a 100644 --- a/src/awkward/operations/ak_almost_equal.py +++ b/src/awkward/operations/ak_almost_equal.py @@ -2,10 +2,11 @@ from __future__ import annotations -from awkward._backends.dispatch import backend_of_obj +from awkward._backends.dispatch import backend_of from awkward._backends.numpy import NumpyBackend from awkward._behavior import behavior_of, get_array_class, get_record_class from awkward._dispatch import high_level_function +from awkward._layout import ensure_same_backend from awkward._nplikes.numpy_like import NumpyMetadata from awkward._parameters import parameters_are_equal from awkward.operations.ak_to_layout import to_layout @@ -82,14 +83,13 @@ def _impl( left_behavior = behavior_of(left) right_behavior = behavior_of(right) - left_backend = backend_of_obj(left, default=cpu) - right_backend = backend_of_obj(right, default=cpu) - if left_backend is not right_backend: - return False - backend = left_backend - - left_layout = to_layout(left, allow_record=False).to_packed() - right_layout = to_layout(right, allow_record=False).to_packed() + layouts = ensure_same_backend( + to_layout(left, allow_record=False), + to_layout(right, allow_record=False), + ) + left_layout = layouts[0].to_packed() + right_layout = layouts[1].to_packed() + backend = backend_of(left_layout) if not backend.nplike.known_data: raise NotImplementedError( diff --git a/src/awkward/operations/ak_array_equal.py b/src/awkward/operations/ak_array_equal.py index 2a7221baab..398db6b2a6 100644 --- a/src/awkward/operations/ak_array_equal.py +++ b/src/awkward/operations/ak_array_equal.py @@ -8,6 +8,7 @@ __all__ = ("array_equal",) +@ak._connect.numpy.implements("array_equal") @high_level_function() def array_equal( a1, diff --git a/tests/test_1105_ak_aray_equal.py b/tests/test_1105_ak_aray_equal.py index 519d0039c5..7512f50316 100644 --- a/tests/test_1105_ak_aray_equal.py +++ b/tests/test_1105_ak_aray_equal.py @@ -88,3 +88,10 @@ def test_array_equal_with_params(): ) assert not ak.array_equal(a1, a2) assert ak.array_equal(a1, a2, check_parameters=False) + + +def test_array_equal_numpy_override(): + assert np.array_equal( + ak.Array([[1, 2], [], [3, 4, 5]]), + ak.Array([[1, 2], [], [3, 4, 5]]), + ) diff --git a/tests/test_2305_nep_18_lazy_conversion.py b/tests/test_2305_nep_18_lazy_conversion.py index 8ce4c88763..a3c9cd71d0 100644 --- a/tests/test_2305_nep_18_lazy_conversion.py +++ b/tests/test_2305_nep_18_lazy_conversion.py @@ -11,7 +11,10 @@ def test_binary(): ak_array = ak.Array(np.arange(10, dtype=" Date: Thu, 29 Aug 2024 12:15:36 -0600 Subject: [PATCH 04/21] feat: provide a way for `Form.select_columns` to distinguish structural dots from dots in the names of record fields (#3222) * Allowing list/tuple in specifier of Form.select_columns Actually, allowing a list of lists of str in case a literal "." appears as a record name. * Adding docstring to Form.select_columns * Moving unit tests to test_3088_... Added a new test for from_parquet. Added some documentation to from_parquet. * style: pre-commit fixes * select_columns uses isinstance Iterable Also correcting some documentation and exception text. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jim Pivarski --- src/awkward/forms/form.py | 52 ++++++++++--- src/awkward/operations/ak_from_parquet.py | 5 +- ...88_select_columns_supports_literal_dots.py | 76 +++++++++++++++++++ 3 files changed, 120 insertions(+), 13 deletions(-) create mode 100644 tests/test_3088_select_columns_supports_literal_dots.py diff --git a/src/awkward/forms/form.py b/src/awkward/forms/form.py index 1c72bf740f..3f9bec55eb 100644 --- a/src/awkward/forms/form.py +++ b/src/awkward/forms/form.py @@ -333,7 +333,7 @@ def __call__(self, field: str, *, next_match_if_empty: bool = False) -> Self | N has_matched = True next_specifiers.extend(self._match_to_next_specifiers[field]) - # Fixed-strings are an O(n) lookup + # Patterns are an O(n) lookup for pattern in self._patterns: if fnmatchcase(field, pattern): has_matched = True @@ -437,29 +437,59 @@ def columns(self, list_indicator=None, column_prefix=()): def select_columns( self, specifier, expand_braces=True, *, prune_unions_and_records: bool = True ): + """ + select_columns returns a new Form with only columns and sub-columns selected. + Returns an empty Form if no columns matched the specifier(s). + + `specifier` can be a `str | Iterable[str | Iterable[str]]`. + Strings may include shell-globbing-style wildcards "*" and "?". + If `expand_braces` is `True` (the default), strings may include alternatives in braces. + For example, `["a.{b,c}.d"]` is equivalent to `["a.b.d", "a.c.d"]`. + Glob-style matching would also suit this single-character instance: `"a.[bc].d"`. + If specifier is a list which contains a list/tuple, that inner list will be interpreted as + column and subcolumn specifiers. They *may* contain wildcards, but "." will not be + interpreted as a `.` pattern. + """ if isinstance(specifier, str): specifier = {specifier} # Only take unique specifiers for item in specifier: - if not isinstance(item, str): + if isinstance(item, str): + if item == "": + raise ValueError( + "a column-selection specifier cannot be an empty string" + ) + elif isinstance(item, Iterable): + for field in item: + if not isinstance(field, str): + raise ValueError("a sub-column specifier must be a string") + else: raise TypeError( - "a column-selection specifier must be a list of non-empty strings" - ) - if not item: - raise ValueError( - "a column-selection specifier must be a list of non-empty strings" + "a column specifier must be a string or an iterable of strings" ) if expand_braces: next_specifier = [] for item in specifier: - for result in _expand_braces(item): - next_specifier.append(result) + if isinstance(item, str): + for result in _expand_braces(item): + next_specifier.append(result) + else: + next_specifier.append(item) specifier = next_specifier - specifier = [[] if item == "" else item.split(".") for item in set(specifier)] - match_specifier = _SpecifierMatcher(specifier, match_if_empty=False) + # specifier = set(specifier) + specifier_lists: list[list[str]] = [] + for item in specifier: + if isinstance(item, str): + if item == "": + specifier_lists.append([]) + else: + specifier_lists.append(item.split(".")) + else: + specifier_lists.append(item) + match_specifier = _SpecifierMatcher(specifier_lists, match_if_empty=False) selection = self._select_columns(match_specifier) assert selection is not None, "top-level selections always return a Form" diff --git a/src/awkward/operations/ak_from_parquet.py b/src/awkward/operations/ak_from_parquet.py index 83c8732b56..1cb3fdfbc7 100644 --- a/src/awkward/operations/ak_from_parquet.py +++ b/src/awkward/operations/ak_from_parquet.py @@ -32,10 +32,11 @@ def from_parquet( Args: path (str): Local filename or remote URL, passed to fsspec for resolution. May contain glob patterns. - columns (None, str, or list of str): Glob pattern(s) with bash-like curly + columns (None, str, or iterable of (str or iterable of str)): Glob pattern(s) including bash-like curly brackets for matching column names. Nested records are separated by dots. If a list of patterns, the logical-or is matched. If None, all columns - are read. + are read. A list of lists can be provided to select columns with literal dots + in their names -- The inner list provides column names or patterns. row_groups (None or set of int): Row groups to read; must be non-negative. Order is ignored: the output array is presented in the order specified by Parquet metadata. If None, all row groups/all rows are read. diff --git a/tests/test_3088_select_columns_supports_literal_dots.py b/tests/test_3088_select_columns_supports_literal_dots.py new file mode 100644 index 0000000000..a84370cadc --- /dev/null +++ b/tests/test_3088_select_columns_supports_literal_dots.py @@ -0,0 +1,76 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE + +from __future__ import annotations + +import os + +import pytest + +import awkward as ak + + +def array_with_dotted_fields(): + return ak.Array( + [ + { + "x": [ + { + "y": { + "z": [1, 2, 3], + "w.1": 4, + } + } + ] + } + ] + ) + + +def test_alternative_specifiers(): + array = array_with_dotted_fields() + form = array.layout.form + assert form.select_columns("*") == form + assert form.select_columns([("x", "y", "w.1")]) == form.select_columns("x.y.w*") + assert form.select_columns([["x", "y", "w.1"], "x.y.z"]) == form + + +def test_columns_with_dots_from_parquet(tmp_path): + # ruff: noqa: F841 + _pq = pytest.importorskip("pyarrow.parquet") + array = array_with_dotted_fields() + parquet_file = os.path.join(tmp_path, "test_3088_array1.parquet") + ak.to_parquet(array, parquet_file) + array_selected = ak.from_parquet(parquet_file, columns=[("x", "y", "w.1")]) + assert array_selected.to_list() == [ + { + "x": [ + { + "y": { + # "z": [1, 2, 3], Excluded + "w.1": 4, # Selected + } + } + ] + } + ] + + ambig_array = ak.Array( + [ + { + "crazy": { + "dot": [11, 12, 13], + }, + "crazy.dot": [21, 22, 23], + } + ] + ) + parquet_file_ambig = os.path.join(tmp_path, "test_3088_array_ambig.parquet") + ak.to_parquet(ambig_array, parquet_file_ambig) + ambig_selected = ak.from_parquet(parquet_file_ambig, columns=[("crazy.dot",)]) + # Note: Currently, pyarrow.parquet cannot distinguish dots as separators + # from dots as field names. It builds a dict of all possible indices, + # and returns those. Even so, we still need the ability within Awkward to + # disambiguate these two, which we now have. We would need further + # feature work to create column name substitutions to work around this pyarrow + # limitation should this be justified. + assert ak.array_equal(ambig_selected, ambig_array) # Slurped everything. From ebf8bf3052e1483b9a1024382b2a13a49e144fc2 Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Mon, 2 Sep 2024 17:00:18 +0200 Subject: [PATCH 05/21] fix: cuda reducer bugfix and more tests (#3228) --- .../cuda/cuda_kernels/awkward_reduce_max.cu | 2 +- tests-cuda/test_3149_complex_reducers.py | 72 ++++++++--------- .../test_3150_combinations_n_equal_2.py | 2 - .../test_3162_block_boundary_reducers.py | 77 +++++++++++++------ 4 files changed, 85 insertions(+), 68 deletions(-) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu index 2941aa417e..0d1ecdebcd 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu @@ -60,7 +60,7 @@ awkward_reduce_max_b( T val = identity; if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { - val = temp[idx - stride]; + val = temp[thread_id - stride]; } __syncthreads(); temp[thread_id] = val > temp[thread_id] ? val : temp[thread_id]; diff --git a/tests-cuda/test_3149_complex_reducers.py b/tests-cuda/test_3149_complex_reducers.py index bd53020721..abc921b30d 100644 --- a/tests-cuda/test_3149_complex_reducers.py +++ b/tests-cuda/test_3149_complex_reducers.py @@ -302,8 +302,8 @@ def test_0652_minmax(): def test_block_boundary_sum_complex(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -323,7 +323,6 @@ def test_block_boundary_sum_complex(): def test_block_boundary_prod_complex1(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 1)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -341,7 +340,6 @@ def test_block_boundary_prod_complex1(): def test_block_boundary_prod_complex2(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 1)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -359,7 +357,6 @@ def test_block_boundary_prod_complex2(): def test_block_boundary_prod_complex3(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 1)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -377,7 +374,6 @@ def test_block_boundary_prod_complex3(): def test_block_boundary_prod_complex4(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 1.01)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -397,7 +393,6 @@ def test_block_boundary_prod_complex4(): def test_block_boundary_prod_complex5(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 1.01)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -417,7 +412,6 @@ def test_block_boundary_prod_complex5(): def test_block_boundary_prod_complex6(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 1.01)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -437,7 +431,6 @@ def test_block_boundary_prod_complex6(): def test_block_boundary_prod_complex7(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 0.99)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -457,7 +450,6 @@ def test_block_boundary_prod_complex7(): def test_block_boundary_prod_complex8(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 0.99)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -477,7 +469,6 @@ def test_block_boundary_prod_complex8(): def test_block_boundary_prod_complex9(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 0.99)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -497,7 +488,6 @@ def test_block_boundary_prod_complex9(): def test_block_boundary_prod_complex10(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1000, 0), np.full(1000, 1.1)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -517,7 +507,6 @@ def test_block_boundary_prod_complex10(): def test_block_boundary_prod_complex11(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1001, 0), np.full(1001, 1.1)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -537,7 +526,6 @@ def test_block_boundary_prod_complex11(): def test_block_boundary_prod_complex12(): - np.random.seed(42) complex_array = np.vectorize(complex)(np.full(1002, 0), np.full(1002, 1.1)) content = ak.contents.NumpyArray(complex_array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -557,8 +545,8 @@ def test_block_boundary_prod_complex12(): def test_block_boundary_prod_complex13(): - np.random.seed(42) - array = np.random.randint(50, size=1000) + rng = np.random.default_rng(seed=42) + array = rng.integers(50, size=1000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -580,8 +568,8 @@ def test_block_boundary_prod_complex13(): def test_block_boundary_any_complex(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -601,8 +589,8 @@ def test_block_boundary_any_complex(): def test_block_boundary_all_complex(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -622,8 +610,8 @@ def test_block_boundary_all_complex(): def test_block_boundary_min_complex1(): - np.random.seed(42) - array = np.random.randint(5, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(5, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -643,8 +631,8 @@ def test_block_boundary_min_complex1(): def test_block_boundary_min_complex2(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -664,8 +652,8 @@ def test_block_boundary_min_complex2(): def test_block_boundary_max_complex1(): - np.random.seed(42) - array = np.random.randint(5, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(5, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -685,8 +673,8 @@ def test_block_boundary_max_complex1(): def test_block_boundary_max_complex2(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -706,8 +694,8 @@ def test_block_boundary_max_complex2(): def test_block_boundary_sum_bool_complex(): - np.random.seed(42) - array = np.random.randint(2, size=6000, dtype=np.bool_) + rng = np.random.default_rng(seed=42) + array = rng.integers(2, size=6000, dtype=np.bool_) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -727,8 +715,8 @@ def test_block_boundary_sum_bool_complex(): def test_block_boundary_countnonzero_complex_1(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -749,8 +737,8 @@ def test_block_boundary_countnonzero_complex_1(): def test_block_boundary_countnonzero_complex_2(): - np.random.seed(42) - array = np.random.randint(2, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(2, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -772,8 +760,8 @@ def test_block_boundary_countnonzero_complex_2(): @pytest.mark.skip(reason="awkward_reduce_argmax_complex is not implemented") def test_block_boundary_argmax_complex1(): - np.random.seed(42) - array = np.random.randint(5, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(5, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -794,8 +782,8 @@ def test_block_boundary_argmax_complex1(): @pytest.mark.skip(reason="awkward_reduce_argmax_complex is not implemented") def test_block_boundary_argmax_complex2(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -816,8 +804,8 @@ def test_block_boundary_argmax_complex2(): @pytest.mark.skip(reason="awkward_reduce_argmin_complex is not implemented") def test_block_boundary_argmin_complex1(): - np.random.seed(42) - array = np.random.randint(5, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(5, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) @@ -838,8 +826,8 @@ def test_block_boundary_argmin_complex1(): @pytest.mark.skip(reason="awkward_reduce_argmin_complex is not implemented") def test_block_boundary_argmin_complex2(): - np.random.seed(42) - array = np.random.randint(6000, size=6000) + rng = np.random.default_rng(seed=42) + array = rng.integers(6000, size=6000) complex_array = np.vectorize(complex)( array[0 : len(array) : 2], array[1 : len(array) : 2] ) diff --git a/tests-cuda/test_3150_combinations_n_equal_2.py b/tests-cuda/test_3150_combinations_n_equal_2.py index d65ef8416a..801d83599a 100644 --- a/tests-cuda/test_3150_combinations_n_equal_2.py +++ b/tests-cuda/test_3150_combinations_n_equal_2.py @@ -1188,7 +1188,6 @@ def test_1074_combinations_UnmaskedArray(): def test_block_boundary_combinations(): - np.random.seed(42) content = ak.contents.NumpyArray(np.arange(300)) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -1219,7 +1218,6 @@ def test_block_boundary_combinations(): def test_block_boundary_argcombinations(): - np.random.seed(42) content = ak.contents.NumpyArray(np.arange(300)) cuda_content = ak.to_backend(content, "cuda", highlevel=False) diff --git a/tests-cuda/test_3162_block_boundary_reducers.py b/tests-cuda/test_3162_block_boundary_reducers.py index cd0b57a0c8..deb52da002 100644 --- a/tests-cuda/test_3162_block_boundary_reducers.py +++ b/tests-cuda/test_3162_block_boundary_reducers.py @@ -17,8 +17,9 @@ def cleanup_cuda(): def test_block_boundary_sum(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.sum(cuda_content, -1, highlevel=False) == ak.sum( content, -1, highlevel=False @@ -34,8 +35,9 @@ def test_block_boundary_sum(): def test_block_boundary_any(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.any(cuda_content, -1, highlevel=False) == ak.any( content, -1, highlevel=False @@ -51,8 +53,9 @@ def test_block_boundary_any(): def test_block_boundary_all(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.all(cuda_content, -1, highlevel=False) == ak.all( content, -1, highlevel=False @@ -68,8 +71,9 @@ def test_block_boundary_all(): def test_block_boundary_sum_bool(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(2, size=3000, dtype=np.bool_)) + rng = np.random.default_rng(seed=42) + array = rng.integers(2, size=3000, dtype=np.bool_) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.sum(cuda_content, -1, highlevel=False) == ak.sum( content, -1, highlevel=False @@ -85,9 +89,13 @@ def test_block_boundary_sum_bool(): def test_block_boundary_max(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + print(array) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) + print(ak.max(content, -1, highlevel=False)) + print(ak.max(cuda_content, -1, highlevel=False)) assert ak.max(cuda_content, -1, highlevel=False) == ak.max( content, -1, highlevel=False ) @@ -102,8 +110,27 @@ def test_block_boundary_max(): def test_block_boundary_min(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + content = ak.contents.NumpyArray(array) + cuda_content = ak.to_backend(content, "cuda", highlevel=False) + assert ak.min(cuda_content, -1, highlevel=False) == ak.min( + content, -1, highlevel=False + ) + + offsets = ak.index.Index64(np.array([0, 1, 2998, 3000], dtype=np.int64)) + depth1 = ak.contents.ListOffsetArray(offsets, content) + cuda_depth1 = ak.to_backend(depth1, "cuda", highlevel=False) + assert to_list(ak.min(cuda_depth1, -1, highlevel=False)) == to_list( + ak.min(depth1, -1, highlevel=False) + ) + del cuda_content, cuda_depth1 + + +def test_block_boundary_negative_min(): + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) * -1 + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.min(cuda_content, -1, highlevel=False) == ak.min( content, -1, highlevel=False @@ -120,8 +147,9 @@ def test_block_boundary_min(): @pytest.mark.skip(reason="awkward_reduce_argmin is not implemented") def test_block_boundary_argmin(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.argmin(cuda_content, -1, highlevel=False) == ak.argmin( content, -1, highlevel=False @@ -138,8 +166,9 @@ def test_block_boundary_argmin(): @pytest.mark.skip(reason="awkward_reduce_argmax is not implemented") def test_block_boundary_argmax(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.argmax(cuda_content, -1, highlevel=False) == ak.argmax( content, -1, highlevel=False @@ -155,8 +184,9 @@ def test_block_boundary_argmax(): def test_block_boundary_count(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(3000, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(3000, size=3000) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.count(cuda_content, -1, highlevel=False) == ak.count( content, -1, highlevel=False @@ -172,8 +202,9 @@ def test_block_boundary_count(): def test_block_boundary_count_nonzero(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(2, size=3000)) + rng = np.random.default_rng(seed=42) + array = rng.integers(2, size=3000) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.count_nonzero(cuda_content, -1, highlevel=False) == ak.count_nonzero( content, -1, highlevel=False @@ -189,7 +220,6 @@ def test_block_boundary_count_nonzero(): def test_block_boundary_prod(): - np.random.seed(42) primes = [x for x in range(2, 30000) if all(x % n != 0 for n in range(2, x))] content = ak.contents.NumpyArray(primes) cuda_content = ak.to_backend(content, "cuda", highlevel=False) @@ -207,8 +237,9 @@ def test_block_boundary_prod(): def test_block_boundary_prod_bool(): - np.random.seed(42) - content = ak.contents.NumpyArray(np.random.randint(2, size=3000, dtype=np.bool_)) + rng = np.random.default_rng(seed=42) + array = rng.integers(2, size=3000, dtype=np.bool_) + content = ak.contents.NumpyArray(array) cuda_content = ak.to_backend(content, "cuda", highlevel=False) assert ak.prod(cuda_content, -1, highlevel=False) == ak.prod( content, -1, highlevel=False From dc79aa15581aa0ac90cfc891121b4082668aacdc Mon Sep 17 00:00:00 2001 From: maxymnaumchyk <70752300+maxymnaumchyk@users.noreply.github.com> Date: Wed, 4 Sep 2024 17:45:47 +0300 Subject: [PATCH 06/21] fix documentation (#3227) --- src/awkward/operations/ak_from_raggedtensor.py | 2 +- src/awkward/operations/ak_to_raggedtensor.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/awkward/operations/ak_from_raggedtensor.py b/src/awkward/operations/ak_from_raggedtensor.py index ba20382caf..1c895506c2 100644 --- a/src/awkward/operations/ak_from_raggedtensor.py +++ b/src/awkward/operations/ak_from_raggedtensor.py @@ -13,7 +13,7 @@ def from_raggedtensor(array): """ Args: array: (`tensorflow.RaggedTensor`): - RaggedTensor to convert into an Awkward Array. + RaggedTensor to convert into an Awkward Array. Converts a TensorFlow RaggedTensor into an Awkward Array. diff --git a/src/awkward/operations/ak_to_raggedtensor.py b/src/awkward/operations/ak_to_raggedtensor.py index 16c15dcd47..5fcb2e2d5f 100644 --- a/src/awkward/operations/ak_to_raggedtensor.py +++ b/src/awkward/operations/ak_to_raggedtensor.py @@ -13,8 +13,8 @@ def to_raggedtensor(array): """ Args: array: Array-like data. May be a high level #ak.Array, - or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray, - #ak.contents.RegularArray, #ak.contents.NumpyArray + or low-level #ak.contents.ListOffsetArray, #ak.contents.ListArray, + #ak.contents.RegularArray, #ak.contents.NumpyArray Converts `array` (only ListOffsetArray, ListArray, RegularArray and NumpyArray data types supported) into a ragged tensor, if possible. From 6b39e43ae538606e59e84797ba022da515b55218 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 9 Sep 2024 18:30:20 -0500 Subject: [PATCH 07/21] fix: empty arrays in ak.to_parquet with extensionarray=True (#3234) --- src/awkward/_connect/pyarrow/table_conv.py | 2 +- tests/test_2772_parquet_extn_array_metadata.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/awkward/_connect/pyarrow/table_conv.py b/src/awkward/_connect/pyarrow/table_conv.py index 53f286c730..4434c94009 100644 --- a/src/awkward/_connect/pyarrow/table_conv.py +++ b/src/awkward/_connect/pyarrow/table_conv.py @@ -221,7 +221,7 @@ def replace_schema(table: pyarrow.Table, new_schema: pyarrow.Schema) -> pyarrow. new_batches.append( pyarrow.RecordBatch.from_arrays(arrays=columns, schema=new_schema) ) - return pyarrow.Table.from_batches(new_batches) + return pyarrow.Table.from_batches(new_batches, schema=new_schema) def array_with_replacement_type( diff --git a/tests/test_2772_parquet_extn_array_metadata.py b/tests/test_2772_parquet_extn_array_metadata.py index fd4c9fede6..aafdb84338 100644 --- a/tests/test_2772_parquet_extn_array_metadata.py +++ b/tests/test_2772_parquet_extn_array_metadata.py @@ -206,3 +206,12 @@ def test_selective_parquet(tmp_path): ak.to_parquet(ak_tbl, filename) tbl_tr = ak.from_parquet(filename, columns=["struct_array", "indexed"]) assert to_list(tbl_tr["struct_array"]) == to_list(ak_tbl["struct_array"]) + + +@pytest.mark.parametrize("doit", [False, True]) +def test_empty(tmp_path, doit): + filename = os.path.join(tmp_path, "whatever.parquet") + + ak.to_parquet(ak.Array([{"x": 1, "y": 1.1}])[0:0], filename, extensionarray=doit) + + assert str(ak.from_parquet(filename).type) == "0 * {x: int64, y: float64}" From a0b3858a3b6ebd9e6ef7851a64cdb8116d8ec85b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 15:57:49 +0000 Subject: [PATCH 08/21] chore: update pre-commit hooks (#3204) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * chore: update pre-commit hooks updates: - [github.com/astral-sh/ruff-pre-commit: v0.5.5 → v0.6.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.5.5...v0.6.4) - [github.com/python-jsonschema/check-jsonschema: 0.29.1 → 0.29.2](https://github.com/python-jsonschema/check-jsonschema/compare/0.29.1...0.29.2) - [github.com/pre-commit/mirrors-mypy: v1.11.0 → v1.11.2](https://github.com/pre-commit/mirrors-mypy/compare/v1.11.0...v1.11.2) - [github.com/abravalheri/validate-pyproject: v0.18 → v0.19](https://github.com/abravalheri/validate-pyproject/compare/v0.18...v0.19) * chore: drop some unneeded config --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jim Pivarski Co-authored-by: Henry Schreiner --- .pre-commit-config.yaml | 8 ++++---- pyproject.toml | 7 +------ 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index acd55d8d29..d8dac77444 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: additional_dependencies: [pyyaml] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.5.5 + rev: v0.6.4 hooks: - id: ruff args: ["--fix", "--show-fixes"] @@ -62,13 +62,13 @@ repos: files: ^tests/ - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.29.1 + rev: 0.29.2 hooks: - id: check-github-workflows args: ["--verbose"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.0 + rev: v1.11.2 hooks: - id: mypy files: src @@ -76,6 +76,6 @@ repos: - numpy>=1.24 - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.18 + rev: v0.19 hooks: - id: validate-pyproject diff --git a/pyproject.toml b/pyproject.toml index 678c07a279..20e701f5de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -136,7 +136,7 @@ filterwarnings = [ ] log_cli_level = "info" -[tool.pylint.master] +[tool.pylint] py-version = "3.8" jobs = "0" ignore-paths = [ @@ -251,7 +251,6 @@ ignore_errors = true ignore_missing_imports = true [tool.ruff] -src = ["src"] extend-exclude = [ "studies", "pybind11", @@ -289,10 +288,6 @@ ignore = [ "PLC1901", # x == "" can be simplified to not x (empty string is falsey) "ISC001", # Conflicts with the formatter in 0.1.2 ] -unfixable = [ - "T20", # Removes print statements - "F841", # Removes unused variables -] typing-modules = ["awkward._typing"] external = [] mccabe.max-complexity = 100 From 7a825bfc2e8a05010d2cfd79a52b4ac25347de4e Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 12 Sep 2024 18:53:49 +0200 Subject: [PATCH 09/21] fix: GPU complex reducer prod for empty lists (#3235) * fix: make sure that both CPU and GPU produce identical results * fix: one more --- .../cuda/cuda_kernels/awkward_reduce_prod_complex.cu | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu index 9a0c66846f..2e84a0cf9c 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_prod_complex.cu @@ -30,8 +30,8 @@ awkward_reduce_prod_complex_a( int64_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; if (thread_id < outlength) { - toptr[thread_id * 2] = (T)1.0f; - toptr[thread_id * 2 + 1] = (T)0.0f; + toptr[thread_id * 2] = (T)1; + toptr[thread_id * 2 + 1] = (T)0; } } } @@ -59,8 +59,8 @@ awkward_reduce_prod_complex_b( if (thread_id < lenparents) { for (int64_t stride = 1; stride < blockDim.x; stride *= 2) { - T real = (T)1.0f; - T imag = (T)0.0f; + T real = (T)1; + T imag = (T)0; if (idx >= stride && thread_id < lenparents && parents[thread_id] == parents[thread_id - stride]) { real = temp[(idx - stride) * 2]; imag = temp[(idx - stride) * 2 + 1]; From af0cceab9988314d01ffd9719027c6f988e1d43d Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 12 Sep 2024 14:32:57 -0400 Subject: [PATCH 10/21] ci: add 3.13 wheels (#3217) * ci: add 3.13 wheels Signed-off-by: Henry Schreiner * style: pre-commit fixes * Update requirements-test-full.txt * Update cibuildwheel.toml * ci: build pyo3 in forward compat mode * Update test.yml --------- Signed-off-by: Henry Schreiner Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jim Pivarski --- .github/workflows/build-wheels.yml | 6 +++--- .github/workflows/packaging-test.yml | 4 ++-- .github/workflows/test.yml | 6 ++++++ awkward-cpp/pyproject.toml | 4 ++-- cibuildwheel.toml | 4 ++-- pyproject.toml | 1 + requirements-test-full.txt | 10 +++++----- 7 files changed, 21 insertions(+), 14 deletions(-) diff --git a/.github/workflows/build-wheels.yml b/.github/workflows/build-wheels.yml index d6f992588e..361d292285 100644 --- a/.github/workflows/build-wheels.yml +++ b/.github/workflows/build-wheels.yml @@ -105,7 +105,7 @@ jobs: - name: Prepare build files run: pipx run nox -s prepare - - uses: pypa/cibuildwheel@v2.19 + - uses: pypa/cibuildwheel@v2.20 env: CIBW_BUILD: "${{ matrix.build }}*" CIBW_ARCHS: ${{ matrix.arch }} @@ -136,7 +136,7 @@ jobs: SOURCE_DATE_EPOCH: ${{ needs.determine-source-date-epoch.outputs.source-date-epoch }} strategy: matrix: - python: [38, 39, 310, 311, 312] + python: [38, 39, 310, 311, 312, 313] arch: [aarch64] steps: @@ -157,7 +157,7 @@ jobs: - uses: docker/setup-qemu-action@v3.2.0 - - uses: pypa/cibuildwheel@v2.19 + - uses: pypa/cibuildwheel@v2.20 env: CIBW_BUILD: cp${{ matrix.python }}-* CIBW_ARCHS: ${{ matrix.arch }} diff --git a/.github/workflows/packaging-test.yml b/.github/workflows/packaging-test.yml index 8317cc58a2..20ae0fe714 100644 --- a/.github/workflows/packaging-test.yml +++ b/.github/workflows/packaging-test.yml @@ -68,7 +68,7 @@ jobs: - name: Prepare build files run: pipx run nox -s prepare - - uses: pypa/cibuildwheel@v2.19 + - uses: pypa/cibuildwheel@v2.20 env: CIBW_ARCHS_MACOS: universal2 CIBW_BUILD: cp39-win_amd64 cp310-manylinux_x86_64 cp38-macosx_universal2 @@ -76,7 +76,7 @@ jobs: config-file: cibuildwheel.toml package-dir: awkward-cpp - - uses: pypa/cibuildwheel@v2.19 + - uses: pypa/cibuildwheel@v2.20 if: matrix.os == 'ubuntu-latest' env: CIBW_BUILD: cp312-manylinux_x86_64 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5ef43543af..2c58f66746 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,6 +39,7 @@ jobs: - ubuntu-latest - macos-13 python-version: + - '3.13' - '3.12' - '3.11' - '3.10' @@ -110,6 +111,11 @@ jobs: files: | awkward-cpp/dist/*.whl + - name: Add workaround for 3.13 + cramjam + if: matrix.python-version == '3.13' + run: echo 'PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1' >> $GITHUB_ENV + shell: bash + - name: Install awkward, awkward-cpp, and dependencies run: >- python -m pip install -v . ${{ steps.find-wheel.outputs.paths }} pytest-github-actions-annotate-failures diff --git a/awkward-cpp/pyproject.toml b/awkward-cpp/pyproject.toml index 9d64da31ab..9c5637b5d3 100644 --- a/awkward-cpp/pyproject.toml +++ b/awkward-cpp/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "scikit-build-core>=0.9", + "scikit-build-core>=0.10", "pybind11", ] build-backend = "scikit_build_core.build" @@ -59,7 +59,7 @@ Releases = "https://github.com/scikit-hep/awkward-1.0/releases" [tool.scikit-build] -minimum-version = "0.9" +minimum-version = "build-system.requires" build-dir = "build/{cache_tag}" sdist.reproducible = true sdist.include = [ diff --git a/cibuildwheel.toml b/cibuildwheel.toml index 96f94455fe..91827c69a0 100644 --- a/cibuildwheel.toml +++ b/cibuildwheel.toml @@ -23,5 +23,5 @@ build-verbosity = 1 PIP_ONLY_BINARY = "cmake,numpy" [[tool.cibuildwheel.overrides]] -select = "cp312-*" -environment.PIP_PRE = "1" +select = "cp313*" +environment.PYO3_USE_ABI3_FORWARD_COMPATIBILITY = "1" diff --git a/pyproject.toml b/pyproject.toml index 20e701f5de..b985ae05cd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Scientific/Engineering :: Mathematics", diff --git a/requirements-test-full.txt b/requirements-test-full.txt index 51e47e53f2..ded83569b7 100644 --- a/requirements-test-full.txt +++ b/requirements-test-full.txt @@ -1,9 +1,9 @@ fsspec>=2022.11.0;sys_platform != "win32" -jax[cpu]>=0.2.15;sys_platform != "win32" and python_version < "3.12" -numba>=0.50.0;sys_platform != "win32" and python_version < "3.12" -numexpr>=2.7; python_version < "3.12" -pandas>=0.24.0;sys_platform != "win32" and python_version < "3.12" -pyarrow==16.0.0;sys_platform != "win32" and python_version < "3.12" +jax[cpu]>=0.2.15;sys_platform != "win32" and python_version < "3.13" +numba>=0.50.0;sys_platform != "win32" and python_version < "3.13" +numexpr>=2.7; python_version < "3.13" +pandas>=0.24.0;sys_platform != "win32" and python_version < "3.13" +pyarrow==16.0.0;sys_platform != "win32" and python_version < "3.13" pytest>=6 pytest-cov pytest-xdist From 181fa9844d85b73fa24931a44368173c86bf9e0a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 14:43:07 -0500 Subject: [PATCH 11/21] chore(deps): bump the actions group across 1 directory with 3 updates (#3233) Bumps the actions group with 3 updates in the / directory: [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel), [actions/attest-build-provenance](https://github.com/actions/attest-build-provenance) and [pypa/gh-action-pypi-publish](https://github.com/pypa/gh-action-pypi-publish). Updates `pypa/cibuildwheel` from 2.19 to 2.20 - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.19...v2.20) Updates `actions/attest-build-provenance` from 1.3.3 to 1.4.3 - [Release notes](https://github.com/actions/attest-build-provenance/releases) - [Changelog](https://github.com/actions/attest-build-provenance/blob/main/RELEASE.md) - [Commits](https://github.com/actions/attest-build-provenance/compare/5e9cb68e95676991667494a6a4e59b8a2f13e1d0...1c608d11d69870c2092266b3f9a6f3abbf17002c) Updates `pypa/gh-action-pypi-publish` from 1.9.0 to 1.10.1 - [Release notes](https://github.com/pypa/gh-action-pypi-publish/releases) - [Commits](https://github.com/pypa/gh-action-pypi-publish/compare/v1.9.0...v1.10.1) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: actions/attest-build-provenance dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions - dependency-name: pypa/gh-action-pypi-publish dependency-type: direct:production update-type: version-update:semver-minor dependency-group: actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Jim Pivarski --- .github/workflows/deploy-cpp.yml | 4 ++-- .github/workflows/deploy.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/deploy-cpp.yml b/.github/workflows/deploy-cpp.yml index cfa540fbc2..67fa3624bf 100644 --- a/.github/workflows/deploy-cpp.yml +++ b/.github/workflows/deploy-cpp.yml @@ -35,8 +35,8 @@ jobs: run: ls -l dist/ - name: Generate artifact attestation for sdist and wheel - uses: actions/attest-build-provenance@5e9cb68e95676991667494a6a4e59b8a2f13e1d0 # v1.3.3 + uses: actions/attest-build-provenance@1c608d11d69870c2092266b3f9a6f3abbf17002c # v1.4.3 with: subject-path: "dist/awkward*cpp-*" - - uses: pypa/gh-action-pypi-publish@v1.9.0 + - uses: pypa/gh-action-pypi-publish@v1.10.1 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 4e6c2aa746..e62bca6c2b 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -88,7 +88,7 @@ jobs: run: pipx run twine check dist/* - name: Generate artifact attestation for sdist and wheel - uses: actions/attest-build-provenance@5e9cb68e95676991667494a6a4e59b8a2f13e1d0 # v1.3.3 + uses: actions/attest-build-provenance@1c608d11d69870c2092266b3f9a6f3abbf17002c # v1.4.3 with: subject-path: "dist/awkward-*" @@ -135,7 +135,7 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: gh attestation verify dist/awkward-*.whl --repo ${{ github.repository }} - - uses: pypa/gh-action-pypi-publish@v1.9.0 + - uses: pypa/gh-action-pypi-publish@v1.10.1 publish-headers: name: "Publish header-only libraries alongside release" From af113fd6da98290c2d95dec982cbfed30d66c930 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 12 Sep 2024 14:52:33 -0500 Subject: [PATCH 12/21] The next awkward-cpp release is 38. --- awkward-cpp/pyproject.toml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/awkward-cpp/pyproject.toml b/awkward-cpp/pyproject.toml index 9c5637b5d3..43411b9299 100644 --- a/awkward-cpp/pyproject.toml +++ b/awkward-cpp/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "scikit_build_core.build" [project] name = "awkward_cpp" -version = "37" +version = "38" dependencies = [ "numpy>=1.18.0", "importlib_resources;python_version < \"3.9\"" diff --git a/pyproject.toml b/pyproject.toml index b985ae05cd..c90871835e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ classifiers = [ "Topic :: Utilities", ] dependencies = [ - "awkward_cpp==37", + "awkward_cpp==38", "importlib_metadata>=4.13.0;python_version < \"3.12\"", "numpy>=1.18.0", "packaging", From dea122a71a2be7cd1b534814c0284b64993a4d10 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Thu, 12 Sep 2024 15:59:19 -0500 Subject: [PATCH 13/21] The next release will be 2.6.8. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c90871835e..b2876972bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ build-backend = "hatchling.build" [project] name = "awkward" -version = "2.6.7" +version = "2.6.8" description = "Manipulate JSON-like data with NumPy-like idioms." license = { text = "BSD-3-Clause" } requires-python = ">=3.8" From 8619299f32d7abcc2cd13aeb61421f39a3999e34 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 16:03:27 -0500 Subject: [PATCH 14/21] docs: add ariostas as a contributor for code (#3240) * docs: update README.md [skip ci] * docs: update .all-contributorsrc [skip ci] --------- Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> --- .all-contributorsrc | 9 +++++++++ README.md | 1 + 2 files changed, 10 insertions(+) diff --git a/.all-contributorsrc b/.all-contributorsrc index 68800f50f0..b629839afd 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -465,6 +465,15 @@ "contributions": [ "code" ] + }, + { + "login": "ariostas", + "name": "Andres Rios Tascon", + "avatar_url": "https://avatars.githubusercontent.com/u/7596837?v=4", + "profile": "http://www.ariostas.com", + "contributions": [ + "code" + ] } ], "contributorsPerLine": 7, diff --git a/README.md b/README.md index dd5d38399f..53faad33df 100644 --- a/README.md +++ b/README.md @@ -228,6 +228,7 @@ Thanks especially to the gracious help of Awkward Array contributors (including Peter Fackeldey
Peter Fackeldey

💻 + Andres Rios Tascon
Andres Rios Tascon

💻 From 14201213315d2c15465e999471ff4c3b4e5af59a Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 16:04:07 -0500 Subject: [PATCH 15/21] docs: add maxymnaumchyk as a contributor for code (#3241) * docs: update README.md [skip ci] * docs: update .all-contributorsrc [skip ci] --------- Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> --- .all-contributorsrc | 9 +++++++++ README.md | 1 + 2 files changed, 10 insertions(+) diff --git a/.all-contributorsrc b/.all-contributorsrc index b629839afd..ccaf3440b8 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -474,6 +474,15 @@ "contributions": [ "code" ] + }, + { + "login": "maxymnaumchyk", + "name": "maxymnaumchyk", + "avatar_url": "https://avatars.githubusercontent.com/u/70752300?v=4", + "profile": "https://github.com/maxymnaumchyk", + "contributions": [ + "code" + ] } ], "contributorsPerLine": 7, diff --git a/README.md b/README.md index 53faad33df..b71f4e7e9b 100644 --- a/README.md +++ b/README.md @@ -229,6 +229,7 @@ Thanks especially to the gracious help of Awkward Array contributors (including Peter Fackeldey
Peter Fackeldey

💻 Andres Rios Tascon
Andres Rios Tascon

💻 + maxymnaumchyk
maxymnaumchyk

💻 From 1c368f16f27e5f18e9c1d55ff0993e40a67dbd88 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 13 Sep 2024 14:00:01 -0400 Subject: [PATCH 16/21] feat: Add to_cudf (#3051) * Start * style: pre-commit fixes * results of chatting * style: pre-commit fixes * Add toplevel func * fix * fix in indexedoptionarray * Add tests and fixes * style: pre-commit fixes * use direct np module (for now) * style: pre-commit fixes * Don't accidentally iterate cupy with CPU * style: pre-commit fixes * Update src/awkward/_errors.py * add docstring * Add string * style: pre-commit fixes * Simpler for numerics This works also for time types, without going via cupy --------- Co-authored-by: Martin Durant Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- awkward-cpp/rapidjson | 2 +- src/awkward/contents/bitmaskedarray.py | 19 ++++++++ src/awkward/contents/bytemaskedarray.py | 13 +++++ src/awkward/contents/content.py | 4 ++ src/awkward/contents/emptyarray.py | 9 ++++ src/awkward/contents/indexedarray.py | 10 ++++ src/awkward/contents/indexedoptionarray.py | 3 ++ src/awkward/contents/listarray.py | 3 ++ src/awkward/contents/listoffsetarray.py | 34 +++++++++++++ src/awkward/contents/numpyarray.py | 15 ++++++ src/awkward/contents/recordarray.py | 17 +++++++ src/awkward/contents/unmaskedarray.py | 3 ++ src/awkward/operations/__init__.py | 1 + src/awkward/operations/ak_to_cudf.py | 21 ++++++++ tests-cuda/test_3051_to_cuda.py | 57 ++++++++++++++++++++++ 15 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 src/awkward/operations/ak_to_cudf.py create mode 100644 tests-cuda/test_3051_to_cuda.py diff --git a/awkward-cpp/rapidjson b/awkward-cpp/rapidjson index 3b2441b87f..f54b0e47a0 160000 --- a/awkward-cpp/rapidjson +++ b/awkward-cpp/rapidjson @@ -1 +1 @@ -Subproject commit 3b2441b87f99ab65f37b141a7b548ebadb607b96 +Subproject commit f54b0e47a08782a6131cc3d60f94d038fa6e0a51 diff --git a/src/awkward/contents/bitmaskedarray.py b/src/awkward/contents/bitmaskedarray.py index 0e12133d6e..9c70bfc4b5 100644 --- a/src/awkward/contents/bitmaskedarray.py +++ b/src/awkward/contents/bitmaskedarray.py @@ -11,6 +11,7 @@ from awkward._backends.backend import Backend from awkward._meta.bitmaskedmeta import BitMaskedMeta from awkward._nplikes.array_like import ArrayLike +from awkward._nplikes.cupy import Cupy from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import IndexType, NumpyMetadata from awkward._nplikes.placeholder import PlaceholderArray @@ -687,6 +688,24 @@ def _to_arrow( pyarrow, mask_node, validbytes, length, options ) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + cp = Cupy.instance()._module + + assert mask is None # this class has its own mask + if not self.lsb_order: + m = cp.flip( + cp.packbits(cp.flip(cp.unpackbits(cp.asarray(self._mask.data)))) + ) + else: + m = self._mask.data + + if m.nbytes % 64: + m = cp.resize(m, ((m.nbytes // 64) + 1) * 64) + m = cudf.core.buffer.as_buffer(m) + inner = self._content._to_cudf(cudf, mask=None, length=length) + inner.set_base_mask(m) + return inner + def _to_backend_array(self, allow_missing, backend): return self.to_ByteMaskedArray()._to_backend_array(allow_missing, backend) diff --git a/src/awkward/contents/bytemaskedarray.py b/src/awkward/contents/bytemaskedarray.py index 65ad948a16..87beb5f59f 100644 --- a/src/awkward/contents/bytemaskedarray.py +++ b/src/awkward/contents/bytemaskedarray.py @@ -12,6 +12,7 @@ from awkward._layout import maybe_posaxis from awkward._meta.bytemaskedmeta import ByteMaskedMeta from awkward._nplikes.array_like import ArrayLike +from awkward._nplikes.cupy import Cupy from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import IndexType, NumpyMetadata from awkward._nplikes.placeholder import PlaceholderArray @@ -1051,6 +1052,18 @@ def _to_arrow( options, ) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + cp = Cupy.instance()._module + + assert mask is None # this class has its own mask + m = cp.packbits(cp.asarray(self._mask), bitorder="little") + if m.nbytes % 64: + m = cp.resize(m, ((m.nbytes // 64) + 1) * 64) + m = cudf.core.buffer.as_buffer(m) + inner = self._content._to_cudf(cudf, mask=None, length=length) + inner.set_base_mask(m) + return inner + def _to_backend_array(self, allow_missing, backend): return self.to_IndexedOptionArray64()._to_backend_array(allow_missing, backend) diff --git a/src/awkward/contents/content.py b/src/awkward/contents/content.py index 1a0fe080a9..d0169ee2eb 100644 --- a/src/awkward/contents/content.py +++ b/src/awkward/contents/content.py @@ -1010,6 +1010,10 @@ def _to_arrow( ): raise NotImplementedError + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + # prototype abstract signature + raise NotImplementedError + def to_backend_array( self, allow_missing: bool = True, *, backend: Backend | str | None = None ): diff --git a/src/awkward/contents/emptyarray.py b/src/awkward/contents/emptyarray.py index 112effddf0..06447f2d8b 100644 --- a/src/awkward/contents/emptyarray.py +++ b/src/awkward/contents/emptyarray.py @@ -387,6 +387,15 @@ def _to_arrow( ) return next._to_arrow(pyarrow, mask_node, validbytes, length, options) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + dtype = np.dtype("float64") + next = ak.contents.NumpyArray( + numpy.empty(length, dtype=dtype), + parameters=self._parameters, + backend=self._backend, + ) + return next._to_cudf(cudf, None, 0) + @classmethod def _arrow_needs_option_type(cls): return True # This overrides Content._arrow_needs_option_type diff --git a/src/awkward/contents/indexedarray.py b/src/awkward/contents/indexedarray.py index 6fb4ea3c69..6421f51742 100644 --- a/src/awkward/contents/indexedarray.py +++ b/src/awkward/contents/indexedarray.py @@ -1049,6 +1049,16 @@ def _to_arrow( ) return next2._to_arrow(pyarrow, mask_node, validbytes, length, options) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + if self._content.length == 0: + # IndexedOptionArray._to_arrow replaces -1 in the index with 0. So behind + # every masked value is self._content[0], unless self._content.length == 0. + # In that case, don't call self._content[index]; it's empty anyway. + next = self._content + else: + next = self._content._carry(self._index, False) + return next._to_cudf(cudf, None, len(next)) + def _to_backend_array(self, allow_missing, backend): return self.project()._to_backend_array(allow_missing, backend) diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index 0e68461dc5..2162fb72c4 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -1576,6 +1576,9 @@ def _to_arrow( options, ) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + return self.to_ByteMaskedArray(True)._to_cudf(cudf, mask, length) + def _to_backend_array(self, allow_missing, backend): nplike = backend.nplike index_nplike = backend.index_nplike diff --git a/src/awkward/contents/listarray.py b/src/awkward/contents/listarray.py index 722b9044dd..a05eeaea55 100644 --- a/src/awkward/contents/listarray.py +++ b/src/awkward/contents/listarray.py @@ -1498,6 +1498,9 @@ def _to_arrow( pyarrow, mask_node, validbytes, length, options ) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + return self.to_ListOffsetArray64(False)._to_cudf(cudf, mask, length) + def _to_backend_array(self, allow_missing, backend): array_param = self.parameter("__array__") if array_param in {"bytestring", "string"}: diff --git a/src/awkward/contents/listoffsetarray.py b/src/awkward/contents/listoffsetarray.py index 4aa149b69d..003467c24b 100644 --- a/src/awkward/contents/listoffsetarray.py +++ b/src/awkward/contents/listoffsetarray.py @@ -10,6 +10,7 @@ from awkward._layout import maybe_posaxis from awkward._meta.listoffsetmeta import ListOffsetMeta from awkward._nplikes.array_like import ArrayLike +from awkward._nplikes.cupy import Cupy from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import IndexType, NumpyMetadata from awkward._nplikes.placeholder import PlaceholderArray @@ -1999,6 +2000,39 @@ def _to_arrow( ), ) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + cupy = Cupy.instance() + index = self._offsets.raw(cupy).astype("int32") + buf = cudf.core.buffer.as_buffer(index) + ind_buf = cudf.core.column.numerical.NumericalColumn( + buf, index.dtype, None, size=len(index) + ) + cont = self._content._to_cudf(cudf, None, len(self._content)) + if mask is not None: + m = np._module.packbits(mask, bitorder="little") + if m.nbytes % 64: + m = cupy.resize(m, ((m.nbytes // 64) + 1) * 64) + m = cudf.core.buffer.as_buffer(cupy.asarray(m)) + else: + m = None + if self.parameters.get("__array__") == "string": + from cudf.core.column.string import StringColumn + + data = cudf.core.buffer.as_buffer(cupy.asarray(self._content.data)) + # docs for StringColumn says there should be two children instead of a data= + return StringColumn( + data=data, + children=(ind_buf,), + mask=m, + ) + + return cudf.core.column.lists.ListColumn( + length, + mask=m, + children=(ind_buf, cont), + dtype=cudf.core.dtypes.ListDtype(cont.dtype), + ) + def _to_backend_array(self, allow_missing, backend): array_param = self.parameter("__array__") if array_param == "string": diff --git a/src/awkward/contents/numpyarray.py b/src/awkward/contents/numpyarray.py index 11a73bb124..315d9383b7 100644 --- a/src/awkward/contents/numpyarray.py +++ b/src/awkward/contents/numpyarray.py @@ -14,6 +14,7 @@ from awkward._meta.numpymeta import NumpyMeta from awkward._nplikes import to_nplike from awkward._nplikes.array_like import ArrayLike +from awkward._nplikes.cupy import Cupy from awkward._nplikes.jax import Jax from awkward._nplikes.numpy import Numpy from awkward._nplikes.numpy_like import IndexType, NumpyMetadata @@ -1220,6 +1221,20 @@ def _to_arrow( ), ) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + cupy = Cupy.instance() + from cudf.core.column.column import as_column + + assert self._backend.nplike.known_data + data = as_column(self._data) + if mask is not None: + m = cupy.packbits(cupy.asarray(mask), bitorder="little") + if m.nbytes % 64: + m = cupy.resize(m, ((m.nbytes // 64) + 1) * 64) + m = cudf.core.buffer.as_buffer(m) + data.set_base_data(m) + return data + def _to_backend_array(self, allow_missing, backend): return to_nplike(self.data, backend.nplike, from_nplike=self._backend.nplike) diff --git a/src/awkward/contents/recordarray.py b/src/awkward/contents/recordarray.py index c091d45365..4aafcfd6b2 100644 --- a/src/awkward/contents/recordarray.py +++ b/src/awkward/contents/recordarray.py @@ -1101,6 +1101,23 @@ def _to_arrow( children=values, ) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + children = tuple( + c._to_cudf(cudf, mask=None, length=length) for c in self.contents + ) + dt = cudf.core.dtypes.StructDtype( + {field: c.dtype for field, c in zip(self.fields, children)} + ) + m = mask._to_cudf(cudf, None, length) if mask else None + return cudf.core.column.struct.StructColumn( + data=None, + children=children, + dtype=dt, + mask=m, + size=length, + offset=0, + ) + def _to_backend_array(self, allow_missing, backend): if self.fields is None: return backend.nplike.empty(self.length, dtype=[]) diff --git a/src/awkward/contents/unmaskedarray.py b/src/awkward/contents/unmaskedarray.py index cbc726b310..0dd500ebc1 100644 --- a/src/awkward/contents/unmaskedarray.py +++ b/src/awkward/contents/unmaskedarray.py @@ -498,6 +498,9 @@ def _to_arrow( ): return self._content._to_arrow(pyarrow, self, None, length, options) + def _to_cudf(self, cudf: Any, mask: Content | None, length: int): + return self._content._to_cudf(cudf, mask, length) + def _to_backend_array(self, allow_missing, backend): content = self.content._to_backend_array(allow_missing, backend) if allow_missing: diff --git a/src/awkward/operations/__init__.py b/src/awkward/operations/__init__.py index 6d4a84c565..e9b1a3818b 100644 --- a/src/awkward/operations/__init__.py +++ b/src/awkward/operations/__init__.py @@ -86,6 +86,7 @@ from awkward.operations.ak_to_arrow_table import * from awkward.operations.ak_to_backend import * from awkward.operations.ak_to_buffers import * +from awkward.operations.ak_to_cudf import * from awkward.operations.ak_to_cupy import * from awkward.operations.ak_to_dataframe import * from awkward.operations.ak_to_feather import * diff --git a/src/awkward/operations/ak_to_cudf.py b/src/awkward/operations/ak_to_cudf.py new file mode 100644 index 0000000000..e45fe041a2 --- /dev/null +++ b/src/awkward/operations/ak_to_cudf.py @@ -0,0 +1,21 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward/blob/main/LICENSE +from __future__ import annotations + +import awkward as ak +from awkward._dispatch import high_level_function + +__all__ = ("to_cudf",) + + +@high_level_function() +def to_cudf( + array: ak.Array, +): + """Create a cuDF.Series out of the given ak array + + Buffers that are not already in GPU memory will be transferred, and some + structural reformatting may happen to account for differences in architecture. + """ + import cudf + + return cudf.Series(array.layout._to_cudf(cudf, None, len(array))) diff --git a/tests-cuda/test_3051_to_cuda.py b/tests-cuda/test_3051_to_cuda.py new file mode 100644 index 0000000000..af02ed798f --- /dev/null +++ b/tests-cuda/test_3051_to_cuda.py @@ -0,0 +1,57 @@ +from __future__ import annotations + +import pytest + +import awkward as ak + +cudf = pytest.importorskip("cudf") +cupy = pytest.importorskip("cupy") + + +def test_jagged(): + arr = ak.Array([[[1, 2, 3], [], [3, 4]], []]) + out = ak.to_cudf(arr) + assert isinstance(out, cudf.Series) + assert out.to_arrow().tolist() == [[[1, 2, 3], [], [3, 4]], []] + + +def test_nested(): + arr = ak.Array( + [{"a": 0, "b": 1.0, "c": {"d": 0}}, {"a": 1, "b": 0.0, "c": {"d": 1}}] + ) + out = ak.to_cudf(arr) + assert isinstance(out, cudf.Series) + assert out.to_arrow().tolist() == [ + {"a": 0, "b": 1.0, "c": {"d": 0}}, + {"a": 1, "b": 0.0, "c": {"d": 1}}, + ] + + +def test_null(): + arr = ak.Array([12, None, 21, 12]) + # calls ByteMaskedArray._to_cudf not NumpyArray + out = ak.to_cudf(arr) + assert isinstance(out, cudf.Series) + assert out.to_arrow().tolist() == [12, None, 21, 12] + + # True is valid, LSB order + arr2 = ak.Array(arr.layout.to_BitMaskedArray(True, True)) + out = ak.to_cudf(arr2) + assert isinstance(out, cudf.Series) + assert out.to_arrow().tolist() == [12, None, 21, 12] + + # reversed LSB (should be rare, involves extra work!) + arr3 = ak.Array(arr.layout.to_BitMaskedArray(True, False)) + out = ak.to_cudf(arr3) + assert isinstance(out, cudf.Series) + assert out.to_arrow().tolist() == [12, None, 21, 12] + + +def test_strings(): + arr = ak.Array(["hey", "hi", "hum"]) + out = ak.to_cudf(arr) + assert out.to_arrow().tolist() == ["hey", "hi", "hum"] + + arr = ak.Array(["hey", "hi", None, "hum"]) + out = ak.to_cudf(arr) + assert out.to_arrow().tolist() == ["hey", "hi", None, "hum"] From bb9d11113ed0c260b8d8f28c94da879b42b5a2b6 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 18 Sep 2024 09:37:57 -0500 Subject: [PATCH 17/21] docs: update funding list on awkward-array.org and GitHub README --- README.md | 2 +- docs/_templates/funding.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b71f4e7e9b..65d916050d 100644 --- a/README.md +++ b/README.md @@ -152,7 +152,7 @@ year = {2018} # Acknowledgements -Support for this work was provided by NSF cooperative agreements [OAC-1836650](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1836650) and [PHY-2323298](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2323298) (IRIS-HEP), grant [OAC-1450377](https://nsf.gov/awardsearch/showAward?AWD_ID=1450377) (DIANA/HEP), [PHY-2121686](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2121686) (US-CMS LHC Ops), and [OAC-2103945](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2103945) (Awkward Array). +Support for this work was provided by NSF cooperative agreement [OAC-1836650](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1836650) (IRIS-HEP 1), [PHY-2323298](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2323298) (IRIS-HEP 2), grant [OAC-1450377](https://nsf.gov/awardsearch/showAward?AWD_ID=1450377) (DIANA/HEP), [PHY-1520942](https://www.nsf.gov/awardsearch/showAward?AWD_ID=1520942) and [PHY-2121686](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2121686) (US-CMS LHC Ops), and [OAC-2103945](https://www.nsf.gov/awardsearch/showAward?AWD_ID=2103945) (Awkward Array). We also thank [Erez Shinan](https://github.com/erezsh) and the developers of the [Lark standalone parser](https://github.com/lark-parser/lark), which is used to parse type strings as type objects. diff --git a/docs/_templates/funding.html b/docs/_templates/funding.html index 8fb3a94738..09bb9ad107 100644 --- a/docs/_templates/funding.html +++ b/docs/_templates/funding.html @@ -1 +1 @@ -

Support for this work was provided by NSF cooperative agreement OAC-1836650 (IRIS-HEP), grant OAC-1450377 (DIANA/HEP), PHY-1520942 (US-CMS LHC Ops), and OAC-2103945 (Awkward Array).

+

Support for this work was provided by NSF cooperative agreement OAC-1836650 (IRIS-HEP 1), PHY-2323298 (IRIS-HEP 2), grant OAC-1450377 (DIANA/HEP), PHY-1520942 and PHY-2121686 (US-CMS LHC Ops), and OAC-2103945 (Awkward Array).

From 1ab633670e43cf2b12db31946ab2a2958d925cd1 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Thu, 19 Sep 2024 10:47:39 -0400 Subject: [PATCH 18/21] chore: update RapidJSON (#3249) Co-authored-by: Jim Pivarski --- awkward-cpp/rapidjson | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/awkward-cpp/rapidjson b/awkward-cpp/rapidjson index f54b0e47a0..3b2441b87f 160000 --- a/awkward-cpp/rapidjson +++ b/awkward-cpp/rapidjson @@ -1 +1 @@ -Subproject commit f54b0e47a08782a6131cc3d60f94d038fa6e0a51 +Subproject commit 3b2441b87f99ab65f37b141a7b548ebadb607b96 From 37120327790f916088ba3fb2ea307bc98e845863 Mon Sep 17 00:00:00 2001 From: Manasvi Goyal <55101825+ManasviGoyal@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:52:36 -0400 Subject: [PATCH 19/21] chore: update affiliation in CITATION.cff (#3251) --- CITATION.cff | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CITATION.cff b/CITATION.cff index 110ddc1c16..326f2ed02a 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -51,6 +51,6 @@ authors: email: "nick.smith@cern.ch" - family-names: "Goyal" given-names: "Manasvi" - affiliation: "Delhi Technological University" + affiliation: "Harvard University" orcid: "https://orcid.org/0000-0001-6321-7491" email: "mg.manasvi@gmail.com" From eaa43ff31d37c525e33a8226693ed273909568d5 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Tue, 24 Sep 2024 16:19:16 -0500 Subject: [PATCH 20/21] perf: avoid inflating UnmaskedArrays in broadcasting when you can (#3254) --- src/awkward/_broadcasting.py | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/awkward/_broadcasting.py b/src/awkward/_broadcasting.py index 7c69212dc2..de5699f73a 100644 --- a/src/awkward/_broadcasting.py +++ b/src/awkward/_broadcasting.py @@ -701,6 +701,36 @@ def broadcast_any_list(): for x, p in zip(outcontent, parameters) ) + def broadcast_any_option_all_UnmaskedArray(): + nextinputs = [] + nextparameters = [] + for x in inputs: + if isinstance(x, UnmaskedArray): + nextinputs.append(x.content) + nextparameters.append(x._parameters) + elif isinstance(x, Content): + nextinputs.append(x) + nextparameters.append(x._parameters) + else: + nextinputs.append(x) + nextparameters.append(NO_PARAMETERS) + + outcontent = apply_step( + backend, + nextinputs, + action, + depth, + copy.copy(depth_context), + lateral_context, + options, + ) + assert isinstance(outcontent, tuple) + parameters = parameters_factory(nextparameters, len(outcontent)) + + return tuple( + UnmaskedArray(x, parameters=p) for x, p in zip(outcontent, parameters) + ) + def broadcast_any_option(): mask = None for x in contents: @@ -1045,7 +1075,9 @@ def continuation(): # Any option-types? elif any(x.is_option for x in contents): - if options["function_name"] == "ak.where": + if all(not x.is_option or isinstance(x, UnmaskedArray) for x in contents): + return broadcast_any_option_all_UnmaskedArray() + elif options["function_name"] == "ak.where": return broadcast_any_option_akwhere() else: return broadcast_any_option() From 704fb45dc32cbfd516f40151bf554e9b2815377c Mon Sep 17 00:00:00 2001 From: Ianna Osborne Date: Thu, 26 Sep 2024 10:30:50 +0200 Subject: [PATCH 21/21] fix: TypeError fix for `can_cast` (#3255) * fix type error for can_cast * cleanup --- src/awkward/_broadcasting.py | 2 +- .../cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu | 4 ++-- .../_connect/cuda/cuda_kernels/awkward_reduce_max.cu | 2 +- .../_connect/cuda/cuda_kernels/awkward_reduce_min.cu | 2 +- src/awkward/_nplikes/array_module.py | 6 ++++-- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/awkward/_broadcasting.py b/src/awkward/_broadcasting.py index de5699f73a..7eb2300372 100644 --- a/src/awkward/_broadcasting.py +++ b/src/awkward/_broadcasting.py @@ -742,7 +742,7 @@ def broadcast_any_option(): mask = backend.index_nplike.logical_or(mask, m, maybe_out=mask) nextmask = Index8(mask.view(np.int8)) - index = backend.index_nplike.full(mask.shape[0], -1, dtype=np.int64) + index = backend.index_nplike.full(mask.shape[0], np.int64(-1), dtype=np.int64) index[~mask] = backend.index_nplike.arange( backend.index_nplike.shape_item_as_index(mask.shape[0]) - backend.index_nplike.count_nonzero(mask), diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu index 7026072b04..9905358181 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_NumpyArray_subrange_equal_bool.cu @@ -4,9 +4,9 @@ // def f(grid, block, args): // (tmpptr, fromstarts, fromstops, length, toequal, invocation_index, err_code) = args // if length > 1: -// scan_in_array = cupy.full((length - 1) * (length - 2), 0, dtype=cupy.int64) +// scan_in_array = cupy.full((length - 1) * (length - 2), cupy.array(0), dtype=cupy.int64) // else: -// scan_in_array = cupy.full(0, 0, dtype=cupy.int64) +// scan_in_array = cupy.full(0, cupy.array(0), dtype=cupy.int64) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_NumpyArray_subrange_equal_bool", bool_, fromstarts.dtype, fromstops.dtype, bool_]))(grid, block, (tmpptr, fromstarts, fromstops, length, toequal, scan_in_array, invocation_index, err_code)) // toequal[0] = cupy.any(scan_in_array == True) // out["awkward_NumpyArray_subrange_equal_bool", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu index 0d1ecdebcd..878d87fc85 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_max.cu @@ -7,7 +7,7 @@ // grid_size = math.floor((lenparents + block[0] - 1) / block[0]) // else: // grid_size = 1 -// temp = cupy.full(lenparents, identity, dtype=toptr.dtype) +// temp = cupy.full(lenparents, cupy.array([identity]), dtype=toptr.dtype) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_max_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) // out["awkward_reduce_max_a", {dtype_specializations}] = None diff --git a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu index e709d687f8..d27c5e1b80 100644 --- a/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu +++ b/src/awkward/_connect/cuda/cuda_kernels/awkward_reduce_min.cu @@ -7,7 +7,7 @@ // grid_size = math.floor((lenparents + block[0] - 1) / block[0]) // else: // grid_size = 1 -// temp = cupy.full(lenparents, identity, dtype=toptr.dtype) +// temp = cupy.full(lenparents, cupy.array([identity]), dtype=toptr.dtype) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_a", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) // cuda_kernel_templates.get_function(fetch_specialization(["awkward_reduce_min_b", cupy.dtype(toptr.dtype).type, cupy.dtype(fromptr.dtype).type, parents.dtype]))((grid_size,), block, (toptr, fromptr, parents, lenparents, outlength, toptr.dtype.type(identity), temp, invocation_index, err_code)) // out["awkward_reduce_min_a", {dtype_specializations}] = None diff --git a/src/awkward/_nplikes/array_module.py b/src/awkward/_nplikes/array_module.py index 5217ace411..568c7fc29e 100644 --- a/src/awkward/_nplikes/array_module.py +++ b/src/awkward/_nplikes/array_module.py @@ -118,7 +118,7 @@ def full( *, dtype: DTypeLike | None = None, ) -> ArrayLikeT: - return self._module.full(shape, fill_value, dtype=dtype) + return self._module.full(shape, self._module.array(fill_value), dtype=dtype) def zeros_like( self, x: ArrayLikeT | PlaceholderArray, *, dtype: DTypeLike | None = None @@ -146,7 +146,9 @@ def full_like( if isinstance(x, PlaceholderArray): return self.full(x.shape, fill_value, dtype=dtype or x.dtype) else: - return self._module.full_like(x, fill_value, dtype=dtype) + return self._module.full_like( + x, self._module.array(fill_value), dtype=dtype + ) def arange( self,