Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Ensemble Functions & Change Ensemble APIs #61

Merged
merged 28 commits into from
Sep 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions thicket/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# make flake8 unused names in this file.
# flake8: noqa: F401

from .ensemble import Ensemble
from .thicket import Thicket
from .thicket import InvalidFilter
from .thicket import EmptyMetadataTable
Expand Down
400 changes: 400 additions & 0 deletions thicket/ensemble.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions thicket/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ def _resolve_missing_indicies(th_list):
def _sync_nodes(gh, df):
"""Set the node objects to be equal in both the graph and the dataframe.

Operations: (n tree nodes) X (m df nodes) X (m)

id(graph_node) == id(df_node) after this function for nodes with equivalent hatchet
nid's.
"""
Expand Down
41 changes: 21 additions & 20 deletions thicket/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@


@pytest.fixture
def columnar_join_thicket(mpi_scaling_cali, rajaperf_basecuda_xl_cali):
"""Generator for 'columnar_join' thicket.
def thicket_axis_columns(mpi_scaling_cali, rajaperf_basecuda_xl_cali):
"""Generator for 'concat_thickets(axis="columns")' thicket.

Arguments:
mpi_scaling_cali (list): List of Caliper files for MPI scaling study.
rajaperf_basecuda_xl_cali (list): List of Caliper files for base cuda variant.

Returns:
list: List of original thickets, list of deepcopies of original thickets, and
columnar-joined thicket.
column-joined thicket.
"""
th_mpi_1 = Thicket.from_caliperreader(mpi_scaling_cali[0:2])
th_mpi_2 = Thicket.from_caliperreader(mpi_scaling_cali[2:4])
Expand All @@ -39,45 +39,46 @@ def columnar_join_thicket(mpi_scaling_cali, rajaperf_basecuda_xl_cali):
th_mpi_2_deep = th_mpi_2.deepcopy()
th_cuda128_deep = th_cuda128.deepcopy()

thicket_list = [th_mpi_1, th_mpi_2, th_cuda128]
thicket_list_cp = [th_mpi_1_deep, th_mpi_2_deep, th_cuda128_deep]
thickets = [th_mpi_1, th_mpi_2, th_cuda128]
thickets_cp = [th_mpi_1_deep, th_mpi_2_deep, th_cuda128_deep]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
header_list=["MPI1", "MPI2", "Cuda128"],
column_name="ProblemSize",
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
headers=["MPI1", "MPI2", "Cuda128"],
metadata_key="ProblemSize",
)

return thicket_list, thicket_list_cp, combined_th
return thickets, thickets_cp, combined_th


@pytest.fixture
def stats_columnar_join_thicket(rajaperf_basecuda_xl_cali):
"""Generator for 'columnar_join' thicket for test_stats.py.
def stats_thicket_axis_columns(rajaperf_basecuda_xl_cali):
"""Generator for 'concat_thickets(axis="columns")' thicket for test_stats.py.

Arguments:
mpi_scaling_cali (list): List of Caliper files for MPI scaling study.
rajaperf_basecuda_xl_cali (list): List of Caliper files for base cuda variant.

Returns:
list: List of original thickets, list of deepcopies of original thickets, and
columnar-joined thicket.
column-joined thicket.
"""
th_cuda128_1 = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali[0:4])
th_cuda128_2 = Thicket.from_caliperreader(rajaperf_basecuda_xl_cali[5:9])

# To check later if modifications were unexpectedly made
th_cuda128_1_deep = th_cuda128_1.deepcopy()
th_cuda128_2_deep = th_cuda128_2.deepcopy()
thicket_list = [th_cuda128_1, th_cuda128_2]
thicket_list_cp = [th_cuda128_1_deep, th_cuda128_2_deep]
thickets = [th_cuda128_1, th_cuda128_2]
thickets_cp = [th_cuda128_1_deep, th_cuda128_2_deep]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
header_list=["Cuda 1", "Cuda 2"],
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
headers=["Cuda 1", "Cuda 2"],
)

return thicket_list, thicket_list_cp, combined_th
return thickets, thickets_cp, combined_th


@pytest.fixture
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,40 +12,58 @@
from test_filter_metadata import filter_multiple_and
from test_filter_stats import check_filter_stats
from test_query import check_query
from thicket import Thicket


def test_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_concat_thickets_index(mpi_scaling_cali):
    """Check `Thicket.concat_thickets` when called without an explicit axis.

    Builds one Thicket from each of the first two MPI scaling Caliper files,
    concatenates them, and verifies the resulting performance dataframe.

    Arguments:
        mpi_scaling_cali (list): List of Caliper files for MPI scaling study.
    """
    import math

    th_27 = Thicket.from_caliperreader(mpi_scaling_cali[0])
    th_64 = Thicket.from_caliperreader(mpi_scaling_cali[1])

    tk = Thicket.concat_thickets([th_27, th_64])

    # Check dataframe shape. This must be an `assert`; a bare comparison is
    # evaluated and discarded, so it would never fail the test.
    assert tk.dataframe.shape == (90, 7)

    # Check that the concatenated Thicket is truthy
    assert tk

    # Check specific values. Row order can vary so use "sum" to check. Because
    # the summation order can vary too, compare with a tolerance rather than
    # exact float equality.
    node = tk.dataframe.index.get_level_values("node")[8]
    assert math.isclose(sum(tk.dataframe.loc[node, "Min time/rank"]), 0.000453)


def test_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# Check no original objects modified
for i in range(len(thicket_list)):
assert thicket_list[i].dataframe.equals(thicket_list_cp[i].dataframe)
assert thicket_list[i].metadata.equals(thicket_list_cp[i].metadata)
for i in range(len(thickets)):
assert thickets[i].dataframe.equals(thickets_cp[i].dataframe)
assert thickets[i].metadata.equals(thickets_cp[i].metadata)

# Check dataframe shape. Should be columnar-joined
assert combined_th.dataframe.shape[0] <= sum(
[th.dataframe.shape[0] for th in thicket_list]
[th.dataframe.shape[0] for th in thickets]
) # Rows. Should be <= because some rows will exist across multiple thickets.
assert (
combined_th.dataframe.shape[1]
== sum([th.dataframe.shape[1] for th in thicket_list]) - len(thicket_list) + 1
== sum([th.dataframe.shape[1] for th in thickets]) - len(thickets) + 1
) # Columns. (-1) for each name column removed, (+1) singular name column created.

# Check metadata shape. Should be columnar-joined
assert combined_th.metadata.shape[0] == max(
[th.metadata.shape[0] for th in thicket_list]
[th.metadata.shape[0] for th in thickets]
) # Rows. Should be max because all rows should exist in all thickets.
assert combined_th.metadata.shape[1] == sum(
[th.metadata.shape[1] for th in thicket_list]
[th.metadata.shape[1] for th in thickets]
) - len(
thicket_list
thickets
) # Columns. (-1) Since we added an additional column "ProblemSize".

# Check profiles
assert len(combined_th.profile) == sum([len(th.profile) for th in thicket_list])
assert len(combined_th.profile) == sum([len(th.profile) for th in thickets])

# Check profile_mapping
assert len(combined_th.profile_mapping) == sum(
[len(th.profile_mapping) for th in thicket_list]
[len(th.profile_mapping) for th in thickets]
)

# PerfData and StatsFrame nodes should be in the same order.
Expand All @@ -55,8 +73,8 @@ def test_columnar_join(columnar_join_thicket):
).all()


def test_filter_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_filter_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# columns and corresponding values to filter by
columns_values = {
("MPI1", "mpi.world.size"): [27],
Expand All @@ -67,8 +85,8 @@ def test_filter_columnar_join(columnar_join_thicket):
filter_multiple_and(combined_th, columns_values)


def test_filter_stats_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_filter_stats_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# columns and corresponding values to filter by
columns_values = {
("test", "test_string_column"): ["less than 20"],
Expand All @@ -86,8 +104,8 @@ def test_filter_stats_columnar_join(columnar_join_thicket):
check_filter_stats(combined_th, columns_values)


def test_query_columnar_join(columnar_join_thicket):
thicket_list, thicket_list_cp, combined_th = columnar_join_thicket
def test_query_concat_thickets_columns(thicket_axis_columns):
thickets, thickets_cp, combined_th = thicket_axis_columns
# test arguments
hnids = [0, 1, 2, 3, 5, 6, 8, 9]
query = (
Expand Down
32 changes: 17 additions & 15 deletions thicket/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

from thicket import Thicket, EmptyMetadataTable
from test_columnar_join import test_columnar_join
from test_concat_thickets import test_concat_thickets_columns
from utils import check_identity


Expand Down Expand Up @@ -89,7 +89,7 @@ def test_groupby(example_cali):
check_groupby(th, columns_values)


def test_groupby_columnar_join(example_cali):
def test_groupby_concat_thickets_columns(example_cali):
"""Tests case where the Sub-Thickets of a groupby are used in a columnar join"""
# example thicket
th = Thicket.from_caliperreader(example_cali)
Expand All @@ -106,23 +106,24 @@ def test_groupby_columnar_join(example_cali):
th_list[2].metadata[selected_column] = problem_size
th_list[3].metadata[selected_column] = problem_size

thicket_list = [th_list[0], th_list[1], th_list[2], th_list[3]]
thicket_list_cp = [
thickets = [th_list[0], th_list[1], th_list[2], th_list[3]]
thickets_cp = [
th_list[0].deepcopy(),
th_list[1].deepcopy(),
th_list[2].deepcopy(),
th_list[3].deepcopy(),
]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
column_name=selected_column,
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
metadata_key=selected_column,
)

test_columnar_join((thicket_list, thicket_list_cp, combined_th))
test_concat_thickets_columns((thickets, thickets_cp, combined_th))


def test_groupby_columnar_join_subthickets(example_cali):
def test_groupby_concat_thickets_columns_subthickets(example_cali):
"""Tests case where some specific Sub-Thickets of a groupby are used in a columnar join"""
# example thicket
th = Thicket.from_caliperreader(example_cali)
Expand All @@ -137,15 +138,16 @@ def test_groupby_columnar_join_subthickets(example_cali):
th_list[0].metadata[selected_column] = problem_size
th_list[1].metadata[selected_column] = problem_size

thicket_list = [th_list[0], th_list[1]]
thicket_list_cp = [
thickets = [th_list[0], th_list[1]]
thickets_cp = [
th_list[0].deepcopy(),
th_list[1].deepcopy(),
]

combined_th = Thicket.columnar_join(
thicket_list=thicket_list,
column_name=selected_column,
combined_th = Thicket.concat_thickets(
thickets=thickets,
axis="columns",
metadata_key=selected_column,
)

test_columnar_join((thicket_list, thicket_list_cp, combined_th))
test_concat_thickets_columns((thickets, thickets_cp, combined_th))
5 changes: 5 additions & 0 deletions thicket/tests/test_intersection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ def test_intersection(example_cali):

intersected_th = th_ens.intersection()

intersected_th_other = th.from_caliperreader(example_cali, intersection=True)

# Check other methodology
assert len(intersected_th.graph) == len(intersected_th_other.graph)

# Check original and intersected thickets
assert len(th_ens.dataframe) == 344
assert len(intersected_th.dataframe) == 4
Expand Down
Loading
Loading