Merge branch 'bugfix/fix_fails' into feature/agg_mode
wjsi committed Oct 3, 2023
2 parents 0c2f34b + 624fb5a commit ecdfe8c
Showing 11 changed files with 33 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/benchmark-ci.yml
@@ -57,7 +57,7 @@ jobs:
git fetch upstream
git merge upstream/master
asv machine --yes
- asv continuous -e -f 1.1 --strict upstream/master HEAD
+ asv continuous -e -f 1.1 upstream/master HEAD
if: ${{ steps.build.outcome == 'success' }}

- name: Publish benchmarks artifact
32 changes: 19 additions & 13 deletions mars/dataframe/merge/tests/test_merge_execution.py
@@ -312,11 +312,15 @@ def test_join_on(setup):
expected4.set_index("a2", inplace=True)
result4.set_index("a2", inplace=True)
pd.testing.assert_frame_equal(
- sort_dataframe_inplace(expected4, 0), sort_dataframe_inplace(result4, 0)
+ sort_dataframe_inplace(expected4, 0, kind="mergesort"),
+ sort_dataframe_inplace(result4, 0, kind="mergesort"),
)


def test_merge_one_chunk(setup):
+ def sort_by_col1(df):
+     return df.sort_values(by=df.columns[1], kind="mergesort")
+
df1 = pd.DataFrame(
{"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]},
index=["a1", "a2", "a3", "a4"],
@@ -348,8 +352,8 @@ def test_merge_one_chunk(setup):
result = jdf.execute().fetch()

pd.testing.assert_frame_equal(
- expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
- result.sort_values(by=result.columns[1]).reset_index(drop=True),
+ sort_by_col1(expected).reset_index(drop=True),
+ sort_by_col1(result).reset_index(drop=True),
)

# right have one chunk
@@ -361,8 +365,8 @@ def test_merge_one_chunk(setup):
result = jdf.execute().fetch()

pd.testing.assert_frame_equal(
- expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
- result.sort_values(by=result.columns[1]).reset_index(drop=True),
+ sort_by_col1(expected).reset_index(drop=True),
+ sort_by_col1(result).reset_index(drop=True),
)

# left have one chunk and how="left", then one chunk tile
@@ -377,8 +381,8 @@ def test_merge_one_chunk(setup):
result = jdf.execute().fetch()

pd.testing.assert_frame_equal(
- expected.sort_values(by=expected.columns[1]).reset_index(drop=True),
- result.sort_values(by=result.columns[1]).reset_index(drop=True),
+ sort_by_col1(expected).reset_index(drop=True),
+ sort_by_col1(result).reset_index(drop=True),
)


@@ -418,7 +422,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
- sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
+ sort_dataframe_inplace(expected, 0, kind="mergesort"),
+ sort_dataframe_inplace(result, 0, kind="mergesort"),
)

# test broadcast right and how="left"
@@ -438,8 +443,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
- expected.sort_values(by=["key", "value_x"]),
- result.sort_values(by=["key", "value_x"]),
+ expected.sort_values(by=["key", "value_x"], kind="mergesort"),
+ result.sort_values(by=["key", "value_x"], kind="mergesort"),
)

# test broadcast left
@@ -459,7 +464,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
- sort_dataframe_inplace(expected, 0), sort_dataframe_inplace(result, 0)
+ sort_dataframe_inplace(expected, 0, kind="mergesort"),
+ sort_dataframe_inplace(result, 0, kind="mergesort"),
)

# test broadcast left and how="right"
@@ -479,8 +485,8 @@ def test_broadcast_merge(setup):
expected.set_index("key", inplace=True)
result.set_index("key", inplace=True)
pd.testing.assert_frame_equal(
- expected.sort_values(by=["key", "value_x"]),
- result.sort_values(by=["key", "value_x"]),
+ expected.sort_values(by=["key", "value_x"], kind="mergesort"),
+ result.sort_values(by=["key", "value_x"], kind="mergesort"),
)


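Note: the test changes above swap every comparison sort for an explicitly stable kind="mergesort" sort. A minimal pandas sketch of why stability matters when sort keys repeat (the frame and its column names below are illustrative only, not taken from the tests):

    import pandas as pd

    # Illustrative frame: two rows share the same sort key.
    df = pd.DataFrame({"key": ["a", "a"], "value": [2, 1]}, index=[10, 20])

    # The default kind="quicksort" gives no guarantee on the relative order of
    # tied keys, so two frames with identical content can sort into different
    # row orders.
    unstable = df.sort_values(by="key")

    # kind="mergesort" is stable: rows with equal keys keep their original
    # order, which makes assert_frame_equal deterministic after both sides
    # are sorted.
    stable = df.sort_values(by="key", kind="mergesort")
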
4 changes: 2 additions & 2 deletions mars/dataframe/utils.py
@@ -106,9 +106,9 @@ def hash_dtypes(dtypes, size):
return [dtypes[index] for index in hashed_indexes]


- def sort_dataframe_inplace(df, *axis):
+ def sort_dataframe_inplace(df, *axis, **kw):
for ax in axis:
- df.sort_index(axis=ax, inplace=True)
+ df.sort_index(axis=ax, inplace=True, **kw)
return df


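With the **kw forwarding above, callers of sort_dataframe_inplace can pass any DataFrame.sort_index keyword through, which is how the tests request a stable sort. A small usage sketch (the frame is illustrative; the import path is the one shown in this diff):

    import pandas as pd
    from mars.dataframe.utils import sort_dataframe_inplace

    df = pd.DataFrame({"b": [2, 1], "a": [4, 3]}, index=[1, 0])

    # Sort rows (axis 0) by index; extra keywords are forwarded to sort_index.
    sort_dataframe_inplace(df, 0, kind="mergesort")

    # Several axes can be given at once, e.g. rows and columns.
    sort_dataframe_inplace(df, 0, 1, kind="mergesort")
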
2 changes: 1 addition & 1 deletion mars/learn/contrib/lightgbm/_predict.py
@@ -78,7 +78,7 @@ def __call__(self):
elif hasattr(self.model, "classes_"):
dtype = np.array(self.model.classes_).dtype
else:
- dtype = getattr(self.model, "out_dtype_", np.dtype("float"))
+ dtype = getattr(self.model, "out_dtype_", [np.dtype("float")])[0]

if self.output_types[0] == OutputType.tensor:
# tensor
6 changes: 3 additions & 3 deletions mars/learn/contrib/lightgbm/_train.py
@@ -406,11 +406,11 @@ def execute(cls, ctx, op: "LGBMTrain"):
op.model_type == LGBMModelType.RANKER
or op.model_type == LGBMModelType.REGRESSOR
):
- model.set_params(out_dtype_=np.dtype("float"))
+ model.set_params(out_dtype_=[np.dtype("float")])
elif hasattr(label_val, "dtype"):
- model.set_params(out_dtype_=label_val.dtype)
+ model.set_params(out_dtype_=[label_val.dtype])
else:
- model.set_params(out_dtype_=label_val.dtypes[0])
+ model.set_params(out_dtype_=[label_val.dtypes[0]])

ctx[op.outputs[0].key] = pickle.dumps(model)
finally:
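Together with the _predict.py change above, the training side now stores the output dtype wrapped in a one-element list and the prediction side unwraps it, falling back to float when the attribute is missing. A minimal sketch of that store/unwrap pattern on a plain placeholder object (not a real LightGBM model):

    import numpy as np

    class DummyModel:  # placeholder standing in for the trained LGBM model
        pass

    model = DummyModel()

    # Training side: keep the dtype inside a one-element list.
    model.out_dtype_ = [np.dtype("int64")]

    # Prediction side: unwrap element 0, defaulting to float if unset.
    dtype = getattr(model, "out_dtype_", [np.dtype("float")])[0]
    assert dtype == np.dtype("int64")
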
2 changes: 1 addition & 1 deletion mars/learn/linear_model/_base.py
@@ -301,7 +301,7 @@ def fit(self, X, y, sample_weight=None):
self.coef_.execute()
except LinAlgError:
# TODO: implement linalg.lstsq first
- raise NotImplementedError("Does not support sigular matrix!")
+ raise NotImplementedError("Does not support singular matrix!")

if y.ndim == 1:
self.coef_ = mt.ravel(self.coef_)
4 changes: 2 additions & 2 deletions mars/learn/linear_model/tests/test_base.py
@@ -53,7 +53,7 @@ def test_linear_regression(setup):
assert_array_almost_equal(reg.predict(X), model.predict(X))

# Regular model fitting, #samples <= 2, # features < 2
- error_msg = re.escape("Does not support sigular matrix!")
+ error_msg = re.escape("Does not support singular matrix!")

X = [[1], [2]]
Y = [1, 2]
@@ -69,7 +69,7 @@
assert_array_almost_equal(reg.predict(X), model.predict(X))

# Extra case #1: singular matrix, degenerate input
- error_msg = re.escape("Does not support sigular matrix!")
+ error_msg = re.escape("Does not support singular matrix!")

X = [[1]]
Y = [0]
2 changes: 1 addition & 1 deletion mars/learn/metrics/pairwise/pairwise.py
@@ -72,7 +72,7 @@
"precomputed": None, # HACK: precomputed is always allowed, never called
}

- # These distances recquire boolean tensors, when using mars.tensor.spatial.distance
+ # These distances require boolean tensors, when using mars.tensor.spatial.distance
PAIRWISE_BOOLEAN_FUNCTIONS = [
"dice",
"jaccard",
2 changes: 1 addition & 1 deletion mars/tensor/base/tile.py
@@ -30,7 +30,7 @@ def tile(A, reps):
behavior, promote `A` to d-dimensions manually before calling this
function.
- If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it.
+ If ``A.ndim > d``, `reps` is promoted to `A`.ndim by prepending 1's to it.
Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as
(1, 1, 2, 2).
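The docstring above describes how `reps` is promoted when it has fewer entries than `A.ndim`. A quick NumPy illustration of the same rule (the mars.tensor.tile docstring above mirrors NumPy's semantics):

    import numpy as np

    a = np.zeros((2, 3, 4, 5))

    # reps=(2, 2) is promoted to (1, 1, 2, 2) by prepending 1's,
    # so only the last two axes are tiled.
    assert np.tile(a, (2, 2)).shape == (2, 3, 8, 10)
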
2 changes: 1 addition & 1 deletion mars/tensor/utils.py
@@ -774,7 +774,7 @@ def fetch_corner_data(tensor, session=None):
# the tensor must have been executed,
# thus the size could not be NaN
if tensor.size > threshold:
- # two edges for each exis
+ # two edges for each axis
indices_iter = list(itertools.product(*(range(2) for _ in range(tensor.ndim))))
corners = np.empty(shape=(2,) * tensor.ndim, dtype=object)
shape = [0 for _ in range(tensor.ndim)]
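For context on the corrected comment: fetch_corner_data takes two edge slices per axis, and the itertools.product call enumerates every corner block. A small sketch of that enumeration for a 2-D tensor (ndim is illustrative; the real code uses tensor.ndim):

    import itertools

    ndim = 2

    # One 0/1 entry per axis -> indices of the 2**ndim corner blocks.
    corners = list(itertools.product(*(range(2) for _ in range(ndim))))
    assert corners == [(0, 0), (0, 1), (1, 0), (1, 1)]
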
2 changes: 1 addition & 1 deletion setup.cfg
@@ -30,7 +30,7 @@ install_requires =
pandas>=1.0.0,<2.0.0
scipy>=1.0.0
scikit-learn>=0.20
- numexpr>=2.6.4,!=2.8.5
+ numexpr>=2.6.4,!=2.8.5,!=2.8.6
cloudpickle>=1.5.0
pyyaml>=5.1
psutil>=5.9.0
