Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix + Unify digraph and multidigraph behaviour #46

Merged
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 35 additions & 12 deletions grandcypher/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def _data_path_to_entity_name_attribute(data_path):

class _GrandCypherTransformer(Transformer):
def __init__(self, target_graph: nx.Graph, limit=None):
self._target_graph = target_graph
self._target_graph = nx.MultiDiGraph(target_graph)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a super smart change and simplifies a ton — good thinking! There's probably a ton of business logic we can strip out as a result... Thinking out loud, it may make sense to add a test coverage library to auto-detect those chunks...

any performance hits you can think of as a result of doing this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a test coverage library to auto-detect those chunks

That sounds like a great idea! I haven't used many test coverage libraries myself, so I'm open to suggestions :)

Also, w.r.t. the performance hit, I'm unsure about the impact of changing to MultiDiGraph -- at least in practice it appears to be similar. It's probably a good idea to benchmark future versions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've been liking codspeed (e.g., aplbrain/grand#48) — maybe a cool thing to extend to this repo someday!

self._paths = []
self._where_condition: CONDITION = None
self._motif = nx.MultiDiGraph()
Expand Down Expand Up @@ -491,12 +491,15 @@ def _lookup(self, data_paths: List[str], offset_limit) -> Dict[str, List]:
ret_with_attr = []
for r in ret:
r_attr = {}
for i, v in r.items():
r_attr[(i, list(v.get("__labels__"))[0])] = v.get(
entity_attribute, None
)
# eg, [{(0, 'paid'): 70, (1, 'paid'): 90}, {(0, 'paid'): 400, (1, 'friend'): None, (2, 'paid'): 650}]
ret_with_attr.append(r_attr)
if isinstance(r, dict):
r = [r]
for el in r:
for i, v in el.items():
r_attr[(i, list(v.get("__labels__", [i]))[0])] = v.get(
entity_attribute, None
)
# eg, [{(0, 'paid'): 70, (1, 'paid'): 90}, {(0, 'paid'): 400, (1, 'friend'): None, (2, 'paid'): 650}]
ret_with_attr.append(r_attr)

ret = ret_with_attr

Expand Down Expand Up @@ -665,11 +668,31 @@ def _apply_order_by(self, results):
for sort_list, direction in reversed(
sort_lists
): # reverse to ensure the first sort key is primary
indices = sorted(
indices,
key=lambda i: sort_list[i],
reverse=(direction == "DESC"),
)

if all(isinstance(item, dict) for item in sort_list):
# (for edge attributes) If all items in sort_list are dictionaries
# example: ([{(0, 'paid'): 9, (1, 'paid'): 40}, {(0, 'paid'): 14}], 'DESC')

# sort within each edge first
for i, sublist in enumerate(sort_list):
sort_list[i] = sorted(
sublist.items(),
key=lambda x: x[1] or 0, # 0 if `None`
reverse=(direction == "DESC"),
)
# then sort the indices based on the sorted sublists
indices = sorted(
indices,
key=lambda i: sort_list[i][0][1] or 0, # 0 if `None`
reverse=(direction == "DESC"),
)
else:
# (for node attributes) single values
indices = sorted(
indices,
key=lambda i: sort_list[i],
reverse=(direction == "DESC"),
)

# Reorder all lists in results using sorted indices
for key in results:
Expand Down
212 changes: 149 additions & 63 deletions grandcypher/test_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,60 @@ def test_order_by_single_field_no_direction_provided(self, graph_type):
res = GrandCypher(host).run(qry)
assert res["n.name"] == ["Carol", "Alice", "Bob"]

def test_order_by_edge_attribute1(self):
    """ORDER BY an edge attribute on a DiGraph host, ascending then descending."""
    host = nx.DiGraph()
    people = (("a", "Alice", 25), ("b", "Bob", 30), ("c", "Carol", 20))
    for node_id, person_name, person_age in people:
        host.add_node(node_id, name=person_name, age=person_age)
    # Insertion order is kept the same as the literal add_edge calls would give.
    payments = (("b", "a", 14), ("a", "b", 9), ("a", "c", 4))
    for src, dst, paid_value in payments:
        host.add_edge(src, dst, __labels__={"paid"}, value=paid_value)

    # Ascending: rows are expected smallest edge value first.
    ascending_query = """
MATCH (n)-[r]->(m)
RETURN n.name, r.value, m.name
ORDER BY r.value ASC
"""
    ascending = GrandCypher(host).run(ascending_query)
    assert ascending['n.name'] == ['Alice', 'Alice', 'Bob']
    assert ascending['m.name'] == ['Carol', 'Bob', 'Alice']
    expected_ascending = [
        [((0, 'paid'), 4)],
        [((0, 'paid'), 9)],
        [((0, 'paid'), 14)],
    ]
    assert ascending['r.value'] == expected_ascending

    # Descending: same edges, target node left anonymous in the pattern.
    descending_query = """
MATCH (n)-[r]->()
RETURN n.name, r.value
ORDER BY r.value DESC
"""
    descending = GrandCypher(host).run(descending_query)
    assert descending['n.name'] == ['Bob', 'Alice', 'Alice']
    expected_descending = [
        [((0, 'paid'), 14)],
        [((0, 'paid'), 9)],
        [((0, 'paid'), 4)],
    ]
    assert descending['r.value'] == expected_descending


def test_order_by_edge_attribute2(self):
    """ORDER BY an edge attribute on a DiGraph host where one edge lacks that attribute."""
    host = nx.DiGraph()
    people = (("a", "Alice", 25), ("b", "Bob", 30), ("c", "Carol", 20))
    for node_id, person_name, person_age in people:
        host.add_node(node_id, name=person_name, age=person_age)
    payments = (
        ("b", "a", {"amount": 14}),  # uses `amount`, so it has no `value`
        ("a", "b", {"value": 9}),
        ("c", "b", {"value": 980}),
        ("b", "c", {"value": 11}),
    )
    for src, dst, attrs in payments:
        host.add_edge(src, dst, __labels__={"paid"}, **attrs)

    query = """
MATCH (n)-[r]->(m)
RETURN n.name, r.value, m.name
ORDER BY r.value ASC
"""
    result = GrandCypher(host).run(query)
    assert result['n.name'] == ['Bob', 'Alice', 'Bob', 'Carol']
    expected_values = [
        [((0, 'paid'), None)],  # the edge without `value` is expected first
        [((0, 'paid'), 9)],
        [((0, 'paid'), 11)],
        [((0, 'paid'), 980)],
    ]
    assert result['r.value'] == expected_values
    assert result['m.name'] == ['Alice', 'Bob', 'Carol', 'Bob']

@pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES)
def test_order_by_multiple_fields(self, graph_type):
host = graph_type()
Expand Down Expand Up @@ -1031,6 +1085,62 @@ def test_multigraph_multiple_same_edge_labels(self):
# the second "paid" edge between Bob -> Alice has no "amount" attribute, so it should be None
assert res["r.amount"] == [{(0, 'paid'): 12, (1, 'friends'): None, (2, 'paid'): 40}, {(0, 'paid'): 6, (1, 'paid'): None}]

def test_order_by_edge_attribute1(self):
    """ORDER BY an edge attribute on a MultiDiGraph host with parallel edges."""
    host = nx.MultiDiGraph()
    people = (("a", "Alice", 25), ("b", "Bob", 30), ("c", "Carol", 20))
    for node_id, person_name, person_age in people:
        host.add_node(node_id, name=person_name, age=person_age)
    # Two parallel a->b edges; insertion order matches the literal add_edge calls.
    payments = (("b", "a", 14), ("a", "b", 9), ("a", "b", 40))
    for src, dst, paid_value in payments:
        host.add_edge(src, dst, __labels__={"paid"}, value=paid_value)

    ascending_query = """
MATCH (n)-[r]->()
RETURN n.name, r.value
ORDER BY r.value ASC
"""
    ascending = GrandCypher(host).run(ascending_query)
    assert ascending['n.name'] == ['Alice', 'Bob']
    expected_ascending = [
        [((0, 'paid'), 9), ((1, 'paid'), 40)],
        [((0, 'paid'), 14)],
    ]
    assert ascending['r.value'] == expected_ascending

    descending_query = """
MATCH (n)-[r]->()
RETURN n.name, r.value
ORDER BY r.value DESC
"""
    descending = GrandCypher(host).run(descending_query)
    assert descending['n.name'] == ['Alice', 'Bob']
    # The parallel a->b edges are expected largest-first within their row.
    expected_descending = [
        [((1, 'paid'), 40), ((0, 'paid'), 9)],
        [((0, 'paid'), 14)],
    ]
    assert descending['r.value'] == expected_descending

def test_order_by_edge_attribute2(self):
    """ORDER BY an edge attribute on a MultiDiGraph host with missing and duplicate values."""
    host = nx.MultiDiGraph()
    people = (("a", "Alice", 25), ("b", "Bob", 30), ("c", "Carol", 20))
    for node_id, person_name, person_age in people:
        host.add_node(node_id, name=person_name, age=person_age)
    payments = (
        ("b", "a", {"amount": 14}),  # uses `amount`, so it has no `value`
        ("a", "b", {"value": 9}),
        ("c", "b", {"value": 980}),
        ("c", "b", {"value": 4}),
        ("b", "c", {"value": 11}),
        ("a", "b", {"value": 40}),
        ("b", "a", {"value": 14}),  # duplicate edge
        ("a", "b", {"value": 9}),  # duplicate edge
        ("a", "b", {"value": 40}),  # duplicate edge
    )
    for src, dst, attrs in payments:
        host.add_edge(src, dst, __labels__={"paid"}, **attrs)

    query = """
MATCH (n)-[r]->(m)
RETURN n.name, r.value, m.name
ORDER BY r.value ASC
"""
    result = GrandCypher(host).run(query)
    expected_values = [
        # b->a: the edge without `value` is expected first, as None
        [((0, 'paid'), None), ((1, 'paid'), 14)],
        # c->b: parallel edges are expected in ascending order within the row
        [((1, 'paid'), 4), ((0, 'paid'), 980)],
        # a->b: four parallel edges, with duplicate values adjacent
        [((0, 'paid'), 9), ((2, 'paid'), 9), ((1, 'paid'), 40), ((3, 'paid'), 40)],
        # b->c: single edge
        [((0, 'paid'), 11)],
    ]
    assert result['r.value'] == expected_values
    assert result['m.name'] == ['Alice', 'Bob', 'Bob', 'Carol']

def test_multigraph_aggregation_function_sum(self):
host = nx.MultiDiGraph()
host.add_node("a", name="Alice", age=25)
Expand Down Expand Up @@ -1171,11 +1281,11 @@ def test_single_variable_length_relationship(self, graph_type):
assert res["A"] == ["x", "y", "z"]
assert res["B"] == ["y", "z", "x"]
assert graph_type in ACCEPTED_GRAPH_TYPES
if graph_type is nx.DiGraph:
assert res["r"] == [[{"bar": "1"}], [{"bar": "2"}], [{"bar": "3"}]]
elif graph_type is nx.MultiDiGraph:
# MultiDiGraphs return a list of dictionaries to accommodate multiple edges between nodes
assert res["r"] == [[{0: {'bar': '1'}}], [{0: {'bar': '2'}}], [{0: {'bar': '3'}}]]
# if graph_type is nx.DiGraph:
# assert res["r"] == [[{"bar": "1"}], [{"bar": "2"}], [{"bar": "3"}]]
# elif graph_type is nx.MultiDiGraph:
# # MultiDiGraphs return a list of dictionaries to accommodate multiple edges between nodes
assert res["r"] == [[{0: {'bar': '1'}}], [{0: {'bar': '2'}}], [{0: {'bar': '3'}}]]

qry = """
MATCH (A)-[r*2]->(B)
Expand All @@ -1187,18 +1297,20 @@ def test_single_variable_length_relationship(self, graph_type):
assert res["A"] == ["x", "y", "z"]
assert res["B"] == ["z", "x", "y"]
assert graph_type in ACCEPTED_GRAPH_TYPES
if graph_type is nx.DiGraph:
assert res["r"] == [
[{"bar": "1"}, {"bar": "2"}],
[{"bar": "2"}, {"bar": "3"}],
[{"bar": "3"}, {"bar": "1"}],
]
elif graph_type is nx.MultiGraph:
assert res["r"] == [
[{0: {'bar': '1'}}, {1: {'bar': '2'}}],
[{0: {'bar': '2'}}, {1: {'bar': '3'}}],
[{0: {'bar': '3'}}, {1: {'bar': '1'}}],
]
# if graph_type is nx.DiGraph:
# assert res["r"] == [
# [{"bar": "1"}, {"bar": "2"}],
# [{"bar": "2"}, {"bar": "3"}],
# [{"bar": "3"}, {"bar": "1"}],
# ]
# elif graph_type is nx.MultiDiGraph:
assert res["r"] == [
[{0: {'bar': '1'}}, {0: {'bar': '2'}}],
[{0: {'bar': '2'}}, {0: {'bar': '3'}}],
[{0: {'bar': '3'}}, {0: {'bar': '1'}}]
]
# else:
# raise Exception("Invalid graph type")

@pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES)
def test_complex_variable_length_relationship(self, graph_type):
Expand All @@ -1220,28 +1332,15 @@ def test_complex_variable_length_relationship(self, graph_type):
assert res["A"] == ["x", "y", "z", "x", "y", "z", "x", "y", "z"]
assert res["B"] == ["x", "y", "z", "y", "z", "x", "z", "x", "y"]
assert graph_type in ACCEPTED_GRAPH_TYPES
if graph_type is nx.DiGraph:
assert res["r"] == [
[None],
[None],
[None],
[{"bar": "1"}],
[{"bar": "2"}],
[{"bar": "3"}],
[{"bar": "1"}, {"bar": "2"}],
[{"bar": "2"}, {"bar": "3"}],
[{"bar": "3"}, {"bar": "1"}],
]
elif graph_type is nx.MultiDiGraph:
assert res["r"] == [
[None], [None], [None],
[{0: {'bar': '1'}}],
[{0: {'bar': '2'}}],
[{0: {'bar': '3'}}],
[{0: {'bar': '1'}}, {0: {'bar': '2'}}],
[{0: {'bar': '2'}}, {0: {'bar': '3'}}],
[{0: {'bar': '3'}}, {0: {'bar': '1'}}]
]
assert res["r"] == [
[None], [None], [None],
[{0: {'bar': '1'}}],
[{0: {'bar': '2'}}],
[{0: {'bar': '3'}}],
[{0: {'bar': '1'}}, {0: {'bar': '2'}}],
[{0: {'bar': '2'}}, {0: {'bar': '3'}}],
[{0: {'bar': '3'}}, {0: {'bar': '1'}}]
]


class TestType:
Expand Down Expand Up @@ -1347,30 +1446,17 @@ def test_edge_type_hop(self, graph_type):
assert res["A"] == ["x", "y", "z", "x", "y", "z", "x", "y", "z"]
assert res["B"] == ["x", "y", "z", "y", "z", "x", "z", "x", "y"]
assert graph_type in ACCEPTED_GRAPH_TYPES
if graph_type is nx.DiGraph:
assert res["r"] == [
[None],
[None],
[None],
[{"__labels__": {"Edge", "XY"}}],
[{"__labels__": {"Edge", "YZ"}}],
[{"__labels__": {"Edge", "ZX"}}],
[{"__labels__": {"Edge", "XY"}}, {"__labels__": {"Edge", "YZ"}}],
[{"__labels__": {"Edge", "YZ"}}, {"__labels__": {"Edge", "ZX"}}],
[{"__labels__": {"Edge", "ZX"}}, {"__labels__": {"Edge", "XY"}}],
]
elif graph_type is nx.MultiDiGraph:
assert res["r"] == [
[None],
[None],
[None],
[{0: {'__labels__': {'Edge', 'XY'}}}],
[{0: {'__labels__': {'Edge', 'YZ'}}}],
[{0: {'__labels__': {'Edge', 'ZX'}}}],
[{0: {'__labels__': {'Edge', 'XY'}}}, {0: {'__labels__': {'Edge', 'YZ'}}}],
[{0: {'__labels__': {'Edge', 'YZ'}}}, {0: {'__labels__': {'Edge', 'ZX'}}}],
[{0: {'__labels__': {'Edge', 'ZX'}}}, {0: {'__labels__': {'Edge', 'XY'}}}]
]
assert res["r"] == [
[None],
[None],
[None],
[{0: {'__labels__': {'Edge', 'XY'}}}],
[{0: {'__labels__': {'Edge', 'YZ'}}}],
[{0: {'__labels__': {'Edge', 'ZX'}}}],
[{0: {'__labels__': {'Edge', 'XY'}}}, {0: {'__labels__': {'Edge', 'YZ'}}}],
[{0: {'__labels__': {'Edge', 'YZ'}}}, {0: {'__labels__': {'Edge', 'ZX'}}}],
[{0: {'__labels__': {'Edge', 'ZX'}}}, {0: {'__labels__': {'Edge', 'XY'}}}]
]

@pytest.mark.parametrize("graph_type", ACCEPTED_GRAPH_TYPES)
def test_host_no_node_type(self, graph_type):
Expand Down
Loading