Skip to content

Commit

Permalink
Implementation changes on the merge algorithm.
Browse files Browse the repository at this point in the history
- The optimistic merge implementation is now
  easier to read and reason about.
- The parameters are not mutated by a side-effect,
  a new copy is returned instead.
- Tests have been updated, and are more readable
- A test has been added to ensure that conflicts
  are handled properly when they arise.
  • Loading branch information
almet committed Nov 24, 2023
1 parent cad5afc commit a9f80d4
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 78 deletions.
152 changes: 124 additions & 28 deletions umap/tests/test_datalayer_views.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import time
from copy import deepcopy
from pathlib import Path

import pytest
Expand Down Expand Up @@ -392,62 +393,157 @@ def test_anonymous_user_can_edit_if_inherit_and_map_in_public_mode(
assert modified_datalayer.name == name


def test_optimistic_merge(client, datalayer, map):
@pytest.fixture
def reference_data():
return {
"type": "FeatureCollection",
"features": [
{
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [-1, 2]},
"properties": {"_umap_options": {}, "name": "foo"},
},
{
"type": "Feature",
"geometry": {"type": "LineString", "coordinates": [2, 3]},
"properties": {"_umap_options": {}, "name": "bar"},
},
{
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [3, 4]},
"properties": {"_umap_options": {}, "name": "marker"},
},
],
"_umap_options": {
"displayOnLoad": True,
"name": "new name",
"id": 1668,
"remoteData": {},
"color": "LightSeaGreen",
"description": "test",
},
}


def test_optimistic_merge_both_added(client, datalayer, map, reference_data):
url = reverse("datalayer_update", args=(map.pk, datalayer.pk))
client.login(username=map.owner.username, password="123123")

# Client 1 data contains
# Point (-1, 2); Linestring(2, 3); Point 3, 4
# Reference data is:
# Point (-1, 2); Linestring (2, 3); Point (3, 4).

post_data = {
"name": "name",
"display_on_load": True,
"rank": 0,
"geojson": SimpleUploadedFile(
"foo.json",
b'{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[-1,2]},"properties":{"_umap_options":{},"name":"foo"}},{"type":"Feature","geometry":{"type":"LineString","coordinates": [2,3]},"properties":{"_umap_options":{},"name":"bar"}},{"type":"Feature","geometry":{"type":"Point","coordinates":[3,4]},"properties":{"_umap_options":{},"name":"marker"}}],"_umap_options":{"displayOnLoad":true,"name":"new name","id":1668,"remoteData":{},"color":"LightSeaGreen","description":"test"}}',
"foo.json", json.dumps(reference_data).encode("utf-8")
),
}
response = client.post(url, post_data, follow=True)
assert response.status_code == 200

response = client.get(reverse("datalayer_view", args=(map.pk, datalayer.pk)))
last_modified = response["Last-Modified"]
reference_timestamp = response["Last-Modified"]

# XXX Change the Last modified otherwise ? (fake time ?)
# Client 2 data, add "Point 5, 6" to the existing data

# Pretend someone else is adding one data
# Client 1 adds "Point 5, 6" to the existing data
client1_feature = {
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [5, 6]},
"properties": {"_umap_options": {}, "name": "marker"},
}
client1_data = deepcopy(reference_data)
client1_data["features"].append(client1_feature)
# Sleep to change the current timestamp (used in the If-Unmodified-Since header)
time.sleep(1)
post_data["geojson"] = SimpleUploadedFile(
"foo.json",
b'{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[-1,2]},"properties":{"_umap_options":{},"name":"foo"}},{"type":"Feature","geometry":{"type":"LineString","coordinates": [2,3]},"properties":{"_umap_options":{},"name":"bar"}},{"type":"Feature","geometry":{"type":"Point","coordinates":[3,4]},"properties":{"_umap_options":{},"name":"marker"}},{"type":"Feature","geometry":{"type":"Point","coordinates":[5,6]},"properties":{"_umap_options":{},"name":"new from someone else"}}],"_umap_options":{"displayOnLoad":true,"name":"new name","id":1668,"remoteData":{},"color":"LightSeaGreen","description":"test"}}',
json.dumps(client1_data).encode("utf-8"),
)
response = client.post(
url, post_data, follow=True, HTTP_IF_UNMODIFIED_SINCE=last_modified
url, post_data, follow=True, HTTP_IF_UNMODIFIED_SINCE=reference_timestamp
)
assert response.status_code == 200

# Client 1 data, adds "Point 7, 8" instead (there is a conflict)
# Client 2 adds "Point 7, 8" instead, on the same reference.
client2_feature = {
"type": "Feature",
"geometry": {"type": "Point", "coordinates": [7, 8]},
"properties": {"_umap_options": {}, "name": "marker"},
}
client2_data = deepcopy(reference_data)
client2_data["features"].append(client2_feature)

post_data["geojson"] = SimpleUploadedFile(
"foo.json",
b'{"type":"FeatureCollection","features":[{"type":"Feature","geometry":{"type":"Point","coordinates":[-1,2]},"properties":{"_umap_options":{},"name":"foo"}},{"type":"Feature","geometry":{"type":"LineString","coordinates": [2,3]},"properties":{"_umap_options":{},"name":"bar"}},{"type":"Feature","geometry":{"type":"Point","coordinates":[3,4]},"properties":{"_umap_options":{},"name":"marker"}},{"type":"Feature","geometry":{"type":"Point","coordinates":[7,8]},"properties":{"_umap_options":{},"name":"new from us"}}],"_umap_options":{"displayOnLoad":true,"name":"new name","id":1668,"remoteData":{},"color":"LightSeaGreen","description":"test"}}',
json.dumps(client2_data).encode("utf-8"),
)
response = client.post(
url, post_data, follow=True, HTTP_IF_UNMODIFIED_SINCE=last_modified
url, post_data, follow=True, HTTP_IF_UNMODIFIED_SINCE=reference_timestamp
)
assert response.status_code == 200
modified_datalayer = DataLayer.objects.get(pk=datalayer.pk)
assert modified_datalayer.geojson.read().decode() == (
'{"type": "FeatureCollection", "features": [{"type": "Feature", "geometry": {'
'"type": "Point", "coordinates": [-1, 2]}, "properties": {"_umap_options": {}'
', "name": "foo"}}, {"type": "Feature", "geometry": {"type": "LineString", "c'
'oordinates": [2, 3]}, "properties": {"_umap_options": {}, "name": "bar"}}, {'
'"type": "Feature", "geometry": {"type": "Point", "coordinates": [3, 4]}, "pr'
'operties": {"_umap_options": {}, "name": "marker"}}, {"type": "Feature", "ge'
'ometry": {"type": "Point", "coordinates": [5, 6]}, "properties": {"_umap_opt'
'ions": {}, "name": "new from someone else"}}, {"type": "Feature", "geometry"'
': {"type": "Point", "coordinates": [7, 8]}, "properties": {"_umap_options": '
'{}, "name": "new from us"}}], "_umap_options": {"displayOnLoad": true, "name'
'": "new name", "id": 1668, "remoteData": {}, "color": "LightSeaGreen", "desc'
'ription": "test"}}'
merged_features = json.load(modified_datalayer.geojson)["features"]

for reference_feature in reference_data["features"]:
assert reference_feature in merged_features

assert client1_feature in merged_features
assert client2_feature in merged_features


def test_optimistic_merge_conflicting_change_raises(
client, datalayer, map, reference_data
):
url = reverse("datalayer_update", args=(map.pk, datalayer.pk))
client.login(username=map.owner.username, password="123123")

# Reference data is:
# Point (-1, 2); Linestring (2, 3); Point (3, 4).

post_data = {
"name": "name",
"display_on_load": True,
"rank": 0,
"geojson": SimpleUploadedFile(
"foo.json", json.dumps(reference_data).encode("utf-8")
),
}
response = client.post(url, post_data, follow=True)
assert response.status_code == 200

response = client.get(reverse("datalayer_view", args=(map.pk, datalayer.pk)))
reference_timestamp = response["Last-Modified"]

# First client changes the first feature.
client1_data = deepcopy(reference_data)
client1_data["features"][0]["geometry"] = {"type": "Point", "coordinates": [5, 6]}

# Sleep to change the current timestamp (used in the If-Unmodified-Since header)
time.sleep(1)
post_data["geojson"] = SimpleUploadedFile(
"foo.json",
json.dumps(client1_data).encode("utf-8"),
)
response = client.post(
url, post_data, follow=True, HTTP_IF_UNMODIFIED_SINCE=reference_timestamp
)
assert response.status_code == 200

# Second client changes the first feature as well.
client2_data = deepcopy(reference_data)
client2_data["features"][0]["geometry"] = {"type": "Point", "coordinates": [7, 8]}

post_data["geojson"] = SimpleUploadedFile(
"foo.json",
json.dumps(client2_data).encode("utf-8"),
)
response = client.post(
url, post_data, follow=True, HTTP_IF_UNMODIFIED_SINCE=reference_timestamp
)
assert response.status_code == 412

# Check that the server rejected conflicting changes.
modified_datalayer = DataLayer.objects.get(pk=datalayer.pk)
merged_features = json.load(modified_datalayer.geojson)["features"]
assert merged_features == client1_data["features"]
53 changes: 25 additions & 28 deletions umap/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,33 +118,30 @@ def is_ajax(request):
return request.headers.get("x-requested-with") == "XMLHttpRequest"


def merge_features(reference: list, latest: list, entrant: list):
# Just in case (eg. both removed the same element, or changed only metadatas)
if latest == entrant:
class ConflictError(ValueError):
pass


def merge_features(reference: list, latest: list, incoming: list):
"""Finds the changes between reference and incoming, and reapplies them on top of latest."""
if latest == incoming:
return latest

# Remove common features between entrant and reference versions (unchanged ones).
for feature in reference[:]:
for other in entrant[:]:
if feature == other:
entrant.remove(feature)
reference.remove(feature)
break # no need to continue looking

# Now make sure remaining features are still in latest version
for feature in reference:
for other in latest:
if other == feature:
break
else:
# We cannot distinguish the case where both deleted the same
# element and added others, or where both modified the same
# element, so let's raise a conflict by caution.
raise ValueError

# We can merge.
for feature in reference:
latest.remove(feature)
for feature in entrant:
latest.append(feature)
return latest
removed = [item for item in reference if item not in incoming]
added = [item for item in incoming if item not in reference]

# Ensure that items changed in the reference weren't also changed in the latest.
for item in removed:
if item not in latest:
raise ConflictError()

merged = latest[:]

# Reapply the changes on top of the latest.
for item in removed:
merged.delete(item)

for item in added:
merged.append(item)

return merged
48 changes: 26 additions & 22 deletions umap/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
UserProfileForm,
)
from .models import DataLayer, Licence, Map, Pictogram, Star, TileLayer
from .utils import get_uri_template, gzip_file, is_ajax, merge_features
from .utils import ConflictError, get_uri_template, gzip_file, is_ajax, merge_features

User = get_user_model()

Expand Down Expand Up @@ -933,36 +933,28 @@ class DataLayerUpdate(FormLessEditMixin, GZipMixin, UpdateView):
model = DataLayer
form_class = DataLayerForm

def form_valid(self, form):
self.object = form.save()
# Simple response with only metadata (client should not reload all data
# on save)

data = {**self.object.metadata(self.request.user, self.request)}
if self.request.session.get("needs_reload"):
data["geojson"] = json.loads(self.object.geojson.read().decode())
self.request.session["needs_reload"] = False
response = simple_json_response(**data)

response["Last-Modified"] = self.last_modified
return response

def has_been_modified_since(self, if_unmodified_since):
if if_unmodified_since and self.last_modified != if_unmodified_since:
return True
return False

def merge(self, if_unmodified_since):
# Reference data source of the edition.
"""
Attempt to apply the incoming changes to the document the client was using, and
then merge it with the last document we have on storage.
Returns either None (if the merge failed) or the merged python GeoJSON object.
"""

# Use If-Modified-Since to find the correct version in our storage.
for name in self.object.get_versions():
path = os.path.join(settings.MEDIA_ROOT, self.object.get_version_path(name))
if if_unmodified_since == self.compute_last_modified(path):
with open(path) as f:
reference = json.loads(f.read())
break
else:
# No reference version found, can't merge.
# If the document is not found, we can't merge.
return None

# New data received in the request.
Expand All @@ -973,14 +965,14 @@ def merge(self, if_unmodified_since):
latest = json.loads(f.read())

try:
merge_features(
merged_features = merge_features(
reference["features"], latest["features"], entrant["features"]
)
except ValueError:
latest["features"] = merged_features
return latest
except ConflictError:
return None

return latest

def post(self, request, *args, **kwargs):
self.object = self.get_object()
if self.object.map.pk != int(self.kwargs["map_id"]):
Expand All @@ -1000,10 +992,22 @@ def post(self, request, *args, **kwargs):
self.request.FILES["geojson"].file = BytesIO(
json.dumps(merged).encode("utf-8")
)
# Mark the

# Mark the data to be reloaded by form_valid
self.request.session["needs_reload"] = True
return super().post(request, *args, **kwargs)

def form_valid(self, form):
self.object = form.save()
data = {**self.object.metadata(self.request.user, self.request)}
if self.request.session.get("needs_reload"):
data["geojson"] = json.loads(self.object.geojson.read().decode())
self.request.session["needs_reload"] = False
response = simple_json_response(**data)

response["Last-Modified"] = self.last_modified
return response


class DataLayerDelete(DeleteView):
model = DataLayer
Expand Down

0 comments on commit a9f80d4

Please sign in to comment.