From a096f9e650258d3d9c4b7098f7a94f5607efbc3b Mon Sep 17 00:00:00 2001 From: alejandromumo Date: Tue, 4 Jun 2024 13:54:58 +0200 Subject: [PATCH 01/18] release: v5.3.0 --- CHANGES.rst | 4 ++++ invenio_users_resources/__init__.py | 2 +- setup.cfg | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7b82f91..532c068 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,10 @@ Changes ======= +Version 5.3.0 (released 2024-06-04) + +- installation: bump invenio-records-resources + Version 5.2.0 (released 2024-05-07) - groups: add permissions and config to control groups feature flag diff --git a/invenio_users_resources/__init__.py b/invenio_users_resources/__init__.py index 314b305..0230f76 100644 --- a/invenio_users_resources/__init__.py +++ b/invenio_users_resources/__init__.py @@ -9,6 +9,6 @@ """Invenio module providing management APIs for users and roles/groups.""" -__version__ = "5.2.0" +__version__ = "5.3.0" __all__ = ("__version__",) diff --git a/setup.cfg b/setup.cfg index 1bac82a..7d9a5b3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,7 +32,7 @@ install_requires = invenio-i18n>=2.0.0 invenio-notifications>=0.1.0,<1.0.0 invenio-oauthclient>=4.0.0,<5.0.0 - invenio-records-resources>=5.0.0,<6.0.0 + invenio-records-resources>=6.0.0,<7.0.0 [options.extras_require] tests = From 26494f938c275048e83531abb23bbf2c9ac1512b Mon Sep 17 00:00:00 2001 From: Sam Arbid Date: Thu, 27 Jun 2024 14:55:12 +0200 Subject: [PATCH 02/18] moderation: fix admin record / draft links * Fix admin user lists show no results * Closes https://github.com/inveniosoftware/invenio-app-rdm/issues/2721 * No need to change moderation links as they are functional --- invenio_users_resources/services/users/config.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/invenio_users_resources/services/users/config.py b/invenio_users_resources/services/users/config.py index 5542d5e..93f3b7c 100644 --- a/invenio_users_resources/services/users/config.py +++ b/invenio_users_resources/services/users/config.py @@ -2,6 +2,7 @@ # # Copyright (C) 2022 TU Wien. # Copyright (C) 2022 CERN. +# Copyright (C) 2024 KTH Royal Institute of Technology. # # Invenio-Users-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -194,10 +195,12 @@ class UsersServiceConfig(RecordServiceConfig, ConfiguratorMixin): "avatar": Link("{+api}/users/{id}/avatar.svg"), "records_html": Link("{+ui}/search/records?q=user:{id}"), "admin_records_html": Link( - "{+ui}/administration/records?q=user:{id}&f=allversions", when=can_manage + "{+ui}/administration/records?q=parent.access.owned_by.user:{id}&f=allversions", + when=can_manage, ), "admin_drafts_html": Link( - "{+ui}/administration/drafts?q=user:{id}&f=allversions", when=can_manage + "{+ui}/administration/drafts?q=parent.access.owned_by.user:{id}&f=allversions", + when=can_manage, ), "admin_moderation_html": Link( "{+ui}/administration/moderation?q=topic.user:{id}", when=can_manage From 6754cbf97cc704b7bb12852760ce1eb585c58cdc Mon Sep 17 00:00:00 2001 From: Sam Arbid Date: Wed, 10 Jul 2024 15:36:25 +0200 Subject: [PATCH 03/18] config: Update records_html link * The records_html link in the UsersServiceConfig class has been updated to include the "parent.access.owned_by.user" query parameter. * Add tests for admin links * Add tests for admin visibility links --- .../services/users/config.py | 2 +- tests/resources/test_resources_users.py | 60 ++++++++++++++++++- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/invenio_users_resources/services/users/config.py b/invenio_users_resources/services/users/config.py index 93f3b7c..2b8d00e 100644 --- a/invenio_users_resources/services/users/config.py +++ b/invenio_users_resources/services/users/config.py @@ -193,7 +193,7 @@ class UsersServiceConfig(RecordServiceConfig, ConfiguratorMixin): links_item = { "self": Link("{+api}/users/{id}"), "avatar": Link("{+api}/users/{id}/avatar.svg"), - "records_html": Link("{+ui}/search/records?q=user:{id}"), + "records_html": Link("{+ui}/search/records?q=parent.access.owned_by.user:{id}"), "admin_records_html": Link( "{+ui}/administration/records?q=parent.access.owned_by.user:{id}&f=allversions", when=can_manage, diff --git a/tests/resources/test_resources_users.py b/tests/resources/test_resources_users.py index 30c456c..78db21a 100644 --- a/tests/resources/test_resources_users.py +++ b/tests/resources/test_resources_users.py @@ -2,6 +2,7 @@ # # Copyright (C) 2022 European Union. # Copyright (C) 2022 CERN. +# Copyright (C) 2024 KTH Royal Institute of Technology. # # Invenio-Users-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -56,7 +57,7 @@ def test_read_self_serialization(client, headers, users, user_pub): assert data["links"] == { "self": f"https://127.0.0.1:5000/api/users/{user_pub.id}", "avatar": f"https://127.0.0.1:5000/api/users/{user_pub.id}/avatar.svg", - "records_html": f"https://127.0.0.1:5000/search/records?q=user:{user_pub.id}", + "records_html": f"https://127.0.0.1:5000/search/records?q=parent.access.owned_by.user:{user_pub.id}", } @@ -87,7 +88,7 @@ def test_read_anon_serialization(client, headers, users, username, public_email) assert data["links"] == { "self": f"https://127.0.0.1:5000/api/users/{u.id}", "avatar": f"https://127.0.0.1:5000/api/users/{u.id}/avatar.svg", - "records_html": f"https://127.0.0.1:5000/search/records?q=user:{u.id}", + "records_html": f"https://127.0.0.1:5000/search/records?q=parent.access.owned_by.user:{u.id}", } for k in [ @@ -182,6 +183,61 @@ def test_impersonate_user(client, headers, user_pub, user_moderator, db): assert res.status_code == 403 +@pytest.mark.parametrize( + "link_name,expected_url", + [ + ( + "admin_records_html", + "/administration/records?q=parent.access.owned_by.user:{id}&f=allversions", + ), + ( + "admin_drafts_html", + "/administration/drafts?q=parent.access.owned_by.user:{id}&f=allversions", + ), + ("admin_moderation_html", "/administration/moderation?q=topic.user:{id}"), + ], +) +def test_admin_links( + client, headers, user_moderator, user_pub, link_name, expected_url +): + """Test admin links.""" + client = user_moderator.login(client) + res = client.get(f"/users/{user_pub.id}", headers=headers) + assert res.status_code == 200 + data = res.json + assert link_name in data["links"] + assert ( + data["links"][link_name] + == f"https://127.0.0.1:5000{expected_url.format(id=user_pub.id)}" + ) + + +@pytest.mark.parametrize( + "username,expected_admin_links", + [ + ("user_moderator", True), # user_moderator should have admin links + ("pub", False), # regular user should not have admin links + ("res", False), # regular user should not have admin links + ], +) +def test_admin_links_visibility(client, headers, users, username, expected_admin_links): + """Test admin links visibility based on user permissions.""" + user = users[username] + client = user.login(client) + res = client.get(f"/users/{user.id}", headers=headers) + assert res.status_code == 200 + data = res.json + + if expected_admin_links: + assert "admin_records_html" in data["links"] + assert "admin_drafts_html" in data["links"] + assert "admin_moderation_html" in data["links"] + else: + assert "admin_records_html" not in data["links"] + assert "admin_drafts_html" not in data["links"] + assert "admin_moderation_html" not in data["links"] + + # TODO: test conditional requests # TODO: test caching headers # TODO: test invalid identifiers From 64b1a73cfce4bad004a045473a045dd60cfd4cc2 Mon Sep 17 00:00:00 2001 From: Christoph Ladurner Date: Tue, 30 Jul 2024 11:34:48 +0200 Subject: [PATCH 04/18] release: v5.3.1 --- CHANGES.rst | 5 +++++ invenio_users_resources/__init__.py | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 532c068..6717163 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,11 @@ Changes ======= +Version 5.3.1 (released 2024-07-30) + +- config: Update records_html link +- moderation: fix admin record / draft links + Version 5.3.0 (released 2024-06-04) - installation: bump invenio-records-resources diff --git a/invenio_users_resources/__init__.py b/invenio_users_resources/__init__.py index 0230f76..b3afc68 100644 --- a/invenio_users_resources/__init__.py +++ b/invenio_users_resources/__init__.py @@ -2,6 +2,7 @@ # # Copyright (C) 2022-2024 CERN. # Copyright (C) 2022-2023 TU Wien. +# Copyright (C) 2024 Graz University of Technology. # # Invenio-Users-Resources is free software; you can redistribute it and/or # modify it under the terms of the MIT License; see LICENSE file for more @@ -9,6 +10,6 @@ """Invenio module providing management APIs for users and roles/groups.""" -__version__ = "5.3.0" +__version__ = "5.3.1" __all__ = ("__version__",) From bf882ad7a1b77ddefe77794352ef40b7354675cf Mon Sep 17 00:00:00 2001 From: Anika Churilova Date: Tue, 30 Jan 2024 18:14:39 +0100 Subject: [PATCH 05/18] http headers: use and adjust vnd.inveniordm.v1+json http accept header * closes https://github.com/zenodo/rdm-project/issues/598 --- .github/workflows/tests.yml | 2 +- invenio_users_resources/resources/groups/config.py | 7 +++++++ invenio_users_resources/resources/users/config.py | 7 +++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4fb6e59..8039854 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -73,7 +73,7 @@ jobs: pip install ".[$EXTRAS]" pip freeze docker --version - docker-compose --version + docker compose --version - name: Run tests run: | diff --git a/invenio_users_resources/resources/groups/config.py b/invenio_users_resources/resources/groups/config.py index dc52f79..32746e2 100644 --- a/invenio_users_resources/resources/groups/config.py +++ b/invenio_users_resources/resources/groups/config.py @@ -45,3 +45,10 @@ class GroupsResourceConfig(RecordResourceConfig): } request_search_args = GroupSearchRequestArgsSchema + + response_handlers = { + "application/vnd.inveniordm.v1+json": RecordResourceConfig.response_handlers[ + "application/json" + ], + **RecordResourceConfig.response_handlers, + } diff --git a/invenio_users_resources/resources/users/config.py b/invenio_users_resources/resources/users/config.py index 6bf3d38..ce23026 100644 --- a/invenio_users_resources/resources/users/config.py +++ b/invenio_users_resources/resources/users/config.py @@ -69,3 +69,10 @@ class UsersResourceConfig(RecordResourceConfig): ) ), } + + response_handlers = { + "application/vnd.inveniordm.v1+json": RecordResourceConfig.response_handlers[ + "application/json" + ], + **RecordResourceConfig.response_handlers, + } From 5a10294c5aff4ba0967904c544a2e2d8bab79c90 Mon Sep 17 00:00:00 2001 From: Karolina Przerwa Date: Fri, 9 Aug 2024 11:24:47 +0200 Subject: [PATCH 06/18] release: v5.4.0 --- CHANGES.rst | 4 ++++ invenio_users_resources/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6717163..9433e93 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,10 @@ Changes ======= +Version 5.4.0 (released 2024-08-09) + +- resources: use and adjust vnd.inveniordm.v1+json http accept header + Version 5.3.1 (released 2024-07-30) - config: Update records_html link diff --git a/invenio_users_resources/__init__.py b/invenio_users_resources/__init__.py index b3afc68..0b6fdde 100644 --- a/invenio_users_resources/__init__.py +++ b/invenio_users_resources/__init__.py @@ -10,6 +10,6 @@ """Invenio module providing management APIs for users and roles/groups.""" -__version__ = "5.3.1" +__version__ = "5.4.0" __all__ = ("__version__",) From 4332d2664a64a1e5085c9ac72dbae416a083705e Mon Sep 17 00:00:00 2001 From: rekt-hard Date: Tue, 6 Aug 2024 08:57:40 +0000 Subject: [PATCH 07/18] i18n:push translations --- .../translations/messages.pot | 207 +++++++++++++++++- 1 file changed, 197 insertions(+), 10 deletions(-) diff --git a/invenio_users_resources/translations/messages.pot b/invenio_users_resources/translations/messages.pot index 9264682..596e7e7 100644 --- a/invenio_users_resources/translations/messages.pot +++ b/invenio_users_resources/translations/messages.pot @@ -1,32 +1,219 @@ # Translations template for invenio-users-resources. -# Copyright (C) 2022 CERN +# Copyright (C) 2024 CERN # This file is distributed under the same license as the # invenio-users-resources project. -# FIRST AUTHOR , 2022. +# FIRST AUTHOR , 2024. # #, fuzzy msgid "" msgstr "" -"Project-Id-Version: invenio-users-resources 0.3.2\n" +"Project-Id-Version: invenio-users-resources 5.3.1\n" "Report-Msgid-Bugs-To: info@inveniosoftware.org\n" -"POT-Creation-Date: 2022-10-12 12:12+0000\n" +"POT-Creation-Date: 2024-08-06 08:57+0000\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=utf-8\n" "Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel 2.10.3\n" +"Generated-By: Babel 2.15.0\n" -#: invenio_users_resources/services/schemas.py:21 -msgid "Value must be either 'public' or 'restricted'." +#: invenio_users_resources/config.py:60 invenio_users_resources/config.py:154 +#: invenio_users_resources/services/groups/config.py:44 +msgid "Best match" msgstr "" -#: invenio_users_resources/services/groups/config.py:43 -msgid "Best match" +#: invenio_users_resources/config.py:64 +msgid "Username" +msgstr "" + +#: invenio_users_resources/config.py:68 +msgid "Email" +msgstr "" + +#: invenio_users_resources/config.py:72 invenio_users_resources/config.py:158 +#: invenio_users_resources/services/users/facets.py:16 +msgid "Domain" +msgstr "" + +#: invenio_users_resources/config.py:76 invenio_users_resources/config.py:162 +msgid "Newest" +msgstr "" + +#: invenio_users_resources/config.py:80 invenio_users_resources/config.py:166 +msgid "Oldest" +msgstr "" + +#: invenio_users_resources/config.py:84 invenio_users_resources/config.py:170 +msgid "Recently updated" +msgstr "" + +#: invenio_users_resources/config.py:174 +msgid "# Users" +msgstr "" + +#: invenio_users_resources/config.py:178 +msgid "# Active" +msgstr "" + +#: invenio_users_resources/config.py:182 +msgid "# Inactive" +msgstr "" + +#: invenio_users_resources/config.py:186 +msgid "# Confirmed" +msgstr "" + +#: invenio_users_resources/config.py:190 +msgid "# Verified" +msgstr "" + +#: invenio_users_resources/config.py:194 +msgid "# Blocked" +msgstr "" + +#: invenio_users_resources/forms.py:24 +msgid "Notify me" +msgstr "" + +#: invenio_users_resources/forms.py:25 +msgid "Turn on to enable notifications for relevant events." +msgstr "" + +#: invenio_users_resources/resources/domains/resource.py:29 +msgid "Domain already exists." +msgstr "" + +#: invenio_users_resources/resources/users/config.py:65 +msgid "User is locked due to concurrent running operation." +msgstr "" + +#: invenio_users_resources/services/schemas.py:147 +#: invenio_users_resources/services/schemas.py:151 +msgid "Deleted user" +msgstr "" + +#: invenio_users_resources/services/schemas.py:160 +#: invenio_users_resources/services/schemas.py:164 +msgid "System" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:16 +msgid "Status" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:18 +#: invenio_users_resources/services/users/facets.py:23 +#: invenio_users_resources/services/users/facets.py:39 +msgid "New" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:19 +#: invenio_users_resources/services/users/facets.py:24 +msgid "Moderated" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:20 +#: invenio_users_resources/services/users/facets.py:25 +#: invenio_users_resources/services/users/facets.py:40 +msgid "Verified" msgstr "" -#: invenio_users_resources/services/groups/config.py:47 +#: invenio_users_resources/services/domains/facets.py:21 +#: invenio_users_resources/services/users/facets.py:26 +#: invenio_users_resources/services/users/facets.py:42 +msgid "Blocked" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:28 +msgid "Flagged" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:30 +msgid "Yes" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:31 +msgid "No" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:38 +msgid "Category" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:44 +msgid "Organisation" +msgstr "" + +#: invenio_users_resources/services/domains/facets.py:50 +msgid "Top-level domain" +msgstr "" + +#: invenio_users_resources/services/groups/config.py:48 msgid "Name" msgstr "" +#: invenio_users_resources/services/users/facets.py:21 +msgid "Domain status" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:32 +msgid "Affiliations" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:37 +msgid "Account status" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:41 +msgid "Confirmed" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:43 +msgid "Inactive" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:49 +msgid "Profile visibility" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:51 +msgid "Hidden" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:52 +msgid "Profile" +msgstr "" + +#: invenio_users_resources/services/users/facets.py:53 +msgid "Full" +msgstr "" + +#: invenio_users_resources/templates/semantic-ui/invenio_users_resources/settings/notifications.html:14 +#: invenio_users_resources/templates/semantic-ui/invenio_users_resources/settings/notifications.html:17 +msgid "Notifications" +msgstr "" + +#: invenio_users_resources/templates/semantic-ui/invenio_users_resources/settings/notifications.html:50 +msgid "Notifications email" +msgstr "" + +#: invenio_users_resources/templates/semantic-ui/invenio_users_resources/settings/notifications.html:53 +#, python-format +msgid "" +"\n" +" We use your primary email address to notify you on. " +"You can change your email on\n" +" your profile settings" +"\n" +" " +msgstr "" + +#: invenio_users_resources/templates/semantic-ui/invenio_users_resources/settings/notifications.html:67 +msgid "Cancel" +msgstr "" + +#: invenio_users_resources/templates/semantic-ui/invenio_users_resources/settings/notifications.html:71 +msgid "Update notification preferences" +msgstr "" + From 403ca7a8c70ae50870d9572fe9167853c365f4ff Mon Sep 17 00:00:00 2001 From: Javier Romero Castro Date: Wed, 21 Feb 2024 17:52:16 +0100 Subject: [PATCH 08/18] mappings: add ngram analyzer * Allows partial matches on search * closes https://github.com/CERNDocumentServer/cds-rdm/issues/114 --- .../mappings/os-v1/users/user-v2.0.0.json | 40 +++++++++++++++++-- .../mappings/os-v2/users/user-v2.0.0.json | 40 +++++++++++++++++-- .../mappings/v7/users/user-v2.0.0.json | 40 +++++++++++++++++-- 3 files changed, 111 insertions(+), 9 deletions(-) diff --git a/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json b/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json index d2c6864..dec6f5d 100644 --- a/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json +++ b/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json @@ -1,4 +1,25 @@ { + "settings": { + "index": { + "max_ngram_diff": 2 + }, + "analysis": { + "analyzer": { + "ngram_3_to_5_analyzer": { + "type": "custom", + "tokenizer": "ngram_tokenizer_3_to_5", + "filter": ["lowercase"] + } + }, + "tokenizer": { + "ngram_tokenizer_3_to_5": { + "type": "ngram", + "min_gram": 3, + "max_gram": 5 + } + } + } + }, "mappings": { "dynamic": "strict", "dynamic_templates": [ @@ -75,10 +96,22 @@ "type": "boolean" }, "username": { - "type": "keyword" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } }, "email": { - "type": "keyword" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } }, "email_hidden": { "type": "keyword" @@ -110,7 +143,8 @@ "profile": { "properties": { "full_name": { - "type": "text" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer" }, "affiliations": { "type": "text", diff --git a/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json b/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json index d2c6864..dec6f5d 100644 --- a/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json +++ b/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json @@ -1,4 +1,25 @@ { + "settings": { + "index": { + "max_ngram_diff": 2 + }, + "analysis": { + "analyzer": { + "ngram_3_to_5_analyzer": { + "type": "custom", + "tokenizer": "ngram_tokenizer_3_to_5", + "filter": ["lowercase"] + } + }, + "tokenizer": { + "ngram_tokenizer_3_to_5": { + "type": "ngram", + "min_gram": 3, + "max_gram": 5 + } + } + } + }, "mappings": { "dynamic": "strict", "dynamic_templates": [ @@ -75,10 +96,22 @@ "type": "boolean" }, "username": { - "type": "keyword" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } }, "email": { - "type": "keyword" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } }, "email_hidden": { "type": "keyword" @@ -110,7 +143,8 @@ "profile": { "properties": { "full_name": { - "type": "text" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer" }, "affiliations": { "type": "text", diff --git a/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json b/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json index d2c6864..dec6f5d 100644 --- a/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json +++ b/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json @@ -1,4 +1,25 @@ { + "settings": { + "index": { + "max_ngram_diff": 2 + }, + "analysis": { + "analyzer": { + "ngram_3_to_5_analyzer": { + "type": "custom", + "tokenizer": "ngram_tokenizer_3_to_5", + "filter": ["lowercase"] + } + }, + "tokenizer": { + "ngram_tokenizer_3_to_5": { + "type": "ngram", + "min_gram": 3, + "max_gram": 5 + } + } + } + }, "mappings": { "dynamic": "strict", "dynamic_templates": [ @@ -75,10 +96,22 @@ "type": "boolean" }, "username": { - "type": "keyword" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } }, "email": { - "type": "keyword" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } }, "email_hidden": { "type": "keyword" @@ -110,7 +143,8 @@ "profile": { "properties": { "full_name": { - "type": "text" + "type": "text", + "analyzer": "ngram_3_to_5_analyzer" }, "affiliations": { "type": "text", From 5dac1c0f9b3404fa69f2da6ef381871e9393c320 Mon Sep 17 00:00:00 2001 From: Carlin MacKenzie Date: Thu, 18 Jul 2024 16:49:23 +0200 Subject: [PATCH 09/18] mappings: change from ngram to edge_ngram --- .../mappings/os-v2/users/user-v2.0.0.json | 19 +++++++++++++------ tests/services/users/test_service_users.py | 4 ++-- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json b/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json index dec6f5d..324d638 100644 --- a/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json +++ b/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json @@ -5,10 +5,12 @@ }, "analysis": { "analyzer": { - "ngram_3_to_5_analyzer": { + "edge_analyzer": { "type": "custom", - "tokenizer": "ngram_tokenizer_3_to_5", - "filter": ["lowercase"] + "tokenizer": "edge_ngram_tokenizer", + "filter": [ + "lowercase" + ] } }, "tokenizer": { @@ -16,6 +18,11 @@ "type": "ngram", "min_gram": 3, "max_gram": 5 + }, + "edge_ngram_tokenizer": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 20 } } } @@ -97,7 +104,7 @@ }, "username": { "type": "text", - "analyzer": "ngram_3_to_5_analyzer", + "analyzer": "edge_analyzer", "fields": { "keyword": { "type": "keyword" @@ -106,7 +113,7 @@ }, "email": { "type": "text", - "analyzer": "ngram_3_to_5_analyzer", + "analyzer": "edge_analyzer", "fields": { "keyword": { "type": "keyword" @@ -144,7 +151,7 @@ "properties": { "full_name": { "type": "text", - "analyzer": "ngram_3_to_5_analyzer" + "analyzer": "edge_analyzer" }, "affiliations": { "type": "text", diff --git a/tests/services/users/test_service_users.py b/tests/services/users/test_service_users.py index 7afe142..23766d4 100644 --- a/tests/services/users/test_service_users.py +++ b/tests/services/users/test_service_users.py @@ -59,8 +59,8 @@ def test_search_public_users(user_service, user_pub): [ "email:res@inveniosoftware.org", "res@inveniosoftware.org", - "email:pubres@inveniosoftware.org", - "pubres@inveniosoftware.org", + 'email:"pubres@inveniosoftware.org"', + '"pubres@inveniosoftware.org"', "Plazi", "+name:Jose -affiliation:CERN", "name:Jose AND NOT affiliation:CERN", From cbba26e8a4d0a764e70e79ee1b28455eecd79f04 Mon Sep 17 00:00:00 2001 From: Carlin MacKenzie Date: Thu, 18 Jul 2024 17:38:13 +0200 Subject: [PATCH 10/18] refactor: move to v3 as types have changed --- .../mappings/os-v1/users/user-v2.0.0.json | 40 +--- .../mappings/os-v1/users/user-v3.0.0.json | 191 ++++++++++++++++++ .../mappings/os-v2/users/user-v2.0.0.json | 47 +---- .../mappings/os-v2/users/user-v3.0.0.json | 191 ++++++++++++++++++ .../mappings/v7/users/user-v2.0.0.json | 40 +--- 5 files changed, 391 insertions(+), 118 deletions(-) create mode 100644 invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json create mode 100644 invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json diff --git a/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json b/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json index dec6f5d..d2c6864 100644 --- a/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json +++ b/invenio_users_resources/records/mappings/os-v1/users/user-v2.0.0.json @@ -1,25 +1,4 @@ { - "settings": { - "index": { - "max_ngram_diff": 2 - }, - "analysis": { - "analyzer": { - "ngram_3_to_5_analyzer": { - "type": "custom", - "tokenizer": "ngram_tokenizer_3_to_5", - "filter": ["lowercase"] - } - }, - "tokenizer": { - "ngram_tokenizer_3_to_5": { - "type": "ngram", - "min_gram": 3, - "max_gram": 5 - } - } - } - }, "mappings": { "dynamic": "strict", "dynamic_templates": [ @@ -96,22 +75,10 @@ "type": "boolean" }, "username": { - "type": "text", - "analyzer": "ngram_3_to_5_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - } + "type": "keyword" }, "email": { - "type": "text", - "analyzer": "ngram_3_to_5_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - } + "type": "keyword" }, "email_hidden": { "type": "keyword" @@ -143,8 +110,7 @@ "profile": { "properties": { "full_name": { - "type": "text", - "analyzer": "ngram_3_to_5_analyzer" + "type": "text" }, "affiliations": { "type": "text", diff --git a/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json b/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json new file mode 100644 index 0000000..ad844a0 --- /dev/null +++ b/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json @@ -0,0 +1,191 @@ +{ + "settings": { + "analysis": { + "analyzer": { + "edge_analyzer": { + "type": "custom", + "tokenizer": "edge_ngram_tokenizer", + "filter": [ + "lowercase" + ] + } + }, + "tokenizer": { + "edge_ngram_tokenizer": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 20 + } + } + } + }, + "mappings": { + "dynamic": "strict", + "dynamic_templates": [ + { + "profile": { + "path_match": "profile.*", + "mapping": { + "type": "keyword" + } + } + }, + { + "preferences": { + "path_match": "preferences.*", + "mapping": { + "type": "keyword" + } + } + }, + { + "identities": { + "path_match": "identities.*", + "mapping": { + "type": "keyword" + } + } + } + ], + "properties": { + "$schema": { + "type": "keyword", + "index": "false" + }, + "id": { + "type": "keyword" + }, + "version_id": { + "type": "integer" + }, + "uuid": { + "type": "keyword" + }, + "created": { + "type": "date" + }, + "updated": { + "type": "date" + }, + "current_login_at": { + "type": "date" + }, + "active": { + "type": "boolean" + }, + "confirmed_at": { + "type": "date" + }, + "indexed_at": { + "type": "date" + }, + "confirmed": { + "type": "boolean" + }, + "blocked_at": { + "type": "date" + }, + "blocked": { + "type": "boolean" + }, + "verified_at": { + "type": "date" + }, + "verified": { + "type": "boolean" + }, + "username": { + "type": "text", + "analyzer": "edge_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "email": { + "type": "text", + "analyzer": "edge_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "email_hidden": { + "type": "keyword" + }, + "domain": { + "type": "keyword" + }, + "domaininfo": { + "properties": { + "status": { + "type": "integer" + }, + "flagged": { + "type": "boolean" + }, + "category": { + "type": "integer" + }, + "tld": { + "type": "keyword" + } + } + }, + "identities": { + "type": "object", + "properties": {}, + "dynamic": "true" + }, + "profile": { + "properties": { + "full_name": { + "type": "text", + "analyzer": "edge_analyzer" + }, + "affiliations": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + }, + "dynamic": "true" + }, + "preferences": { + "properties": { + "visibility": { + "type": "keyword" + }, + "email_visibility": { + "type": "keyword" + }, + "locale": { + "type": "keyword" + }, + "timezone": { + "type": "keyword" + }, + "notifications": { + "properties": { + "enabled": { + "type": "boolean" + } + } + } + }, + "dynamic": "true" + }, + "status": { + "type": "keyword" + }, + "visibility": { + "type": "keyword" + } + } + } +} diff --git a/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json b/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json index 324d638..d2c6864 100644 --- a/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json +++ b/invenio_users_resources/records/mappings/os-v2/users/user-v2.0.0.json @@ -1,32 +1,4 @@ { - "settings": { - "index": { - "max_ngram_diff": 2 - }, - "analysis": { - "analyzer": { - "edge_analyzer": { - "type": "custom", - "tokenizer": "edge_ngram_tokenizer", - "filter": [ - "lowercase" - ] - } - }, - "tokenizer": { - "ngram_tokenizer_3_to_5": { - "type": "ngram", - "min_gram": 3, - "max_gram": 5 - }, - "edge_ngram_tokenizer": { - "type": "edge_ngram", - "min_gram": 2, - "max_gram": 20 - } - } - } - }, "mappings": { "dynamic": "strict", "dynamic_templates": [ @@ -103,22 +75,10 @@ "type": "boolean" }, "username": { - "type": "text", - "analyzer": "edge_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - } + "type": "keyword" }, "email": { - "type": "text", - "analyzer": "edge_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - } + "type": "keyword" }, "email_hidden": { "type": "keyword" @@ -150,8 +110,7 @@ "profile": { "properties": { "full_name": { - "type": "text", - "analyzer": "edge_analyzer" + "type": "text" }, "affiliations": { "type": "text", diff --git a/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json b/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json new file mode 100644 index 0000000..ad844a0 --- /dev/null +++ b/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json @@ -0,0 +1,191 @@ +{ + "settings": { + "analysis": { + "analyzer": { + "edge_analyzer": { + "type": "custom", + "tokenizer": "edge_ngram_tokenizer", + "filter": [ + "lowercase" + ] + } + }, + "tokenizer": { + "edge_ngram_tokenizer": { + "type": "edge_ngram", + "min_gram": 2, + "max_gram": 20 + } + } + } + }, + "mappings": { + "dynamic": "strict", + "dynamic_templates": [ + { + "profile": { + "path_match": "profile.*", + "mapping": { + "type": "keyword" + } + } + }, + { + "preferences": { + "path_match": "preferences.*", + "mapping": { + "type": "keyword" + } + } + }, + { + "identities": { + "path_match": "identities.*", + "mapping": { + "type": "keyword" + } + } + } + ], + "properties": { + "$schema": { + "type": "keyword", + "index": "false" + }, + "id": { + "type": "keyword" + }, + "version_id": { + "type": "integer" + }, + "uuid": { + "type": "keyword" + }, + "created": { + "type": "date" + }, + "updated": { + "type": "date" + }, + "current_login_at": { + "type": "date" + }, + "active": { + "type": "boolean" + }, + "confirmed_at": { + "type": "date" + }, + "indexed_at": { + "type": "date" + }, + "confirmed": { + "type": "boolean" + }, + "blocked_at": { + "type": "date" + }, + "blocked": { + "type": "boolean" + }, + "verified_at": { + "type": "date" + }, + "verified": { + "type": "boolean" + }, + "username": { + "type": "text", + "analyzer": "edge_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "email": { + "type": "text", + "analyzer": "edge_analyzer", + "fields": { + "keyword": { + "type": "keyword" + } + } + }, + "email_hidden": { + "type": "keyword" + }, + "domain": { + "type": "keyword" + }, + "domaininfo": { + "properties": { + "status": { + "type": "integer" + }, + "flagged": { + "type": "boolean" + }, + "category": { + "type": "integer" + }, + "tld": { + "type": "keyword" + } + } + }, + "identities": { + "type": "object", + "properties": {}, + "dynamic": "true" + }, + "profile": { + "properties": { + "full_name": { + "type": "text", + "analyzer": "edge_analyzer" + }, + "affiliations": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword" + } + } + } + }, + "dynamic": "true" + }, + "preferences": { + "properties": { + "visibility": { + "type": "keyword" + }, + "email_visibility": { + "type": "keyword" + }, + "locale": { + "type": "keyword" + }, + "timezone": { + "type": "keyword" + }, + "notifications": { + "properties": { + "enabled": { + "type": "boolean" + } + } + } + }, + "dynamic": "true" + }, + "status": { + "type": "keyword" + }, + "visibility": { + "type": "keyword" + } + } + } +} diff --git a/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json b/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json index dec6f5d..d2c6864 100644 --- a/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json +++ b/invenio_users_resources/records/mappings/v7/users/user-v2.0.0.json @@ -1,25 +1,4 @@ { - "settings": { - "index": { - "max_ngram_diff": 2 - }, - "analysis": { - "analyzer": { - "ngram_3_to_5_analyzer": { - "type": "custom", - "tokenizer": "ngram_tokenizer_3_to_5", - "filter": ["lowercase"] - } - }, - "tokenizer": { - "ngram_tokenizer_3_to_5": { - "type": "ngram", - "min_gram": 3, - "max_gram": 5 - } - } - } - }, "mappings": { "dynamic": "strict", "dynamic_templates": [ @@ -96,22 +75,10 @@ "type": "boolean" }, "username": { - "type": "text", - "analyzer": "ngram_3_to_5_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - } + "type": "keyword" }, "email": { - "type": "text", - "analyzer": "ngram_3_to_5_analyzer", - "fields": { - "keyword": { - "type": "keyword" - } - } + "type": "keyword" }, "email_hidden": { "type": "keyword" @@ -143,8 +110,7 @@ "profile": { "properties": { "full_name": { - "type": "text", - "analyzer": "ngram_3_to_5_analyzer" + "type": "text" }, "affiliations": { "type": "text", From 0202c61766baa149ba6ca5d0996f05dd25782517 Mon Sep 17 00:00:00 2001 From: Carlin MacKenzie Date: Mon, 22 Jul 2024 16:14:00 +0200 Subject: [PATCH 11/18] search: use email tokeniser as default search analyzer --- invenio_users_resources/records/api.py | 2 +- .../mappings/os-v1/users/user-v3.0.0.json | 25 +++++++++++++++---- .../mappings/os-v2/users/user-v3.0.0.json | 25 +++++++++++++++---- tests/services/users/test_service_users.py | 4 +-- 4 files changed, 43 insertions(+), 13 deletions(-) diff --git a/invenio_users_resources/records/api.py b/invenio_users_resources/records/api.py index d4d4141..6f49f90 100644 --- a/invenio_users_resources/records/api.py +++ b/invenio_users_resources/records/api.py @@ -131,7 +131,7 @@ class UserAggregate(BaseAggregate): ) """Search dumper with configured extensions.""" - index = IndexField("users-user-v2.0.0", search_alias="users") + index = IndexField("users-user-v3.0.0", search_alias="users") """The search engine index to use.""" id = ModelField("id", dump_type=int) diff --git a/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json b/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json index ad844a0..3408c0a 100644 --- a/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json +++ b/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json @@ -2,19 +2,34 @@ "settings": { "analysis": { "analyzer": { - "edge_analyzer": { + "default": { + "type": "custom", + "tokenizer": "uax_url_email", + "filter": [ + "lowercase" + ] + }, + "default_search": { "type": "custom", - "tokenizer": "edge_ngram_tokenizer", + "tokenizer": "uax_url_email", "filter": [ "lowercase" ] + }, + "edge_analyzer": { + "tokenizer": "uax_url_email", + "filter": [ + "lowercase", + "edgegrams" + ] } }, - "tokenizer": { - "edge_ngram_tokenizer": { + "filter": { + "edgegrams": { "type": "edge_ngram", "min_gram": 2, - "max_gram": 20 + "max_gram": 20, + "preserve_original": true } } } diff --git a/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json b/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json index ad844a0..3408c0a 100644 --- a/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json +++ b/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json @@ -2,19 +2,34 @@ "settings": { "analysis": { "analyzer": { - "edge_analyzer": { + "default": { + "type": "custom", + "tokenizer": "uax_url_email", + "filter": [ + "lowercase" + ] + }, + "default_search": { "type": "custom", - "tokenizer": "edge_ngram_tokenizer", + "tokenizer": "uax_url_email", "filter": [ "lowercase" ] + }, + "edge_analyzer": { + "tokenizer": "uax_url_email", + "filter": [ + "lowercase", + "edgegrams" + ] } }, - "tokenizer": { - "edge_ngram_tokenizer": { + "filter": { + "edgegrams": { "type": "edge_ngram", "min_gram": 2, - "max_gram": 20 + "max_gram": 20, + "preserve_original": true } } } diff --git a/tests/services/users/test_service_users.py b/tests/services/users/test_service_users.py index 23766d4..7afe142 100644 --- a/tests/services/users/test_service_users.py +++ b/tests/services/users/test_service_users.py @@ -59,8 +59,8 @@ def test_search_public_users(user_service, user_pub): [ "email:res@inveniosoftware.org", "res@inveniosoftware.org", - 'email:"pubres@inveniosoftware.org"', - '"pubres@inveniosoftware.org"', + "email:pubres@inveniosoftware.org", + "pubres@inveniosoftware.org", "Plazi", "+name:Jose -affiliation:CERN", "name:Jose AND NOT affiliation:CERN", From 86b4d459283c331d4b71092a356e107b30142982 Mon Sep 17 00:00:00 2001 From: Carlin MacKenzie Date: Tue, 23 Jul 2024 17:08:23 +0200 Subject: [PATCH 12/18] tests: add new test case which failed previously --- tests/services/users/test_service_users.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/services/users/test_service_users.py b/tests/services/users/test_service_users.py index 7afe142..76f51d7 100644 --- a/tests/services/users/test_service_users.py +++ b/tests/services/users/test_service_users.py @@ -89,6 +89,7 @@ def test_search_field_not_searchable(user_service, user_pub, query): "Tim", "Tim CERN", "Jose", + "Jos", "Jose CERN", "email:pub@inveniosoftware.org", "username:pub", From a481568a31182d0d9e6d1f3c689daa2c7b639947 Mon Sep 17 00:00:00 2001 From: Saksham Date: Mon, 12 Aug 2024 10:47:34 +0200 Subject: [PATCH 13/18] users: mappings: Add accent analyzing --- .../mappings/os-v1/users/user-v3.0.0.json | 55 +++++++++++++------ .../mappings/os-v2/users/user-v3.0.0.json | 55 +++++++++++++------ 2 files changed, 74 insertions(+), 36 deletions(-) diff --git a/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json b/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json index 3408c0a..687862a 100644 --- a/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json +++ b/invenio_users_resources/records/mappings/os-v1/users/user-v3.0.0.json @@ -1,35 +1,54 @@ { "settings": { "analysis": { + "char_filter": { + "strip_special_chars": { + "type": "pattern_replace", + "pattern": "[\\p{Punct}\\p{S}]", + "replacement": "" + } + }, "analyzer": { - "default": { - "type": "custom", + "edge_analyzer": { "tokenizer": "uax_url_email", + "type": "custom", "filter": [ - "lowercase" + "lowercase", + "edgegrams" ] }, - "default_search": { - "type": "custom", + "accent_edge_analyzer": { "tokenizer": "uax_url_email", + "type": "custom", "filter": [ - "lowercase" + "lowercase", + "asciifolding", + "edgegrams" ] }, - "edge_analyzer": { - "tokenizer": "uax_url_email", + "accent_analyzer": { + "tokenizer": "standard", + "type": "custom", + "char_filter": ["strip_special_chars"], "filter": [ "lowercase", - "edgegrams" + "asciifolding" ] } }, "filter": { + "lowercase": { + "type": "lowercase", + "preserve_original": true + }, + "asciifolding": { + "type": "asciifolding", + "preserve_original": true + }, "edgegrams": { "type": "edge_ngram", "min_gram": 2, - "max_gram": 20, - "preserve_original": true + "max_gram": 20 } } } @@ -111,7 +130,8 @@ }, "username": { "type": "text", - "analyzer": "edge_analyzer", + "analyzer": "accent_edge_analyzer", + "search_analyzer": "accent_analyzer", "fields": { "keyword": { "type": "keyword" @@ -121,6 +141,7 @@ "email": { "type": "text", "analyzer": "edge_analyzer", + "search_analyzer": "standard", "fields": { "keyword": { "type": "keyword" @@ -158,15 +179,13 @@ "properties": { "full_name": { "type": "text", - "analyzer": "edge_analyzer" + "analyzer": "accent_edge_analyzer", + "search_analyzer": "accent_analyzer" }, "affiliations": { "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } + "analyzer": "accent_edge_analyzer", + "search_analyzer": "accent_analyzer" } }, "dynamic": "true" diff --git a/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json b/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json index 3408c0a..687862a 100644 --- a/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json +++ b/invenio_users_resources/records/mappings/os-v2/users/user-v3.0.0.json @@ -1,35 +1,54 @@ { "settings": { "analysis": { + "char_filter": { + "strip_special_chars": { + "type": "pattern_replace", + "pattern": "[\\p{Punct}\\p{S}]", + "replacement": "" + } + }, "analyzer": { - "default": { - "type": "custom", + "edge_analyzer": { "tokenizer": "uax_url_email", + "type": "custom", "filter": [ - "lowercase" + "lowercase", + "edgegrams" ] }, - "default_search": { - "type": "custom", + "accent_edge_analyzer": { "tokenizer": "uax_url_email", + "type": "custom", "filter": [ - "lowercase" + "lowercase", + "asciifolding", + "edgegrams" ] }, - "edge_analyzer": { - "tokenizer": "uax_url_email", + "accent_analyzer": { + "tokenizer": "standard", + "type": "custom", + "char_filter": ["strip_special_chars"], "filter": [ "lowercase", - "edgegrams" + "asciifolding" ] } }, "filter": { + "lowercase": { + "type": "lowercase", + "preserve_original": true + }, + "asciifolding": { + "type": "asciifolding", + "preserve_original": true + }, "edgegrams": { "type": "edge_ngram", "min_gram": 2, - "max_gram": 20, - "preserve_original": true + "max_gram": 20 } } } @@ -111,7 +130,8 @@ }, "username": { "type": "text", - "analyzer": "edge_analyzer", + "analyzer": "accent_edge_analyzer", + "search_analyzer": "accent_analyzer", "fields": { "keyword": { "type": "keyword" @@ -121,6 +141,7 @@ "email": { "type": "text", "analyzer": "edge_analyzer", + "search_analyzer": "standard", "fields": { "keyword": { "type": "keyword" @@ -158,15 +179,13 @@ "properties": { "full_name": { "type": "text", - "analyzer": "edge_analyzer" + "analyzer": "accent_edge_analyzer", + "search_analyzer": "accent_analyzer" }, "affiliations": { "type": "text", - "fields": { - "keyword": { - "type": "keyword" - } - } + "analyzer": "accent_edge_analyzer", + "search_analyzer": "accent_analyzer" } }, "dynamic": "true" From 5080d8eb185620e5e5c0a67f2e6bddb09b773845 Mon Sep 17 00:00:00 2001 From: Saksham Date: Fri, 16 Aug 2024 17:17:11 +0200 Subject: [PATCH 14/18] tests: Update tests for users and admin --- .../services/users/config.py | 6 +- tests/services/users/test_service_users.py | 63 ++++++++++++------- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/invenio_users_resources/services/users/config.py b/invenio_users_resources/services/users/config.py index 2b8d00e..b280747 100644 --- a/invenio_users_resources/services/users/config.py +++ b/invenio_users_resources/services/users/config.py @@ -29,6 +29,7 @@ FieldValueMapper, QueryParser, SearchFieldTransformer, + SuggestQueryParser, ) from luqum.tree import Word @@ -67,7 +68,7 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin): # The user search needs to be highly restricted to avoid leaking # account information, hence do not edit here unless you are # absolutely sure what you're doing. - query_parser_cls = QueryParser.factory( + suggest_parser_cls = SuggestQueryParser.factory( tree_transformer_cls=SearchFieldTransformer, fields=["username^2", "email^2", "profile.full_name^3", "profile.affiliations"], # Only public emails because hidden emails are stored in email_hidden field. @@ -79,6 +80,8 @@ class UserSearchOptions(SearchOptions, SearchOptionsMixin): "fullname": "profile.full_name", "name": "profile.full_name", }, + type="most_fields", # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-multi-match-query.html#multi-match-types + fuzziness="AUTO", # https://www.elastic.co/guide/en/elasticsearch/reference/current/common-options.html#fuzziness ) params_interpreters_cls = [ @@ -104,6 +107,7 @@ class AdminUserSearchOptions(UserSearchOptions): "email_hidden^3", "domain^2", "profile.full_name^3", + "profile.affiliations", ], allow_list=[ "id", diff --git a/tests/services/users/test_service_users.py b/tests/services/users/test_service_users.py index 76f51d7..0dff1dd 100644 --- a/tests/services/users/test_service_users.py +++ b/tests/services/users/test_service_users.py @@ -54,50 +54,67 @@ def test_search_public_users(user_service, user_pub): assert res["hits"]["total"] == 2 # 2 public users in conftest +# Admin search +@pytest.mark.parametrize( + "query", + [ + "affiliations:CERN", + "affiliation:CERN", + "name:Jose affiliation:CERN", + "+name:Jose +affiliation:CERN", + "CERN", + "Tim", + "Tim CERN", + "Jose", + "Jos", + "Jose CERN", + "email:pub@inveniosoftware.org", + "username:pub", + ], +) +def test_admin_search_field(user_service, user_moderator, query): + """Make sure certain fields ARE searchable.""" + res = user_service.search_all(user_moderator.identity, q=query).to_dict() + assert res["hits"]["total"] > 0 + + +# User search @pytest.mark.parametrize( "query", [ - "email:res@inveniosoftware.org", "res@inveniosoftware.org", - "email:pubres@inveniosoftware.org", "pubres@inveniosoftware.org", "Plazi", - "+name:Jose -affiliation:CERN", - "name:Jose AND NOT affiliation:CERN", - "username:inactive", - "username:unconfirmed", - "preferences.visibility:public", - "preferences.email_visibility:restricted", - "profile.affiliations:Plazi", - "invalid:test", + "inactive", + "unconfirmed", + "restricted", + "Plazi", + "test", ], ) -def test_search_field_not_searchable(user_service, user_pub, query): +def test_user_search_field_not_searchable(user_service, user_pub, query): """Make sure certain fields are NOT searchable.""" - res = user_service.search(user_pub.identity, q=query).to_dict() + res = user_service.search(user_pub.identity, suggest=query).to_dict() assert res["hits"]["total"] == 0 @pytest.mark.parametrize( "query", [ - "affiliations:CERN", - "affiliation:CERN", - "name:Jose affiliation:CERN", - "+name:Jose +affiliation:CERN", "CERN", + "Jose CERN", + "Jose AND CERN", "Tim", "Tim CERN", "Jose", "Jos", - "Jose CERN", - "email:pub@inveniosoftware.org", - "username:pub", + "pub@inveniosoftware.org", + "pub", ], ) -def test_search_field(user_service, user_pub, query): +def test_user_search_field(user_service, user_pub, query): """Make sure certain fields ARE searchable.""" - res = user_service.search(user_pub.identity, q=query).to_dict() + res = user_service.search(user_pub.identity, suggest=query).to_dict() assert res["hits"]["total"] > 0 @@ -162,7 +179,9 @@ def test_search_permissions(app, db, user_service, user_moderator, user_res): """Test service search for permissions.""" # User can search for himself search = user_service.search( - user_res.identity, q=f"username:{user_res._user.username}" + user_res.identity, + q=user_res._user.username, + fields=["username"], ) assert search.total > 0 From b609e190c4dc4068cbf3e6096687808753086a34 Mon Sep 17 00:00:00 2001 From: Nicola Tarocco Date: Thu, 22 Aug 2024 16:42:13 +0200 Subject: [PATCH 15/18] release: v6.0.0 --- CHANGES.rst | 4 ++++ invenio_users_resources/__init__.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9433e93..0ea1e3d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,10 @@ Changes ======= +Version 6.0.0 (released 2024-08-22) + +- mappings: add analyzers and filters to improve results when searching users + Version 5.4.0 (released 2024-08-09) - resources: use and adjust vnd.inveniordm.v1+json http accept header diff --git a/invenio_users_resources/__init__.py b/invenio_users_resources/__init__.py index 0b6fdde..5393266 100644 --- a/invenio_users_resources/__init__.py +++ b/invenio_users_resources/__init__.py @@ -10,6 +10,6 @@ """Invenio module providing management APIs for users and roles/groups.""" -__version__ = "5.4.0" +__version__ = "6.0.0" __all__ = ("__version__",) From 22b30a196e354f97cbebf654e4d8bccc14676c16 Mon Sep 17 00:00:00 2001 From: Sam Arbid Date: Mon, 5 Aug 2024 13:28:52 +0200 Subject: [PATCH 16/18] CI: Switch To Centralised Workflows * Change branches type from string to array * Switch to centralised workflows --- .github/workflows/tests.yml | 64 ++++++------------------------------- 1 file changed, 9 insertions(+), 55 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 8039854..aadccd8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,6 +3,7 @@ # This file is part of Invenio. # Copyright (C) 2020 CERN. # Copyright (C) 2022 Graz University of Technology. +# Copyright (C) 2024 KTH Royal Institute of Technology. # # Invenio is free software; you can redistribute it and/or modify it # under the terms of the MIT License; see LICENSE file for more details. @@ -11,9 +12,11 @@ name: CI on: push: - branches: master + branches: + - master pull_request: - branches: master + branches: + - master schedule: # * is a special character in YAML so you have to quote this string - cron: "0 3 * * 6" @@ -25,56 +28,7 @@ on: default: "Manual trigger" jobs: - Tests: - runs-on: ubuntu-20.04 - strategy: - matrix: - # You can add/remove combinations e.g. `dev` requirements or `postgresql13` by adding - # a new item to the following lists. - # You can see the complete list of services and versions that are available at: - # https://docker-services-cli.readthedocs.io/en/latest/configuration.html - python-version: [3.8, 3.9] - requirements-level: [pypi] - cache-service: [redis] - db-service: [postgresql14] - mq-service: [rabbitmq] - search-service: [opensearch2] - - env: - CACHE: ${{ matrix.cache-service }} - DB: ${{ matrix.db-service }} - MQ: ${{ matrix.mq-service }} - SEARCH: ${{ matrix.search-service }} - EXTRAS: tests,${{matrix.search-service}} - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Generate dependencies - run: | - pip install wheel requirements-builder - requirements-builder -e "$EXTRAS" --level=${{ matrix.requirements-level }} setup.py > .${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt - - - name: Cache pip - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('.${{ matrix.requirements-level }}-${{ matrix.python-version }}-requirements.txt') }} - - - name: Install dependencies - run: | - pip install -r .${{matrix.requirements-level}}-${{ matrix.python-version }}-requirements.txt - pip install ".[$EXTRAS]" - pip freeze - docker --version - docker compose --version - - - name: Run tests - run: | - ./run-tests.sh + Python: + uses: inveniosoftware/workflows/.github/workflows/tests-python.yml@master + with: + extras: "tests" \ No newline at end of file From c07097b2203c8c34096692f98fe099d826ebb559 Mon Sep 17 00:00:00 2001 From: Saksham Date: Tue, 20 Aug 2024 18:09:44 +0200 Subject: [PATCH 17/18] services: permissions: Use search_all config for admin user search --- invenio_users_resources/services/permissions.py | 6 +++++- invenio_users_resources/services/users/service.py | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/invenio_users_resources/services/permissions.py b/invenio_users_resources/services/permissions.py index e759470..451331e 100644 --- a/invenio_users_resources/services/permissions.py +++ b/invenio_users_resources/services/permissions.py @@ -38,7 +38,11 @@ class UsersPermissionPolicy(BasePermissionPolicy): IfPublicUser(then_=[AnyUser()], else_=[Self()]), SystemProcess(), ] - can_search = [AuthenticatedUser(), SystemProcess()] + can_search = [ + AuthenticatedUser(), + IfPublicUser(then_=[AuthenticatedUser()], else_=[Self()]), + SystemProcess(), + ] can_update = [SystemProcess()] can_delete = [SystemProcess()] diff --git a/invenio_users_resources/services/users/service.py b/invenio_users_resources/services/users/service.py index 970c431..a4f47bc 100644 --- a/invenio_users_resources/services/users/service.py +++ b/invenio_users_resources/services/users/service.py @@ -66,7 +66,7 @@ def create(self, identity, data, raise_errors=True, uow=None): ) def search(self, identity, params=None, search_preference=None, **kwargs): - """Search for active and confirmed users, matching the querystring.""" + """Search for active and confirmed users, matching the query.""" return super().search( identity, params=params, @@ -91,6 +91,7 @@ def search_all( params=params, search_preference=search_preference, search_opts=self.config.search_all, + permission_action="search_all", extra_filter=extra_filters, **kwargs, ) From 19433930ac9581b024db71136a643833255d99af Mon Sep 17 00:00:00 2001 From: Saksham Date: Tue, 27 Aug 2024 12:20:16 +0200 Subject: [PATCH 18/18] users: services: Add can_read_all permission for admin search --- invenio_users_resources/config.py | 2 +- invenio_users_resources/services/permissions.py | 7 ++----- invenio_users_resources/services/users/service.py | 2 +- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/invenio_users_resources/config.py b/invenio_users_resources/config.py index 89298d3..a81b719 100644 --- a/invenio_users_resources/config.py +++ b/invenio_users_resources/config.py @@ -62,7 +62,7 @@ ), "username": dict( title=_("Username"), - fields=["username", "-created"], + fields=["username.keyword", "-created"], ), "email": dict( title=_("Email"), diff --git a/invenio_users_resources/services/permissions.py b/invenio_users_resources/services/permissions.py index 451331e..faab61d 100644 --- a/invenio_users_resources/services/permissions.py +++ b/invenio_users_resources/services/permissions.py @@ -38,11 +38,7 @@ class UsersPermissionPolicy(BasePermissionPolicy): IfPublicUser(then_=[AnyUser()], else_=[Self()]), SystemProcess(), ] - can_search = [ - AuthenticatedUser(), - IfPublicUser(then_=[AuthenticatedUser()], else_=[Self()]), - SystemProcess(), - ] + can_search = [AuthenticatedUser(), SystemProcess()] can_update = [SystemProcess()] can_delete = [SystemProcess()] @@ -52,6 +48,7 @@ class UsersPermissionPolicy(BasePermissionPolicy): SystemProcess(), ] can_read_details = [UserManager, Self(), SystemProcess()] + can_read_all = [UserManager, SystemProcess()] # Moderation permissions can_manage = [UserManager, SystemProcess()] diff --git a/invenio_users_resources/services/users/service.py b/invenio_users_resources/services/users/service.py index a4f47bc..3b69af2 100644 --- a/invenio_users_resources/services/users/service.py +++ b/invenio_users_resources/services/users/service.py @@ -91,7 +91,7 @@ def search_all( params=params, search_preference=search_preference, search_opts=self.config.search_all, - permission_action="search_all", + permission_action="read_all", extra_filter=extra_filters, **kwargs, )