From 047429d1c58b89c07be01c7a35064ce32aa0858d Mon Sep 17 00:00:00 2001
From: Alberto Accomazzi
Date: Thu, 11 Aug 2022 16:05:52 -0400
Subject: [PATCH] unique DOIs in the SOLR document (#274)

---
 aip/classic/solr_adapter.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/aip/classic/solr_adapter.py b/aip/classic/solr_adapter.py
index df12f49..db02437 100644
--- a/aip/classic/solr_adapter.py
+++ b/aip/classic/solr_adapter.py
@@ -340,7 +340,7 @@ def _doctype_facet_hier(ADS_record):
     @staticmethod
     def _doi(ADS_record):
         result = [i['content'] for i in ADS_record['metadata']['general'].get('doi', [])]
-        return {'doi': result}
+        return {'doi': case_insensitive_unique_list(result)}
 
     @staticmethod
     def _eid(ADS_record):
@@ -580,6 +580,17 @@ def validate(cls, solr_record):
                 assert len(set([type(i) for i in v])) == 1, "{0}: multiple data-types in list: {1}".format(k, v)
                 assert isinstance(v[0], type(SCHEMA[k][0])), "{0}: inner list element has unexpected type ({1}!={2}): {3}".format(k, type(v[0]), type(SCHEMA[k][0]), v)
 
+def case_insensitive_unique_list(array):
+    """
+    Returns the list of unique elements in the input array
+    in a case-insensitive way, preserving order and case
+    """
+    seen, result = set(), []
+    for item in array:
+        if item.lower() not in seen:
+            seen.add(item.lower())
+            result.append(item)
+    return result
 
 def unroll_unique_list(array):
     """