From 702c8bd71bdb492d5bb02b5512ba8ff5b9bcdd24 Mon Sep 17 00:00:00 2001 From: mugdhapolimera <35502000+mugdhapolimera@users.noreply.github.com> Date: Wed, 31 Jan 2024 11:46:59 -0500 Subject: [PATCH] Add publisher information to record (#286) * add publisher information to record * removed extra lines of testing code --------- Co-authored-by: Mugdha Polimera --- aip/classic/enforce_schema.py | 1 + aip/classic/read_records.py | 2 -- aip/classic/solr_adapter.py | 5 ++++ tests/classic/test_read_records.py | 11 +++++---- tests/classic/test_solr_adapter.py | 23 +++++++++++++++---- .../ADSRecords/2009AAS...21320006C.classic | 2 +- tests/stubdata/ADSRecords/testbibcode | 1 + tests/stubdata/ADSRecords/testbibcode2 | 1 + tests/stubdata/mergerdata.py | 4 +++- 9 files changed, 38 insertions(+), 12 deletions(-) diff --git a/aip/classic/enforce_schema.py b/aip/classic/enforce_schema.py index b29e989..c31b5ac 100644 --- a/aip/classic/enforce_schema.py +++ b/aip/classic/enforce_schema.py @@ -228,6 +228,7 @@ def _generalEnforcer(self,block): r['publication']['page_count'] = g('number_pages') r['publication']['electronic_id'] = g('electronic_id') r['publication']['altbibcode'] = g('bibcode') + r['publication']['publisher'] = g('publisher') r['publication']['name'] = { 'raw': g('journal'), 'canonical': g('canonical_journal'), diff --git a/aip/classic/read_records.py b/aip/classic/read_records.py index 9060c77..c20409f 100644 --- a/aip/classic/read_records.py +++ b/aip/classic/read_records.py @@ -1,8 +1,6 @@ import os,sys -import json import time import timeout_decorator - import xmltodict from aip.classic import enforce_schema try: diff --git a/aip/classic/solr_adapter.py b/aip/classic/solr_adapter.py index 1cb9175..e33b8a5 100644 --- a/aip/classic/solr_adapter.py +++ b/aip/classic/solr_adapter.py @@ -95,6 +95,7 @@ class SolrAdapter(object): 'pubnote': [u'',], 'pub_raw': u'', 'pubdate': u'', + 'publisher': u'', 'recid': 0, 'series': u'', 'thesis': u'', @@ -472,6 +473,10 @@ def _pubdate(ADS_record): result = get_date_by_datetype(ADS_record) return {'pubdate':result} + @staticmethod + def _publisher(ADS_record): + return {'publisher': ADS_record['metadata']['general'].get('publication', {}).get('publisher')} + @staticmethod def _pubnote(ADS_record): result = [i['content'] for i in ADS_record['metadata']['general'].get('pubnote',[])] diff --git a/tests/classic/test_read_records.py b/tests/classic/test_read_records.py index 9bd4b4d..c9729b4 100644 --- a/tests/classic/test_read_records.py +++ b/tests/classic/test_read_records.py @@ -3,7 +3,6 @@ import mock from collections import OrderedDict from tests.stubdata import ADSRECORDS - if '/proj/ads/soft/python/lib/site-packages' not in sys.path: sys.path.append('/proj/ads/soft/python/lib/site-packages') @@ -131,7 +130,9 @@ def test_readRecordsFromADSExports(self): mock.patch.object(adsrecord, 'export', return_value=adsrecord), \ mock.patch('aip.classic.read_records.xml_to_dict', return_value=ADSRECORDS[u'2009AAS...21320006C.classic']): results = read_records.readRecordsFromADSExports([(u'2009AAS...21320006C', 'fingerprint')]) - self.assertDictContainsSubset({'JSON_fingerprint': 'fingerprint', + self.assertDictContainsSubset( +{ + 'JSON_fingerprint': 'fingerprint', 'bibcode': u'2009AAS...21320006C', 'entry_date': u'2009-01-03', 'metadata': [{'abstracts': [{'lang': u'en', @@ -166,7 +167,8 @@ def test_readRecordsFromADSExports(self): 'page_count': None, 'page_last': None, 'page_range': None, - 'volume': u'213'}, + 'volume': u'213', + 'publisher': u'Publisher'}, 'pubnote': [], 'tempdata': {'alternate_journal': False, 'modtime': u'2016-01-21T23:19:13Z', @@ -205,7 +207,8 @@ def test_readRecordsFromADSExports(self): 'page_count': None, 'page_last': None, 'page_range': u'187', - 'volume': u'41'}, + 'volume': u'41', + 'publisher': None}, 'pubnote': [], 'tempdata': {'alternate_journal': True, 'modtime': u'2016-01-21T23:20:41Z', diff --git a/tests/classic/test_solr_adapter.py b/tests/classic/test_solr_adapter.py index a75aa16..ef24d18 100644 --- a/tests/classic/test_solr_adapter.py +++ b/tests/classic/test_solr_adapter.py @@ -5,7 +5,6 @@ from tests.stubdata import ADSRECORDS from adsputils import get_date - class TestSolrAdapter(unittest.TestCase): def setUp(self): self.maxDiff = None @@ -62,6 +61,7 @@ def test_SolrAdapter(self): 'page': [u'2056-2078'], 'page_count': 0, 'pubdate': u'2013-08-05', + 'publisher': u'Publisher', 'title': [u'This is of the title', u'This is of the alternate'], 'volume': u'l24'}) @@ -109,7 +109,8 @@ def test_SolrAdapter(self): u"Accomazzi, A", u"Grant, C", u"Murray, S", u"Watson, J"], "bibstem_facet": u"A&AS", "pub": u"Astronomy and Astrophysics Supplement Series", - "volume": u"143", + "volume": u"143", + "publisher": u"Publisher", "doi": [u"10.1051/aas:2000170"], "eid": u"here is the eid", "author_norm": [u"Kurtz, M", u"Eichhorn, G", u"Accomazzi, A", u"Grant, C", @@ -154,6 +155,7 @@ def test_SolrAdapter(self): "general" : { "publication" : { "origin" : u"ARXIV", + "publisher": u"Publisher", 'dates' : [ { 'type' : u'date-preprint', @@ -205,6 +207,7 @@ def test_SolrAdapter(self): 'author_count': 2, 'doctype_facet_hier': [u'0/Article', u'1/Article/Journal Article'], 'identifier': [u'2014arXiv1406.4542H'], + "publisher": u"Publisher", }) # test addition of type = collaboration, which should act similar to type = regular @@ -225,6 +228,7 @@ def test_SolrAdapter(self): "general": { "publication": { "origin": u"ARXIV", + "publisher": u"Publisher", 'dates': [ { 'type': u'date-preprint', @@ -276,6 +280,7 @@ def test_SolrAdapter(self): 'author_count': 2, 'doctype_facet_hier': [u'0/Article', u'1/Article/Journal Article'], 'identifier': [u'2014arXiv1406.4542H'], + "publisher": u"Publisher", }) # test identifer arXiv with bibcode @@ -303,6 +308,7 @@ def test_SolrAdapter(self): "general": { "publication": { "origin": u"ARXIV", + "publisher": u"Publisher", 'dates': [ { 'type': u'date-preprint', @@ -343,6 +349,7 @@ def test_SolrAdapter(self): 'doctype_facet_hier': [u'0/Article', u'1/Article/Journal Article'], 'identifier': [u'2018arXiv180710779B', u'arXiv:1807.10779'], + "publisher": u"Publisher", }) # test identifer arXiv with bibcode, different kind of arXiv id, plus made sure links are captured properly @@ -386,6 +393,7 @@ def test_SolrAdapter(self): "general": { "publication": { "origin": u"ARXIV", + "publisher": u"Publisher", 'dates': [ { 'type': u'date-preprint', @@ -424,7 +432,7 @@ def test_SolrAdapter(self): 'page_count': 0, 'author_count': 1, 'doctype_facet_hier': [u'0/Article', u'1/Article/Journal Article'], - + "publisher": u"Publisher", 'identifier': [u'1968NuPhB...7...79F', u'arXiv:quant-ph/0206057'], 'links_data': [ u'{"access": "open", "instances": "", "title": "", "type": "preprint", "url": "http://arxiv.org/abs/quant-ph/0206057"}', @@ -458,6 +466,7 @@ def test_SolrAdapter(self): "general": { "publication": { "origin": u"ASCL", + "publisher": u"Publisher", 'dates': [ { 'type': u'date-published', @@ -496,7 +505,7 @@ def test_SolrAdapter(self): 'page_count': 0, 'author_count': 1, 'doctype_facet_hier': ["0/Non-Article", "1/Non-Article/Software"], - + "publisher": u"Publisher", 'identifier': [u'ascl:1802.007', u'2018ascl.soft02007G'], }) @@ -518,6 +527,7 @@ def test_SolrAdapter(self): "general": { "publication": { "origin": u"JSTOR", + "publisher": u"Publisher", 'dates': [ { 'type': u'date-published', @@ -562,6 +572,7 @@ def test_SolrAdapter(self): 'author_count': 1, 'doctype_facet_hier': [u'0/Article', u'1/Article/Journal Article'], 'identifier': [u'1988Sci...240..668D'], + "publisher": u"Publisher", }) # check the entry_date updates - don't update if the timestamp is setup @@ -582,6 +593,7 @@ def test_SolrAdapter(self): "general": { "publication": { "origin": u"ARXIV", + "publisher": u"Publisher", 'dates': [ { 'type': u'date-preprint', @@ -633,6 +645,7 @@ def test_SolrAdapter(self): 'author_count': 2, 'doctype_facet_hier': [u'0/Article', u'1/Article/Journal Article'], 'identifier': [u'2014arXiv1406.4542H'], + "publisher": u"Publisher", }) # check the entry_date updates - don't update if the date is today @@ -655,6 +668,7 @@ def test_SolrAdapter(self): "general": { "publication": { "origin": u"ARXIV", + "publisher": u"Publisher", 'dates': [ { 'type': u'date-preprint', @@ -706,6 +720,7 @@ def test_SolrAdapter(self): 'author_count': 2, 'doctype_facet_hier': [u'0/Article', u'1/Article/Journal Article'], 'identifier': [u'2014arXiv1406.4542H'], + "publisher": u"Publisher", }) class TestBibstemMapper(unittest.TestCase): diff --git a/tests/stubdata/ADSRecords/2009AAS...21320006C.classic b/tests/stubdata/ADSRecords/2009AAS...21320006C.classic index 513832d..fd4f792 100644 --- a/tests/stubdata/ADSRecords/2009AAS...21320006C.classic +++ b/tests/stubdata/ADSRecords/2009AAS...21320006C.classic @@ -1 +1 @@ -OrderedDict([(u'records', OrderedDict([(u'record', OrderedDict([(u'@bibcode', u'2009AAS...21320006C'), (u'@entry_date', u'2009-01-03'), (u'metadata', [OrderedDict([(u'@origin', u'AAS'), (u'@type', u'general'), (u'@primary', u'True'), (u'@alternate_journal', u'False'), (u'creation_time', u'2016-01-21T23:19:13Z'), (u'modification_time', u'2016-01-21T23:19:13Z'), (u'bibcode', u'2009AAS...21320006C'), (u'dates', OrderedDict([(u'date', OrderedDict([(u'@type', u'date-published'), ('#text', u'2009-01-00')]))])), (u'publication_year', u'2009'), (u'title', u"A New Way of Looking: the Amateur Telescope Making Movement in 1920's America"), (u'abstract', u'The hobby of astronomy in America was restricted largely to a relatively few well-off persons prior to the 1920\'s in part due to the difficulty in acquiring adequate instruments. Even modest telescopes were quite expensive and very few in number. The standard "beginner\'s\u201d instrument was a three-inch diameter refracting telescope, precision crafted by expert manufacturers. Early Twentieth-century astronomy popularizers recognized the problem of availability of instruments and saw that this hindered growth of the hobby. The idea of making one\'s own telescope was limited to a hardy few with the time, equipment, machining skills, and information required and very few attempted the task. This situation changed dramatically by the late 1920\'s due to the publication of a series of articles in Scientific American that provided detailed, practical instructions for a six-inch Newtonian reflecting telescope, a project well within the means and skills of the average "handyman". Publication of these articles initiated a profound change in perception for amateur astronomers, who quickly became amateur telescope makers as well, creating precision instruments for themselves and in part leading to a widening of the amateur astronomy hobby and interest in astronomy generally. This paper forms a portion of a doctoral dissertation being written by the author.'), (u'author', OrderedDict([(u'@nr', u'1'), (u'name', OrderedDict([(u'western', u'Cameron, Gary L.'), (u'normalized', u'Cameron, G')])), (u'affiliations', OrderedDict([(u'affiliation', u'Iowa State University')])), (u'type', u'regular')])), (u'ADSaffiliation', u'AA(Iowa State University)'), (u'journal', u'American Astronomical Society, AAS Meeting #213, id.200.06; Bulletin of the American Astronomical Society, Vol. 41, p.187'), (u'canonical_journal', u'American Astronomical Society Meeting Abstracts #213'), (u'electronic_id', u'200'), (u'volume', u'213')]), OrderedDict([(u'@origin', u'AAS'), (u'@type', u'general'), (u'@primary', u'False'), (u'@alternate_journal', u'True'), (u'creation_time', u'2016-01-21T23:20:41Z'), (u'modification_time', u'2016-01-21T23:20:41Z'), (u'bibcode', u'2009BAAS...41..187C'), (u'dates', OrderedDict([(u'date', OrderedDict([(u'@type', u'date-published'), ('#text', u'2009-01-00')]))])), (u'publication_year', u'2009'), (u'title', u"A New Way of Looking: the Amateur Telescope Making Movement in 1920's America"), (u'author', OrderedDict([(u'@nr', u'1'), (u'name', OrderedDict([(u'western', u'Cameron, Gary L.'), (u'normalized', u'Cameron, G')])), (u'affiliations', OrderedDict([(u'affiliation', u'Iowa State University')])), (u'type', u'regular')])), (u'ADSaffiliation', u'AA(Iowa State University)'), (u'journal', u'American Astronomical Society, AAS Meeting #213, id.200.06; Bulletin of the American Astronomical Society, Vol. 41, p.187'), (u'volume', u'41'), (u'page_range', u'187'), (u'canonical_journal', u'Bulletin of the American Astronomical Society'), (u'page', u'187')]), OrderedDict([(u'@origin', u'ADS metadata'), (u'@type', u'properties'), (u'@primary', u'False'), (u'@alternate_journal', u'False'), (u'JSON_timestamp', u'{"abs":[{"p":"/proj/ads/abstracts/ast/text/J39/J39-71877.abs","primary":1,"t":"1453418353"},{"a":"2009BAAS...41..187C","p":"/proj/ads/abstracts/ast/text/S39/S39-71877.abs","t":"1453418441"}],"prop":["book","nonarticle","toc"]}'), (u'databases', OrderedDict([(u'database', u'AST')])), (u'pubtype', u'abstract'), (u'private', u'0'), (u'ocrabstract', u'0'), (u'preprint', None), (u'nonarticle', u'1'), (u'refereed', u'0'), (u'openaccess', u'0'), (u'eprint_openaccess', u'0'), (u'pub_openaccess', u'0'), (u'ads_openaccess', u'0'), (u'toc', u'1')]), OrderedDict([(u'@origin', u'ADS metadata'), (u'@type', u'relations'), (u'@primary', u'False'), (u'@alternate_journal', u'False'), (u'alternates', OrderedDict([(u'alternate', OrderedDict([(u'@type', u'alternate'), ('#text', u'2009BAAS...41..187C')]))])), (u'identifiers', OrderedDict([(u'identifier', OrderedDict([(u'@type', u'identifier'),('#text', u'2009AAS...21320006C')]))])), (u'links', OrderedDict([(u'link', OrderedDict([(u'@url', u'http://adsabs.harvard.edu/abs/2009AAS...21320006C'), (u'@type', u'ADSlink')]))]))])])]))]))]) \ No newline at end of file +OrderedDict([(u'records', OrderedDict([(u'record', OrderedDict([(u'@bibcode', u'2009AAS...21320006C'), (u'@entry_date', u'2009-01-03'), (u'metadata', [OrderedDict([(u'@origin', u'AAS'), (u'@type', u'general'), (u'@primary', u'True'), (u'@alternate_journal', u'False'), (u'creation_time', u'2016-01-21T23:19:13Z'), (u'modification_time', u'2016-01-21T23:19:13Z'), (u'bibcode', u'2009AAS...21320006C'), (u'dates', OrderedDict([(u'date', OrderedDict([(u'@type', u'date-published'), ('#text', u'2009-01-00')]))])), (u'publication_year', u'2009'), (u'publisher', u'Publisher'), (u'title', u"A New Way of Looking: the Amateur Telescope Making Movement in 1920's America"), (u'abstract', u'The hobby of astronomy in America was restricted largely to a relatively few well-off persons prior to the 1920\'s in part due to the difficulty in acquiring adequate instruments. Even modest telescopes were quite expensive and very few in number. The standard "beginner\'s\u201d instrument was a three-inch diameter refracting telescope, precision crafted by expert manufacturers. Early Twentieth-century astronomy popularizers recognized the problem of availability of instruments and saw that this hindered growth of the hobby. The idea of making one\'s own telescope was limited to a hardy few with the time, equipment, machining skills, and information required and very few attempted the task. This situation changed dramatically by the late 1920\'s due to the publication of a series of articles in Scientific American that provided detailed, practical instructions for a six-inch Newtonian reflecting telescope, a project well within the means and skills of the average "handyman". Publication of these articles initiated a profound change in perception for amateur astronomers, who quickly became amateur telescope makers as well, creating precision instruments for themselves and in part leading to a widening of the amateur astronomy hobby and interest in astronomy generally. This paper forms a portion of a doctoral dissertation being written by the author.'), (u'author', OrderedDict([(u'@nr', u'1'), (u'name', OrderedDict([(u'western', u'Cameron, Gary L.'), (u'normalized', u'Cameron, G')])), (u'affiliations', OrderedDict([(u'affiliation', u'Iowa State University')])), (u'type', u'regular')])), (u'ADSaffiliation', u'AA(Iowa State University)'), (u'journal', u'American Astronomical Society, AAS Meeting #213, id.200.06; Bulletin of the American Astronomical Society, Vol. 41, p.187'), (u'canonical_journal', u'American Astronomical Society Meeting Abstracts #213'), (u'electronic_id', u'200'), (u'volume', u'213')]), OrderedDict([(u'@origin', u'AAS'), (u'@type', u'general'), (u'@primary', u'False'), (u'@alternate_journal', u'True'), (u'creation_time', u'2016-01-21T23:20:41Z'), (u'modification_time', u'2016-01-21T23:20:41Z'), (u'bibcode', u'2009BAAS...41..187C'), (u'dates', OrderedDict([(u'date', OrderedDict([(u'@type', u'date-published'), ('#text', u'2009-01-00')]))])), (u'publication_year', u'2009'), (u'title', u"A New Way of Looking: the Amateur Telescope Making Movement in 1920's America"), (u'author', OrderedDict([(u'@nr', u'1'), (u'name', OrderedDict([(u'western', u'Cameron, Gary L.'), (u'normalized', u'Cameron, G')])), (u'affiliations', OrderedDict([(u'affiliation', u'Iowa State University')])), (u'type', u'regular')])), (u'ADSaffiliation', u'AA(Iowa State University)'), (u'journal', u'American Astronomical Society, AAS Meeting #213, id.200.06; Bulletin of the American Astronomical Society, Vol. 41, p.187'), (u'volume', u'41'), (u'page_range', u'187'), (u'canonical_journal', u'Bulletin of the American Astronomical Society'), (u'page', u'187')]), OrderedDict([(u'@origin', u'ADS metadata'), (u'@type', u'properties'), (u'@primary', u'False'), (u'@alternate_journal', u'False'), (u'JSON_timestamp', u'{"abs":[{"p":"/proj/ads/abstracts/ast/text/J39/J39-71877.abs","primary":1,"t":"1453418353"},{"a":"2009BAAS...41..187C","p":"/proj/ads/abstracts/ast/text/S39/S39-71877.abs","t":"1453418441"}],"prop":["book","nonarticle","toc"]}'), (u'databases', OrderedDict([(u'database', u'AST')])), (u'pubtype', u'abstract'), (u'private', u'0'), (u'ocrabstract', u'0'), (u'preprint', None), (u'nonarticle', u'1'), (u'refereed', u'0'), (u'openaccess', u'0'), (u'eprint_openaccess', u'0'), (u'pub_openaccess', u'0'), (u'ads_openaccess', u'0'), (u'toc', u'1')]), OrderedDict([(u'@origin', u'ADS metadata'), (u'@type', u'relations'), (u'@primary', u'False'), (u'@alternate_journal', u'False'), (u'alternates', OrderedDict([(u'alternate', OrderedDict([(u'@type', u'alternate'), ('#text', u'2009BAAS...41..187C')]))])), (u'identifiers', OrderedDict([(u'identifier', OrderedDict([(u'@type', u'identifier'),('#text', u'2009AAS...21320006C')]))])), (u'links', OrderedDict([(u'link', OrderedDict([(u'@url', u'http://adsabs.harvard.edu/abs/2009AAS...21320006C'), (u'@type', u'ADSlink')]))]))])])]))]))]) \ No newline at end of file diff --git a/tests/stubdata/ADSRecords/testbibcode b/tests/stubdata/ADSRecords/testbibcode index 5b9d40b..1743d8d 100644 --- a/tests/stubdata/ADSRecords/testbibcode +++ b/tests/stubdata/ADSRecords/testbibcode @@ -42,6 +42,7 @@ 'origin': '', 'page': u'2056-2078', 'page_count': 0, + 'publisher': u'Publisher', 'volume': u'l24'}, 'titles': [{'lang': u'en', 'text': u'This is of the title'}, {'lang': '', 'text': u'This is of the alternate'}]}, diff --git a/tests/stubdata/ADSRecords/testbibcode2 b/tests/stubdata/ADSRecords/testbibcode2 index 36e9aef..d772208 100644 --- a/tests/stubdata/ADSRecords/testbibcode2 +++ b/tests/stubdata/ADSRecords/testbibcode2 @@ -52,6 +52,7 @@ "page_count" : u"19", "page" : u"41", "volume" : u"143", + "publisher": u"Publisher", "page_range" : u"41-59", "electronic_id" : u"here is the eid", "issue" : None, diff --git a/tests/stubdata/mergerdata.py b/tests/stubdata/mergerdata.py index 6f84b7c..3c15a53 100644 --- a/tests/stubdata/mergerdata.py +++ b/tests/stubdata/mergerdata.py @@ -62,6 +62,7 @@ (u'electronic_id', u'428.16'), (u'publication_year', u'2010'), (u'volume', u'215'), + (u'publisher', u'Publisher'), (u'series', '123')]) PROPERTIES = OrderedDict([(u'@origin', u'ADS metadata'), @@ -263,7 +264,8 @@ 'page_last': None, 'page_range': None, 'series': '123', - 'volume': u'215'}, + 'volume': u'215', + 'publisher': 'Publisher'}, 'pubnote': [], 'tempdata': {'alternate_journal': False, 'modtime': u'2012-08-06T19:12:47',