Skip to content

Commit

Permalink
Add logic to check Wikidata parents and exclude subsidiaries with identical IATA codes #1439
Browse files Browse the repository at this point in the history
  • Loading branch information
jpatokal committed Sep 6, 2023
1 parent 2f7ca77 commit 40b07f5
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 11 deletions.
21 changes: 19 additions & 2 deletions tools/update_airlines.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,12 +142,23 @@ def diff(self, of, wp):

fields = {}
for field in ['name', 'callsign', 'icao', 'iata', 'source', 'country', 'country_code', 'start_date', 'end_date', 'duplicate']:
# Do we have a new value?
if field in wp and wp[field] and wp[field] != of[field]:

# Is the change more than just case?
if not of[field] or str(wp[field]).upper() != str(of[field]).upper():

# Only alter main name if it's not an abbreviation
if field != 'name' or len(wp[field]) > 3:

# Reliable sources can overwrite, unreliable ones can only append
if reliable or not of[field]:
fields[field] = wp[field]

# Special case: If it's a new name, retain old name as alias
if field == 'name':
fields['alias'] = of['name']

# Special case: Only override activeness if new source says airline is defunct
if of['active'] == 'Y' and wp['active'] == 'N':
fields['active'] = 'N'
Expand Down Expand Up @@ -266,15 +277,21 @@ class Wikidata(object):
def load(self, filename):
with open(filename, 'rb') as csvfile:
reader = unicodecsv.DictReader(csvfile, delimiter=',')
# entity,airlineLabel,iata,icao,callsign,countryLabel,countryIso,startDate,endDate
# entity,airlineLabel,iata,icao,parentIata,parentIcao,callsign,countryLabel,countryIso,startDate,endDate
self.parse(reader)

def parse(self, rows):
for airline in rows:
# Ignore airlines where the name is just the Wikidata entity code
if airline['entity'].rsplit('/', 1)[1] == airline['airlineLabel']:
print(". IGNORE[WIKIDATA] %s" % airline)
print(". FILTER[WIKIDATA:NONAME] %s" % airline)
continue

# Ignore subsidiaries where the IATA code is identical to parent (commuter brands, cargo operations etc)
if airline['iata'] == airline.get('parentIata'):
print(". FILTER[WIKIDATA:SUBSIDIARY] %s" % airline)
continue

start_date, end_date = airline['startDate'], airline['endDate']
if start_date and not end_date:
active = 'Y'
Expand Down
17 changes: 11 additions & 6 deletions tools/update_airlines_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@ def testExactMatchLessReliableSourceUpdateNulls(self):
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Åland Lol Airlines', 'callsign': 'NEWXA', 'country': 'Åland', 'source': 'User'}
self.assertOnlyChange(wp, diff={'callsign': 'NEWXA'})

def testNameChange(self):
def testNameChangeKeepingOldNameAsAlias(self):
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Ahvenanmaa Airlines', 'callsign': 'ALAXA', 'country': 'Åland', 'source': 'Wikidata'}
self.assertOnlyChange(wp, diff={'name': 'Ahvenanmaa Airlines'})
self.assertOnlyChange(wp, diff={'name': 'Ahvenanmaa Airlines', 'alias': 'Åland Airlines'})

def testCountryChange(self):
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Åland Airlines', 'callsign': 'ALAXA', 'country': 'Ahvenanmaa', 'source': 'Wikidata'}
Expand All @@ -80,12 +80,12 @@ def testCountryCodeChange(self):

def testActiveToInactiveChange(self):
self.of['active'] = 'Y'
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Ahvenanmaa Airlines', 'callsign': 'ALAXA', 'country': 'Åland', 'source': 'Wikidata', 'active': 'N', 'end_date': '2019-01-01'}
self.assertOnlyChange(wp, diff={'name': 'Ahvenanmaa Airlines', 'active': 'N', 'end_date': '2019-01-01'})
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Åland Airlines', 'callsign': 'ALAXA', 'country': 'Åland', 'source': 'Wikidata', 'active': 'N', 'end_date': '2019-01-01'}
self.assertOnlyChange(wp, diff={'active': 'N', 'end_date': '2019-01-01'})

def testIgnoreInactiveToActiveChange(self):
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Ahvenanmaa Airlines', 'callsign': 'ALAXA', 'country': 'Åland', 'source': 'Wikidata', 'active': 'N', 'end_date': '2019-01-01'}
self.assertOnlyChange(wp, diff={'name': 'Ahvenanmaa Airlines', 'end_date': '2019-01-01'})
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Åland Airlines', 'callsign': 'ALAXA', 'country': 'Åland', 'source': 'Wikidata', 'active': 'N', 'end_date': '2019-01-01'}
self.assertOnlyChange(wp, diff={'end_date': '2019-01-01'})

def testIgnoreCaseChange(self):
wp = {'icao': 'ABC', 'iata': 'AB', 'name': 'Åland AIRLINES', 'callsign': 'ALAXA', 'country': 'Åland', 'source': 'Wikidata'}
Expand Down Expand Up @@ -220,6 +220,11 @@ def testWikidataIgnoreIfNameEqualsEntity(self):
self.wd.parse([wiki])
self.assertEqual(self.wd.airlines, [])

def testWikidataIgnoreIfSameIATAAsParent(self):
wiki = {'entity': 'http://www.wikidata.org/entity/Q3487216', 'airlineLabel': 'Abba Airlines', 'iata': 'TF', 'icao': '', 'parentIata': 'TF', 'callsign': '', 'countryLabel': 'Sweden', 'countryIso': 'SE', 'startDate': '2023-01-01', 'endDate': ''}
self.wd.parse([wiki])
self.assertEqual(self.wd.airlines, [])

#
# Processing tests
#
Expand Down
8 changes: 5 additions & 3 deletions tools/wikidata-airlines.sparql
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
# SPARQL query for extracting airline data from Wikidata
# Execute at https://query.wikidata.org/ and download results as CSV

SELECT (?airline as ?entity) ?airlineLabel ?iata ?icao ?callsign ?countryLabel ?countryIso
SELECT (?airline as ?entity) ?airlineLabel ?iata ?icao ?parentIata ?parentIcao ?callsign ?countryLabel ?countryIso
(xsd:date(?start) AS ?startDate) (xsd:date(?end) AS ?endDate) # format as ISO dates
WHERE
{
?airline wdt:P31 wd:Q46970 . # instance of airline
?airline wdt:P229 ?iata . # IATA code not optional
OPTIONAL{?airline wdt:P230 ?icao .}
OPTIONAL{?airline wdt:P749 ?parent .
?parent wdt:P229 ?parentIata .
?parent wdt:P230 ?parentIcao }
OPTIONAL{?airline wdt:P571 ?start .}
OPTIONAL{?airline wdt:P576 ?end .}
OPTIONAL { ?airline wdt:P17 ?country .
?country wdt:P297 ?countryIso }
OPTIONAL{?airline wdt:P432 ?callsign .}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}

0 comments on commit 40b07f5

Please sign in to comment.