From f015df30a76b8760caec10ebf7155fcf3736fd56 Mon Sep 17 00:00:00 2001 From: Jake Low Date: Mon, 9 Sep 2024 22:02:37 -0700 Subject: [PATCH] Use @osmcha/osmchange-parser and @osmcha/osm-changeset-xml-parser; remove lib/xml.js --- lib/get-changesets.js | 96 +++++++++++++++++++---------- lib/xml.js | 138 ------------------------------------------ package.json | 3 +- yarn.lock | 70 +++++---------------- 4 files changed, 82 insertions(+), 225 deletions(-) delete mode 100644 lib/xml.js diff --git a/lib/get-changesets.js b/lib/get-changesets.js index 789a2a1..5c31744 100644 --- a/lib/get-changesets.js +++ b/lib/get-changesets.js @@ -2,21 +2,23 @@ const _ = require('lodash'); const moment = require('moment'); -const { parseOsmChangeXml, parseChangesetXml, parseAugmentedDiff } = require('../lib/xml'); +const parseOsmChangeXml = require("@osmcha/osmchange-parser"); +const parseChangesetXml = require("@osmcha/osm-changeset-xml-parser"); +const parseAugmentedDiff = require("@osmcha/osm-adiff-parser"); + const { getStateForMinute } = require('../util/get-states'); const { request } = require('../util/request'); const { OSM_CHANGESET_API, CHANGE_STATES, - TYPES, OVERPASS_PRIMARY_URL, OVERPASS_SECONDARY_URL } = require('./constants'); const getChangesets = async (xml) => { - const jsonData = parseOsmChangeXml(xml); + const jsonData = await parseOsmChangeXml(xml); - if (!jsonData.osmChange || !jsonData.osmChange[0]) { + if (!jsonData) { throw new Error('OSM data missing from XML file'); } @@ -49,7 +51,7 @@ const getChangesets = async (xml) => { getDataParam( meta.created_at, meta.closed_at ? meta.closed_at : meta.created_at, - meta.open === 'false' + !meta.open ), getBboxParam(meta.bbox) ); @@ -91,7 +93,7 @@ const getChangesets = async (xml) => { const diffFeatureMap = {}; changesetIds.forEach((cid) => { if (featureMap[cid] && realFeatureMap[cid]) { - diffFeatureMap[cid] = _.difference(featureMap[cid], realFeatureMap[cid]); + diffFeatureMap[cid] = _.difference(featureMap[cid], realFeatureMap[cid].map(s => +s)); if (diffFeatureMap[cid].length) { results[cid].metadata.incomplete = true; console.log('# Incomplete changeset', cid); @@ -101,42 +103,37 @@ const getChangesets = async (xml) => { console.log('# Feature diff', JSON.stringify(diffFeatureMap)); + for (let realChangeset of Object.values(results)) { + makeBackwardsCompatible(realChangeset); + } + return results; }; -const parseJsonData = (jsonData) => { +const parseJsonData = (osmChange) => { const featureMap = {}; let timestamps = []; let changesetIds = []; - CHANGE_STATES.map((changeState) => { - if(jsonData.osmChange[0][changeState]) { - jsonData.osmChange[0][changeState].forEach((stateSection) => { - TYPES.forEach((type) => { - if(stateSection[type]) { - stateSection[type].forEach(({ timestamp, changeset, id }) => { - timestamps.push(timestamp); - changesetIds.push(changeset); - - if (!featureMap[changeset]) { - featureMap[changeset] = []; - } - featureMap[changeset].push(id); - }) - } - }); - }); + for (let changeState of CHANGE_STATES) { + if (osmChange[changeState]) { + for (let element of osmChange[changeState]) { + let { timestamp, changeset, id } = element; + timestamps.push(timestamp); + changesetIds.push(changeset); + + if (!featureMap[changeset]) { + featureMap[changeset] = []; + } + featureMap[changeset].push(id); + } } - }); + } timestamps = _.uniq(timestamps); changesetIds = _.uniq(changesetIds); - return { - timestamps, - changesetIds, - featureMap, - }; + return { timestamps, changesetIds, featureMap }; }; const getTimestampsStates = (timestamps) => { @@ -164,9 +161,9 @@ const queryOverpass = async (changesetId, data, bbox) => { const getChangesetMetadata = async (changesetId) => { try { const body = await request(`${OSM_CHANGESET_API}/${changesetId}`).then(res => res.text()); - const changesetData = parseChangesetXml(body); + const changesetData = await parseChangesetXml(body); - const meta = changesetData.osm[0].changeset[0]; + const meta = changesetData.changeset; const bbox = { left: meta.min_lon ? meta.min_lon : -180, @@ -232,6 +229,41 @@ const getBboxParam = (bbox) => { return [bbox.left, bbox.bottom, bbox.right, bbox.top].join(','); } +/* + * Convert some field types and object structures from the format returned by + * osm-changeset-xml-parser to the format expected by current consumers of the + * real-changesets dataset. + */ +const makeBackwardsCompatible = (realChangeset) => { + // these fields need to be converted from Numbers (or Booleans) to strings + let fields = [ + "metadata.min_lon", + "metadata.min_lat", + "metadata.max_lon", + "metadata.max_lat", + "metadata.bbox.left", + "metadata.bbox.bottom", + "metadata.bbox.right", + "metadata.bbox.top", + "metadata.id", + "metadata.uid", + "metadata.changes_count", + "metadata.comments_count", + "metadata.open", + ]; + + for (let field of fields) { + let value = _.get(realChangeset, field); + if (value !== undefined) { + _.set(realChangeset, field, JSON.stringify(value)); + } + } + + // tags need to be converted from { foo: "bar" } to [{ k: "foo", v: "bar" }] form + realChangeset.metadata.tag = Object.entries(realChangeset.metadata.tags).map(([k, v]) => ({ k, v })); + delete realChangeset.metadata.tags; +} + module.exports = { getChangesets, getChangesetMetadata, diff --git a/lib/xml.js b/lib/xml.js deleted file mode 100644 index b0cb944..0000000 --- a/lib/xml.js +++ /dev/null @@ -1,138 +0,0 @@ -'use strict'; - -const _ = require('lodash'); -const htmlparser = require('htmlparser2'); -const osmAdiffParser = require('@osmcha/osm-adiff-parser'); - -/* - * Parse osmChange XML format documented here: https://wiki.openstreetmap.org/wiki/OsmChange - * (Contains new versions of each modified element) - */ -const parseOsmChangeXml = (xmlString) => { - let buffer = {}; - let items = []; - let tempType = ''; - const json = {}; - - const opts = { - onopentag: (name, attr) => { - switch (name) { - case 'osmChange': - json[name] = [attr]; - break; - case 'modify': - case 'delete': - case 'create': - if(! json.osmChange[0][name]) { - json.osmChange[0][name] = []; - } - - break; - case 'node': - case 'way': - case 'relation': - buffer = { - ...attr - }; - - tempType = name; - - if (name === 'way') { - buffer.geometry = []; - } - if (name === 'relation') { - buffer.member = []; - buffer.node = []; - buffer.geometry = []; - } - - break; - case 'tag': - if(! buffer.tag) buffer.tag = []; - buffer.tag.push({ k: attr.k, v: attr.v }); - break; - case 'nd': - if(! buffer.nd) buffer.nd = []; - buffer.nd.push({ ref: attr.ref }); - buffer.geometry.push(null); - break; - case 'member': - buffer.member.push(attr); - break; - } - }, - onclosetag: (name) => { - if (name === 'node' || name === 'way' || name === 'relation' || name === 'area') { - if (buffer.geometry && buffer.geometry.every((g) => g === null)) { - delete buffer.geometry; - } - if (name === 'relation') { - delete buffer.node; - } - - items.push(buffer); - } - - if(name === 'modify' || name === 'delete' || name === 'create') { - const obj = {}; - obj[tempType] = items; - json.osmChange[0][name].push(obj); - items = []; - tempType = ''; - } - }, - }; - - const parser = new htmlparser.Parser(opts, { decodeEntities: true, xmlMode: true }); - parser.write(xmlString); - parser.end(); - - return json; -}; - - -/* - * Parse OSM Changeset Metadata XML, of the form returned by - * https://www.openstreetmap.org/api/0.6/changeset/:id - * (Contains changeset's bbox, timestamp, comment, and authorship) - */ -const parseChangesetXml = (xmlString) => { - const result = {}; - - const opts = { - onopentag: (name, attr) => { - switch (name) { - case 'osm': - if(! result[name]) result[name] = []; - result[name].push(attr); - break; - case 'changeset': - if(! result.osm[name]) result.osm[0][name] = []; - result.osm[0][name].push(attr); - break; - case 'tag': - if(! result.osm[0].changeset[0][name]) result.osm[0].changeset[0][name] = []; - result.osm[0].changeset[0][name].push(attr); - break; - } - }, - }; - - const parser = new htmlparser.Parser(opts, { - decodeEntities: true, - xmlMode: true - }); - parser.write(xmlString); - parser.end(); - - return result; -}; - -/* - * Parse OSM Augmented Diff format, documented here: - * https://wiki.openstreetmap.org/wiki/Overpass_API/Augmented_Diffs - * (Contains both old and new versions of all modified elements) - */ -const parseAugmentedDiff = osmAdiffParser; - -module.exports = { parseOsmChangeXml, parseChangesetXml, parseAugmentedDiff }; diff --git a/package.json b/package.json index 45b8a4b..85766ac 100644 --- a/package.json +++ b/package.json @@ -25,9 +25,10 @@ "homepage": "https://github.com/OSMCha/osm-adiff-service", "dependencies": { "@osmcha/osm-adiff-parser": "^2.0.0", + "@osmcha/osm-changeset-xml-parser": "^1.0.0", + "@osmcha/osmchange-parser": "^1.0.0", "aws-sdk": "^2.625.0", "changetags": "^0.1.2", - "htmlparser2": "^4.1.0", "lodash": "^4.17.15", "minimist": "^1.2.0", "moment": "^2.25.3", diff --git a/yarn.lock b/yarn.lock index 353d08a..ee3bbc8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -19,11 +19,25 @@ "@osmcha/osm-adiff-parser@^2.0.0": version "2.0.0" - resolved "https://registry.yarnpkg.com/@osmcha/osm-adiff-parser/-/osm-adiff-parser-2.0.0.tgz#8d199d51613f8fb9229ada87652d447bb0cfa9fc" + resolved "https://registry.npmjs.org/@osmcha/osm-adiff-parser/-/osm-adiff-parser-2.0.0.tgz" integrity sha512-KXpEHYaYQiDaUijYXDAIxsTv9XJ3sxzB/LbyLe8elXTcgcSdyg+RlbTT27Uh4MhBejWSk9ozVBNSpdSkuHW4NA== dependencies: sax "^1.4.1" +"@osmcha/osm-changeset-xml-parser@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@osmcha/osm-changeset-xml-parser/-/osm-changeset-xml-parser-1.0.0.tgz#c301a9cce80befce75b1992ca247b27a0d0cb887" + integrity sha512-j2NsEJPzZDTMMTdUNRYaYi2kNoGQadhF1DHeiiKFHwK6XQi65X9KhZXSMtOu8jOJ8oZmjyzR2xNYQdITJ841cw== + dependencies: + sax "^1.4.1" + +"@osmcha/osmchange-parser@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@osmcha/osmchange-parser/-/osmchange-parser-1.0.0.tgz#05c96d63c2569235fbcd6e284ad2e2f9f7658ef2" + integrity sha512-cGGKwlm3BX0iVN3olmp0rGlNxW1XdFC82VmyUS0xXstEi2RlJXn/b/ENrromwBjNNKDFMDWb3V1dwpjAFnwR5g== + dependencies: + sax "^1.4.1" + "@redis/bloom@1.2.0": version "1.2.0" resolved "https://registry.npmjs.org/@redis/bloom/-/bloom-1.2.0.tgz" @@ -460,43 +474,6 @@ doctrine@^2.0.0, doctrine@^2.1.0: dependencies: esutils "^2.0.2" -dom-serializer@^1.0.1: - version "1.4.1" - resolved "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz" - integrity sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag== - dependencies: - domelementtype "^2.0.1" - domhandler "^4.2.0" - entities "^2.0.0" - -domelementtype@^2.0.1, domelementtype@^2.2.0: - version "2.3.0" - resolved "https://registry.npmjs.org/domelementtype/-/domelementtype-2.3.0.tgz" - integrity sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw== - -domhandler@^3.0.0: - version "3.3.0" - resolved "https://registry.npmjs.org/domhandler/-/domhandler-3.3.0.tgz" - integrity sha512-J1C5rIANUbuYK+FuFL98650rihynUOEzRLxW+90bKZRWB6A1X1Tf82GxR1qAWLyfNPRvjqfip3Q5tdYlmAa9lA== - dependencies: - domelementtype "^2.0.1" - -domhandler@^4.2.0: - version "4.3.1" - resolved "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz" - integrity sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ== - dependencies: - domelementtype "^2.2.0" - -domutils@^2.0.0: - version "2.8.0" - resolved "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz" - integrity sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A== - dependencies: - dom-serializer "^1.0.1" - domelementtype "^2.2.0" - domhandler "^4.2.0" - dotignore@^0.1.2: version "0.1.2" resolved "https://registry.npmjs.org/dotignore/-/dotignore-0.1.2.tgz" @@ -511,11 +488,6 @@ encoding@^0.1.11: dependencies: iconv-lite "^0.6.2" -entities@^2.0.0: - version "2.2.0" - resolved "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz" - integrity sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A== - es-abstract@^1.22.1, es-abstract@^1.22.3, es-abstract@^1.23.0, es-abstract@^1.23.2: version "1.23.3" resolved "https://registry.npmjs.org/es-abstract/-/es-abstract-1.23.3.tgz" @@ -1085,16 +1057,6 @@ hasown@^2.0.0, hasown@^2.0.1, hasown@^2.0.2: dependencies: function-bind "^1.1.2" -htmlparser2@^4.1.0: - version "4.1.0" - resolved "https://registry.npmjs.org/htmlparser2/-/htmlparser2-4.1.0.tgz" - integrity sha512-4zDq1a1zhE4gQso/c5LP1OtrhYTncXNSpvJYtWJBtXAETPlMfi3IFNjGuQbYLuVY4ZR0QMqRVvo4Pdy9KLyP8Q== - dependencies: - domelementtype "^2.0.1" - domhandler "^3.0.0" - domutils "^2.0.0" - entities "^2.0.0" - iconv-lite@^0.6.2: version "0.6.3" resolved "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.6.3.tgz" @@ -1802,7 +1764,7 @@ sax@>=0.6.0: sax@^1.4.1: version "1.4.1" - resolved "https://registry.yarnpkg.com/sax/-/sax-1.4.1.tgz#44cc8988377f126304d3b3fc1010c733b929ef0f" + resolved "https://registry.npmjs.org/sax/-/sax-1.4.1.tgz" integrity sha512-+aWOz7yVScEGoKNd4PA10LZ8sk0A/z5+nXQG5giUO5rprX9jgYsTdov9qCchZiPIZezbZH+jRut8nPodFAX4Jg== semver@5.3.0: