diff --git a/controller/libpostal.js b/controller/libpostal.js index c2f7cf360..75a21331c 100644 --- a/controller/libpostal.js +++ b/controller/libpostal.js @@ -76,11 +76,18 @@ function setup(libpostalService, should_execute) { // push err.message or err onto req.errors req.errors.push( _.get(err, 'message', err) ); + } else if (!Array.isArray(response) || !response.length) { + return next(); + } else if (_.some(_.countBy(response, o => o.label), count => count > 1)) { logger.warn(`discarding libpostal parse of '${req.clean.text}' due to duplicate field assignments`); return next(); - } else if (_.isEmpty(response)) { + // libpostal labels some place names (airports, for example) as 'suburb'. + // when the parse consists of a single 'suburb' label, discard it. + // examples: 'john f kennedy international airport', 'soho' + } else if (response.length === 1 && response[0].label === 'suburb') { + logger.warn(`discarding libpostal parse of '${req.clean.text}' due to solo 'suburb' label`); return next(); } else { @@ -121,7 +128,6 @@ const IS_NUMERIC_REGEXP = /^\d+$/; // apply fixes for known bugs in libpostal function patchBuggyResponses(response){ - if( !Array.isArray(response) || !response.length ){ return response; } // patches which are only applied when a single label is generated if( response.length === 1 ){ diff --git a/test/unit/controller/libpostal.js b/test/unit/controller/libpostal.js index 1d1881649..0e86e061e 100644 --- a/test/unit/controller/libpostal.js +++ b/test/unit/controller/libpostal.js @@ -770,7 +770,7 @@ module.exports.tests.bug_fixes = (test, common) => { test('bug fix: recast entirely numeric input - 99', t => { const service = (req, callback) => { callback(null, [{ - 'label': 'suburb', + 'label': 'house_number', 'value': '99' }]); }; @@ -831,6 +831,67 @@ module.exports.tests.bug_fixes = (test, common) => { }); }); + test('bug fix: discard single label of type "suburb"', t => { + const service = (req, callback) => { + callback(null, [{ + 'label': 'suburb', + 'value': 'example' + }]); 
+ }; + const controller = libpostal(service, () => true); + const req = { + clean: { + text: 'example' + }, + errors: [] + }; + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + text: 'example', + // parse discarded + }, + errors: [] + }); + + t.end(); + }); + }); + + test('bug fix: do not discard "suburb" when accompanied by another label', t => { + const service = (req, callback) => { + callback(null, [{ + 'label': 'road', + 'value': 'avenue' + },{ + 'label': 'suburb', + 'value': 'example' + }]); + }; + const controller = libpostal(service, () => true); + const req = { + clean: { + text: 'avenue example' + }, + errors: [] + }; + controller(req, undefined, () => { + t.deepEquals(req, { + clean: { + text: 'avenue example', + parser: 'libpostal', + parsed_text: { + street: 'avenue', + neighbourhood: 'example' + } + }, + errors: [] + }); + + t.end(); + }); + }); + }; module.exports.all = (tape, common) => {