diff --git a/README.md b/README.md index 3fc4a96e7..1ac436319 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Requirements Additional functionality is provided by installation of the following packages: -- `apertium-streamparser` enables spell checking +- `apertium-streamparser` enables analysis, generation and spell checking - `requests` enables suggestion handling - `chromium_compact_language_detector` enables improved language detection (cld2) - `chardet` enables website character encoding detection diff --git a/apertium_apy/apy.py b/apertium_apy/apy.py index f2efe05bb..d3ec8e8f5 100755 --- a/apertium_apy/apy.py +++ b/apertium_apy/apy.py @@ -282,8 +282,6 @@ def setup_application(args): (r'/translateDoc', TranslateDocHandler), (r'/translatePage', TranslateWebpageHandler), (r'/translateRaw', TranslateRawHandler), - (r'/analy[sz]e', AnalyzeHandler), - (r'/generate', GenerateHandler), (r'/listLanguageNames', ListLanguageNamesHandler), (r'/perWord', PerWordHandler), (r'/calcCoverage', CoverageHandler), @@ -293,7 +291,11 @@ def setup_application(args): ] if importlib.util.find_spec('streamparser'): - handlers.append((r'/speller', SpellerHandler)) + handlers.extend([ + (r'/analy[sz]e', AnalyzeHandler), + (r'/generate', GenerateHandler), + (r'/speller', SpellerHandler), + ]) if all([args.wiki_username, args.wiki_password]) and importlib.util.find_spec('requests'): import requests @@ -327,7 +329,7 @@ def main(): logging.warning('Unable to import chardet, assuming utf-8 encoding for all websites') if importlib.util.find_spec('streamparser') is None: - logging.warning('Apertium streamparser not installed, spelling handler disabled') + logging.warning('Apertium streamparser not installed, analysis, generation and spelling handlers disabled') if importlib.util.find_spec('requests') is None: logging.warning('requests not installed, suggestions disabled') diff --git a/apertium_apy/handlers/analyze.py b/apertium_apy/handlers/analyze.py index a1e96974f..115042956 100644 
--- a/apertium_apy/handlers/analyze.py +++ b/apertium_apy/handlers/analyze.py @@ -1,7 +1,10 @@ -import re - from tornado import gen +try: + import streamparser +except ImportError: + streamparser = None + from apertium_apy.handlers.base import BaseHandler from apertium_apy.utils import to_alpha3_code, remove_dot_from_deformat from apertium_apy.utils.translation import translate_simple @@ -9,10 +12,12 @@ class AnalyzeHandler(BaseHandler): def postproc_text(self, in_text, result): - lexical_units = remove_dot_from_deformat(in_text, re.findall(r'\^([^\$]*)\$([^\^]*)', result)) # TODO: replace with streamparser - return [(lu[0], lu[0].split('/')[0] + lu[1]) - for lu - in lexical_units] + lexical_units_with_text = remove_dot_from_deformat(in_text, list(streamparser.parse(result, with_text=True))) + return [ + (text_and_lu[1].lexical_unit, text_and_lu[0] + text_and_lu[1].wordform) + for text_and_lu + in lexical_units_with_text + ] @gen.coroutine def get(self): diff --git a/apertium_apy/handlers/generate.py b/apertium_apy/handlers/generate.py index 9851fb97b..66f6bcf21 100644 --- a/apertium_apy/handlers/generate.py +++ b/apertium_apy/handlers/generate.py @@ -1,23 +1,34 @@ -import re - from tornado import gen +try: + import streamparser +except ImportError: + streamparser = None + from apertium_apy.handlers.base import BaseHandler from apertium_apy.utils import to_alpha3_code from apertium_apy.utils.translation import translate_simple class GenerateHandler(BaseHandler): + separator = '[SEP]' + + def wrap(self, text): + return '^{}$'.format(text) + def preproc_text(self, in_text): - lexical_units = re.findall(r'(\^[^\$]*\$[^\^]*)', in_text) # TODO: replace with streamparser - if len(lexical_units) == 0: - lexical_units = ['^%s$' % (in_text,)] - return lexical_units, '[SEP]'.join(lexical_units) + lexical_units_with_text = list(streamparser.parse(in_text, with_text=True)) + if len(lexical_units_with_text) == 0: + lexical_units_with_text = 
list(streamparser.parse(self.wrap(in_text), with_text=True)) + lexical_units = [self.wrap(text_and_lu[1].lexical_unit) for text_and_lu in lexical_units_with_text] + return lexical_units_with_text, self.separator.join(lexical_units) - def postproc_text(self, lexical_units, result): - return [(generation, lexical_units[i]) - for (i, generation) - in enumerate(result.split('[SEP]'))] + def postproc_text(self, lexical_units_with_text, result): + return [ + (generation, self.wrap(text_and_lu[0] + text_and_lu[1].lexical_unit)) + for (generation, text_and_lu) + in zip(result.split(self.separator), lexical_units_with_text) + ] @gen.coroutine def get(self): @@ -27,8 +38,8 @@ def get(self): [path, mode] = self.generators[in_mode] formatting = 'none' commands = [['apertium', '-d', path, '-f', formatting, mode]] - lexical_units, to_generate = self.preproc_text(in_text) + lexical_units_with_text, to_generate = self.preproc_text(in_text) result = yield translate_simple(to_generate, commands) - self.send_response(self.postproc_text(lexical_units, result)) + self.send_response(self.postproc_text(lexical_units_with_text, result)) else: self.send_error(400, explanation='That mode is not installed')