README.md (2 changes: 1 addition & 1 deletion)

@@ -20,7 +20,7 @@ Requirements
 Additional functionality is provided by installation
 of the following packages:
 
-- `apertium-streamparser` enables spell checking
+- `apertium-streamparser` enables analysis, generation and spell checking
 - `requests` enables suggestion handling
 - `chromium_compact_language_detector` enables improved language detection (cld2)
 - `chardet` enables website character encoding detection
apertium_apy/apy.py (10 changes: 6 additions & 4 deletions)

@@ -282,8 +282,6 @@ def setup_application(args):
         (r'/translateDoc', TranslateDocHandler),
         (r'/translatePage', TranslateWebpageHandler),
         (r'/translateRaw', TranslateRawHandler),
-        (r'/analy[sz]e', AnalyzeHandler),
-        (r'/generate', GenerateHandler),
         (r'/listLanguageNames', ListLanguageNamesHandler),
         (r'/perWord', PerWordHandler),
         (r'/calcCoverage', CoverageHandler),
@@ -293,7 +291,11 @@ def setup_application(args):
     ]
 
     if importlib.util.find_spec('streamparser'):
-        handlers.append((r'/speller', SpellerHandler))
+        handlers.extend([
+            (r'/analy[sz]e', AnalyzeHandler),
+            (r'/generate', GenerateHandler),
+            (r'/speller', SpellerHandler),
+        ])
 
     if all([args.wiki_username, args.wiki_password]) and importlib.util.find_spec('requests'):
         import requests
@@ -327,7 +329,7 @@ def main():
         logging.warning('Unable to import chardet, assuming utf-8 encoding for all websites')
 
     if importlib.util.find_spec('streamparser') is None:
-        logging.warning('Apertium streamparser not installed, spelling handler disabled')
+        logging.warning('Apertium streamparser not installed, analysis, generation and spelling handlers disabled')
 
     if importlib.util.find_spec('requests') is None:
         logging.warning('requests not installed, suggestions disabled')
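The pattern used here — probing for an optional dependency with importlib.util.find_spec and only registering the routes that need it — is self-contained enough to sketch on its own. A minimal, runnable illustration; the placeholder classes below stand in for the real Tornado handlers:

import importlib.util
import logging


class AnalyzeHandler:   # placeholder for the real Tornado handler
    pass


class GenerateHandler:  # placeholder
    pass


class SpellerHandler:   # placeholder
    pass


handlers = []  # routes with no optional dependencies would go here

# find_spec() reports whether a module is importable without actually
# importing it, so probing a missing package is cheap and side-effect free.
if importlib.util.find_spec('streamparser'):
    handlers.extend([
        (r'/analy[sz]e', AnalyzeHandler),
        (r'/generate', GenerateHandler),
        (r'/speller', SpellerHandler),
    ])
else:
    logging.warning('streamparser not installed, analysis, generation and spelling handlers disabled')

print([pattern for pattern, _ in handlers])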
apertium_apy/handlers/analyze.py (17 changes: 11 additions & 6 deletions)

@@ -1,18 +1,23 @@
-import re
-
 from tornado import gen
 
+try:
+    import streamparser
+except ImportError:
+    streamparser = None
+
 from apertium_apy.handlers.base import BaseHandler
 from apertium_apy.utils import to_alpha3_code, remove_dot_from_deformat
 from apertium_apy.utils.translation import translate_simple
 
 
 class AnalyzeHandler(BaseHandler):
     def postproc_text(self, in_text, result):
-        lexical_units = remove_dot_from_deformat(in_text, re.findall(r'\^([^\$]*)\$([^\^]*)', result))  # TODO: replace with streamparser
-        return [(lu[0], lu[0].split('/')[0] + lu[1])
-                for lu
-                in lexical_units]
+        lexical_units_with_text = remove_dot_from_deformat(in_text, list(streamparser.parse(result, with_text=True)))
+        return [
+            (text_and_lu[1].lexical_unit, text_and_lu[0] + text_and_lu[1].wordform)
+            for text_and_lu
+            in lexical_units_with_text
+        ]
 
     @gen.coroutine
     def get(self):
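For context on the call above: streamparser.parse(…, with_text=True) yields (preceding_text, LexicalUnit) pairs, where each LexicalUnit exposes the surface form as .wordform and the raw ^…$ contents as .lexical_unit — exactly what postproc_text unpacks. A small sketch of those shapes; the sample stream and expected output are illustrative, and exact behaviour depends on the installed apertium-streamparser version:

import streamparser

# A toy analyser output stream: two lexical units separated by a space.
analysis = '^cats/cat<n><pl>$ ^sat/sit<vblex><past>$'

for text, lu in streamparser.parse(analysis, with_text=True):
    # text is whatever preceded this unit in the stream (here '' or ' ');
    # lu.wordform is the surface form, lu.lexical_unit the ^...$ contents.
    print(repr(text), lu.wordform, lu.lexical_unit)

# Expected output, roughly:
# ''  cats  cats/cat<n><pl>
# ' ' sat   sat/sit<vblex><past>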
apertium_apy/handlers/generate.py (35 changes: 23 additions & 12 deletions)

@@ -1,23 +1,34 @@
-import re
-
 from tornado import gen
 
+try:
+    import streamparser
+except ImportError:
+    streamparser = None
+
 from apertium_apy.handlers.base import BaseHandler
 from apertium_apy.utils import to_alpha3_code
 from apertium_apy.utils.translation import translate_simple
 
 
 class GenerateHandler(BaseHandler):
+    seperator = '[SEP]'
+
+    def wrap(self, text):
+        return '^{}$'.format(text)
+
     def preproc_text(self, in_text):
-        lexical_units = re.findall(r'(\^[^\$]*\$[^\^]*)', in_text)  # TODO: replace with streamparser
-        if len(lexical_units) == 0:
-            lexical_units = ['^%s$' % (in_text,)]
-        return lexical_units, '[SEP]'.join(lexical_units)
+        lexical_units_with_text = list(streamparser.parse(in_text, with_text=True))
+        if len(lexical_units_with_text) == 0:
+            lexical_units_with_text = list(streamparser.parse(self.wrap(in_text), with_text=True))
+        lexical_units = [self.wrap(text_and_lu[1].lexical_unit) for text_and_lu in lexical_units_with_text]
+        return lexical_units_with_text, self.seperator.join(lexical_units)
 
-    def postproc_text(self, lexical_units, result):
-        return [(generation, lexical_units[i])
-                for (i, generation)
-                in enumerate(result.split('[SEP]'))]
+    def postproc_text(self, lexical_units_with_text, result):
+        return [
+            (generation, self.wrap(text_and_lu[0] + text_and_lu[1].lexical_unit))
+            for (generation, text_and_lu)
+            in zip(result.split(self.seperator), lexical_units_with_text)
+        ]
 
     @gen.coroutine
     def get(self):
@@ -27,8 +38,8 @@ def get(self):
             [path, mode] = self.generators[in_mode]
             formatting = 'none'
             commands = [['apertium', '-d', path, '-f', formatting, mode]]
-            lexical_units, to_generate = self.preproc_text(in_text)
+            lexical_units_with_text, to_generate = self.preproc_text(in_text)
             result = yield translate_simple(to_generate, commands)
-            self.send_response(self.postproc_text(lexical_units, result))
+            self.send_response(self.postproc_text(lexical_units_with_text, result))
         else:
             self.send_error(400, explanation='That mode is not installed')
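The preproc/postproc pair in GenerateHandler is a round trip: lexical units are wrapped back into ^…$, joined with the [SEP] marker, fed through the generator, and the generator's output is split on the same marker and zipped back onto the source units. A minimal sketch of that round trip, with a fake generate step standing in for the real translate_simple pipeline:

SEPARATOR = '[SEP]'


def fake_generate(text):
    # Stand-in for the apertium generation pipeline: pretend every
    # lexical unit generates the fixed surface form 'word'.
    return SEPARATOR.join('word' for _ in text.split(SEPARATOR))


units = ['^cat<n><pl>$', '^sit<vblex><past>$']
to_generate = SEPARATOR.join(units)  # what preproc_text hands to the pipeline
result = fake_generate(to_generate)

# postproc_text splits on the same marker and pairs each generation
# with the unit that produced it.
paired = list(zip(result.split(SEPARATOR), units))
print(paired)  # [('word', '^cat<n><pl>$'), ('word', '^sit<vblex><past>$')]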