README.md (2 changes: 1 addition & 1 deletion)

@@ -20,7 +20,7 @@ Requirements
 Additional functionality is provided by installation
 of the following packages:
 
-- `apertium-streamparser` enables spell checking
+- `apertium-streamparser` enables analysis, generation and spell checking
 - `requests` enables suggestion handling
 - `chromium_compact_language_detector` enables improved language detection (cld2)
 - `chardet` enables website character encoding detection
apertium_apy/apy.py (10 changes: 6 additions & 4 deletions)

@@ -282,8 +282,6 @@ def setup_application(args):
         (r'/translateDoc', TranslateDocHandler),
         (r'/translatePage', TranslateWebpageHandler),
         (r'/translateRaw', TranslateRawHandler),
-        (r'/analy[sz]e', AnalyzeHandler),
-        (r'/generate', GenerateHandler),
         (r'/listLanguageNames', ListLanguageNamesHandler),
         (r'/perWord', PerWordHandler),
         (r'/calcCoverage', CoverageHandler),
@@ -293,7 +291,11 @@ def setup_application(args):
     ]
 
     if importlib.util.find_spec('streamparser'):
-        handlers.append((r'/speller', SpellerHandler))
+        handlers.extend([
+            (r'/analy[sz]e', AnalyzeHandler),
+            (r'/generate', GenerateHandler),
+            (r'/speller', SpellerHandler),
+        ])
 
     if all([args.wiki_username, args.wiki_password]) and importlib.util.find_spec('requests'):
         import requests
@@ -327,7 +329,7 @@ def main():
         logging.warning('Unable to import chardet, assuming utf-8 encoding for all websites')
 
     if importlib.util.find_spec('streamparser') is None:
-        logging.warning('Apertium streamparser not installed, spelling handler disabled')
+        logging.warning('Apertium streamparser not installed, analysis, generation and spelling handlers disabled')
 
     if importlib.util.find_spec('requests') is None:
         logging.warning('requests not installed, suggestions disabled')
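The pattern used here — probing for an optional dependency with importlib.util.find_spec and only registering the routes that need it — is self-contained enough to sketch on its own. A minimal, runnable illustration; the placeholder classes below stand in for the real Tornado handlers:

import importlib.util
import logging


class AnalyzeHandler:   # placeholder for the real Tornado handler
    pass


class GenerateHandler:  # placeholder
    pass


class SpellerHandler:   # placeholder
    pass


handlers = []  # routes with no optional dependencies would go here

# find_spec() reports whether a module is importable without actually
# importing it, so probing a missing package is cheap and side-effect free.
if importlib.util.find_spec('streamparser'):
    handlers.extend([
        (r'/analy[sz]e', AnalyzeHandler),
        (r'/generate', GenerateHandler),
        (r'/speller', SpellerHandler),
    ])
else:
    logging.warning('streamparser not installed, analysis, generation and spelling handlers disabled')

print([pattern for pattern, _ in handlers])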
apertium_apy/handlers/analyze.py (17 changes: 11 additions & 6 deletions)

@@ -1,18 +1,23 @@
-import re
-
 from tornado import gen
 
+try:
+    import streamparser
+except ImportError:
+    streamparser = None
+
 from apertium_apy.handlers.base import BaseHandler
 from apertium_apy.utils import to_alpha3_code, remove_dot_from_deformat
 from apertium_apy.utils.translation import translate_simple
 
 
 class AnalyzeHandler(BaseHandler):
     def postproc_text(self, in_text, result):
-        lexical_units = remove_dot_from_deformat(in_text, re.findall(r'\^([^\$]*)\$([^\^]*)', result))  # TODO: replace with streamparser
-        return [(lu[0], lu[0].split('/')[0] + lu[1])
-                for lu
-                in lexical_units]
+        lexical_units_with_text = remove_dot_from_deformat(in_text, list(streamparser.parse(result, with_text=True)))
+        return [
+            (text_and_lu[1].lexical_unit, text_and_lu[0] + text_and_lu[1].wordform)
+            for text_and_lu
+            in lexical_units_with_text
+        ]
 
     @gen.coroutine
     def get(self):
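For context on the call above: streamparser.parse(…, with_text=True) yields (preceding_text, LexicalUnit) pairs, where each LexicalUnit exposes the surface form as .wordform and the raw ^…$ contents as .lexical_unit — exactly what postproc_text unpacks. A small sketch of those shapes; the sample stream and expected output are illustrative, and exact behaviour depends on the installed apertium-streamparser version:

import streamparser

# A toy analyser output stream: two lexical units separated by a space.
analysis = '^cats/cat<n><pl>$ ^sat/sit<vblex><past>$'

for text, lu in streamparser.parse(analysis, with_text=True):
    # text is whatever preceded this unit in the stream (here '' or ' ');
    # lu.wordform is the surface form, lu.lexical_unit the ^...$ contents.
    print(repr(text), lu.wordform, lu.lexical_unit)

# Expected output, roughly:
# ''  cats  cats/cat<n><pl>
# ' ' sat   sat/sit<vblex><past>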
apertium_apy/handlers/generate.py (35 changes: 23 additions & 12 deletions)

@@ -1,23 +1,34 @@
-import re
-
 from tornado import gen
 
+try:
+    import streamparser
+except ImportError:
+    streamparser = None
+
 from apertium_apy.handlers.base import BaseHandler
 from apertium_apy.utils import to_alpha3_code
 from apertium_apy.utils.translation import translate_simple
 
 
 class GenerateHandler(BaseHandler):
+    seperator = '[SEP]'
+
+    def wrap(self, text):
+        return '^{}$'.format(text)
+
     def preproc_text(self, in_text):
-        lexical_units = re.findall(r'(\^[^\$]*\$[^\^]*)', in_text)  # TODO: replace with streamparser
-        if len(lexical_units) == 0:
-            lexical_units = ['^%s$' % (in_text,)]
-        return lexical_units, '[SEP]'.join(lexical_units)
+        lexical_units_with_text = list(streamparser.parse(in_text, with_text=True))
+        if len(lexical_units_with_text) == 0:
+            lexical_units_with_text = list(streamparser.parse(self.wrap(in_text), with_text=True))
+        lexical_units = [self.wrap(text_and_lu[1].lexical_unit) for text_and_lu in lexical_units_with_text]
+        return lexical_units_with_text, self.seperator.join(lexical_units)
 
-    def postproc_text(self, lexical_units, result):
-        return [(generation, lexical_units[i])
-                for (i, generation)
-                in enumerate(result.split('[SEP]'))]
+    def postproc_text(self, lexical_units_with_text, result):
+        return [
+            (generation, self.wrap(text_and_lu[0] + text_and_lu[1].lexical_unit))
+            for (generation, text_and_lu)
+            in zip(result.split(self.seperator), lexical_units_with_text)
+        ]
 
     @gen.coroutine
     def get(self):
@@ -27,8 +38,8 @@ def get(self):
             [path, mode] = self.generators[in_mode]
             formatting = 'none'
             commands = [['apertium', '-d', path, '-f', formatting, mode]]
-            lexical_units, to_generate = self.preproc_text(in_text)
+            lexical_units_with_text, to_generate = self.preproc_text(in_text)
             result = yield translate_simple(to_generate, commands)
-            self.send_response(self.postproc_text(lexical_units, result))
+            self.send_response(self.postproc_text(lexical_units_with_text, result))
         else:
             self.send_error(400, explanation='That mode is not installed')
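The preproc/postproc pair in GenerateHandler is a round trip: lexical units are wrapped back into ^…$, joined with the [SEP] marker, fed through the generator, and the generator's output is split on the same marker and zipped back onto the source units. A minimal sketch of that round trip, with a fake generate step standing in for the real translate_simple pipeline:

SEPARATOR = '[SEP]'


def fake_generate(text):
    # Stand-in for the apertium generation pipeline: pretend every
    # lexical unit generates the fixed surface form 'word'.
    return SEPARATOR.join('word' for _ in text.split(SEPARATOR))


units = ['^cat<n><pl>$', '^sit<vblex><past>$']
to_generate = SEPARATOR.join(units)  # what preproc_text hands to the pipeline
result = fake_generate(to_generate)

# postproc_text splits on the same marker and pairs each generation
# with the unit that produced it.
paired = list(zip(result.split(SEPARATOR), units))
print(paired)  # [('word', '^cat<n><pl>$'), ('word', '^sit<vblex><past>$')]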