Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions apertium_apy/apy.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from apertium_apy.handlers import (
AnalyzeHandler,
BaseHandler,
BilsearchHandler,
CoverageHandler,
GenerateHandler,
GuesserHandler,
Expand Down Expand Up @@ -136,6 +137,8 @@ def setup_handler(
handler.spellers[lang_src] = (dirpath, modename)
for dirpath, modename, lang_pair in modes['guesser']:
handler.guessers[lang_pair] = (dirpath, modename)
for dirpath, modename, lang_pair in modes['bilsearch']:
handler.bilsearch[lang_pair] = (dirpath, modename)

handler.init_pairs_graph()
handler.init_paths()
Expand Down Expand Up @@ -285,6 +288,7 @@ def setup_application(args):
(r'/identifyLang', IdentifyLangHandler),
(r'/getLocale', GetLocaleHandler),
(r'/pipedebug', PipeDebugHandler),
(r'/bilsearch', BilsearchHandler),
] # type: List[Tuple[str, Type[tornado.web.RequestHandler]]]

if importlib_util.find_spec('streamparser'):
Expand Down
1 change: 1 addition & 0 deletions apertium_apy/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
from apertium_apy.handlers.translate_doc import TranslateDocHandler # noqa: F401
from apertium_apy.handlers.translate_raw import TranslateRawHandler # noqa: F401
from apertium_apy.handlers.translate_webpage import TranslateWebpageHandler # noqa: F401
from apertium_apy.handlers.bilsearch import BilsearchHandler # noqa: F401
1 change: 1 addition & 0 deletions apertium_apy/handlers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class BaseHandler(tornado.web.RequestHandler):
taggers = {} # type: Dict[str, Tuple[str, str]]
spellers = {} # type: Dict[str, Tuple[str, str]]
guessers = {} # type: Dict[str, Tuple[str, str]]
bilsearch = {} # type: Dict[str, Tuple[str, str]]
pairprefs = {} # type: Dict[str, Dict[str, Dict[str, str]]]
# (l1, l2): [translation.Pipeline], only contains flushing pairs!
pipelines = {} # type: Dict[Tuple[str, str], List[Union[FlushingPipeline, SimplePipeline]]]
Expand Down
54 changes: 54 additions & 0 deletions apertium_apy/handlers/bilsearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logging
from tornado import gen

from apertium_apy.handlers.base import BaseHandler
from apertium_apy.utils.translation import translate_simple
from apertium_apy.utils import to_alpha3_code

class BilsearchHandler(BaseHandler):
    """Handle ``/bilsearch`` requests: look a query up through a bilsearch mode.

    Expects two request arguments: ``langpair`` (e.g. ``eng|spa``) and ``q``
    (the search query).
    """

    def get_pair_or_error(self, langpair):
        """Resolve a ``src|trg`` request argument to an installed pair.

        Both codes are normalised with ``to_alpha3_code`` and the resulting
        mode is passed through ``find_fallback_mode`` against ``self.pairs``.

        Returns:
            A tuple of the pieces of the resolved ``src-trg`` mode name, or
            ``None`` after a 400 error has been sent to the client.
        """
        try:
            lang_src, lang_trg = map(to_alpha3_code, langpair.split('|'))
        except ValueError:
            # Raised by the unpacking when there are not exactly two codes.
            self.send_error(400, explanation='That pair is invalid, use e.g. eng|spa')
            return None

        in_mode = self.find_fallback_mode('%s-%s' % (lang_src, lang_trg), self.pairs)
        if in_mode not in self.pairs:
            self.send_error(400, explanation='That pair is not installed')
            return None
        return tuple(in_mode.split('-'))

    @staticmethod
    def _parse_results(output):
        """Group ``left:right`` lines of the pipeline output per search.

        Searches are separated by blank lines; every search yields one dict
        mapping each left-hand side to the list of its right-hand sides, in
        output order. A search without any lines yields an empty dict, so
        "no matches" is a valid result rather than an error.

        Raises:
            ValueError: if a non-blank line contains no ``:`` separator.
        """
        parsed = []
        for block in output.split('\n\n'):
            entries = {}
            for line in block.strip().split('\n'):
                if not line.strip():
                    continue
                # Split on the first colon only, so a right-hand side that
                # itself contains ':' no longer fails the whole request.
                # NOTE(review): assumes the left-hand side never contains
                # ':' -- confirm against the bilsearch mode output format.
                source, separator, target = line.partition(':')
                if not separator:
                    raise ValueError('Malformed bilsearch output line: %r' % line)
                entries.setdefault(source, []).append(target)
            parsed.append(entries)
        return parsed

    @gen.coroutine
    def search_and_respond(self, pair, query):
        """Run ``query`` through the bilsearch mode of ``pair`` and reply.

        Sends a 400 error when no bilsearch mode is registered for the pair
        and a 503 error when running the pipeline or parsing its output
        fails; otherwise responds with the parsed search results.
        """
        mode_key = '-'.join(pair)
        if mode_key not in self.bilsearch:
            # get_pair_or_error only validates against self.pairs; a pair can
            # be installed without a bilsearch mode, which is a client-side
            # error rather than an internal failure.
            self.send_error(400, explanation='That pair is not installed for bilingual search')
            return

        path, mode = self.bilsearch[mode_key]
        commands = [['apertium', '-d', path, '-f', 'none', mode]]
        try:
            result = yield translate_simple(query, commands)
            results = self._parse_results(result)
        except Exception as e:
            logging.warning('Search error in pair %s-%s: %s', pair[0], pair[1], e)
            self.send_error(503, explanation='internal error')
        else:
            self.send_response({
                'responseData': {
                    'searchResults': results,
                },
                'responseDetails': None,
                'responseStatus': 200,
            })

    @gen.coroutine
    def get(self):
        """Validate ``langpair`` and, if it is usable, answer the ``q`` search."""
        pair = self.get_pair_or_error(self.get_argument('langpair'))

        if pair is not None:
            yield self.search_and_respond(pair, self.get_argument('q'))
8 changes: 8 additions & 0 deletions apertium_apy/handlers/list_modes.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@ def get(self):
self.send_response({lang_src: modename for (lang_src, (path, modename)) in self.spellers.items()})
elif query == 'guessers':
self.send_response({lang_src: modename for (lang_src, (path, modename)) in self.guessers.items()})
elif query == 'bilsearch':
response_data = []
for pair, (path, modename) in self.bilsearch.items():
l1, l2 = pair.split('-')
response_data.append({'sourceLanguage': l1, 'targetLanguage': l2})
if self.get_arguments('include_deprecated_codes'):
response_data.append({'sourceLanguage': to_alpha2_code(l1), 'targetLanguage': to_alpha2_code(l2)})
self.send_response({'responseData': response_data, 'responseDetails': None, 'responseStatus': 200})

else:
self.send_error(400, explanation='Expecting q argument to be one of analysers, generators, guessers, spellers, disambiguators, or pairs')
20 changes: 14 additions & 6 deletions apertium_apy/mode_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ def search_path(rootpath, include_pairs=True, verbosity=1):
'spell': re.compile(r'(({0}(-{0})?)-spell)\.mode'.format(lang_code)),
'tokenise': re.compile(r'(({0}(-{0})?)-tokenise)\.mode'.format(lang_code)),
'guesser': re.compile(r'(({0}(-{0})?)-guess(er)?)\.mode'.format(lang_code)),
'bilsearch': re.compile(r'({0})-({0})-bilsearch\.mode'.format(lang_code)),
}
modes = {
'pair': [],
Expand All @@ -51,6 +52,7 @@ def search_path(rootpath, include_pairs=True, verbosity=1):
'spell': [],
'tokenise': [],
'guesser': [],
'bilsearch': [],
} # type: Dict[str, List[Tuple[str, str, str]]]

real_root = os.path.abspath(os.path.realpath(rootpath))
Expand All @@ -63,14 +65,20 @@ def search_path(rootpath, include_pairs=True, verbosity=1):
for mtype, regex in type_re.items():
m = regex.match(filename)
if m:
if mtype != 'pair':
modename = m.group(1) # e.g. en-es-anmorph
if mtype == 'bilsearch':
lang_src = to_alpha3_code(m.group(1))
lang_trg = to_alpha3_code(m.group(2))
lang_pair = f"{lang_src}-{lang_trg}"
modename = f"{lang_pair}-bilsearch"
dir_of_modes = os.path.dirname(dirpath)
mode = (dir_of_modes, modename, lang_pair)
modes[mtype].append(mode)
elif mtype != 'pair':
modename = m.group(1)
langlist = [to_alpha3_code(x) for x in m.group(2).split('-')]
lang_pair = '-'.join(langlist) # e.g. en-es
lang_pair = '-'.join(langlist)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@vykliuk, could you restore these comments (including the one up a few lines where modename is set)?

dir_of_modes = os.path.dirname(dirpath)
mode = (dir_of_modes,
modename,
lang_pair)
mode = (dir_of_modes, modename, lang_pair)
modes[mtype].append(mode)
elif include_pairs:
lang_src = m.group(1)
Expand Down
1 change: 1 addition & 0 deletions language_names/manual-additions.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ szl xh xosa
szl zh chińskŏ
szl zu zulu
tki tki توركمانجه
uk uum урумська
vro vro võro
wad wad Wamesa
wal wal Wolaytta
Expand Down
Loading