Trying to get some sort of 'implicit' bidi with luatex, I though of using an (extenal) OCP filter as it is much easier than messing with luatex nodes, and it respects TeX grouping.
The attached example loads the do_bidi.py script as an \externalocp, it works fine with plain luatex, but not with 'texexec', using 'context' I get: MtxRun | fatal error, message: luatex: execution interrupted Any ideas? Is there a lua equivalent to \externalocp ? Regards, Khaled -- Khaled Hosny Arabic localizer and member of Arabeyes.org team
% engine=luatex \externalocp\OCPbidi=do_bidi.py {} \ocplist\bidi= \addbeforeocplist 100 \OCPbidi \nullocplist \pushocplist\bidi \pardir TRT \textdir TRT Hello world. % this should appear LTR despite \textdir \bye
#!/usr/bin/env python # subflip - Prepare subtitle files for non-bidi-aware players. # Copyright (C) 2006 Noam Raphael < spam.noam at gmail.com > # Copyright (C) 2008 Khaled Hosny <[EMAIL PROTECTED]> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """ This class defines the function tag_text, which tags a text for directionality, by using the relevant part of the Unicode BIDI algorithm (see http://unicode.org/reports/tr9/). It defines the function tag_text, which gets an iterator over characters, and return an iterator over characters and the constants OPEN_L, CLOSE_L, OPEN_R and CLOSE_R. These constants are inserted between the given characters, to give the directionality structure of the text. This module currently doesn't implement the algorithm for arabic characters, since those require some special treatment. """ __all__ = ['tag_text', 'OPEN_L', 'CLOSE_L', 'OPEN_R', 'CLOSE_R'] from collections import deque from unicodedata import bidirectional def dir(c): types = { 'ON' : 'N', 'B' : 'N', 'BN' : 'N', 'S' : 'N', 'WS' : 'N', 'R' : 'R', 'NSM' : 'R', 'AL' : 'R', 'L' : 'L', 'EN' : 'EN', 'AN' : 'EN','ET' : 'T', 'ES' : 'S', 'CS' : 'S', } d = bidirectional(c) return types[d] class StateMachine(object): ''' This class is the base class for the state machine classes defined here. The state machine implemented has a state, which is a string. In each step, the machine gets two arguments, and according to these and the state, it produces some output and changes the state. In this implementation, the input is taken from one iterable, and the output is given using the iterable protocol. When output is requested, by a call to the next() method, the given iterable's next() method is called to produce the two input arguments (arg1, arg2). Then, the method named "%s_%s" % (current_state, arg1) is called with arg2 as its argument. The method is expected to return an iterable over output elements, which will be given upon subsequent calls to the instance's next() method. ''' def __init__(self, iterable): if not hasattr(self, 'state'): raise NotImplementedError, \ 'You must implement an __init__ method that will initialize '\ 'self.state.' self._input_source = iter(iterable) self._current_output_generator = iter([]) self._finishing = False def next(self): while True: try: return self._current_output_generator.next() except StopIteration: if self._finishing: raise # Make another call to the appropriate method. try: letter, input = self._input_source.next() except StopIteration: self._finishing = True self._current_output_generator = \ getattr(self, '%s_END' % self.state)() if self._current_output_generator is None: self._current_output_generator = iter([]) continue self._current_output_generator = \ getattr(self, '%s_%s' % (self.state, letter))(input) if self._current_output_generator is None: self._current_output_generator = iter([]) def __iter__(self): return self # For us, there are six types of chars: # # L - strong left-to-right # R - strong right-to-left # N - neutral # EN - european number # S - number seperator # T - number terminator class BaseLTR(StateMachine): ''' Tag text when the base is LTR. ''' def __init__(self, iterable, openL, closeL, openR, closeR): self.state = 'L' self.queueN = deque() self.queueT = deque() self.S = None self.openL, self.closeL, self.openR, self.closeR = \ openL, closeL, openR, closeR return StateMachine.__init__(self, iterable) # L state - last strong character was L. def L_R(self, input): yield self.openR yield input self.state = 'R' def L_otherwise(self, input): yield input L_N = L_L = L_EN = L_T = L_S = L_otherwise def L_END(self): pass # R state - last strong char was R. # There are two queues in this state, one for N characters (which might # include ET chars not in the right), and one for ET chars. def R_R(self, input): while self.queueN: yield self.queueN.popleft() while self.queueT: yield self.queueT.popleft() yield input def R_L(self, input): yield self.closeR while self.queueN: yield self.queueN.popleft() while self.queueT: yield self.queueT.popleft() yield input self.state = 'L' def R_N(self, input): while self.queueT: self.queueN.append(self.queueT.popleft()) self.queueN.append(input) R_S = R_N # A Number Seperator is like a neutral def R_T(self, input): self.queueT.append(input) def R_EN(self, input): while self.queueN: yield self.queueN.popleft() yield self.openL while self.queueT: yield self.queueT.popleft() yield input self.state = 'EN' def R_END(self): yield self.closeR while self.queueN: yield self.queueN.popleft() while self.queueT: yield self.queueT.popleft() # EN state - inside a number in an RTL context, the last char was not # a Number Seperator. def EN_EN(self, input): yield input def EN_T(self, input): # A Number Terminator in that place behaves exactly like a digit. yield input def EN_S(self, input): self.S = input self.state = 'S' def EN_R(self, input): yield self.closeL yield input self.state = 'R' def EN_L(self, input): yield self.closeL yield self.closeR yield input self.state = 'L' def EN_N(self, input): yield self.closeL self.queueN.append(input) self.state = 'R' def EN_END(self): yield self.closeL yield self.closeR # S state - after a number, a Number Seperator was given (it is in self.S). # Will it be a part of the number? def S_EN(self, input): yield self.S self.S = None yield input self.state = 'EN' def S_R(self, input): yield self.closeL yield self.S self.S = None yield input self.state = 'R' def S_L(self, input): yield self.closeL yield self.closeR yield self.S self.S = None yield input self.state = 'L' def S_N(self, input): yield self.closeL self.queueN.append(self.S) self.S = None self.queueN.append(input) self.state = 'R' S_S = S_N def S_T(self, input): yield self.closeL self.queueN.append(self.S) self.S = None self.queueT.append(input) self.state = 'R' def S_END(self): yield self.closeL yield self.closeR yield self.S self.S = None class BaseRTL(StateMachine): ''' Tag text when the base is RTL. ''' def __init__(self, iterable, openL, closeL, openR, closeR): self.state = 'R' self.queue = deque() self.S = None self.openL, self.closeL, self.openR, self.closeR = \ openL, closeL, openR, closeR return StateMachine.__init__(self, iterable) # R state - last strong character was R. The queue may contain terminators # which may be part of a number. def R_continue(self, input): while self.queue: yield self.queue.popleft() yield input R_N = R_S = R_R = R_continue def R_T(self, input): self.queue.append(input) def R_L(self, input): while self.queue: yield self.queue.popleft() yield self.openL yield input self.state = 'L' def R_EN(self, input): yield self.openL while self.queue: yield self.queue.popleft() yield input self.state = 'ENR' def R_END(self): while self.queue: yield self.queue.popleft() # ENR state - last character was a number, and the strong character before # it was R. def ENR_EN(self, input): yield input ENR_T = ENR_EN def ENR_S(self, input): self.S = input self.state = 'SR' def ENR_R(self, input): yield self.closeL yield input self.state = 'R' ENR_N = ENR_R def ENR_L(self, input): # According to the Unicode algorithm, in an RTL part, numbers are # ordered together with LTR chars. yield input self.state = 'L' def ENR_END(self): yield self.closeL # SR state - last character was a seperator, which can be a part of a # number (it it saves in self.S), and we are in an RTL context. def SR_EN(self, input): yield self.S; self.S = None yield input self.state = 'ENR' def SR_back_to_R(self, input): yield self.closeL yield self.S; self.S = None yield input self.state = 'R' SR_S = SR_N = SR_R = SR_back_to_R def SR_T(self, input): yield self.closeL yield self.S; self.S = None self.queue.append(input) self.state = 'R' def SR_L(self, input): yield self.closeL yield self.S; self.S = None yield self.openL yield input self.state = 'L' def SR_END(self): yield self.closeL yield self.S; self.S = None # L state - last character was a strong right-to-left char. # Terminals for us are the same as simple Neutrals, so we fill only queue. def L_L(self, input): while self.queue: yield self.queue.popleft() yield input def L_neutral(self, input): self.queue.append(input) L_T = L_S = L_N = L_neutral def L_EN(self, input): while self.queue: yield self.queue.popleft() yield input self.state = 'ENL' def L_R(self, input): yield self.closeL while self.queue: yield self.queue.popleft() yield input self.state = 'R' def L_END(self): yield self.closeL while self.queue: yield self.queue.popleft() # ENL state - last character was a Number, in an LTR context. def ENL_L(self, input): yield input self.state = 'L' def ENL_N(self, input): self.queue.append(input) self.state = 'L' def ENL_expand_number(self, input): yield input ENL_EN = ENL_T = ENL_expand_number def ENL_S(self, input): self.S = input self.state = 'SL' def ENL_R(self, input): yield self.closeL yield input self.state = 'R' def ENL_END(self): yield self.closeL # SL state - last character was a seperator (saved in self.S), which came # after a number, and we are in LTR context. def SL_EN(self, input): yield self.S; self.S = None yield input self.state = 'ENL' def SL_L(self, input): yield self.S; self.S = None yield input self.state = 'L' def SL_neutral(self, input): self.queue.append(self.S); self.S = None self.queue.append(input) self.state = 'L' SL_N = SL_T = SL_S = SL_neutral def SL_R(self, input): yield self.closeL yield self.S; self.S = None yield input self.state = 'R' def SL_END(self): yield self.closeL yield self.S; self.S = None class Constant(object): def __init__(self, name): self._name = name def __repr__(self): return self.name OPEN_L = Constant('OPEN_L') CLOSE_L = Constant('CLOSE_L') OPEN_R = Constant('OPEN_R') CLOSE_R = Constant('CLOSE_R') def tag_text(text, is_ltr): def typer(s): for c in s: yield dir(c), c if is_ltr: return BaseLTR(typer(text), OPEN_L, CLOSE_L, OPEN_R, CLOSE_R) else: return BaseRTL(typer(text), OPEN_L, CLOSE_L, OPEN_R, CLOSE_R) def main(): import sys const_conv = { OPEN_L: '{\\textdir TLT', CLOSE_L: '}', OPEN_R: '{\\textdir TRT', CLOSE_R: '}' } def tag_string(s, is_ltr): return ''.join(const_conv.get(x, x) for x in tag_text(s, is_ltr)) print tag_string(unicode(sys.stdin.read(), 'utf-8'), 0), sys.stdout.softspace = 0 if __name__ == '__main__': main()
signature.asc
Description: Digital signature
___________________________________________________________________________________ If your question is of interest to others as well, please add an entry to the Wiki! maillist : ntg-context@ntg.nl / http://www.ntg.nl/mailman/listinfo/ntg-context webpage : http://www.pragma-ade.nl / http://tex.aanhet.net archive : https://foundry.supelec.fr/projects/contextrev/ wiki : http://contextgarden.net ___________________________________________________________________________________