Source code for blessed.sequences

# encoding: utf-8
" This sub-module provides 'sequence awareness' for blessed."

__author__ = 'Jeff Quast <contact@jeffquast.com>'
__license__ = 'MIT'

__all__ = ['init_sequence_patterns', 'Sequence', 'SequenceTextWrapper']

# built-ins
import functools
import textwrap
import warnings
import math
import sys
import re

# 3rd-party
import wcwidth  # https://github.com/jquast/wcwidth

# Terminal kinds (compared against ``term.kind``) for which a warning is
# issued when sequence patterns are initialised.
_BINTERM_UNSUPPORTED = ('kermit', 'avatar')
_BINTERM_UNSUPPORTED_MSG = (
    'sequence-awareness for terminals emitting '
    'binary-packed capabilities are not supported.'
)

# Native text type: ``str`` on Python 3, ``unicode`` otherwise.
text_type = str if sys.version_info[0] == 3 else unicode  # noqa


def _merge_sequences(inp):
    """Prepare sequence patterns for joining into one regular expression.

    Falsy entries (empty strings, ``None``) are discarded and the rest are
    returned as a new list ordered longest-first, so that a full sequence
    takes precedence over any shorter sequence it contains.
    """
    non_empty = [pattern for pattern in inp if pattern]
    non_empty.sort(key=len, reverse=True)
    return non_empty


def _build_numeric_capability(term, cap, optional=False,
                              base_num=99, nparams=1):
    r"""Build a regexp for a capability that embeds a numeric parameter.

    The capability named ``cap`` is looked up on ``term`` and called with
    ``base_num`` for each of its ``nparams`` parameters.  The digits that
    show up in the (regex-escaped) result are replaced by the group
    ``(\d+)``, or ``(\d+)?`` when ``optional`` is true.

    :arg term: object exposing the capability as an attribute.
    :arg cap: capability attribute name, such as ``'hpa'``.
    :arg optional: whether the numeric group may be absent.
    :arg base_num: probe value passed for every parameter.
    :arg nparams: number of parameters the capability is called with.
    :returns: the pattern string, or ``None`` when the capability is falsy
        or no probe value could be located in its output (a warning is
        issued in the latter case).
    """
    _cap = getattr(term, cap)
    if not _cap:
        return None  # no such capability

    opt = '?' if optional else ''
    cap_re = re.escape(_cap(*((base_num,) * nparams)))

    # The output may not contain the probe value verbatim; n-1 through n+1
    # are tried in that order (presumably to cope with capabilities that
    # emit the parameter offset by one -- confirm against terminfo).
    for num in range(base_num - 1, base_num + 2):
        digits = str(num)
        if digits in cap_re:
            # every occurrence is replaced, which covers nparams > 1
            return cap_re.replace(digits, r'(\d+)%s' % (opt,))

    warnings.warn('Unknown parameter in %r (%r, %r)' % (cap, _cap, cap_re))
    return None


def _build_any_numeric_capability(term, cap, num=99, nparams=1):
    r"""Build a regexp for a capability, wildcarding *every* run of digits.

    The capability named ``cap`` is looked up on ``term`` and called with
    ``num`` for each of its ``nparams`` parameters; each run of digits in
    the (regex-escaped) result is replaced by the group ``(\d+)``.

    :arg term: object exposing the capability as an attribute.
    :arg cap: capability attribute name, such as ``'setaf'``.
    :arg num: probe value passed for every parameter.
    :arg nparams: number of parameters the capability is called with.
    :returns: the pattern string, or ``None`` when the capability is falsy
        or its output holds no digits (a warning is issued in the latter
        case).
    """
    _cap = getattr(term, cap)
    if not _cap:
        return None  # no such capability

    cap_re = re.escape(_cap(*((num,) * nparams)))
    # A callable replacement is inserted verbatim.  A *string* replacement
    # of r'(\d+)' is parsed as a template, and the unknown escape "\d" is
    # rejected with re.error on Python 3.7 and later.
    cap_re = re.sub(r'(\d+)', lambda _match: r'(\d+)', cap_re)
    if r'(\d+)' in cap_re:
        return cap_re

    warnings.warn('Missing numerics in %r, %r' % (cap, cap_re))
    return None


def get_movement_sequence_patterns(term):
    """Collect regexp fragments for capabilities of ``term`` that move
    the cursor.

    Returns a set; entries may be ``None`` or empty where the terminal
    lacks a capability, so callers are expected to filter them.
    """
    esc = re.escape
    numeric = functools.partial(_build_numeric_capability, term)

    found = set()
    found.add(esc(term.cr))                  # carriage_return
    found.add(numeric(cap='hpa'))            # column_address: absolute
    found.add(numeric(cap='vpa'))            # row_address: absolute
    found.add(numeric(cap='cup', nparams=2))  # cursor_address: row, column
    found.add(esc(term.cud1))                # cursor_down: down one line
    found.add(esc(term.home))                # cursor_home
    found.add(esc(term.cub1))                # cursor_left: left one space
    found.add(esc(term.cuf1))                # cursor_right: right one space
    found.add(esc(term.cuu1))                # cursor_up: up one line
    found.add(numeric(cap='cud', optional=True))  # param_down_cursor
    found.add(esc(term.rc))                  # restore_cursor
    found.add(esc(term.clear))               # clear_screen (homes cursor)
    found.add(esc(term.enter_fullscreen))    # enter fullscreen
    found.add(esc(term.exit_fullscreen))     # exit fullscreen
    # Forward / backward cursor.  These two are added as-is, unescaped.
    # NOTE(review): init_sequence_patterns() returns '_re_cuf'/'_re_cub',
    # not '_cuf'/'_cub' -- confirm where these attributes come from.
    found.add(term._cuf)
    found.add(term._cub)
    return found


def get_wontmove_sequence_patterns(term):
    """Collect regexp fragments for capabilities of ``term`` known not to
    cause any movement.

    Returns a list; entries may be ``None`` or empty where the terminal
    lacks a capability, so callers are expected to filter them.
    """
    esc = re.escape
    numeric = functools.partial(_build_numeric_capability, term)
    any_numeric = functools.partial(_build_any_numeric_capability, term)

    def static(*names):
        # escaped literal value of each named capability, in given order
        return [esc(getattr(term, name)) for name in names]

    patterns = static(
        'mc0',          # print_screen: Print contents of screen
        'mc4',          # prtr_off: Turn off printer
        'mc5',          # prtr_on: Turn on printer
        'sc',           # save_cursor: Save current cursor position
        'hts',          # set_tab: Set a tab in every row, current column
        'bold',         # enter_bold_mode
        'standout',     # enter_standout_mode
        'subscript',    # enter_subscript_mode
        'superscript',  # enter_superscript_mode
        'underline',    # enter_underline_mode
        'blink',        # enter_blink_mode
        'dim',          # enter_dim_mode: half-bright
        'civis',        # cursor_invisible
        'cvvis',        # cursor_visible: very visible
        'cnorm',        # cursor_normal: undo civis/cvvis
        'tbc',          # clear_all_tabs
    )
    # change_scroll_region: Change region to line #1 to line #2
    patterns.append(numeric(cap='csr', nparams=2))
    patterns += static(
        'el1',          # clr_bol: Clear to beginning of line
        'el',           # clr_eol: Clear to end of line
        'clear_eos',    # clr_eos: Clear to end of screen
        'dch1',         # delete_character
        'dl1',          # delete_line
    )
    patterns.append(numeric(cap='ech'))     # erase_chars: Erase #1 chars
    patterns += static('il1')               # insert_line
    patterns.append(numeric(cap='dch'))     # parm_dch: Delete #1 chars
    patterns.append(numeric(cap='dl'))      # parm_delete_line
    patterns += static(
        'rmacs',        # exit_alt_charset_mode
        'rmam',         # exit_am_mode: Turn off automatic margins
        'sgr0',         # exit_attribute_mode: Turn off all attributes
        'rmcup',        # exit_ca_mode
        'rmir',         # exit_insert_mode
        'rmso',         # exit_standout_mode
        'rmul',         # exit_underline_mode
        'hook',         # flash_hook
        'flash',        # flash_screen: Visible bell
        'rmkx',         # keypad_local
        'smkx',         # keypad_xmit
        'rmm',          # meta_off
        'smm',          # meta_on
        'op',           # orig_pair
    )
    for name in (
            'ich',      # parm_ich: Insert #1 characters
            'indn',     # parm_index: Scroll forward #1
            'il',       # parm_insert_line: Insert #1 lines
            'ech',      # erase_chars (appears a second time, as before)
            'rin',      # parm_rindex: Scroll back #1 lines
            'cuu',      # parm_up_cursor: Up #1 lines
    ):
        patterns.append(numeric(cap=name))
    patterns += static(
        'ind',          # scroll_forward: Scroll text up
        # scroll_reverse: Scroll text down.  NOTE(review): terminfo names
        # scroll_reverse 'ri'; 'rev' is usually reverse video -- confirm.
        'rev',
        'ht',           # tab: Tab to next hardware tab stop
    )
    # set_a_background: probed with the first and the last colour index
    patterns.append(any_numeric(cap='setab', num=1))
    patterns.append(
        any_numeric(cap='setab', num=(term.number_of_colors - 1)))
    # set_a_foreground: probed with the first and the last colour index
    patterns.append(any_numeric(cap='setaf', num=1))
    patterns.append(
        any_numeric(cap='setaf', num=(term.number_of_colors - 1)))
    # set_attributes: Define video attributes #1-#9
    # ( not *exactly* legal, being extra forgiving. )
    patterns.extend(
        any_numeric(cap='sgr', nparams=count) for count in range(1, 10))
    # reset_{1,2,3}string: Reset string
    patterns += static('r1', 'r2', 'r3')
    return patterns


def init_sequence_patterns(term):
    """Build the sequence-matching regular expressions for ``term``.

    Given a Terminal instance, ``term``, several known terminal
    capabilities are processed and a dictionary of regular expressions and
    static values is returned, which may be re-attached to the terminal
    as attributes of the same key-name:

    ``_re_will_move``
        any sequence matching this pattern will cause the terminal
        cursor to move (such as *term.home*).

    ``_re_wont_move``
        any sequence matching this pattern will not cause the cursor
        to move (such as *term.bold*).

    ``_re_cuf``
        regular expression that matches term.cuf(N) (move N characters
        forward), or None if the terminal is without a cuf sequence.

    ``_cuf1``
        *term.cuf1* sequence (cursor forward 1 character) as a static
        value.

    ``_re_cub``
        regular expression that matches term.cub(N) (move N characters
        backward), or None if the terminal is without a cub sequence.

    ``_cub1``
        *term.cub1* sequence (cursor backward 1 character) as a static
        value.

    These attributes make it possible to perform introspection on strings
    containing sequences generated by this terminal, to determine the
    printable length of a string.

    A warning is issued when ``term.kind`` is one of the kinds listed in
    ``_BINTERM_UNSUPPORTED``.
    """
    if term.kind in _BINTERM_UNSUPPORTED:
        warnings.warn(_BINTERM_UNSUPPORTED_MSG)

    # Capabilities that have indeterminate effects on the cursor position.
    if term.does_styling:
        will_move = _merge_sequences(get_movement_sequence_patterns(term))
    else:
        will_move = set()

    # Capabilities that mainly affect video attributes, for use with
    # measure_length().
    if term.does_styling:
        wont_move = _merge_sequences(get_wontmove_sequence_patterns(term))
        # Last-ditch matches: xterm and aixterm throw \x1b(B and other
        # oddities all around, and input such as ansi art may carry colour
        # sequences this terminal would not itself generate.  These are
        # the final "ok, i will match this, anyway" patterns.
        esc_char = re.escape(u'\x1b')
        wont_move += [
            esc_char + r'\[(\d+)m',
            esc_char + r'\[(\d+)\;(\d+)m',
            esc_char + r'\[(\d+)\;(\d+)\;(\d+)m',
            esc_char + r'\[(\d+)\;(\d+)\;(\d+)\;(\d+)m',
            re.escape(u'\x1b(B'),
        ]
    else:
        wont_move = set()

    # compile as regular expressions, OR'd.
    re_will_move = re.compile('(%s)' % ('|'.join(will_move)))
    re_wont_move = re.compile('(%s)' % ('|'.join(wont_move)))

    # static pattern matching for horizontal_distance(ucs, term)
    numeric = functools.partial(_build_numeric_capability, term)

    # parm_right_cursor: Move #1 characters to the right
    cuf_pattern = numeric(cap='cuf', optional=True)
    re_cuf = re.compile(cuf_pattern) if cuf_pattern else None
    # cursor_right: Non-destructive space (move right one space)
    cuf1 = term.cuf1

    # parm_left_cursor: Move #1 characters to the left
    cub_pattern = numeric(cap='cub', optional=True)
    re_cub = re.compile(cub_pattern) if cub_pattern else None
    # cursor_left: Move left one space
    cub1 = term.cub1

    return dict(
        _re_will_move=re_will_move,
        _re_wont_move=re_wont_move,
        _re_cuf=re_cuf,
        _re_cub=re_cub,
        _cuf1=cuf1,
        _cub1=cub1,
    )
class SequenceTextWrapper(textwrap.TextWrapper):
    # No class docstring here: ``__doc__`` is copied from
    # textwrap.TextWrapper immediately after the class body.

    def __init__(self, width, term, **kwargs):
        """Remember ``term`` and initialise the base wrapper.

        ``break_long_words`` is always passed to the base class as False;
        asking for anything else trips the assertion below.
        """
        self.term = term
        assert kwargs.get('break_long_words', False) is False, (
            'break_long_words is not sequence-safe')
        kwargs['break_long_words'] = False
        textwrap.TextWrapper.__init__(self, width, **kwargs)

    def _wrap_chunks(self, chunks):
        """
        escape-sequence aware variant of _wrap_chunks. Though
        movement sequences, such as term.left() are certainly not
        honored, sequences such as term.bold() are, and are not
        broken mid-sequence.

        ``chunks`` is reversed in place and consumed as a stack.
        Raises ValueError when ``self.width`` is not an integer > 0.
        """
        if self.width <= 0 or not isinstance(self.width, int):
            raise ValueError("invalid width %r(%s) (must be integer > 0)" % (
                self.width, type(self.width)))

        term = self.term
        # treated as enabled when the attribute does not exist at all
        drop_whitespace = getattr(self, 'drop_whitespace', True)
        wrapped = []

        chunks.reverse()
        while chunks:
            cur_line = []
            cur_len = 0

            indent = (self.subsequent_indent if wrapped
                      else self.initial_indent)
            width = self.width - len(indent)

            # a whitespace-only chunk may not begin a line (except the
            # very first one)
            if drop_whitespace and (
                    Sequence(chunks[-1], term).strip() == '' and wrapped):
                del chunks[-1]

            # take chunks for as long as their printable length fits
            while chunks:
                chunk_len = Sequence(chunks[-1], term).length()
                if cur_len + chunk_len > width:
                    break
                cur_line.append(chunks.pop())
                cur_len += chunk_len

            # the next chunk can never fit on any line: hand it to the
            # inherited long-word handler
            if chunks and Sequence(chunks[-1], term).length() > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # a whitespace-only chunk may not end a line either
            if drop_whitespace and (
                    cur_line and
                    Sequence(cur_line[-1], term).strip() == ''):
                del cur_line[-1]

            if cur_line:
                wrapped.append(indent + u''.join(cur_line))
        return wrapped


SequenceTextWrapper.__doc__ = textwrap.TextWrapper.__doc__
class Sequence(text_type):
    """
    This unicode-derived class understands the effect of escape sequences
    of printable length, allowing a properly implemented .rjust(), .ljust(),
    .center(), and .length()
    """

    def __new__(cls, sequence_text, term):
        """Sequence(sequence_text, term) -> unicode object

        :arg sequence_text: A string containing sequences.
        :arg term: Terminal instance this string was created with.
        """
        new = text_type.__new__(cls, sequence_text)
        new._term = term
        return new

    def ljust(self, width, fillchar=u' '):
        """S.ljust(width, fillchar) -> unicode

        Returns string derived from unicode string ``S``, left-adjusted
        by trailing whitespace padding ``fillchar``.  The padding is
        sized from the printable length, ``S.length()``."""
        rightside = fillchar * int(
            (max(0.0, float(width - self.length()))) / float(len(fillchar)))
        return u''.join((self, rightside))

    def rjust(self, width, fillchar=u' '):
        """S.rjust(width, fillchar=u' ') -> unicode

        Returns string derived from unicode string ``S``, right-adjusted
        by leading whitespace padding ``fillchar``.  The padding is
        sized from the printable length, ``S.length()``."""
        leftside = fillchar * int(
            (max(0.0, float(width - self.length()))) / float(len(fillchar)))
        return u''.join((leftside, self))

    def center(self, width, fillchar=u' '):
        """S.center(width, fillchar=u' ') -> unicode

        Returns string derived from unicode string ``S``, centered
        and surrounded with whitespace padding ``fillchar``.  When the
        padding cannot be split evenly, the extra cell goes to the right."""
        split = max(0.0, float(width) - self.length()) / 2
        leftside = fillchar * int(
            (max(0.0, math.floor(split))) / float(len(fillchar)))
        rightside = fillchar * int(
            (max(0.0, math.ceil(split))) / float(len(fillchar)))
        return u''.join((leftside, self, rightside))

    def length(self):
        r"""S.length() -> int

        Returns printable length of unicode string ``S`` that may contain
        terminal sequences.

        Although accounted for, strings containing sequences such as
        ``term.clear`` will not give accurate returns, it is not
        considered lengthy (a length of 0). Combining characters
        are also not considered lengthy.

        Strings containing ``term.left`` or ``\b`` will cause "overstrike",
        but a length less than 0 is not ever returned. So ``_\b+`` is a
        length of 1 (``+``), but ``\b`` is simply a length of 0.

        Some characters may consume more than one cell, mainly those CJK
        Unified Ideographs (Chinese, Japanese, Korean) defined by Unicode
        as half or full-width characters.

        For example:
            >>> from blessed import Terminal
            >>> from blessed.sequences import Sequence
            >>> term = Terminal()
            >>> Sequence(term.clear + term.red(u'コンニチハ'), term).length()
            10
        """
        # wcwidth may report a negative width for a character (the original
        # note cites combining characters); "clip" such widths to 0.
        clip = functools.partial(max, 0)
        return sum(clip(wcwidth.wcwidth(w_char))
                   for w_char in self.strip_seqs())

    def strip(self, chars=None):
        """S.strip([chars]) -> unicode

        Return a copy of the string S with terminal sequences removed, and
        leading and trailing whitespace removed.

        If chars is given and not None, remove characters in chars instead.
        """
        return self.strip_seqs().strip(chars)

    def lstrip(self, chars=None):
        """S.lstrip([chars]) -> unicode

        Return a copy of the string S with terminal sequences and leading
        whitespace removed.

        If chars is given and not None, remove characters in chars instead.
        """
        return self.strip_seqs().lstrip(chars)

    def rstrip(self, chars=None):
        """S.rstrip([chars]) -> unicode

        Return a copy of the string S with terminal sequences and trailing
        whitespace removed.

        If chars is given and not None, remove characters in chars instead.
        """
        return self.strip_seqs().rstrip(chars)

    def strip_seqs(self):
        r"""S.strip_seqs() -> unicode

        Return a string without sequences for a string that contains
        sequences for the Terminal with which they were created.

        Where sequence ``move_right(n)`` is detected, it is replaced with
        ``n * u' '``, and where ``move_left()`` or ``\b`` is detected,
        those last-most characters are destroyed.

        All other sequences are simply removed. An example,
            >>> from blessed import Terminal
            >>> from blessed.sequences import Sequence
            >>> term = Terminal()
            >>> Sequence(term.clear + term.red(u'test'), term).strip_seqs()
            u'test'
        """
        padded = self.padd()
        printable = []
        # nxt: index of the first character beyond the current sequence.
        nxt = 0
        for idx in range(len(padded)):
            if idx < nxt:
                # still inside a sequence that was already measured
                continue
            seq_len = measure_length(padded[idx:], self._term)
            if seq_len > 0:
                # at a sequence: point beyond it
                nxt = idx + seq_len
            else:
                # not a sequence: keep the character
                printable.append(padded[idx])
                nxt = idx + 1
        return u''.join(printable)

    def padd(self):
        r"""S.padd() -> unicode

        Make non-destructive space or backspace into destructive ones.

        Where sequence ``move_right(n)`` is detected, it is replaced with
        ``n * u' '``.  Where sequence ``move_left(n)`` or ``\b`` is
        detected, those last-most characters are destroyed.
        """
        outp = u''
        nxt = 0
        for idx in range(0, text_type.__len__(self)):
            width = horizontal_distance(self[idx:], self._term)
            if width != 0:
                # a horizontal movement begins here: skip past it and
                # apply its effect to the output collected so far
                nxt = idx + measure_length(self[idx:], self._term)
                if width > 0:
                    outp += u' ' * width
                elif width < 0:
                    outp = outp[:width]
            if nxt <= idx:
                outp += self[idx]
                nxt = idx + 1
        return outp
def measure_length(ucs, term):
    r"""measure_length(S, term) -> int

    Returns non-zero for string ``S`` that begins with a terminal sequence,
    that is: the width of the first unprintable sequence found in S.  For use
    as a *next* pointer to skip past sequences.  If string ``S`` is not a
    sequence, 0 is returned.

    A sequence may be a typical terminal sequence beginning with Escape
    (``\x1b``), especially a Control Sequence Initiator (``CSI``, ``\x1b[``,
    ...), or those of ``\a``, ``\b``, ``\r``, ``\n``, ``\x0e`` (shift out),
    ``\x0f`` (shift in). They do not necessarily have to begin with CSI, they
    need only match the capabilities of attributes ``_re_will_move`` and
    ``_re_wont_move`` (or ``_re_cub`` / ``_re_cuf``, when set) of terminal
    ``term``.  When ``term`` is falsy only the single control characters
    are recognised.
    """
    # simple terminal control characters,
    ctrl_seqs = u'\a\b\r\n\x0e\x0f'
    if ucs.startswith(tuple(ctrl_seqs)):
        return 1

    # known multibyte sequences,
    matching_seq = term and (
        term._re_will_move.match(ucs) or
        term._re_wont_move.match(ucs) or
        term._re_cub and term._re_cub.match(ucs) or
        term._re_cuf and term._re_cuf.match(ucs)
    )
    if matching_seq:
        return matching_seq.end()

    # none found, must be printable!
    return 0


def termcap_distance(ucs, cap, unit, term):
    r"""termcap_distance(S, cap, unit, term) -> int

    Match horizontal distance by simple ``cap`` capability name, ``cub``
    or ``cuf``.  String ``S`` is first compared with the single-step
    sequence of Terminal instance ``term`` (``_cub1`` or ``_cuf1``), giving
    a distance of ``unit``, *1* or *-1* for right and left respectively.
    Otherwise it is matched with the regular expression ``_re_cub`` or
    ``_re_cuf``, giving ``unit`` times the numeric parameter; when that
    parameter is optional and absent, a distance of one ``unit`` is used.

    Returns 0 if unmatched.
    """
    assert cap in ('cuf', 'cub')
    # match cub1(left), cuf1(right)
    one = getattr(term, '_%s1' % (cap,))
    if one and ucs.startswith(one):
        return unit

    # match cub(n), cuf(n) using regular expressions
    re_pattern = getattr(term, '_re_%s' % (cap,))
    _dist = re_pattern and re_pattern.match(ucs)
    if _dist:
        # The numeric group is optional in the patterns built with
        # optional=True, so group(1) may be None; int(None) would raise
        # TypeError.  An omitted parameter counts as a single step.
        return unit * int(_dist.group(1) or 1)

    return 0


def horizontal_distance(ucs, term):
    r"""horizontal_distance(S, term) -> int

    Returns Integer ``<n>`` in sequence of form ``<ESC>[<n>C``
    (T.move_right(n)), or ``-(n)`` in sequence of form ``<ESC>[<n>D``
    (T.move_left(n)). Returns -1 for backspace (0x08), Otherwise 0.

    Tabstop (``\t``) cannot be correctly calculated, as the relative column
    position cannot be determined: 8 is always (and, incorrectly) returned.
    """
    if ucs.startswith('\b'):
        return -1

    elif ucs.startswith('\t'):
        # As best as I can prove it, a tabstop is always 8 by default.
        # Though, given that blessings is:
        #
        #  1. unaware of the output device's current cursor position, and
        #  2. unaware of the location the callee may chose to output any
        #     given string,
        #
        # It is not possible to determine how many cells any particular
        # \t would consume on the output device!
        return 8

    return (termcap_distance(ucs, 'cub', -1, term) or
            termcap_distance(ucs, 'cuf', 1, term) or
            0)