# encoding: utf-8
" This sub-module provides 'sequence awareness' for blessed."
__author__ = 'Jeff Quast <contact@jeffquast.com>'
__license__ = 'MIT'
__all__ = ['init_sequence_patterns', 'Sequence', 'SequenceTextWrapper']
# built-ins
import functools
import textwrap
import warnings
import math
import sys
import re
# 3rd-party
import wcwidth # https://github.com/jquast/wcwidth
# Terminal kinds for which a warning is issued by init_sequence_patterns().
_BINTERM_UNSUPPORTED = ('kermit', 'avatar')
_BINTERM_UNSUPPORTED_MSG = (
    'sequence-awareness for terminals emitting '
    'binary-packed capabilities are not supported.')

# Native text type: ``str`` on Python 3, ``unicode`` otherwise.
text_type = str if sys.version_info[0] == 3 else unicode  # noqa
def _merge_sequences(inp):
    """Return the non-empty patterns of ``inp`` as a list, longest first.

    Longer patterns are placed ahead of shorter ones so that, once joined
    into a regular expression alternation, a full sequence takes precedence
    over any sequence that is merely a prefix of it.  Falsy entries (such
    as ``u''`` or ``None``) are dropped.

    :arg inp: iterable of pattern strings.
    :returns: new list ordered by descending length.
    """
    non_empty = [pattern for pattern in inp if pattern]
    non_empty.sort(key=len, reverse=True)
    return non_empty
def _build_numeric_capability(term, cap, optional=False,
                              base_num=99, nparams=1):
    r"""Build a regexp for a capability taking numeric parameters.

    The capability is called with ``nparams`` copies of ``base_num`` and
    the result is regex-escaped.  The first of ``base_num - 1``,
    ``base_num`` or ``base_num + 1`` found in that text is taken to be the
    rendered parameter, and every occurrence of it is replaced with the
    capturing group ``(\d+)``.

    :arg term: object exposing the capability as attribute ``cap``; a
        truthy value is called with the numeric parameters.
    :arg cap: attribute name of the capability to look up on ``term``.
    :arg optional: when true, the group is made optional: ``(\d+)?``.
    :arg base_num: probe value passed for each parameter.
    :arg nparams: how many parameters the capability is called with.
    :returns: the pattern string, or ``None`` when the capability is falsy
        or no probe value is found in its output (a warning is issued in
        the latter case).
    """
    _cap = getattr(term, cap)
    if not _cap:
        return None  # no such capability

    opt = '?' if optional else ''
    cap_re = re.escape(_cap(*((base_num,) * nparams)))

    # search for matching ascii, n-1 through n+1
    for num in range(base_num - 1, base_num + 2):
        digits = str(num)
        if digits in cap_re:
            # swap the probe value for a digit-matching expression
            return cap_re.replace(digits, r'(\d+)%s' % (opt,))

    warnings.warn('Unknown parameter in %r (%r, %r)' % (cap, _cap, cap_re))
    return None
def _build_any_numeric_capability(term, cap, num=99, nparams=1):
    r"""Build a regexp for a capability, matching *any* digit parameters.

    The capability is called with ``nparams`` copies of ``num``, the result
    is regex-escaped, and every run of digits in it is replaced with the
    capturing group ``(\d+)``.

    :arg term: object exposing the capability as attribute ``cap``; a
        truthy value is called with the numeric parameters.
    :arg cap: attribute name of the capability to look up on ``term``.
    :arg num: number passed for each parameter.
    :arg nparams: how many parameters the capability is called with.
    :returns: the pattern string, or ``None`` when the capability is falsy
        or its output contains no digits (a warning is issued in the
        latter case).
    """
    _cap = getattr(term, cap)
    if not _cap:
        return None  # no such capability

    cap_re = re.escape(_cap(*((num,) * nparams)))
    # A callable replacement is inserted verbatim.  A replacement *template*
    # is parsed for escapes, and ``\d`` there is rejected as a bad escape
    # by Python 3.7 and later.
    cap_re = re.sub(r'(\d+)', lambda _match: r'(\d+)', cap_re)
    if r'(\d+)' in cap_re:
        return cap_re

    warnings.warn('Missing numerics in %r, %r' % (cap, cap_re))
    return None
def get_movement_sequence_patterns(term):
    """Return the set of regexp patterns for capabilities that move the cursor.

    Static capabilities of ``term`` are regex-escaped as-is; parameterized
    ones are turned into digit-matching patterns through
    ``_build_numeric_capability``, which yields ``None`` for capabilities
    the terminal lacks.

    :arg term: terminal object whose capability attributes are read.
    :returns: ``set`` of pattern strings; may contain ``None`` or empty
        strings for unavailable capabilities.
    """
    numeric = functools.partial(_build_numeric_capability, term)
    escape = re.escape

    patterns = [
        # carriage_return
        escape(term.cr),
        # column_address: Horizontal position, absolute
        numeric(cap='hpa'),
        # row_address: Vertical position #1 absolute
        numeric(cap='vpa'),
        # cursor_address: Move to row #1 columns #2
        numeric(cap='cup', nparams=2),
        # cursor_down: Down one line
        escape(term.cud1),
        # cursor_home: Home cursor (if no cup)
        escape(term.home),
        # cursor_left: Move left one space
        escape(term.cub1),
        # cursor_right: Non-destructive space (move right one space)
        escape(term.cuf1),
        # cursor_up: Up one line
        escape(term.cuu1),
        # param_down_cursor: Down #1 lines
        numeric(cap='cud', optional=True),
        # restore_cursor: Restore cursor to position of last save_cursor
        escape(term.rc),
        # clear_screen: clear screen and home cursor
        escape(term.clear),
        # entering and leaving fullscreen mode
        escape(term.enter_fullscreen),
        escape(term.exit_fullscreen),
        # forward cursor / backward cursor
        # NOTE(review): these two are added without re.escape(), and
        # nothing in this module assigns ``_cuf`` or ``_cub`` -- confirm
        # what the terminal object returns for them.
        term._cuf,
        term._cub,
    ]
    return set(patterns)
def get_wontmove_sequence_patterns(term):
    """Return a list of regexp patterns for capabilities that do not move
    the cursor.

    Static capabilities of ``term`` are regex-escaped as-is.  Parameterized
    ones are converted by ``_build_numeric_capability`` (a specific probe
    number) or ``_build_any_numeric_capability`` (any run of digits); both
    yield ``None`` for capabilities the terminal lacks.

    :arg term: terminal object whose capability attributes are read.
    :returns: ``list`` of pattern strings; may contain ``None`` or empty
        strings for unavailable capabilities.
    """
    bnc = functools.partial(_build_numeric_capability, term)
    bna = functools.partial(_build_any_numeric_capability, term)
    esc = re.escape

    patterns = [
        # print_screen: Print contents of screen
        esc(term.mc0),
        # prtr_off: Turn off printer
        esc(term.mc4),
        # prtr_on: Turn on printer
        esc(term.mc5),
        # save_cursor: Save current cursor position (P)
        esc(term.sc),
        # set_tab: Set a tab in every row, current columns
        esc(term.hts),
        # enter_bold_mode: Turn on bold (extra bright) mode
        esc(term.bold),
        # enter_standout_mode
        esc(term.standout),
        # enter_subscript_mode
        esc(term.subscript),
        # enter_superscript_mode
        esc(term.superscript),
        # enter_underline_mode: Begin underline mode
        esc(term.underline),
        # enter_blink_mode: Turn on blinking
        esc(term.blink),
        # enter_dim_mode: Turn on half-bright mode
        esc(term.dim),
        # cursor_invisible: Make cursor invisible
        esc(term.civis),
        # cursor_visible: Make cursor very visible
        esc(term.cvvis),
        # cursor_normal: Make cursor appear normal (undo civis/cvvis)
        esc(term.cnorm),
        # clear_all_tabs: Clear all tab stops
        esc(term.tbc),
        # change_scroll_region: Change region to line #1 to line #2
        bnc(cap='csr', nparams=2),
        # clr_bol: Clear to beginning of line
        esc(term.el1),
        # clr_eol: Clear to end of line
        esc(term.el),
        # clr_eos: Clear to end of screen
        esc(term.clear_eos),
        # delete_character: Delete character
        esc(term.dch1),
        # delete_line: Delete line (P*)
        esc(term.dl1),
        # erase_chars: Erase #1 characters
        bnc(cap='ech'),
        # insert_line: Insert line (P*)
        esc(term.il1),
        # parm_dch: Delete #1 characters
        bnc(cap='dch'),
        # parm_delete_line: Delete #1 lines
        bnc(cap='dl'),
        # exit_alt_charset_mode: End alternate character set (P)
        esc(term.rmacs),
        # exit_am_mode: Turn off automatic margins
        esc(term.rmam),
        # exit_attribute_mode: Turn off all attributes
        esc(term.sgr0),
        # exit_ca_mode: Strings to end programs using cup
        esc(term.rmcup),
        # exit_insert_mode: Exit insert mode
        esc(term.rmir),
        # exit_standout_mode: Exit standout mode
        esc(term.rmso),
        # exit_underline_mode: Exit underline mode
        esc(term.rmul),
        # flash_hook: Flash switch hook
        esc(term.hook),
        # flash_screen: Visible bell (may not move cursor)
        esc(term.flash),
        # keypad_local: Leave 'keyboard_transmit' mode
        esc(term.rmkx),
        # keypad_xmit: Enter 'keyboard_transmit' mode
        esc(term.smkx),
        # meta_off: Turn off meta mode
        esc(term.rmm),
        # meta_on: Turn on meta mode (8th-bit on)
        esc(term.smm),
        # orig_pair: Set default pair to its original value
        esc(term.op),
        # parm_ich: Insert #1 characters
        bnc(cap='ich'),
        # parm_index: Scroll forward #1
        bnc(cap='indn'),
        # parm_insert_line: Insert #1 lines
        bnc(cap='il'),
        # (erase_chars / 'ech' is already listed above; the second,
        # identical entry that used to sit here was dropped.)
        # parm_rindex: Scroll back #1 lines
        bnc(cap='rin'),
        # parm_up_cursor: Up #1 lines
        # NOTE(review): a cursor-moving capability in the "won't move"
        # list -- kept as found; confirm this is intended.
        bnc(cap='cuu'),
        # scroll_forward: Scroll text up (P)
        esc(term.ind),
        # NOTE(review): originally labelled "scroll_reverse", but the
        # attribute read is ``rev`` -- confirm which capability is meant.
        esc(term.rev),
        # tab: Tab to next 8-space hardware tab stop
        esc(term.ht),
        # set_a_background: Set background color to #1, using ANSI escape
        bna(cap='setab', num=1),
        bna(cap='setab', num=(term.number_of_colors - 1)),
        # set_a_foreground: Set foreground color to #1, using ANSI escape
        bna(cap='setaf', num=1),
        bna(cap='setaf', num=(term.number_of_colors - 1)),
    ]

    # set_attributes: Define video attributes #1-#9 (PG9)
    # ( not *exactly* legal, being extra forgiving. )
    patterns.extend(bna(cap='sgr', nparams=_num) for _num in range(1, 10))

    # reset_{1,2,3}string: Reset string
    patterns.extend(esc(reset) for reset in (term.r1, term.r2, term.r3))
    return patterns
def init_sequence_patterns(term):
    """Build the sequence-matching database for terminal ``term``.

    Given a Terminal instance, ``term``, this function processes several
    known terminal capabilities and returns a dictionary of regular
    expressions and static values, which may be re-attached to the
    terminal as attributes of the same key-name:

    ``_re_will_move``
        any sequence matching this pattern will cause the terminal
        cursor to move (such as *term.home*).

    ``_re_wont_move``
        any sequence matching this pattern will not cause the cursor
        to move (such as *term.bold*).

    ``_re_cuf``
        regular expression that matches term.cuf(N) (move N characters
        forward), or None if terminal is without cuf sequence.

    ``_cuf1``
        *term.cuf1* sequence (cursor forward 1 character) as a static value.

    ``_re_cub``
        regular expression that matches term.cub(N) (move N characters
        backward), or None if terminal is without cub sequence.

    ``_cub1``
        *term.cub1* sequence (cursor backward 1 character) as a static value.

    These attributes make it possible to perform introspection on strings
    containing sequences generated by this terminal, to determine the
    printable length of a string.

    A warning is issued when ``term.kind`` is one of the terminal kinds
    listed in ``_BINTERM_UNSUPPORTED``.  When ``term.does_styling`` is
    false, both ``_re_will_move`` and ``_re_wont_move`` are compiled from
    the empty alternation ``()``.
    """
    if term.kind in _BINTERM_UNSUPPORTED:
        warnings.warn(_BINTERM_UNSUPPORTED_MSG)

    # Both accumulators are lists: _merge_sequences() returns a list and
    # the fallback patterns below are appended with ``+=``, which a set
    # would not support.
    _will_move = []
    _wont_move = []

    if term.does_styling:
        # Build will_move, a list of terminal capabilities that have
        # indeterminate effects on the terminal cursor position.
        _will_move = _merge_sequences(get_movement_sequence_patterns(term))

        # Build wont_move, a list of terminal capabilities that mainly
        # affect video attributes, for use with measure_length().
        _wont_move = _merge_sequences(get_wontmove_sequence_patterns(term))
        _wont_move += [
            # some last-ditch match efforts; well, xterm and aixterm is going
            # to throw \x1b(B and other oddities all around, so, when given
            # input such as ansi art (see test using wall.ans), and well,
            # theres no reason a vt220 terminal shouldn't be able to
            # recognize blue_on_red, even if it didn't cause it to be
            # generated. these are final "ok, i will match this, anyway"
            re.escape(u'\x1b') + r'\[(\d+)m',
            re.escape(u'\x1b') + r'\[(\d+)\;(\d+)m',
            re.escape(u'\x1b') + r'\[(\d+)\;(\d+)\;(\d+)m',
            re.escape(u'\x1b') + r'\[(\d+)\;(\d+)\;(\d+)\;(\d+)m',
            re.escape(u'\x1b(B'),
        ]

    # compile as regular expressions, OR'd.
    _re_will_move = re.compile('(%s)' % ('|'.join(_will_move)))
    _re_wont_move = re.compile('(%s)' % ('|'.join(_wont_move)))

    # static pattern matching for horizontal_distance(ucs, term)
    bnc = functools.partial(_build_numeric_capability, term)

    # parm_right_cursor: Move #1 characters to the right
    _cuf = bnc(cap='cuf', optional=True)
    _re_cuf = re.compile(_cuf) if _cuf else None

    # cursor_right: Non-destructive space (move right one space)
    _cuf1 = term.cuf1

    # parm_left_cursor: Move #1 characters to the left
    _cub = bnc(cap='cub', optional=True)
    _re_cub = re.compile(_cub) if _cub else None

    # cursor_left: Move left one space
    _cub1 = term.cub1

    return {'_re_will_move': _re_will_move,
            '_re_wont_move': _re_wont_move,
            '_re_cuf': _re_cuf,
            '_re_cub': _re_cub,
            '_cuf1': _cuf1,
            '_cub1': _cub1, }
class SequenceTextWrapper(textwrap.TextWrapper):
    # No class docstring here: ``__doc__`` is assigned from
    # textwrap.TextWrapper immediately after the class body.

    def __init__(self, width, term, **kwargs):
        """Create a wrapper that measures chunks by printable length.

        :arg width: maximum printable width of each wrapped line.
        :arg term: terminal passed to ``Sequence`` when measuring chunks.
        :arg kwargs: forwarded to ``textwrap.TextWrapper``;
            ``break_long_words`` must be omitted or ``False`` and is
            always forced to ``False``.
        """
        self.term = term
        assert kwargs.get('break_long_words', False) is False, (
            'break_long_words is not sequence-safe')
        kwargs['break_long_words'] = False
        textwrap.TextWrapper.__init__(self, width, **kwargs)

    def _wrap_chunks(self, chunks):
        """
        escape-sequence aware variant of _wrap_chunks. Though
        movement sequences, such as term.left() are certainly not
        honored, sequences such as term.bold() are, and are not
        broken mid-sequence.

        :arg chunks: list of text chunks; it is reversed and consumed
            in place.
        :returns: list of wrapped lines.
        :raises ValueError: if ``self.width`` is not an integer greater
            than zero.
        """
        # Check the type before comparing: on Python 3 a non-numeric width
        # would otherwise fail the ``<=`` comparison with a TypeError
        # instead of reaching the intended ValueError.
        if not isinstance(self.width, int) or self.width <= 0:
            raise ValueError("invalid width %r(%s) (must be integer > 0)" % (
                self.width, type(self.width)))

        lines = []
        term = self.term
        # treated as enabled when the attribute is absent
        drop_whitespace = getattr(self, 'drop_whitespace', True)

        # work from the end of the list so that pop() yields chunks in order
        chunks.reverse()
        while chunks:
            cur_line = []
            cur_len = 0
            indent = self.subsequent_indent if lines else self.initial_indent
            width = self.width - len(indent)

            # drop a whitespace-only chunk at the start of every line
            # but the first
            if drop_whitespace and (
                    Sequence(chunks[-1], term).strip() == '' and lines):
                del chunks[-1]

            # take chunks for as long as their printable length fits
            while chunks:
                chunk_len = Sequence(chunks[-1], term).length()
                if cur_len + chunk_len > width:
                    break
                cur_line.append(chunks.pop())
                cur_len += chunk_len

            # the next chunk is wider than a whole line; defer to the
            # inherited long-word handling
            if chunks and Sequence(chunks[-1], term).length() > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)

            # drop a whitespace-only chunk at the end of the line
            if drop_whitespace and (
                    cur_line and Sequence(cur_line[-1], term).strip() == ''):
                del cur_line[-1]

            if cur_line:
                lines.append(indent + u''.join(cur_line))
        return lines


SequenceTextWrapper.__doc__ = textwrap.TextWrapper.__doc__
class Sequence(text_type):
    """
    This unicode-derived class understands the effect of escape sequences
    of printable length, allowing a properly implemented .rjust(), .ljust(),
    .center(), and .length()
    """

    def __new__(cls, sequence_text, term):
        """Sequence(sequence_text, term) -> unicode object

        :arg sequence_text: A string containing sequences.
        :arg term: Terminal instance this string was created with.
        """
        new = text_type.__new__(cls, sequence_text)
        new._term = term
        return new

    def ljust(self, width, fillchar=u' '):
        """S.ljust(width, fillchar) -> unicode

        Returns string derived from unicode string ``S``, left-adjusted
        by trailing whitespace padding ``fillchar``."""
        rightside = fillchar * int(
            (max(0.0, float(width - self.length()))) / float(len(fillchar)))
        return u''.join((self, rightside))

    def rjust(self, width, fillchar=u' '):
        """S.rjust(width, fillchar=u'') -> unicode

        Returns string derived from unicode string ``S``, right-adjusted
        by leading whitespace padding ``fillchar``."""
        leftside = fillchar * int(
            (max(0.0, float(width - self.length()))) / float(len(fillchar)))
        return u''.join((leftside, self))

    def center(self, width, fillchar=u' '):
        """S.center(width, fillchar=u'') -> unicode

        Returns string derived from unicode string ``S``, centered
        and surrounded with whitespace padding ``fillchar``.  When the
        padding cannot be split evenly, the extra cell goes to the right."""
        split = max(0.0, float(width) - self.length()) / 2
        leftside = fillchar * int(
            (max(0.0, math.floor(split))) / float(len(fillchar)))
        rightside = fillchar * int(
            (max(0.0, math.ceil(split))) / float(len(fillchar)))
        return u''.join((leftside, self, rightside))

    def length(self):
        r"""S.length() -> int

        Returns printable length of unicode string ``S`` that may contain
        terminal sequences.

        Although accounted for, strings containing sequences such as
        ``term.clear`` will not give accurate returns, it is not
        considered lengthy (a length of 0). Combining characters,
        are also not considered lengthy.

        Strings containing ``term.left`` or ``\b`` will cause "overstrike",
        but a length less than 0 is not ever returned. So ``_\b+`` is a
        length of 1 (``+``), but ``\b`` is simply a length of 0.

        Some characters may consume more than one cell, mainly those CJK
        Unified Ideographs (Chinese, Japanese, Korean) defined by Unicode
        as half or full-width characters.

        For example:
            >>> from blessed import Terminal
            >>> from blessed.sequences import Sequence
            >>> term = Terminal()
            >>> Sequence(term.clear + term.red(u'コンニチハ'), term).length()
            10
        """
        # because combining characters may return -1, "clip" their length to 0.
        return sum(max(0, wcwidth.wcwidth(w_char))
                   for w_char in self.strip_seqs())

    def strip(self, chars=None):
        """S.strip([chars]) -> unicode

        Return a copy of the string S with terminal sequences removed, and
        leading and trailing whitespace removed.

        If chars is given and not None, remove characters in chars instead.
        """
        return self.strip_seqs().strip(chars)

    def lstrip(self, chars=None):
        """S.lstrip([chars]) -> unicode

        Return a copy of the string S with terminal sequences and leading
        whitespace removed.

        If chars is given and not None, remove characters in chars instead.
        """
        return self.strip_seqs().lstrip(chars)

    def rstrip(self, chars=None):
        """S.rstrip([chars]) -> unicode

        Return a copy of the string S with terminal sequences and trailing
        whitespace removed.

        If chars is given and not None, remove characters in chars instead.
        """
        return self.strip_seqs().rstrip(chars)

    def strip_seqs(self):
        r"""S.strip_seqs() -> unicode

        Return a string without sequences for a string that contains
        sequences for the Terminal with which they were created.

        Where sequence ``move_right(n)`` is detected, it is replaced with
        ``n * u' '``, and where ``move_left()`` or ``\b`` is detected,
        those last-most characters are destroyed.

        All other sequences are simply removed. An example,
            >>> from blessed import Terminal
            >>> from blessed.sequences import Sequence
            >>> term = Terminal()
            >>> Sequence(term.clear + term.red(u'test'), term).strip_seqs()
            u'test'
        """
        text = self.padd()
        term = self._term
        printable = []
        idx = 0
        while idx < len(text):
            seq_len = measure_length(text[idx:], term)
            if seq_len > 0:
                # at a sequence: jump to the first character beyond it
                idx += seq_len
            else:
                # not a sequence: keep the character
                printable.append(text[idx])
                idx += 1
        return u''.join(printable)

    def padd(self):
        r"""S.padd() -> unicode

        Make non-destructive space or backspace into destructive ones.

        Where sequence ``move_right(n)`` is detected, it is replaced with
        ``n * u' '``. Where sequence ``move_left(n)`` or ``\b`` is
        detected, those last-most characters are destroyed.
        """
        term = self._term
        outp = u''
        # nxt: index of the first character beyond the current sequence
        nxt = 0
        for idx in range(0, text_type.__len__(self)):
            remainder = self[idx:]
            width = horizontal_distance(remainder, term)
            if width != 0:
                nxt = idx + measure_length(remainder, term)
                if width > 0:
                    # rightward movement becomes that many spaces
                    outp += u' ' * width
                elif width < 0:
                    # leftward movement destroys the last-most characters
                    outp = outp[:width]
            if nxt <= idx:
                # not inside a sequence: copy the character through
                outp += self[idx]
                nxt = idx + 1
        return outp
def measure_length(ucs, term):
"""measure_length(S, term) -> int
Returns non-zero for string ``S`` that begins with a terminal sequence,
that is: the width of the first unprintable sequence found in S. For use
as a *next* pointer to skip past sequences. If string ``S`` is not a
sequence, 0 is returned.
A sequence may be a typical terminal sequence beginning with Escape
(``\x1b``), especially a Control Sequence Initiator (``CSI``, ``\x1b[``,
...), or those of ``\a``, ``\b``, ``\r``, ``\n``, ``\xe0`` (shift in),
``\x0f`` (shift out). They do not necessarily have to begin with CSI, they
need only match the capabilities of attributes ``_re_will_move`` and
``_re_wont_move`` of terminal ``term``.
"""
# simple terminal control characters,
ctrl_seqs = u'\a\b\r\n\x0e\x0f'
if any([ucs.startswith(_ch) for _ch in ctrl_seqs]):
return 1
# known multibyte sequences,
matching_seq = term and (
term._re_will_move.match(ucs) or
term._re_wont_move.match(ucs) or
term._re_cub and term._re_cub.match(ucs) or
term._re_cuf and term._re_cuf.match(ucs)
)
if matching_seq:
start, end = matching_seq.span()
return end
# none found, must be printable!
return 0
def termcap_distance(ucs, cap, unit, term):
    r"""termcap_distance(S, cap, unit, term) -> int

    Match horizontal distance by simple ``cap`` capability name, ``cub`` or
    ``cuf``, for string ``S`` beginning with a sequence identified by
    Terminal instance ``term``, and a distance of ``unit`` *1* or *-1*, for
    right and left, respectively.

    The static single-step sequence (attribute ``_cub1`` or ``_cuf1`` of
    ``term``) is tried first and yields ``unit``.  Otherwise the compiled
    pattern (attribute ``_re_cub`` or ``_re_cuf``) is tried and yields
    ``unit * n``, where ``n`` is the captured parameter, or 1 when the
    sequence carries no parameter.

    Returns 0 if unmatched.

    :arg ucs: string to examine; only its beginning is matched.
    :arg cap: ``'cuf'`` or ``'cub'``.
    :arg unit: multiplier applied to the matched distance.
    :arg term: object providing the ``_<cap>1`` and ``_re_<cap>``
        attributes.
    """
    assert cap in ('cuf', 'cub')

    # match cub1(left), cuf1(right)
    one = getattr(term, '_%s1' % (cap,))
    if one and ucs.startswith(one):
        return unit

    # match cub(n), cuf(n) using regular expressions
    re_pattern = getattr(term, '_re_%s' % (cap,))
    _dist = re_pattern and re_pattern.match(ucs)
    if _dist:
        # The pattern's numeric group is optional, so it is None for a
        # parameterless sequence; int(None) would raise TypeError.  An
        # omitted parameter counts as a distance of one.
        return unit * int(_dist.group(1) or 1)

    return 0
def horizontal_distance(ucs, term):
    r"""horizontal_distance(S, term) -> int

    Returns Integer ``<n>`` in SGR sequence of form ``<ESC>[<n>C``
    (T.move_right(n)), or ``-(n)`` in sequence of form ``<ESC>[<n>D``
    (T.move_left(n)). Returns -1 for backspace (0x08), Otherwise 0.

    Tabstop (``\t``) cannot be correctly calculated, as the relative column
    position cannot be determined: 8 is always (and, incorrectly) returned.
    """
    if ucs.startswith('\b'):
        return -1

    if ucs.startswith('\t'):
        # A tabstop is assumed to be 8 cells wide.  The real figure cannot
        # be known here, because this module is:
        #
        # 1. unaware of the output device's current cursor position, and
        # 2. unaware of the location the caller may choose to output any
        #    given string,
        #
        # so the number of cells a particular \t consumes on the output
        # device cannot be determined.
        return 8

    # leftward movement is tried first, then rightward
    leftward = termcap_distance(ucs, 'cub', -1, term)
    if leftward:
        return leftward
    return termcap_distance(ucs, 'cuf', 1, term) or 0