r756 - in trunk/docutils/src/main/resources/docutils/docutils: . languages parsers parsers/rst parsers/rst/directives parsers/rst/languages readers transforms utils utils/math writers writers/html4css1 writers/latex2e writers/odf_odt writers/pep_html writers/s5_html writers/xetex
Author: echatellier Date: 2014-09-29 14:39:28 +0200 (Mon, 29 Sep 2014) New Revision: 756 Url: http://forge.nuiton.org/projects/jrst/repository/revisions/756 Log: refs #3523: Update docutils python code to 0.12 Added: trunk/docutils/src/main/resources/docutils/docutils/languages/da.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/da.py trunk/docutils/src/main/resources/docutils/docutils/utils/error_reporting.py trunk/docutils/src/main/resources/docutils/docutils/utils/math/ trunk/docutils/src/main/resources/docutils/docutils/utils/math/__init__.py trunk/docutils/src/main/resources/docutils/docutils/utils/math/latex2mathml.py trunk/docutils/src/main/resources/docutils/docutils/utils/math/math2html.py trunk/docutils/src/main/resources/docutils/docutils/utils/math/tex2unichar.py trunk/docutils/src/main/resources/docutils/docutils/utils/math/unichar2tex.py trunk/docutils/src/main/resources/docutils/docutils/utils/smartquotes.py trunk/docutils/src/main/resources/docutils/docutils/utils/urischemes.py Removed: trunk/docutils/src/main/resources/docutils/docutils/Lib/ trunk/docutils/src/main/resources/docutils/docutils/_string_template_compat.py trunk/docutils/src/main/resources/docutils/docutils/error_reporting.py trunk/docutils/src/main/resources/docutils/docutils/math/ trunk/docutils/src/main/resources/docutils/docutils/roman.py trunk/docutils/src/main/resources/docutils/docutils/urischemes.py trunk/docutils/src/main/resources/docutils/docutils/utils.py Modified: trunk/docutils/src/main/resources/docutils/docutils/__init__.py trunk/docutils/src/main/resources/docutils/docutils/_compat.py trunk/docutils/src/main/resources/docutils/docutils/core.py trunk/docutils/src/main/resources/docutils/docutils/frontend.py trunk/docutils/src/main/resources/docutils/docutils/io.py trunk/docutils/src/main/resources/docutils/docutils/languages/__init__.py trunk/docutils/src/main/resources/docutils/docutils/languages/lt.py 
trunk/docutils/src/main/resources/docutils/docutils/nodes.py trunk/docutils/src/main/resources/docutils/docutils/parsers/__init__.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/__init__.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/__init__.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/admonitions.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/images.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/misc.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/tables.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/__init__.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/lt.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/roles.py trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/states.py trunk/docutils/src/main/resources/docutils/docutils/readers/__init__.py trunk/docutils/src/main/resources/docutils/docutils/statemachine.py trunk/docutils/src/main/resources/docutils/docutils/transforms/frontmatter.py trunk/docutils/src/main/resources/docutils/docutils/transforms/references.py trunk/docutils/src/main/resources/docutils/docutils/transforms/universal.py trunk/docutils/src/main/resources/docutils/docutils/utils/__init__.py trunk/docutils/src/main/resources/docutils/docutils/utils/code_analyzer.py trunk/docutils/src/main/resources/docutils/docutils/utils/punctuation_chars.py trunk/docutils/src/main/resources/docutils/docutils/writers/__init__.py trunk/docutils/src/main/resources/docutils/docutils/writers/docutils_xml.py trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/__init__.py trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/html4css1.css trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/math.css 
trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/__init__.py trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/xelatex.tex trunk/docutils/src/main/resources/docutils/docutils/writers/manpage.py trunk/docutils/src/main/resources/docutils/docutils/writers/odf_odt/__init__.py trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/__init__.py trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/pep.css trunk/docutils/src/main/resources/docutils/docutils/writers/s5_html/__init__.py trunk/docutils/src/main/resources/docutils/docutils/writers/xetex/__init__.py Modified: trunk/docutils/src/main/resources/docutils/docutils/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7409 2012-05-02 17:06:37Z grubert $ +# $Id: __init__.py 7756 2014-07-06 11:48:05Z grubert $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -26,9 +26,6 @@ - statemachine.py: A finite state machine specialized for regular-expression-based text filters. -- urischemes.py: Contains a complete mapping of known URI addressing - scheme names to descriptions. - Subpackages: - languages: Language-specific mappings of terms. @@ -44,12 +41,18 @@ - utils: Contains the ``Reporter`` system warning class and miscellaneous utilities used by readers, writers, and transforms. + utils/urischemes.py: Contains a complete mapping of known URI addressing + scheme names to descriptions. + +- utils/math: Contains functions for conversion of mathematical notation + between different formats (LaTeX, MathML, text, ...). + - writers: Format-specific output translators. 
""" __docformat__ = 'reStructuredText' -__version__ = '0.9' +__version__ = '0.12' """``major.minor.micro`` version number. The micro number is bumped for API changes, for new functionality, and for interim project releases. The minor number is bumped whenever there is a significant project release. The major Modified: trunk/docutils/src/main/resources/docutils/docutils/_compat.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/_compat.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/_compat.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: _compat.py 7316 2012-01-19 11:31:58Z milde $ +# $Id: _compat.py 7486 2012-07-11 12:25:14Z milde $ # Author: Georg Brandl <georg@python.org> # Copyright: This module has been placed in the public domain. @@ -35,3 +35,14 @@ # using this hack since 2to3 "fixes" the relative import # when using ``from io import BytesIO`` BytesIO = __import__('io').BytesIO + +if sys.version_info < (2,5): + import __builtin__ + + def __import__(name, globals={}, locals={}, fromlist=[], level=-1): + """Compatibility definition for Python 2.4. + + Silently ignore the `level` argument missing in Python < 2.5. 
+ """ + # we need the level arg because the default changed in Python 3.3 + return __builtin__.__import__(name, globals, locals, fromlist) Deleted: trunk/docutils/src/main/resources/docutils/docutils/_string_template_compat.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/_string_template_compat.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/_string_template_compat.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,133 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf8 -*- - -# string_template_compat.py: string.Template for Python <= 2.4 -# ===================================================== - -# This is just an excerpt of the standard string module to provide backwards -# compatibility. - -import re as _re - -class _multimap: - """Helper class for combining multiple mappings. - - Used by .{safe_,}substitute() to combine the mapping and keyword - arguments. - """ - def __init__(self, primary, secondary): - self._primary = primary - self._secondary = secondary - - def __getitem__(self, key): - try: - return self._primary[key] - except KeyError: - return self._secondary[key] - - -class _TemplateMetaclass(type): - pattern = r""" - %(delim)s(?: - (?P<escaped>%(delim)s) | # Escape sequence of two delimiters - (?P<named>%(id)s) | # delimiter and a Python identifier - {(?P<braced>%(id)s)} | # delimiter and a braced identifier - (?P<invalid>) # Other ill-formed delimiter exprs - ) - """ - - def __init__(cls, name, bases, dct): - super(_TemplateMetaclass, cls).__init__(name, bases, dct) - if 'pattern' in dct: - pattern = cls.pattern - else: - pattern = _TemplateMetaclass.pattern % { - 'delim' : _re.escape(cls.delimiter), - 'id' : cls.idpattern, - } - cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE) - - -class Template: - """A string class for supporting $-substitutions.""" - __metaclass__ = _TemplateMetaclass - - delimiter = '$' - idpattern = 
r'[_a-z][_a-z0-9]*' - - def __init__(self, template): - self.template = template - - # Search for $$, $identifier, ${identifier}, and any bare $'s - - def _invalid(self, mo): - i = mo.start('invalid') - lines = self.template[:i].splitlines(True) - if not lines: - colno = 1 - lineno = 1 - else: - colno = i - len(''.join(lines[:-1])) - lineno = len(lines) - raise ValueError('Invalid placeholder in string: line %d, col %d' % - (lineno, colno)) - - def substitute(self, *args, **kws): - if len(args) > 1: - raise TypeError('Too many positional arguments') - if not args: - mapping = kws - elif kws: - mapping = _multimap(kws, args[0]) - else: - mapping = args[0] - # Helper function for .sub() - def convert(mo): - # Check the most common path first. - named = mo.group('named') or mo.group('braced') - if named is not None: - val = mapping[named] - # We use this idiom instead of str() because the latter will - # fail if val is a Unicode containing non-ASCII characters. - return '%s' % (val,) - if mo.group('escaped') is not None: - return self.delimiter - if mo.group('invalid') is not None: - self._invalid(mo) - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - - def safe_substitute(self, *args, **kws): - if len(args) > 1: - raise TypeError('Too many positional arguments') - if not args: - mapping = kws - elif kws: - mapping = _multimap(kws, args[0]) - else: - mapping = args[0] - # Helper function for .sub() - def convert(mo): - named = mo.group('named') - if named is not None: - try: - # We use this idiom instead of str() because the latter - # will fail if val is a Unicode containing non-ASCII - return '%s' % (mapping[named],) - except KeyError: - return self.delimiter + named - braced = mo.group('braced') - if braced is not None: - try: - return '%s' % (mapping[braced],) - except KeyError: - return self.delimiter + '{' + braced + '}' - if mo.group('escaped') is not None: - return self.delimiter - if 
mo.group('invalid') is not None: - return self.delimiter - raise ValueError('Unrecognized named group in pattern', - self.pattern) - return self.pattern.sub(convert, self.template) - Modified: trunk/docutils/src/main/resources/docutils/docutils/core.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/core.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/core.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: core.py 7384 2012-03-19 22:59:09Z milde $ +# $Id: core.py 7466 2012-06-25 14:56:51Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -20,7 +20,7 @@ from docutils import frontend, io, utils, readers, writers from docutils.frontend import OptionParser from docutils.transforms import Transformer -from docutils.error_reporting import ErrorOutput, ErrorString +from docutils.utils.error_reporting import ErrorOutput, ErrorString import docutils.readers.doctree class Publisher: @@ -176,8 +176,7 @@ try: self.source = self.source_class( source=source, source_path=source_path, - encoding=self.settings.input_encoding, - handle_io_errors=False) + encoding=self.settings.input_encoding) except TypeError: self.source = self.source_class( source=source, source_path=source_path, @@ -192,9 +191,6 @@ destination=destination, destination_path=destination_path, encoding=self.settings.output_encoding, error_handler=self.settings.output_encoding_error_handler) - # Raise IOError instead of system exit with `tracback == True` - # TODO: change io.FileInput's default behaviour and remove this hack - self.destination.handle_io_errors=False def apply_transforms(self): self.document.transformer.populate_from_components( Deleted: trunk/docutils/src/main/resources/docutils/docutils/error_reporting.py =================================================================== --- 
trunk/docutils/src/main/resources/docutils/docutils/error_reporting.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/error_reporting.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,208 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf8 -*- - -# :Id: $Id: error_reporting.py 7316 2012-01-19 11:31:58Z milde $ -# :Copyright: © 2011 Günter Milde. -# :License: Released under the terms of the `2-Clause BSD license`_, in short: -# -# Copying and distribution of this file, with or without modification, -# are permitted in any medium without royalty provided the copyright -# notice and this notice are preserved. -# This file is offered as-is, without any warranty. -# -# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause - -""" -Error reporting should be safe from encoding/decoding errors. -However, implicit conversions of strings and exceptions like - ->>> u'%s world: %s' % ('H\xe4llo', Exception(u'H\xe4llo') - -fail in some Python versions: - -* In Python <= 2.6, ``unicode(<exception instance>)`` uses - `__str__` and fails with non-ASCII chars in`unicode` arguments. - (work around http://bugs.python.org/issue2517): - -* In Python 2, unicode(<exception instance>) fails, with non-ASCII - chars in arguments. (Use case: in some locales, the errstr - argument of IOError contains non-ASCII chars.) - -* In Python 2, str(<exception instance>) fails, with non-ASCII chars - in `unicode` arguments. - -The `SafeString`, `ErrorString` and `ErrorOutput` classes handle -common exceptions. -""" - -import sys, codecs - -# Guess the locale's encoding. -# If no valid guess can be made, locale_encoding is set to `None`: -try: - import locale # module missing in Jython -except ImportError: - locale_encoding = None -else: - locale_encoding = locale.getlocale()[1] or locale.getdefaultlocale()[1] - # locale.getpreferredencoding([do_setlocale=True|False]) - # has side-effects | might return a wrong guess. - # (cf. 
Update 1 in http://stackoverflow.com/questions/4082645/using-python-2-xs-locale-module-t...) - try: - codecs.lookup(locale_encoding or '') # None -> '' - except LookupError: - locale_encoding = None - - - -class SafeString(object): - """ - A wrapper providing robust conversion to `str` and `unicode`. - """ - - def __init__(self, data, encoding=None, encoding_errors='backslashreplace', - decoding_errors='replace'): - self.data = data - self.encoding = (encoding or getattr(data, 'encoding', None) or - locale_encoding or 'ascii') - self.encoding_errors = encoding_errors - self.decoding_errors = decoding_errors - - - def __str__(self): - try: - return str(self.data) - except UnicodeEncodeError, err: - if isinstance(self.data, Exception): - args = [str(SafeString(arg, self.encoding, - self.encoding_errors)) - for arg in self.data.args] - return ', '.join(args) - if isinstance(self.data, unicode): - return self.data.encode(self.encoding, self.encoding_errors) - raise - - def __unicode__(self): - """ - Return unicode representation of `self.data`. - - Try ``unicode(self.data)``, catch `UnicodeError` and - - * if `self.data` is an Exception instance, work around - http://bugs.python.org/issue2517 with an emulation of - Exception.__unicode__, - - * else decode with `self.encoding` and `self.decoding_errors`. - """ - try: - u = unicode(self.data) - if isinstance(self.data, EnvironmentError): - u = u.replace(": u'", ": '") # normalize filename quoting - return u - except UnicodeError, error: # catch ..Encode.. and ..Decode.. 
errors - if isinstance(self.data, EnvironmentError): - return u"[Errno %s] %s: '%s'" % (self.data.errno, - SafeString(self.data.strerror, self.encoding, - self.decoding_errors), - SafeString(self.data.filename, self.encoding, - self.decoding_errors)) - if isinstance(self.data, Exception): - args = [unicode(SafeString(arg, self.encoding, - decoding_errors=self.decoding_errors)) - for arg in self.data.args] - return u', '.join(args) - if isinstance(error, UnicodeDecodeError): - return unicode(self.data, self.encoding, self.decoding_errors) - raise - -class ErrorString(SafeString): - """ - Safely report exception type and message. - """ - def __str__(self): - return '%s: %s' % (self.data.__class__.__name__, - super(ErrorString, self).__str__()) - - def __unicode__(self): - return u'%s: %s' % (self.data.__class__.__name__, - super(ErrorString, self).__unicode__()) - - -class ErrorOutput(object): - """ - Wrapper class for file-like error streams with - failsave de- and encoding of `str`, `bytes`, `unicode` and - `Exception` instances. - """ - - def __init__(self, stream=None, encoding=None, - encoding_errors='backslashreplace', - decoding_errors='replace'): - """ - :Parameters: - - `stream`: a file-like object (which is written to), - a string (opended as a file), - `None` (bind to `sys.stderr`; default). - If evaluating to `False` (but not `None`), - write() requests are ignored. - - `encoding`: `stream` text encoding. Guessed if None. - - `encoding_errors`: how to treat encoding errors. 
- """ - if stream is None: - stream = sys.stderr - elif not(stream): - stream = False - # if `stream` is a file name, open it - elif isinstance(stream, str): - stream = open(stream, 'w') - elif isinstance(stream, unicode): - stream = open(stream.encode(sys.getfilesystemencoding()), 'w') - - self.stream = stream - """Where warning output is sent.""" - - self.encoding = (encoding or getattr(stream, 'encoding', None) or - locale_encoding or 'ascii') - """The output character encoding.""" - - self.encoding_errors = encoding_errors - """Encoding error handler.""" - - self.decoding_errors = decoding_errors - """Decoding error handler.""" - - def write(self, data): - """ - Write `data` to self.stream. Ignore, if self.stream is False. - - `data` can be a `string`, `unicode`, or `Exception` instance. - """ - if self.stream is False: - return - if isinstance(data, Exception): - data = unicode(SafeString(data, self.encoding, - self.encoding_errors, self.decoding_errors)) - try: - self.stream.write(data) - except UnicodeEncodeError: - self.stream.write(data.encode(self.encoding, self.encoding_errors)) - except TypeError: # in Python 3, stderr expects unicode - if self.stream in (sys.stderr, sys.stdout): - self.stream.buffer.write(data) # write bytes to raw stream - else: - self.stream.write(unicode(data, self.encoding, - self.decoding_errors)) - - def close(self): - """ - Close the error-output stream. - - Ignored if the stream is` sys.stderr` or `sys.stdout` or has no - close() method. 
- """ - if self.stream in (sys.stdout, sys.stderr): - return - try: - self.stream.close() - except AttributeError: - pass Modified: trunk/docutils/src/main/resources/docutils/docutils/frontend.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/frontend.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/frontend.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: frontend.py 7339 2012-02-03 12:23:27Z milde $ +# $Id: frontend.py 7584 2013-01-01 20:00:21Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -18,8 +18,10 @@ * Option callbacks: `store_multiple`, `read_config_file`. * Setting validators: `validate_encoding`, `validate_encoding_error_handler`, - `validate_encoding_and_error_handler`, `validate_boolean`, - `validate_threshold`, `validate_colon_separated_string_list`, + `validate_encoding_and_error_handler`, + `validate_boolean`, `validate_ternary`, `validate_threshold`, + `validate_colon_separated_string_list`, + `validate_comma_separated_string_list`, `validate_dependency_file`. * `make_paths_absolute`. * SettingSpec manipulation: `filter_settings_spec`. 
@@ -38,7 +40,7 @@ import docutils import docutils.utils import docutils.nodes -from docutils.error_reporting import locale_encoding, ErrorOutput, ErrorString +from docutils.utils.error_reporting import locale_encoding, ErrorOutput, ErrorString def store_multiple(option, opt, value, parser, *args, **kwargs): @@ -110,14 +112,32 @@ def validate_boolean(setting, value, option_parser, config_parser=None, config_section=None): - if isinstance(value, unicode): - try: - return option_parser.booleans[value.strip().lower()] - except KeyError: - raise (LookupError('unknown boolean value: "%s"' % value), - None, sys.exc_info()[2]) - return value + """Check/normalize boolean settings: + True: '1', 'on', 'yes', 'true' + False: '0', 'off', 'no','false', '' + """ + if isinstance(value, bool): + return value + try: + return option_parser.booleans[value.strip().lower()] + except KeyError: + raise (LookupError('unknown boolean value: "%s"' % value), + None, sys.exc_info()[2]) +def validate_ternary(setting, value, option_parser, + config_parser=None, config_section=None): + """Check/normalize three-value settings: + True: '1', 'on', 'yes', 'true' + False: '0', 'off', 'no','false', '' + any other value: returned as-is. + """ + if isinstance(value, bool) or value is None: + return value + try: + return option_parser.booleans[value.strip().lower()] + except KeyError: + return value + def validate_nonnegative_int(setting, value, option_parser, config_parser=None, config_section=None): value = int(value) @@ -138,13 +158,28 @@ def validate_colon_separated_string_list( setting, value, option_parser, config_parser=None, config_section=None): - if isinstance(value, unicode): + if not isinstance(value, list): value = value.split(':') else: last = value.pop() value.extend(last.split(':')) return value +def validate_comma_separated_list(setting, value, option_parser, + config_parser=None, config_section=None): + """Check/normalize list arguments (split at "," and strip whitespace). 
+ """ + # `value` is already a ``list`` when given as command line option + # and "action" is "append" and ``unicode`` or ``str`` else. + if not isinstance(value, list): + value = [value] + # this function is called for every option added to `value` + # -> split the last item and append the result: + last = value.pop() + items = [i.strip(u' \t\n') for i in last.split(u',') if i.strip(u' \t\n')] + value.extend(items) + return value + def validate_url_trailing_slash( setting, value, option_parser, config_parser=None, config_section=None): if not value: @@ -163,17 +198,15 @@ def validate_strip_class(setting, value, option_parser, config_parser=None, config_section=None): - # convert to list: - if isinstance(value, unicode): - value = [value] - class_values = filter(None, [v.strip() for v in value.pop().split(',')]) - # validate: - for class_value in class_values: - normalized = docutils.nodes.make_id(class_value) - if class_value != normalized: + # value is a comma separated string list: + value = validate_comma_separated_list(setting, value, option_parser, + config_parser, config_section) + # validate list elements: + for cls in value: + normalized = docutils.nodes.make_id(cls) + if cls != normalized: raise ValueError('invalid class value %r (perhaps %r?)' - % (class_value, normalized)) - value.extend(class_values) + % (cls, normalized)) return value def make_paths_absolute(pathdict, keys, base_path=None): @@ -313,8 +346,8 @@ thresholds = {'info': 1, 'warning': 2, 'error': 3, 'severe': 4, 'none': 5} """Lookup table for --report and --halt threshold values.""" - booleans={'1': 1, 'on': 1, 'yes': 1, 'true': 1, - '0': 0, 'off': 0, 'no': 0, 'false': 0, '': 0} + booleans={'1': True, 'on': True, 'yes': True, 'true': True, + '0': False, 'off': False, 'no': False, 'false': False, '': False} """Lookup table for boolean configuration file settings.""" default_error_encoding = getattr(sys.stderr, 'encoding', Modified: trunk/docutils/src/main/resources/docutils/docutils/io.py 
=================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/io.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/io.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,10 +1,10 @@ -# $Id: io.py 7384 2012-03-19 22:59:09Z milde $ +# $Id: io.py 7596 2013-01-25 13:42:17Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. """ I/O classes provide a uniform API for low-level input and output. Subclasses -will exist for a variety of input/output mechanisms. +exist for a variety of input/output mechanisms. """ __docformat__ = 'reStructuredText' @@ -15,13 +15,28 @@ import codecs from docutils import TransformSpec from docutils._compat import b -from docutils.error_reporting import locale_encoding, ErrorString, ErrorOutput +from docutils.utils.error_reporting import locale_encoding, ErrorString, ErrorOutput class InputError(IOError): pass class OutputError(IOError): pass +def check_encoding(stream, encoding): + """Test, whether the encoding of `stream` matches `encoding`. + Returns + + :None: if `encoding` or `stream.encoding` are not a valid encoding + argument (e.g. ``None``) or `stream.encoding is missing. + :True: if the encoding argument resolves to the same value as `encoding`, + :False: if the encodings differ. + """ + try: + return codecs.lookup(stream.encoding) == codecs.lookup(encoding) + except (LookupError, AttributeError, TypeError): + return None + + class Input(TransformSpec): """ @@ -176,7 +191,7 @@ 'a Unicode string') return data if not isinstance(data, unicode): - # Non-unicode (e.g. binary) output. + # Non-unicode (e.g. bytes) output. 
return data else: return data.encode(self.encoding, self.error_handler) @@ -189,7 +204,7 @@ """ def __init__(self, source=None, source_path=None, encoding=None, error_handler='strict', - autoclose=True, handle_io_errors=True, mode='rU'): + autoclose=True, handle_io_errors=None, mode='rU'): """ :Parameters: - `source`: either a file-like object (which is read directly), or @@ -199,14 +214,13 @@ - `error_handler`: the encoding error handler to use. - `autoclose`: close automatically after read (except when `sys.stdin` is the source). - - `handle_io_errors`: summarize I/O errors here, and exit? + - `handle_io_errors`: ignored, deprecated, will be removed. - `mode`: how the file is to be opened (see standard function `open`). The default 'rU' provides universal newline support for text files. """ Input.__init__(self, source, source_path, encoding, error_handler) self.autoclose = autoclose - self.handle_io_errors = handle_io_errors self._stderr = ErrorOutput() if source is None: @@ -221,20 +235,11 @@ try: self.source = open(source_path, mode, **kwargs) except IOError, error: - if handle_io_errors: - print >>self._stderr, ErrorString(error) - print >>self._stderr, ( - u'Unable to open source file for reading ("%s").' - u'Exiting.' % source_path) - sys.exit(1) raise InputError(error.errno, error.strerror, source_path) else: self.source = sys.stdin elif (sys.version_info >= (3,0) and - self.encoding and hasattr(self.source, 'encoding') and - self.encoding != self.source.encoding and - codecs.lookup(self.encoding) != - codecs.lookup(self.source.encoding)): + check_encoding(self.source, self.encoding) is False): # TODO: re-open, warn or raise error? raise UnicodeError('Encoding clash: encoding given is "%s" ' 'but source is opened with encoding "%s".' % @@ -292,13 +297,13 @@ mode = 'w' """The mode argument for `open()`.""" - # 'wb' for binary (e.g. OpenOffice) files. + # 'wb' for binary (e.g. OpenOffice) files (see also `BinaryFileOutput`). 
# (Do not use binary mode ('wb') for text files, as this prevents the # conversion of newlines to the system specific default.) def __init__(self, destination=None, destination_path=None, encoding=None, error_handler='strict', autoclose=True, - handle_io_errors=True, mode=None): + handle_io_errors=None, mode=None): """ :Parameters: - `destination`: either a file-like object (which is written @@ -310,7 +315,7 @@ - `error_handler`: the encoding error handler to use. - `autoclose`: close automatically after write (except when `sys.stdout` or `sys.stderr` is the destination). - - `handle_io_errors`: summarize I/O errors here, and exit? + - `handle_io_errors`: ignored, deprecated, will be removed. - `mode`: how the file is to be opened (see standard function `open`). The default is 'w', providing universal newline support for text files. @@ -319,7 +324,6 @@ encoding, error_handler) self.opened = True self.autoclose = autoclose - self.handle_io_errors = handle_io_errors if mode is not None: self.mode = mode self._stderr = ErrorOutput() @@ -328,6 +332,12 @@ self.opened = False else: self.destination = sys.stdout + elif (# destination is file-type object -> check mode: + mode and hasattr(self.destination, 'mode') + and mode != self.destination.mode): + print >>self._stderr, ('Warning: Destination mode "%s" ' + 'differs from specified mode "%s"' % + (self.destination.mode, mode)) if not destination_path: try: self.destination_path = self.destination.name @@ -336,7 +346,7 @@ def open(self): # Specify encoding in Python 3. - if sys.version_info >= (3,0): + if sys.version_info >= (3,0) and 'b' not in self.mode: kwargs = {'encoding': self.encoding, 'errors': self.error_handler} else: @@ -344,39 +354,42 @@ try: self.destination = open(self.destination_path, self.mode, **kwargs) except IOError, error: - if self.handle_io_errors: - print >>self._stderr, ErrorString(error) - print >>self._stderr, (u'Unable to open destination file' - u" for writing ('%s'). Exiting." 
% self.destination_path) - sys.exit(1) - raise OutputError(error.errno, error.strerror, + raise OutputError(error.errno, error.strerror, self.destination_path) self.opened = True def write(self, data): """Encode `data`, write it to a single file, and return it. - In Python 3, `data` is returned unchanged. + With Python 3 or binary output mode, `data` is returned unchanged, + except when specified encoding and output encoding differ. """ - if sys.version_info < (3,0): - data = self.encode(data) if not self.opened: self.open() + if ('b' not in self.mode and sys.version_info < (3,0) + or check_encoding(self.destination, self.encoding) is False + ): + if sys.version_info >= (3,0) and os.linesep != '\n': + data = data.replace('\n', os.linesep) # fix endings + data = self.encode(data) + try: # In Python < 2.5, try...except has to be nested in try...finally. try: - if (sys.version_info >= (3,0) and self.encoding and - hasattr(self.destination,'encoding') and - self.encoding != self.destination.encoding and - codecs.lookup(self.encoding) != - codecs.lookup(self.destination.encoding)): - # encode self, write bytes - bdata = self.encode(data) - if os.linesep != '\n': - bdata = bdata.replace('\n', os.linesep) - sys.stdout.buffer.write(bdata) - else: - self.destination.write(data) - except (UnicodeError, LookupError), err: # can only happen in py3k + self.destination.write(data) + except TypeError, e: + if sys.version_info >= (3,0) and isinstance(data, bytes): + try: + self.destination.buffer.write(data) + except AttributeError: + if check_encoding(self.destination, + self.encoding) is False: + raise ValueError('Encoding of %s (%s) differs \n' + ' from specified encoding (%s)' % + (self.destination_path or 'destination', + self.destination.encoding, self.encoding)) + else: + raise e + except (UnicodeError, LookupError), err: raise UnicodeError( 'Unable to encode output data. 
output-encoding is: ' '%s.\n(%s)' % (self.encoding, ErrorString(err))) Modified: trunk/docutils/src/main/resources/docutils/docutils/languages/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/languages/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/languages/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7126 2011-09-16 19:24:51Z milde $ +# $Id: __init__.py 7648 2013-04-18 07:36:22Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -11,7 +11,11 @@ __docformat__ = 'reStructuredText' +import sys + from docutils.utils import normalize_language_tag +if sys.version_info < (2,5): + from docutils._compat import __import__ _languages = {} @@ -23,18 +27,22 @@ """ # TODO: use a dummy module returning emtpy strings?, configurable? for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: - module = __import__(tag, globals(), locals()) + module = __import__(tag, globals(), locals(), level=1) except ImportError: - continue + try: + module = __import__(tag, globals(), locals(), level=0) + except ImportError: + continue _languages[tag] = module return module if reporter is not None: reporter.warning( 'language "%s" not supported: ' % language_code + 'Docutils-generated text will be in English.') - module = __import__('en', globals(), locals()) + module = __import__('en', globals(), locals(), level=1) _languages[tag] = module # warn only one time! 
return module Added: trunk/docutils/src/main/resources/docutils/docutils/languages/da.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/languages/da.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/languages/da.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# $Id: da.py 7678 2013-07-03 09:57:36Z milde $ +# Author: E D +# Copyright: This module has been placed in the public domain. + +# New language mappings are welcome. Before doing a new translation, please +# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be +# translated for each language: one in docutils/languages, the other in +# docutils/parsers/rst/languages. + +""" +Danish-language mappings for language-dependent features of Docutils. +""" + +__docformat__ = 'reStructuredText' + +labels = { + # fixed: language-dependent + 'author': u'Forfatter', + 'authors': u'Forfattere', + 'organization': u'Organisation', + 'address': u'Adresse', + 'contact': u'Kontakt', + 'version': u'Version', + 'revision': u'Revision', + 'status': u'Status', + 'date': u'Dato', + 'copyright': u'Copyright', + 'dedication': u'Dedikation', + 'abstract': u'Resumé', + 'attention': u'Giv agt!', + 'caution': u'Pas på!', + 'danger': u'!FARE!', + 'error': u'Fejl', + 'hint': u'Vink', + 'important': u'Vigtigt', + 'note': u'Bemærk', + 'tip': u'Tips', + 'warning': u'Advarsel', + 'contents': u'Indhold'} +"""Mapping of node class name to label text.""" + +bibliographic_fields = { + # language-dependent: fixed + u'forfatter': 'author', + u'forfattere': 'authors', + u'organisation': 'organization', + u'adresse': 'address', + u'kontakt': 'contact', + u'version': 'version', + u'revision': 'revision', + u'status': 'status', + u'dato': 'date', + u'copyright': 'copyright', + u'dedikation': 'dedication', + u'resume': 'abstract', + u'resumé': 'abstract'} +"""Danish (lowcased) to canonical name mapping for 
bibliographic fields.""" + +author_separators = [';', ','] +"""List of separator strings for the 'Authors' bibliographic field. Tried in +order.""" Modified: trunk/docutils/src/main/resources/docutils/docutils/languages/lt.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/languages/lt.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/languages/lt.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,5 @@ -# -*- coding: utf8 -*- -# $Id: lt.py 6459 2010-10-29 22:07:34Z milde $ +# -*- coding: utf-8 -*- +# $Id: lt.py 7668 2013-06-04 12:46:30Z milde $ # Author: Dalius Dobravolskas <dalius.do...@gmail.com> # Copyright: This module has been placed in the public domain. @@ -59,8 +59,8 @@ author_separators = [';', ','] """List of separator strings for the 'Authors' bibliographic field. Tried in order.""" -# -*- coding: utf8 -*- -# $Id: lt.py 6459 2010-10-29 22:07:34Z milde $ +# -*- coding: utf-8 -*- +# $Id: lt.py 7668 2013-06-04 12:46:30Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. Modified: trunk/docutils/src/main/resources/docutils/docutils/nodes.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/nodes.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/nodes.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,6 @@ -# $Id: nodes.py 7320 2012-01-19 22:33:02Z milde $ +# $Id: nodes.py 7595 2013-01-21 17:33:56Z milde $ # Author: David Goodger <goodger@python.org> +# Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. 
""" @@ -269,7 +270,7 @@ index = node.parent.index(node) for sibling in node.parent[index+1:]: r.extend(sibling.traverse(include_self=True, - descend=descend, + descend=descend, siblings=False, ascend=False, condition=condition)) if not ascend: @@ -298,15 +299,26 @@ if sys.version_info < (3,): class reprunicode(unicode): """ - A class that removes the initial u from unicode's repr. + A unicode sub-class that removes the initial u from unicode's repr. """ def __repr__(self): return unicode.__repr__(self)[1:] + + else: reprunicode = unicode +def ensure_str(s): + """ + Failsave conversion of `unicode` to `str`. + """ + if sys.version_info < (3,) and isinstance(s, unicode): + return s.encode('ascii', 'backslashreplace') + return s + + class Text(Node, reprunicode): """ @@ -340,7 +352,7 @@ data = self if len(data) > maxlen: data = data[:maxlen-4] + ' ...' - return '<%s: %s>' % (self.tagname, repr(reprunicode(data))) + return '<%s: %r>' % (self.tagname, reprunicode(data)) def __repr__(self): return self.shortrepr(maxlen=68) @@ -420,10 +432,24 @@ This is equivalent to ``element.extend([node1, node2])``. """ - list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs') + basic_attributes = ('ids', 'classes', 'names', 'dupnames') + """List attributes which are defined for every Element-derived class + instance and can be safely transferred to a different node.""" + + local_attributes = ('backrefs',) + """A list of class-specific attributes that should not be copied with the + standard attributes when replacing a node. + + NOTE: Derived classes should override this value to prevent any of its + attributes being copied by adding to the value in its parent class.""" + + list_attributes = basic_attributes + local_attributes """List attributes, automatically initialized to empty lists for all nodes.""" + known_attributes = list_attributes + ('source',) + """List attributes that are known to the Element base class.""" + tagname = None """The element generic identifier. 
If None, it is set as an instance attribute to the name of the class.""" @@ -477,14 +503,14 @@ break if self['names']: return '<%s "%s": %s>' % (self.__class__.__name__, - '; '.join(self['names']), data) + '; '.join([ensure_str(n) for n in self['names']]), data) else: return '<%s: %s>' % (self.__class__.__name__, data) def shortrepr(self): if self['names']: return '<%s "%s"...>' % (self.__class__.__name__, - '; '.join(self['names'])) + '; '.join([ensure_str(n) for n in self['names']])) else: return '<%s...>' % self.tagname @@ -620,9 +646,24 @@ has_key = hasattr - # support operator in + # support operator ``in`` __contains__ = hasattr + def get_language_code(self, fallback=''): + """Return node's language tag. + + Look iteratively in self and parents for a class argument + starting with ``language-`` and return the remainder of it + (which should be a `BCP49` language tag) or the `fallback`. + """ + for cls in self.get('classes', []): + if cls.startswith('language-'): + return cls[9:] + try: + return self.parent.get_language(fallback) + except AttributeError: + return fallback + def append(self, item): self.setup_child(item) self.children.append(item) @@ -653,18 +694,232 @@ else: return 1 - def update_basic_atts(self, dict): + def update_basic_atts(self, dict_): """ Update basic attributes ('ids', 'names', 'classes', - 'dupnames', but not 'source') from node or dictionary `dict`. + 'dupnames', but not 'source') from node or dictionary `dict_`. """ - if isinstance(dict, Node): - dict = dict.attributes - for att in ('ids', 'classes', 'names', 'dupnames'): - for value in dict.get(att, []): - if not value in self[att]: - self[att].append(value) + if isinstance(dict_, Node): + dict_ = dict_.attributes + for att in self.basic_attributes: + self.append_attr_list(att, dict_.get(att, [])) + def append_attr_list(self, attr, values): + """ + For each element in values, if it does not exist in self[attr], append + it. 
+ + NOTE: Requires self[attr] and values to be sequence type and the + former should specifically be a list. + """ + # List Concatenation + for value in values: + if not value in self[attr]: + self[attr].append(value) + + def coerce_append_attr_list(self, attr, value): + """ + First, convert both self[attr] and value to a non-string sequence + type; if either is not already a sequence, convert it to a list of one + element. Then call append_attr_list. + + NOTE: self[attr] and value both must not be None. + """ + # List Concatenation + if not isinstance(self.get(attr), list): + self[attr] = [self[attr]] + if not isinstance(value, list): + value = [value] + self.append_attr_list(attr, value) + + def replace_attr(self, attr, value, force = True): + """ + If self[attr] does not exist or force is True or omitted, set + self[attr] to value, otherwise do nothing. + """ + # One or the other + if force or self.get(attr) is None: + self[attr] = value + + def copy_attr_convert(self, attr, value, replace = True): + """ + If attr is an attribute of self, set self[attr] to + [self[attr], value], otherwise set self[attr] to value. + + NOTE: replace is not used by this function and is kept only for + compatibility with the other copy functions. + """ + if self.get(attr) is not value: + self.coerce_append_attr_list(attr, value) + + def copy_attr_coerce(self, attr, value, replace): + """ + If attr is an attribute of self and either self[attr] or value is a + list, convert all non-sequence values to a sequence of 1 element and + then concatenate the two sequence, setting the result to self[attr]. + If both self[attr] and value are non-sequences and replace is True or + self[attr] is None, replace self[attr] with value. Otherwise, do + nothing. 
+ """ + if self.get(attr) is not value: + if isinstance(self.get(attr), list) or \ + isinstance(value, list): + self.coerce_append_attr_list(attr, value) + else: + self.replace_attr(attr, value, replace) + + def copy_attr_concatenate(self, attr, value, replace): + """ + If attr is an attribute of self and both self[attr] and value are + lists, concatenate the two sequences, setting the result to + self[attr]. If either self[attr] or value are non-sequences and + replace is True or self[attr] is None, replace self[attr] with value. + Otherwise, do nothing. + """ + if self.get(attr) is not value: + if isinstance(self.get(attr), list) and \ + isinstance(value, list): + self.append_attr_list(attr, value) + else: + self.replace_attr(attr, value, replace) + + def copy_attr_consistent(self, attr, value, replace): + """ + If replace is True or selfpattr] is None, replace self[attr] with + value. Otherwise, do nothing. + """ + if self.get(attr) is not value: + self.replace_attr(attr, value, replace) + + def update_all_atts(self, dict_, update_fun = copy_attr_consistent, + replace = True, and_source = False): + """ + Updates all attributes from node or dictionary `dict_`. + + Appends the basic attributes ('ids', 'names', 'classes', + 'dupnames', but not 'source') and then, for all other attributes in + dict_, updates the same attribute in self. When attributes with the + same identifier appear in both self and dict_, the two values are + merged based on the value of update_fun. Generally, when replace is + True, the values in self are replaced or merged with the values in + dict_; otherwise, the values in self may be preserved or merged. When + and_source is True, the 'source' attribute is included in the copy. + + NOTE: When replace is False, and self contains a 'source' attribute, + 'source' is not replaced even when dict_ has a 'source' + attribute, though it may still be merged into a list depending + on the value of update_fun. 
+ NOTE: It is easier to call the update-specific methods then to pass + the update_fun method to this function. + """ + if isinstance(dict_, Node): + dict_ = dict_.attributes + + # Include the source attribute when copying? + if and_source: + filter_fun = self.is_not_list_attribute + else: + filter_fun = self.is_not_known_attribute + + # Copy the basic attributes + self.update_basic_atts(dict_) + + # Grab other attributes in dict_ not in self except the + # (All basic attributes should be copied already) + for att in filter(filter_fun, dict_): + update_fun(self, att, dict_[att], replace) + + def update_all_atts_consistantly(self, dict_, replace = True, + and_source = False): + """ + Updates all attributes from node or dictionary `dict_`. + + Appends the basic attributes ('ids', 'names', 'classes', + 'dupnames', but not 'source') and then, for all other attributes in + dict_, updates the same attribute in self. When attributes with the + same identifier appear in both self and dict_ and replace is True, the + values in self are replaced with the values in dict_; otherwise, the + values in self are preserved. When and_source is True, the 'source' + attribute is included in the copy. + + NOTE: When replace is False, and self contains a 'source' attribute, + 'source' is not replaced even when dict_ has a 'source' + attribute, though it may still be merged into a list depending + on the value of update_fun. + """ + self.update_all_atts(dict_, Element.copy_attr_consistent, replace, + and_source) + + def update_all_atts_concatenating(self, dict_, replace = True, + and_source = False): + """ + Updates all attributes from node or dictionary `dict_`. + + Appends the basic attributes ('ids', 'names', 'classes', + 'dupnames', but not 'source') and then, for all other attributes in + dict_, updates the same attribute in self. 
When attributes with the + same identifier appear in both self and dict_ whose values aren't each + lists and replace is True, the values in self are replaced with the + values in dict_; if the values from self and dict_ for the given + identifier are both of list type, then the two lists are concatenated + and the result stored in self; otherwise, the values in self are + preserved. When and_source is True, the 'source' attribute is + included in the copy. + + NOTE: When replace is False, and self contains a 'source' attribute, + 'source' is not replaced even when dict_ has a 'source' + attribute, though it may still be merged into a list depending + on the value of update_fun. + """ + self.update_all_atts(dict_, Element.copy_attr_concatenate, replace, + and_source) + + def update_all_atts_coercion(self, dict_, replace = True, + and_source = False): + """ + Updates all attributes from node or dictionary `dict_`. + + Appends the basic attributes ('ids', 'names', 'classes', + 'dupnames', but not 'source') and then, for all other attributes in + dict_, updates the same attribute in self. When attributes with the + same identifier appear in both self and dict_ whose values are both + not lists and replace is True, the values in self are replaced with + the values in dict_; if either of the values from self and dict_ for + the given identifier are of list type, then first any non-lists are + converted to 1-element lists and then the two lists are concatenated + and the result stored in self; otherwise, the values in self are + preserved. When and_source is True, the 'source' attribute is + included in the copy. + + NOTE: When replace is False, and self contains a 'source' attribute, + 'source' is not replaced even when dict_ has a 'source' + attribute, though it may still be merged into a list depending + on the value of update_fun. 
+ """ + self.update_all_atts(dict_, Element.copy_attr_coerce, replace, + and_source) + + def update_all_atts_convert(self, dict_, and_source = False): + """ + Updates all attributes from node or dictionary `dict_`. + + Appends the basic attributes ('ids', 'names', 'classes', + 'dupnames', but not 'source') and then, for all other attributes in + dict_, updates the same attribute in self. When attributes with the + same identifier appear in both self and dict_ then first any non-lists + are converted to 1-element lists and then the two lists are + concatenated and the result stored in self; otherwise, the values in + self are preserved. When and_source is True, the 'source' attribute + is included in the copy. + + NOTE: When replace is False, and self contains a 'source' attribute, + 'source' is not replaced even when dict_ has a 'source' + attribute, though it may still be merged into a list depending + on the value of update_fun. + """ + self.update_all_atts(dict_, Element.copy_attr_convert, + and_source = and_source) + def clear(self): self.children = [] @@ -694,7 +949,7 @@ else: # `update` is a Text node or `new` is an empty list. # Assert that we aren't losing any attributes. - for att in ('ids', 'names', 'classes', 'dupnames'): + for att in self.basic_attributes: assert not self[att], \ 'Losing "%s" attribute: %s' % (att, self[att]) self.parent.replace(self, new) @@ -778,7 +1033,23 @@ assert id is not None by_id.referenced = 1 + @classmethod + def is_not_list_attribute(cls, attr): + """ + Returns True if and only if the given attribute is NOT one of the + basic list attributes defined for all Elements. + """ + return attr not in cls.list_attributes + @classmethod + def is_not_known_attribute(cls, attr): + """ + Returns True if and only if the given attribute is NOT recognized by + this class. 
+ """ + return attr not in cls.known_attributes + + class TextElement(Element): """ Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 5618 2008-07-28 08:37:32Z strank $ +# $Id: __init__.py 7646 2013-04-17 14:17:37Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -8,7 +8,10 @@ __docformat__ = 'reStructuredText' +import sys from docutils import Component +if sys.version_info < (2,5): + from docutils._compat import __import__ class Parser(Component): @@ -43,5 +46,8 @@ parser_name = parser_name.lower() if parser_name in _parser_aliases: parser_name = _parser_aliases[parser_name] - module = __import__(parser_name, globals(), locals()) + try: + module = __import__(parser_name, globals(), locals(), level=1) + except ImportError: + module = __import__(parser_name, globals(), locals(), level=0) return module.Parser Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7320 2012-01-19 22:33:02Z milde $ +# $Id: __init__.py 7598 2013-01-30 12:39:24Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. 
@@ -73,7 +73,8 @@ import docutils.parsers import docutils.statemachine from docutils.parsers.rst import states -from docutils import frontend, nodes +from docutils import frontend, nodes, Component +from docutils.transforms import universal class Parser(docutils.parsers.Parser): @@ -133,10 +134,15 @@ ['--raw-enabled'], {'action': 'store_true'}), ('Token name set for parsing code with Pygments: one of ' - '"long", "short", or "none (no parsing)". Default is "short".', + '"long", "short", or "none (no parsing)". Default is "long".', ['--syntax-highlight'], {'choices': ['long', 'short', 'none'], - 'default': 'short', 'metavar': '<format>'}),)) + 'default': 'long', 'metavar': '<format>'}), + ('Change straight quotation marks to typographic form: ' + 'one of "yes", "no", "alt[ernative]" (default "no").', + ['--smart-quotes'], + {'default': False, 'validator': frontend.validate_ternary}), + )) config_section = 'restructuredtext parser' config_section_dependencies = ('parsers',) @@ -149,6 +155,10 @@ self.state_classes = states.state_classes self.inliner = inliner + def get_transforms(self): + return Component.get_transforms(self) + [ + universal.SmartQuotes] + def parse(self, inputstring, document): """Parse `inputstring` and populate `document`, a document tree.""" self.setup_parse(inputstring, document) @@ -321,7 +331,7 @@ and the line number added. Preferably use the `debug`, `info`, `warning`, `error`, or `severe` - wrapper methods, e.g. ``self.error(message)`` to generate an + wrapper methods, e.g. ``self.error(message)`` to generate an ERROR-level directive error. 
""" return DirectiveError(level, message) Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7119 2011-09-02 13:00:23Z milde $ +# $Id: __init__.py 7621 2013-03-04 13:20:49Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -10,8 +10,12 @@ import re import codecs +import sys + from docutils import nodes from docutils.parsers.rst.languages import en as _fallback_language_module +if sys.version_info < (2,5): + from docutils._compat import __import__ _directive_registry = { @@ -109,7 +113,7 @@ # Error handling done by caller. return None, messages try: - module = __import__(modulename, globals(), locals()) + module = __import__(modulename, globals(), locals(), level=1) except ImportError, detail: messages.append(document.reporter.error( 'Error importing directive module "%s" (directive "%s"):\n%s' @@ -228,9 +232,8 @@ """ match = re.match(r'^([0-9.]+) *(%s)$' % '|'.join(units), argument) try: - assert match is not None float(match.group(1)) - except (AssertionError, ValueError): + except (AttributeError, ValueError): raise ValueError( 'not a positive measure of one of the following units:\n%s' % ' '.join(['"%s"' % i for i in units])) @@ -258,7 +261,11 @@ try: return get_measure(argument, length_units + ['%']) except ValueError: - return get_measure(argument, ['']) + default + try: + return get_measure(argument, ['']) + default + except ValueError: + # raise ValueError with list of valid units: + return get_measure(argument, length_units + ['%']) def class_option(argument): """ Modified: 
trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/admonitions.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/admonitions.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/admonitions.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: admonitions.py 7072 2011-07-06 15:52:30Z milde $ +# $Id: admonitions.py 7681 2013-07-12 07:52:27Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -35,7 +35,10 @@ title_text = self.arguments[0] textnodes, messages = self.state.inline_text(title_text, self.lineno) - admonition_node += nodes.title(title_text, '', *textnodes) + title = nodes.title(title_text, '', *textnodes) + title.source, title.line = ( + self.state_machine.get_source_and_line(self.lineno)) + admonition_node += title admonition_node += messages if not 'classes' in self.options: admonition_node['classes'] += ['admonition-' + Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/images.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/images.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/images.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: images.py 7256 2011-12-14 23:53:38Z milde $ +# $Id: images.py 7753 2014-06-24 14:52:59Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. 
@@ -17,7 +17,7 @@ from docutils.nodes import fully_normalize_name, whitespace_normalize_name from docutils.parsers.rst.roles import set_classes try: # check for the Python Imaging Library - import PIL + import PIL.Image except ImportError: try: # sometimes PIL modules are put in PYTHONPATH's root import Image @@ -134,7 +134,7 @@ else: self.state.document.settings.record_dependencies.add( imagepath.replace('\\', '/')) - figure_node['width'] = img.size[0] + figure_node['width'] = '%dpx' % img.size[0] del img elif figwidth is not None: figure_node['width'] = figwidth @@ -149,6 +149,8 @@ if isinstance(first_node, nodes.paragraph): caption = nodes.caption(first_node.rawsource, '', *first_node.children) + caption.source = first_node.source + caption.line = first_node.line figure_node += caption elif not (isinstance(first_node, nodes.comment) and len(first_node) == 0): Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/misc.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/misc.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/misc.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: misc.py 7328 2012-01-27 08:41:35Z milde $ +# $Id: misc.py 7487 2012-07-22 21:20:28Z milde $ # Authors: David Goodger <goodger@python.org>; Dethe Elza # Copyright: This module has been placed in the public domain. 
@@ -11,7 +11,8 @@ import re import time from docutils import io, nodes, statemachine, utils -from docutils.error_reporting import SafeString, ErrorString +from docutils.utils.error_reporting import SafeString, ErrorString +from docutils.utils.error_reporting import locale_encoding from docutils.parsers.rst import Directive, convert_directive_function from docutils.parsers.rst import directives, roles, states from docutils.parsers.rst.directives.body import CodeBlock, NumberLines @@ -64,15 +65,19 @@ path = nodes.reprunicode(path) encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) + e_handler=self.state.document.settings.input_encoding_error_handler tab_width = self.options.get( 'tab-width', self.state.document.settings.tab_width) try: self.state.document.settings.record_dependencies.add(path) - include_file = io.FileInput( - source_path=path, encoding=encoding, - error_handler=(self.state.document.settings.\ - input_encoding_error_handler), - handle_io_errors=None) + include_file = io.FileInput(source_path=path, + encoding=encoding, + error_handler=e_handler) + except UnicodeEncodeError, error: + raise self.severe(u'Problems with "%s" directive path:\n' + 'Cannot encode input file path "%s" ' + '(wrong locale?).' % + (self.name, SafeString(path))) except IOError, error: raise self.severe(u'Problems with "%s" directive path:\n%s.' 
% (self.name, ErrorString(error))) @@ -181,6 +186,7 @@ attributes = {'format': ' '.join(self.arguments[0].lower().split())} encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) + e_handler=self.state.document.settings.input_encoding_error_handler if self.content: if 'file' in self.options or 'url' in self.options: raise self.error( @@ -198,11 +204,9 @@ self.options['file'])) path = utils.relative_path(None, path) try: - raw_file = io.FileInput( - source_path=path, encoding=encoding, - error_handler=(self.state.document.settings.\ - input_encoding_error_handler), - handle_io_errors=None) + raw_file = io.FileInput(source_path=path, + encoding=encoding, + error_handler=e_handler) # TODO: currently, raw input files are recorded as # dependencies even if not used for the chosen output format. self.state.document.settings.record_dependencies.add(path) @@ -226,10 +230,9 @@ except (urllib2.URLError, IOError, OSError), error: raise self.severe(u'Problems with "%s" directive URL "%s":\n%s.' % (self.name, self.options['url'], ErrorString(error))) - raw_file = io.StringInput( - source=raw_text, source_path=source, encoding=encoding, - error_handler=(self.state.document.settings.\ - input_encoding_error_handler)) + raw_file = io.StringInput(source=raw_text, source_path=source, + encoding=encoding, + error_handler=e_handler) try: text = raw_file.read() except UnicodeError, error: @@ -467,8 +470,22 @@ raise self.error( 'Invalid context: the "%s" directive can only be used within ' 'a substitution definition.' % self.name) - format = '\n'.join(self.content) or '%Y-%m-%d' - text = time.strftime(format) + format_str = '\n'.join(self.content) or '%Y-%m-%d' + if sys.version_info< (3, 0): + try: + format_str = format_str.encode(locale_encoding or 'utf-8') + except UnicodeEncodeError: + raise self.warning(u'Cannot encode date format string ' + u'with locale encoding "%s".' 
% locale_encoding) + text = time.strftime(format_str) + if sys.version_info< (3, 0): + # `text` is a byte string that may contain non-ASCII characters: + try: + text = text.decode(locale_encoding or 'utf-8') + except UnicodeDecodeError: + text = text.decode(locale_encoding or 'utf-8', 'replace') + raise self.warning(u'Error decoding "%s"' + u'with locale encoding "%s".' % (text, locale_encoding)) return [nodes.Text(text)] Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/tables.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/tables.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/directives/tables.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: tables.py 7328 2012-01-27 08:41:35Z milde $ +# $Id: tables.py 7747 2014-03-20 10:51:10Z milde $ # Authors: David Goodger <goodger@python.org>; David Priest # Copyright: This module has been placed in the public domain. 
@@ -14,7 +14,7 @@ import csv from docutils import io, nodes, statemachine, utils -from docutils.error_reporting import SafeString +from docutils.utils.error_reporting import SafeString from docutils.utils import SystemMessagePropagation from docutils.parsers.rst import Directive from docutils.parsers.rst import directives @@ -164,19 +164,20 @@ quotechar = '"' doublequote = True skipinitialspace = True + strict = True lineterminator = '\n' quoting = csv.QUOTE_MINIMAL def __init__(self, options): if 'delim' in options: - self.delimiter = str(options['delim']) + self.delimiter = CSVTable.encode_for_csv(options['delim']) if 'keepspace' in options: self.skipinitialspace = False if 'quote' in options: - self.quotechar = str(options['quote']) + self.quotechar = CSVTable.encode_for_csv(options['quote']) if 'escape' in options: self.doublequote = False - self.escapechar = str(options['escape']) + self.escapechar = CSVTable.encode_for_csv(options['escape']) csv.Dialect.__init__(self) @@ -189,6 +190,7 @@ escapechar = '\\' doublequote = False skipinitialspace = True + strict = True lineterminator = '\n' quoting = csv.QUOTE_MINIMAL @@ -223,9 +225,12 @@ except SystemMessagePropagation, detail: return [detail.args[0]] except csv.Error, detail: + message = str(detail) + if sys.version_info < (3,) and '1-character string' in message: + message += '\nwith Python 2.x this must be an ASCII character.' error = self.state_machine.reporter.error( 'Error with CSV data in "%s" directive:\n%s' - % (self.name, detail), nodes.literal_block( + % (self.name, message), nodes.literal_block( self.block_text, self.block_text), line=self.lineno) return [error] table = (col_widths, table_head, table_body) @@ -244,6 +249,7 @@ """ encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) + error_handler = self.state.document.settings.input_encoding_error_handler if self.content: # CSV data is from directive content. 
if 'file' in self.options or 'url' in self.options: @@ -270,11 +276,9 @@ source = utils.relative_path(None, source) try: self.state.document.settings.record_dependencies.add(source) - csv_file = io.FileInput( - source_path=source, encoding=encoding, - error_handler=(self.state.document.settings.\ - input_encoding_error_handler), - handle_io_errors=None) + csv_file = io.FileInput(source_path=source, + encoding=encoding, + error_handler=error_handler) csv_data = csv_file.read().splitlines() except IOError, error: severe = self.state_machine.reporter.severe( Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 6423 2010-09-17 21:38:29Z milde $ +# $Id: __init__.py 7648 2013-04-18 07:36:22Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. 
@@ -12,18 +12,26 @@ __docformat__ = 'reStructuredText' +import sys + from docutils.utils import normalize_language_tag +if sys.version_info < (2,5): + from docutils._compat import __import__ _languages = {} def get_language(language_code): for tag in normalize_language_tag(language_code): + tag = tag.replace('-','_') # '-' not valid in module names if tag in _languages: return _languages[tag] try: - module = __import__(tag, globals(), locals()) + module = __import__(tag, globals(), locals(), level=1) except ImportError: - continue + try: + module = __import__(tag, globals(), locals(), level=0) + except ImportError: + continue _languages[tag] = module return module return None Added: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/da.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/da.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/da.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# $Id: da.py 7678 2013-07-03 09:57:36Z milde $ +# Author: E D +# Copyright: This module has been placed in the public domain. + +# New language mappings are welcome. Before doing a new translation, please +# read <http://docutils.sf.net/docs/howto/i18n.html>. Two files must be +# translated for each language: one in docutils/languages, the other in +# docutils/parsers/rst/languages. + +""" +Danish-language mappings for language-dependent features of +reStructuredText. 
+""" + +__docformat__ = 'reStructuredText' + + +directives = { + # language-dependent: fixed + u'giv agt': 'attention', + u'pas på': 'caution', + u'kode': 'code', + u'kode-blok': 'code', + u'kildekode': 'code', + u'fare': 'danger', + u'fejl': 'error', + u'vink': 'hint', + u'vigtigt': 'important', + u'bemærk': 'note', + u'tips': 'tip', + u'advarsel': 'warning', + u'formaning': 'admonition', + u'sidebjælke': 'sidebar', + u'emne': 'topic', + u'linje-blok': 'line-block', + u'linie-blok': 'line-block', + u'parset-literal': 'parsed-literal', + u'rubrik': 'rubric', + u'epigraf': 'epigraph', + u'fremhævninger': 'highlights', + u'pull-quote (translation required)': 'pull-quote', + u'compound (translation required)': 'compound', + u'container (translation required)': 'container', + #'questions': 'questions', + u'tabel': 'table', + u'csv-tabel': 'csv-table', + u'liste-tabel': 'list-table', + #'qa': 'questions', + #'faq': 'questions', + u'meta': 'meta', + u'math (translation required)': 'math', + #'imagemap': 'imagemap', + u'billede': 'image', + u'figur': 'figure', + u'inkludér': 'include', + u'inkluder': 'include', + u'rå': 'raw', + u'erstat': 'replace', + u'unicode': 'unicode', + u'dato': 'date', + u'klasse': 'class', + u'rolle': 'role', + u'forvalgt-rolle': 'default-role', + u'titel': 'title', + u'indhold': 'contents', + u'sektnum': 'sectnum', + u'sektions-nummerering': 'sectnum', + u'sidehovede': 'header', + u'sidefod': 'footer', + #'footnotes': 'footnotes', + #'citations': 'citations', + u'target-notes (translation required)': 'target-notes', + u'restructuredtext-test-direktiv': 'restructuredtext-test-directive'} +"""Danish name to registered (in directives/__init__.py) directive name +mapping.""" + +roles = { + # language-dependent: fixed + u'forkortelse': 'abbreviation', + u'fork': 'abbreviation', + u'akronym': 'acronym', + u'ac (translation required)': 'acronym', + u'kode': 'code', + u'indeks': 'index', + u'i': 'index', + u'subscript (translation required)': 
'subscript', + u'sub (translation required)': 'subscript', + u'superscript (translation required)': 'superscript', + u'sup (translation required)': 'superscript', + u'titel-reference': 'title-reference', + u'titel': 'title-reference', + u't': 'title-reference', + u'pep-reference': 'pep-reference', + u'pep': 'pep-reference', + u'rfc-reference': 'rfc-reference', + u'rfc': 'rfc-reference', + u'emfase': 'emphasis', + u'kraftig': 'strong', + u'literal': 'literal', + u'math (translation required)': 'math', + u'navngivet-reference': 'named-reference', + u'anonym-reference': 'anonymous-reference', + u'fodnote-reference': 'footnote-reference', + u'citation-reference (translation required)': 'citation-reference', + u'substitutions-reference': 'substitution-reference', + u'target (translation required)': 'target', + u'uri-reference': 'uri-reference', + u'uri': 'uri-reference', + u'url': 'uri-reference', + u'rå': 'raw',} +"""Mapping of Danish role names to canonical role names for interpreted text. +""" Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/lt.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/lt.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/languages/lt.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,5 @@ -# -*- coding: utf8 -*- -# $Id: lt.py 7119 2011-09-02 13:00:23Z milde $ +# -*- coding: utf-8 -*- +# $Id: lt.py 7668 2013-06-04 12:46:30Z milde $ # Author: Dalius Dobravolskas <dalius.do...@gmail.com> # Copyright: This module has been placed in the public domain. 
Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/roles.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/roles.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/roles.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: roles.py 7310 2012-01-09 15:01:15Z milde $ +# $Id: roles.py 7514 2012-09-14 14:27:12Z milde $ # Author: Edward Loper <edloper@gradient.cis.upenn.edu> # Copyright: This module has been placed in the public domain. @@ -320,11 +320,10 @@ set_classes(options) language = options.get('language', '') classes = ['code'] - if language: - classes.append(language) if 'classes' in options: classes.extend(options['classes']) - + if language and language not in classes: + classes.append(language) try: tokens = Lexer(utils.unescape(text, 1), language, inliner.document.settings.syntax_highlight) Modified: trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/states.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/states.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/parsers/rst/states.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: states.py 7363 2012-02-20 21:31:48Z goodger $ +# $Id: states.py 7640 2013-03-25 20:57:52Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. 
@@ -105,13 +105,9 @@ import sys import re -try: - import roman -except ImportError: - import docutils.utils.roman as roman from types import FunctionType, MethodType -from docutils import nodes, statemachine, utils, urischemes +from docutils import nodes, statemachine, utils from docutils import ApplicationError, DataError from docutils.statemachine import StateMachineWS, StateWS from docutils.nodes import fully_normalize_name as normalize_name @@ -120,7 +116,7 @@ from docutils.parsers.rst import directives, languages, tableparser, roles from docutils.parsers.rst.languages import en as _fallback_language_module from docutils.utils import escape2null, unescape, column_width -from docutils.utils import punctuation_chars +from docutils.utils import punctuation_chars, roman, urischemes class MarkupError(DataError): pass class UnknownInterpretedRoleError(DataError): pass @@ -608,13 +604,14 @@ ) %(end_string_suffix)s """ % locals(), re.VERBOSE | re.UNICODE), - embedded_uri=re.compile( + embedded_link=re.compile( r""" ( (?:[ \n]+|^) # spaces or beginning of line/string < # open bracket %(non_whitespace_after)s - ([^<>\x00]+) # anything but angle brackets & nulls + ([^<>\x00]+(\x00_)?) 
# anything but angle brackets & nulls + # except escaped trailing low line %(non_whitespace_before)s > # close bracket w/o whitespace before ) @@ -787,41 +784,64 @@ return string[:matchstart], [prb], string[matchend:], [msg] def phrase_ref(self, before, after, rawsource, escaped, text): - match = self.patterns.embedded_uri.search(escaped) - if match: + match = self.patterns.embedded_link.search(escaped) + if match: # embedded <URI> or <alias_> text = unescape(escaped[:match.start(0)]) - uri_text = match.group(2) - uri = ''.join(uri_text.split()) - uri = self.adjust_uri(uri) - if uri: - target = nodes.target(match.group(1), refuri=uri) + aliastext = unescape(match.group(2), restore_backslashes=True) + if aliastext.endswith('_') and not (aliastext.endswith(r'\_') + or self.patterns.uri.match(aliastext)): + aliastype = 'name' + alias = normalize_name(aliastext[:-1]) + target = nodes.target(match.group(1), refname=alias) + target.indirect_reference_name = aliastext[:-1] + else: + aliastype = 'uri' + alias = ''.join(aliastext.split()) + alias = self.adjust_uri(alias) + if alias.endswith(r'\_'): + alias = alias[:-2] + '_' + target = nodes.target(match.group(1), refuri=alias) target.referenced = 1 - else: - raise ApplicationError('problem with URI: %r' % uri_text) + if not aliastext: + raise ApplicationError('problem with embedded link: %r' + % aliastext) if not text: - text = uri + text = alias else: target = None + refname = normalize_name(text) reference = nodes.reference(rawsource, text, name=whitespace_normalize_name(text)) node_list = [reference] + if rawsource[-2:] == '__': - if target: - reference['refuri'] = uri + if target and (aliastype == 'name'): + reference['refname'] = alias + self.document.note_refname(reference) + # self.document.note_indirect_target(target) # required? 
+ elif target and (aliastype == 'uri'): + reference['refuri'] = alias else: reference['anonymous'] = 1 else: if target: - reference['refuri'] = uri target['names'].append(refname) - self.document.note_explicit_target(target, self.parent) + if aliastype == 'name': + reference['refname'] = alias + self.document.note_indirect_target(target) + self.document.note_refname(reference) + else: + reference['refuri'] = alias + self.document.note_explicit_target(target, self.parent) + # target.note_referenced_by(name=refname) node_list.append(target) else: reference['refname'] = refname self.document.note_refname(reference) return before, node_list, after, [] + def adjust_uri(self, uri): match = self.patterns.email.match(uri) if match: @@ -1570,7 +1590,7 @@ def nest_line_block_lines(self, block): for index in range(1, len(block)): - if block[index].indent is None: + if getattr(block[index], 'indent', None) is None: block[index].indent = block[index - 1].indent self.nest_line_block_segment(block) @@ -2783,6 +2803,9 @@ assert len(lines) == 1 text_nodes, messages = self.inline_text(lines[0], lineno) term_node = nodes.term() + (term_node.source, + term_node.line) = self.state_machine.get_source_and_line(lineno) + term_node.rawsource = unescape(lines[0]) node_list = [term_node] for i in range(len(text_nodes)): node = text_nodes[i] Modified: trunk/docutils/src/main/resources/docutils/docutils/readers/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/readers/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/readers/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 5618 2008-07-28 08:37:32Z strank $ +# $Id: __init__.py 7648 2013-04-18 07:36:22Z milde $ # Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer # Copyright: This module has been placed in the public domain. 
@@ -8,9 +8,12 @@ __docformat__ = 'reStructuredText' +import sys from docutils import utils, parsers, Component from docutils.transforms import universal +if sys.version_info < (2,5): + from docutils._compat import __import__ class Reader(Component): @@ -103,5 +106,8 @@ reader_name = reader_name.lower() if reader_name in _reader_aliases: reader_name = _reader_aliases[reader_name] - module = __import__(reader_name, globals(), locals()) + try: + module = __import__(reader_name, globals(), locals(), level=1) + except ImportError: + module = __import__(reader_name, globals(), locals(), level=0) return module.Reader Deleted: trunk/docutils/src/main/resources/docutils/docutils/roman.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/roman.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/roman.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,81 +0,0 @@ -"""Convert to and from Roman numerals""" - -__author__ = "Mark Pilgrim (f8dy@diveintopython.org)" -__version__ = "1.4" -__date__ = "8 August 2001" -__copyright__ = """Copyright (c) 2001 Mark Pilgrim - -This program is part of "Dive Into Python", a free Python tutorial for -experienced programmers. Visit http://diveintopython.org/ for the -latest version. 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the Python 2.1.1 license, available at -http://www.python.org/2.1.1/license.html -""" - -import re - -#Define exceptions -class RomanError(Exception): pass -class OutOfRangeError(RomanError): pass -class NotIntegerError(RomanError): pass -class InvalidRomanNumeralError(RomanError): pass - -#Define digit mapping -romanNumeralMap = (('M', 1000), - ('CM', 900), - ('D', 500), - ('CD', 400), - ('C', 100), - ('XC', 90), - ('L', 50), - ('XL', 40), - ('X', 10), - ('IX', 9), - ('V', 5), - ('IV', 4), - ('I', 1)) - -def toRoman(n): - """convert integer to Roman numeral""" - if not (0 < n < 5000): - raise OutOfRangeError, "number out of range (must be 1..4999)" - if int(n) != n: - raise NotIntegerError, "decimals can not be converted" - - result = "" - for numeral, integer in romanNumeralMap: - while n >= integer: - result += numeral - n -= integer - return result - -#Define pattern to detect valid Roman numerals -romanNumeralPattern = re.compile(""" - ^ # beginning of string - M{0,4} # thousands - 0 to 4 M's - (CM|CD|D?C{0,3}) # hundreds - 900 (CM), 400 (CD), 0-300 (0 to 3 C's), - # or 500-800 (D, followed by 0 to 3 C's) - (XC|XL|L?X{0,3}) # tens - 90 (XC), 40 (XL), 0-30 (0 to 3 X's), - # or 50-80 (L, followed by 0 to 3 X's) - (IX|IV|V?I{0,3}) # ones - 9 (IX), 4 (IV), 0-3 (0 to 3 I's), - # or 5-8 (V, followed by 0 to 3 I's) - $ # end of string - """ ,re.VERBOSE) - -def fromRoman(s): - """convert Roman numeral to integer""" - if not s: - raise InvalidRomanNumeralError, 'Input can not be blank' - if not romanNumeralPattern.search(s): - raise InvalidRomanNumeralError, 'Invalid Roman numeral: %s' % s - - result = 0 - index = 0 - for numeral, integer in romanNumeralMap: - while s[index:index+len(numeral)] == numeral: - result += integer - index += len(numeral) - return result - Modified: trunk/docutils/src/main/resources/docutils/docutils/statemachine.py 
=================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/statemachine.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/statemachine.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ - # $Id: statemachine.py 7320 2012-01-19 22:33:02Z milde $ + # $Id: statemachine.py 7464 2012-06-25 13:16:03Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -111,7 +111,7 @@ import types import unicodedata from docutils import utils -from docutils.error_reporting import ErrorOutput +from docutils.utils.error_reporting import ErrorOutput class StateMachine: Modified: trunk/docutils/src/main/resources/docutils/docutils/transforms/frontmatter.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/transforms/frontmatter.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/transforms/frontmatter.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: frontmatter.py 5618 2008-07-28 08:37:32Z strank $ +# $Id: frontmatter.py 7595 2013-01-21 17:33:56Z milde $ # Author: David Goodger, Ueli Schlaepfer <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -49,13 +49,24 @@ `node` is normally a document. """ + # Type check + if not isinstance(node, nodes.Element): + raise TypeError, 'node must be of Element-derived type.' + # `node` must not have a title yet. 
assert not (len(node) and isinstance(node[0], nodes.title)) section, index = self.candidate_index(node) if index is None: return None + # Transfer the section's attributes to the node: - node.attributes.update(section.attributes) + # NOTE: Change second parameter to False to NOT replace + # attributes that already exist in node with those in + # section + # NOTE: Remove third parameter to NOT copy the 'source' + # attribute from section + node.update_all_atts_concatenating(section, True, True) + # setup_child is called automatically for all nodes. node[:] = (section[:1] # section title + node[:index] # everything that was in the @@ -81,18 +92,23 @@ <subtitle> ... """ + # Type check + if not isinstance(node, nodes.Element): + raise TypeError, 'node must be of Element-derived type.' + subsection, index = self.candidate_index(node) if index is None: return None subtitle = nodes.subtitle() - # Transfer the subsection's attributes to the new subtitle: - # This causes trouble with list attributes! To do: Write a - # test case which catches direct access to the `attributes` - # dictionary and/or write a test case which shows problems in - # this particular case. - subtitle.attributes.update(subsection.attributes) - # We're losing the subtitle's attributes here! To do: Write a - # test case which shows this behavior. 
+ + # Transfer the subsection's attributes to the new subtitle + # NOTE: Change second parameter to False to NOT replace + # attributes that already exist in node with those in + # section + # NOTE: Remove third parameter to NOT copy the 'source' + # attribute from section + subtitle.update_all_atts_concatenating(subsection, True, True) + # Transfer the contents of the subsection's title to the # subtitle: subtitle[:] = subsection[0][:] Modified: trunk/docutils/src/main/resources/docutils/docutils/transforms/references.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/transforms/references.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/transforms/references.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: references.py 7320 2012-01-19 22:33:02Z milde $ +# $Id: references.py 7624 2013-03-07 14:10:26Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -275,7 +275,8 @@ reflist.extend(self.document.refnames.get(name, [])) for id in target['ids']: reflist.extend(self.document.refids.get(id, [])) - naming += '(id="%s")' % target['ids'][0] + if target['ids']: + naming += '(id="%s")' % target['ids'][0] msg = self.document.reporter.error( 'Indirect hyperlink target %s refers to target "%s", %s.' 
% (naming, target['refname'], explanation), base_node=target) @@ -391,15 +392,16 @@ <target id="id1" name="direct internal"> """ for name in target['names']: - refid = self.document.nameids[name] + refid = self.document.nameids.get(name) reflist = self.document.refnames.get(name, []) if reflist: target.note_referenced_by(name=name) for ref in reflist: if ref.resolved: continue - del ref['refname'] - ref['refid'] = refid + if refid: + del ref['refname'] + ref['refid'] = refid ref.resolved = 1 Modified: trunk/docutils/src/main/resources/docutils/docutils/transforms/universal.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/transforms/universal.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/transforms/universal.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,7 @@ -# $Id: universal.py 6112 2009-09-03 07:27:59Z milde $ -# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer +# $Id: universal.py 7668 2013-06-04 12:46:30Z milde $ +# -*- coding: utf-8 -*- +# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde +# Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. """ @@ -19,8 +21,8 @@ import time from docutils import nodes, utils from docutils.transforms import TransformError, Transform +from docutils.utils import smartquotes - class Decorations(Transform): """ @@ -201,3 +203,90 @@ node['classes'].remove(class_value) if class_value in self.strip_elements: return 1 + +class SmartQuotes(Transform): + + """ + Replace ASCII quotation marks with typographic form. + + Also replace multiple dashes with em-dash/en-dash characters. 
+ """ + + default_priority = 850 + + def __init__(self, document, startnode): + Transform.__init__(self, document, startnode=startnode) + self.unsupported_languages = set() + + def get_tokens(self, txtnodes): + # A generator that yields ``(texttype, nodetext)`` tuples for a list + # of "Text" nodes (interface to ``smartquotes.educate_tokens()``). + + texttype = {True: 'literal', # "literal" text is not changed: + False: 'plain'} + for txtnode in txtnodes: + nodetype = texttype[isinstance(txtnode.parent, + (nodes.literal, + nodes.math, + nodes.image, + nodes.raw, + nodes.problematic))] + yield (nodetype, txtnode.astext()) + + + def apply(self): + smart_quotes = self.document.settings.smart_quotes + if not smart_quotes: + return + try: + alternative = smart_quotes.startswith('alt') + except AttributeError: + alternative = False + # print repr(alternative) + + document_language = self.document.settings.language_code + + # "Educate" quotes in normal text. Handle each block of text + # (TextElement node) as a unit to keep context around inline nodes: + for node in self.document.traverse(nodes.TextElement): + # skip preformatted text blocks and special elements: + if isinstance(node, (nodes.FixedTextElement, nodes.Special)): + continue + # nested TextElements are not "block-level" elements: + if isinstance(node.parent, nodes.TextElement): + continue + + # list of text nodes in the "text block": + txtnodes = [txtnode for txtnode in node.traverse(nodes.Text) + if not isinstance(txtnode.parent, + nodes.option_string)] + + # language: use typographical quotes for language "lang" + lang = node.get_language_code(document_language) + # use alternative form if `smart-quotes` setting starts with "alt": + if alternative: + if '-x-altquot' in lang: + lang = lang.replace('-x-altquot', '') + else: + lang += '-x-altquot' + # drop subtags missing in quotes: + for tag in utils.normalize_language_tag(lang): + if tag in smartquotes.smartchars.quotes: + lang = tag + break + else: # 
language not supported: (keep ASCII quotes) + if lang not in self.unsupported_languages: + self.document.reporter.warning('No smart quotes ' + 'defined for language "%s".'%lang, base_node=node) + self.unsupported_languages.add(lang) + lang = '' + + # Iterator educating quotes in plain text: + # '2': set all, using old school en- and em- dash shortcuts + teacher = smartquotes.educate_tokens(self.get_tokens(txtnodes), + attr='2', language=lang) + + for txtnode, newtext in zip(txtnodes, teacher): + txtnode.parent.replace(txtnode, nodes.Text(newtext)) + + self.unsupported_languages = set() # reset Deleted: trunk/docutils/src/main/resources/docutils/docutils/urischemes.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/urischemes.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/urischemes.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,136 +0,0 @@ -# $Id: urischemes.py 4564 2006-05-21 20:44:42Z wiemann $ -# Author: David Goodger <goodger@python.org> -# Copyright: This module has been placed in the public domain. - -""" -`schemes` is a dictionary with lowercase URI addressing schemes as -keys and descriptions as values. It was compiled from the index at -http://www.iana.org/assignments/uri-schemes (revised 2005-11-28) -and an older list at http://www.w3.org/Addressing/schemes.html. -""" - -# Many values are blank and should be filled in with useful descriptions. 
- -schemes = { - 'about': 'provides information on Navigator', - 'acap': 'Application Configuration Access Protocol; RFC 2244', - 'addbook': "To add vCard entries to Communicator's Address Book", - 'afp': 'Apple Filing Protocol', - 'afs': 'Andrew File System global file names', - 'aim': 'AOL Instant Messenger', - 'callto': 'for NetMeeting links', - 'castanet': 'Castanet Tuner URLs for Netcaster', - 'chttp': 'cached HTTP supported by RealPlayer', - 'cid': 'content identifier; RFC 2392', - 'crid': 'TV-Anytime Content Reference Identifier; RFC 4078', - 'data': ('allows inclusion of small data items as "immediate" data; ' - 'RFC 2397'), - 'dav': 'Distributed Authoring and Versioning Protocol; RFC 2518', - 'dict': 'dictionary service protocol; RFC 2229', - 'dns': 'Domain Name System resources', - 'eid': ('External ID; non-URL data; general escape mechanism to allow ' - 'access to information for applications that are too ' - 'specialized to justify their own schemes'), - 'fax': ('a connection to a terminal that can handle telefaxes ' - '(facsimiles); RFC 2806'), - 'feed' : 'NetNewsWire feed', - 'file': 'Host-specific file names; RFC 1738', - 'finger': '', - 'freenet': '', - 'ftp': 'File Transfer Protocol; RFC 1738', - 'go': 'go; RFC 3368', - 'gopher': 'The Gopher Protocol', - 'gsm-sms': ('Global System for Mobile Communications Short Message ' - 'Service'), - 'h323': ('video (audiovisual) communication on local area networks; ' - 'RFC 3508'), - 'h324': ('video and audio communications over low bitrate connections ' - 'such as POTS modem connections'), - 'hdl': 'CNRI handle system', - 'hnews': 'an HTTP-tunneling variant of the NNTP news protocol', - 'http': 'Hypertext Transfer Protocol; RFC 2616', - 'https': 'HTTP over SSL; RFC 2818', - 'hydra': 'SubEthaEdit URI. 
See http://www.codingmonkeys.de/subethaedit.', - 'iioploc': 'Internet Inter-ORB Protocol Location?', - 'ilu': 'Inter-Language Unification', - 'im': 'Instant Messaging; RFC 3860', - 'imap': 'Internet Message Access Protocol; RFC 2192', - 'info': 'Information Assets with Identifiers in Public Namespaces', - 'ior': 'CORBA interoperable object reference', - 'ipp': 'Internet Printing Protocol; RFC 3510', - 'irc': 'Internet Relay Chat', - 'iris.beep': 'iris.beep; RFC 3983', - 'iseek' : 'See www.ambrosiasw.com; a little util for OS X.', - 'jar': 'Java archive', - 'javascript': ('JavaScript code; evaluates the expression after the ' - 'colon'), - 'jdbc': 'JDBC connection URI.', - 'ldap': 'Lightweight Directory Access Protocol', - 'lifn': '', - 'livescript': '', - 'lrq': '', - 'mailbox': 'Mail folder access', - 'mailserver': 'Access to data available from mail servers', - 'mailto': 'Electronic mail address; RFC 2368', - 'md5': '', - 'mid': 'message identifier; RFC 2392', - 'mocha': '', - 'modem': ('a connection to a terminal that can handle incoming data ' - 'calls; RFC 2806'), - 'mtqp': 'Message Tracking Query Protocol; RFC 3887', - 'mupdate': 'Mailbox Update (MUPDATE) Protocol; RFC 3656', - 'news': 'USENET news; RFC 1738', - 'nfs': 'Network File System protocol; RFC 2224', - 'nntp': 'USENET news using NNTP access; RFC 1738', - 'opaquelocktoken': 'RFC 2518', - 'phone': '', - 'pop': 'Post Office Protocol; RFC 2384', - 'pop3': 'Post Office Protocol v3', - 'pres': 'Presence; RFC 3859', - 'printer': '', - 'prospero': 'Prospero Directory Service; RFC 4157', - 'rdar' : ('URLs found in Darwin source ' - '(http://www.opensource.apple.com/darwinsource/).'), - 'res': '', - 'rtsp': 'real time streaming protocol; RFC 2326', - 'rvp': '', - 'rwhois': '', - 'rx': 'Remote Execution', - 'sdp': '', - 'service': 'service location; RFC 2609', - 'shttp': 'secure hypertext transfer protocol', - 'sip': 'Session Initiation Protocol; RFC 3261', - 'sips': 'secure session intitiaion protocol; RFC 
3261', - 'smb': 'SAMBA filesystems.', - 'snews': 'For NNTP postings via SSL', - 'snmp': 'Simple Network Management Protocol; RFC 4088', - 'soap.beep': 'RFC 3288', - 'soap.beeps': 'RFC 3288', - 'ssh': 'Reference to interactive sessions via ssh.', - 't120': 'real time data conferencing (audiographics)', - 'tag': 'RFC 4151', - 'tcp': '', - 'tel': ('a connection to a terminal that handles normal voice ' - 'telephone calls, a voice mailbox or another voice messaging ' - 'system or a service that can be operated using DTMF tones; ' - 'RFC 2806.'), - 'telephone': 'telephone', - 'telnet': 'Reference to interactive sessions; RFC 4248', - 'tftp': 'Trivial File Transfer Protocol; RFC 3617', - 'tip': 'Transaction Internet Protocol; RFC 2371', - 'tn3270': 'Interactive 3270 emulation sessions', - 'tv': '', - 'urn': 'Uniform Resource Name; RFC 2141', - 'uuid': '', - 'vemmi': 'versatile multimedia interface; RFC 2122', - 'videotex': '', - 'view-source': 'displays HTML code that was generated with JavaScript', - 'wais': 'Wide Area Information Servers; RFC 4156', - 'whodp': '', - 'whois++': 'Distributed directory service.', - 'x-man-page': ('Opens man page in Terminal.app on OS X ' - '(see macosxhints.com)'), - 'xmlrpc.beep': 'RFC 3529', - 'xmlrpc.beeps': 'RFC 3529', - 'z39.50r': 'Z39.50 Retrieval; RFC 2056', - 'z39.50s': 'Z39.50 Session; RFC 2056',} Modified: trunk/docutils/src/main/resources/docutils/docutils/utils/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,5 @@ -# coding: utf8 -# $Id: __init__.py 7338 2012-02-03 12:22:14Z milde $ +# coding: utf-8 +# $Id: __init__.py 7668 2013-06-04 12:46:30Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. 
@@ -12,12 +12,13 @@ import sys import os import os.path +import re import warnings import unicodedata from docutils import ApplicationError, DataError from docutils import nodes -from docutils.io import FileOutput -from docutils.error_reporting import ErrorOutput, SafeString +import docutils.io +from docutils.utils.error_reporting import ErrorOutput, SafeString class SystemMessage(ApplicationError): @@ -509,15 +510,32 @@ """ assert not (settings.stylesheet and settings.stylesheet_path), ( 'stylesheet and stylesheet_path are mutually exclusive.') - if settings.stylesheet_path: - sheets = settings.stylesheet_path.split(",") - elif settings.stylesheet: - sheets = settings.stylesheet.split(",") - else: - sheets = [] - # strip whitespace (frequently occuring in config files) - return [sheet.strip(u' \t\n') for sheet in sheets] + stylesheets = settings.stylesheet_path or settings.stylesheet or [] + # programmatically set default can be string or unicode: + if not isinstance(stylesheets, list): + stylesheets = [path.strip() for path in stylesheets.split(',')] + # expand relative paths if found in stylesheet-dirs: + return [find_file_in_dirs(path, settings.stylesheet_dirs) + for path in stylesheets] +def find_file_in_dirs(path, dirs): + """ + Search for `path` in the list of directories `dirs`. + + Return the first expansion that matches an existing file. + """ + if os.path.isabs(path): + return path + for d in dirs: + if d == '.': + f = path + else: + d = os.path.expanduser(d) + f = os.path.join(d, path) + if os.path.exists(f): + return f + return path + def get_trim_footnote_ref_space(settings): """ Return whether or not to trim footnote space. 
@@ -645,20 +663,20 @@ Example: - >>> normalize_language_tag('de-AT-1901') - ['de_at_1901', 'de_at', 'de_1901', 'de'] + >>> normalize_language_tag('de_AT-1901') + ['de-at-1901', 'de-at', 'de-1901', 'de'] """ # normalize: - tag = tag.lower().replace('-','_') + tag = tag.lower().replace('_','-') + # split (except singletons, which mark the following tag as non-standard): + tag = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', tag) + taglist = [] + subtags = [subtag.replace('_', '-') for subtag in tag.split('-')] + base_tag = [subtags.pop(0)] # find all combinations of subtags - taglist = [] - base_tag= tag.split('_')[:1] - subtags = tag.split('_')[1:] - # print base_tag, subtags for n in range(len(subtags), 0, -1): for tags in unique_combinations(subtags, n): - # print tags - taglist.append('_'.join(base_tag + tags)) + taglist.append('-'.join(base_tag+tags)) taglist += base_tag return taglist @@ -699,7 +717,7 @@ of = None else: of = output_file - self.file = FileOutput(destination_path=of, + self.file = docutils.io.FileOutput(destination_path=of, encoding='utf8', autoclose=False) else: self.file = None Modified: trunk/docutils/src/main/resources/docutils/docutils/utils/code_analyzer.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/code_analyzer.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/code_analyzer.py 2014-09-29 12:39:28 UTC (rev 756) @@ -4,7 +4,7 @@ """Lexical analysis of formal languages (i.e. code) using Pygments.""" # :Author: Georg Brandl; Felix Wiemann; Günter Milde -# :Date: $Date: 2011-12-20 15:14:21 +0100 (Die, 20 Dez 2011) $ +# :Date: $Date: 2011-12-20 15:14:21 +0100 (Die, 20. Dez 2011) $ # :Copyright: This module has been placed in the public domain. 
from docutils import ApplicationError Added: trunk/docutils/src/main/resources/docutils/docutils/utils/error_reporting.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/error_reporting.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/error_reporting.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,211 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# :Id: $Id: error_reporting.py 7668 2013-06-04 12:46:30Z milde $ +# :Copyright: © 2011 Günter Milde. +# :License: Released under the terms of the `2-Clause BSD license`_, in short: +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause + +""" +Error reporting should be safe from encoding/decoding errors. +However, implicit conversions of strings and exceptions like + +>>> u'%s world: %s' % ('H\xe4llo', Exception(u'H\xe4llo')) + +fail in some Python versions: + +* In Python <= 2.6, ``unicode(<exception instance>)`` uses + `__str__` and fails with non-ASCII chars in `unicode` arguments. + (work around http://bugs.python.org/issue2517): + +* In Python 2, unicode(<exception instance>) fails, with non-ASCII + chars in arguments. (Use case: in some locales, the errstr + argument of IOError contains non-ASCII chars.) + +* In Python 2, str(<exception instance>) fails, with non-ASCII chars + in `unicode` arguments. + +The `SafeString`, `ErrorString` and `ErrorOutput` classes handle +common exceptions. +""" + +import sys, codecs + +# Guess the locale's encoding. 
+# If no valid guess can be made, locale_encoding is set to `None`: +try: + import locale # module missing in Jython +except ImportError: + locale_encoding = None +else: + locale_encoding = locale.getlocale()[1] or locale.getdefaultlocale()[1] + # locale.getpreferredencoding([do_setlocale=True|False]) + # has side-effects | might return a wrong guess. + # (cf. Update 1 in http://stackoverflow.com/questions/4082645/using-python-2-xs-locale-module-t...) + try: + codecs.lookup(locale_encoding or '') # None -> '' + except LookupError: + locale_encoding = None + + + +class SafeString(object): + """ + A wrapper providing robust conversion to `str` and `unicode`. + """ + + def __init__(self, data, encoding=None, encoding_errors='backslashreplace', + decoding_errors='replace'): + self.data = data + self.encoding = (encoding or getattr(data, 'encoding', None) or + locale_encoding or 'ascii') + self.encoding_errors = encoding_errors + self.decoding_errors = decoding_errors + + + def __str__(self): + try: + return str(self.data) + except UnicodeEncodeError, err: + if isinstance(self.data, Exception): + args = [str(SafeString(arg, self.encoding, + self.encoding_errors)) + for arg in self.data.args] + return ', '.join(args) + if isinstance(self.data, unicode): + if sys.version_info > (3,0): + return self.data + else: + return self.data.encode(self.encoding, + self.encoding_errors) + raise + + def __unicode__(self): + """ + Return unicode representation of `self.data`. + + Try ``unicode(self.data)``, catch `UnicodeError` and + + * if `self.data` is an Exception instance, work around + http://bugs.python.org/issue2517 with an emulation of + Exception.__unicode__, + + * else decode with `self.encoding` and `self.decoding_errors`. + """ + try: + u = unicode(self.data) + if isinstance(self.data, EnvironmentError): + u = u.replace(": u'", ": '") # normalize filename quoting + return u + except UnicodeError, error: # catch ..Encode.. and ..Decode.. 
errors + if isinstance(self.data, EnvironmentError): + return u"[Errno %s] %s: '%s'" % (self.data.errno, + SafeString(self.data.strerror, self.encoding, + self.decoding_errors), + SafeString(self.data.filename, self.encoding, + self.decoding_errors)) + if isinstance(self.data, Exception): + args = [unicode(SafeString(arg, self.encoding, + decoding_errors=self.decoding_errors)) + for arg in self.data.args] + return u', '.join(args) + if isinstance(error, UnicodeDecodeError): + return unicode(self.data, self.encoding, self.decoding_errors) + raise + +class ErrorString(SafeString): + """ + Safely report exception type and message. + """ + def __str__(self): + return '%s: %s' % (self.data.__class__.__name__, + super(ErrorString, self).__str__()) + + def __unicode__(self): + return u'%s: %s' % (self.data.__class__.__name__, + super(ErrorString, self).__unicode__()) + + +class ErrorOutput(object): + """ + Wrapper class for file-like error streams with + failsafe de- and encoding of `str`, `bytes`, `unicode` and + `Exception` instances. + """ + + def __init__(self, stream=None, encoding=None, + encoding_errors='backslashreplace', + decoding_errors='replace'): + """ + :Parameters: + - `stream`: a file-like object, + a string (path to a file), + `None` (write to `sys.stderr`, default), or + evaluating to `False` (write() requests are ignored). + - `encoding`: `stream` text encoding. Guessed if None. + - `encoding_errors`: how to treat encoding errors. 
+ """ + if stream is None: + stream = sys.stderr + elif not(stream): + stream = False + # if `stream` is a file name, open it + elif isinstance(stream, str): + stream = open(stream, 'w') + elif isinstance(stream, unicode): + stream = open(stream.encode(sys.getfilesystemencoding()), 'w') + + self.stream = stream + """Where warning output is sent.""" + + self.encoding = (encoding or getattr(stream, 'encoding', None) or + locale_encoding or 'ascii') + """The output character encoding.""" + + self.encoding_errors = encoding_errors + """Encoding error handler.""" + + self.decoding_errors = decoding_errors + """Decoding error handler.""" + + def write(self, data): + """ + Write `data` to self.stream. Ignore, if self.stream is False. + + `data` can be a `string`, `unicode`, or `Exception` instance. + """ + if self.stream is False: + return + if isinstance(data, Exception): + data = unicode(SafeString(data, self.encoding, + self.encoding_errors, self.decoding_errors)) + try: + self.stream.write(data) + except UnicodeEncodeError: + self.stream.write(data.encode(self.encoding, self.encoding_errors)) + except TypeError: # in Python 3, stderr expects unicode + if self.stream in (sys.stderr, sys.stdout): + self.stream.buffer.write(data) # write bytes to raw stream + else: + self.stream.write(unicode(data, self.encoding, + self.decoding_errors)) + + def close(self): + """ + Close the error-output stream. + + Ignored if the stream is `sys.stderr` or `sys.stdout` or has no + close() method. 
+ """ + if self.stream in (sys.stdout, sys.stderr): + return + try: + self.stream.close() + except AttributeError: + pass Added: trunk/docutils/src/main/resources/docutils/docutils/utils/math/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/math/__init__.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/math/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,47 @@ +# :Id: $Id: __init__.py 7218 2011-11-08 17:42:40Z milde $ +# :Author: Guenter Milde. +# :License: Released under the terms of the `2-Clause BSD license`_, in short: +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause + +""" +This is the Docutils (Python Documentation Utilities) "math" sub-package. + +It contains various modules for conversion between different math formats +(LaTeX, MathML, HTML). + +:math2html: LaTeX math -> HTML conversion from eLyXer +:latex2mathml: LaTeX math -> presentational MathML +:unichar2tex: Unicode character to LaTeX math translation table +:tex2unichar: LaTeX math to Unicode character translation dictionaries +""" + +# helpers for Docutils math support +# ================================= + +def pick_math_environment(code, numbered=False): + """Return the right math environment to display `code`. + + The test simply looks for line-breaks (``\\``) outside environments. + Multi-line formulae are set with ``align``, one-liners with + ``equation``. + + If `numbered` evaluates to ``False``, the "starred" versions are used + to suppress numbering. 
+ """ + # cut out environment content: + chunks = code.split(r'\begin{') + toplevel_code = ''.join([chunk.split(r'\end{')[-1] + for chunk in chunks]) + if toplevel_code.find(r'\\') >= 0: + env = 'align' + else: + env = 'equation' + if not numbered: + env += '*' + return env Added: trunk/docutils/src/main/resources/docutils/docutils/utils/math/latex2mathml.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/math/latex2mathml.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/math/latex2mathml.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,560 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# :Id: $Id: latex2mathml.py 7668 2013-06-04 12:46:30Z milde $ +# :Copyright: © 2010 Günter Milde. +# Based on rst2mathml.py from the latex_math sandbox project +# © 2005 Jens Jørgen Mortensen +# :License: Released under the terms of the `2-Clause BSD license`_, in short: +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. 
_2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause + + +"""Convert LaTex math code into presentational MathML""" + +# Based on the `latex_math` sandbox project by Jens Jørgen Mortensen + +import docutils.utils.math.tex2unichar as tex2unichar + +# TeX spacing combining +over = {'acute': u'\u00B4', # u'\u0301', + 'bar': u'\u00AF', # u'\u0304', + 'breve': u'\u02D8', # u'\u0306', + 'check': u'\u02C7', # u'\u030C', + 'dot': u'\u02D9', # u'\u0307', + 'ddot': u'\u00A8', # u'\u0308', + 'dddot': u'\u20DB', + 'grave': u'`', # u'\u0300', + 'hat': u'^', # u'\u0302', + 'mathring': u'\u02DA', # u'\u030A', + 'overleftrightarrow': u'\u20e1', + # 'overline': # u'\u0305', + 'tilde': u'\u02DC', # u'\u0303', + 'vec': u'\u20D7'} + +Greek = { # Capital Greek letters: (upright in TeX style) + 'Phi':u'\u03a6', 'Xi':u'\u039e', 'Sigma':u'\u03a3', + 'Psi':u'\u03a8', 'Delta':u'\u0394', 'Theta':u'\u0398', + 'Upsilon':u'\u03d2', 'Pi':u'\u03a0', 'Omega':u'\u03a9', + 'Gamma':u'\u0393', 'Lambda':u'\u039b'} + +letters = tex2unichar.mathalpha + +special = tex2unichar.mathbin # Binary symbols +special.update(tex2unichar.mathrel) # Relation symbols, arrow symbols +special.update(tex2unichar.mathord) # Miscellaneous symbols +special.update(tex2unichar.mathop) # Variable-sized symbols +special.update(tex2unichar.mathopen) # Braces +special.update(tex2unichar.mathclose) # Braces +special.update(tex2unichar.mathfence) + +sumintprod = ''.join([special[symbol] for symbol in + ['sum', 'int', 'oint', 'prod']]) + +functions = ['arccos', 'arcsin', 'arctan', 'arg', 'cos', 'cosh', + 'cot', 'coth', 'csc', 'deg', 'det', 'dim', + 'exp', 'gcd', 'hom', 'inf', 'ker', 'lg', + 'lim', 'liminf', 'limsup', 'ln', 'log', 'max', + 'min', 'Pr', 'sec', 'sin', 'sinh', 'sup', + 'tan', 'tanh', + 'injlim', 'varinjlim', 'varlimsup', + 'projlim', 'varliminf', 'varprojlim'] + + +mathbb = { + 'A': u'\U0001D538', + 'B': u'\U0001D539', + 'C': u'\u2102', + 'D': u'\U0001D53B', + 'E': u'\U0001D53C', + 'F': u'\U0001D53D', + 'G': 
u'\U0001D53E', + 'H': u'\u210D', + 'I': u'\U0001D540', + 'J': u'\U0001D541', + 'K': u'\U0001D542', + 'L': u'\U0001D543', + 'M': u'\U0001D544', + 'N': u'\u2115', + 'O': u'\U0001D546', + 'P': u'\u2119', + 'Q': u'\u211A', + 'R': u'\u211D', + 'S': u'\U0001D54A', + 'T': u'\U0001D54B', + 'U': u'\U0001D54C', + 'V': u'\U0001D54D', + 'W': u'\U0001D54E', + 'X': u'\U0001D54F', + 'Y': u'\U0001D550', + 'Z': u'\u2124', + } + +mathscr = { + 'A': u'\U0001D49C', + 'B': u'\u212C', # bernoulli function + 'C': u'\U0001D49E', + 'D': u'\U0001D49F', + 'E': u'\u2130', + 'F': u'\u2131', + 'G': u'\U0001D4A2', + 'H': u'\u210B', # hamiltonian + 'I': u'\u2110', + 'J': u'\U0001D4A5', + 'K': u'\U0001D4A6', + 'L': u'\u2112', # lagrangian + 'M': u'\u2133', # physics m-matrix + 'N': u'\U0001D4A9', + 'O': u'\U0001D4AA', + 'P': u'\U0001D4AB', + 'Q': u'\U0001D4AC', + 'R': u'\u211B', + 'S': u'\U0001D4AE', + 'T': u'\U0001D4AF', + 'U': u'\U0001D4B0', + 'V': u'\U0001D4B1', + 'W': u'\U0001D4B2', + 'X': u'\U0001D4B3', + 'Y': u'\U0001D4B4', + 'Z': u'\U0001D4B5', + 'a': u'\U0001D4B6', + 'b': u'\U0001D4B7', + 'c': u'\U0001D4B8', + 'd': u'\U0001D4B9', + 'e': u'\u212F', + 'f': u'\U0001D4BB', + 'g': u'\u210A', + 'h': u'\U0001D4BD', + 'i': u'\U0001D4BE', + 'j': u'\U0001D4BF', + 'k': u'\U0001D4C0', + 'l': u'\U0001D4C1', + 'm': u'\U0001D4C2', + 'n': u'\U0001D4C3', + 'o': u'\u2134', # order of + 'p': u'\U0001D4C5', + 'q': u'\U0001D4C6', + 'r': u'\U0001D4C7', + 's': u'\U0001D4C8', + 't': u'\U0001D4C9', + 'u': u'\U0001D4CA', + 'v': u'\U0001D4CB', + 'w': u'\U0001D4CC', + 'x': u'\U0001D4CD', + 'y': u'\U0001D4CE', + 'z': u'\U0001D4CF', + } + +negatables = {'=': u'\u2260', + '\in': u'\u2209', + '\equiv': u'\u2262'} + +# LaTeX to MathML translation stuff: +class math: + """Base class for MathML elements.""" + + nchildren = 1000000 + """Required number of children""" + + def __init__(self, children=None, inline=None): + """math([children]) -> MathML element + + children can be one child or a list of children.""" + + 
self.children = [] + if children is not None: + if type(children) is list: + for child in children: + self.append(child) + else: + # Only one child: + self.append(children) + + if inline is not None: + self.inline = inline + + def __repr__(self): + if hasattr(self, 'children'): + return self.__class__.__name__ + '(%s)' % \ + ','.join([repr(child) for child in self.children]) + else: + return self.__class__.__name__ + + def full(self): + """Room for more children?""" + + return len(self.children) >= self.nchildren + + def append(self, child): + """append(child) -> element + + Appends child and returns self if self is not full or first + non-full parent.""" + + assert not self.full() + self.children.append(child) + child.parent = self + node = self + while node.full(): + node = node.parent + return node + + def delete_child(self): + """delete_child() -> child + + Delete last child and return it.""" + + child = self.children[-1] + del self.children[-1] + return child + + def close(self): + """close() -> parent + + Close element and return first non-full element.""" + + parent = self.parent + while parent.full(): + parent = parent.parent + return parent + + def xml(self): + """xml() -> xml-string""" + + return self.xml_start() + self.xml_body() + self.xml_end() + + def xml_start(self): + if not hasattr(self, 'inline'): + return ['<%s>' % self.__class__.__name__] + xmlns = 'http://www.w3.org/1998/Math/MathML' + if self.inline: + return ['<math xmlns="%s">' % xmlns] + else: + return ['<math xmlns="%s" mode="display">' % xmlns] + + def xml_end(self): + return ['</%s>' % self.__class__.__name__] + + def xml_body(self): + xml = [] + for child in self.children: + xml.extend(child.xml()) + return xml + +class mrow(math): + def xml_start(self): + return ['\n<%s>' % self.__class__.__name__] + +class mtable(math): + def xml_start(self): + return ['\n<%s>' % self.__class__.__name__] + +class mtr(mrow): pass +class mtd(mrow): pass + +class mx(math): + """Base class for mo, mi, and 
mn""" + + nchildren = 0 + def __init__(self, data): + self.data = data + + def xml_body(self): + return [self.data] + +class mo(mx): + translation = {'<': '<', '>': '>'} + def xml_body(self): + return [self.translation.get(self.data, self.data)] + +class mi(mx): pass +class mn(mx): pass + +class msub(math): + nchildren = 2 + +class msup(math): + nchildren = 2 + +class msqrt(math): + nchildren = 1 + +class mroot(math): + nchildren = 2 + +class mfrac(math): + nchildren = 2 + +class msubsup(math): + nchildren = 3 + def __init__(self, children=None, reversed=False): + self.reversed = reversed + math.__init__(self, children) + + def xml(self): + if self.reversed: +## self.children[1:3] = self.children[2:0:-1] + self.children[1:3] = [self.children[2], self.children[1]] + self.reversed = False + return math.xml(self) + +class mfenced(math): + translation = {'\\{': '{', '\\langle': u'\u2329', + '\\}': '}', '\\rangle': u'\u232A', + '.': ''} + def __init__(self, par): + self.openpar = par + math.__init__(self) + + def xml_start(self): + open = self.translation.get(self.openpar, self.openpar) + close = self.translation.get(self.closepar, self.closepar) + return ['<mfenced open="%s" close="%s">' % (open, close)] + +class mspace(math): + nchildren = 0 + +class mstyle(math): + def __init__(self, children=None, nchildren=None, **kwargs): + if nchildren is not None: + self.nchildren = nchildren + math.__init__(self, children) + self.attrs = kwargs + + def xml_start(self): + return ['<mstyle '] + ['%s="%s"' % item + for item in self.attrs.items()] + ['>'] + +class mover(math): + nchildren = 2 + def __init__(self, children=None, reversed=False): + self.reversed = reversed + math.__init__(self, children) + + def xml(self): + if self.reversed: + self.children.reverse() + self.reversed = False + return math.xml(self) + +class munder(math): + nchildren = 2 + +class munderover(math): + nchildren = 3 + def __init__(self, children=None): + math.__init__(self, children) + +class 
mtext(math): + nchildren = 0 + def __init__(self, text): + self.text = text + + def xml_body(self): + return [self.text] + +def parse_latex_math(string, inline=True): + """parse_latex_math(string [,inline]) -> MathML-tree + + Returns a MathML-tree parsed from string. inline=True is for + inline math and inline=False is for displayed math. + + tree is the whole tree and node is the current element.""" + + # Normalize white-space: + string = ' '.join(string.split()) + + if inline: + node = mrow() + tree = math(node, inline=True) + else: + node = mtd() + tree = math(mtable(mtr(node)), inline=False) + + while len(string) > 0: + n = len(string) + c = string[0] + skip = 1 # number of characters consumed + if n > 1: + c2 = string[1] + else: + c2 = '' +## print n, string, c, c2, node.__class__.__name__ + if c == ' ': + pass + elif c == '\\': + if c2 in '{}': + node = node.append(mo(c2)) + skip = 2 + elif c2 == ' ': + node = node.append(mspace()) + skip = 2 + elif c2 == ',': # TODO: small space + node = node.append(mspace()) + skip = 2 + elif c2.isalpha(): + # We have a LaTeX-name: + i = 2 + while i < n and string[i].isalpha(): + i += 1 + name = string[1:i] + node, skip = handle_keyword(name, node, string[i:]) + skip += i + elif c2 == '\\': + # End of a row: + entry = mtd() + row = mtr(entry) + node.close().close().append(row) + node = entry + skip = 2 + else: + raise SyntaxError(ur'Syntax error: "%s%s"' % (c, c2)) + elif c.isalpha(): + node = node.append(mi(c)) + elif c.isdigit(): + node = node.append(mn(c)) + elif c in "+-*/=()[]|<>,.!?':;@": + node = node.append(mo(c)) + elif c == '_': + child = node.delete_child() + if isinstance(child, msup): + sub = msubsup(child.children, reversed=True) + elif isinstance(child, mo) and child.data in sumintprod: + sub = munder(child) + else: + sub = msub(child) + node.append(sub) + node = sub + elif c == '^': + child = node.delete_child() + if isinstance(child, msub): + sup = msubsup(child.children) + elif isinstance(child, mo) and 
child.data in sumintprod: + sup = mover(child) + elif (isinstance(child, munder) and + child.children[0].data in sumintprod): + sup = munderover(child.children) + else: + sup = msup(child) + node.append(sup) + node = sup + elif c == '{': + row = mrow() + node.append(row) + node = row + elif c == '}': + node = node.close() + elif c == '&': + entry = mtd() + node.close().append(entry) + node = entry + else: + raise SyntaxError(ur'Illegal character: "%s"' % c) + string = string[skip:] + return tree + + +def handle_keyword(name, node, string): + skip = 0 + if len(string) > 0 and string[0] == ' ': + string = string[1:] + skip = 1 + if name == 'begin': + if not string.startswith('{matrix}'): + raise SyntaxError(u'Environment not supported! ' + u'Supported environment: "matrix".') + skip += 8 + entry = mtd() + table = mtable(mtr(entry)) + node.append(table) + node = entry + elif name == 'end': + if not string.startswith('{matrix}'): + raise SyntaxError(ur'Expected "\end{matrix}"!') + skip += 8 + node = node.close().close().close() + elif name in ('text', 'mathrm'): + if string[0] != '{': + raise SyntaxError(ur'Expected "\text{...}"!') + i = string.find('}') + if i == -1: + raise SyntaxError(ur'Expected "\text{...}"!') + node = node.append(mtext(string[1:i])) + skip += i + 1 + elif name == 'sqrt': + sqrt = msqrt() + node.append(sqrt) + node = sqrt + elif name == 'frac': + frac = mfrac() + node.append(frac) + node = frac + elif name == 'left': + for par in ['(', '[', '|', '\\{', '\\langle', '.']: + if string.startswith(par): + break + else: + raise SyntaxError(u'Missing left-brace!') + fenced = mfenced(par) + node.append(fenced) + row = mrow() + fenced.append(row) + node = row + skip += len(par) + elif name == 'right': + for par in [')', ']', '|', '\\}', '\\rangle', '.']: + if string.startswith(par): + break + else: + raise SyntaxError(u'Missing right-brace!') + node = node.close() + node.closepar = par + node = node.close() + skip += len(par) + elif name == 'not': + for 
operator in negatables: + if string.startswith(operator): + break + else: + raise SyntaxError(ur'Expected something to negate: "\not ..."!') + node = node.append(mo(negatables[operator])) + skip += len(operator) + elif name == 'mathbf': + style = mstyle(nchildren=1, fontweight='bold') + node.append(style) + node = style + elif name == 'mathbb': + if string[0] != '{' or not string[1].isupper() or string[2] != '}': + raise SyntaxError(ur'Expected something like "\mathbb{A}"!') + node = node.append(mi(mathbb[string[1]])) + skip += 3 + elif name in ('mathscr', 'mathcal'): + if string[0] != '{' or string[2] != '}': + raise SyntaxError(ur'Expected something like "\mathscr{A}"!') + node = node.append(mi(mathscr[string[1]])) + skip += 3 + elif name == 'colon': # "normal" colon, not binary operator + node = node.append(mo(':')) # TODO: add ``lspace="0pt"`` + elif name in Greek: # Greek capitals (upright in "TeX style") + node = node.append(mo(Greek[name])) + # TODO: "ISO style" sets them italic. Could we use a class argument + # to enable styling via CSS? + elif name in letters: + node = node.append(mi(letters[name])) + elif name in special: + node = node.append(mo(special[name])) + elif name in functions: + node = node.append(mo(name)) + elif name in over: + ovr = mover(mo(over[name]), reversed=True) + node.append(ovr) + node = ovr + else: + raise SyntaxError(u'Unknown LaTeX command: ' + name) + + return node, skip Added: trunk/docutils/src/main/resources/docutils/docutils/utils/math/math2html.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/math/math2html.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/math/math2html.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,5249 @@ +#! /usr/bin/env python +# -*- coding: utf-8 -*- + +# math2html: convert LaTeX equations to HTML output. 
+# +# Copyright (C) 2009-2011 Alex Fernández +# +# Released under the terms of the `2-Clause BSD license'_, in short: +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause + +# Based on eLyXer: convert LyX source files to HTML output. +# http://elyxer.nongnu.org/ + +# --end-- +# Alex 20101110 +# eLyXer standalone formula conversion to HTML. + + + + +import sys + +class Trace(object): + "A tracing class" + + debugmode = False + quietmode = False + showlinesmode = False + + prefix = None + + def debug(cls, message): + "Show a debug message" + if not Trace.debugmode or Trace.quietmode: + return + Trace.show(message, sys.stdout) + + def message(cls, message): + "Show a trace message" + if Trace.quietmode: + return + if Trace.prefix and Trace.showlinesmode: + message = Trace.prefix + message + Trace.show(message, sys.stdout) + + def error(cls, message): + "Show an error message" + message = '* ' + message + if Trace.prefix and Trace.showlinesmode: + message = Trace.prefix + message + Trace.show(message, sys.stderr) + + def fatal(cls, message): + "Show an error message and terminate" + Trace.error('FATAL: ' + message) + exit(-1) + + def show(cls, message, channel): + "Show a message out of a channel" + if sys.version_info < (3,0): + message = message.encode('utf-8') + channel.write(message + '\n') + + debug = classmethod(debug) + message = classmethod(message) + error = classmethod(error) + fatal = classmethod(fatal) + show = classmethod(show) + + + + +import os.path +import sys + + +class BibStylesConfig(object): + "Configuration class from elyxer.config file" + + abbrvnat = { + + u'@article':u'$authors. $title. 
<i>$journal</i>,{ {$volume:}$pages,} $month $year.{ doi: $doi.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'cite':u'$surname($year)', + u'default':u'$authors. <i>$title</i>. $publisher, $year.{ URL <a href="$url">$url</a>.}{ $note.}', + } + + alpha = { + + u'@article':u'$authors. $title.{ <i>$journal</i>{, {$volume}{($number)}}{: $pages}{, $year}.}{ <a href="$url">$url</a>.}{ <a href="$filename">$filename</a>.}{ $note.}', + u'cite':u'$Sur$YY', + u'default':u'$authors. $title.{ <i>$journal</i>,} $year.{ <a href="$url">$url</a>.}{ <a href="$filename">$filename</a>.}{ $note.}', + } + + authordate2 = { + + u'@article':u'$authors. $year. $title. <i>$journal</i>, <b>$volume</b>($number), $pages.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@book':u'$authors. $year. <i>$title</i>. $publisher.{ URL <a href="$url">$url</a>.}{ $note.}', + u'cite':u'$surname, $year', + u'default':u'$authors. $year. <i>$title</i>. $publisher.{ URL <a href="$url">$url</a>.}{ $note.}', + } + + default = { + + u'@article':u'$authors: “$title”, <i>$journal</i>,{ pp. $pages,} $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@book':u'{$authors: }<i>$title</i>{ ($editor, ed.)}.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@booklet':u'$authors: <i>$title</i>.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@conference':u'$authors: “$title”, <i>$journal</i>,{ pp. $pages,} $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@inbook':u'$authors: <i>$title</i>.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@incollection':u'$authors: <i>$title</i>{ in <i>$booktitle</i>{ ($editor, ed.)}}.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@inproceedings':u'$authors: “$title”, <i>$journal</i>,{ pp. 
$pages,} $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@manual':u'$authors: <i>$title</i>.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@mastersthesis':u'$authors: <i>$title</i>.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@misc':u'$authors: <i>$title</i>.{{ $publisher,}{ $howpublished,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@phdthesis':u'$authors: <i>$title</i>.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@proceedings':u'$authors: “$title”, <i>$journal</i>,{ pp. $pages,} $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@techreport':u'$authors: <i>$title</i>, $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@unpublished':u'$authors: “$title”, <i>$journal</i>, $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'cite':u'$index', + u'default':u'$authors: <i>$title</i>.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + } + + defaulttags = { + u'YY':u'??', u'authors':u'', u'surname':u'', + } + + ieeetr = { + + u'@article':u'$authors, “$title”, <i>$journal</i>, vol. $volume, no. $number, pp. $pages, $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@book':u'$authors, <i>$title</i>. $publisher, $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'cite':u'$index', + u'default':u'$authors, “$title”. $year.{ URL <a href="$url">$url</a>.}{ $note.}', + } + + plain = { + + u'@article':u'$authors. $title.{ <i>$journal</i>{, {$volume}{($number)}}{:$pages}{, $year}.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'@book':u'$authors. <i>$title</i>. $publisher,{ $month} $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@incollection':u'$authors. $title.{ In <i>$booktitle</i> {($editor, ed.)}.} $publisher,{ $month} $year.{ URL <a href="$url">$url</a>.}{ $note.}', + u'@inproceedings':u'$authors. $title. { <i>$booktitle</i>{, {$volume}{($number)}}{:$pages}{, $year}.}{ URL <a href="$url">$url</a>.}{ $note.}', + u'cite':u'$index', + u'default':u'{$authors. 
}$title.{{ $publisher,} $year.}{ URL <a href="$url">$url</a>.}{ $note.}', + } + + vancouver = { + + u'@article':u'$authors. $title. <i>$journal</i>, $year{;{<b>$volume</b>}{($number)}{:$pages}}.{ URL: <a href="$url">$url</a>.}{ $note.}', + u'@book':u'$authors. $title. {$publisher, }$year.{ URL: <a href="$url">$url</a>.}{ $note.}', + u'cite':u'$index', + u'default':u'$authors. $title; {$publisher, }$year.{ $howpublished.}{ URL: <a href="$url">$url</a>.}{ $note.}', + } + +class BibTeXConfig(object): + "Configuration class from elyxer.config file" + + replaced = { + u'--':u'—', u'..':u'.', + } + +class ContainerConfig(object): + "Configuration class from elyxer.config file" + + endings = { + u'Align':u'\\end_layout', u'BarredText':u'\\bar', + u'BoldText':u'\\series', u'Cell':u'</cell', + u'ChangeDeleted':u'\\change_unchanged', + u'ChangeInserted':u'\\change_unchanged', u'ColorText':u'\\color', + u'EmphaticText':u'\\emph', u'Hfill':u'\\hfill', u'Inset':u'\\end_inset', + u'Layout':u'\\end_layout', u'LyXFooter':u'\\end_document', + u'LyXHeader':u'\\end_header', u'Row':u'</row', u'ShapedText':u'\\shape', + u'SizeText':u'\\size', u'StrikeOut':u'\\strikeout', + u'TextFamily':u'\\family', u'VersalitasText':u'\\noun', + } + + extracttext = { + u'allowed':[u'StringContainer',u'Constant',u'FormulaConstant',], + u'cloned':[u'',], + u'extracted':[u'PlainLayout',u'TaggedText',u'Align',u'Caption',u'TextFamily',u'EmphaticText',u'VersalitasText',u'BarredText',u'SizeText',u'ColorText',u'LangLine',u'Formula',u'Bracket',u'RawText',u'BibTag',u'FormulaNumber',u'AlphaCommand',u'EmptyCommand',u'OneParamFunction',u'SymbolFunction',u'TextFunction',u'FontFunction',u'CombiningFunction',u'DecoratingFunction',u'FormulaSymbol',u'BracketCommand',u'TeXCode',], + } + + startendings = { + u'\\begin_deeper':u'\\end_deeper', u'\\begin_inset':u'\\end_inset', + u'\\begin_layout':u'\\end_layout', + } + + starts = { + u'':u'StringContainer', u'#LyX':u'BlackBox', u'</lyxtabular':u'BlackBox', + 
u'<cell':u'Cell', u'<column':u'Column', u'<row':u'Row', + u'\\align':u'Align', u'\\bar':u'BarredText', + u'\\bar default':u'BlackBox', u'\\bar no':u'BlackBox', + u'\\begin_body':u'BlackBox', u'\\begin_deeper':u'DeeperList', + u'\\begin_document':u'BlackBox', u'\\begin_header':u'LyXHeader', + u'\\begin_inset Argument':u'ShortTitle', + u'\\begin_inset Box':u'BoxInset', u'\\begin_inset Branch':u'Branch', + u'\\begin_inset Caption':u'Caption', + u'\\begin_inset CommandInset bibitem':u'BiblioEntry', + u'\\begin_inset CommandInset bibtex':u'BibTeX', + u'\\begin_inset CommandInset citation':u'BiblioCitation', + u'\\begin_inset CommandInset href':u'URL', + u'\\begin_inset CommandInset include':u'IncludeInset', + u'\\begin_inset CommandInset index_print':u'PrintIndex', + u'\\begin_inset CommandInset label':u'Label', + u'\\begin_inset CommandInset line':u'LineInset', + u'\\begin_inset CommandInset nomencl_print':u'PrintNomenclature', + u'\\begin_inset CommandInset nomenclature':u'NomenclatureEntry', + u'\\begin_inset CommandInset ref':u'Reference', + u'\\begin_inset CommandInset toc':u'TableOfContents', + u'\\begin_inset ERT':u'ERT', u'\\begin_inset Flex':u'FlexInset', + u'\\begin_inset Flex Chunkref':u'NewfangledChunkRef', + u'\\begin_inset Flex Marginnote':u'SideNote', + u'\\begin_inset Flex Sidenote':u'SideNote', + u'\\begin_inset Flex URL':u'FlexURL', u'\\begin_inset Float':u'Float', + u'\\begin_inset FloatList':u'ListOf', u'\\begin_inset Foot':u'Footnote', + u'\\begin_inset Formula':u'Formula', + u'\\begin_inset FormulaMacro':u'FormulaMacro', + u'\\begin_inset Graphics':u'Image', + u'\\begin_inset Index':u'IndexReference', + u'\\begin_inset Info':u'InfoInset', + u'\\begin_inset LatexCommand bibitem':u'BiblioEntry', + u'\\begin_inset LatexCommand bibtex':u'BibTeX', + u'\\begin_inset LatexCommand cite':u'BiblioCitation', + u'\\begin_inset LatexCommand citealt':u'BiblioCitation', + u'\\begin_inset LatexCommand citep':u'BiblioCitation', + u'\\begin_inset LatexCommand 
citet':u'BiblioCitation', + u'\\begin_inset LatexCommand htmlurl':u'URL', + u'\\begin_inset LatexCommand index':u'IndexReference', + u'\\begin_inset LatexCommand label':u'Label', + u'\\begin_inset LatexCommand nomenclature':u'NomenclatureEntry', + u'\\begin_inset LatexCommand prettyref':u'Reference', + u'\\begin_inset LatexCommand printindex':u'PrintIndex', + u'\\begin_inset LatexCommand printnomenclature':u'PrintNomenclature', + u'\\begin_inset LatexCommand ref':u'Reference', + u'\\begin_inset LatexCommand tableofcontents':u'TableOfContents', + u'\\begin_inset LatexCommand url':u'URL', + u'\\begin_inset LatexCommand vref':u'Reference', + u'\\begin_inset Marginal':u'SideNote', + u'\\begin_inset Newline':u'NewlineInset', + u'\\begin_inset Newpage':u'NewPageInset', u'\\begin_inset Note':u'Note', + u'\\begin_inset OptArg':u'ShortTitle', + u'\\begin_inset Phantom':u'PhantomText', + u'\\begin_inset Quotes':u'QuoteContainer', + u'\\begin_inset Tabular':u'Table', u'\\begin_inset Text':u'InsetText', + u'\\begin_inset VSpace':u'VerticalSpace', u'\\begin_inset Wrap':u'Wrap', + u'\\begin_inset listings':u'Listing', u'\\begin_inset space':u'Space', + u'\\begin_layout':u'Layout', u'\\begin_layout Abstract':u'Abstract', + u'\\begin_layout Author':u'Author', + u'\\begin_layout Bibliography':u'Bibliography', + u'\\begin_layout Chunk':u'NewfangledChunk', + u'\\begin_layout Description':u'Description', + u'\\begin_layout Enumerate':u'ListItem', + u'\\begin_layout Itemize':u'ListItem', u'\\begin_layout List':u'List', + u'\\begin_layout LyX-Code':u'LyXCode', + u'\\begin_layout Plain':u'PlainLayout', + u'\\begin_layout Standard':u'StandardLayout', + u'\\begin_layout Title':u'Title', u'\\begin_preamble':u'LyXPreamble', + u'\\change_deleted':u'ChangeDeleted', + u'\\change_inserted':u'ChangeInserted', + u'\\change_unchanged':u'BlackBox', u'\\color':u'ColorText', + u'\\color inherit':u'BlackBox', u'\\color none':u'BlackBox', + u'\\emph default':u'BlackBox', u'\\emph off':u'BlackBox', + 
u'\\emph on':u'EmphaticText', u'\\emph toggle':u'EmphaticText', + u'\\end_body':u'LyXFooter', u'\\family':u'TextFamily', + u'\\family default':u'BlackBox', u'\\family roman':u'BlackBox', + u'\\hfill':u'Hfill', u'\\labelwidthstring':u'BlackBox', + u'\\lang':u'LangLine', u'\\length':u'InsetLength', + u'\\lyxformat':u'LyXFormat', u'\\lyxline':u'LyXLine', + u'\\newline':u'Newline', u'\\newpage':u'NewPage', + u'\\noindent':u'BlackBox', u'\\noun default':u'BlackBox', + u'\\noun off':u'BlackBox', u'\\noun on':u'VersalitasText', + u'\\paragraph_spacing':u'BlackBox', u'\\series bold':u'BoldText', + u'\\series default':u'BlackBox', u'\\series medium':u'BlackBox', + u'\\shape':u'ShapedText', u'\\shape default':u'BlackBox', + u'\\shape up':u'BlackBox', u'\\size':u'SizeText', + u'\\size normal':u'BlackBox', u'\\start_of_appendix':u'StartAppendix', + u'\\strikeout default':u'BlackBox', u'\\strikeout on':u'StrikeOut', + } + + string = { + u'startcommand':u'\\', + } + + table = { + u'headers':[u'<lyxtabular',u'<features',], + } + +class EscapeConfig(object): + "Configuration class from elyxer.config file" + + chars = { + u'\n':u'', u' -- ':u' — ', u'\'':u'’', u'---':u'—', u'`':u'‘', + } + + commands = { + u'\\InsetSpace \\space{}':u' ', u'\\InsetSpace \\thinspace{}':u' ', + u'\\InsetSpace ~':u' ', u'\\SpecialChar \\-':u'', + u'\\SpecialChar \\@.':u'.', u'\\SpecialChar \\ldots{}':u'…', + u'\\SpecialChar \\menuseparator':u' ▷ ', + u'\\SpecialChar \\nobreakdash-':u'-', u'\\SpecialChar \\slash{}':u'/', + u'\\SpecialChar \\textcompwordmark{}':u'', u'\\backslash':u'\\', + } + + entities = { + u'&':u'&', u'<':u'<', u'>':u'>', + } + + html = { + u'/>':u'>', + } + + iso885915 = { + u' ':u' ', u' ':u' ', u' ':u' ', + } + + nonunicode = { + u' ':u' ', + } + +class FormulaConfig(object): + "Configuration class from elyxer.config file" + + alphacommands = { + u'\\AA':u'Å', u'\\AE':u'Æ', + u'\\AmS':u'<span class="versalitas">AmS</span>', u'\\DH':u'Ð', + u'\\L':u'Ł', u'\\O':u'Ø', u'\\OE':u'Œ', 
u'\\TH':u'Þ', u'\\aa':u'å', + u'\\ae':u'æ', u'\\alpha':u'α', u'\\beta':u'β', u'\\delta':u'δ', + u'\\dh':u'ð', u'\\epsilon':u'ϵ', u'\\eta':u'η', u'\\gamma':u'γ', + u'\\i':u'ı', u'\\imath':u'ı', u'\\iota':u'ι', u'\\j':u'ȷ', + u'\\jmath':u'ȷ', u'\\kappa':u'κ', u'\\l':u'ł', u'\\lambda':u'λ', + u'\\mu':u'μ', u'\\nu':u'ν', u'\\o':u'ø', u'\\oe':u'œ', u'\\omega':u'ω', + u'\\phi':u'φ', u'\\pi':u'π', u'\\psi':u'ψ', u'\\rho':u'ρ', + u'\\sigma':u'σ', u'\\ss':u'ß', u'\\tau':u'τ', u'\\textcrh':u'ħ', + u'\\th':u'þ', u'\\theta':u'θ', u'\\upsilon':u'υ', u'\\varDelta':u'∆', + u'\\varGamma':u'Γ', u'\\varLambda':u'Λ', u'\\varOmega':u'Ω', + u'\\varPhi':u'Φ', u'\\varPi':u'Π', u'\\varPsi':u'Ψ', u'\\varSigma':u'Σ', + u'\\varTheta':u'Θ', u'\\varUpsilon':u'Υ', u'\\varXi':u'Ξ', + u'\\varepsilon':u'ε', u'\\varkappa':u'ϰ', u'\\varphi':u'φ', + u'\\varpi':u'ϖ', u'\\varrho':u'ϱ', u'\\varsigma':u'ς', + u'\\vartheta':u'ϑ', u'\\xi':u'ξ', u'\\zeta':u'ζ', + } + + array = { + u'begin':u'\\begin', u'cellseparator':u'&', u'end':u'\\end', + u'rowseparator':u'\\\\', + } + + bigbrackets = { + u'(':[u'⎛',u'⎜',u'⎝',], u')':[u'⎞',u'⎟',u'⎠',], u'[':[u'⎡',u'⎢',u'⎣',], + u']':[u'⎤',u'⎥',u'⎦',], u'{':[u'⎧',u'⎪',u'⎨',u'⎩',], u'|':[u'|',], + u'}':[u'⎫',u'⎪',u'⎬',u'⎭',], u'∥':[u'∥',], + } + + bigsymbols = { + u'∑':[u'⎲',u'⎳',], u'∫':[u'⌠',u'⌡',], + } + + bracketcommands = { + u'\\left':u'span class="symbol"', + u'\\left.':u'<span class="leftdot"></span>', + u'\\middle':u'span class="symbol"', u'\\right':u'span class="symbol"', + u'\\right.':u'<span class="rightdot"></span>', + } + + combiningfunctions = { + u'\\"':u'̈', u'\\\'':u'́', u'\\^':u'̂', u'\\`':u'̀', u'\\acute':u'́', + u'\\bar':u'̄', u'\\breve':u'̆', u'\\c':u'̧', u'\\check':u'̌', + u'\\dddot':u'⃛', u'\\ddot':u'̈', u'\\dot':u'̇', u'\\grave':u'̀', + u'\\hat':u'̂', u'\\mathring':u'̊', u'\\overleftarrow':u'⃖', + u'\\overrightarrow':u'⃗', u'\\r':u'̊', u'\\s':u'̩', + u'\\textcircled':u'⃝', u'\\textsubring':u'̥', u'\\tilde':u'̃', + u'\\v':u'̌', u'\\vec':u'⃗', 
u'\\~':u'̃', + } + + commands = { + u'\\ ':u' ', u'\\!':u'', u'\\#':u'#', u'\\$':u'$', u'\\%':u'%', + u'\\&':u'&', u'\\,':u' ', u'\\:':u' ', u'\\;':u' ', + u'\\APLdownarrowbox':u'⍗', u'\\APLleftarrowbox':u'⍇', + u'\\APLrightarrowbox':u'⍈', u'\\APLuparrowbox':u'⍐', u'\\Box':u'□', + u'\\Bumpeq':u'≎', u'\\CIRCLE':u'●', u'\\Cap':u'⋒', u'\\CheckedBox':u'☑', + u'\\Circle':u'○', u'\\Coloneqq':u'⩴', u'\\Corresponds':u'≙', + u'\\Cup':u'⋓', u'\\Delta':u'Δ', u'\\Diamond':u'◇', u'\\Downarrow':u'⇓', + u'\\EUR':u'€', u'\\Game':u'⅁', u'\\Gamma':u'Γ', u'\\Im':u'ℑ', + u'\\Join':u'⨝', u'\\LEFTCIRCLE':u'◖', u'\\LEFTcircle':u'◐', + u'\\Lambda':u'Λ', u'\\Leftarrow':u'⇐', u'\\Lleftarrow':u'⇚', + u'\\Longleftarrow':u'⟸', u'\\Longleftrightarrow':u'⟺', + u'\\Longrightarrow':u'⟹', u'\\Lsh':u'↰', u'\\Mapsfrom':u'⇐|', + u'\\Mapsto':u'|⇒', u'\\Omega':u'Ω', u'\\P':u'¶', u'\\Phi':u'Φ', + u'\\Pi':u'Π', u'\\Pr':u'Pr', u'\\Psi':u'Ψ', u'\\RIGHTCIRCLE':u'◗', + u'\\RIGHTcircle':u'◑', u'\\Re':u'ℜ', u'\\Rrightarrow':u'⇛', + u'\\Rsh':u'↱', u'\\S':u'§', u'\\Sigma':u'Σ', u'\\Square':u'☐', + u'\\Subset':u'⋐', u'\\Supset':u'⋑', u'\\Theta':u'Θ', u'\\Uparrow':u'⇑', + u'\\Updownarrow':u'⇕', u'\\Upsilon':u'Υ', u'\\Vdash':u'⊩', + u'\\Vert':u'∥', u'\\Vvdash':u'⊪', u'\\XBox':u'☒', u'\\Xi':u'Ξ', + u'\\Yup':u'⅄', u'\\\\':u'<br/>', u'\\_':u'_', u'\\aleph':u'ℵ', + u'\\amalg':u'∐', u'\\angle':u'∠', u'\\aquarius':u'♒', + u'\\arccos':u'arccos', u'\\arcsin':u'arcsin', u'\\arctan':u'arctan', + u'\\arg':u'arg', u'\\aries':u'♈', u'\\ast':u'∗', u'\\asymp':u'≍', + u'\\backepsilon':u'∍', u'\\backprime':u'‵', u'\\backsimeq':u'⋍', + u'\\backslash':u'\\', u'\\barwedge':u'⊼', u'\\because':u'∵', + u'\\beth':u'ℶ', u'\\between':u'≬', u'\\bigcap':u'∩', u'\\bigcirc':u'○', + u'\\bigcup':u'∪', u'\\bigodot':u'⊙', u'\\bigoplus':u'⊕', + u'\\bigotimes':u'⊗', u'\\bigsqcup':u'⊔', u'\\bigstar':u'★', + u'\\bigtriangledown':u'▽', u'\\bigtriangleup':u'△', u'\\biguplus':u'⊎', + u'\\bigvee':u'∨', u'\\bigwedge':u'∧', u'\\blacklozenge':u'⧫', + 
u'\\blacksmiley':u'☻', u'\\blacksquare':u'■', u'\\blacktriangle':u'▲', + u'\\blacktriangledown':u'▼', u'\\blacktriangleright':u'▶', u'\\bot':u'⊥', + u'\\bowtie':u'⋈', u'\\box':u'▫', u'\\boxdot':u'⊡', u'\\bullet':u'•', + u'\\bumpeq':u'≏', u'\\cancer':u'♋', u'\\cap':u'∩', u'\\capricornus':u'♑', + u'\\cdot':u'⋅', u'\\cdots':u'⋯', u'\\centerdot':u'∙', + u'\\checkmark':u'✓', u'\\chi':u'χ', u'\\circ':u'○', u'\\circeq':u'≗', + u'\\circledR':u'®', u'\\circledast':u'⊛', u'\\circledcirc':u'⊚', + u'\\circleddash':u'⊝', u'\\clubsuit':u'♣', u'\\coloneqq':u'≔', + u'\\complement':u'∁', u'\\cong':u'≅', u'\\coprod':u'∐', + u'\\copyright':u'©', u'\\cos':u'cos', u'\\cosh':u'cosh', u'\\cot':u'cot', + u'\\coth':u'coth', u'\\csc':u'csc', u'\\cup':u'∪', + u'\\curvearrowleft':u'↶', u'\\curvearrowright':u'↷', u'\\dag':u'†', + u'\\dagger':u'†', u'\\daleth':u'ℸ', u'\\dashleftarrow':u'⇠', + u'\\dashv':u'⊣', u'\\ddag':u'‡', u'\\ddagger':u'‡', u'\\ddots':u'⋱', + u'\\deg':u'deg', u'\\det':u'det', u'\\diagdown':u'╲', u'\\diagup':u'╱', + u'\\diamond':u'◇', u'\\diamondsuit':u'♦', u'\\dim':u'dim', u'\\div':u'÷', + u'\\divideontimes':u'⋇', u'\\dotdiv':u'∸', u'\\doteq':u'≐', + u'\\doteqdot':u'≑', u'\\dotplus':u'∔', u'\\dots':u'…', + u'\\doublebarwedge':u'⌆', u'\\downarrow':u'↓', u'\\downdownarrows':u'⇊', + u'\\downharpoonleft':u'⇃', u'\\downharpoonright':u'⇂', u'\\earth':u'♁', + u'\\ell':u'ℓ', u'\\emptyset':u'∅', u'\\eqcirc':u'≖', u'\\eqcolon':u'≕', + u'\\eqsim':u'≂', u'\\euro':u'€', u'\\exists':u'∃', u'\\exp':u'exp', + u'\\fallingdotseq':u'≒', u'\\female':u'♀', u'\\flat':u'♭', + u'\\forall':u'∀', u'\\frown':u'⌢', u'\\frownie':u'☹', u'\\gcd':u'gcd', + u'\\gemini':u'♊', u'\\geq)':u'≥', u'\\geqq':u'≧', u'\\geqslant':u'≥', + u'\\gets':u'←', u'\\gg':u'≫', u'\\ggg':u'⋙', u'\\gimel':u'ℷ', + u'\\gneqq':u'≩', u'\\gnsim':u'⋧', u'\\gtrdot':u'⋗', u'\\gtreqless':u'⋚', + u'\\gtreqqless':u'⪌', u'\\gtrless':u'≷', u'\\gtrsim':u'≳', + u'\\guillemotleft':u'«', u'\\guillemotright':u'»', u'\\hbar':u'ℏ', + 
u'\\heartsuit':u'♥', u'\\hfill':u'<span class="hfill"> </span>', + u'\\hom':u'hom', u'\\hookleftarrow':u'↩', u'\\hookrightarrow':u'↪', + u'\\hslash':u'ℏ', u'\\idotsint':u'<span class="bigsymbol">∫⋯∫</span>', + u'\\iiint':u'<span class="bigsymbol">∭</span>', + u'\\iint':u'<span class="bigsymbol">∬</span>', u'\\imath':u'ı', + u'\\inf':u'inf', u'\\infty':u'∞', u'\\invneg':u'⌐', u'\\jmath':u'ȷ', + u'\\jupiter':u'♃', u'\\ker':u'ker', u'\\land':u'∧', + u'\\landupint':u'<span class="bigsymbol">∱</span>', u'\\langle':u'⟨', + u'\\lbrace':u'{', u'\\lbrace)':u'{', u'\\lbrack':u'[', u'\\lceil':u'⌈', + u'\\ldots':u'…', u'\\leadsto':u'⇝', u'\\leftarrow)':u'←', + u'\\leftarrowtail':u'↢', u'\\leftarrowtobar':u'⇤', + u'\\leftharpoondown':u'↽', u'\\leftharpoonup':u'↼', + u'\\leftleftarrows':u'⇇', u'\\leftleftharpoons':u'⥢', u'\\leftmoon':u'☾', + u'\\leftrightarrow':u'↔', u'\\leftrightarrows':u'⇆', + u'\\leftrightharpoons':u'⇋', u'\\leftthreetimes':u'⋋', u'\\leo':u'♌', + u'\\leq)':u'≤', u'\\leqq':u'≦', u'\\leqslant':u'≤', u'\\lessdot':u'⋖', + u'\\lesseqgtr':u'⋛', u'\\lesseqqgtr':u'⪋', u'\\lessgtr':u'≶', + u'\\lesssim':u'≲', u'\\lfloor':u'⌊', u'\\lg':u'lg', u'\\lhd':u'⊲', + u'\\libra':u'♎', u'\\lightning':u'↯', u'\\liminf':u'liminf', + u'\\limsup':u'limsup', u'\\ll':u'≪', u'\\lll':u'⋘', u'\\ln':u'ln', + u'\\lneqq':u'≨', u'\\lnot':u'¬', u'\\lnsim':u'⋦', u'\\log':u'log', + u'\\longleftarrow':u'⟵', u'\\longleftrightarrow':u'⟷', + u'\\longmapsto':u'⟼', u'\\longrightarrow':u'⟶', u'\\looparrowleft':u'↫', + u'\\looparrowright':u'↬', u'\\lor':u'∨', u'\\lozenge':u'◊', + u'\\ltimes':u'⋉', u'\\lyxlock':u'', u'\\male':u'♂', u'\\maltese':u'✠', + u'\\mapsfrom':u'↤', u'\\mapsto':u'↦', u'\\mathcircumflex':u'^', + u'\\max':u'max', u'\\measuredangle':u'∡', u'\\mercury':u'☿', + u'\\mho':u'℧', u'\\mid':u'∣', u'\\min':u'min', u'\\models':u'⊨', + u'\\mp':u'∓', u'\\multimap':u'⊸', u'\\nLeftarrow':u'⇍', + u'\\nLeftrightarrow':u'⇎', u'\\nRightarrow':u'⇏', u'\\nVDash':u'⊯', + u'\\nabla':u'∇', 
u'\\napprox':u'≉', u'\\natural':u'♮', u'\\ncong':u'≇', + u'\\nearrow':u'↗', u'\\neg':u'¬', u'\\neg)':u'¬', u'\\neptune':u'♆', + u'\\nequiv':u'≢', u'\\newline':u'<br/>', u'\\nexists':u'∄', + u'\\ngeqslant':u'≱', u'\\ngtr':u'≯', u'\\ngtrless':u'≹', u'\\ni':u'∋', + u'\\ni)':u'∋', u'\\nleftarrow':u'↚', u'\\nleftrightarrow':u'↮', + u'\\nleqslant':u'≰', u'\\nless':u'≮', u'\\nlessgtr':u'≸', u'\\nmid':u'∤', + u'\\nolimits':u'', u'\\nonumber':u'', u'\\not':u'¬', u'\\not<':u'≮', + u'\\not=':u'≠', u'\\not>':u'≯', u'\\notbackslash':u'⍀', u'\\notin':u'∉', + u'\\notni':u'∌', u'\\notslash':u'⌿', u'\\nparallel':u'∦', + u'\\nprec':u'⊀', u'\\nrightarrow':u'↛', u'\\nsim':u'≁', u'\\nsimeq':u'≄', + u'\\nsqsubset':u'⊏̸', u'\\nsubseteq':u'⊈', u'\\nsucc':u'⊁', + u'\\nsucccurlyeq':u'⋡', u'\\nsupset':u'⊅', u'\\nsupseteq':u'⊉', + u'\\ntriangleleft':u'⋪', u'\\ntrianglelefteq':u'⋬', + u'\\ntriangleright':u'⋫', u'\\ntrianglerighteq':u'⋭', u'\\nvDash':u'⊭', + u'\\nvdash':u'⊬', u'\\nwarrow':u'↖', u'\\odot':u'⊙', + u'\\officialeuro':u'€', u'\\oiiint':u'<span class="bigsymbol">∰</span>', + u'\\oiint':u'<span class="bigsymbol">∯</span>', + u'\\oint':u'<span class="bigsymbol">∮</span>', + u'\\ointclockwise':u'<span class="bigsymbol">∲</span>', + u'\\ointctrclockwise':u'<span class="bigsymbol">∳</span>', + u'\\ominus':u'⊖', u'\\oplus':u'⊕', u'\\oslash':u'⊘', u'\\otimes':u'⊗', + u'\\owns':u'∋', u'\\parallel':u'∥', u'\\partial':u'∂', u'\\perp':u'⊥', + u'\\pisces':u'♓', u'\\pitchfork':u'⋔', u'\\pluto':u'♇', u'\\pm':u'±', + u'\\pointer':u'➪', u'\\pounds':u'£', u'\\prec':u'≺', + u'\\preccurlyeq':u'≼', u'\\preceq':u'≼', u'\\precsim':u'≾', + u'\\prime':u'′', u'\\prompto':u'∝', u'\\qquad':u' ', u'\\quad':u' ', + u'\\quarternote':u'♩', u'\\rangle':u'⟩', u'\\rbrace':u'}', + u'\\rbrace)':u'}', u'\\rbrack':u']', u'\\rceil':u'⌉', u'\\rfloor':u'⌋', + u'\\rhd':u'⊳', u'\\rightarrow)':u'→', u'\\rightarrowtail':u'↣', + u'\\rightarrowtobar':u'⇥', u'\\rightharpoondown':u'⇁', + u'\\rightharpoonup':u'⇀', 
u'\\rightharpooondown':u'⇁', + u'\\rightharpooonup':u'⇀', u'\\rightleftarrows':u'⇄', + u'\\rightleftharpoons':u'⇌', u'\\rightmoon':u'☽', + u'\\rightrightarrows':u'⇉', u'\\rightrightharpoons':u'⥤', + u'\\rightthreetimes':u'⋌', u'\\risingdotseq':u'≓', u'\\rtimes':u'⋊', + u'\\sagittarius':u'♐', u'\\saturn':u'♄', u'\\scorpio':u'♏', + u'\\searrow':u'↘', u'\\sec':u'sec', u'\\setminus':u'∖', u'\\sharp':u'♯', + u'\\simeq':u'≃', u'\\sin':u'sin', u'\\sinh':u'sinh', u'\\slash':u'∕', + u'\\smile':u'⌣', u'\\smiley':u'☺', u'\\spadesuit':u'♠', + u'\\sphericalangle':u'∢', u'\\sqcap':u'⊓', u'\\sqcup':u'⊔', + u'\\sqsubset':u'⊏', u'\\sqsubseteq':u'⊑', u'\\sqsupset':u'⊐', + u'\\sqsupseteq':u'⊒', u'\\square':u'□', u'\\star':u'⋆', + u'\\subseteqq':u'⫅', u'\\subsetneqq':u'⫋', u'\\succ':u'≻', + u'\\succcurlyeq':u'≽', u'\\succeq':u'≽', u'\\succnsim':u'⋩', + u'\\succsim':u'≿', u'\\sun':u'☼', u'\\sup':u'sup', u'\\supseteqq':u'⫆', + u'\\supsetneqq':u'⫌', u'\\surd':u'√', u'\\swarrow':u'↙', u'\\tan':u'tan', + u'\\tanh':u'tanh', u'\\taurus':u'♉', u'\\textasciicircum':u'^', + u'\\textasciitilde':u'~', u'\\textbackslash':u'\\', + u'\\textcopyright':u'©\'', u'\\textdegree':u'°', u'\\textellipsis':u'…', + u'\\textemdash':u'—', u'\\textendash':u'—', u'\\texteuro':u'€', + u'\\textgreater':u'>', u'\\textless':u'<', u'\\textordfeminine':u'ª', + u'\\textordmasculine':u'º', u'\\textquotedblleft':u'“', + u'\\textquotedblright':u'”', u'\\textquoteright':u'’', + u'\\textregistered':u'®', u'\\textrightarrow':u'→', + u'\\textsection':u'§', u'\\texttrademark':u'™', + u'\\texttwosuperior':u'²', u'\\textvisiblespace':u' ', + u'\\therefore':u'∴', u'\\top':u'⊤', u'\\triangle':u'△', + u'\\triangleleft':u'⊲', u'\\trianglelefteq':u'⊴', u'\\triangleq':u'≜', + u'\\triangleright':u'▷', u'\\trianglerighteq':u'⊵', + u'\\twoheadleftarrow':u'↞', u'\\twoheadrightarrow':u'↠', + u'\\twonotes':u'♫', u'\\udot':u'⊍', u'\\unlhd':u'⊴', u'\\unrhd':u'⊵', + u'\\unrhl':u'⊵', u'\\uparrow':u'↑', u'\\updownarrow':u'↕', + 
u'\\upharpoonleft':u'↿', u'\\upharpoonright':u'↾', u'\\uplus':u'⊎', + u'\\upuparrows':u'⇈', u'\\uranus':u'♅', u'\\vDash':u'⊨', + u'\\varclubsuit':u'♧', u'\\vardiamondsuit':u'♦', u'\\varheartsuit':u'♥', + u'\\varnothing':u'∅', u'\\varspadesuit':u'♤', u'\\vdash':u'⊢', + u'\\vdots':u'⋮', u'\\vee':u'∨', u'\\vee)':u'∨', u'\\veebar':u'⊻', + u'\\vert':u'∣', u'\\virgo':u'♍', u'\\wedge':u'∧', u'\\wedge)':u'∧', + u'\\wp':u'℘', u'\\wr':u'≀', u'\\yen':u'¥', u'\\{':u'{', u'\\|':u'∥', + u'\\}':u'}', + } + + decoratedcommand = { + + } + + decoratingfunctions = { + u'\\overleftarrow':u'⟵', u'\\overrightarrow':u'⟶', u'\\widehat':u'^', + } + + endings = { + u'bracket':u'}', u'complex':u'\\]', u'endafter':u'}', + u'endbefore':u'\\end{', u'squarebracket':u']', + } + + environments = { + u'align':[u'r',u'l',], u'eqnarray':[u'r',u'c',u'l',], + u'gathered':[u'l',u'l',], + } + + fontfunctions = { + u'\\boldsymbol':u'b', u'\\mathbb':u'span class="blackboard"', + u'\\mathbb{A}':u'𝔸', u'\\mathbb{B}':u'𝔹', u'\\mathbb{C}':u'ℂ', + u'\\mathbb{D}':u'𝔻', u'\\mathbb{E}':u'𝔼', u'\\mathbb{F}':u'𝔽', + u'\\mathbb{G}':u'𝔾', u'\\mathbb{H}':u'ℍ', u'\\mathbb{J}':u'𝕁', + u'\\mathbb{K}':u'𝕂', u'\\mathbb{L}':u'𝕃', u'\\mathbb{N}':u'ℕ', + u'\\mathbb{O}':u'𝕆', u'\\mathbb{P}':u'ℙ', u'\\mathbb{Q}':u'ℚ', + u'\\mathbb{R}':u'ℝ', u'\\mathbb{S}':u'𝕊', u'\\mathbb{T}':u'𝕋', + u'\\mathbb{W}':u'𝕎', u'\\mathbb{Z}':u'ℤ', u'\\mathbf':u'b', + u'\\mathcal':u'span class="scriptfont"', u'\\mathcal{B}':u'ℬ', + u'\\mathcal{E}':u'ℰ', u'\\mathcal{F}':u'ℱ', u'\\mathcal{H}':u'ℋ', + u'\\mathcal{I}':u'ℐ', u'\\mathcal{L}':u'ℒ', u'\\mathcal{M}':u'ℳ', + u'\\mathcal{R}':u'ℛ', u'\\mathfrak':u'span class="fraktur"', + u'\\mathfrak{C}':u'ℭ', u'\\mathfrak{F}':u'𝔉', u'\\mathfrak{H}':u'ℌ', + u'\\mathfrak{I}':u'ℑ', u'\\mathfrak{R}':u'ℜ', u'\\mathfrak{Z}':u'ℨ', + u'\\mathit':u'i', u'\\mathring{A}':u'Å', u'\\mathring{U}':u'Ů', + u'\\mathring{a}':u'å', u'\\mathring{u}':u'ů', u'\\mathring{w}':u'ẘ', + u'\\mathring{y}':u'ẙ', u'\\mathrm':u'span 
class="mathrm"', + u'\\mathscr':u'span class="scriptfont"', u'\\mathscr{B}':u'ℬ', + u'\\mathscr{E}':u'ℰ', u'\\mathscr{F}':u'ℱ', u'\\mathscr{H}':u'ℋ', + u'\\mathscr{I}':u'ℐ', u'\\mathscr{L}':u'ℒ', u'\\mathscr{M}':u'ℳ', + u'\\mathscr{R}':u'ℛ', u'\\mathsf':u'span class="mathsf"', + u'\\mathtt':u'tt', + } + + hybridfunctions = { + + u'\\binom':[u'{$1}{$2}',u'f2{(}f0{f1{$1}f1{$2}}f2{)}',u'span class="binom"',u'span class="binomstack"',u'span class="bigsymbol"',], + u'\\boxed':[u'{$1}',u'f0{$1}',u'span class="boxed"',], + u'\\cfrac':[u'[$p!]{$1}{$2}',u'f0{f3{(}f1{$1}f3{)/(}f2{$2}f3{)}}',u'span class="fullfraction"',u'span class="numerator align-$p"',u'span class="denominator"',u'span class="ignored"',], + u'\\color':[u'{$p!}{$1}',u'f0{$1}',u'span style="color: $p;"',], + u'\\colorbox':[u'{$p!}{$1}',u'f0{$1}',u'span class="colorbox" style="background: $p;"',], + u'\\dbinom':[u'{$1}{$2}',u'(f0{f1{f2{$1}}f1{f2{ }}f1{f2{$2}}})',u'span class="binomial"',u'span class="binomrow"',u'span class="binomcell"',], + u'\\dfrac':[u'{$1}{$2}',u'f0{f3{(}f1{$1}f3{)/(}f2{$2}f3{)}}',u'span class="fullfraction"',u'span class="numerator"',u'span class="denominator"',u'span class="ignored"',], + u'\\displaystyle':[u'{$1}',u'f0{$1}',u'span class="displaystyle"',], + u'\\fbox':[u'{$1}',u'f0{$1}',u'span class="fbox"',], + u'\\fboxrule':[u'{$p!}',u'f0{}',u'ignored',], + u'\\fboxsep':[u'{$p!}',u'f0{}',u'ignored',], + u'\\fcolorbox':[u'{$p!}{$q!}{$1}',u'f0{$1}',u'span class="boxed" style="border-color: $p; background: $q;"',], + u'\\frac':[u'{$1}{$2}',u'f0{f3{(}f1{$1}f3{)/(}f2{$2}f3{)}}',u'span class="fraction"',u'span class="numerator"',u'span class="denominator"',u'span class="ignored"',], + u'\\framebox':[u'[$p!][$q!]{$1}',u'f0{$1}',u'span class="framebox align-$q" style="width: $p;"',], + u'\\href':[u'[$o]{$u!}{$t!}',u'f0{$t}',u'a href="$u"',], + u'\\hspace':[u'{$p!}',u'f0{ }',u'span class="hspace" style="width: $p;"',], + u'\\leftroot':[u'{$p!}',u'f0{ }',u'span class="leftroot" style="width: 
$p;px"',], + u'\\nicefrac':[u'{$1}{$2}',u'f0{f1{$1}⁄f2{$2}}',u'span class="fraction"',u'sup class="numerator"',u'sub class="denominator"',u'span class="ignored"',], + u'\\parbox':[u'[$p!]{$w!}{$1}',u'f0{1}',u'div class="Boxed" style="width: $w;"',], + u'\\raisebox':[u'{$p!}{$1}',u'f0{$1.font}',u'span class="raisebox" style="vertical-align: $p;"',], + u'\\renewenvironment':[u'{$1!}{$2!}{$3!}',u'',], + u'\\rule':[u'[$v!]{$w!}{$h!}',u'f0/',u'hr class="line" style="width: $w; height: $h;"',], + u'\\scriptscriptstyle':[u'{$1}',u'f0{$1}',u'span class="scriptscriptstyle"',], + u'\\scriptstyle':[u'{$1}',u'f0{$1}',u'span class="scriptstyle"',], + u'\\sqrt':[u'[$0]{$1}',u'f0{f1{$0}f2{√}f4{(}f3{$1}f4{)}}',u'span class="sqrt"',u'sup class="root"',u'span class="radical"',u'span class="root"',u'span class="ignored"',], + u'\\stackrel':[u'{$1}{$2}',u'f0{f1{$1}f2{$2}}',u'span class="stackrel"',u'span class="upstackrel"',u'span class="downstackrel"',], + u'\\tbinom':[u'{$1}{$2}',u'(f0{f1{f2{$1}}f1{f2{ }}f1{f2{$2}}})',u'span class="binomial"',u'span class="binomrow"',u'span class="binomcell"',], + u'\\textcolor':[u'{$p!}{$1}',u'f0{$1}',u'span style="color: $p;"',], + u'\\textstyle':[u'{$1}',u'f0{$1}',u'span class="textstyle"',], + u'\\unit':[u'[$0]{$1}',u'$0f0{$1.font}',u'span class="unit"',], + u'\\unitfrac':[u'[$0]{$1}{$2}',u'$0f0{f1{$1.font}⁄f2{$2.font}}',u'span class="fraction"',u'sup class="unit"',u'sub class="unit"',], + u'\\uproot':[u'{$p!}',u'f0{ }',u'span class="uproot" style="width: $p;px"',], + u'\\url':[u'{$u!}',u'f0{$u}',u'a href="$u"',], + u'\\vspace':[u'{$p!}',u'f0{ }',u'span class="vspace" style="height: $p;"',], + } + + hybridsizes = { + u'\\binom':u'$1+$2', u'\\cfrac':u'$1+$2', u'\\dbinom':u'$1+$2+1', + u'\\dfrac':u'$1+$2', u'\\frac':u'$1+$2', u'\\tbinom':u'$1+$2+1', + } + + labelfunctions = { + u'\\label':u'a name="#"', + } + + limitcommands = { + u'\\int':u'∫', u'\\intop':u'∫', u'\\lim':u'lim', u'\\prod':u'∏', + u'\\smallint':u'∫', u'\\sum':u'∑', + } + # TODO: 
setting for simple enlarged vs. piecewise symbols + for key in (u'\\int', u'\\intop', u'\\prod', u'\\sum'): + limitcommands[key] = '<span class="symbol">%s</span>' % limitcommands[key] + + misccommands = { + u'\\limits':u'LimitPreviousCommand', u'\\newcommand':u'MacroDefinition', + u'\\renewcommand':u'MacroDefinition', + u'\\setcounter':u'SetCounterFunction', u'\\tag':u'FormulaTag', + u'\\tag*':u'FormulaTag', + } + + modified = { + u'\n':u'', u' ':u'', u'$':u'', u'&':u' ', u'\'':u'’', u'+':u' + ', + u',':u', ', u'-':u' − ', u'/':u' ⁄ ', u'<':u' < ', u'=':u' = ', + u'>':u' > ', u'@':u'', u'~':u'', + } + + onefunctions = { + u'\\Big':u'span class="bigsymbol"', u'\\Bigg':u'span class="hugesymbol"', + u'\\bar':u'span class="bar"', u'\\begin{array}':u'span class="arraydef"', + u'\\big':u'span class="symbol"', u'\\bigg':u'span class="largesymbol"', + u'\\bigl':u'span class="bigsymbol"', u'\\bigr':u'span class="bigsymbol"', + u'\\centering':u'span class="align-center"', + u'\\ensuremath':u'span class="ensuremath"', + u'\\hphantom':u'span class="phantom"', + u'\\noindent':u'span class="noindent"', + u'\\overbrace':u'span class="overbrace"', + u'\\overline':u'span class="overline"', + u'\\phantom':u'span class="phantom"', + u'\\underbrace':u'span class="underbrace"', u'\\underline':u'u', + u'\\vphantom':u'span class="phantom"', + } + + spacedcommands = { + u'\\Leftrightarrow':u'⇔', u'\\Rightarrow':u'⇒', u'\\approx':u'≈', + u'\\dashrightarrow':u'⇢', u'\\equiv':u'≡', u'\\ge':u'≥', u'\\geq':u'≥', + u'\\implies':u' ⇒ ', u'\\in':u'∈', u'\\le':u'≤', u'\\leftarrow':u'←', + u'\\leq':u'≤', u'\\ne':u'≠', u'\\neq':u'≠', u'\\not\\in':u'∉', + u'\\propto':u'∝', u'\\rightarrow':u'→', u'\\rightsquigarrow':u'⇝', + u'\\sim':u'~', u'\\subset':u'⊂', u'\\subseteq':u'⊆', u'\\supset':u'⊃', + u'\\supseteq':u'⊇', u'\\times':u'×', u'\\to':u'→', + } + + starts = { + u'beginafter':u'}', u'beginbefore':u'\\begin{', u'bracket':u'{', + u'command':u'\\', u'comment':u'%', u'complex':u'\\[', u'simple':u'$', 
+ u'squarebracket':u'[', u'unnumbered':u'*', + } + + symbolfunctions = { + u'^':u'sup', u'_':u'sub', + } + + textfunctions = { + u'\\mbox':u'span class="mbox"', u'\\text':u'span class="text"', + u'\\textbf':u'b', u'\\textipa':u'span class="textipa"', u'\\textit':u'i', + u'\\textnormal':u'span class="textnormal"', + u'\\textrm':u'span class="textrm"', + u'\\textsc':u'span class="versalitas"', + u'\\textsf':u'span class="textsf"', u'\\textsl':u'i', u'\\texttt':u'tt', + u'\\textup':u'span class="normal"', + } + + unmodified = { + + u'characters':[u'.',u'*',u'€',u'(',u')',u'[',u']',u':',u'·',u'!',u';',u'|',u'§',u'"',], + } + + urls = { + u'googlecharts':u'http://chart.googleapis.com/chart?cht=tx&chl=', + } + +class GeneralConfig(object): + "Configuration class from elyxer.config file" + + version = { + u'date':u'2011-06-27', u'lyxformat':u'413', u'number':u'1.2.3', + } + +class HeaderConfig(object): + "Configuration class from elyxer.config file" + + parameters = { + u'beginpreamble':u'\\begin_preamble', u'branch':u'\\branch', + u'documentclass':u'\\textclass', u'endbranch':u'\\end_branch', + u'endpreamble':u'\\end_preamble', u'language':u'\\language', + u'lstset':u'\\lstset', u'outputchanges':u'\\output_changes', + u'paragraphseparation':u'\\paragraph_separation', + u'pdftitle':u'\\pdf_title', u'secnumdepth':u'\\secnumdepth', + u'tocdepth':u'\\tocdepth', + } + + styles = { + + u'article':[u'article',u'aastex',u'aapaper',u'acmsiggraph',u'sigplanconf',u'achemso',u'amsart',u'apa',u'arab-article',u'armenian-article',u'article-beamer',u'chess',u'dtk',u'elsarticle',u'heb-article',u'IEEEtran',u'iopart',u'kluwer',u'scrarticle-beamer',u'scrartcl',u'extarticle',u'paper',u'mwart',u'revtex4',u'spie',u'svglobal3',u'ltugboat',u'agu-dtd',u'jgrga',u'agums',u'entcs',u'egs',u'ijmpc',u'ijmpd',u'singlecol-new',u'doublecol-new',u'isprs',u'tarticle',u'jsarticle',u'jarticle',u'jss',u'literate-article',u'siamltex',u'cl2emult',u'llncs',u'svglobal',u'svjog',u'svprobth',], + 
u'book':[u'book',u'amsbook',u'scrbook',u'extbook',u'tufte-book',u'report',u'extreport',u'scrreprt',u'memoir',u'tbook',u'jsbook',u'jbook',u'mwbk',u'svmono',u'svmult',u'treport',u'jreport',u'mwrep',], + } + +class ImageConfig(object): + "Configuration class from elyxer.config file" + + converters = { + + u'imagemagick':u'convert[ -density $scale][ -define $format:use-cropbox=true] "$input" "$output"', + u'inkscape':u'inkscape "$input" --export-png="$output"', + } + + cropboxformats = { + u'.eps':u'ps', u'.pdf':u'pdf', u'.ps':u'ps', + } + + formats = { + u'default':u'.png', u'vector':[u'.svg',u'.eps',], + } + +class LayoutConfig(object): + "Configuration class from elyxer.config file" + + groupable = { + + u'allowed':[u'StringContainer',u'Constant',u'TaggedText',u'Align',u'TextFamily',u'EmphaticText',u'VersalitasText',u'BarredText',u'SizeText',u'ColorText',u'LangLine',u'Formula',], + } + +class NewfangleConfig(object): + "Configuration class from elyxer.config file" + + constants = { + u'chunkref':u'chunkref{', u'endcommand':u'}', u'endmark':u'>', + u'startcommand':u'\\', u'startmark':u'=<', + } + +class NumberingConfig(object): + "Configuration class from elyxer.config file" + + layouts = { + + u'ordered':[u'Chapter',u'Section',u'Subsection',u'Subsubsection',u'Paragraph',], + u'roman':[u'Part',u'Book',], + } + + sequence = { + u'symbols':[u'*',u'**',u'†',u'‡',u'§',u'§§',u'¶',u'¶¶',u'#',u'##',], + } + +class StyleConfig(object): + "Configuration class from elyxer.config file" + + hspaces = { + u'\\enskip{}':u' ', u'\\hfill{}':u'<span class="hfill"> </span>', + u'\\hspace*{\\fill}':u' ', u'\\hspace*{}':u'', u'\\hspace{}':u' ', + u'\\negthinspace{}':u'', u'\\qquad{}':u' ', u'\\quad{}':u' ', + u'\\space{}':u' ', u'\\thinspace{}':u' ', u'~':u' ', + } + + quotes = { + u'ald':u'»', u'als':u'›', u'ard':u'«', u'ars':u'‹', u'eld':u'“', + u'els':u'‘', u'erd':u'”', u'ers':u'’', u'fld':u'«', + u'fls':u'‹', u'frd':u'»', u'frs':u'›', u'gld':u'„', u'gls':u'‚', + u'grd':u'“', 
u'grs':u'‘', u'pld':u'„', u'pls':u'‚', u'prd':u'”', + u'prs':u'’', u'sld':u'”', u'srd':u'”', + } + + referenceformats = { + u'eqref':u'(@↕)', u'formatted':u'¶↕', u'nameref':u'$↕', u'pageref':u'#↕', + u'ref':u'@↕', u'vpageref':u'on-page#↕', u'vref':u'@on-page#↕', + } + + size = { + u'ignoredtexts':[u'col',u'text',u'line',u'page',u'theight',u'pheight',], + } + + vspaces = { + u'bigskip':u'<div class="bigskip"> </div>', + u'defskip':u'<div class="defskip"> </div>', + u'medskip':u'<div class="medskip"> </div>', + u'smallskip':u'<div class="smallskip"> </div>', + u'vfill':u'<div class="vfill"> </div>', + } + +class TOCConfig(object): + "Configuration class from elyxer.config file" + + extractplain = { + + u'allowed':[u'StringContainer',u'Constant',u'TaggedText',u'Align',u'TextFamily',u'EmphaticText',u'VersalitasText',u'BarredText',u'SizeText',u'ColorText',u'LangLine',u'Formula',], + u'cloned':[u'',], u'extracted':[u'',], + } + + extracttitle = { + u'allowed':[u'StringContainer',u'Constant',u'Space',], + u'cloned':[u'TextFamily',u'EmphaticText',u'VersalitasText',u'BarredText',u'SizeText',u'ColorText',u'LangLine',u'Formula',], + u'extracted':[u'PlainLayout',u'TaggedText',u'Align',u'Caption',u'StandardLayout',u'FlexInset',], + } + +class TagConfig(object): + "Configuration class from elyxer.config file" + + barred = { + u'under':u'u', + } + + family = { + u'sans':u'span class="sans"', u'typewriter':u'tt', + } + + flex = { + u'CharStyle:Code':u'span class="code"', + u'CharStyle:MenuItem':u'span class="menuitem"', + u'Code':u'span class="code"', u'MenuItem':u'span class="menuitem"', + u'Noun':u'span class="noun"', u'Strong':u'span class="strong"', + } + + group = { + u'layouts':[u'Quotation',u'Quote',], + } + + layouts = { + u'Center':u'div', u'Chapter':u'h?', u'Date':u'h2', u'Paragraph':u'div', + u'Part':u'h1', u'Quotation':u'blockquote', u'Quote':u'blockquote', + u'Section':u'h?', u'Subsection':u'h?', u'Subsubsection':u'h?', + } + + listitems = { + u'Enumerate':u'ol', 
u'Itemize':u'ul', + } + + notes = { + u'Comment':u'', u'Greyedout':u'span class="greyedout"', u'Note':u'', + } + + shaped = { + u'italic':u'i', u'slanted':u'i', u'smallcaps':u'span class="versalitas"', + } + +class TranslationConfig(object): + "Configuration class from elyxer.config file" + + constants = { + u'Appendix':u'Appendix', u'Book':u'Book', u'Chapter':u'Chapter', + u'Paragraph':u'Paragraph', u'Part':u'Part', u'Section':u'Section', + u'Subsection':u'Subsection', u'Subsubsection':u'Subsubsection', + u'abstract':u'Abstract', u'bibliography':u'Bibliography', + u'figure':u'figure', u'float-algorithm':u'Algorithm ', + u'float-figure':u'Figure ', u'float-listing':u'Listing ', + u'float-table':u'Table ', u'float-tableau':u'Tableau ', + u'footnotes':u'Footnotes', u'generated-by':u'Document generated by ', + u'generated-on':u' on ', u'index':u'Index', + u'jsmath-enable':u'Please enable JavaScript on your browser.', + u'jsmath-requires':u' requires JavaScript to correctly process the mathematics on this page. 
class CommandLineParser(object):
    "A parser for runtime options"

    def __init__(self, options):
        "Keep the object (or class) whose attributes receive the parsed values."
        self.options = options

    def parseoptions(self, args):
        """Parse the leading '--' options of args, consuming them in place.

        Returns None on success, or an error message (a string) for an
        unknown option or an option that lacks its value. Parsing stops at
        the first argument not starting with '--'; the rest stays in args.
        """
        while args and args[0].startswith('--'):
            key, value = self.readoption(args)
            if not key:
                # readoption() hands back the offending name as the value
                return 'Option ' + value + ' not recognized'
            if not value:
                return 'Option ' + key + ' needs a value'
            setattr(self.options, key, value)
        return None

    def readoption(self, args):
        """Read the key and value for the option at args[0].

        Returns (key, value); (None, name) when the options object has no
        attribute of that name; (key, None) when a value is missing.
        """
        arg = args.pop(0)[2:]
        if '=' in arg:
            key = self.readequalskey(arg, args)
        else:
            key = arg.replace('-', '')
        if not hasattr(self.options, key):
            return None, key
        current = getattr(self.options, key)
        if isinstance(current, bool):
            # boolean options are flags: their presence switches them on
            return key, True
        if not args:
            return key, None
        if args[0].startswith('"'):
            return key, self.readquoted(args, args.pop(0))
        value = args.pop(0)
        if isinstance(current, list):
            # list options accumulate every value given
            current.append(value)
            return key, current
        return key, value

    def readquoted(self, args, initial):
        """Read a value between double quotes that was split on spaces.

        initial is the first token (starting with the quote); following
        tokens are consumed from args up to and including the one ending
        with a quote. Returns None if the closing quote is not found before
        the next '--' option or the end of args.
        """
        if len(initial) > 1 and initial.endswith('"'):
            # the whole quoted value arrived as a single token
            return initial[1:-1]
        value = initial[1:]
        while args and not args[0].endswith('"') and not args[0].startswith('--'):
            value += ' ' + args.pop(0)
        if not args or args[0].startswith('--'):
            return None
        # consume the closing token, dropping its trailing quote
        return value + ' ' + args.pop(0)[:-1]

    def readequalskey(self, arg, args):
        "Read a key given as key=value; the value is pushed back onto args."
        key, value = arg.split('=', 1)
        args.insert(0, value)
        return key
class Options(object):
    """A set of runtime options.

    All options live as class attributes so that the rest of the program
    can read them as Options.name without holding an instance.
    """

    instance = None

    location = None
    nocopy = False
    copyright = False
    debug = False
    quiet = False
    version = False
    hardversion = False
    versiondate = False
    html = False
    help = False
    showlines = True
    unicode = False
    iso885915 = False
    css = []
    title = None
    directory = None
    destdirectory = None
    toc = False
    toctarget = ''
    tocfor = None
    forceformat = None
    lyxformat = False
    target = None
    splitpart = None
    memory = True
    lowmem = False
    nobib = False
    converter = 'imagemagick'
    raw = False
    jsmath = None
    mathjax = None
    nofooter = False
    simplemath = False
    template = None
    noconvert = False
    notoclabels = False
    letterfoot = True
    numberfoot = False
    symbolfoot = False
    hoverfoot = True
    marginfoot = False
    endfoot = False
    supfoot = True
    alignfoot = False
    footnotes = None
    imageformat = None
    copyimages = False
    googlecharts = False
    embedcss = []

    branches = dict()

    def parseoptions(self, args):
        "Parse command line options; args[0] is the program location."
        Options.location = args[0]
        del args[0]
        parser = CommandLineParser(Options)
        result = parser.parseoptions(args)
        if result:
            Trace.error(result)
            self.usage()
        self.processoptions()

    def processoptions(self):
        "Process all options parsed: validate them and derive dependent ones."
        if Options.help:
            self.usage()
        if Options.version:
            self.showversion()
        if Options.hardversion:
            self.showhardversion()
        if Options.versiondate:
            self.showversiondate()
        if Options.lyxformat:
            self.showlyxformat()
        if Options.splitpart:
            # Catch only the conversion error: a bare except would also
            # swallow the SystemExit raised by usage().
            try:
                Options.splitpart = int(Options.splitpart)
            except ValueError:
                Trace.error('--splitpart needs a numeric argument, not ' + Options.splitpart)
                self.usage()
            else:
                if Options.splitpart <= 0:
                    Trace.error('--splitpart requires a number bigger than zero')
                    self.usage()
        if Options.lowmem or Options.toc or Options.tocfor:
            Options.memory = False
        self.parsefootnotes()
        if Options.forceformat and not Options.imageformat:
            Options.imageformat = Options.forceformat
        if Options.imageformat == 'copy':
            Options.copyimages = True
        if Options.css == []:
            Options.css = ['http://elyxer.nongnu.org/lyx.css']
        if Options.html:
            Options.simplemath = True
        if Options.toc and not Options.tocfor:
            Trace.error('Option --toc is deprecated; use --tocfor "page" instead')
            Options.tocfor = Options.toctarget
        if Options.nocopy:
            Trace.error('Option --nocopy is deprecated; it is no longer needed')
        # set in Trace if necessary: Trace.<name>mode mirrors Options.<name>
        for param in dir(Trace):
            if param.endswith('mode'):
                setattr(Trace, param, getattr(self, param[:-4]))

    def usage(self):
        "Show correct usage, then exit (showoptions() ends the program)."
        Trace.error('Usage: ' + os.path.basename(Options.location) + ' [options] [filein] [fileout]')
        Trace.error('Convert LyX input file "filein" to HTML file "fileout".')
        Trace.error('If filein (or fileout) is not given use standard input (or output).')
        Trace.error('Main program of the eLyXer package (http://elyxer.nongnu.org/).')
        self.showoptions()

    def parsefootnotes(self):
        "Parse the comma-separated --footnotes value into the *foot flags."
        if not Options.footnotes:
            return
        Options.marginfoot = False
        Options.letterfoot = False
        for option in Options.footnotes.split(','):
            footoption = option + 'foot'
            if hasattr(Options, footoption):
                setattr(Options, footoption, True)
            else:
                Trace.error('Unknown footnotes option: ' + option)
        # fall back to the defaults when no placement or marker was chosen
        if not Options.endfoot and not Options.marginfoot and not Options.hoverfoot:
            Options.hoverfoot = True
        if not Options.numberfoot and not Options.symbolfoot:
            Options.letterfoot = True

    def showoptions(self):
        "Show all possible options, then exit the program."
        helplines = (
            ' Common options:',
            ' --help: show this online help',
            ' --quiet: disables all runtime messages',
            '',
            ' Advanced options:',
            ' --debug: enable debugging messages (for developers)',
            ' --version: show version number and release date',
            ' --lyxformat: return the highest LyX version supported',
            ' Options for HTML output:',
            ' --title "title": set the generated page title',
            ' --css "file.css": use a custom CSS file',
            ' --embedcss "file.css": embed styles from a CSS file into the output',
            ' --html: output HTML 4.0 instead of the default XHTML',
            ' --unicode: full Unicode output',
            ' --iso885915: output a document with ISO-8859-15 encoding',
            ' --nofooter: remove the footer "generated by eLyXer"',
            ' --simplemath: do not generate fancy math constructions',
            ' Options for image output:',
            ' --directory "img_dir": look for images in the specified directory',
            ' --destdirectory "dest": put converted images into this directory',
            ' --imageformat ".ext": image output format, or "copy" to copy images',
            ' --noconvert: do not convert images, use in original locations',
            ' --converter "inkscape": use an alternative program to convert images',
            ' Options for footnote display:',
            ' --numberfoot: mark footnotes with numbers instead of letters',
            ' --symbolfoot: mark footnotes with symbols (*, **...)',
            ' --hoverfoot: show footnotes as hovering text (default)',
            ' --marginfoot: show footnotes on the page margin',
            ' --endfoot: show footnotes at the end of the page',
            ' --supfoot: use superscript for footnote markers (default)',
            ' --alignfoot: use aligned text for footnote markers',
            ' --footnotes "options": specify several comma-separated footnotes options',
            ' Available options are: "number", "symbol", "hover", "margin", "end",',
            ' "sup", "align"',
            ' Advanced output options:',
            ' --splitpart "depth": split the resulting webpage at the given depth',
            ' --tocfor "page": generate a TOC that points to the given page',
            ' --target "frame": make all links point to the given frame',
            ' --notoclabels: omit the part labels in the TOC, such as Chapter',
            ' --lowmem: do the conversion on the fly (conserve memory)',
            ' --raw: generate HTML without header or footer.',
            ' --jsmath "URL": use jsMath from the given URL to display equations',
            ' --mathjax "URL": use MathJax from the given URL to display equations',
            ' --googlecharts: use Google Charts to generate formula images',
            ' --template "file": use a template, put everything in <!--$content-->',
            ' --copyright: add a copyright notice at the bottom',
            ' Deprecated options:',
            ' --toc: (deprecated) create a table of contents',
            ' --toctarget "page": (deprecated) generate a TOC for the given page',
            ' --nocopy: (deprecated) maintained for backwards compatibility',
        )
        for helpline in helplines:
            Trace.error(helpline)
        sys.exit()

    def showversion(self):
        "Show the current eLyXer version string and exit."
        string = 'eLyXer version ' + GeneralConfig.version['number']
        string += ' (' + GeneralConfig.version['date'] + ')'
        Trace.error(string)
        sys.exit()

    def showhardversion(self):
        "Show just the version number and exit."
        Trace.message(GeneralConfig.version['number'])
        sys.exit()

    def showversiondate(self):
        "Show just the version date and exit."
        Trace.message(GeneralConfig.version['date'])
        sys.exit()

    def showlyxformat(self):
        "Show just the lyxformat parameter and exit."
        Trace.message(GeneralConfig.version['lyxformat'])
        sys.exit()
class BranchOptions(object):
    "A set of options for a branch"

    def __init__(self, name):
        self.name = name
        self.options = {'color': '#ffffff'}

    def set(self, key, value):
        "Set a branch option; the key must begin with the start command."
        startcommand = ContainerConfig.string['startcommand']
        if not key.startswith(startcommand):
            Trace.error('Invalid branch option ' + key)
            return
        self.options[key.replace(startcommand, '')] = value

    def isselected(self):
        "Return if the branch is selected (its 'selected' option is '1')."
        return self.options.get('selected') == '1'

    def __unicode__(self):
        "String representation"
        return 'options for ' + self.name + ': ' + unicode(self.options)


import urllib


class Cloner(object):
    "An object used to clone other objects."

    @classmethod
    def clone(cls, original):
        """Return a fresh object of the same class as the original.

        The original object must have an empty constructor; none of its
        attributes are copied over.
        """
        return cls.create(original.__class__)

    @classmethod
    def create(cls, type):
        "Create an object of a given class, calling its empty constructor."
        clone = type.__new__(type)
        clone.__init__()
        return clone


class ContainerExtractor(object):
    "A class to extract certain containers."

    def __init__(self, config):
        """Store the three lists of container class names found in config.

        config is a map with the keys 'allowed', 'cloned' and 'extracted':
        allowed containers are included as is into the result, cloned
        containers are cloned and the clone is placed into the result, and
        extracted containers are looked into. All other containers are
        silently ignored.
        """
        self.allowed = config['allowed']
        self.cloned = config['cloned']
        self.extracted = config['extracted']

    def extract(self, container):
        "Extract a group of selected containers from a container."
        found = []
        wanted = self.allowed + self.cloned

        def locate(candidate):
            return candidate.__class__.__name__ in wanted

        def recursive(candidate):
            return candidate.__class__.__name__ in self.extracted

        def process(candidate):
            self.process(candidate, found)

        container.recursivesearch(locate, recursive, process)
        return found

    def process(self, container, list):
        "Add allowed containers, clone cloned containers and add the clone."
        name = container.__class__.__name__
        if name in self.allowed:
            list.append(container)
        elif name in self.cloned:
            list.append(self.safeclone(container))
        else:
            Trace.error('Unknown container class ' + name)

    def safeclone(self, container):
        "Return a new container with contents only in a safe list, recursively."
        clone = Cloner.clone(container)
        clone.output = container.output
        clone.contents = self.extract(container)
        return clone
class Parser(object):
    "A generic parser"

    def __init__(self):
        self.begin = 0
        self.parameters = dict()

    def parseheader(self, reader):
        "Parse the header: return its words and remember where contents begin."
        header = reader.currentline().split()
        reader.nextline()
        self.begin = reader.linenumber
        return header

    def parseparameter(self, reader):
        """Parse one parameter line into self.parameters.

        Accepted forms: '<key attr="value"...>' (stored as a dict of
        attributes), 'key' (stored as True), 'key value' and
        'key "quoted value"'. Blank lines and invalid XML parameters are
        consumed without storing anything.
        """
        line = reader.currentline().strip()
        if line.startswith('<'):
            key, value = self.parsexml(reader)
            if key is not None:
                self.parameters[key] = value
            return
        reader.nextline()
        if not line:
            return
        split = line.split(' ', 1)
        key = split[0]
        if len(split) == 1:
            self.parameters[key] = True
        elif '"' not in split[1]:
            self.parameters[key] = split[1].strip()
        else:
            # keep just the text between the first pair of quotes
            self.parameters[key] = split[1].split('"')[1]

    def parsexml(self, reader):
        """Parse a parameter in xml form: <param attr1=value...>

        Returns (key, attrs) with attrs a dict, or (None, None) for an empty
        parameter. Attribute values may be quoted or bare; whitespace inside
        a value is not supported since attributes are split on whitespace.
        """
        strip = reader.currentline().strip()
        reader.nextline()
        if strip.endswith('>'):
            inner = strip[1:-1]
        else:
            Trace.error('XML parameter ' + strip + ' should be <...>')
            # nothing to trim at the end: keep the last character
            inner = strip[1:]
        split = inner.split()
        if len(split) == 0:
            Trace.error('Empty XML parameter <>')
            return None, None
        key = split[0]
        attrs = dict()
        for attr in split[1:]:
            if '=' not in attr:
                Trace.error('Erroneous attribute for ' + key + ': ' + attr)
                attr += '="0"'
            # split on the first '=' only, the value may contain more
            attrkey, value = attr.split('=', 1)
            if '"' in value:
                value = value.split('"')[1]
            attrs[attrkey] = value
        return key, attrs

    def parseending(self, reader, process):
        "Call process() until the current line starts with self.ending."
        if not self.ending:
            Trace.error('No ending for ' + unicode(self))
            return
        while not reader.currentline().startswith(self.ending):
            process()

    def parsecontainer(self, reader, contents):
        "Create the next container with self.factory and append it to contents."
        container = self.factory.createcontainer(reader)
        if container:
            container.parent = self.parent
            contents.append(container)

    def __unicode__(self):
        "Return a description"
        return self.__class__.__name__ + ' (' + unicode(self.begin) + ')'


class LoneCommand(Parser):
    "A parser for just one command line"

    def parse(self, reader):
        "Read nothing"
        return []


class TextParser(Parser):
    "A parser for a command and a bit of text"

    # endings of the text containers currently being parsed, shared by all
    stack = []

    def __init__(self, container):
        Parser.__init__(self)
        self.ending = None
        name = container.__class__.__name__
        if name in ContainerConfig.endings:
            self.ending = ContainerConfig.endings[name]
        self.endings = []

    def parse(self, reader):
        "Parse lines as long as they are text"
        TextParser.stack.append(self.ending)
        self.endings = TextParser.stack + [
            ContainerConfig.endings['Layout'],
            ContainerConfig.endings['Inset'],
            self.ending,
        ]
        contents = []
        while not self.isending(reader):
            self.parsecontainer(reader, contents)
        return contents

    def isending(self, reader):
        "Check if text is ending; update the shared stack when it is."
        current = reader.currentline().split()
        if len(current) == 0:
            return False
        if current[0] not in self.endings:
            return False
        if current[0] in TextParser.stack:
            TextParser.stack.remove(current[0])
        else:
            TextParser.stack = []
        return True


class ExcludingParser(Parser):
    "A parser that excludes the final line"

    def parse(self, reader):
        "Parse everything up to (and excluding) the final line"
        contents = []
        self.parseending(reader, lambda: self.parsecontainer(reader, contents))
        return contents


class BoundedParser(ExcludingParser):
    "A parser bound by a final line"

    def parse(self, reader):
        "Parse everything, including the final line"
        contents = ExcludingParser.parse(self, reader)
        # skip last line
        reader.nextline()
        return contents


class BoundedDummy(Parser):
    "A bound parser that ignores everything"

    def parse(self, reader):
        "Skip the contents of the container and its final line"
        self.parseending(reader, lambda: reader.nextline())
        # skip last line
        reader.nextline()
        return []


class StringParser(Parser):
    "Parses just a string"

    def parseheader(self, reader):
        "Do nothing, just take note"
        self.begin = reader.linenumber + 1
        return []

    def parse(self, reader):
        "Parse a single line"
        contents = reader.currentline()
        reader.nextline()
        return contents


class InsetParser(BoundedParser):
    "Parses a LyX inset"

    def parse(self, reader):
        "Parse inset parameters into a dictionary, then the bounded contents"
        startcommand = ContainerConfig.string['startcommand']
        while reader.currentline() != '' and not reader.currentline().startswith(startcommand):
            self.parseparameter(reader)
        return BoundedParser.parse(self, reader)
class ContainerOutput(object):
    "The generic HTML output for a container."

    def gethtml(self, container):
        "Show an error: subclasses have to implement gethtml()."
        Trace.error('gethtml() not implemented for ' + unicode(self))

    def isempty(self):
        "Decide if the output is empty: by default, not empty."
        return False


class EmptyOutput(ContainerOutput):
    "An output that produces no HTML at all."

    def gethtml(self, container):
        "Return empty HTML code."
        return []

    def isempty(self):
        "This output is particularly empty."
        return True


class FixedOutput(ContainerOutput):
    "Fixed output"

    def gethtml(self, container):
        "Return constant HTML code: whatever container.html holds."
        return container.html


class ContentsOutput(ContainerOutput):
    "Outputs the contents converted to HTML"

    def gethtml(self, container):
        """Return the HTML code of all contents, as a list of strings.

        Stops at (and reports) the first element without a gethtml() method,
        returning whatever was gathered up to that point.
        """
        html = []
        if container.contents is None:
            return html
        for element in container.contents:
            if not hasattr(element, 'gethtml'):
                Trace.error('No html in ' + element.__class__.__name__ + ': ' + unicode(element))
                return html
            html += element.gethtml()
        return html


class TaggedOutput(ContentsOutput):
    "Outputs an HTML tag surrounding the contents."

    tag = None
    breaklines = False
    empty = False

    def settag(self, tag, breaklines=False, empty=False):
        "Set the value for the tag and other attributes; returns self."
        self.tag = tag
        # false values leave the current (class default) setting untouched
        if breaklines:
            self.breaklines = breaklines
        if empty:
            self.empty = empty
        return self

    def setbreaklines(self, breaklines):
        "Set the value for breaklines; returns self."
        self.breaklines = breaklines
        return self

    def gethtml(self, container):
        "Return the HTML code: the contents wrapped in the tag."
        if self.empty:
            return [self.selfclosing(container)]
        html = [self.open(container)]
        html += ContentsOutput.gethtml(self, container)
        html.append(self.close(container))
        return html

    def open(self, container):
        "Get opening line."
        if not self.checktag(container):
            return ''
        open = '<' + self.tag + '>'
        if self.breaklines:
            return open + '\n'
        return open

    def close(self, container):
        "Get closing line."
        if not self.checktag(container):
            return ''
        # the tag may carry attributes: close just the element name
        close = '</' + self.tag.split()[0] + '>'
        if self.breaklines:
            return '\n' + close + '\n'
        return close

    def selfclosing(self, container):
        "Get self-closing line."
        if not self.checktag(container):
            return ''
        selfclosing = '<' + self.tag + '/>'
        if self.breaklines:
            return selfclosing + '\n'
        return selfclosing

    def checktag(self, container=None):
        """Check that the tag is valid.

        A missing tag (None) is reported as an error naming the container;
        an empty string silently means "no tag". Both return False.
        """
        if self.tag is None:
            Trace.error('No tag in ' + unicode(container))
            return False
        if self.tag == '':
            return False
        return True


class FilteredOutput(ContentsOutput):
    """Returns the output in the contents, but filtered:
    some strings are replaced by others."""

    def __init__(self):
        "Initialize the filters."
        self.filters = []

    def addfilter(self, original, replacement):
        "Add a new filter: replace the original by the replacement."
        self.filters.append((original, replacement))

    def gethtml(self, container):
        "Return the HTML code, each line run through the filters."
        html = ContentsOutput.gethtml(self, container)
        return [self.filter(line) for line in html]

    def filter(self, line):
        "Filter a single line with all available filters, in insertion order."
        for original, replacement in self.filters:
            line = line.replace(original, replacement)
        return line


class StringOutput(ContainerOutput):
    "Returns a bare string as output"

    def gethtml(self, container):
        "Return a bare string"
        return [container.string]
import sys
import codecs


class LineReader(object):
    "Reads a file line by line"

    def __init__(self, filename):
        """Open filename (a path, or an already open file object).

        Paths are read as UTF-8 text; if the first line cannot be decoded
        the file is assumed to be gzip-compressed and is reopened as such.
        """
        isopenfile = isinstance(filename, file)
        if isopenfile:
            self.file = filename
        else:
            self.file = codecs.open(filename, 'rU', 'utf-8')
        self.linenumber = 1
        self.lastline = None
        self.current = None
        self.mustread = True
        self.depleted = False
        try:
            self.readline()
        except UnicodeDecodeError:
            # try compressed file
            import gzip
            if not isopenfile:
                # do not leak the handle opened above
                self.file.close()
            self.file = gzip.open(filename, 'rb')
            self.readline()

    def setstart(self, firstline):
        "Set the first line to read, skipping that many lines."
        for i in range(firstline):
            self.file.readline()
        self.linenumber = firstline

    def setend(self, lastline):
        "Set the last line to read."
        self.lastline = lastline

    def currentline(self):
        "Get the current line, reading it first if it is still pending."
        if self.mustread:
            self.readline()
        return self.current

    def nextline(self):
        "Go to next line; the actual read is deferred until it is needed."
        if self.depleted:
            Trace.fatal('Read beyond file end')
        self.mustread = True

    def readline(self):
        "Read a line from the file, without its line ending."
        self.current = self.file.readline()
        if not isinstance(self.file, codecs.StreamReaderWriter):
            # raw (e.g. gzip) files yield bytes
            self.current = self.current.decode('utf-8')
        if len(self.current) == 0:
            # readline() returns an empty string only at end of file
            self.depleted = True
        self.current = self.current.rstrip('\n\r')
        self.linenumber += 1
        self.mustread = False
        Trace.prefix = 'Line ' + unicode(self.linenumber) + ': '
        if self.linenumber % 1000 == 0:
            Trace.message('Parsing')

    def finished(self):
        "Find out if the file is finished, or the last line was reached."
        if self.lastline and self.linenumber == self.lastline:
            return True
        if self.mustread:
            self.readline()
        return self.depleted

    def close(self):
        "Close the underlying file."
        self.file.close()


class LineWriter(object):
    "Writes a file as a series of lists"

    # False until a file object is given or the output file gets opened
    file = False

    def __init__(self, filename):
        "Write to filename: a path (opened lazily) or an open file object."
        if isinstance(filename, file):
            self.file = filename
            self.filename = None
        else:
            self.filename = filename

    def write(self, strings):
        "Write a list of strings; stops at the first item that is not one."
        for string in strings:
            if not isinstance(string, basestring):
                Trace.error('Not a string: ' + unicode(string) + ' in ' + unicode(strings))
                return
            self.writestring(string)

    def writestring(self, string):
        "Write a string, opening the output file on first use."
        if not self.file:
            self.file = codecs.open(self.filename, 'w', "utf-8")
        if self.file == sys.stdout and sys.version_info < (3, 0):
            # Python 2 standard output takes bytes
            string = string.encode('utf-8')
        self.file.write(string)

    def writeline(self, line):
        "Write a line to file"
        self.writestring(line + '\n')

    def close(self):
        "Close the output file, if it was ever opened."
        if self.file:
            self.file.close()
class Globable(object):
    """A bit of text which can be globbed (lumped together in bits).
    Methods current(), skipcurrent(), checkfor() and isout() have to be
    implemented by subclasses."""

    leavepending = False

    def __init__(self):
        self.endinglist = EndingList()

    def checkbytemark(self):
        "Check for a Unicode byte mark and skip it."
        if self.finished():
            return
        if ord(self.current()) == 0xfeff:
            self.skipcurrent()

    def isout(self):
        "Find out if we are out of the position yet."
        Trace.error('Unimplemented isout()')
        return True

    def current(self):
        "Return the current character."
        Trace.error('Unimplemented current()')
        return ''

    def checkfor(self, string):
        "Check for the given string in the current position."
        Trace.error('Unimplemented checkfor()')
        return False

    def finished(self):
        "Find out if the current text has finished: out, or at an ending."
        if self.isout():
            if not self.leavepending:
                self.endinglist.checkpending()
            return True
        return self.endinglist.checkin(self)

    def skipcurrent(self):
        "Return the current character and skip it."
        Trace.error('Unimplemented skipcurrent()')
        return ''

    def glob(self, currentcheck):
        "Glob a bit of text that satisfies a check on the current char."
        glob = ''
        while not self.finished() and currentcheck():
            glob += self.skipcurrent()
        return glob

    def globalpha(self):
        "Glob a bit of alpha text"
        return self.glob(lambda: self.current().isalpha())

    def globnumber(self):
        "Glob a row of digits."
        return self.glob(lambda: self.current().isdigit())

    def isidentifier(self):
        "Return if the current character is alphanumeric or _."
        return self.current().isalnum() or self.current() == '_'

    def globidentifier(self):
        "Glob alphanumeric and _ symbols."
        return self.glob(self.isidentifier)

    def isvalue(self):
        "Return if the current character is a value character: not a bracket or a space."
        if self.current().isspace():
            return False
        return self.current() not in '{}()'

    def globvalue(self):
        "Glob a value: any symbols but brackets."
        return self.glob(self.isvalue)

    def skipspace(self):
        "Skip all whitespace at current position."
        return self.glob(lambda: self.current().isspace())

    def globincluding(self, magicchar):
        "Glob a bit of text up to (including) the magic char."
        glob = self.glob(lambda: self.current() != magicchar) + magicchar
        self.skip(magicchar)
        return glob

    def globexcluding(self, excluded):
        "Glob a bit of text up until (excluding) any excluded character."
        return self.glob(lambda: self.current() not in excluded)

    def pushending(self, ending, optional = False):
        "Push a new ending to the bottom"
        self.endinglist.add(ending, optional)

    def popending(self, expected = None):
        "Pop the ending found at the current position, and skip over it."
        if self.isout() and self.leavepending:
            return expected
        ending = self.endinglist.pop(self)
        if expected and expected != ending:
            Trace.error('Expected ending ' + expected + ', got ' + ending)
        self.skip(ending)
        return ending

    def nextending(self):
        "Return the next ending in the queue."
        nextending = self.endinglist.findending(self)
        if not nextending:
            return None
        return nextending.ending


class EndingList(object):
    "A list of position endings"

    def __init__(self):
        self.endings = []

    def add(self, ending, optional = False):
        "Add a new ending to the list"
        self.endings.append(PositionEnding(ending, optional))

    def pickpending(self, pos):
        "Pick any pending endings from a parse position."
        self.endings += pos.endinglist.endings

    def checkin(self, pos):
        "Search for an ending at the position; True if there is one."
        return bool(self.findending(pos))

    def pop(self, pos):
        """Remove the ending at the current position and return its string.

        Optional endings pushed after it are dropped on the way; dropping a
        non-optional one is reported. Returns '' when no ending matches.
        """
        if pos.isout():
            Trace.error('No ending out of bounds')
            return ''
        ending = self.findending(pos)
        if not ending:
            Trace.error('No ending at ' + pos.current())
            return ''
        # newest first; iterate over a copy since the list shrinks
        for each in reversed(list(self.endings)):
            self.endings.remove(each)
            if each == ending:
                return each.ending
            elif not each.optional:
                Trace.error('Removed non-optional ending ' + unicode(each))
        Trace.error('No endings left')
        return ''

    def findending(self, pos):
        "Find the ending at the current position, looking past optional ones."
        for ending in reversed(self.endings):
            if ending.checkin(pos):
                return ending
            if not ending.optional:
                # a mandatory ending hides everything pushed before it
                return None
        return None

    def checkpending(self):
        "Check if there are any pending endings"
        if len(self.endings) != 0:
            Trace.error('Pending ' + unicode(self) + ' left open')

    def __unicode__(self):
        "Printable representation"
        return 'endings [' + ','.join([unicode(ending) for ending in self.endings]) + ']'


class PositionEnding(object):
    "An ending for a parsing position"

    def __init__(self, ending, optional):
        self.ending = ending
        self.optional = optional

    def checkin(self, pos):
        "Check for the ending"
        return pos.checkfor(self.ending)

    def __unicode__(self):
        "Printable representation"
        string = 'Ending ' + self.ending
        if self.optional:
            string += ' (optional)'
        return string


class Position(Globable):
    """A position in a text to parse.
    Including those in Globable, functions to implement by subclasses are:
    skip(), identifier(), extract(), isout() and current()."""

    def __init__(self):
        Globable.__init__(self)

    def skip(self, string):
        "Skip a string"
        Trace.error('Unimplemented skip()')

    def identifier(self):
        "Return an identifier for the current position."
        Trace.error('Unimplemented identifier()')
        return 'Error'

    def extract(self, length):
        """Extract the next string of the given length, or None if not
        enough text, without advancing the parse position."""
        Trace.error('Unimplemented extract()')
        return None

    def checkfor(self, string):
        "Check for a string at the given position."
        return string == self.extract(len(string))

    def checkforlower(self, string):
        "Check for a string in lower case."
        extracted = self.extract(len(string))
        if not extracted:
            return False
        return string.lower() == extracted.lower()

    def skipcurrent(self):
        "Return the current character and skip it."
        current = self.current()
        self.skip(current)
        return current

    def next(self):
        "Advance the position and return the next character."
        self.skipcurrent()
        return self.current()

    def checkskip(self, string):
        "Check for a string at the given position; if there, skip it"
        if not self.checkfor(string):
            return False
        self.skip(string)
        return True

    def error(self, message):
        "Show an error message and the position identifier."
        Trace.error(message + ': ' + self.identifier())


class TextPosition(Position):
    "A parse position based on a raw text."

    def __init__(self, text):
        "Create the position from some text."
        Position.__init__(self)
        self.pos = 0
        self.text = text
        self.checkbytemark()

    def skip(self, string):
        "Skip a string of characters."
        self.pos += len(string)

    def identifier(self):
        "Return a sample of the remaining text (at most 30 characters)."
        return '*' + self.text[self.pos:self.pos + 30] + '*'

    def isout(self):
        "Find out if we are out of the text yet."
        return self.pos >= len(self.text)

    def current(self):
        "Return the current character, assuming we are not out."
        return self.text[self.pos]

    def extract(self, length):
        "Extract the next string of the given length, or None if not enough text."
        if self.pos + length > len(self.text):
            return None
        return self.text[self.pos : self.pos + length]


class FilePosition(Position):
    "A parse position based on an underlying file."

    def __init__(self, filename):
        "Create the position from a file."
        Position.__init__(self)
        self.reader = LineReader(filename)
        self.pos = 0
        self.checkbytemark()

    def skip(self, string):
        "Skip a string of characters, moving on to following lines if needed."
        length = len(string)
        while self.pos + length > len(self.reader.currentline()):
            # the rest of this line plus its line break are used up
            length -= len(self.reader.currentline()) - self.pos + 1
            self.nextline()
        self.pos += length

    def currentline(self):
        "Get the current line of the underlying file."
        return self.reader.currentline()

    def nextline(self):
        "Go to the next line."
        self.reader.nextline()
        self.pos = 0

    def linenumber(self):
        "Return the line number of the file."
        return self.reader.linenumber + 1

    def identifier(self):
        "Return the current line, with a * at the position, and its number."
        line = self.reader.currentline()
        before = line[:self.pos]
        after = line[self.pos:]
        return 'line ' + unicode(self.linenumber()) + ': ' + before + '*' + after

    def isout(self):
        "Find out if we are out of the text yet."
        if self.pos > len(self.reader.currentline()):
            if self.pos > len(self.reader.currentline()) + 1:
                Trace.error('Out of the line ' + self.reader.currentline() + ': ' + unicode(self.pos))
            self.nextline()
        return self.reader.finished()

    def current(self):
        "Return the current character, assuming we are not out."
        if self.pos == len(self.reader.currentline()):
            # just past the text: stand in for the stripped line break
            return '\n'
        if self.pos > len(self.reader.currentline()):
            Trace.error('Out of the line ' + self.reader.currentline() + ': ' + unicode(self.pos))
            return '*'
        return self.reader.currentline()[self.pos]

    def extract(self, length):
        "Extract the next string of the given length, or None if not enough text."
        if self.pos + length > len(self.reader.currentline()):
            return None
        return self.reader.currentline()[self.pos : self.pos + length]
+ if self.pos > len(self.reader.currentline()): + if self.pos > len(self.reader.currentline()) + 1: + Trace.error('Out of the line ' + self.reader.currentline() + ': ' + unicode(self.pos)) + self.nextline() + return self.reader.finished() + + def current(self): + "Return the current character, assuming we are not out." + if self.pos == len(self.reader.currentline()): + return '\n' + if self.pos > len(self.reader.currentline()): + Trace.error('Out of the line ' + self.reader.currentline() + ': ' + unicode(self.pos)) + return '*' + return self.reader.currentline()[self.pos] + + def extract(self, length): + "Extract the next string of the given length, or None if not enough text." + if self.pos + length > len(self.reader.currentline()): + return None + return self.reader.currentline()[self.pos : self.pos + length] + + + +class Container(object): + "A container for text and objects in a lyx file" + + partkey = None + parent = None + begin = None + + def __init__(self): + self.contents = list() + + def process(self): + "Process contents" + pass + + def gethtml(self): + "Get the resulting HTML" + html = self.output.gethtml(self) + if isinstance(html, basestring): + Trace.error('Raw string ' + html) + html = [html] + return self.escapeall(html) + + def escapeall(self, lines): + "Escape all lines in an array according to the output options." 
+ result = [] + for line in lines: + if Options.html: + line = self.escape(line, EscapeConfig.html) + if Options.iso885915: + line = self.escape(line, EscapeConfig.iso885915) + line = self.escapeentities(line) + elif not Options.unicode: + line = self.escape(line, EscapeConfig.nonunicode) + result.append(line) + return result + + def escape(self, line, replacements = EscapeConfig.entities): + "Escape a line with replacements from elyxer.a map" + pieces = replacements.keys() + # do them in order + pieces.sort() + for piece in pieces: + if piece in line: + line = line.replace(piece, replacements[piece]) + return line + + def escapeentities(self, line): + "Escape all Unicode characters to HTML entities." + result = '' + pos = TextPosition(line) + while not pos.finished(): + if ord(pos.current()) > 128: + codepoint = hex(ord(pos.current())) + if codepoint == '0xd835': + codepoint = hex(ord(pos.next()) + 0xf800) + result += '' + codepoint[1:] + ';' + else: + result += pos.current() + pos.skipcurrent() + return result + + def searchall(self, type): + "Search for all embedded containers of a given type" + list = [] + self.searchprocess(type, lambda container: list.append(container)) + return list + + def searchremove(self, type): + "Search for all containers of a type and remove them" + list = self.searchall(type) + for container in list: + container.parent.contents.remove(container) + return list + + def searchprocess(self, type, process): + "Search for elements of a given type and process them" + self.locateprocess(lambda container: isinstance(container, type), process) + + def locateprocess(self, locate, process): + "Search for all embedded containers and process them" + for container in self.contents: + container.locateprocess(locate, process) + if locate(container): + process(container) + + def recursivesearch(self, locate, recursive, process): + "Perform a recursive search in the container." 
+ for container in self.contents: + if recursive(container): + container.recursivesearch(locate, recursive, process) + if locate(container): + process(container) + + def extracttext(self): + "Extract all text from elyxer.allowed containers." + result = '' + constants = ContainerExtractor(ContainerConfig.extracttext).extract(self) + for constant in constants: + result += constant.string + return result + + def group(self, index, group, isingroup): + "Group some adjoining elements into a group" + if index >= len(self.contents): + return + if hasattr(self.contents[index], 'grouped'): + return + while index < len(self.contents) and isingroup(self.contents[index]): + self.contents[index].grouped = True + group.contents.append(self.contents[index]) + self.contents.pop(index) + self.contents.insert(index, group) + + def remove(self, index): + "Remove a container but leave its contents" + container = self.contents[index] + self.contents.pop(index) + while len(container.contents) > 0: + self.contents.insert(index, container.contents.pop()) + + def tree(self, level = 0): + "Show in a tree" + Trace.debug(" " * level + unicode(self)) + for container in self.contents: + container.tree(level + 1) + + def getparameter(self, name): + "Get the value of a parameter, if present." + if not name in self.parameters: + return None + return self.parameters[name] + + def getparameterlist(self, name): + "Get the value of a comma-separated parameter as a list." + paramtext = self.getparameter(name) + if not paramtext: + return [] + return paramtext.split(',') + + def hasemptyoutput(self): + "Check if the parent's output is empty." 
+ current = self.parent + while current: + if current.output.isempty(): + return True + current = current.parent + return False + + def __unicode__(self): + "Get a description" + if not self.begin: + return self.__class__.__name__ + return self.__class__.__name__ + '@' + unicode(self.begin) + +class BlackBox(Container): + "A container that does not output anything" + + def __init__(self): + self.parser = LoneCommand() + self.output = EmptyOutput() + self.contents = [] + +class LyXFormat(BlackBox): + "Read the lyxformat command" + + def process(self): + "Show warning if version < 276" + version = int(self.header[1]) + if version < 276: + Trace.error('Warning: unsupported old format version ' + str(version)) + if version > int(GeneralConfig.version['lyxformat']): + Trace.error('Warning: unsupported new format version ' + str(version)) + +class StringContainer(Container): + "A container for a single string" + + parsed = None + + def __init__(self): + self.parser = StringParser() + self.output = StringOutput() + self.string = '' + + def process(self): + "Replace special chars from elyxer.the contents." + if self.parsed: + self.string = self.replacespecial(self.parsed) + self.parsed = None + + def replacespecial(self, line): + "Replace all special chars from elyxer.a line" + replaced = self.escape(line, EscapeConfig.entities) + replaced = self.changeline(replaced) + if ContainerConfig.string['startcommand'] in replaced and len(replaced) > 1: + # unprocessed commands + if self.begin: + message = 'Unknown command at ' + unicode(self.begin) + ': ' + else: + message = 'Unknown command: ' + Trace.error(message + replaced.strip()) + return replaced + + def changeline(self, line): + line = self.escape(line, EscapeConfig.chars) + if not ContainerConfig.string['startcommand'] in line: + return line + line = self.escape(line, EscapeConfig.commands) + return line + + def extracttext(self): + "Return all text." 
+ return self.string + + def __unicode__(self): + "Return a printable representation." + result = 'StringContainer' + if self.begin: + result += '@' + unicode(self.begin) + ellipsis = '...' + if len(self.string.strip()) <= 15: + ellipsis = '' + return result + ' (' + self.string.strip()[:15] + ellipsis + ')' + +class Constant(StringContainer): + "A constant string" + + def __init__(self, text): + self.contents = [] + self.string = text + self.output = StringOutput() + + def __unicode__(self): + return 'Constant: ' + self.string + +class TaggedText(Container): + "Text inside a tag" + + output = None + + def __init__(self): + self.parser = TextParser(self) + self.output = TaggedOutput() + + def complete(self, contents, tag, breaklines=False): + "Complete the tagged text and return it" + self.contents = contents + self.output.tag = tag + self.output.breaklines = breaklines + return self + + def constant(self, text, tag, breaklines=False): + "Complete the tagged text with a constant" + constant = Constant(text) + return self.complete([constant], tag, breaklines) + + def __unicode__(self): + "Return a printable representation." + if not hasattr(self.output, 'tag'): + return 'Emtpy tagged text' + if not self.output.tag: + return 'Tagged <unknown tag>' + return 'Tagged <' + self.output.tag + '>' + + + + + + +class DocumentParameters(object): + "Global parameters for the document." 
+ + pdftitle = None + indentstandard = False + tocdepth = 10 + startinglevel = 0 + maxdepth = 10 + language = None + bibliography = None + outputchanges = False + displaymode = False + + + + + + +class FormulaParser(Parser): + "Parses a formula" + + def parseheader(self, reader): + "See if the formula is inlined" + self.begin = reader.linenumber + 1 + type = self.parsetype(reader) + if not type: + reader.nextline() + type = self.parsetype(reader) + if not type: + Trace.error('Unknown formula type in ' + reader.currentline().strip()) + return ['unknown'] + return [type] + + def parsetype(self, reader): + "Get the formula type from the first line." + if reader.currentline().find(FormulaConfig.starts['simple']) >= 0: + return 'inline' + if reader.currentline().find(FormulaConfig.starts['complex']) >= 0: + return 'block' + if reader.currentline().find(FormulaConfig.starts['unnumbered']) >= 0: + return 'block' + if reader.currentline().find(FormulaConfig.starts['beginbefore']) >= 0: + return 'numbered' + return None + + def parse(self, reader): + "Parse the formula until the end" + formula = self.parseformula(reader) + while not reader.currentline().startswith(self.ending): + stripped = reader.currentline().strip() + if len(stripped) > 0: + Trace.error('Unparsed formula line ' + stripped) + reader.nextline() + reader.nextline() + return formula + + def parseformula(self, reader): + "Parse the formula contents" + simple = FormulaConfig.starts['simple'] + if simple in reader.currentline(): + rest = reader.currentline().split(simple, 1)[1] + if simple in rest: + # formula is $...$ + return self.parsesingleliner(reader, simple, simple) + # formula is multiline $...$ + return self.parsemultiliner(reader, simple, simple) + if FormulaConfig.starts['complex'] in reader.currentline(): + # formula of the form \[...\] + return self.parsemultiliner(reader, FormulaConfig.starts['complex'], + FormulaConfig.endings['complex']) + beginbefore = FormulaConfig.starts['beginbefore'] + 
beginafter = FormulaConfig.starts['beginafter'] + if beginbefore in reader.currentline(): + if reader.currentline().strip().endswith(beginafter): + current = reader.currentline().strip() + endsplit = current.split(beginbefore)[1].split(beginafter) + startpiece = beginbefore + endsplit[0] + beginafter + endbefore = FormulaConfig.endings['endbefore'] + endafter = FormulaConfig.endings['endafter'] + endpiece = endbefore + endsplit[0] + endafter + return startpiece + self.parsemultiliner(reader, startpiece, endpiece) + endpiece + Trace.error('Missing ' + beginafter + ' in ' + reader.currentline()) + return '' + begincommand = FormulaConfig.starts['command'] + beginbracket = FormulaConfig.starts['bracket'] + if begincommand in reader.currentline() and beginbracket in reader.currentline(): + endbracket = FormulaConfig.endings['bracket'] + return self.parsemultiliner(reader, beginbracket, endbracket) + Trace.error('Formula beginning ' + reader.currentline() + ' is unknown') + return '' + + def parsesingleliner(self, reader, start, ending): + "Parse a formula in one line" + line = reader.currentline().strip() + if not start in line: + Trace.error('Line ' + line + ' does not contain formula start ' + start) + return '' + if not line.endswith(ending): + Trace.error('Formula ' + line + ' does not end with ' + ending) + return '' + index = line.index(start) + rest = line[index + len(start):-len(ending)] + reader.nextline() + return rest + + def parsemultiliner(self, reader, start, ending): + "Parse a formula in multiple lines" + formula = '' + line = reader.currentline() + if not start in line: + Trace.error('Line ' + line.strip() + ' does not contain formula start ' + start) + return '' + index = line.index(start) + line = line[index + len(start):].strip() + while not line.endswith(ending): + formula += line + '\n' + reader.nextline() + line = reader.currentline() + formula += line[:-len(ending)] + reader.nextline() + return formula + +class MacroParser(FormulaParser): + "A 
parser for a formula macro." + + def parseheader(self, reader): + "See if the formula is inlined" + self.begin = reader.linenumber + 1 + return ['inline'] + + def parse(self, reader): + "Parse the formula until the end" + formula = self.parsemultiliner(reader, self.parent.start, self.ending) + reader.nextline() + return formula + + + + + + + + + +class FormulaBit(Container): + "A bit of a formula" + + type = None + size = 1 + original = '' + + def __init__(self): + "The formula bit type can be 'alpha', 'number', 'font'." + self.contents = [] + self.output = ContentsOutput() + + def setfactory(self, factory): + "Set the internal formula factory." + self.factory = factory + return self + + def add(self, bit): + "Add any kind of formula bit already processed" + self.contents.append(bit) + self.original += bit.original + bit.parent = self + + def skiporiginal(self, string, pos): + "Skip a string and add it to the original formula" + self.original += string + if not pos.checkskip(string): + Trace.error('String ' + string + ' not at ' + pos.identifier()) + + def computesize(self): + "Compute the size of the bit as the max of the sizes of all contents." + if len(self.contents) == 0: + return 1 + self.size = max([element.size for element in self.contents]) + return self.size + + def clone(self): + "Return a copy of itself." 
+ return self.factory.parseformula(self.original) + + def __unicode__(self): + "Get a string representation" + return self.__class__.__name__ + ' read in ' + self.original + +class TaggedBit(FormulaBit): + "A tagged string in a formula" + + def constant(self, constant, tag): + "Set the constant and the tag" + self.output = TaggedOutput().settag(tag) + self.add(FormulaConstant(constant)) + return self + + def complete(self, contents, tag, breaklines = False): + "Set the constant and the tag" + self.contents = contents + self.output = TaggedOutput().settag(tag, breaklines) + return self + + def selfcomplete(self, tag): + "Set the self-closing tag, no contents (as in <hr/>)." + self.output = TaggedOutput().settag(tag, empty = True) + return self + +class FormulaConstant(Constant): + "A constant string in a formula" + + def __init__(self, string): + "Set the constant string" + Constant.__init__(self, string) + self.original = string + self.size = 1 + self.type = None + + def computesize(self): + "Compute the size of the constant: always 1." + return self.size + + def clone(self): + "Return a copy of itself." + return FormulaConstant(self.original) + + def __unicode__(self): + "Return a printable representation." 
+ return 'Formula constant: ' + self.string + +class RawText(FormulaBit): + "A bit of text inside a formula" + + def detect(self, pos): + "Detect a bit of raw text" + return pos.current().isalpha() + + def parsebit(self, pos): + "Parse alphabetic text" + alpha = pos.globalpha() + self.add(FormulaConstant(alpha)) + self.type = 'alpha' + +class FormulaSymbol(FormulaBit): + "A symbol inside a formula" + + modified = FormulaConfig.modified + unmodified = FormulaConfig.unmodified['characters'] + + def detect(self, pos): + "Detect a symbol" + if pos.current() in FormulaSymbol.unmodified: + return True + if pos.current() in FormulaSymbol.modified: + return True + return False + + def parsebit(self, pos): + "Parse the symbol" + if pos.current() in FormulaSymbol.unmodified: + self.addsymbol(pos.current(), pos) + return + if pos.current() in FormulaSymbol.modified: + self.addsymbol(FormulaSymbol.modified[pos.current()], pos) + return + Trace.error('Symbol ' + pos.current() + ' not found') + + def addsymbol(self, symbol, pos): + "Add a symbol" + self.skiporiginal(pos.current(), pos) + self.contents.append(FormulaConstant(symbol)) + +class FormulaNumber(FormulaBit): + "A string of digits in a formula" + + def detect(self, pos): + "Detect a digit" + return pos.current().isdigit() + + def parsebit(self, pos): + "Parse a bunch of digits" + digits = pos.glob(lambda: pos.current().isdigit()) + self.add(FormulaConstant(digits)) + self.type = 'number' + +class Comment(FormulaBit): + "A LaTeX comment: % to the end of the line." + + start = FormulaConfig.starts['comment'] + + def detect(self, pos): + "Detect the %." + return pos.current() == self.start + + def parsebit(self, pos): + "Parse to the end of the line." + self.original += pos.globincluding('\n') + +class WhiteSpace(FormulaBit): + "Some white space inside a formula." + + def detect(self, pos): + "Detect the white space." + return pos.current().isspace() + + def parsebit(self, pos): + "Parse all whitespace." 
+ self.original += pos.skipspace() + + def __unicode__(self): + "Return a printable representation." + return 'Whitespace: *' + self.original + '*' + +class Bracket(FormulaBit): + "A {} bracket inside a formula" + + start = FormulaConfig.starts['bracket'] + ending = FormulaConfig.endings['bracket'] + + def __init__(self): + "Create a (possibly literal) new bracket" + FormulaBit.__init__(self) + self.inner = None + + def detect(self, pos): + "Detect the start of a bracket" + return pos.checkfor(self.start) + + def parsebit(self, pos): + "Parse the bracket" + self.parsecomplete(pos, self.innerformula) + return self + + def parsetext(self, pos): + "Parse a text bracket" + self.parsecomplete(pos, self.innertext) + return self + + def parseliteral(self, pos): + "Parse a literal bracket" + self.parsecomplete(pos, self.innerliteral) + return self + + def parsecomplete(self, pos, innerparser): + "Parse the start and end marks" + if not pos.checkfor(self.start): + Trace.error('Bracket should start with ' + self.start + ' at ' + pos.identifier()) + return None + self.skiporiginal(self.start, pos) + pos.pushending(self.ending) + innerparser(pos) + self.original += pos.popending(self.ending) + self.computesize() + + def innerformula(self, pos): + "Parse a whole formula inside the bracket" + while not pos.finished(): + self.add(self.factory.parseany(pos)) + + def innertext(self, pos): + "Parse some text inside the bracket, following textual rules." + specialchars = FormulaConfig.symbolfunctions.keys() + specialchars.append(FormulaConfig.starts['command']) + specialchars.append(FormulaConfig.starts['bracket']) + specialchars.append(Comment.start) + while not pos.finished(): + if pos.current() in specialchars: + self.add(self.factory.parseany(pos)) + if pos.checkskip(' '): + self.original += ' ' + else: + self.add(FormulaConstant(pos.skipcurrent())) + + def innerliteral(self, pos): + "Parse a literal inside the bracket, which does not generate HTML." 
+ self.literal = '' + while not pos.finished() and not pos.current() == self.ending: + if pos.current() == self.start: + self.parseliteral(pos) + else: + self.literal += pos.skipcurrent() + self.original += self.literal + +class SquareBracket(Bracket): + "A [] bracket inside a formula" + + start = FormulaConfig.starts['squarebracket'] + ending = FormulaConfig.endings['squarebracket'] + + def clone(self): + "Return a new square bracket with the same contents." + bracket = SquareBracket() + bracket.contents = self.contents + return bracket + + + +class MathsProcessor(object): + "A processor for a maths construction inside the FormulaProcessor." + + def process(self, contents, index): + "Process an element inside a formula." + Trace.error('Unimplemented process() in ' + unicode(self)) + + def __unicode__(self): + "Return a printable description." + return 'Maths processor ' + self.__class__.__name__ + +class FormulaProcessor(object): + "A processor specifically for formulas." + + processors = [] + + def process(self, bit): + "Process the contents of every formula bit, recursively." + self.processcontents(bit) + self.processinsides(bit) + self.traversewhole(bit) + + def processcontents(self, bit): + "Process the contents of a formula bit." + if not isinstance(bit, FormulaBit): + return + bit.process() + for element in bit.contents: + self.processcontents(element) + + def processinsides(self, bit): + "Process the insides (limits, brackets) in a formula bit." + if not isinstance(bit, FormulaBit): + return + for index, element in enumerate(bit.contents): + for processor in self.processors: + processor.process(bit.contents, index) + # continue with recursive processing + self.processinsides(element) + + def traversewhole(self, formula): + "Traverse over the contents to alter variables and space units." 
+ last = None + for bit, contents in self.traverse(formula): + if bit.type == 'alpha': + self.italicize(bit, contents) + elif bit.type == 'font' and last and last.type == 'number': + bit.contents.insert(0, FormulaConstant(u' ')) + last = bit + + def traverse(self, bit): + "Traverse a formula and yield a flattened structure of (bit, list) pairs." + for element in bit.contents: + if hasattr(element, 'type') and element.type: + yield (element, bit.contents) + elif isinstance(element, FormulaBit): + for pair in self.traverse(element): + yield pair + + def italicize(self, bit, contents): + "Italicize the given bit of text." + index = contents.index(bit) + contents[index] = TaggedBit().complete([bit], 'i') + + + + +class Formula(Container): + "A LaTeX formula" + + def __init__(self): + self.parser = FormulaParser() + self.output = TaggedOutput().settag('span class="formula"') + + def process(self): + "Convert the formula to tags" + if self.header[0] == 'inline': + DocumentParameters.displaymode = False + else: + DocumentParameters.displaymode = True + self.output.settag('div class="formula"', True) + if Options.jsmath: + self.jsmath() + elif Options.mathjax: + self.mathjax() + elif Options.googlecharts: + self.googlecharts() + else: + self.classic() + + def jsmath(self): + "Make the contents for jsMath." + if self.header[0] != 'inline': + self.output = TaggedOutput().settag('div class="math"') + else: + self.output = TaggedOutput().settag('span class="math"') + self.contents = [Constant(self.parsed)] + + def mathjax(self): + "Make the contents for MathJax." + self.output.tag = 'span class="MathJax_Preview"' + tag = 'script type="math/tex' + if self.header[0] != 'inline': + tag += ';mode=display' + self.contents = [TaggedText().constant(self.parsed, tag + '"', True)] + + def googlecharts(self): + "Make the contents using Google Charts http://code.google.com/apis/chart/." 
+ url = FormulaConfig.urls['googlecharts'] + urllib.quote_plus(self.parsed) + img = '<img class="chart" src="' + url + '" alt="' + self.parsed + '"/>' + self.contents = [Constant(img)] + + def classic(self): + "Make the contents using classic output generation with XHTML and CSS." + whole = FormulaFactory().parseformula(self.parsed) + FormulaProcessor().process(whole) + whole.parent = self + self.contents = [whole] + + def parse(self, pos): + "Parse using a parse position instead of self.parser." + if pos.checkskip('$$'): + self.parsedollarblock(pos) + elif pos.checkskip('$'): + self.parsedollarinline(pos) + elif pos.checkskip('\\('): + self.parseinlineto(pos, '\\)') + elif pos.checkskip('\\['): + self.parseblockto(pos, '\\]') + else: + pos.error('Unparseable formula') + self.process() + return self + + def parsedollarinline(self, pos): + "Parse a $...$ formula." + self.header = ['inline'] + self.parsedollar(pos) + + def parsedollarblock(self, pos): + "Parse a $$...$$ formula." + self.header = ['block'] + self.parsedollar(pos) + if not pos.checkskip('$'): + pos.error('Formula should be $$...$$, but last $ is missing.') + + def parsedollar(self, pos): + "Parse to the next $." + pos.pushending('$') + self.parsed = pos.globexcluding('$') + pos.popending('$') + + def parseinlineto(self, pos, limit): + "Parse a \\(...\\) formula." + self.header = ['inline'] + self.parseupto(pos, limit) + + def parseblockto(self, pos, limit): + "Parse a \\[...\\] formula." + self.header = ['block'] + self.parseupto(pos, limit) + + def parseupto(self, pos, limit): + "Parse a formula that ends with the given command." + pos.pushending(limit) + self.parsed = pos.glob(lambda: True) + pos.popending(limit) + + def __unicode__(self): + "Return a printable representation." 
+ if self.partkey and self.partkey.number: + return 'Formula (' + self.partkey.number + ')' + return 'Unnumbered formula' + +class WholeFormula(FormulaBit): + "Parse a whole formula" + + def detect(self, pos): + "Not outside the formula is enough." + return not pos.finished() + + def parsebit(self, pos): + "Parse with any formula bit" + while not pos.finished(): + self.add(self.factory.parseany(pos)) + +class FormulaFactory(object): + "Construct bits of formula" + + # bit types will be appended later + types = [FormulaSymbol, RawText, FormulaNumber, Bracket, Comment, WhiteSpace] + skippedtypes = [Comment, WhiteSpace] + defining = False + + def __init__(self): + "Initialize the map of instances." + self.instances = dict() + + def detecttype(self, type, pos): + "Detect a bit of a given type." + if pos.finished(): + return False + return self.instance(type).detect(pos) + + def instance(self, type): + "Get an instance of the given type." + if not type in self.instances or not self.instances[type]: + self.instances[type] = self.create(type) + return self.instances[type] + + def create(self, type): + "Create a new formula bit of the given type." + return Cloner.create(type).setfactory(self) + + def clearskipped(self, pos): + "Clear any skipped types." + while not pos.finished(): + if not self.skipany(pos): + return + return + + def skipany(self, pos): + "Skip any skipped types." + for type in self.skippedtypes: + if self.instance(type).detect(pos): + return self.parsetype(type, pos) + return None + + def parseany(self, pos): + "Parse any formula bit at the current location." + for type in self.types + self.skippedtypes: + if self.detecttype(type, pos): + return self.parsetype(type, pos) + Trace.error('Unrecognized formula at ' + pos.identifier()) + return FormulaConstant(pos.skipcurrent()) + + def parsetype(self, type, pos): + "Parse the given type and return it." 
+ bit = self.instance(type) + self.instances[type] = None + returnedbit = bit.parsebit(pos) + if returnedbit: + return returnedbit.setfactory(self) + return bit + + def parseformula(self, formula): + "Parse a string of text that contains a whole formula." + pos = TextPosition(formula) + whole = self.create(WholeFormula) + if whole.detect(pos): + whole.parsebit(pos) + return whole + # no formula found + if not pos.finished(): + Trace.error('Unknown formula at: ' + pos.identifier()) + whole.add(TaggedBit().constant(formula, 'span class="unknown"')) + return whole + + + + +import unicodedata + + + + + + + + + + + + +import gettext + + +class Translator(object): + "Reads the configuration file and tries to find a translation." + "Otherwise falls back to the messages in the config file." + + instance = None + + def translate(cls, key): + "Get the translated message for a key." + return cls.instance.getmessage(key) + + translate = classmethod(translate) + + def __init__(self): + self.translation = None + self.first = True + + def findtranslation(self): + "Find the translation for the document language." + self.langcodes = None + if not DocumentParameters.language: + Trace.error('No language in document') + return + if not DocumentParameters.language in TranslationConfig.languages: + Trace.error('Unknown language ' + DocumentParameters.language) + return + if TranslationConfig.languages[DocumentParameters.language] == 'en': + return + langcodes = [TranslationConfig.languages[DocumentParameters.language]] + try: + self.translation = gettext.translation('elyxer', None, langcodes) + except IOError: + Trace.error('No translation for ' + unicode(langcodes)) + + def getmessage(self, key): + "Get the translated message for the given key." 
+ if self.first: + self.findtranslation() + self.first = False + message = self.getuntranslated(key) + if not self.translation: + return message + try: + message = self.translation.ugettext(message) + except IOError: + pass + return message + + def getuntranslated(self, key): + "Get the untranslated message." + if not key in TranslationConfig.constants: + Trace.error('Cannot translate ' + key) + return key + return TranslationConfig.constants[key] + +Translator.instance = Translator() + + + +class NumberCounter(object): + "A counter for numbers (by default)." + "The type can be changed to return letters, roman numbers..." + + name = None + value = None + mode = None + master = None + + letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + symbols = NumberingConfig.sequence['symbols'] + romannumerals = [ + ('M', 1000), ('CM', 900), ('D', 500), ('CD', 400), ('C', 100), + ('XC', 90), ('L', 50), ('XL', 40), ('X', 10), ('IX', 9), ('V', 5), + ('IV', 4), ('I', 1) + ] + + def __init__(self, name): + "Give a name to the counter." + self.name = name + + def setmode(self, mode): + "Set the counter mode. Can be changed at runtime." + self.mode = mode + return self + + def init(self, value): + "Set an initial value." + self.value = value + + def gettext(self): + "Get the next value as a text string." + return unicode(self.value) + + def getletter(self): + "Get the next value as a letter." + return self.getsequence(self.letters) + + def getsymbol(self): + "Get the next value as a symbol." + return self.getsequence(self.symbols) + + def getsequence(self, sequence): + "Get the next value from elyxer.a sequence." + return sequence[(self.value - 1) % len(sequence)] + + def getroman(self): + "Get the next value as a roman number." + result = '' + number = self.value + for numeral, value in self.romannumerals: + if number >= value: + result += numeral * (number / value) + number = number % value + return result + + def getvalue(self): + "Get the current value as configured in the current mode." 
class NumberGenerator(object):
    """A number generator for unique sequences and hierarchical structures.

    Used in:
      * ordered part numbers: Chapter 3, Section 5.3.
      * unique part numbers: Footnote 15, Bibliography cite [15].
      * chaptered part numbers: Figure 3.15, Equation (8.3).
      * unique roman part numbers: Part I, Book IV.
    """

    # Slots for shared generator instances; the class itself leaves them None.
    chaptered = None
    generator = None

    # Layout names from the numbering configuration, lower-cased for lookup.
    romanlayouts = [x.lower() for x in NumberingConfig.layouts['roman']]
    orderedlayouts = [x.lower() for x in NumberingConfig.layouts['ordered']]

    # Class-level registry: every instance reads and writes this same dict.
    counters = dict()
    # Set to True by startappendix().
    appendix = None

    def deasterisk(self, type):
        "Remove the possible asterisk in a layout type."
        return type.replace('*', '')

    def isunique(self, type):
        "Find out if the layout type corresponds to a unique part."
        return self.isroman(type)

    def isroman(self, type):
        "Find out if the layout type should have roman numeration."
        return self.deasterisk(type).lower() in self.romanlayouts

    def isinordered(self, type):
        "Find out if the layout type corresponds to an (un)ordered part."
        return self.deasterisk(type).lower() in self.orderedlayouts

    def isnumbered(self, type):
        "Find out if the type for a layout corresponds to a numbered layout."
        if '*' in type:
            return False
        if self.isroman(type):
            return True
        if not self.isinordered(type):
            return False
        # Ordered layouts deeper than the configured depth stay unnumbered.
        return not self.getlevel(type) > DocumentParameters.maxdepth

    def isunordered(self, type):
        "Find out if the type contains an asterisk, basically."
        return '*' in type

    def getlevel(self, type):
        """Get the level that corresponds to a layout type.

        Returns 0 for unique (roman) layouts and, after reporting an error,
        for unknown layouts. Otherwise returns the 1-based position in
        orderedlayouts minus DocumentParameters.startinglevel.
        """
        if self.isunique(type):
            return 0
        if not self.isinordered(type):
            Trace.error('Unknown layout type ' + type)
            return 0
        type = self.deasterisk(type).lower()
        level = self.orderedlayouts.index(type) + 1
        return level - DocumentParameters.startinglevel

    def getparttype(self, type):
        """Obtain the type for the part: without the asterisk,
        and switched to Appendix if necessary."""
        if NumberGenerator.appendix and self.getlevel(type) == 1:
            return 'Appendix'
        return self.deasterisk(type)

    def generate(self, type):
        """Generate a number for a layout type.

        Unique part types such as Part or Book generate roman numbers: Part I.
        Ordered part types return dot-separated tuples: Chapter 5,
        Subsection 2.3.5.
        Everything else generates unique numbers: Bibliography [1].
        Each invocation results in a new number.
        """
        return self.getcounter(type).getnext()

    def getcounter(self, type):
        "Get the counter for the given type, creating it on first use."
        type = type.lower()
        if type not in self.counters:
            self.counters[type] = self.create(type)
        return self.counters[type]

    def create(self, type):
        """Create a counter for the given type.

        Numbered layouts below the first level get a dependent counter whose
        master is the counter of the layout just above them.
        """
        if self.isnumbered(type) and self.getlevel(type) > 1:
            # orderedlayouts holds lower-cased names, so normalise the lookup
            # key the same way getlevel() does.
            index = self.orderedlayouts.index(type.lower())
            above = self.orderedlayouts[index - 1]
            master = self.getcounter(above)
            return self.createdependent(type, master)
        counter = NumberCounter(type)
        if self.isroman(type):
            counter.setmode('I')
        return counter

    def getdependentcounter(self, type, master):
        "Get (or create) a counter of the given type that depends on another."
        existing = self.counters.get(type)
        # A stored counter without a master is replaced by a dependent one;
        # getattr() tolerates counters that never defined the attribute.
        if existing is None or not getattr(existing, 'master', None):
            self.counters[type] = self.createdependent(type, master)
        return self.counters[type]

    def createdependent(self, type, master):
        "Create a dependent counter given the master."
        return DependentCounter(type).setmaster(master)

    def startappendix(self):
        """Start appendices here.

        Switches the counter of the first ordered layout in use to mode 'A',
        resets it, and raises the class-wide appendix flag.
        """
        firsttype = self.orderedlayouts[DocumentParameters.startinglevel]
        counter = self.getcounter(firsttype)
        counter.setmode('A').reset()
        NumberGenerator.appendix = True
class ContainerSize(object):
    """The size of a container.

    Holds CSS-style size strings (width, height, maxwidth, maxheight) plus an
    optional scale, and renders them into a style attribute.
    """

    width = None
    height = None
    maxwidth = None
    maxheight = None
    scale = None

    def set(self, width = None, height = None):
        "Set the proper size with width and height; return self."
        self.setvalue('width', width)
        self.setvalue('height', height)
        return self

    def setmax(self, maxwidth = None, maxheight = None):
        "Set max width and/or height; return self."
        self.setvalue('maxwidth', maxwidth)
        self.setvalue('maxheight', maxheight)
        return self

    def readparameters(self, container):
        "Read width, height and scale off a container; return self."
        self.setparameter(container, 'width')
        self.setparameter(container, 'height')
        self.setparameter(container, 'scale')
        self.checkvalidheight(container)
        return self

    def setparameter(self, container, name):
        "Read a size parameter off a container, and set it if present."
        value = container.getparameter(name)
        self.setvalue(name, value)

    def setvalue(self, name, value):
        "Set the value of a parameter name, only if it's valid."
        value = self.processparameter(value)
        if value:
            setattr(self, name, value)

    def checkvalidheight(self, container):
        "Check if the height parameter is valid; otherwise erase it."
        heightspecial = container.getparameter('height_special')
        if (self.height and self.extractnumber(self.height) == '1'
                and heightspecial == 'totalheight'):
            self.height = None

    def processparameter(self, value):
        """Do the full processing on a parameter.

        Returns None for empty values and for values whose leading number is
        exactly '0'; otherwise returns the value with every configured
        ignored text removed.
        """
        if not value:
            return None
        if self.extractnumber(value) == '0':
            return None
        for ignored in StyleConfig.size['ignoredtexts']:
            if ignored in value:
                value = value.replace(ignored, '')
        return value

    def extractnumber(self, text):
        """Extract the first number in the given text.

        Collects leading digits and at most one decimal point; stops at the
        first other character. Returns '' when the text does not start with
        a number.
        """
        digits = []
        decimal = False
        for char in text:
            if char.isdigit():
                digits.append(char)
            elif char == '.' and not decimal:
                digits.append(char)
                decimal = True
            else:
                break
        return ''.join(digits)

    def checkimage(self, width, height):
        "Check image dimensions, set them if possible."
        if width:
            self.maxwidth = unicode(width) + 'px'
            if self.scale and not self.width:
                self.width = self.scalevalue(width)
        if height:
            self.maxheight = unicode(height) + 'px'
            if self.scale and not self.height:
                self.height = self.scalevalue(height)
        # A single known dimension lets the other one follow automatically.
        if self.width and not self.height:
            self.height = 'auto'
        if self.height and not self.width:
            self.width = 'auto'

    def scalevalue(self, value):
        "Scale the value according to the image scale and return it as unicode."
        scaled = value * int(self.scale) / 100
        return unicode(int(scaled)) + 'px'

    def removepercentwidth(self):
        """Remove percent width if present, to set it at the figure level.

        Returns the removed width, or None when there was no percent width.
        """
        if not self.width:
            return None
        if '%' not in self.width:
            return None
        width = self.width
        self.width = None
        if self.height == 'auto':
            self.height = None
        return width

    def addstyle(self, container):
        """Add the proper style attribute to the output tag.

        Reports an error and leaves the container untouched when its output
        is not a TaggedOutput, since there is no tag to extend.
        """
        if not isinstance(container.output, TaggedOutput):
            Trace.error('No tag to add style, in ' + unicode(container))
            # Without a tagged output there is nothing to append the style to.
            return
        if not (self.width or self.height or self.maxwidth or self.maxheight):
            # nothing to see here; move along
            return
        tag = ' style="'
        tag += self.styleparameter('width')
        tag += self.styleparameter('maxwidth')
        tag += self.styleparameter('height')
        tag += self.styleparameter('maxheight')
        # Each parameter ends in '; ': drop the final trailing space.
        if tag[-1] == ' ':
            tag = tag[:-1]
        tag += '"'
        container.output.tag += tag

    def styleparameter(self, name):
        "Get the style for a single parameter, e.g. 'max-width: 10px; '."
        value = getattr(self, name)
        if value:
            return name.replace('max', 'max-') + ': ' + value + '; '
        return ''
class TextFamily(TaggedText):
    "A bit of text from a different family"

    def process(self):
        """Parse the type of family and pick the matching output tag.

        The family name is read from the second header field. Unknown
        families are reported and rendered with a plain 'span' tag.
        """
        self.type = self.header[1]
        if self.type not in TagConfig.family:
            # Report the offending family name (self.type); the bare name
            # `type` would be the builtin and cannot be added to a string.
            Trace.error('Unrecognized family ' + self.type)
            self.output.tag = 'span'
            return
        self.output.tag = TagConfig.family[self.type]
class VerticalSpace(Container):
    "An inset that contains a vertical space."

    def __init__(self):
        self.parser = InsetParser()
        self.output = FixedOutput()

    def process(self):
        """Emit the configured HTML for a known vertical space type.

        Types missing from StyleConfig.vspaces are used verbatim as the
        height of a 'vspace' div instead.
        """
        kind = self.header[2]
        self.type = kind
        if kind in StyleConfig.vspaces:
            self.html = [StyleConfig.vspaces[kind]]
            return
        tag = 'div class="vspace" style="height: ' + kind + ';"'
        self.output = TaggedOutput().settag(tag, True)
class URL(Link):
    "A clickable URL"

    def process(self):
        """Build the link from the 'target', 'type' and 'name' parameters.

        The escaped target is the URL; a 'type' parameter, when present, is
        escaped and prepended to it. The visible text is the 'name'
        parameter, falling back to the escaped target.
        """
        escaped = self.escape(self.getparameter('target'))
        self.url = escaped
        prefix = self.getparameter('type')
        if prefix:
            self.url = self.escape(prefix) + escaped
        label = self.getparameter('name')
        if not label:
            label = escaped
        self.contents = [Constant(label)]
def postrecursive(self, container):
    """Postprocess the container contents recursively.

    Does nothing for objects without contents, with empty contents, or whose
    'postprocess' attribute is present but false. Otherwise every element is
    run through a fresh Postprocessor and the truthy results replace
    container.contents.
    """
    if not hasattr(container, 'contents'):
        return
    if len(container.contents) == 0:
        return
    if hasattr(container, 'postprocess') and not container.postprocess:
        return
    pipeline = Postprocessor()
    processed = []

    def feed(element):
        "Push one element through the pipeline, keeping any truthy result."
        result = pipeline.postprocess(element)
        if result:
            processed.append(result)

    for element in container.contents:
        feed(element)
    # two rounds to empty the pipeline
    feed(None)
    feed(None)
    container.contents = processed
class StageDict(object):
    "A dictionary of stages corresponding to classes"

    def __init__(self, classes, postprocessor):
        """Instantiate an element from each class and store as a dictionary.

        classes: iterable of stage classes, each constructible without
            arguments and exposing a 'processedclass' attribute, which
            becomes the lookup key.
        postprocessor: object stored on every stage as 'postprocessor'.
        """
        instances = self.instantiate(classes, postprocessor)
        self.stagedict = dict((x.processedclass, x) for x in instances)

    def instantiate(self, classes, postprocessor):
        "Instantiate an element from each class and attach the postprocessor."
        stages = []
        for stageclass in classes:
            # A plain constructor call replaces the manual __new__/__init__.
            stage = stageclass()
            stage.postprocessor = postprocessor
            stages.append(stage)
        return stages

    def getstage(self, element):
        """Get the stage for a given element, if the type is in the dict.

        Matching is on the element's exact class; returns None otherwise.
        """
        return self.stagedict.get(element.__class__)
class Reference(Link):
    "A reference to a label."

    # Class-level registry: label key -> list of references pointing at it.
    references = dict()
    key = 'none'

    def process(self):
        """Read the reference and set the arrow.

        A label that is already known gets an up arrow; otherwise a
        placeholder label of type 'preref' is created and the arrow points
        down. The reference is then registered under its key.
        """
        self.key = self.getparameter('reference')
        if self.key in Label.names:
            self.direction = u'↑'
            label = Label.names[self.key]
        else:
            self.direction = u'↓'
            # 'preref' is the link type; pass it by keyword so it is not
            # bound to the positional `url` parameter of complete().
            label = Label().complete(' ', self.key, type='preref')
        self.destination = label
        self.formatcontents()
        Reference.references.setdefault(self.key, []).append(self)

    def formatcontents(self):
        """Format the reference contents.

        Picks the template for the 'LatexCommand' parameter (default 'ref'),
        substitutes its placeholders, and splices the destination's title
        contents in at every '$' when a title is available.
        """
        formatkey = self.getparameter('LatexCommand')
        if not formatkey:
            formatkey = 'ref'
        self.formatted = u'↕'
        if formatkey in StyleConfig.referenceformats:
            self.formatted = StyleConfig.referenceformats[formatkey]
        else:
            Trace.error('Unknown reference format ' + formatkey)
        self.replace(u'↕', self.direction)
        self.replace('#', '1')
        self.replace('on-page', Translator.translate('on-page'))
        partkey = self.destination.findpartkey()
        # only if partkey and partkey.number are not null, send partkey.number
        self.replace('@', partkey and partkey.number)
        self.replace(u'¶', partkey and partkey.tocentry)
        if '$' not in self.formatted or not partkey or not partkey.titlecontents:
            if '$' in self.formatted:
                Trace.error('No title in ' + unicode(partkey))
            self.contents = [Constant(self.formatted)]
            return
        pieces = self.formatted.split('$')
        self.contents = [Constant(pieces[0])]
        for piece in pieces[1:]:
            self.contents += partkey.titlecontents
            self.contents.append(Constant(piece))

    def replace(self, key, value):
        "Replace a key in the format template with a value ('' if falsy)."
        if key not in self.formatted:
            return
        if not value:
            value = ''
        self.formatted = self.formatted.replace(key, value)

    def __unicode__(self):
        "Return a printable representation."
        return 'Reference ' + self.key
def parseupgreek(self, command, pos):
    r"""Parse the Greek \up / \Up command.

    '\upNAME' is looked up as '\NAME'; '\UpNAME' is looked up as '\NAME'
    with its first letter upper-cased. Returns the bit produced by
    parsewithcommand() with its type set to 'font', or None when the command
    is too short, has neither prefix, or the rewritten command is unknown.
    """
    if len(command) < 4:
        return None
    if command.startswith('\\up'):
        upcommand = '\\' + command[3:]
    elif command.startswith('\\Up'):
        # The command text is already extracted, so test the command itself
        # rather than trying to skip '\Up' in the remaining input.
        upcommand = '\\' + command[3:4].upper() + command[4:]
    else:
        Trace.error('Impossible upgreek command: ' + command)
        return
    upgreek = self.parsewithcommand(upcommand, pos)
    if upgreek:
        upgreek.type = 'font'
    return upgreek
class EmptyCommand(CommandBit):
    "An empty command (without parameters)"

    commandmap = FormulaConfig.commands

    def parsebit(self, pos):
        "Use the translated command as the only content; pos is not read."
        constant = FormulaConstant(self.translated)
        self.contents = [constant]
class OneParamFunction(CommandBit):
    "A function of one parameter"

    commandmap = FormulaConfig.onefunctions
    simplified = False

    def parsebit(self, pos):
        "Tag the output with the translated command and parse one parameter."
        tagged = TaggedOutput()
        self.output = tagged.settag(self.translated)
        self.parseparameter(pos)
        self.simplifyifpossible()

    def simplifyifpossible(self):
        """Try to simplify to a single character.

        When the whole original text is itself a key of the command map, the
        output becomes the fixed mapped value and 'simplified' is set.
        """
        if self.original not in self.commandmap:
            return
        self.output = FixedOutput()
        self.html = [self.commandmap[self.original]]
        self.simplified = True
class BigSymbol(object):
    "A big symbol generator."

    symbols = FormulaConfig.bigsymbols

    def __init__(self, symbol):
        "Remember the symbol to render."
        self.symbol = symbol

    def getpieces(self):
        """Get an array with all pieces.

        Unknown symbols, and known ones that should stay small, come back as
        a one-element list holding the symbol itself.
        """
        if self.symbol not in self.symbols or self.smalllimit():
            return [self.symbol]
        return self.symbols[self.symbol]

    def smalllimit(self):
        "Decide if the limit should be a small, one-line symbol."
        multipiece = len(self.symbols[self.symbol]) != 1 \
            if DocumentParameters.displaymode else False
        if multipiece:
            # Only display-mode, multi-piece symbols defer to the option.
            return Options.simplemath
        return True
def getpiece4(self, index):
    """Get the nth piece for a 4-piece bracket: curly bracket.

    Piece 0 goes on the first row, piece 3 on the last row, piece 2 on the
    middle row, and piece 1 fills every other row.
    """
    if index == 0:
        return self.pieces[0]
    if index == self.size - 1:
        return self.pieces[3]
    # Floor division keeps the middle row an integer under true division too.
    if index == (self.size - 1) // 2:
        return self.pieces[2]
    return self.pieces[1]
class FormulaRow(FormulaCommand):
    "An array row inside an array"

    cellseparator = FormulaConfig.array['cellseparator']

    def setalignments(self, alignments):
        "Store the column alignments, tag the row output; return self."
        self.alignments = alignments
        rowoutput = TaggedOutput()
        self.output = rowoutput.settag('span class="arrayrow"', True)
        return self

    def parsebit(self, pos):
        """Parse a whole row.

        Cells are created and parsed until the position is finished, skipping
        one cell separator after each cell. A row without contents gets an
        empty output.
        """
        pos.pushending(self.cellseparator, optional=True)
        column = 0
        while not pos.finished():
            cell = self.createcell(column)
            cell.parsebit(pos)
            self.add(cell)
            column += 1
            pos.checkskip(self.cellseparator)
        if len(self.contents) == 0:
            self.output = EmptyOutput()

    def createcell(self, index):
        "Create the cell for the given index; alignments repeat cyclically."
        slot = index % len(self.alignments)
        cell = self.factory.create(FormulaCell)
        return cell.setalignment(self.alignments[slot])
class FormulaArray(MultiRowFormula):
    "An array within a formula"

    piece = 'array'

    def parsebit(self, pos):
        "Parse the array: alignments first, then all rows."
        self.output = TaggedOutput().settag('span class="array"', False)
        self.parsealignments(pos)
        self.parserows(pos)

    def parsealignments(self, pos):
        """Parse the different alignments.

        The optional square-bracket literal sets the vertical alignment
        (default 'c'). Each character of the following literal becomes one
        horizontal column alignment. When that literal is missing or empty
        the array falls back to a single 'l' column, so that rows always
        have an alignment to cycle through.
        """
        # vertical
        self.valign = 'c'
        literal = self.parsesquareliteral(pos)
        if literal:
            self.valign = literal
        # horizontal
        literal = self.parseliteral(pos)
        if literal:
            self.alignments = list(literal)
        else:
            # parseliteral() yields None when no literal parameter is found.
            self.alignments = ['l']
class EquationEnvironment(MultiRowFormula):
    "A \\begin{}...\\end equation environment with rows and cells."

    def parsebit(self, pos):
        """Parse the whole environment.

        Alignments come from the configured environments, looked up by the
        piece name without asterisks. Unknown environments are reported and
        parsed as a single left-aligned column.
        """
        self.output = TaggedOutput().settag('span class="environment"', False)
        name = self.piece.replace('*', '')
        if name not in FormulaConfig.environments:
            Trace.error('Unknown equation environment ' + self.piece)
            alignments = ['l']
        else:
            alignments = FormulaConfig.environments[name]
        self.alignments = alignments
        self.parserows(pos)
+ bit.piece = piece + return bit + +FormulaCommand.types += [BeginCommand] + + + +class CombiningFunction(OneParamFunction): + + commandmap = FormulaConfig.combiningfunctions + + def parsebit(self, pos): + "Parse a combining function." + self.type = 'alpha' + combining = self.translated + parameter = self.parsesingleparameter(pos) + if not parameter: + Trace.error('Empty parameter for combining function ' + self.command) + elif len(parameter.extracttext()) != 1: + Trace.error('Applying combining function ' + self.command + ' to invalid string "' + parameter.extracttext() + '"') + self.contents.append(Constant(combining)) + + def parsesingleparameter(self, pos): + "Parse a parameter, or a single letter." + self.factory.clearskipped(pos) + if pos.finished(): + Trace.error('Error while parsing single parameter at ' + pos.identifier()) + return None + if self.factory.detecttype(Bracket, pos) \ + or self.factory.detecttype(FormulaCommand, pos): + return self.parseparameter(pos) + letter = FormulaConstant(pos.skipcurrent()) + self.add(letter) + return letter + +class DecoratingFunction(OneParamFunction): + "A function that decorates some bit of text" + + commandmap = FormulaConfig.decoratingfunctions + + def parsebit(self, pos): + "Parse a decorating function" + self.type = 'alpha' + symbol = self.translated + self.symbol = TaggedBit().constant(symbol, 'span class="symbolover"') + self.parameter = self.parseparameter(pos) + self.output = TaggedOutput().settag('span class="withsymbol"') + self.contents.insert(0, self.symbol) + self.parameter.output = TaggedOutput().settag('span class="undersymbol"') + self.simplifyifpossible() + +class LimitCommand(EmptyCommand): + "A command which accepts limits above and below, in display mode." + + commandmap = FormulaConfig.limitcommands + + def parsebit(self, pos): + "Parse a limit command." 
+ pieces = BigSymbol(self.translated).getpieces() + self.output = TaggedOutput().settag('span class="limits"') + for piece in pieces: + self.contents.append(TaggedBit().constant(piece, 'span class="limit"')) + +class LimitPreviousCommand(LimitCommand): + "A command to limit the previous command." + + commandmap = None + + def parsebit(self, pos): + "Do nothing." + self.output = TaggedOutput().settag('span class="limits"') + self.factory.clearskipped(pos) + + def __unicode__(self): + "Return a printable representation." + return 'Limit previous command' + +class LimitsProcessor(MathsProcessor): + "A processor for limits inside an element." + + def process(self, contents, index): + "Process the limits for an element." + if Options.simplemath: + return + if self.checklimits(contents, index): + self.modifylimits(contents, index) + if self.checkscript(contents, index) and self.checkscript(contents, index + 1): + self.modifyscripts(contents, index) + + def checklimits(self, contents, index): + "Check if the current position has a limits command." + if not DocumentParameters.displaymode: + return False + if self.checkcommand(contents, index + 1, LimitPreviousCommand): + self.limitsahead(contents, index) + return False + if not isinstance(contents[index], LimitCommand): + return False + return self.checkscript(contents, index + 1) + + def limitsahead(self, contents, index): + "Limit the current element based on the next." + contents[index + 1].add(contents[index].clone()) + contents[index].output = EmptyOutput() + + def modifylimits(self, contents, index): + "Modify a limits commands so that the limits appear above and below." 
+ limited = contents[index] + subscript = self.getlimit(contents, index + 1) + limited.contents.append(subscript) + if self.checkscript(contents, index + 1): + superscript = self.getlimit(contents, index + 1) + else: + superscript = TaggedBit().constant(u' ', 'sup class="limit"') + limited.contents.insert(0, superscript) + + def getlimit(self, contents, index): + "Get the limit for a limits command." + limit = self.getscript(contents, index) + limit.output.tag = limit.output.tag.replace('script', 'limit') + return limit + + def modifyscripts(self, contents, index): + "Modify the super- and subscript to appear vertically aligned." + subscript = self.getscript(contents, index) + # subscript removed so instead of index + 1 we get index again + superscript = self.getscript(contents, index) + scripts = TaggedBit().complete([superscript, subscript], 'span class="scripts"') + contents.insert(index, scripts) + + def checkscript(self, contents, index): + "Check if the current element is a sub- or superscript." + return self.checkcommand(contents, index, SymbolFunction) + + def checkcommand(self, contents, index, type): + "Check for the given type as the current element." + if len(contents) <= index: + return False + return isinstance(contents[index], type) + + def getscript(self, contents, index): + "Get the sub- or superscript." + bit = contents[index] + bit.output.tag += ' class="script"' + del contents[index] + return bit + +class BracketCommand(OneParamFunction): + "A command which defines a bracket." + + commandmap = FormulaConfig.bracketcommands + + def parsebit(self, pos): + "Parse the bracket." + OneParamFunction.parsebit(self, pos) + + def create(self, direction, character): + "Create the bracket for the given character." + self.original = character + self.command = '\\' + direction + self.contents = [FormulaConstant(character)] + return self + +class BracketProcessor(MathsProcessor): + "A processor for bracket commands." 
+ + def process(self, contents, index): + "Convert the bracket using Unicode pieces, if possible." + if Options.simplemath: + return + if self.checkleft(contents, index): + return self.processleft(contents, index) + + def processleft(self, contents, index): + "Process a left bracket." + rightindex = self.findright(contents, index + 1) + if not rightindex: + return + size = self.findmax(contents, index, rightindex) + self.resize(contents[index], size) + self.resize(contents[rightindex], size) + + def checkleft(self, contents, index): + "Check if the command at the given index is left." + return self.checkdirection(contents[index], '\\left') + + def checkright(self, contents, index): + "Check if the command at the given index is right." + return self.checkdirection(contents[index], '\\right') + + def checkdirection(self, bit, command): + "Check if the given bit is the desired bracket command." + if not isinstance(bit, BracketCommand): + return False + return bit.command == command + + def findright(self, contents, index): + "Find the right bracket starting at the given index, or 0." + depth = 1 + while index < len(contents): + if self.checkleft(contents, index): + depth += 1 + if self.checkright(contents, index): + depth -= 1 + if depth == 0: + return index + index += 1 + return None + + def findmax(self, contents, leftindex, rightindex): + "Find the max size of the contents between the two given indices." + sliced = contents[leftindex:rightindex] + return max([element.size for element in sliced]) + + def resize(self, command, size): + "Resize a bracket command to the given size." 
+ character = command.extracttext() + alignment = command.command.replace('\\', '') + bracket = BigBracket(size, character, alignment) + command.output = ContentsOutput() + command.contents = bracket.getcontents() + + +FormulaCommand.types += [ + DecoratingFunction, CombiningFunction, LimitCommand, BracketCommand, + ] + +FormulaProcessor.processors += [ + LimitsProcessor(), BracketProcessor(), + ] + + + +class ParameterDefinition(object): + "The definition of a parameter in a hybrid function." + "[] parameters are optional, {} parameters are mandatory." + "Each parameter has a one-character name, like {$1} or {$p}." + "A parameter that ends in ! like {$p!} is a literal." + "Example: [$1]{$p!} reads an optional parameter $1 and a literal mandatory parameter p." + + parambrackets = [('[', ']'), ('{', '}')] + + def __init__(self): + self.name = None + self.literal = False + self.optional = False + self.value = None + self.literalvalue = None + + def parse(self, pos): + "Parse a parameter definition: [$0], {$x}, {$1!}..." + for (opening, closing) in ParameterDefinition.parambrackets: + if pos.checkskip(opening): + if opening == '[': + self.optional = True + if not pos.checkskip('$'): + Trace.error('Wrong parameter name, did you mean $' + pos.current() + '?') + return None + self.name = pos.skipcurrent() + if pos.checkskip('!'): + self.literal = True + if not pos.checkskip(closing): + Trace.error('Wrong parameter closing ' + pos.skipcurrent()) + return None + return self + Trace.error('Wrong character in parameter template: ' + pos.skipcurrent()) + return None + + def read(self, pos, function): + "Read the parameter itself using the definition." 
+ if self.literal: + if self.optional: + self.literalvalue = function.parsesquareliteral(pos) + else: + self.literalvalue = function.parseliteral(pos) + if self.literalvalue: + self.value = FormulaConstant(self.literalvalue) + elif self.optional: + self.value = function.parsesquare(pos) + else: + self.value = function.parseparameter(pos) + + def __unicode__(self): + "Return a printable representation." + result = 'param ' + self.name + if self.value: + result += ': ' + unicode(self.value) + else: + result += ' (empty)' + return result + +class ParameterFunction(CommandBit): + "A function with a variable number of parameters defined in a template." + "The parameters are defined as a parameter definition." + + def readparams(self, readtemplate, pos): + "Read the params according to the template." + self.params = dict() + for paramdef in self.paramdefs(readtemplate): + paramdef.read(pos, self) + self.params['$' + paramdef.name] = paramdef + + def paramdefs(self, readtemplate): + "Read each param definition in the template" + pos = TextPosition(readtemplate) + while not pos.finished(): + paramdef = ParameterDefinition().parse(pos) + if paramdef: + yield paramdef + + def getparam(self, name): + "Get a parameter as parsed." + if not name in self.params: + return None + return self.params[name] + + def getvalue(self, name): + "Get the value of a parameter." + return self.getparam(name).value + + def getliteralvalue(self, name): + "Get the literal value of a parameter." + param = self.getparam(name) + if not param or not param.literalvalue: + return None + return param.literalvalue + +class HybridFunction(ParameterFunction): + """ + A parameter function where the output is also defined using a template. + The template can use a number of functions; each function has an associated + tag. + Example: [f0{$1},span class="fbox"] defines a function f0 which corresponds + to a span of class fbox, yielding <span class="fbox">$1</span>. 
+ Literal parameters can be used in tags definitions: + [f0{$1},span style="color: $p;"] + yields <span style="color: $p;">$1</span>, where $p is a literal parameter. + Sizes can be specified in hybridsizes, e.g. adding parameter sizes. By + default the resulting size is the max of all arguments. Sizes are used + to generate the right parameters. + A function followed by a single / is output as a self-closing XHTML tag: + [f0/,hr] + will generate <hr/>. + """ + + commandmap = FormulaConfig.hybridfunctions + + def parsebit(self, pos): + "Parse a function with [] and {} parameters" + readtemplate = self.translated[0] + writetemplate = self.translated[1] + self.readparams(readtemplate, pos) + self.contents = self.writeparams(writetemplate) + self.computehybridsize() + + def writeparams(self, writetemplate): + "Write all params according to the template" + return self.writepos(TextPosition(writetemplate)) + + def writepos(self, pos): + "Write all params as read in the parse position." + result = [] + while not pos.finished(): + if pos.checkskip('$'): + param = self.writeparam(pos) + if param: + result.append(param) + elif pos.checkskip('f'): + function = self.writefunction(pos) + if function: + function.type = None + result.append(function) + elif pos.checkskip('('): + result.append(self.writebracket('left', '(')) + elif pos.checkskip(')'): + result.append(self.writebracket('right', ')')) + else: + result.append(FormulaConstant(pos.skipcurrent())) + return result + + def writeparam(self, pos): + "Write a single param of the form $0, $x..." + name = '$' + pos.skipcurrent() + if not name in self.params: + Trace.error('Unknown parameter ' + name) + return None + if not self.params[name]: + return None + if pos.checkskip('.'): + self.params[name].value.type = pos.globalpha() + return self.params[name].value + + def writefunction(self, pos): + "Write a single function f0,...,fn." 
+ tag = self.readtag(pos) + if not tag: + return None + if pos.checkskip('/'): + # self-closing XHTML tag, such as <hr/> + return TaggedBit().selfcomplete(tag) + if not pos.checkskip('{'): + Trace.error('Function should be defined in {}') + return None + pos.pushending('}') + contents = self.writepos(pos) + pos.popending() + if len(contents) == 0: + return None + return TaggedBit().complete(contents, tag) + + def readtag(self, pos): + "Get the tag corresponding to the given index. Does parameter substitution." + if not pos.current().isdigit(): + Trace.error('Function should be f0,...,f9: f' + pos.current()) + return None + index = int(pos.skipcurrent()) + if 2 + index > len(self.translated): + Trace.error('Function f' + unicode(index) + ' is not defined') + return None + tag = self.translated[2 + index] + if not '$' in tag: + return tag + for variable in self.params: + if variable in tag: + param = self.params[variable] + if not param.literal: + Trace.error('Parameters in tag ' + tag + ' should be literal: {' + variable + '!}') + continue + if param.literalvalue: + value = param.literalvalue + else: + value = '' + tag = tag.replace(variable, value) + return tag + + def writebracket(self, direction, character): + "Return a new bracket looking at the given direction." + return self.factory.create(BracketCommand).create(direction, character) + + def computehybridsize(self): + "Compute the size of the hybrid function." + if not self.command in HybridSize.configsizes: + self.computesize() + return + self.size = HybridSize().getsize(self) + # set the size in all elements at first level + for element in self.contents: + element.size = self.size + +class HybridSize(object): + "The size associated with a hybrid function." + + configsizes = FormulaConfig.hybridsizes + + def getsize(self, function): + "Read the size for a function and parse it." 
+ sizestring = self.configsizes[function.command] + for name in function.params: + if name in sizestring: + size = function.params[name].value.computesize() + sizestring = sizestring.replace(name, unicode(size)) + if '$' in sizestring: + Trace.error('Unconverted variable in hybrid size: ' + sizestring) + return 1 + return eval(sizestring) + + +FormulaCommand.types += [HybridFunction] + + + + + + + + + +class HeaderParser(Parser): + "Parses the LyX header" + + def parse(self, reader): + "Parse header parameters into a dictionary, return the preamble." + contents = [] + self.parseending(reader, lambda: self.parseline(reader, contents)) + # skip last line + reader.nextline() + return contents + + def parseline(self, reader, contents): + "Parse a single line as a parameter or as a start" + line = reader.currentline() + if line.startswith(HeaderConfig.parameters['branch']): + self.parsebranch(reader) + return + elif line.startswith(HeaderConfig.parameters['lstset']): + LstParser().parselstset(reader) + return + elif line.startswith(HeaderConfig.parameters['beginpreamble']): + contents.append(self.factory.createcontainer(reader)) + return + # no match + self.parseparameter(reader) + + def parsebranch(self, reader): + "Parse all branch definitions." + branch = reader.currentline().split()[1] + reader.nextline() + subparser = HeaderParser().complete(HeaderConfig.parameters['endbranch']) + subparser.parse(reader) + options = BranchOptions(branch) + for key in subparser.parameters: + options.set(key, subparser.parameters[key]) + Options.branches[branch] = options + + def complete(self, ending): + "Complete the parser with the given ending." + self.ending = ending + return self + +class PreambleParser(Parser): + "A parser for the LyX preamble." + + preamble = [] + + def parse(self, reader): + "Parse the full preamble with all statements." 
+ self.ending = HeaderConfig.parameters['endpreamble'] + self.parseending(reader, lambda: self.parsepreambleline(reader)) + return [] + + def parsepreambleline(self, reader): + "Parse a single preamble line." + PreambleParser.preamble.append(reader.currentline()) + reader.nextline() + +class LstParser(object): + "Parse global and local lstparams." + + globalparams = dict() + + def parselstset(self, reader): + "Parse a declaration of lstparams in lstset." + paramtext = self.extractlstset(reader) + if not '{' in paramtext: + Trace.error('Missing opening bracket in lstset: ' + paramtext) + return + lefttext = paramtext.split('{')[1] + croppedtext = lefttext[:-1] + LstParser.globalparams = self.parselstparams(croppedtext) + + def extractlstset(self, reader): + "Extract the global lstset parameters." + paramtext = '' + while not reader.finished(): + paramtext += reader.currentline() + reader.nextline() + if paramtext.endswith('}'): + return paramtext + Trace.error('Could not find end of \\lstset settings; aborting') + + def parsecontainer(self, container): + "Parse some lstparams from elyxer.a container." + container.lstparams = LstParser.globalparams.copy() + paramlist = container.getparameterlist('lstparams') + container.lstparams.update(self.parselstparams(paramlist)) + + def parselstparams(self, paramlist): + "Process a number of lstparams from elyxer.a list." + paramdict = dict() + for param in paramlist: + if not '=' in param: + if len(param.strip()) > 0: + Trace.error('Invalid listing parameter ' + param) + else: + key, value = param.split('=', 1) + paramdict[key] = value + return paramdict + + + + +class MacroDefinition(CommandBit): + "A function that defines a new command (a macro)." + + macros = dict() + + def parsebit(self, pos): + "Parse the function that defines the macro." 
+ self.output = EmptyOutput() + self.parameternumber = 0 + self.defaults = [] + self.factory.defining = True + self.parseparameters(pos) + self.factory.defining = False + Trace.debug('New command ' + self.newcommand + ' (' + \ + unicode(self.parameternumber) + ' parameters)') + self.macros[self.newcommand] = self + + def parseparameters(self, pos): + "Parse all optional parameters (number of parameters, default values)" + "and the mandatory definition." + self.newcommand = self.parsenewcommand(pos) + # parse number of parameters + literal = self.parsesquareliteral(pos) + if literal: + self.parameternumber = int(literal) + # parse all default values + bracket = self.parsesquare(pos) + while bracket: + self.defaults.append(bracket) + bracket = self.parsesquare(pos) + # parse mandatory definition + self.definition = self.parseparameter(pos) + + def parsenewcommand(self, pos): + "Parse the name of the new command." + self.factory.clearskipped(pos) + if self.factory.detecttype(Bracket, pos): + return self.parseliteral(pos) + if self.factory.detecttype(FormulaCommand, pos): + return self.factory.create(FormulaCommand).extractcommand(pos) + Trace.error('Unknown formula bit in defining function at ' + pos.identifier()) + return 'unknown' + + def instantiate(self): + "Return an instance of the macro." + return self.definition.clone() + +class MacroParameter(FormulaBit): + "A parameter from elyxer.a macro." + + def detect(self, pos): + "Find a macro parameter: #n." + return pos.checkfor('#') + + def parsebit(self, pos): + "Parse the parameter: #n." + if not pos.checkskip('#'): + Trace.error('Missing parameter start #.') + return + self.number = int(pos.skipcurrent()) + self.original = '#' + unicode(self.number) + self.contents = [TaggedBit().constant('#' + unicode(self.number), 'span class="unknown"')] + +class MacroFunction(CommandBit): + "A function that was defined using a macro." 
+ + commandmap = MacroDefinition.macros + + def parsebit(self, pos): + "Parse a number of input parameters." + self.output = FilteredOutput() + self.values = [] + macro = self.translated + self.parseparameters(pos, macro) + self.completemacro(macro) + + def parseparameters(self, pos, macro): + "Parse as many parameters as are needed." + self.parseoptional(pos, list(macro.defaults)) + self.parsemandatory(pos, macro.parameternumber - len(macro.defaults)) + if len(self.values) < macro.parameternumber: + Trace.error('Missing parameters in macro ' + unicode(self)) + + def parseoptional(self, pos, defaults): + "Parse optional parameters." + optional = [] + while self.factory.detecttype(SquareBracket, pos): + optional.append(self.parsesquare(pos)) + if len(optional) > len(defaults): + break + for value in optional: + default = defaults.pop() + if len(value.contents) > 0: + self.values.append(value) + else: + self.values.append(default) + self.values += defaults + + def parsemandatory(self, pos, number): + "Parse a number of mandatory parameters." + for index in range(number): + parameter = self.parsemacroparameter(pos, number - index) + if not parameter: + return + self.values.append(parameter) + + def parsemacroparameter(self, pos, remaining): + "Parse a macro parameter. Could be a bracket or a single letter." + "If there are just two values remaining and there is a running number," + "parse as two separater numbers." + self.factory.clearskipped(pos) + if pos.finished(): + return None + if self.factory.detecttype(FormulaNumber, pos): + return self.parsenumbers(pos, remaining) + return self.parseparameter(pos) + + def parsenumbers(self, pos, remaining): + "Parse the remaining parameters as a running number." + "For example, 12 would be {1}{2}." 
+ number = self.factory.parsetype(FormulaNumber, pos) + if not len(number.original) == remaining: + return number + for digit in number.original: + value = self.factory.create(FormulaNumber) + value.add(FormulaConstant(digit)) + value.type = number + self.values.append(value) + return None + + def completemacro(self, macro): + "Complete the macro with the parameters read." + self.contents = [macro.instantiate()] + replaced = [False] * len(self.values) + for parameter in self.searchall(MacroParameter): + index = parameter.number - 1 + if index >= len(self.values): + Trace.error('Macro parameter index out of bounds: ' + unicode(index)) + return + replaced[index] = True + parameter.contents = [self.values[index].clone()] + for index in range(len(self.values)): + if not replaced[index]: + self.addfilter(index, self.values[index]) + + def addfilter(self, index, value): + "Add a filter for the given parameter number and parameter value." + original = '#' + unicode(index + 1) + value = ''.join(self.values[0].gethtml()) + self.output.addfilter(original, value) + +class FormulaMacro(Formula): + "A math macro defined in an inset." + + def __init__(self): + self.parser = MacroParser() + self.output = EmptyOutput() + + def __unicode__(self): + "Return a printable representation." + return 'Math macro' + +FormulaFactory.types += [ MacroParameter ] + +FormulaCommand.types += [ + MacroFunction, + ] + + + +def math2html(formula): + "Convert some TeX math to HTML." 
+ factory = FormulaFactory() + whole = factory.parseformula(formula) + FormulaProcessor().process(whole) + whole.process() + return ''.join(whole.gethtml()) + +def main(): + "Main function, called if invoked from elyxer.the command line" + args = sys.argv + Options().parseoptions(args) + if len(args) != 1: + Trace.error('Usage: math2html.py escaped_string') + exit() + result = math2html(args[0]) + Trace.message(result) + +if __name__ == '__main__': + main() + Added: trunk/docutils/src/main/resources/docutils/docutils/utils/math/tex2unichar.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/math/tex2unichar.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/math/tex2unichar.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,662 @@ +# -*- coding: utf-8 -*- + +# LaTeX math to Unicode symbols translation dictionaries. +# Generated with ``write_tex2unichar.py`` from the data in +# http://milde.users.sourceforge.net/LUCR/Math/ + +# Includes commands from: wasysym, stmaryrd, mathdots, mathabx, esint, bbold, amsxtra, amsmath, amssymb, standard LaTeX + +mathaccent = { + 'acute': u'\u0301', # x́ COMBINING ACUTE ACCENT + 'bar': u'\u0304', # x̄ COMBINING MACRON + 'breve': u'\u0306', # x̆ COMBINING BREVE + 'check': u'\u030c', # x̌ COMBINING CARON + 'ddddot': u'\u20dc', # x⃜ COMBINING FOUR DOTS ABOVE + 'dddot': u'\u20db', # x⃛ COMBINING THREE DOTS ABOVE + 'ddot': u'\u0308', # ẍ COMBINING DIAERESIS + 'dot': u'\u0307', # ẋ COMBINING DOT ABOVE + 'grave': u'\u0300', # x̀ COMBINING GRAVE ACCENT + 'hat': u'\u0302', # x̂ COMBINING CIRCUMFLEX ACCENT + 'mathring': u'\u030a', # x̊ COMBINING RING ABOVE + 'not': u'\u0338', # x̸ COMBINING LONG SOLIDUS OVERLAY + 'overleftarrow': u'\u20d6', # x⃖ COMBINING LEFT ARROW ABOVE + 'overleftrightarrow': u'\u20e1', # x⃡ COMBINING LEFT RIGHT ARROW ABOVE + 'overline': u'\u0305', # x̅ COMBINING OVERLINE + 'overrightarrow': u'\u20d7', # x⃗ COMBINING RIGHT 
ARROW ABOVE + 'tilde': u'\u0303', # x̃ COMBINING TILDE + 'underbar': u'\u0331', # x̱ COMBINING MACRON BELOW + 'underleftarrow': u'\u20ee', # x⃮ COMBINING LEFT ARROW BELOW + 'underline': u'\u0332', # x̲ COMBINING LOW LINE + 'underrightarrow': u'\u20ef', # x⃯ COMBINING RIGHT ARROW BELOW + 'vec': u'\u20d7', # x⃗ COMBINING RIGHT ARROW ABOVE + 'widehat': u'\u0302', # x̂ COMBINING CIRCUMFLEX ACCENT + 'widetilde': u'\u0303', # x̃ COMBINING TILDE + } +mathalpha = { + 'Bbbk': u'\U0001d55c', # 𝕜 MATHEMATICAL DOUBLE-STRUCK SMALL K + 'Delta': u'\u0394', # Δ GREEK CAPITAL LETTER DELTA + 'Gamma': u'\u0393', # Γ GREEK CAPITAL LETTER GAMMA + 'Im': u'\u2111', # ℑ BLACK-LETTER CAPITAL I + 'Lambda': u'\u039b', # Λ GREEK CAPITAL LETTER LAMDA + 'Omega': u'\u03a9', # Ω GREEK CAPITAL LETTER OMEGA + 'Phi': u'\u03a6', # Φ GREEK CAPITAL LETTER PHI + 'Pi': u'\u03a0', # Π GREEK CAPITAL LETTER PI + 'Psi': u'\u03a8', # Ψ GREEK CAPITAL LETTER PSI + 'Re': u'\u211c', # ℜ BLACK-LETTER CAPITAL R + 'Sigma': u'\u03a3', # Σ GREEK CAPITAL LETTER SIGMA + 'Theta': u'\u0398', # Θ GREEK CAPITAL LETTER THETA + 'Upsilon': u'\u03a5', # Υ GREEK CAPITAL LETTER UPSILON + 'Xi': u'\u039e', # Ξ GREEK CAPITAL LETTER XI + 'aleph': u'\u2135', # ℵ ALEF SYMBOL + 'alpha': u'\u03b1', # α GREEK SMALL LETTER ALPHA + 'beta': u'\u03b2', # β GREEK SMALL LETTER BETA + 'beth': u'\u2136', # ℶ BET SYMBOL + 'chi': u'\u03c7', # χ GREEK SMALL LETTER CHI + 'daleth': u'\u2138', # ℸ DALET SYMBOL + 'delta': u'\u03b4', # δ GREEK SMALL LETTER DELTA + 'digamma': u'\u03dc', # Ϝ GREEK LETTER DIGAMMA + 'ell': u'\u2113', # ℓ SCRIPT SMALL L + 'epsilon': u'\u03f5', # ϵ GREEK LUNATE EPSILON SYMBOL + 'eta': u'\u03b7', # η GREEK SMALL LETTER ETA + 'eth': u'\xf0', # ð LATIN SMALL LETTER ETH + 'gamma': u'\u03b3', # γ GREEK SMALL LETTER GAMMA + 'gimel': u'\u2137', # ℷ GIMEL SYMBOL + 'hbar': u'\u210f', # ℏ PLANCK CONSTANT OVER TWO PI + 'hslash': u'\u210f', # ℏ PLANCK CONSTANT OVER TWO PI + 'imath': u'\u0131', # ı LATIN SMALL LETTER DOTLESS I + 'iota': 
u'\u03b9', # ι GREEK SMALL LETTER IOTA + 'jmath': u'\u0237', # ȷ LATIN SMALL LETTER DOTLESS J + 'kappa': u'\u03ba', # κ GREEK SMALL LETTER KAPPA + 'lambda': u'\u03bb', # λ GREEK SMALL LETTER LAMDA + 'mu': u'\u03bc', # μ GREEK SMALL LETTER MU + 'nu': u'\u03bd', # ν GREEK SMALL LETTER NU + 'omega': u'\u03c9', # ω GREEK SMALL LETTER OMEGA + 'phi': u'\u03d5', # ϕ GREEK PHI SYMBOL + 'pi': u'\u03c0', # π GREEK SMALL LETTER PI + 'psi': u'\u03c8', # ψ GREEK SMALL LETTER PSI + 'rho': u'\u03c1', # ρ GREEK SMALL LETTER RHO + 'sigma': u'\u03c3', # σ GREEK SMALL LETTER SIGMA + 'tau': u'\u03c4', # τ GREEK SMALL LETTER TAU + 'theta': u'\u03b8', # θ GREEK SMALL LETTER THETA + 'upsilon': u'\u03c5', # υ GREEK SMALL LETTER UPSILON + 'varDelta': u'\U0001d6e5', # 𝛥 MATHEMATICAL ITALIC CAPITAL DELTA + 'varGamma': u'\U0001d6e4', # 𝛤 MATHEMATICAL ITALIC CAPITAL GAMMA + 'varLambda': u'\U0001d6ec', # 𝛬 MATHEMATICAL ITALIC CAPITAL LAMDA + 'varOmega': u'\U0001d6fa', # 𝛺 MATHEMATICAL ITALIC CAPITAL OMEGA + 'varPhi': u'\U0001d6f7', # 𝛷 MATHEMATICAL ITALIC CAPITAL PHI + 'varPi': u'\U0001d6f1', # 𝛱 MATHEMATICAL ITALIC CAPITAL PI + 'varPsi': u'\U0001d6f9', # 𝛹 MATHEMATICAL ITALIC CAPITAL PSI + 'varSigma': u'\U0001d6f4', # 𝛴 MATHEMATICAL ITALIC CAPITAL SIGMA + 'varTheta': u'\U0001d6e9', # 𝛩 MATHEMATICAL ITALIC CAPITAL THETA + 'varUpsilon': u'\U0001d6f6', # 𝛶 MATHEMATICAL ITALIC CAPITAL UPSILON + 'varXi': u'\U0001d6ef', # 𝛯 MATHEMATICAL ITALIC CAPITAL XI + 'varepsilon': u'\u03b5', # ε GREEK SMALL LETTER EPSILON + 'varkappa': u'\U0001d718', # 𝜘 MATHEMATICAL ITALIC KAPPA SYMBOL + 'varphi': u'\u03c6', # φ GREEK SMALL LETTER PHI + 'varpi': u'\u03d6', # ϖ GREEK PI SYMBOL + 'varrho': u'\u03f1', # ϱ GREEK RHO SYMBOL + 'varsigma': u'\u03c2', # ς GREEK SMALL LETTER FINAL SIGMA + 'vartheta': u'\u03d1', # ϑ GREEK THETA SYMBOL + 'wp': u'\u2118', # ℘ SCRIPT CAPITAL P + 'xi': u'\u03be', # ξ GREEK SMALL LETTER XI + 'zeta': u'\u03b6', # ζ GREEK SMALL LETTER ZETA + } +mathbin = { + 'Cap': u'\u22d2', # ⋒ DOUBLE 
INTERSECTION + 'Circle': u'\u25cb', # ○ WHITE CIRCLE + 'Cup': u'\u22d3', # ⋓ DOUBLE UNION + 'LHD': u'\u25c0', # ◀ BLACK LEFT-POINTING TRIANGLE + 'RHD': u'\u25b6', # ▶ BLACK RIGHT-POINTING TRIANGLE + 'amalg': u'\u2a3f', # ⨿ AMALGAMATION OR COPRODUCT + 'ast': u'\u2217', # ∗ ASTERISK OPERATOR + 'barwedge': u'\u22bc', # ⊼ NAND + 'bigtriangledown': u'\u25bd', # ▽ WHITE DOWN-POINTING TRIANGLE + 'bigtriangleup': u'\u25b3', # △ WHITE UP-POINTING TRIANGLE + 'bindnasrepma': u'\u214b', # ⅋ TURNED AMPERSAND + 'blacklozenge': u'\u29eb', # ⧫ BLACK LOZENGE + 'blacktriangledown': u'\u25be', # ▾ BLACK DOWN-POINTING SMALL TRIANGLE + 'blacktriangleleft': u'\u25c2', # ◂ BLACK LEFT-POINTING SMALL TRIANGLE + 'blacktriangleright': u'\u25b8', # ▸ BLACK RIGHT-POINTING SMALL TRIANGLE + 'blacktriangleup': u'\u25b4', # ▴ BLACK UP-POINTING SMALL TRIANGLE + 'boxast': u'\u29c6', # ⧆ SQUARED ASTERISK + 'boxbar': u'\u25eb', # ◫ WHITE SQUARE WITH VERTICAL BISECTING LINE + 'boxbox': u'\u29c8', # ⧈ SQUARED SQUARE + 'boxbslash': u'\u29c5', # ⧅ SQUARED FALLING DIAGONAL SLASH + 'boxcircle': u'\u29c7', # ⧇ SQUARED SMALL CIRCLE + 'boxdot': u'\u22a1', # ⊡ SQUARED DOT OPERATOR + 'boxminus': u'\u229f', # ⊟ SQUARED MINUS + 'boxplus': u'\u229e', # ⊞ SQUARED PLUS + 'boxslash': u'\u29c4', # ⧄ SQUARED RISING DIAGONAL SLASH + 'boxtimes': u'\u22a0', # ⊠ SQUARED TIMES + 'bullet': u'\u2219', # ∙ BULLET OPERATOR + 'cap': u'\u2229', # ∩ INTERSECTION + 'cdot': u'\u22c5', # ⋅ DOT OPERATOR + 'circ': u'\u2218', # ∘ RING OPERATOR + 'circledast': u'\u229b', # ⊛ CIRCLED ASTERISK OPERATOR + 'circledcirc': u'\u229a', # ⊚ CIRCLED RING OPERATOR + 'circleddash': u'\u229d', # ⊝ CIRCLED DASH + 'cup': u'\u222a', # ∪ UNION + 'curlyvee': u'\u22ce', # ⋎ CURLY LOGICAL OR + 'curlywedge': u'\u22cf', # ⋏ CURLY LOGICAL AND + 'dagger': u'\u2020', # † DAGGER + 'ddagger': u'\u2021', # ‡ DOUBLE DAGGER + 'diamond': u'\u22c4', # ⋄ DIAMOND OPERATOR + 'div': u'\xf7', # ÷ DIVISION SIGN + 'divideontimes': u'\u22c7', # ⋇ DIVISION TIMES + 'dotplus': 
u'\u2214', # ∔ DOT PLUS + 'doublebarwedge': u'\u2a5e', # ⩞ LOGICAL AND WITH DOUBLE OVERBAR + 'intercal': u'\u22ba', # ⊺ INTERCALATE + 'interleave': u'\u2af4', # ⫴ TRIPLE VERTICAL BAR BINARY RELATION + 'land': u'\u2227', # ∧ LOGICAL AND + 'leftthreetimes': u'\u22cb', # ⋋ LEFT SEMIDIRECT PRODUCT + 'lhd': u'\u25c1', # ◁ WHITE LEFT-POINTING TRIANGLE + 'lor': u'\u2228', # ∨ LOGICAL OR + 'ltimes': u'\u22c9', # ⋉ LEFT NORMAL FACTOR SEMIDIRECT PRODUCT + 'mp': u'\u2213', # ∓ MINUS-OR-PLUS SIGN + 'odot': u'\u2299', # ⊙ CIRCLED DOT OPERATOR + 'ominus': u'\u2296', # ⊖ CIRCLED MINUS + 'oplus': u'\u2295', # ⊕ CIRCLED PLUS + 'oslash': u'\u2298', # ⊘ CIRCLED DIVISION SLASH + 'otimes': u'\u2297', # ⊗ CIRCLED TIMES + 'pm': u'\xb1', # ± PLUS-MINUS SIGN + 'rhd': u'\u25b7', # ▷ WHITE RIGHT-POINTING TRIANGLE + 'rightthreetimes': u'\u22cc', # ⋌ RIGHT SEMIDIRECT PRODUCT + 'rtimes': u'\u22ca', # ⋊ RIGHT NORMAL FACTOR SEMIDIRECT PRODUCT + 'setminus': u'\u29f5', # ⧵ REVERSE SOLIDUS OPERATOR + 'slash': u'\u2215', # ∕ DIVISION SLASH + 'smallsetminus': u'\u2216', # ∖ SET MINUS + 'smalltriangledown': u'\u25bf', # ▿ WHITE DOWN-POINTING SMALL TRIANGLE + 'smalltriangleleft': u'\u25c3', # ◃ WHITE LEFT-POINTING SMALL TRIANGLE + 'smalltriangleright': u'\u25b9', # ▹ WHITE RIGHT-POINTING SMALL TRIANGLE + 'smalltriangleup': u'\u25b5', # ▵ WHITE UP-POINTING SMALL TRIANGLE + 'sqcap': u'\u2293', # ⊓ SQUARE CAP + 'sqcup': u'\u2294', # ⊔ SQUARE CUP + 'sslash': u'\u2afd', # ⫽ DOUBLE SOLIDUS OPERATOR + 'star': u'\u22c6', # ⋆ STAR OPERATOR + 'talloblong': u'\u2afe', # ⫾ WHITE VERTICAL BAR + 'times': u'\xd7', # × MULTIPLICATION SIGN + 'triangle': u'\u25b3', # △ WHITE UP-POINTING TRIANGLE + 'triangledown': u'\u25bf', # ▿ WHITE DOWN-POINTING SMALL TRIANGLE + 'triangleleft': u'\u25c3', # ◃ WHITE LEFT-POINTING SMALL TRIANGLE + 'triangleright': u'\u25b9', # ▹ WHITE RIGHT-POINTING SMALL TRIANGLE + 'uplus': u'\u228e', # ⊎ MULTISET UNION + 'vartriangle': u'\u25b3', # △ WHITE UP-POINTING TRIANGLE + 'vee': u'\u2228', # ∨ 
LOGICAL OR + 'veebar': u'\u22bb', # ⊻ XOR + 'wedge': u'\u2227', # ∧ LOGICAL AND + 'wr': u'\u2240', # ≀ WREATH PRODUCT + } +mathclose = { + 'Rbag': u'\u27c6', # ⟆ RIGHT S-SHAPED BAG DELIMITER + 'lrcorner': u'\u231f', # ⌟ BOTTOM RIGHT CORNER + 'rangle': u'\u27e9', # ⟩ MATHEMATICAL RIGHT ANGLE BRACKET + 'rbag': u'\u27c6', # ⟆ RIGHT S-SHAPED BAG DELIMITER + 'rbrace': u'}', # } RIGHT CURLY BRACKET + 'rbrack': u']', # ] RIGHT SQUARE BRACKET + 'rceil': u'\u2309', # ⌉ RIGHT CEILING + 'rfloor': u'\u230b', # ⌋ RIGHT FLOOR + 'rgroup': u'\u27ef', # ⟯ MATHEMATICAL RIGHT FLATTENED PARENTHESIS + 'rrbracket': u'\u27e7', # ⟧ MATHEMATICAL RIGHT WHITE SQUARE BRACKET + 'rrparenthesis': u'\u2988', # ⦈ Z NOTATION RIGHT IMAGE BRACKET + 'urcorner': u'\u231d', # ⌝ TOP RIGHT CORNER + '}': u'}', # } RIGHT CURLY BRACKET + } +mathfence = { + 'Vert': u'\u2016', # ‖ DOUBLE VERTICAL LINE + 'vert': u'|', # | VERTICAL LINE + '|': u'\u2016', # ‖ DOUBLE VERTICAL LINE + } +mathop = { + 'Join': u'\u2a1d', # ⨝ JOIN + 'bigcap': u'\u22c2', # ⋂ N-ARY INTERSECTION + 'bigcup': u'\u22c3', # ⋃ N-ARY UNION + 'biginterleave': u'\u2afc', # ⫼ LARGE TRIPLE VERTICAL BAR OPERATOR + 'bigodot': u'\u2a00', # ⨀ N-ARY CIRCLED DOT OPERATOR + 'bigoplus': u'\u2a01', # ⨁ N-ARY CIRCLED PLUS OPERATOR + 'bigotimes': u'\u2a02', # ⨂ N-ARY CIRCLED TIMES OPERATOR + 'bigsqcup': u'\u2a06', # ⨆ N-ARY SQUARE UNION OPERATOR + 'biguplus': u'\u2a04', # ⨄ N-ARY UNION OPERATOR WITH PLUS + 'bigvee': u'\u22c1', # ⋁ N-ARY LOGICAL OR + 'bigwedge': u'\u22c0', # ⋀ N-ARY LOGICAL AND + 'coprod': u'\u2210', # ∐ N-ARY COPRODUCT + 'fatsemi': u'\u2a1f', # ⨟ Z NOTATION SCHEMA COMPOSITION + 'fint': u'\u2a0f', # ⨏ INTEGRAL AVERAGE WITH SLASH + 'iiiint': u'\u2a0c', # ⨌ QUADRUPLE INTEGRAL OPERATOR + 'iiint': u'\u222d', # ∭ TRIPLE INTEGRAL + 'iint': u'\u222c', # ∬ DOUBLE INTEGRAL + 'int': u'\u222b', # ∫ INTEGRAL + 'oiint': u'\u222f', # ∯ SURFACE INTEGRAL + 'oint': u'\u222e', # ∮ CONTOUR INTEGRAL + 'ointctrclockwise': u'\u2233', # ∳ ANTICLOCKWISE CONTOUR 
INTEGRAL + 'prod': u'\u220f', # ∏ N-ARY PRODUCT + 'sqint': u'\u2a16', # ⨖ QUATERNION INTEGRAL OPERATOR + 'sum': u'\u2211', # ∑ N-ARY SUMMATION + 'varointclockwise': u'\u2232', # ∲ CLOCKWISE CONTOUR INTEGRAL + } +mathopen = { + 'Lbag': u'\u27c5', # ⟅ LEFT S-SHAPED BAG DELIMITER + 'langle': u'\u27e8', # ⟨ MATHEMATICAL LEFT ANGLE BRACKET + 'lbag': u'\u27c5', # ⟅ LEFT S-SHAPED BAG DELIMITER + 'lbrace': u'{', # { LEFT CURLY BRACKET + 'lbrack': u'[', # [ LEFT SQUARE BRACKET + 'lceil': u'\u2308', # ⌈ LEFT CEILING + 'lfloor': u'\u230a', # ⌊ LEFT FLOOR + 'lgroup': u'\u27ee', # ⟮ MATHEMATICAL LEFT FLATTENED PARENTHESIS + 'llbracket': u'\u27e6', # ⟦ MATHEMATICAL LEFT WHITE SQUARE BRACKET + 'llcorner': u'\u231e', # ⌞ BOTTOM LEFT CORNER + 'llparenthesis': u'\u2987', # ⦇ Z NOTATION LEFT IMAGE BRACKET + 'ulcorner': u'\u231c', # ⌜ TOP LEFT CORNER + '{': u'{', # { LEFT CURLY BRACKET + } +mathord = { + '#': u'#', # # NUMBER SIGN + '$': u'$', # $ DOLLAR SIGN + '%': u'%', # % PERCENT SIGN + '&': u'&', # & AMPERSAND + 'AC': u'\u223f', # ∿ SINE WAVE + 'APLcomment': u'\u235d', # ⍝ APL FUNCTIONAL SYMBOL UP SHOE JOT + 'APLdownarrowbox': u'\u2357', # ⍗ APL FUNCTIONAL SYMBOL QUAD DOWNWARDS ARROW + 'APLinput': u'\u235e', # ⍞ APL FUNCTIONAL SYMBOL QUOTE QUAD + 'APLinv': u'\u2339', # ⌹ APL FUNCTIONAL SYMBOL QUAD DIVIDE + 'APLleftarrowbox': u'\u2347', # ⍇ APL FUNCTIONAL SYMBOL QUAD LEFTWARDS ARROW + 'APLlog': u'\u235f', # ⍟ APL FUNCTIONAL SYMBOL CIRCLE STAR + 'APLrightarrowbox': u'\u2348', # ⍈ APL FUNCTIONAL SYMBOL QUAD RIGHTWARDS ARROW + 'APLuparrowbox': u'\u2350', # ⍐ APL FUNCTIONAL SYMBOL QUAD UPWARDS ARROW + 'Aries': u'\u2648', # ♈ ARIES + 'CIRCLE': u'\u25cf', # ● BLACK CIRCLE + 'CheckedBox': u'\u2611', # ☑ BALLOT BOX WITH CHECK + 'Diamond': u'\u25c7', # ◇ WHITE DIAMOND + 'Finv': u'\u2132', # Ⅎ TURNED CAPITAL F + 'Game': u'\u2141', # ⅁ TURNED SANS-SERIF CAPITAL G + 'Gemini': u'\u264a', # ♊ GEMINI + 'Jupiter': u'\u2643', # ♃ JUPITER + 'LEFTCIRCLE': u'\u25d6', # ◖ LEFT HALF BLACK CIRCLE + 
'LEFTcircle': u'\u25d0', # ◐ CIRCLE WITH LEFT HALF BLACK + 'Leo': u'\u264c', # ♌ LEO + 'Libra': u'\u264e', # ♎ LIBRA + 'Mars': u'\u2642', # ♂ MALE SIGN + 'Mercury': u'\u263f', # ☿ MERCURY + 'Neptune': u'\u2646', # ♆ NEPTUNE + 'Pluto': u'\u2647', # ♇ PLUTO + 'RIGHTCIRCLE': u'\u25d7', # ◗ RIGHT HALF BLACK CIRCLE + 'RIGHTcircle': u'\u25d1', # ◑ CIRCLE WITH RIGHT HALF BLACK + 'Saturn': u'\u2644', # ♄ SATURN + 'Scorpio': u'\u264f', # ♏ SCORPIUS + 'Square': u'\u2610', # ☐ BALLOT BOX + 'Sun': u'\u2609', # ☉ SUN + 'Taurus': u'\u2649', # ♉ TAURUS + 'Uranus': u'\u2645', # ♅ URANUS + 'Venus': u'\u2640', # ♀ FEMALE SIGN + 'XBox': u'\u2612', # ☒ BALLOT BOX WITH X + 'Yup': u'\u2144', # ⅄ TURNED SANS-SERIF CAPITAL Y + '_': u'_', # _ LOW LINE + 'angle': u'\u2220', # ∠ ANGLE + 'aquarius': u'\u2652', # ♒ AQUARIUS + 'aries': u'\u2648', # ♈ ARIES + 'ast': u'*', # * ASTERISK + 'backepsilon': u'\u03f6', # ϶ GREEK REVERSED LUNATE EPSILON SYMBOL + 'backprime': u'\u2035', # ‵ REVERSED PRIME + 'backslash': u'\\', # \ REVERSE SOLIDUS + 'because': u'\u2235', # ∵ BECAUSE + 'bigstar': u'\u2605', # ★ BLACK STAR + 'binampersand': u'&', # & AMPERSAND + 'blacklozenge': u'\u2b27', # ⬧ BLACK MEDIUM LOZENGE + 'blacksmiley': u'\u263b', # ☻ BLACK SMILING FACE + 'blacksquare': u'\u25fc', # ◼ BLACK MEDIUM SQUARE + 'bot': u'\u22a5', # ⊥ UP TACK + 'boy': u'\u2642', # ♂ MALE SIGN + 'cancer': u'\u264b', # ♋ CANCER + 'capricornus': u'\u2651', # ♑ CAPRICORN + 'cdots': u'\u22ef', # ⋯ MIDLINE HORIZONTAL ELLIPSIS + 'cent': u'\xa2', # ¢ CENT SIGN + 'centerdot': u'\u2b1d', # ⬝ BLACK VERY SMALL SQUARE + 'checkmark': u'\u2713', # ✓ CHECK MARK + 'circlearrowleft': u'\u21ba', # ↺ ANTICLOCKWISE OPEN CIRCLE ARROW + 'circlearrowright': u'\u21bb', # ↻ CLOCKWISE OPEN CIRCLE ARROW + 'circledR': u'\xae', # ® REGISTERED SIGN + 'circledcirc': u'\u25ce', # ◎ BULLSEYE + 'clubsuit': u'\u2663', # ♣ BLACK CLUB SUIT + 'complement': u'\u2201', # ∁ COMPLEMENT + 'dasharrow': u'\u21e2', # ⇢ RIGHTWARDS DASHED ARROW + 'dashleftarrow': 
u'\u21e0', # ⇠ LEFTWARDS DASHED ARROW + 'dashrightarrow': u'\u21e2', # ⇢ RIGHTWARDS DASHED ARROW + 'diameter': u'\u2300', # ⌀ DIAMETER SIGN + 'diamondsuit': u'\u2662', # ♢ WHITE DIAMOND SUIT + 'earth': u'\u2641', # ♁ EARTH + 'exists': u'\u2203', # ∃ THERE EXISTS + 'female': u'\u2640', # ♀ FEMALE SIGN + 'flat': u'\u266d', # ♭ MUSIC FLAT SIGN + 'forall': u'\u2200', # ∀ FOR ALL + 'fourth': u'\u2057', # ⁗ QUADRUPLE PRIME + 'frownie': u'\u2639', # ☹ WHITE FROWNING FACE + 'gemini': u'\u264a', # ♊ GEMINI + 'girl': u'\u2640', # ♀ FEMALE SIGN + 'heartsuit': u'\u2661', # ♡ WHITE HEART SUIT + 'infty': u'\u221e', # ∞ INFINITY + 'invneg': u'\u2310', # ⌐ REVERSED NOT SIGN + 'jupiter': u'\u2643', # ♃ JUPITER + 'ldots': u'\u2026', # … HORIZONTAL ELLIPSIS + 'leftmoon': u'\u263e', # ☾ LAST QUARTER MOON + 'leftturn': u'\u21ba', # ↺ ANTICLOCKWISE OPEN CIRCLE ARROW + 'leo': u'\u264c', # ♌ LEO + 'libra': u'\u264e', # ♎ LIBRA + 'lnot': u'\xac', # ¬ NOT SIGN + 'lozenge': u'\u25ca', # ◊ LOZENGE + 'male': u'\u2642', # ♂ MALE SIGN + 'maltese': u'\u2720', # ✠ MALTESE CROSS + 'mathdollar': u'$', # $ DOLLAR SIGN + 'measuredangle': u'\u2221', # ∡ MEASURED ANGLE + 'mercury': u'\u263f', # ☿ MERCURY + 'mho': u'\u2127', # ℧ INVERTED OHM SIGN + 'nabla': u'\u2207', # ∇ NABLA + 'natural': u'\u266e', # ♮ MUSIC NATURAL SIGN + 'neg': u'\xac', # ¬ NOT SIGN + 'neptune': u'\u2646', # ♆ NEPTUNE + 'nexists': u'\u2204', # ∄ THERE DOES NOT EXIST + 'notbackslash': u'\u2340', # ⍀ APL FUNCTIONAL SYMBOL BACKSLASH BAR + 'partial': u'\u2202', # ∂ PARTIAL DIFFERENTIAL + 'pisces': u'\u2653', # ♓ PISCES + 'pluto': u'\u2647', # ♇ PLUTO + 'pounds': u'\xa3', # £ POUND SIGN + 'prime': u'\u2032', # ′ PRIME + 'quarternote': u'\u2669', # ♩ QUARTER NOTE + 'rightmoon': u'\u263d', # ☽ FIRST QUARTER MOON + 'rightturn': u'\u21bb', # ↻ CLOCKWISE OPEN CIRCLE ARROW + 'sagittarius': u'\u2650', # ♐ SAGITTARIUS + 'saturn': u'\u2644', # ♄ SATURN + 'scorpio': u'\u264f', # ♏ SCORPIUS + 'second': u'\u2033', # ″ DOUBLE PRIME + 'sharp': 
u'\u266f', # ♯ MUSIC SHARP SIGN + 'sim': u'~', # ~ TILDE + 'slash': u'/', # / SOLIDUS + 'smiley': u'\u263a', # ☺ WHITE SMILING FACE + 'spadesuit': u'\u2660', # ♠ BLACK SPADE SUIT + 'spddot': u'\xa8', # ¨ DIAERESIS + 'sphat': u'^', # ^ CIRCUMFLEX ACCENT + 'sphericalangle': u'\u2222', # ∢ SPHERICAL ANGLE + 'sptilde': u'~', # ~ TILDE + 'square': u'\u25fb', # ◻ WHITE MEDIUM SQUARE + 'sun': u'\u263c', # ☼ WHITE SUN WITH RAYS + 'taurus': u'\u2649', # ♉ TAURUS + 'therefore': u'\u2234', # ∴ THEREFORE + 'third': u'\u2034', # ‴ TRIPLE PRIME + 'top': u'\u22a4', # ⊤ DOWN TACK + 'triangleleft': u'\u25c5', # ◅ WHITE LEFT-POINTING POINTER + 'triangleright': u'\u25bb', # ▻ WHITE RIGHT-POINTING POINTER + 'twonotes': u'\u266b', # ♫ BEAMED EIGHTH NOTES + 'uranus': u'\u2645', # ♅ URANUS + 'varEarth': u'\u2641', # ♁ EARTH + 'varnothing': u'\u2205', # ∅ EMPTY SET + 'virgo': u'\u264d', # ♍ VIRGO + 'wasylozenge': u'\u2311', # ⌑ SQUARE LOZENGE + 'wasytherefore': u'\u2234', # ∴ THEREFORE + 'yen': u'\xa5', # ¥ YEN SIGN + } +mathover = { + 'overbrace': u'\u23de', # ⏞ TOP CURLY BRACKET + 'wideparen': u'\u23dc', # ⏜ TOP PARENTHESIS + } +mathradical = { + 'sqrt': u'\u221a', # √ SQUARE ROOT + 'sqrt[3]': u'\u221b', # ∛ CUBE ROOT + 'sqrt[4]': u'\u221c', # ∜ FOURTH ROOT + } +mathrel = { + 'Bumpeq': u'\u224e', # ≎ GEOMETRICALLY EQUIVALENT TO + 'Doteq': u'\u2251', # ≑ GEOMETRICALLY EQUAL TO + 'Downarrow': u'\u21d3', # ⇓ DOWNWARDS DOUBLE ARROW + 'Leftarrow': u'\u21d0', # ⇐ LEFTWARDS DOUBLE ARROW + 'Leftrightarrow': u'\u21d4', # ⇔ LEFT RIGHT DOUBLE ARROW + 'Lleftarrow': u'\u21da', # ⇚ LEFTWARDS TRIPLE ARROW + 'Longleftarrow': u'\u27f8', # ⟸ LONG LEFTWARDS DOUBLE ARROW + 'Longleftrightarrow': u'\u27fa', # ⟺ LONG LEFT RIGHT DOUBLE ARROW + 'Longmapsfrom': u'\u27fd', # ⟽ LONG LEFTWARDS DOUBLE ARROW FROM BAR + 'Longmapsto': u'\u27fe', # ⟾ LONG RIGHTWARDS DOUBLE ARROW FROM BAR + 'Longrightarrow': u'\u27f9', # ⟹ LONG RIGHTWARDS DOUBLE ARROW + 'Lsh': u'\u21b0', # ↰ UPWARDS ARROW WITH TIP LEFTWARDS + 'Mapsfrom': 
u'\u2906', # ⤆ LEFTWARDS DOUBLE ARROW FROM BAR + 'Mapsto': u'\u2907', # ⤇ RIGHTWARDS DOUBLE ARROW FROM BAR + 'Rightarrow': u'\u21d2', # ⇒ RIGHTWARDS DOUBLE ARROW + 'Rrightarrow': u'\u21db', # ⇛ RIGHTWARDS TRIPLE ARROW + 'Rsh': u'\u21b1', # ↱ UPWARDS ARROW WITH TIP RIGHTWARDS + 'Subset': u'\u22d0', # ⋐ DOUBLE SUBSET + 'Supset': u'\u22d1', # ⋑ DOUBLE SUPERSET + 'Uparrow': u'\u21d1', # ⇑ UPWARDS DOUBLE ARROW + 'Updownarrow': u'\u21d5', # ⇕ UP DOWN DOUBLE ARROW + 'VDash': u'\u22ab', # ⊫ DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE + 'Vdash': u'\u22a9', # ⊩ FORCES + 'Vvdash': u'\u22aa', # ⊪ TRIPLE VERTICAL BAR RIGHT TURNSTILE + 'apprge': u'\u2273', # ≳ GREATER-THAN OR EQUIVALENT TO + 'apprle': u'\u2272', # ≲ LESS-THAN OR EQUIVALENT TO + 'approx': u'\u2248', # ≈ ALMOST EQUAL TO + 'approxeq': u'\u224a', # ≊ ALMOST EQUAL OR EQUAL TO + 'asymp': u'\u224d', # ≍ EQUIVALENT TO + 'backsim': u'\u223d', # ∽ REVERSED TILDE + 'backsimeq': u'\u22cd', # ⋍ REVERSED TILDE EQUALS + 'barin': u'\u22f6', # ⋶ ELEMENT OF WITH OVERBAR + 'barleftharpoon': u'\u296b', # ⥫ LEFTWARDS HARPOON WITH BARB DOWN BELOW LONG DASH + 'barrightharpoon': u'\u296d', # ⥭ RIGHTWARDS HARPOON WITH BARB DOWN BELOW LONG DASH + 'between': u'\u226c', # ≬ BETWEEN + 'bowtie': u'\u22c8', # ⋈ BOWTIE + 'bumpeq': u'\u224f', # ≏ DIFFERENCE BETWEEN + 'circeq': u'\u2257', # ≗ RING EQUAL TO + 'coloneq': u'\u2254', # ≔ COLON EQUALS + 'cong': u'\u2245', # ≅ APPROXIMATELY EQUAL TO + 'corresponds': u'\u2259', # ≙ ESTIMATES + 'curlyeqprec': u'\u22de', # ⋞ EQUAL TO OR PRECEDES + 'curlyeqsucc': u'\u22df', # ⋟ EQUAL TO OR SUCCEEDS + 'curvearrowleft': u'\u21b6', # ↶ ANTICLOCKWISE TOP SEMICIRCLE ARROW + 'curvearrowright': u'\u21b7', # ↷ CLOCKWISE TOP SEMICIRCLE ARROW + 'dashv': u'\u22a3', # ⊣ LEFT TACK + 'ddots': u'\u22f1', # ⋱ DOWN RIGHT DIAGONAL ELLIPSIS + 'dlsh': u'\u21b2', # ↲ DOWNWARDS ARROW WITH TIP LEFTWARDS + 'doteq': u'\u2250', # ≐ APPROACHES THE LIMIT + 'doteqdot': u'\u2251', # ≑ GEOMETRICALLY EQUAL TO + 'downarrow': u'\u2193', # 
↓ DOWNWARDS ARROW + 'downdownarrows': u'\u21ca', # ⇊ DOWNWARDS PAIRED ARROWS + 'downdownharpoons': u'\u2965', # ⥥ DOWNWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT + 'downharpoonleft': u'\u21c3', # ⇃ DOWNWARDS HARPOON WITH BARB LEFTWARDS + 'downharpoonright': u'\u21c2', # ⇂ DOWNWARDS HARPOON WITH BARB RIGHTWARDS + 'downuparrows': u'\u21f5', # ⇵ DOWNWARDS ARROW LEFTWARDS OF UPWARDS ARROW + 'downupharpoons': u'\u296f', # ⥯ DOWNWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT + 'drsh': u'\u21b3', # ↳ DOWNWARDS ARROW WITH TIP RIGHTWARDS + 'eqcirc': u'\u2256', # ≖ RING IN EQUAL TO + 'eqcolon': u'\u2255', # ≕ EQUALS COLON + 'eqsim': u'\u2242', # ≂ MINUS TILDE + 'eqslantgtr': u'\u2a96', # ⪖ SLANTED EQUAL TO OR GREATER-THAN + 'eqslantless': u'\u2a95', # ⪕ SLANTED EQUAL TO OR LESS-THAN + 'equiv': u'\u2261', # ≡ IDENTICAL TO + 'fallingdotseq': u'\u2252', # ≒ APPROXIMATELY EQUAL TO OR THE IMAGE OF + 'frown': u'\u2322', # ⌢ FROWN + 'ge': u'\u2265', # ≥ GREATER-THAN OR EQUAL TO + 'geq': u'\u2265', # ≥ GREATER-THAN OR EQUAL TO + 'geqq': u'\u2267', # ≧ GREATER-THAN OVER EQUAL TO + 'geqslant': u'\u2a7e', # ⩾ GREATER-THAN OR SLANTED EQUAL TO + 'gets': u'\u2190', # ← LEFTWARDS ARROW + 'gg': u'\u226b', # ≫ MUCH GREATER-THAN + 'ggcurly': u'\u2abc', # ⪼ DOUBLE SUCCEEDS + 'ggg': u'\u22d9', # ⋙ VERY MUCH GREATER-THAN + 'gnapprox': u'\u2a8a', # ⪊ GREATER-THAN AND NOT APPROXIMATE + 'gneq': u'\u2a88', # ⪈ GREATER-THAN AND SINGLE-LINE NOT EQUAL TO + 'gneqq': u'\u2269', # ≩ GREATER-THAN BUT NOT EQUAL TO + 'gnsim': u'\u22e7', # ⋧ GREATER-THAN BUT NOT EQUIVALENT TO + 'gtrapprox': u'\u2a86', # ⪆ GREATER-THAN OR APPROXIMATE + 'gtrdot': u'\u22d7', # ⋗ GREATER-THAN WITH DOT + 'gtreqless': u'\u22db', # ⋛ GREATER-THAN EQUAL TO OR LESS-THAN + 'gtreqqless': u'\u2a8c', # ⪌ GREATER-THAN ABOVE DOUBLE-LINE EQUAL ABOVE LESS-THAN + 'gtrless': u'\u2277', # ≷ GREATER-THAN OR LESS-THAN + 'gtrsim': u'\u2273', # ≳ GREATER-THAN OR EQUIVALENT TO + 'hash': u'\u22d5', # ⋕ 
EQUAL AND PARALLEL TO + 'hookleftarrow': u'\u21a9', # ↩ LEFTWARDS ARROW WITH HOOK + 'hookrightarrow': u'\u21aa', # ↪ RIGHTWARDS ARROW WITH HOOK + 'iddots': u'\u22f0', # ⋰ UP RIGHT DIAGONAL ELLIPSIS + 'impliedby': u'\u27f8', # ⟸ LONG LEFTWARDS DOUBLE ARROW + 'implies': u'\u27f9', # ⟹ LONG RIGHTWARDS DOUBLE ARROW + 'in': u'\u2208', # ∈ ELEMENT OF + 'le': u'\u2264', # ≤ LESS-THAN OR EQUAL TO + 'leftarrow': u'\u2190', # ← LEFTWARDS ARROW + 'leftarrowtail': u'\u21a2', # ↢ LEFTWARDS ARROW WITH TAIL + 'leftarrowtriangle': u'\u21fd', # ⇽ LEFTWARDS OPEN-HEADED ARROW + 'leftbarharpoon': u'\u296a', # ⥪ LEFTWARDS HARPOON WITH BARB UP ABOVE LONG DASH + 'leftharpoondown': u'\u21bd', # ↽ LEFTWARDS HARPOON WITH BARB DOWNWARDS + 'leftharpoonup': u'\u21bc', # ↼ LEFTWARDS HARPOON WITH BARB UPWARDS + 'leftleftarrows': u'\u21c7', # ⇇ LEFTWARDS PAIRED ARROWS + 'leftleftharpoons': u'\u2962', # ⥢ LEFTWARDS HARPOON WITH BARB UP ABOVE LEFTWARDS HARPOON WITH BARB DOWN + 'leftrightarrow': u'\u2194', # ↔ LEFT RIGHT ARROW + 'leftrightarrows': u'\u21c6', # ⇆ LEFTWARDS ARROW OVER RIGHTWARDS ARROW + 'leftrightarrowtriangle': u'\u21ff', # ⇿ LEFT RIGHT OPEN-HEADED ARROW + 'leftrightharpoon': u'\u294a', # ⥊ LEFT BARB UP RIGHT BARB DOWN HARPOON + 'leftrightharpoons': u'\u21cb', # ⇋ LEFTWARDS HARPOON OVER RIGHTWARDS HARPOON + 'leftrightsquigarrow': u'\u21ad', # ↭ LEFT RIGHT WAVE ARROW + 'leftslice': u'\u2aa6', # ⪦ LESS-THAN CLOSED BY CURVE + 'leftsquigarrow': u'\u21dc', # ⇜ LEFTWARDS SQUIGGLE ARROW + 'leq': u'\u2264', # ≤ LESS-THAN OR EQUAL TO + 'leqq': u'\u2266', # ≦ LESS-THAN OVER EQUAL TO + 'leqslant': u'\u2a7d', # ⩽ LESS-THAN OR SLANTED EQUAL TO + 'lessapprox': u'\u2a85', # ⪅ LESS-THAN OR APPROXIMATE + 'lessdot': u'\u22d6', # ⋖ LESS-THAN WITH DOT + 'lesseqgtr': u'\u22da', # ⋚ LESS-THAN EQUAL TO OR GREATER-THAN + 'lesseqqgtr': u'\u2a8b', # ⪋ LESS-THAN ABOVE DOUBLE-LINE EQUAL ABOVE GREATER-THAN + 'lessgtr': u'\u2276', # ≶ LESS-THAN OR GREATER-THAN + 'lesssim': u'\u2272', # ≲ LESS-THAN OR EQUIVALENT 
TO + 'lightning': u'\u21af', # ↯ DOWNWARDS ZIGZAG ARROW + 'll': u'\u226a', # ≪ MUCH LESS-THAN + 'llcurly': u'\u2abb', # ⪻ DOUBLE PRECEDES + 'lll': u'\u22d8', # ⋘ VERY MUCH LESS-THAN + 'lnapprox': u'\u2a89', # ⪉ LESS-THAN AND NOT APPROXIMATE + 'lneq': u'\u2a87', # ⪇ LESS-THAN AND SINGLE-LINE NOT EQUAL TO + 'lneqq': u'\u2268', # ≨ LESS-THAN BUT NOT EQUAL TO + 'lnsim': u'\u22e6', # ⋦ LESS-THAN BUT NOT EQUIVALENT TO + 'longleftarrow': u'\u27f5', # ⟵ LONG LEFTWARDS ARROW + 'longleftrightarrow': u'\u27f7', # ⟷ LONG LEFT RIGHT ARROW + 'longmapsfrom': u'\u27fb', # ⟻ LONG LEFTWARDS ARROW FROM BAR + 'longmapsto': u'\u27fc', # ⟼ LONG RIGHTWARDS ARROW FROM BAR + 'longrightarrow': u'\u27f6', # ⟶ LONG RIGHTWARDS ARROW + 'looparrowleft': u'\u21ab', # ↫ LEFTWARDS ARROW WITH LOOP + 'looparrowright': u'\u21ac', # ↬ RIGHTWARDS ARROW WITH LOOP + 'mapsfrom': u'\u21a4', # ↤ LEFTWARDS ARROW FROM BAR + 'mapsto': u'\u21a6', # ↦ RIGHTWARDS ARROW FROM BAR + 'mid': u'\u2223', # ∣ DIVIDES + 'models': u'\u22a7', # ⊧ MODELS + 'multimap': u'\u22b8', # ⊸ MULTIMAP + 'nLeftarrow': u'\u21cd', # ⇍ LEFTWARDS DOUBLE ARROW WITH STROKE + 'nLeftrightarrow': u'\u21ce', # ⇎ LEFT RIGHT DOUBLE ARROW WITH STROKE + 'nRightarrow': u'\u21cf', # ⇏ RIGHTWARDS DOUBLE ARROW WITH STROKE + 'nVDash': u'\u22af', # ⊯ NEGATED DOUBLE VERTICAL BAR DOUBLE RIGHT TURNSTILE + 'nVdash': u'\u22ae', # ⊮ DOES NOT FORCE + 'ncong': u'\u2247', # ≇ NEITHER APPROXIMATELY NOR ACTUALLY EQUAL TO + 'ne': u'\u2260', # ≠ NOT EQUAL TO + 'nearrow': u'\u2197', # ↗ NORTH EAST ARROW + 'neq': u'\u2260', # ≠ NOT EQUAL TO + 'ngeq': u'\u2271', # ≱ NEITHER GREATER-THAN NOR EQUAL TO + 'ngtr': u'\u226f', # ≯ NOT GREATER-THAN + 'ni': u'\u220b', # ∋ CONTAINS AS MEMBER + 'nleftarrow': u'\u219a', # ↚ LEFTWARDS ARROW WITH STROKE + 'nleftrightarrow': u'\u21ae', # ↮ LEFT RIGHT ARROW WITH STROKE + 'nleq': u'\u2270', # ≰ NEITHER LESS-THAN NOR EQUAL TO + 'nless': u'\u226e', # ≮ NOT LESS-THAN + 'nmid': u'\u2224', # ∤ DOES NOT DIVIDE + 'notasymp': u'\u226d', # ≭ NOT 
EQUIVALENT TO + 'notin': u'\u2209', # ∉ NOT AN ELEMENT OF + 'notowner': u'\u220c', # ∌ DOES NOT CONTAIN AS MEMBER + 'notslash': u'\u233f', # ⌿ APL FUNCTIONAL SYMBOL SLASH BAR + 'nparallel': u'\u2226', # ∦ NOT PARALLEL TO + 'nprec': u'\u2280', # ⊀ DOES NOT PRECEDE + 'npreceq': u'\u22e0', # ⋠ DOES NOT PRECEDE OR EQUAL + 'nrightarrow': u'\u219b', # ↛ RIGHTWARDS ARROW WITH STROKE + 'nsim': u'\u2241', # ≁ NOT TILDE + 'nsubseteq': u'\u2288', # ⊈ NEITHER A SUBSET OF NOR EQUAL TO + 'nsucc': u'\u2281', # ⊁ DOES NOT SUCCEED + 'nsucceq': u'\u22e1', # ⋡ DOES NOT SUCCEED OR EQUAL + 'nsupseteq': u'\u2289', # ⊉ NEITHER A SUPERSET OF NOR EQUAL TO + 'ntriangleleft': u'\u22ea', # ⋪ NOT NORMAL SUBGROUP OF + 'ntrianglelefteq': u'\u22ec', # ⋬ NOT NORMAL SUBGROUP OF OR EQUAL TO + 'ntriangleright': u'\u22eb', # ⋫ DOES NOT CONTAIN AS NORMAL SUBGROUP + 'ntrianglerighteq': u'\u22ed', # ⋭ DOES NOT CONTAIN AS NORMAL SUBGROUP OR EQUAL + 'nvDash': u'\u22ad', # ⊭ NOT TRUE + 'nvdash': u'\u22ac', # ⊬ DOES NOT PROVE + 'nwarrow': u'\u2196', # ↖ NORTH WEST ARROW + 'owns': u'\u220b', # ∋ CONTAINS AS MEMBER + 'parallel': u'\u2225', # ∥ PARALLEL TO + 'perp': u'\u27c2', # ⟂ PERPENDICULAR + 'pitchfork': u'\u22d4', # ⋔ PITCHFORK + 'prec': u'\u227a', # ≺ PRECEDES + 'precapprox': u'\u2ab7', # ⪷ PRECEDES ABOVE ALMOST EQUAL TO + 'preccurlyeq': u'\u227c', # ≼ PRECEDES OR EQUAL TO + 'preceq': u'\u2aaf', # ⪯ PRECEDES ABOVE SINGLE-LINE EQUALS SIGN + 'precnapprox': u'\u2ab9', # ⪹ PRECEDES ABOVE NOT ALMOST EQUAL TO + 'precnsim': u'\u22e8', # ⋨ PRECEDES BUT NOT EQUIVALENT TO + 'precsim': u'\u227e', # ≾ PRECEDES OR EQUIVALENT TO + 'propto': u'\u221d', # ∝ PROPORTIONAL TO + 'restriction': u'\u21be', # ↾ UPWARDS HARPOON WITH BARB RIGHTWARDS + 'rightarrow': u'\u2192', # → RIGHTWARDS ARROW + 'rightarrowtail': u'\u21a3', # ↣ RIGHTWARDS ARROW WITH TAIL + 'rightarrowtriangle': u'\u21fe', # ⇾ RIGHTWARDS OPEN-HEADED ARROW + 'rightbarharpoon': u'\u296c', # ⥬ RIGHTWARDS HARPOON WITH BARB UP ABOVE LONG DASH + 'rightharpoondown': 
u'\u21c1', # ⇁ RIGHTWARDS HARPOON WITH BARB DOWNWARDS + 'rightharpoonup': u'\u21c0', # ⇀ RIGHTWARDS HARPOON WITH BARB UPWARDS + 'rightleftarrows': u'\u21c4', # ⇄ RIGHTWARDS ARROW OVER LEFTWARDS ARROW + 'rightleftharpoon': u'\u294b', # ⥋ LEFT BARB DOWN RIGHT BARB UP HARPOON + 'rightleftharpoons': u'\u21cc', # ⇌ RIGHTWARDS HARPOON OVER LEFTWARDS HARPOON + 'rightrightarrows': u'\u21c9', # ⇉ RIGHTWARDS PAIRED ARROWS + 'rightrightharpoons': u'\u2964', # ⥤ RIGHTWARDS HARPOON WITH BARB UP ABOVE RIGHTWARDS HARPOON WITH BARB DOWN + 'rightslice': u'\u2aa7', # ⪧ GREATER-THAN CLOSED BY CURVE + 'rightsquigarrow': u'\u21dd', # ⇝ RIGHTWARDS SQUIGGLE ARROW + 'risingdotseq': u'\u2253', # ≓ IMAGE OF OR APPROXIMATELY EQUAL TO + 'searrow': u'\u2198', # ↘ SOUTH EAST ARROW + 'sim': u'\u223c', # ∼ TILDE OPERATOR + 'simeq': u'\u2243', # ≃ ASYMPTOTICALLY EQUAL TO + 'smallfrown': u'\u2322', # ⌢ FROWN + 'smallsmile': u'\u2323', # ⌣ SMILE + 'smile': u'\u2323', # ⌣ SMILE + 'sqsubset': u'\u228f', # ⊏ SQUARE IMAGE OF + 'sqsubseteq': u'\u2291', # ⊑ SQUARE IMAGE OF OR EQUAL TO + 'sqsupset': u'\u2290', # ⊐ SQUARE ORIGINAL OF + 'sqsupseteq': u'\u2292', # ⊒ SQUARE ORIGINAL OF OR EQUAL TO + 'subset': u'\u2282', # ⊂ SUBSET OF + 'subseteq': u'\u2286', # ⊆ SUBSET OF OR EQUAL TO + 'subseteqq': u'\u2ac5', # ⫅ SUBSET OF ABOVE EQUALS SIGN + 'subsetneq': u'\u228a', # ⊊ SUBSET OF WITH NOT EQUAL TO + 'subsetneqq': u'\u2acb', # ⫋ SUBSET OF ABOVE NOT EQUAL TO + 'succ': u'\u227b', # ≻ SUCCEEDS + 'succapprox': u'\u2ab8', # ⪸ SUCCEEDS ABOVE ALMOST EQUAL TO + 'succcurlyeq': u'\u227d', # ≽ SUCCEEDS OR EQUAL TO + 'succeq': u'\u2ab0', # ⪰ SUCCEEDS ABOVE SINGLE-LINE EQUALS SIGN + 'succnapprox': u'\u2aba', # ⪺ SUCCEEDS ABOVE NOT ALMOST EQUAL TO + 'succnsim': u'\u22e9', # ⋩ SUCCEEDS BUT NOT EQUIVALENT TO + 'succsim': u'\u227f', # ≿ SUCCEEDS OR EQUIVALENT TO + 'supset': u'\u2283', # ⊃ SUPERSET OF + 'supseteq': u'\u2287', # ⊇ SUPERSET OF OR EQUAL TO + 'supseteqq': u'\u2ac6', # ⫆ SUPERSET OF ABOVE EQUALS SIGN + 'supsetneq': 
u'\u228b', # ⊋ SUPERSET OF WITH NOT EQUAL TO + 'supsetneqq': u'\u2acc', # ⫌ SUPERSET OF ABOVE NOT EQUAL TO + 'swarrow': u'\u2199', # ↙ SOUTH WEST ARROW + 'to': u'\u2192', # → RIGHTWARDS ARROW + 'trianglelefteq': u'\u22b4', # ⊴ NORMAL SUBGROUP OF OR EQUAL TO + 'triangleq': u'\u225c', # ≜ DELTA EQUAL TO + 'trianglerighteq': u'\u22b5', # ⊵ CONTAINS AS NORMAL SUBGROUP OR EQUAL TO + 'twoheadleftarrow': u'\u219e', # ↞ LEFTWARDS TWO HEADED ARROW + 'twoheadrightarrow': u'\u21a0', # ↠ RIGHTWARDS TWO HEADED ARROW + 'uparrow': u'\u2191', # ↑ UPWARDS ARROW + 'updownarrow': u'\u2195', # ↕ UP DOWN ARROW + 'updownarrows': u'\u21c5', # ⇅ UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW + 'updownharpoons': u'\u296e', # ⥮ UPWARDS HARPOON WITH BARB LEFT BESIDE DOWNWARDS HARPOON WITH BARB RIGHT + 'upharpoonleft': u'\u21bf', # ↿ UPWARDS HARPOON WITH BARB LEFTWARDS + 'upharpoonright': u'\u21be', # ↾ UPWARDS HARPOON WITH BARB RIGHTWARDS + 'upuparrows': u'\u21c8', # ⇈ UPWARDS PAIRED ARROWS + 'upupharpoons': u'\u2963', # ⥣ UPWARDS HARPOON WITH BARB LEFT BESIDE UPWARDS HARPOON WITH BARB RIGHT + 'vDash': u'\u22a8', # ⊨ TRUE + 'varpropto': u'\u221d', # ∝ PROPORTIONAL TO + 'vartriangleleft': u'\u22b2', # ⊲ NORMAL SUBGROUP OF + 'vartriangleright': u'\u22b3', # ⊳ CONTAINS AS NORMAL SUBGROUP + 'vdash': u'\u22a2', # ⊢ RIGHT TACK + 'vdots': u'\u22ee', # ⋮ VERTICAL ELLIPSIS + } +mathunder = { + 'underbrace': u'\u23df', # ⏟ BOTTOM CURLY BRACKET + } +space = { + ':': u'\u205f', # MEDIUM MATHEMATICAL SPACE + 'medspace': u'\u205f', # MEDIUM MATHEMATICAL SPACE + 'quad': u'\u2001', # EM QUAD + } Added: trunk/docutils/src/main/resources/docutils/docutils/utils/math/unichar2tex.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/math/unichar2tex.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/math/unichar2tex.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,788 @@ +# LaTeX math to Unicode symbols translation 
table +# for use with the translate() method of unicode objects. +# Generated with ``write_unichar2tex.py`` from the data in +# http://milde.users.sourceforge.net/LUCR/Math/ + +# Includes commands from: standard LaTeX, amssymb, amsmath + +uni2tex_table = { +160: u'~', +163: u'\\pounds ', +165: u'\\yen ', +172: u'\\neg ', +174: u'\\circledR ', +177: u'\\pm ', +215: u'\\times ', +240: u'\\eth ', +247: u'\\div ', +305: u'\\imath ', +567: u'\\jmath ', +915: u'\\Gamma ', +916: u'\\Delta ', +920: u'\\Theta ', +923: u'\\Lambda ', +926: u'\\Xi ', +928: u'\\Pi ', +931: u'\\Sigma ', +933: u'\\Upsilon ', +934: u'\\Phi ', +936: u'\\Psi ', +937: u'\\Omega ', +945: u'\\alpha ', +946: u'\\beta ', +947: u'\\gamma ', +948: u'\\delta ', +949: u'\\varepsilon ', +950: u'\\zeta ', +951: u'\\eta ', +952: u'\\theta ', +953: u'\\iota ', +954: u'\\kappa ', +955: u'\\lambda ', +956: u'\\mu ', +957: u'\\nu ', +958: u'\\xi ', +960: u'\\pi ', +961: u'\\rho ', +962: u'\\varsigma ', +963: u'\\sigma ', +964: u'\\tau ', +965: u'\\upsilon ', +966: u'\\varphi ', +967: u'\\chi ', +968: u'\\psi ', +969: u'\\omega ', +977: u'\\vartheta ', +981: u'\\phi ', +982: u'\\varpi ', +989: u'\\digamma ', +1014: u'\\backepsilon ', +8193: u'\\quad ', +8214: u'\\| ', +8224: u'\\dagger ', +8225: u'\\ddagger ', +8230: u'\\ldots ', +8242: u'\\prime ', +8245: u'\\backprime ', +8287: u'\\: ', +8450: u'\\mathbb{C}', +8459: u'\\mathcal{H}', +8460: u'\\mathfrak{H}', +8461: u'\\mathbb{H}', +8463: u'\\hslash ', +8464: u'\\mathcal{I}', +8465: u'\\Im ', +8466: u'\\mathcal{L}', +8467: u'\\ell ', +8469: u'\\mathbb{N}', +8472: u'\\wp ', +8473: u'\\mathbb{P}', +8474: u'\\mathbb{Q}', +8475: u'\\mathcal{R}', +8476: u'\\Re ', +8477: u'\\mathbb{R}', +8484: u'\\mathbb{Z}', +8487: u'\\mho ', +8488: u'\\mathfrak{Z}', +8492: u'\\mathcal{B}', +8493: u'\\mathfrak{C}', +8496: u'\\mathcal{E}', +8497: u'\\mathcal{F}', +8498: u'\\Finv ', +8499: u'\\mathcal{M}', +8501: u'\\aleph ', +8502: u'\\beth ', +8503: u'\\gimel ', +8504: u'\\daleth ', 
+8592: u'\\leftarrow ', +8593: u'\\uparrow ', +8594: u'\\rightarrow ', +8595: u'\\downarrow ', +8596: u'\\leftrightarrow ', +8597: u'\\updownarrow ', +8598: u'\\nwarrow ', +8599: u'\\nearrow ', +8600: u'\\searrow ', +8601: u'\\swarrow ', +8602: u'\\nleftarrow ', +8603: u'\\nrightarrow ', +8606: u'\\twoheadleftarrow ', +8608: u'\\twoheadrightarrow ', +8610: u'\\leftarrowtail ', +8611: u'\\rightarrowtail ', +8614: u'\\mapsto ', +8617: u'\\hookleftarrow ', +8618: u'\\hookrightarrow ', +8619: u'\\looparrowleft ', +8620: u'\\looparrowright ', +8621: u'\\leftrightsquigarrow ', +8622: u'\\nleftrightarrow ', +8624: u'\\Lsh ', +8625: u'\\Rsh ', +8630: u'\\curvearrowleft ', +8631: u'\\curvearrowright ', +8634: u'\\circlearrowleft ', +8635: u'\\circlearrowright ', +8636: u'\\leftharpoonup ', +8637: u'\\leftharpoondown ', +8638: u'\\upharpoonright ', +8639: u'\\upharpoonleft ', +8640: u'\\rightharpoonup ', +8641: u'\\rightharpoondown ', +8642: u'\\downharpoonright ', +8643: u'\\downharpoonleft ', +8644: u'\\rightleftarrows ', +8646: u'\\leftrightarrows ', +8647: u'\\leftleftarrows ', +8648: u'\\upuparrows ', +8649: u'\\rightrightarrows ', +8650: u'\\downdownarrows ', +8651: u'\\leftrightharpoons ', +8652: u'\\rightleftharpoons ', +8653: u'\\nLeftarrow ', +8654: u'\\nLeftrightarrow ', +8655: u'\\nRightarrow ', +8656: u'\\Leftarrow ', +8657: u'\\Uparrow ', +8658: u'\\Rightarrow ', +8659: u'\\Downarrow ', +8660: u'\\Leftrightarrow ', +8661: u'\\Updownarrow ', +8666: u'\\Lleftarrow ', +8667: u'\\Rrightarrow ', +8669: u'\\rightsquigarrow ', +8672: u'\\dashleftarrow ', +8674: u'\\dashrightarrow ', +8704: u'\\forall ', +8705: u'\\complement ', +8706: u'\\partial ', +8707: u'\\exists ', +8708: u'\\nexists ', +8709: u'\\varnothing ', +8711: u'\\nabla ', +8712: u'\\in ', +8713: u'\\notin ', +8715: u'\\ni ', +8719: u'\\prod ', +8720: u'\\coprod ', +8721: u'\\sum ', +8722: u'-', +8723: u'\\mp ', +8724: u'\\dotplus ', +8725: u'\\slash ', +8726: u'\\smallsetminus ', +8727: u'\\ast ', +8728: 
u'\\circ ', +8729: u'\\bullet ', +8730: u'\\sqrt ', +8731: u'\\sqrt[3] ', +8732: u'\\sqrt[4] ', +8733: u'\\propto ', +8734: u'\\infty ', +8736: u'\\angle ', +8737: u'\\measuredangle ', +8738: u'\\sphericalangle ', +8739: u'\\mid ', +8740: u'\\nmid ', +8741: u'\\parallel ', +8742: u'\\nparallel ', +8743: u'\\wedge ', +8744: u'\\vee ', +8745: u'\\cap ', +8746: u'\\cup ', +8747: u'\\int ', +8748: u'\\iint ', +8749: u'\\iiint ', +8750: u'\\oint ', +8756: u'\\therefore ', +8757: u'\\because ', +8758: u':', +8764: u'\\sim ', +8765: u'\\backsim ', +8768: u'\\wr ', +8769: u'\\nsim ', +8770: u'\\eqsim ', +8771: u'\\simeq ', +8773: u'\\cong ', +8775: u'\\ncong ', +8776: u'\\approx ', +8778: u'\\approxeq ', +8781: u'\\asymp ', +8782: u'\\Bumpeq ', +8783: u'\\bumpeq ', +8784: u'\\doteq ', +8785: u'\\Doteq ', +8786: u'\\fallingdotseq ', +8787: u'\\risingdotseq ', +8790: u'\\eqcirc ', +8791: u'\\circeq ', +8796: u'\\triangleq ', +8800: u'\\neq ', +8801: u'\\equiv ', +8804: u'\\leq ', +8805: u'\\geq ', +8806: u'\\leqq ', +8807: u'\\geqq ', +8808: u'\\lneqq ', +8809: u'\\gneqq ', +8810: u'\\ll ', +8811: u'\\gg ', +8812: u'\\between ', +8814: u'\\nless ', +8815: u'\\ngtr ', +8816: u'\\nleq ', +8817: u'\\ngeq ', +8818: u'\\lesssim ', +8819: u'\\gtrsim ', +8822: u'\\lessgtr ', +8823: u'\\gtrless ', +8826: u'\\prec ', +8827: u'\\succ ', +8828: u'\\preccurlyeq ', +8829: u'\\succcurlyeq ', +8830: u'\\precsim ', +8831: u'\\succsim ', +8832: u'\\nprec ', +8833: u'\\nsucc ', +8834: u'\\subset ', +8835: u'\\supset ', +8838: u'\\subseteq ', +8839: u'\\supseteq ', +8840: u'\\nsubseteq ', +8841: u'\\nsupseteq ', +8842: u'\\subsetneq ', +8843: u'\\supsetneq ', +8846: u'\\uplus ', +8847: u'\\sqsubset ', +8848: u'\\sqsupset ', +8849: u'\\sqsubseteq ', +8850: u'\\sqsupseteq ', +8851: u'\\sqcap ', +8852: u'\\sqcup ', +8853: u'\\oplus ', +8854: u'\\ominus ', +8855: u'\\otimes ', +8856: u'\\oslash ', +8857: u'\\odot ', +8858: u'\\circledcirc ', +8859: u'\\circledast ', +8861: u'\\circleddash ', 
+8862: u'\\boxplus ', +8863: u'\\boxminus ', +8864: u'\\boxtimes ', +8865: u'\\boxdot ', +8866: u'\\vdash ', +8867: u'\\dashv ', +8868: u'\\top ', +8869: u'\\bot ', +8871: u'\\models ', +8872: u'\\vDash ', +8873: u'\\Vdash ', +8874: u'\\Vvdash ', +8876: u'\\nvdash ', +8877: u'\\nvDash ', +8878: u'\\nVdash ', +8879: u'\\nVDash ', +8882: u'\\vartriangleleft ', +8883: u'\\vartriangleright ', +8884: u'\\trianglelefteq ', +8885: u'\\trianglerighteq ', +8888: u'\\multimap ', +8890: u'\\intercal ', +8891: u'\\veebar ', +8892: u'\\barwedge ', +8896: u'\\bigwedge ', +8897: u'\\bigvee ', +8898: u'\\bigcap ', +8899: u'\\bigcup ', +8900: u'\\diamond ', +8901: u'\\cdot ', +8902: u'\\star ', +8903: u'\\divideontimes ', +8904: u'\\bowtie ', +8905: u'\\ltimes ', +8906: u'\\rtimes ', +8907: u'\\leftthreetimes ', +8908: u'\\rightthreetimes ', +8909: u'\\backsimeq ', +8910: u'\\curlyvee ', +8911: u'\\curlywedge ', +8912: u'\\Subset ', +8913: u'\\Supset ', +8914: u'\\Cap ', +8915: u'\\Cup ', +8916: u'\\pitchfork ', +8918: u'\\lessdot ', +8919: u'\\gtrdot ', +8920: u'\\lll ', +8921: u'\\ggg ', +8922: u'\\lesseqgtr ', +8923: u'\\gtreqless ', +8926: u'\\curlyeqprec ', +8927: u'\\curlyeqsucc ', +8928: u'\\npreceq ', +8929: u'\\nsucceq ', +8934: u'\\lnsim ', +8935: u'\\gnsim ', +8936: u'\\precnsim ', +8937: u'\\succnsim ', +8938: u'\\ntriangleleft ', +8939: u'\\ntriangleright ', +8940: u'\\ntrianglelefteq ', +8941: u'\\ntrianglerighteq ', +8942: u'\\vdots ', +8943: u'\\cdots ', +8945: u'\\ddots ', +8968: u'\\lceil ', +8969: u'\\rceil ', +8970: u'\\lfloor ', +8971: u'\\rfloor ', +8988: u'\\ulcorner ', +8989: u'\\urcorner ', +8990: u'\\llcorner ', +8991: u'\\lrcorner ', +8994: u'\\frown ', +8995: u'\\smile ', +9182: u'\\overbrace ', +9183: u'\\underbrace ', +9651: u'\\bigtriangleup ', +9655: u'\\rhd ', +9661: u'\\bigtriangledown ', +9665: u'\\lhd ', +9671: u'\\Diamond ', +9674: u'\\lozenge ', +9723: u'\\square ', +9724: u'\\blacksquare ', +9733: u'\\bigstar ', +9824: u'\\spadesuit ', +9825: 
u'\\heartsuit ', +9826: u'\\diamondsuit ', +9827: u'\\clubsuit ', +9837: u'\\flat ', +9838: u'\\natural ', +9839: u'\\sharp ', +10003: u'\\checkmark ', +10016: u'\\maltese ', +10178: u'\\perp ', +10216: u'\\langle ', +10217: u'\\rangle ', +10222: u'\\lgroup ', +10223: u'\\rgroup ', +10229: u'\\longleftarrow ', +10230: u'\\longrightarrow ', +10231: u'\\longleftrightarrow ', +10232: u'\\Longleftarrow ', +10233: u'\\Longrightarrow ', +10234: u'\\Longleftrightarrow ', +10236: u'\\longmapsto ', +10731: u'\\blacklozenge ', +10741: u'\\setminus ', +10752: u'\\bigodot ', +10753: u'\\bigoplus ', +10754: u'\\bigotimes ', +10756: u'\\biguplus ', +10758: u'\\bigsqcup ', +10764: u'\\iiiint ', +10781: u'\\Join ', +10815: u'\\amalg ', +10846: u'\\doublebarwedge ', +10877: u'\\leqslant ', +10878: u'\\geqslant ', +10885: u'\\lessapprox ', +10886: u'\\gtrapprox ', +10887: u'\\lneq ', +10888: u'\\gneq ', +10889: u'\\lnapprox ', +10890: u'\\gnapprox ', +10891: u'\\lesseqqgtr ', +10892: u'\\gtreqqless ', +10901: u'\\eqslantless ', +10902: u'\\eqslantgtr ', +10927: u'\\preceq ', +10928: u'\\succeq ', +10935: u'\\precapprox ', +10936: u'\\succapprox ', +10937: u'\\precnapprox ', +10938: u'\\succnapprox ', +10949: u'\\subseteqq ', +10950: u'\\supseteqq ', +10955: u'\\subsetneqq ', +10956: u'\\supsetneqq ', +119808: u'\\mathbf{A}', +119809: u'\\mathbf{B}', +119810: u'\\mathbf{C}', +119811: u'\\mathbf{D}', +119812: u'\\mathbf{E}', +119813: u'\\mathbf{F}', +119814: u'\\mathbf{G}', +119815: u'\\mathbf{H}', +119816: u'\\mathbf{I}', +119817: u'\\mathbf{J}', +119818: u'\\mathbf{K}', +119819: u'\\mathbf{L}', +119820: u'\\mathbf{M}', +119821: u'\\mathbf{N}', +119822: u'\\mathbf{O}', +119823: u'\\mathbf{P}', +119824: u'\\mathbf{Q}', +119825: u'\\mathbf{R}', +119826: u'\\mathbf{S}', +119827: u'\\mathbf{T}', +119828: u'\\mathbf{U}', +119829: u'\\mathbf{V}', +119830: u'\\mathbf{W}', +119831: u'\\mathbf{X}', +119832: u'\\mathbf{Y}', +119833: u'\\mathbf{Z}', +119834: u'\\mathbf{a}', +119835: 
u'\\mathbf{b}', +119836: u'\\mathbf{c}', +119837: u'\\mathbf{d}', +119838: u'\\mathbf{e}', +119839: u'\\mathbf{f}', +119840: u'\\mathbf{g}', +119841: u'\\mathbf{h}', +119842: u'\\mathbf{i}', +119843: u'\\mathbf{j}', +119844: u'\\mathbf{k}', +119845: u'\\mathbf{l}', +119846: u'\\mathbf{m}', +119847: u'\\mathbf{n}', +119848: u'\\mathbf{o}', +119849: u'\\mathbf{p}', +119850: u'\\mathbf{q}', +119851: u'\\mathbf{r}', +119852: u'\\mathbf{s}', +119853: u'\\mathbf{t}', +119854: u'\\mathbf{u}', +119855: u'\\mathbf{v}', +119856: u'\\mathbf{w}', +119857: u'\\mathbf{x}', +119858: u'\\mathbf{y}', +119859: u'\\mathbf{z}', +119860: u'A', +119861: u'B', +119862: u'C', +119863: u'D', +119864: u'E', +119865: u'F', +119866: u'G', +119867: u'H', +119868: u'I', +119869: u'J', +119870: u'K', +119871: u'L', +119872: u'M', +119873: u'N', +119874: u'O', +119875: u'P', +119876: u'Q', +119877: u'R', +119878: u'S', +119879: u'T', +119880: u'U', +119881: u'V', +119882: u'W', +119883: u'X', +119884: u'Y', +119885: u'Z', +119886: u'a', +119887: u'b', +119888: u'c', +119889: u'd', +119890: u'e', +119891: u'f', +119892: u'g', +119894: u'i', +119895: u'j', +119896: u'k', +119897: u'l', +119898: u'm', +119899: u'n', +119900: u'o', +119901: u'p', +119902: u'q', +119903: u'r', +119904: u's', +119905: u't', +119906: u'u', +119907: u'v', +119908: u'w', +119909: u'x', +119910: u'y', +119911: u'z', +119964: u'\\mathcal{A}', +119966: u'\\mathcal{C}', +119967: u'\\mathcal{D}', +119970: u'\\mathcal{G}', +119973: u'\\mathcal{J}', +119974: u'\\mathcal{K}', +119977: u'\\mathcal{N}', +119978: u'\\mathcal{O}', +119979: u'\\mathcal{P}', +119980: u'\\mathcal{Q}', +119982: u'\\mathcal{S}', +119983: u'\\mathcal{T}', +119984: u'\\mathcal{U}', +119985: u'\\mathcal{V}', +119986: u'\\mathcal{W}', +119987: u'\\mathcal{X}', +119988: u'\\mathcal{Y}', +119989: u'\\mathcal{Z}', +120068: u'\\mathfrak{A}', +120069: u'\\mathfrak{B}', +120071: u'\\mathfrak{D}', +120072: u'\\mathfrak{E}', +120073: u'\\mathfrak{F}', +120074: 
u'\\mathfrak{G}', +120077: u'\\mathfrak{J}', +120078: u'\\mathfrak{K}', +120079: u'\\mathfrak{L}', +120080: u'\\mathfrak{M}', +120081: u'\\mathfrak{N}', +120082: u'\\mathfrak{O}', +120083: u'\\mathfrak{P}', +120084: u'\\mathfrak{Q}', +120086: u'\\mathfrak{S}', +120087: u'\\mathfrak{T}', +120088: u'\\mathfrak{U}', +120089: u'\\mathfrak{V}', +120090: u'\\mathfrak{W}', +120091: u'\\mathfrak{X}', +120092: u'\\mathfrak{Y}', +120094: u'\\mathfrak{a}', +120095: u'\\mathfrak{b}', +120096: u'\\mathfrak{c}', +120097: u'\\mathfrak{d}', +120098: u'\\mathfrak{e}', +120099: u'\\mathfrak{f}', +120100: u'\\mathfrak{g}', +120101: u'\\mathfrak{h}', +120102: u'\\mathfrak{i}', +120103: u'\\mathfrak{j}', +120104: u'\\mathfrak{k}', +120105: u'\\mathfrak{l}', +120106: u'\\mathfrak{m}', +120107: u'\\mathfrak{n}', +120108: u'\\mathfrak{o}', +120109: u'\\mathfrak{p}', +120110: u'\\mathfrak{q}', +120111: u'\\mathfrak{r}', +120112: u'\\mathfrak{s}', +120113: u'\\mathfrak{t}', +120114: u'\\mathfrak{u}', +120115: u'\\mathfrak{v}', +120116: u'\\mathfrak{w}', +120117: u'\\mathfrak{x}', +120118: u'\\mathfrak{y}', +120119: u'\\mathfrak{z}', +120120: u'\\mathbb{A}', +120121: u'\\mathbb{B}', +120123: u'\\mathbb{D}', +120124: u'\\mathbb{E}', +120125: u'\\mathbb{F}', +120126: u'\\mathbb{G}', +120128: u'\\mathbb{I}', +120129: u'\\mathbb{J}', +120130: u'\\mathbb{K}', +120131: u'\\mathbb{L}', +120132: u'\\mathbb{M}', +120134: u'\\mathbb{O}', +120138: u'\\mathbb{S}', +120139: u'\\mathbb{T}', +120140: u'\\mathbb{U}', +120141: u'\\mathbb{V}', +120142: u'\\mathbb{W}', +120143: u'\\mathbb{X}', +120144: u'\\mathbb{Y}', +120156: u'\\Bbbk ', +120224: u'\\mathsf{A}', +120225: u'\\mathsf{B}', +120226: u'\\mathsf{C}', +120227: u'\\mathsf{D}', +120228: u'\\mathsf{E}', +120229: u'\\mathsf{F}', +120230: u'\\mathsf{G}', +120231: u'\\mathsf{H}', +120232: u'\\mathsf{I}', +120233: u'\\mathsf{J}', +120234: u'\\mathsf{K}', +120235: u'\\mathsf{L}', +120236: u'\\mathsf{M}', +120237: u'\\mathsf{N}', +120238: u'\\mathsf{O}', 
+120239: u'\\mathsf{P}', +120240: u'\\mathsf{Q}', +120241: u'\\mathsf{R}', +120242: u'\\mathsf{S}', +120243: u'\\mathsf{T}', +120244: u'\\mathsf{U}', +120245: u'\\mathsf{V}', +120246: u'\\mathsf{W}', +120247: u'\\mathsf{X}', +120248: u'\\mathsf{Y}', +120249: u'\\mathsf{Z}', +120250: u'\\mathsf{a}', +120251: u'\\mathsf{b}', +120252: u'\\mathsf{c}', +120253: u'\\mathsf{d}', +120254: u'\\mathsf{e}', +120255: u'\\mathsf{f}', +120256: u'\\mathsf{g}', +120257: u'\\mathsf{h}', +120258: u'\\mathsf{i}', +120259: u'\\mathsf{j}', +120260: u'\\mathsf{k}', +120261: u'\\mathsf{l}', +120262: u'\\mathsf{m}', +120263: u'\\mathsf{n}', +120264: u'\\mathsf{o}', +120265: u'\\mathsf{p}', +120266: u'\\mathsf{q}', +120267: u'\\mathsf{r}', +120268: u'\\mathsf{s}', +120269: u'\\mathsf{t}', +120270: u'\\mathsf{u}', +120271: u'\\mathsf{v}', +120272: u'\\mathsf{w}', +120273: u'\\mathsf{x}', +120274: u'\\mathsf{y}', +120275: u'\\mathsf{z}', +120432: u'\\mathtt{A}', +120433: u'\\mathtt{B}', +120434: u'\\mathtt{C}', +120435: u'\\mathtt{D}', +120436: u'\\mathtt{E}', +120437: u'\\mathtt{F}', +120438: u'\\mathtt{G}', +120439: u'\\mathtt{H}', +120440: u'\\mathtt{I}', +120441: u'\\mathtt{J}', +120442: u'\\mathtt{K}', +120443: u'\\mathtt{L}', +120444: u'\\mathtt{M}', +120445: u'\\mathtt{N}', +120446: u'\\mathtt{O}', +120447: u'\\mathtt{P}', +120448: u'\\mathtt{Q}', +120449: u'\\mathtt{R}', +120450: u'\\mathtt{S}', +120451: u'\\mathtt{T}', +120452: u'\\mathtt{U}', +120453: u'\\mathtt{V}', +120454: u'\\mathtt{W}', +120455: u'\\mathtt{X}', +120456: u'\\mathtt{Y}', +120457: u'\\mathtt{Z}', +120458: u'\\mathtt{a}', +120459: u'\\mathtt{b}', +120460: u'\\mathtt{c}', +120461: u'\\mathtt{d}', +120462: u'\\mathtt{e}', +120463: u'\\mathtt{f}', +120464: u'\\mathtt{g}', +120465: u'\\mathtt{h}', +120466: u'\\mathtt{i}', +120467: u'\\mathtt{j}', +120468: u'\\mathtt{k}', +120469: u'\\mathtt{l}', +120470: u'\\mathtt{m}', +120471: u'\\mathtt{n}', +120472: u'\\mathtt{o}', +120473: u'\\mathtt{p}', +120474: u'\\mathtt{q}', 
+120475: u'\\mathtt{r}', +120476: u'\\mathtt{s}', +120477: u'\\mathtt{t}', +120478: u'\\mathtt{u}', +120479: u'\\mathtt{v}', +120480: u'\\mathtt{w}', +120481: u'\\mathtt{x}', +120482: u'\\mathtt{y}', +120483: u'\\mathtt{z}', +120484: u'\\imath ', +120485: u'\\jmath ', +120490: u'\\mathbf{\\Gamma}', +120491: u'\\mathbf{\\Delta}', +120495: u'\\mathbf{\\Theta}', +120498: u'\\mathbf{\\Lambda}', +120501: u'\\mathbf{\\Xi}', +120503: u'\\mathbf{\\Pi}', +120506: u'\\mathbf{\\Sigma}', +120508: u'\\mathbf{\\Upsilon}', +120509: u'\\mathbf{\\Phi}', +120511: u'\\mathbf{\\Psi}', +120512: u'\\mathbf{\\Omega}', +120548: u'\\mathit{\\Gamma}', +120549: u'\\mathit{\\Delta}', +120553: u'\\mathit{\\Theta}', +120556: u'\\mathit{\\Lambda}', +120559: u'\\mathit{\\Xi}', +120561: u'\\mathit{\\Pi}', +120564: u'\\mathit{\\Sigma}', +120566: u'\\mathit{\\Upsilon}', +120567: u'\\mathit{\\Phi}', +120569: u'\\mathit{\\Psi}', +120570: u'\\mathit{\\Omega}', +120572: u'\\alpha ', +120573: u'\\beta ', +120574: u'\\gamma ', +120575: u'\\delta ', +120576: u'\\varepsilon ', +120577: u'\\zeta ', +120578: u'\\eta ', +120579: u'\\theta ', +120580: u'\\iota ', +120581: u'\\kappa ', +120582: u'\\lambda ', +120583: u'\\mu ', +120584: u'\\nu ', +120585: u'\\xi ', +120587: u'\\pi ', +120588: u'\\rho ', +120589: u'\\varsigma ', +120590: u'\\sigma ', +120591: u'\\tau ', +120592: u'\\upsilon ', +120593: u'\\varphi ', +120594: u'\\chi ', +120595: u'\\psi ', +120596: u'\\omega ', +120597: u'\\partial ', +120598: u'\\epsilon ', +120599: u'\\vartheta ', +120600: u'\\varkappa ', +120601: u'\\phi ', +120602: u'\\varrho ', +120603: u'\\varpi ', +120782: u'\\mathbf{0}', +120783: u'\\mathbf{1}', +120784: u'\\mathbf{2}', +120785: u'\\mathbf{3}', +120786: u'\\mathbf{4}', +120787: u'\\mathbf{5}', +120788: u'\\mathbf{6}', +120789: u'\\mathbf{7}', +120790: u'\\mathbf{8}', +120791: u'\\mathbf{9}', +120802: u'\\mathsf{0}', +120803: u'\\mathsf{1}', +120804: u'\\mathsf{2}', +120805: u'\\mathsf{3}', +120806: u'\\mathsf{4}', +120807: 
u'\\mathsf{5}', +120808: u'\\mathsf{6}', +120809: u'\\mathsf{7}', +120810: u'\\mathsf{8}', +120811: u'\\mathsf{9}', +120822: u'\\mathtt{0}', +120823: u'\\mathtt{1}', +120824: u'\\mathtt{2}', +120825: u'\\mathtt{3}', +120826: u'\\mathtt{4}', +120827: u'\\mathtt{5}', +120828: u'\\mathtt{6}', +120829: u'\\mathtt{7}', +120830: u'\\mathtt{8}', +120831: u'\\mathtt{9}', +} Modified: trunk/docutils/src/main/resources/docutils/docutils/utils/punctuation_chars.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/punctuation_chars.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/punctuation_chars.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,5 @@ #!/usr/bin/env python -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- # :Copyright: © 2011 Günter Milde. # :License: Released under the terms of the `2-Clause BSD license`_, in short: # @@ -10,7 +10,7 @@ # # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause -# :Id: $Id: punctuation_chars.py 7401 2012-05-01 09:50:02Z grubert $ +# :Id: $Id: punctuation_chars.py 7668 2013-06-04 12:46:30Z milde $ import sys, re import unicodedata @@ -23,183 +23,311 @@ # (states.py) # # .. _inline markup recognition rules: -# ../../../docs/ref/rst/restructuredtext.html#inline-markup +# ../../docs/ref/rst/restructuredtext.html#inline-markup # Docutils punctuation category sample strings # -------------------------------------------- # # The sample strings are generated by punctuation_samples() and put here -# literal to avoid the time-consuming generation with every Docutils -# run. Running this file as a standalone module checks the definitions below -# against a re-calculation. +# literal to avoid the time-consuming generation with every Docutils run. +# As the samples are used inside ``[ ]`` in regular expressions, hyphen and +# square brackets are escaped. 
:: -openers = ur"""\"\'\(\<\[\{༺༼᚛⁅⁽₍〈❨❪❬❮❰❲❴⟅⟦⟨⟪⟬⟮⦃⦅⦇⦉⦋⦍⦏⦑⦓⦕⦗⧘⧚⧼⸢⸤⸦⸨〈《「『【〔〖〘〚〝〝﴾︗︵︷︹︻︽︿﹁﹃﹇﹙﹛﹝([{⦅「«‘“‹⸂⸄⸉⸌⸜⸠‚„»’”›⸃⸅⸊⸍⸝⸡‛‟""" -closers = ur"""\"\'\)\>\]\}༻༽᚜⁆⁾₎〉❩❫❭❯❱❳❵⟆⟧⟩⟫⟭⟯⦄⦆⦈⦊⦌⦎⦐⦒⦔⦖⦘⧙⧛⧽⸣⸥⸧⸩〉》」』】〕〗〙〛〞〟﴿︘︶︸︺︼︾﹀﹂﹄﹈﹚﹜﹞)]}⦆」»’”›⸃⸅⸊⸍⸝⸡‛‟«‘“‹⸂⸄⸉⸌⸜⸠‚„""" -delimiters = ur"\-\/\:֊־᐀᠆‐‑‒–—―⸗⸚〜〰゠︱︲﹘﹣-¡·¿;·՚՛՜՝՞՟։׀׃׆׳״؉؊،؍؛؞؟٪٫٬٭۔܀܁܂܃܄܅܆܇܈܉܊܋܌܍߷߸߹࠰࠱࠲࠳࠴࠵࠶࠷࠸࠹࠺࠻࠼࠽࠾।॥॰෴๏๚๛༄༅༆༇༈༉༊་༌།༎༏༐༑༒྅࿐࿑࿒࿓࿔၊။၌၍၎၏჻፡።፣፤፥፦፧፨᙭᙮᛫᛬᛭᜵᜶។៕៖៘៙៚᠀᠁᠂᠃᠄᠅᠇᠈᠉᠊᥄᥅᧞᧟᨞᨟᪠᪡᪢᪣᪤᪥᪦᪨᪩᪪᪫᪬᪭᭚᭛᭜᭝᭞᭟᭠᰻᰼᰽᰾᰿᱾᱿᳓‖‗†‡•‣․‥…‧‰‱′″‴‵‶‷‸※‼‽‾⁁⁂⁃⁇⁈⁉⁊⁋⁌⁍⁎⁏⁐⁑⁓⁕⁖⁗⁘⁙⁚⁛⁜⁝⁞⳹⳺⳻⳼⳾⳿⸀⸁⸆⸇⸈⸋⸎⸏⸐⸑⸒⸓⸔⸕⸖⸘⸙⸛⸞⸟⸪⸫⸬⸭⸮⸰⸱、。〃〽・꓾꓿꘍꘎꘏꙳꙾꛲꛳꛴꛵꛶꛷꡴꡵꡶꡷꣎꣏꣸꣹꣺꤮꤯꥟꧁꧂꧃꧄꧅꧆꧇꧈꧉꧊꧋꧌꧍꧞꧟꩜꩝꩞꩟꫞꫟꯫︐︑︒︓︔︕︖︙︰﹅﹆﹉﹊﹋﹌﹐﹑﹒﹔﹕﹖﹗﹟﹠﹡﹨﹪﹫!"#%&'*,./:;?@\。、・𐄀𐄁𐎟𐏐𐡗𐤟𐤿𐩐𐩑𐩒𐩓𐩔𐩕𐩖𐩗𐩘𐩿𐬹𐬺𐬻𐬼𐬽𐬾𐬿𑂻𑂼𑂾𑂿𑃀𑃁𒑰𒑱𒑲𒑳" -closing_delimiters = ur"\.\,\;\!\?" +openers = (u'"\'(<\\[{\u0f3a\u0f3c\u169b\u2045\u207d\u208d\u2329\u2768' + u'\u276a\u276c\u276e\u2770\u2772\u2774\u27c5\u27e6\u27e8\u27ea' + u'\u27ec\u27ee\u2983\u2985\u2987\u2989\u298b\u298d\u298f\u2991' + u'\u2993\u2995\u2997\u29d8\u29da\u29fc\u2e22\u2e24\u2e26\u2e28' + u'\u3008\u300a\u300c\u300e\u3010\u3014\u3016\u3018\u301a\u301d' + u'\u301d\ufd3e\ufe17\ufe35\ufe37\ufe39\ufe3b\ufe3d\ufe3f\ufe41' + u'\ufe43\ufe47\ufe59\ufe5b\ufe5d\uff08\uff3b\uff5b\uff5f\uff62' + u'\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c\u2e1c\u2e20' + u'\u201a\u201e\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d' + u'\u2e1d\u2e21\u201b\u201f') +closers = (u'"\')>\\]}\u0f3b\u0f3d\u169c\u2046\u207e\u208e\u232a\u2769' + u'\u276b\u276d\u276f\u2771\u2773\u2775\u27c6\u27e7\u27e9\u27eb' + u'\u27ed\u27ef\u2984\u2986\u2988\u298a\u298c\u298e\u2990\u2992' + u'\u2994\u2996\u2998\u29d9\u29db\u29fd\u2e23\u2e25\u2e27\u2e29' + u'\u3009\u300b\u300d\u300f\u3011\u3015\u3017\u3019\u301b\u301e' + u'\u301f\ufd3f\ufe18\ufe36\ufe38\ufe3a\ufe3c\ufe3e\ufe40\ufe42' + u'\ufe44\ufe48\ufe5a\ufe5c\ufe5e\uff09\uff3d\uff5d\uff60\uff63' + u'\xbb\u2019\u201d\u203a\u2e03\u2e05\u2e0a\u2e0d\u2e1d\u2e21' + u'\u201b\u201f\xab\u2018\u201c\u2039\u2e02\u2e04\u2e09\u2e0c' + u'\u2e1c\u2e20\u201a\u201e') +delimiters = 
(u'\\-/:\u058a\xa1\xb7\xbf\u037e\u0387\u055a-\u055f\u0589' + u'\u05be\u05c0\u05c3\u05c6\u05f3\u05f4\u0609\u060a\u060c' + u'\u060d\u061b\u061e\u061f\u066a-\u066d\u06d4\u0700-\u070d' + u'\u07f7-\u07f9\u0830-\u083e\u0964\u0965\u0970\u0df4\u0e4f' + u'\u0e5a\u0e5b\u0f04-\u0f12\u0f85\u0fd0-\u0fd4\u104a-\u104f' + u'\u10fb\u1361-\u1368\u1400\u166d\u166e\u16eb-\u16ed\u1735' + u'\u1736\u17d4-\u17d6\u17d8-\u17da\u1800-\u180a\u1944\u1945' + u'\u19de\u19df\u1a1e\u1a1f\u1aa0-\u1aa6\u1aa8-\u1aad\u1b5a-' + u'\u1b60\u1c3b-\u1c3f\u1c7e\u1c7f\u1cd3\u2010-\u2017\u2020-' + u'\u2027\u2030-\u2038\u203b-\u203e\u2041-\u2043\u2047-' + u'\u2051\u2053\u2055-\u205e\u2cf9-\u2cfc\u2cfe\u2cff\u2e00' + u'\u2e01\u2e06-\u2e08\u2e0b\u2e0e-\u2e1b\u2e1e\u2e1f\u2e2a-' + u'\u2e2e\u2e30\u2e31\u3001-\u3003\u301c\u3030\u303d\u30a0' + u'\u30fb\ua4fe\ua4ff\ua60d-\ua60f\ua673\ua67e\ua6f2-\ua6f7' + u'\ua874-\ua877\ua8ce\ua8cf\ua8f8-\ua8fa\ua92e\ua92f\ua95f' + u'\ua9c1-\ua9cd\ua9de\ua9df\uaa5c-\uaa5f\uaade\uaadf\uabeb' + u'\ufe10-\ufe16\ufe19\ufe30-\ufe32\ufe45\ufe46\ufe49-\ufe4c' + u'\ufe50-\ufe52\ufe54-\ufe58\ufe5f-\ufe61\ufe63\ufe68\ufe6a' + u'\ufe6b\uff01-\uff03\uff05-\uff07\uff0a\uff0c-\uff0f\uff1a' + u'\uff1b\uff1f\uff20\uff3c\uff61\uff64\uff65') +if sys.maxunicode >= 0x10FFFF: # "wide" build + delimiters += (u'\U00010100\U00010101\U0001039f\U000103d0\U00010857' + u'\U0001091f\U0001093f\U00010a50-\U00010a58\U00010a7f' + u'\U00010b39-\U00010b3f\U000110bb\U000110bc\U000110be-' + u'\U000110c1\U00012470-\U00012473') +closing_delimiters = u'\\\\.,;!?' +# Matching open/close quotes +# -------------------------- + +# Rule (5) requires determination of matching open/close pairs. However, +# the pairing of open/close quotes is ambigue due to different typographic +# conventions in different languages. 
+ +quote_pairs = {u'\xbb': u'\xbb', # Swedish + u'\u2018': u'\u201a', # Greek + u'\u2019': u'\u2019', # Swedish + u'\u201a': u'\u2018\u2019', # German, Polish + u'\u201c': u'\u201e', # German + u'\u201e': u'\u201c\u201d', + u'\u201d': u'\u201d', # Swedish + u'\u203a': u'\u203a', # Swedish + } + +def match_chars(c1, c2): + try: + i = openers.index(c1) + except ValueError: # c1 not in openers + return False + return c2 == closers[i] or c2 in quote_pairs.get(c1, '') + + +# Running this file as a standalone module checks the definitions against a +# re-calculation:: + +if __name__ == '__main__': + + # Unicode punctuation character categories # ---------------------------------------- -unicode_punctuation_categories = { - # 'Pc': 'Connector', # not used in Docutils inline markup recognition - 'Pd': 'Dash', - 'Ps': 'Open', - 'Pe': 'Close', - 'Pi': 'Initial quote', # may behave like Ps or Pe depending on usage - 'Pf': 'Final quote', # may behave like Ps or Pe depending on usage - 'Po': 'Other' - } -"""Unicode character categories for punctuation""" + unicode_punctuation_categories = { + # 'Pc': 'Connector', # not used in Docutils inline markup recognition + 'Pd': 'Dash', + 'Ps': 'Open', + 'Pe': 'Close', + 'Pi': 'Initial quote', # may behave like Ps or Pe depending on usage + 'Pf': 'Final quote', # may behave like Ps or Pe depending on usage + 'Po': 'Other' + } + """Unicode character categories for punctuation""" # generate character pattern strings # ================================== -def unicode_charlists(categories, cp_min=0, cp_max=None): - """Return dictionary of Unicode character lists. + def unicode_charlists(categories, cp_min=0, cp_max=None): + """Return dictionary of Unicode character lists. - For each of the `catagories`, an item contains a list with all Unicode - characters with `cp_min` <= code-point <= `cp_max` that belong to the - category. (The default values check every code-point supported by Python.) 
- """ - # Determine highest code point with one of the given categories - # (may shorten the search time considerably if there are many - # categories with not too high characters): - if cp_max is None: - # python 2.3: list comprehension instead of generator required - cp_max = max([x for x in xrange(sys.maxunicode + 1) - if unicodedata.category(unichr(x)) in categories]) - # print cp_max # => 74867 for unicode_punctuation_categories - charlists = {} - for cat in categories: - charlists[cat] = [unichr(x) for x in xrange(cp_min, cp_max+1) - if unicodedata.category(unichr(x)) == cat] - return charlists + For each of the `categories`, an item contains a list with all Unicode + characters with `cp_min` <= code-point <= `cp_max` that belong to + the category. + The default values check every code-point supported by Python + (`sys.maxunicode` is 0x10FFFF in a "wide" build and 0xFFFF in a "narrow" + build, i.e. ucs4 and ucs2 respectively). + """ + # Determine highest code point with one of the given categories + # (may shorten the search time considerably if there are many + # categories with not too high characters): + if cp_max is None: + cp_max = max(x for x in xrange(sys.maxunicode+1) + if unicodedata.category(unichr(x)) in categories) + # print cp_max # => 74867 for unicode_punctuation_categories + charlists = {} + for cat in categories: + charlists[cat] = [unichr(x) for x in xrange(cp_min, cp_max+1) + if unicodedata.category(unichr(x)) == cat] + return charlists + # Character categories in Docutils # -------------------------------- -def punctuation_samples(): + def punctuation_samples(): - """Docutils punctuation category sample strings. + """Docutils punctuation category sample strings. - Return list of sample strings for the categories "Open", "Close", - "Delimiters" and "Closing-Delimiters" used in the `inline markup - recognition rules`_. 
- """ + Return list of sample strings for the categories "Open", "Close", + "Delimiters" and "Closing-Delimiters" used in the `inline markup + recognition rules`_. + """ - # Lists with characters in Unicode punctuation character categories - cp_min = 160 # ASCII chars have special rules for backwards compatibility - ucharlists = unicode_charlists(unicode_punctuation_categories, cp_min) + # Lists with characters in Unicode punctuation character categories + cp_min = 160 # ASCII chars have special rules for backwards compatibility + ucharlists = unicode_charlists(unicode_punctuation_categories, cp_min) - # match opening/closing characters - # -------------------------------- - # Rearange the lists to ensure matching characters at the same - # index position. + # match opening/closing characters + # -------------------------------- + # Rearange the lists to ensure matching characters at the same + # index position. - # low quotation marks are also used as closers (e.g. in Greek) - # move them to category Pi: - ucharlists['Ps'].remove(u'‚') # 201A SINGLE LOW-9 QUOTATION MARK - ucharlists['Ps'].remove(u'„') # 201E DOUBLE LOW-9 QUOTATION MARK - ucharlists['Pi'] += [u'‚', u'„'] + # low quotation marks are also used as closers (e.g. 
in Greek) + # move them to category Pi: + ucharlists['Ps'].remove(u'‚') # 201A SINGLE LOW-9 QUOTATION MARK + ucharlists['Ps'].remove(u'„') # 201E DOUBLE LOW-9 QUOTATION MARK + ucharlists['Pi'] += [u'‚', u'„'] - ucharlists['Pi'].remove(u'‛') # 201B SINGLE HIGH-REVERSED-9 QUOTATION MARK - ucharlists['Pi'].remove(u'‟') # 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK - ucharlists['Pf'] += [u'‛', u'‟'] + ucharlists['Pi'].remove(u'‛') # 201B SINGLE HIGH-REVERSED-9 QUOTATION MARK + ucharlists['Pi'].remove(u'‟') # 201F DOUBLE HIGH-REVERSED-9 QUOTATION MARK + ucharlists['Pf'] += [u'‛', u'‟'] - # 301F LOW DOUBLE PRIME QUOTATION MARK misses the opening pendant: - ucharlists['Ps'].insert(ucharlists['Pe'].index(u'\u301f'), u'\u301d') + # 301F LOW DOUBLE PRIME QUOTATION MARK misses the opening pendant: + ucharlists['Ps'].insert(ucharlists['Pe'].index(u'\u301f'), u'\u301d') - # print u''.join(ucharlists['Ps']).encode('utf8') - # print u''.join(ucharlists['Pe']).encode('utf8') - # print u''.join(ucharlists['Pi']).encode('utf8') - # print u''.join(ucharlists['Pf']).encode('utf8') + # print u''.join(ucharlists['Ps']).encode('utf8') + # print u''.join(ucharlists['Pe']).encode('utf8') + # print u''.join(ucharlists['Pi']).encode('utf8') + # print u''.join(ucharlists['Pf']).encode('utf8') - # The Docutils character categories - # --------------------------------- - # - # The categorization of ASCII chars is non-standard to reduce both - # false positives and need for escaping. (see `inline markup recognition - # rules`_) + # The Docutils character categories + # --------------------------------- + # + # The categorization of ASCII chars is non-standard to reduce + # both false positives and need for escaping. 
(see `inline markup + # recognition rules`_) - # matching, allowed before markup - openers = [re.escape('"\'(<[{')] - for cat in ('Ps', 'Pi', 'Pf'): - openers.extend(ucharlists[cat]) + # allowed before markup if there is a matching closer + openers = [u'"\'(<\\[{'] + for cat in ('Ps', 'Pi', 'Pf'): + openers.extend(ucharlists[cat]) - # matching, allowed after markup - closers = [re.escape('"\')>]}')] - for cat in ('Pe', 'Pf', 'Pi'): - closers.extend(ucharlists[cat]) + # allowed after markup if there is a matching opener + closers = [u'"\')>\\]}'] + for cat in ('Pe', 'Pf', 'Pi'): + closers.extend(ucharlists[cat]) - # non-matching, allowed on both sides - delimiters = [re.escape('-/:')] - for cat in ('Pd', 'Po'): - delimiters.extend(ucharlists[cat]) + # non-matching, allowed on both sides + delimiters = [u'\\-/:'] + for cat in ('Pd', 'Po'): + delimiters.extend(ucharlists[cat]) - # non-matching, after markup - closing_delimiters = [re.escape('.,;!?')] + # non-matching, after markup + closing_delimiters = [r'\\.,;!?'] - # # Test open/close matching: - # for i in range(min(len(openers),len(closers))): - # print '%4d %s %s' % (i, openers[i].encode('utf8'), - # closers[i].encode('utf8')) + # # Test open/close matching: + # for i in range(min(len(openers),len(closers))): + # print '%4d %s %s' % (i, openers[i].encode('utf8'), + # closers[i].encode('utf8')) - return [u''.join(chars) - for chars in (openers, closers, delimiters, closing_delimiters)] + return [u''.join(chars) for chars in (openers, closers, delimiters, + closing_delimiters)] + def separate_wide_chars(s): + """Return (s1,s2) with characters above 0xFFFF in s2""" + maxunicode_narrow = 0xFFFF + l1 = [ch for ch in s if ord(ch) <= maxunicode_narrow] + l2 = [ch for ch in s if ord(ch) > maxunicode_narrow] + return ''.join(l1), ''.join(l2) -# Matching open/close quotes -# -------------------------- + def mark_intervals(s): + """Return s with shortcut notation for runs of consecutive characters -# Rule (5) requires 
determination of matching open/close pairs. However, -# the pairing of open/close quotes is ambigue due to different typographic -# conventions in different languages. + Sort string and replace 'cdef' by 'c-f' and similar. + """ + l =[] + s = [ord(ch) for ch in s] + s.sort() + for n in s: + try: + if l[-1][-1]+1 == n: + l[-1].append(n) + else: + l.append([n]) + except IndexError: + l.append([n]) -quote_pairs = {u'\xbb': u'\xbb', # Swedish - u'\u2018': u'\u201a', # Greek - u'\u2019': u'\u2019', # Swedish - u'\u201a': u'\u2018\u2019', # German, Polish - u'\u201c': u'\u201e', # German - u'\u201e': u'\u201c\u201d', - u'\u201d': u'\u201d', # Swedish - u'\u203a': u'\u203a', # Swedish - } + l2 = [] + for i in l: + i = [unichr(n) for n in i] + if len(i) > 2: + i = i[0], u'-', i[-1] + l2.extend(i) -def match_chars(c1, c2): - try: - i = openers.index(c1) - except ValueError: # c1 not in openers - return False - return c2 == closers[i] or c2 in quote_pairs.get(c1, '') + return ''.join(l2) + def wrap_string(s, startstring= "(", + endstring = ")", wrap=65): + """Line-wrap a unicode string literal definition.""" + c = len(startstring) + contstring = "'\n" + ' ' * len(startstring) + "u'" + l = [startstring] + for ch in s: + c += 1 + if ch == '\\' and c > wrap: + c = len(startstring) + ch = contstring + ch + l.append(ch) + l.append(endstring) + return ''.join(l) - # print results # ============= -if __name__ == '__main__': +# (re) create and compare the samples: - # (re) create and compare the samples: (o, c, d, cd) = punctuation_samples() + o, o_wide = separate_wide_chars(o) + c, c_wide = separate_wide_chars(c) + d, d_wide = separate_wide_chars(d) + d = d[:5] + mark_intervals(d[5:]) + d_wide = mark_intervals(d_wide) + if sys.maxunicode >= 0x10FFFF: # "wide" build + d += d_wide if o != openers: print '- openers = ur"""%s"""' % openers.encode('utf8') print '+ openers = ur"""%s"""' % o.encode('utf8') + if o_wide: + print '+ openers-wide = ur"""%s"""' % o_wide.encode('utf8') if c != 
closers: print '- closers = ur"""%s"""' % closers.encode('utf8') print '+ closers = ur"""%s"""' % c.encode('utf8') + if c_wide: + print '+ closers-wide = ur"""%s"""' % c_wide.encode('utf8') if d != delimiters: print '- delimiters = ur"%s"' % delimiters.encode('utf8') print '+ delimiters = ur"%s"' % d.encode('utf8') if cd != closing_delimiters: print '- closing_delimiters = ur"%s"' % closing_delimiters.encode('utf8') print '+ closing_delimiters = ur"%s"' % cd.encode('utf8') + # closing_delimiters are all ASCII characters - # # test prints +# Print literal code to define the character sets: + + # `openers` and `closers` must be verbose and keep order because they are + # also used in `match_chars()`. + print wrap_string(repr(o), startstring='openers = (') + print wrap_string(repr(c), startstring='closers = (') + # delimiters: sort and use shortcut for intervals (saves ~150 characters): + print wrap_string(repr(d), startstring='delimiters = (') + # add characters in the upper plane only in a "wide" build: + print 'if sys.maxunicode >= 0x10FFFF: # "wide" build' + print wrap_string(repr(d_wide), startstring=' delimiters += (') + print 'closing_delimiters =', repr(cd) + +# test prints + + # print "wide" Unicode characters: + # ucharlists = unicode_charlists(unicode_punctuation_categories) + # for key in ucharlists: + # if key.endswith('wide'): + # print key, ucharlists[key] + # print 'openers = ', repr(openers) # print 'closers = ', repr(closers) # print 'delimiters = ', repr(delimiters) @@ -210,3 +338,18 @@ # # print cat, chars # # compact output (visible with a comprehensive font): # print (u":%s: %s" % (cat, u''.join(chars))).encode('utf8') + +# verbose print + + # print 'openers:' + # for ch in openers: + # print ch.encode('utf8'), unicodedata.name(ch) + # print 'closers:' + # for ch in closers: + # print ch.encode('utf8'), unicodedata.name(ch) + # print 'delimiters:' + # for ch in delimiters: + # print ch.encode('utf8'), unicodedata.name(ch) + # print 
'closing_delimiters:' + # for ch in closing_delimiters: + # print ch.encode('utf8'), unicodedata.name(ch) Added: trunk/docutils/src/main/resources/docutils/docutils/utils/smartquotes.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/smartquotes.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/smartquotes.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,906 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# :Id: $Id: smartquotes.py 7716 2013-08-21 21:54:57Z milde $ +# :Copyright: © 2010 Günter Milde, +# original `SmartyPants`_: © 2003 John Gruber +# smartypants.py: © 2004, 2007 Chad Miller +# :Maintainer: docutils-develop@lists.sourceforge.net +# :License: Released under the terms of the `2-Clause BSD license`_, in short: +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notices and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause + + +r""" +======================== +SmartyPants for Docutils +======================== + +Synopsis +======== + +Smart-quotes for Docutils. + +The original "SmartyPants" is a free web publishing plug-in for Movable Type, +Blosxom, and BBEdit that easily translates plain ASCII punctuation characters +into "smart" typographic punctuation characters. + +`smartypants.py` endeavours to be a functional port of +SmartyPants to Python, for use with Pyblosxom_. + +`smartquotes.py` is an adaptation of SmartyPants to Docutils_. By using Unicode +characters instead of HTML entities for typographic quotes, it works for any +output format that supports Unicode. + +Authors +======= + +`John Gruber`_ did all of the hard work of writing this software in Perl for +`Movable Type`_ and almost all of this useful documentation. 
`Chad Miller`_ +ported it to Python to use with Pyblosxom_. +Adapted to Docutils_ by Günter Milde + +Additional Credits +================== + +Portions of the SmartyPants original work are based on Brad Choate's nifty +MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to +this plug-in. Brad Choate is a fine hacker indeed. + +`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta +testing of the original SmartyPants. + +`Rael Dornfest`_ ported SmartyPants to Blosxom. + +.. _Brad Choate: http://bradchoate.com/ +.. _Jeremy Hedley: http://antipixel.com/ +.. _Charles Wiltgen: http://playbacktime.com/ +.. _Rael Dornfest: http://raelity.org/ + + +Copyright and License +===================== + +SmartyPants_ license (3-Clause BSD license): + + Copyright (c) 2003 John Gruber (http://daringfireball.net/) + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + * Neither the name "SmartyPants" nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + This software is provided by the copyright holders and contributors + "as is" and any express or implied warranties, including, but not + limited to, the implied warranties of merchantability and fitness for + a particular purpose are disclaimed. 
In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + +smartypants.py license (2-Clause BSD license): + + smartypants.py is a derivative work of SmartyPants. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + + This software is provided by the copyright holders and contributors + "as is" and any express or implied warranties, including, but not + limited to, the implied warranties of merchantability and fitness for + a particular purpose are disclaimed. In no event shall the copyright + owner or contributors be liable for any direct, indirect, incidental, + special, exemplary, or consequential damages (including, but not + limited to, procurement of substitute goods or services; loss of use, + data, or profits; or business interruption) however caused and on any + theory of liability, whether in contract, strict liability, or tort + (including negligence or otherwise) arising in any way out of the use + of this software, even if advised of the possibility of such damage. + +.. _John Gruber: http://daringfireball.net/ +.. 
_Chad Miller: http://web.chad.org/ + +.. _Pyblosxom: http://pyblosxom.bluesock.org/ +.. _SmartyPants: http://daringfireball.net/projects/smartypants/ +.. _Movable Type: http://www.movabletype.org/ +.. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause +.. _Docutils: http://docutils.sf.net/ + +Description +=========== + +SmartyPants can perform the following transformations: + +- Straight quotes ( " and ' ) into "curly" quote characters +- Backticks-style quotes (\`\`like this'') into "curly" quote characters +- Dashes (``--`` and ``---``) into en- and em-dash entities +- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity + +This means you can write, edit, and save your posts using plain old +ASCII straight quotes, plain dashes, and plain dots, but your published +posts (and final HTML output) will appear with smart quotes, em-dashes, +and proper ellipses. + +SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``, +``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to +display text where smart quotes and other "smart punctuation" would not be +appropriate, such as source code or example markup. + + +Backslash Escapes +================= + +If you need to use literal straight quotes (or plain hyphens and +periods), SmartyPants accepts the following backslash escape sequences +to force non-smart punctuation. It does so by transforming the escape +sequence into a character: + +======== ===== ========= +Escape Value Character +======== ===== ========= +``\\\\`` \ \\ +\\" " " +\\' ' ' +\\. . . +\\- - \- +\\` ` \` +======== ===== ========= + +This is useful, for example, when you want to use straight quotes as +foot and inch marks: 6\\'2\\" tall; a 17\\" iMac. + +Options +======= + +For Pyblosxom users, the ``smartypants_attributes`` attribute is where you +specify configuration options. + +Numeric values are the easiest way to configure SmartyPants' behavior: + +"0" + Suppress all transformations. 
(Do nothing.) +"1" + Performs default SmartyPants transformations: quotes (including + \`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash) + is used to signify an em-dash; there is no support for en-dashes. + +"2" + Same as smarty_pants="1", except that it uses the old-school typewriter + shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``" + (dash dash dash) + for em-dashes. + +"3" + Same as smarty_pants="2", but inverts the shorthand for dashes: + "``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for + en-dashes. + +"-1" + Stupefy mode. Reverses the SmartyPants transformation process, turning + the characters produced by SmartyPants into their ASCII equivalents. + E.g. "“" is turned into a simple double-quote (\"), "—" is + turned into two dashes, etc. + + +The following single-character attribute values can be combined to toggle +individual transformations from within the smarty_pants attribute. For +example, to educate normal quotes and em-dashes, but not ellipses or +\`\`backticks'' -style quotes: + +``py['smartypants_attributes'] = "qd"`` + +"q" + Educates normal quote characters: (") and ('). + +"b" + Educates \`\`backticks'' -style double quotes. + +"B" + Educates \`\`backticks'' -style double quotes and \`single' quotes. + +"d" + Educates em-dashes. + +"D" + Educates em-dashes and en-dashes, using old-school typewriter shorthand: + (dash dash) for en-dashes, (dash dash dash) for em-dashes. + +"i" + Educates em-dashes and en-dashes, using inverted old-school typewriter + shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes. + +"e" + Educates ellipses. + +"w" + Translates any instance of ``&quot;`` into a normal double-quote character. + This should be of no interest to most people, but of particular interest + to anyone who writes their posts using Dreamweaver, as Dreamweaver + inexplicably uses this entity to represent a literal double-quote + character.
SmartyPants only educates normal quotes, not entities (because + ordinarily, entities are used for the explicit purpose of representing the + specific character they represent). The "w" option must be used in + conjunction with one (or both) of the other quote options ("q" or "b"). + Thus, if you wish to apply all SmartyPants transformations (quotes, en- + and em-dashes, and ellipses) and also translate ``&quot;`` entities into + regular quotes so SmartyPants can educate them, you should pass the + following to the smarty_pants attribute: + + +Caveats +======= + +Why You Might Not Want to Use Smart Quotes in Your Weblog +--------------------------------------------------------- + +For one thing, you might not care. + +Most normal, mentally stable individuals do not take notice of proper +typographic punctuation. Many design and typography nerds, however, break +out in a nasty rash when they encounter, say, a restaurant sign that uses +a straight apostrophe to spell "Joe's". + +If you're the sort of person who just doesn't care, you might well want to +continue not caring. Using straight quotes -- and sticking to the 7-bit +ASCII character set in general -- is certainly a simpler way to live. + +Even if you *do* care about accurate typography, you still might want to +think twice before educating the quote characters in your weblog. One side +effect of publishing curly quote characters is that it makes your +weblog a bit harder for others to quote from using copy-and-paste. What +happens is that when someone copies text from your blog, the copied text +contains the 8-bit curly quote characters (as well as the 8-bit characters +for em-dashes and ellipses, if you use these options). These characters +are not standard across different text encoding methods, which is why they +need to be encoded as characters.
+ +People copying text from your weblog, however, may not notice that you're +using curly quotes, and they'll go ahead and paste the unencoded 8-bit +characters copied from their browser into an email message or their own +weblog. When pasted as raw "smart quotes", these characters are likely to +get mangled beyond recognition. + +That said, my own opinion is that any decent text editor or email client +makes it easy to stupefy smart quote characters into their 7-bit +equivalents, and I don't consider it my problem if you're using an +indecent text editor or email client. + + +Algorithmic Shortcomings +------------------------ + +One situation in which quotes will get curled the wrong way is when +apostrophes are used at the start of leading contractions. For example: + +``'Twas the night before Christmas.`` + +In the case above, SmartyPants will turn the apostrophe into an opening +single-quote, when in fact it should be a closing one. I don't think +this problem can be solved in the general case -- every word processor +I've tried gets this wrong as well. In such cases, it's best to use the +proper character for closing single-quotes (``’``) by hand. + + +Version History +=============== + +1.7 2012-11-19 + - Internationalization: language-dependent quotes. + +1.6.1: 2012-11-06 + - Refactor code, code cleanup, + - `educate_tokens()` generator as interface for Docutils. + +1.6: 2010-08-26 + - Adaption to Docutils: + - Use Unicode instead of HTML entities, + - Remove code special to pyblosxom. + +1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400 + - Fixed bug where blocks of precious unalterable text was instead + interpreted. Thanks to Le Roux and Dirk van Oosterbosch. + +1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400 + - Fix bogus magical quotation when there is no hint that the + user wants it, e.g., in "21st century". Thanks to Nathan Hamblen. + - Be smarter about quotes before terminating numbers in an en-dash'ed + range. 
+ +1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500 + - Fix a date-processing bug, as reported by jacob childress. + - Begin a test-suite for ensuring correct output. + - Removed import of "string", since I didn't really need it. + (This was my first every Python program. Sue me!) + +1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400 + - Abort processing if the flavour is in forbidden-list. Default of + [ "rss" ] (Idea of Wolfgang SCHNERRING.) + - Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING. + +1.5_1.2: Mon, 24 May 2004 08:14:54 -0400 + - Some single quotes weren't replaced properly. Diff-tesuji played + by Benjamin GEIGER. + +1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500 + - Support upcoming pyblosxom 0.9 plugin verification feature. + +1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500 + - Initial release +""" + +default_smartypants_attr = "1" + + +import re + +class smartchars(object): + """Smart quotes and dashes + """ + + endash = u'–' # "–" EN DASH + emdash = u'—' # "—" EM DASH + ellipsis = u'…' # "…" HORIZONTAL ELLIPSIS + + # quote characters (language-specific, set in __init__()) + # + # English smart quotes (open primary, close primary, open secondary, close + # secondary) are: + # opquote = u'“' # "“" LEFT DOUBLE QUOTATION MARK + # cpquote = u'”' # "”" RIGHT DOUBLE QUOTATION MARK + # osquote = u'‘' # "‘" LEFT SINGLE QUOTATION MARK + # csquote = u'’' # "’" RIGHT SINGLE QUOTATION MARK + # For other languages see: + # http://en.wikipedia.org/wiki/Non-English_usage_of_quotation_marks + # http://de.wikipedia.org/wiki/Anf%C3%BChrungszeichen#Andere_Sprachen + quotes = {'af': u'“”‘’', + 'af-x-altquot': u'„”‚’', + 'ca': u'«»“”', + 'ca-x-altquot': u'“”‘’', + 'cs': u'„“‚‘', + 'cs-x-altquot': u'»«›‹', + 'da': u'»«‘’', + 'da-x-altquot': u'„“‚‘', + 'de': u'„“‚‘', + 'de-x-altquot': u'»«›‹', + 'de-CH': u'«»‹›', + 'el': u'«»“”', + 'en': u'“”‘’', + 'en-UK': u'‘’“”', + 'eo': u'“”‘’', + 'es': u'«»“”', + 'et': u'„“‚‘', # no secondary quote listed in + 'et-x-altquot': u'»«›‹', # the 
sources above (wikipedia.org) + 'eu': u'«»‹›', + 'es-x-altquot': u'“”‘’', + 'fi': u'””’’', + 'fi-x-altquot': u'»»’’', + 'fr': (u'« ', u' »', u'‹ ', u' ›'), # with narrow no-break space + 'fr-x-altquot': u'«»‹›', # for use with manually set spaces + # 'fr-x-altquot': (u'“ ', u' ”', u'‘ ', u' ’'), # rarely used + 'fr-CH': u'«»‹›', + 'gl': u'«»“”', + 'he': u'”“»«', + 'he-x-altquot': u'„”‚’', + 'it': u'«»“”', + 'it-CH': u'«»‹›', + 'it-x-altquot': u'“”‘’', + 'ja': u'「」『』', + 'lt': u'„“‚‘', + 'nl': u'“”‘’', + 'nl-x-altquot': u'„”‚’', + 'pl': u'„”«»', + 'pl-x-altquot': u'«»“”', + 'pt': u'«»“”', + 'pt-BR': u'“”‘’', + 'ro': u'„”«»', + 'ro-x-altquot': u'«»„”', + 'ru': u'«»„“', + 'sk': u'„“‚‘', + 'sk-x-altquot': u'»«›‹', + 'sv': u'„“‚‘', + 'sv-x-altquot': u'»«›‹', + 'zh-CN': u'“”‘’', + 'it': u'«»“”', + 'zh-TW': u'「」『』', + } + + def __init__(self, language='en'): + self.language = language + try: + (self.opquote, self.cpquote, + self.osquote, self.csquote) = self.quotes[language] + except KeyError: + self.opquote, self.cpquote, self.osquote, self.csquote = u'""\'\'' + + +def smartyPants(text, attr=default_smartypants_attr, language='en'): + """Main function for "traditional" use.""" + + return "".join([t for t in educate_tokens(tokenize(text), + attr, language)]) + + +def educate_tokens(text_tokens, attr=default_smartypants_attr, language='en'): + """Return iterator that "educates" the items of `text_tokens`. + """ + + # Parse attributes: + # 0 : do nothing + # 1 : set all + # 2 : set all, using old school en- and em- dash shortcuts + # 3 : set all, using inverted old school en and em- dash shortcuts + # + # q : quotes + # b : backtick quotes (``double'' only) + # B : backtick quotes (``double'' and `single') + # d : dashes + # D : old school dashes + # i : inverted old school dashes + # e : ellipses + # w : convert " entities to " for Dreamweaver users + + convert_quot = False # translate " entities into normal quotes? 
+ do_dashes = False + do_backticks = False + do_quotes = False + do_ellipses = False + do_stupefy = False + + if attr == "0": # Do nothing. + yield text + elif attr == "1": # Do everything, turn all options on. + do_quotes = True + do_backticks = True + do_dashes = 1 + do_ellipses = True + elif attr == "2": + # Do everything, turn all options on, use old school dash shorthand. + do_quotes = True + do_backticks = True + do_dashes = 2 + do_ellipses = True + elif attr == "3": + # Do everything, use inverted old school dash shorthand. + do_quotes = True + do_backticks = True + do_dashes = 3 + do_ellipses = True + elif attr == "-1": # Special "stupefy" mode. + do_stupefy = True + else: + if "q" in attr: do_quotes = True + if "b" in attr: do_backticks = True + if "B" in attr: do_backticks = 2 + if "d" in attr: do_dashes = 1 + if "D" in attr: do_dashes = 2 + if "i" in attr: do_dashes = 3 + if "e" in attr: do_ellipses = True + if "w" in attr: convert_quot = True + + prev_token_last_char = " " + # Last character of the previous text token. Used as + # context to curl leading quote characters correctly. + + for (ttype, text) in text_tokens: + + # skip HTML and/or XML tags as well as emtpy text tokens + # without updating the last character + if ttype == 'tag' or not text: + yield text + continue + + # skip literal text (math, literal, raw, ...) + if ttype == 'literal': + prev_token_last_char = text[-1:] + yield text + continue + + last_char = text[-1:] # Remember last char before processing. + + text = processEscapes(text) + + if convert_quot: + text = re.sub('"', '"', text) + + if do_dashes == 1: + text = educateDashes(text) + elif do_dashes == 2: + text = educateDashesOldSchool(text) + elif do_dashes == 3: + text = educateDashesOldSchoolInverted(text) + + if do_ellipses: + text = educateEllipses(text) + + # Note: backticks need to be processed before quotes. 
+ if do_backticks: + text = educateBackticks(text, language) + + if do_backticks == 2: + text = educateSingleBackticks(text, language) + + if do_quotes: + text = educateQuotes(prev_token_last_char+text, language)[1:] + + if do_stupefy: + text = stupefyEntities(text, language) + + # Remember last char as context for the next token + prev_token_last_char = last_char + + text = processEscapes(text, restore=True) + + yield text + + + +def educateQuotes(text, language='en'): + """ + Parameter: - text string (unicode or bytes). + - language (`BCP 47` language tag.) + Returns: The `text`, with "educated" curly quote characters. + + Example input: "Isn't this fun?" + Example output: “Isn’t this fun?“; + """ + + smart = smartchars(language) + + # oldtext = text + punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]""" + + # Special case if the very first character is a quote + # followed by punctuation at a non-word-break. + # Close the quotes by brute force: + text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), smart.csquote, text) + text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), smart.cpquote, text) + + # Special case for double sets of quotes, e.g.: + # <p>He said, "'Quoted' words in a larger quote."</p> + text = re.sub(r""""'(?=\w)""", smart.opquote+smart.osquote, text) + text = re.sub(r"""'"(?=\w)""", smart.osquote+smart.opquote, text) + + # Special case for decade abbreviations (the '80s): + text = re.sub(r"""\b'(?=\d{2}s)""", smart.csquote, text) + + close_class = r"""[^\ \t\r\n\[\{\(\-]""" + dec_dashes = r"""–|—""" + + # Get most opening single quotes: + opening_single_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + ' # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + text = opening_single_quotes_regex.sub(r'\1'+smart.osquote, text) + + 
closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (?!\s | s\b | \d) + """ % (close_class,), re.VERBOSE) + text = closing_single_quotes_regex.sub(r'\1'+smart.csquote, text) + + closing_single_quotes_regex = re.compile(r""" + (%s) + ' + (\s | s\b) + """ % (close_class,), re.VERBOSE) + text = closing_single_quotes_regex.sub(r'\1%s\2' % smart.csquote, text) + + # Any remaining single quotes should be opening ones: + text = re.sub(r"""'""", smart.osquote, text) + + # Get most opening double quotes: + opening_double_quotes_regex = re.compile(r""" + ( + \s | # a whitespace char, or + | # a non-breaking space entity, or + -- | # dashes, or + &[mn]dash; | # named dash entities + %s | # or decimal entities + &\#x201[34]; # or hex + ) + " # the quote + (?=\w) # followed by a word character + """ % (dec_dashes,), re.VERBOSE) + text = opening_double_quotes_regex.sub(r'\1'+smart.opquote, text) + + # Double closing quotes: + closing_double_quotes_regex = re.compile(r""" + #(%s)? # character that indicates the quote should be closing + " + (?=\s) + """ % (close_class,), re.VERBOSE) + text = closing_double_quotes_regex.sub(smart.cpquote, text) + + closing_double_quotes_regex = re.compile(r""" + (%s) # character that indicates the quote should be closing + " + """ % (close_class,), re.VERBOSE) + text = closing_double_quotes_regex.sub(r'\1'+smart.cpquote, text) + + # Any remaining quotes should be opening ones. + text = re.sub(r'"', smart.opquote, text) + + return text + + +def educateBackticks(text, language='en'): + """ + Parameter: String (unicode or bytes). + Returns: The `text`, with ``backticks'' -style double quotes + translated into HTML curly quote entities. 
+ Example input: ``Isn't this fun?'' + Example output: “Isn't this fun?“; + """ + smart = smartchars(language) + + text = re.sub(r"""``""", smart.opquote, text) + text = re.sub(r"""''""", smart.cpquote, text) + return text + + +def educateSingleBackticks(text, language='en'): + """ + Parameter: String (unicode or bytes). + Returns: The `text`, with `backticks' -style single quotes + translated into HTML curly quote entities. + + Example input: `Isn't this fun?' + Example output: ‘Isn’t this fun?’ + """ + smart = smartchars(language) + + text = re.sub(r"""`""", smart.osquote, text) + text = re.sub(r"""'""", smart.csquote, text) + return text + + +def educateDashes(text): + """ + Parameter: String (unicode or bytes). + Returns: The `text`, with each instance of "--" translated to + an em-dash character. + """ + + text = re.sub(r"""---""", smartchars.endash, text) # en (yes, backwards) + text = re.sub(r"""--""", smartchars.emdash, text) # em (yes, backwards) + return text + + +def educateDashesOldSchool(text): + """ + Parameter: String (unicode or bytes). + Returns: The `text`, with each instance of "--" translated to + an en-dash character, and each "---" translated to + an em-dash character. + """ + + text = re.sub(r"""---""", smartchars.emdash, text) + text = re.sub(r"""--""", smartchars.endash, text) + return text + + +def educateDashesOldSchoolInverted(text): + """ + Parameter: String (unicode or bytes). + Returns: The `text`, with each instance of "--" translated to + an em-dash character, and each "---" translated to + an en-dash character. Two reasons why: First, unlike the + en- and em-dash syntax supported by + EducateDashesOldSchool(), it's compatible with existing + entries written before SmartyPants 1.1, back when "--" was + only used for em-dashes. Second, em-dashes are more + common than en-dashes, and so it sort of makes sense that + the shortcut should be shorter to type. (Thanks to Aaron + Swartz for the idea.) 
+ """ + text = re.sub(r"""---""", smartchars.endash, text) # em + text = re.sub(r"""--""", smartchars.emdash, text) # en + return text + + + +def educateEllipses(text): + """ + Parameter: String (unicode or bytes). + Returns: The `text`, with each instance of "..." translated to + an ellipsis character. + + Example input: Huh...? + Example output: Huh…? + """ + + text = re.sub(r"""\.\.\.""", smartchars.ellipsis, text) + text = re.sub(r"""\. \. \.""", smartchars.ellipsis, text) + return text + + +def stupefyEntities(text, language='en'): + """ + Parameter: String (unicode or bytes). + Returns: The `text`, with each SmartyPants character translated to + its ASCII counterpart. + + Example input: “Hello — world.” + Example output: "Hello -- world." + """ + smart = smartchars(language) + + text = re.sub(smart.endash, "-", text) # en-dash + text = re.sub(smart.emdash, "--", text) # em-dash + + text = re.sub(smart.osquote, "'", text) # open single quote + text = re.sub(smart.csquote, "'", text) # close single quote + + text = re.sub(smart.opquote, '"', text) # open double quote + text = re.sub(smart.cpquote, '"', text) # close double quote + + text = re.sub(smart.ellipsis, '...', text)# ellipsis + + return text + + +def processEscapes(text, restore=False): + r""" + Parameter: String (unicode or bytes). + Returns: The `text`, with after processing the following backslash + escape sequences. This is useful if you want to force a "dumb" + quote or other character to appear. + + Escape Value + ------ ----- + \\ \ + \" " + \' ' + \. . + \- - + \` ` + """ + replacements = ((r'\\', r'\'), + (r'\"', r'"'), + (r"\'", r'''), + (r'\.', r'.'), + (r'\-', r'-'), + (r'\`', r'`')) + if restore: + for (ch, rep) in replacements: + text = text.replace(rep, ch[1]) + else: + for (ch, rep) in replacements: + text = text.replace(ch, rep) + + return text + + +def tokenize(text): + """ + Parameter: String containing HTML markup. 
+ Returns: An iterator that yields the tokens comprising the input + string. Each token is either a tag (possibly with nested, + tags contained therein, such as <a href="<MTFoo>">, or a + run of text between tags. Each yielded element is a + two-element tuple; the first is either 'tag' or 'text'; + the second is the actual value. + + Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. + <http://www.bradchoate.com/past/mtregex.php> + """ + + pos = 0 + length = len(text) + # tokens = [] + + depth = 6 + nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) + #match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments + # (?: <\? .*? \?> ) | # directives + # %s # nested tags """ % (nested_tags,) + tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") + + token_match = tag_soup.search(text) + + previous_end = 0 + while token_match is not None: + if token_match.group(1): + yield ('text', token_match.group(1)) + + yield ('tag', token_match.group(2)) + + previous_end = token_match.end() + token_match = tag_soup.search(text, token_match.end()) + + if previous_end < len(text): + yield ('text', text[previous_end:]) + + + +if __name__ == "__main__": + + import locale + + try: + locale.setlocale(locale.LC_ALL, '') + except: + pass + + from docutils.core import publish_string + docstring_html = publish_string(__doc__, writer_name='html') + + print docstring_html + + + # Unit test output goes out stderr. + import unittest + sp = smartyPants + + class TestSmartypantsAllAttributes(unittest.TestCase): + # the default attribute is "1", which means "all". + + def test_dates(self): + self.assertEqual(sp("1440-80's"), u"1440-80’s") + self.assertEqual(sp("1440-'80s"), u"1440-‘80s") + self.assertEqual(sp("1440---'80s"), u"1440–‘80s") + self.assertEqual(sp("1960s"), "1960s") # no effect. 
+ self.assertEqual(sp("1960's"), u"1960’s") + self.assertEqual(sp("one two '60s"), u"one two ‘60s") + self.assertEqual(sp("'60s"), u"‘60s") + + def test_ordinal_numbers(self): + self.assertEqual(sp("21st century"), "21st century") # no effect. + self.assertEqual(sp("3rd"), "3rd") # no effect. + + def test_educated_quotes(self): + self.assertEqual(sp('''"Isn't this fun?"'''), u'“Isn’t this fun?”') + + def test_html_tags(self): + text = '<a src="foo">more</a>' + self.assertEqual(sp(text), text) + + unittest.main() + + + + +__author__ = "Chad Miller <smartypantspy@chad.org>" +__version__ = "1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400" +__url__ = "http://wiki.chad.org/SmartyPantsPy" +__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom" Added: trunk/docutils/src/main/resources/docutils/docutils/utils/urischemes.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils/urischemes.py (rev 0) +++ trunk/docutils/src/main/resources/docutils/docutils/utils/urischemes.py 2014-09-29 12:39:28 UTC (rev 756) @@ -0,0 +1,136 @@ +# $Id: urischemes.py 7464 2012-06-25 13:16:03Z milde $ +# Author: David Goodger <goodger@python.org> +# Copyright: This module has been placed in the public domain. + +""" +`schemes` is a dictionary with lowercase URI addressing schemes as +keys and descriptions as values. It was compiled from the index at +http://www.iana.org/assignments/uri-schemes (revised 2005-11-28) +and an older list at http://www.w3.org/Addressing/schemes.html. +""" + +# Many values are blank and should be filled in with useful descriptions. 
+ +schemes = { + 'about': 'provides information on Navigator', + 'acap': 'Application Configuration Access Protocol; RFC 2244', + 'addbook': "To add vCard entries to Communicator's Address Book", + 'afp': 'Apple Filing Protocol', + 'afs': 'Andrew File System global file names', + 'aim': 'AOL Instant Messenger', + 'callto': 'for NetMeeting links', + 'castanet': 'Castanet Tuner URLs for Netcaster', + 'chttp': 'cached HTTP supported by RealPlayer', + 'cid': 'content identifier; RFC 2392', + 'crid': 'TV-Anytime Content Reference Identifier; RFC 4078', + 'data': ('allows inclusion of small data items as "immediate" data; ' + 'RFC 2397'), + 'dav': 'Distributed Authoring and Versioning Protocol; RFC 2518', + 'dict': 'dictionary service protocol; RFC 2229', + 'dns': 'Domain Name System resources', + 'eid': ('External ID; non-URL data; general escape mechanism to allow ' + 'access to information for applications that are too ' + 'specialized to justify their own schemes'), + 'fax': ('a connection to a terminal that can handle telefaxes ' + '(facsimiles); RFC 2806'), + 'feed' : 'NetNewsWire feed', + 'file': 'Host-specific file names; RFC 1738', + 'finger': '', + 'freenet': '', + 'ftp': 'File Transfer Protocol; RFC 1738', + 'go': 'go; RFC 3368', + 'gopher': 'The Gopher Protocol', + 'gsm-sms': ('Global System for Mobile Communications Short Message ' + 'Service'), + 'h323': ('video (audiovisual) communication on local area networks; ' + 'RFC 3508'), + 'h324': ('video and audio communications over low bitrate connections ' + 'such as POTS modem connections'), + 'hdl': 'CNRI handle system', + 'hnews': 'an HTTP-tunneling variant of the NNTP news protocol', + 'http': 'Hypertext Transfer Protocol; RFC 2616', + 'https': 'HTTP over SSL; RFC 2818', + 'hydra': 'SubEthaEdit URI. 
See http://www.codingmonkeys.de/subethaedit.', + 'iioploc': 'Internet Inter-ORB Protocol Location?', + 'ilu': 'Inter-Language Unification', + 'im': 'Instant Messaging; RFC 3860', + 'imap': 'Internet Message Access Protocol; RFC 2192', + 'info': 'Information Assets with Identifiers in Public Namespaces', + 'ior': 'CORBA interoperable object reference', + 'ipp': 'Internet Printing Protocol; RFC 3510', + 'irc': 'Internet Relay Chat', + 'iris.beep': 'iris.beep; RFC 3983', + 'iseek' : 'See www.ambrosiasw.com; a little util for OS X.', + 'jar': 'Java archive', + 'javascript': ('JavaScript code; evaluates the expression after the ' + 'colon'), + 'jdbc': 'JDBC connection URI.', + 'ldap': 'Lightweight Directory Access Protocol', + 'lifn': '', + 'livescript': '', + 'lrq': '', + 'mailbox': 'Mail folder access', + 'mailserver': 'Access to data available from mail servers', + 'mailto': 'Electronic mail address; RFC 2368', + 'md5': '', + 'mid': 'message identifier; RFC 2392', + 'mocha': '', + 'modem': ('a connection to a terminal that can handle incoming data ' + 'calls; RFC 2806'), + 'mtqp': 'Message Tracking Query Protocol; RFC 3887', + 'mupdate': 'Mailbox Update (MUPDATE) Protocol; RFC 3656', + 'news': 'USENET news; RFC 1738', + 'nfs': 'Network File System protocol; RFC 2224', + 'nntp': 'USENET news using NNTP access; RFC 1738', + 'opaquelocktoken': 'RFC 2518', + 'phone': '', + 'pop': 'Post Office Protocol; RFC 2384', + 'pop3': 'Post Office Protocol v3', + 'pres': 'Presence; RFC 3859', + 'printer': '', + 'prospero': 'Prospero Directory Service; RFC 4157', + 'rdar' : ('URLs found in Darwin source ' + '(http://www.opensource.apple.com/darwinsource/).'), + 'res': '', + 'rtsp': 'real time streaming protocol; RFC 2326', + 'rvp': '', + 'rwhois': '', + 'rx': 'Remote Execution', + 'sdp': '', + 'service': 'service location; RFC 2609', + 'shttp': 'secure hypertext transfer protocol', + 'sip': 'Session Initiation Protocol; RFC 3261', + 'sips': 'secure session intitiaion protocol; RFC 
3261', + 'smb': 'SAMBA filesystems.', + 'snews': 'For NNTP postings via SSL', + 'snmp': 'Simple Network Management Protocol; RFC 4088', + 'soap.beep': 'RFC 3288', + 'soap.beeps': 'RFC 3288', + 'ssh': 'Reference to interactive sessions via ssh.', + 't120': 'real time data conferencing (audiographics)', + 'tag': 'RFC 4151', + 'tcp': '', + 'tel': ('a connection to a terminal that handles normal voice ' + 'telephone calls, a voice mailbox or another voice messaging ' + 'system or a service that can be operated using DTMF tones; ' + 'RFC 2806.'), + 'telephone': 'telephone', + 'telnet': 'Reference to interactive sessions; RFC 4248', + 'tftp': 'Trivial File Transfer Protocol; RFC 3617', + 'tip': 'Transaction Internet Protocol; RFC 2371', + 'tn3270': 'Interactive 3270 emulation sessions', + 'tv': '', + 'urn': 'Uniform Resource Name; RFC 2141', + 'uuid': '', + 'vemmi': 'versatile multimedia interface; RFC 2122', + 'videotex': '', + 'view-source': 'displays HTML code that was generated with JavaScript', + 'wais': 'Wide Area Information Servers; RFC 4156', + 'whodp': '', + 'whois++': 'Distributed directory service.', + 'x-man-page': ('Opens man page in Terminal.app on OS X ' + '(see macosxhints.com)'), + 'xmlrpc.beep': 'RFC 3529', + 'xmlrpc.beeps': 'RFC 3529', + 'z39.50r': 'Z39.50 Retrieval; RFC 2056', + 'z39.50s': 'Z39.50 Session; RFC 2056',} Deleted: trunk/docutils/src/main/resources/docutils/docutils/utils.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/utils.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/utils.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,697 +0,0 @@ -# $Id: utils.py 7073 2011-07-07 06:49:19Z milde $ -# Author: David Goodger <goodger@python.org> -# Copyright: This module has been placed in the public domain. - -""" -Miscellaneous utilities for the documentation utilities. 
-""" - -__docformat__ = 'reStructuredText' - -import sys -import os -import os.path -import warnings -import unicodedata -from docutils import ApplicationError, DataError -from docutils import nodes -from docutils.error_reporting import ErrorOutput, SafeString - - -class SystemMessage(ApplicationError): - - def __init__(self, system_message, level): - Exception.__init__(self, system_message.astext()) - self.level = level - - -class SystemMessagePropagation(ApplicationError): pass - - -class Reporter: - - """ - Info/warning/error reporter and ``system_message`` element generator. - - Five levels of system messages are defined, along with corresponding - methods: `debug()`, `info()`, `warning()`, `error()`, and `severe()`. - - There is typically one Reporter object per process. A Reporter object is - instantiated with thresholds for reporting (generating warnings) and - halting processing (raising exceptions), a switch to turn debug output on - or off, and an I/O stream for warnings. These are stored as instance - attributes. - - When a system message is generated, its level is compared to the stored - thresholds, and a warning or error is generated as appropriate. Debug - messages are produced if the stored debug switch is on, independently of - other thresholds. Message output is sent to the stored warning stream if - not set to ''. - - The Reporter class also employs a modified form of the "Observer" pattern - [GoF95]_ to track system messages generated. The `attach_observer` method - should be called before parsing, with a bound method or function which - accepts system messages. The observer can be removed with - `detach_observer`, and another added in its place. - - .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of - Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA, - 1995. 
- """ - - levels = 'DEBUG INFO WARNING ERROR SEVERE'.split() - """List of names for system message levels, indexed by level.""" - - # system message level constants: - (DEBUG_LEVEL, - INFO_LEVEL, - WARNING_LEVEL, - ERROR_LEVEL, - SEVERE_LEVEL) = range(5) - - def __init__(self, source, report_level, halt_level, stream=None, - debug=0, encoding=None, error_handler='backslashreplace'): - """ - :Parameters: - - `source`: The path to or description of the source data. - - `report_level`: The level at or above which warning output will - be sent to `stream`. - - `halt_level`: The level at or above which `SystemMessage` - exceptions will be raised, halting execution. - - `debug`: Show debug (level=0) system messages? - - `stream`: Where warning output is sent. Can be file-like (has a - ``.write`` method), a string (file name, opened for writing), - '' (empty string) or `False` (for discarding all stream messages) - or `None` (implies `sys.stderr`; default). - - `encoding`: The output encoding. - - `error_handler`: The error handler for stderr output encoding. 
- """ - - self.source = source - """The path to or description of the source data.""" - - self.error_handler = error_handler - """The character encoding error handler.""" - - self.debug_flag = debug - """Show debug (level=0) system messages?""" - - self.report_level = report_level - """The level at or above which warning output will be sent - to `self.stream`.""" - - self.halt_level = halt_level - """The level at or above which `SystemMessage` exceptions - will be raised, halting execution.""" - - if not isinstance(stream, ErrorOutput): - stream = ErrorOutput(stream, encoding, error_handler) - - self.stream = stream - """Where warning output is sent.""" - - self.encoding = encoding or getattr(stream, 'encoding', 'ascii') - """The output character encoding.""" - - self.observers = [] - """List of bound methods or functions to call with each system_message - created.""" - - self.max_level = -1 - """The highest level system message generated so far.""" - - def set_conditions(self, category, report_level, halt_level, - stream=None, debug=0): - warnings.warn('docutils.utils.Reporter.set_conditions deprecated; ' - 'set attributes via configuration settings or directly', - DeprecationWarning, stacklevel=2) - self.report_level = report_level - self.halt_level = halt_level - if not isinstance(stream, ErrorOutput): - stream = ErrorOutput(stream, self.encoding, self.error_handler) - self.stream = stream - self.debug_flag = debug - - def attach_observer(self, observer): - """ - The `observer` parameter is a function or bound method which takes one - argument, a `nodes.system_message` instance. - """ - self.observers.append(observer) - - def detach_observer(self, observer): - self.observers.remove(observer) - - def notify_observers(self, message): - for observer in self.observers: - observer(message) - - def system_message(self, level, message, *children, **kwargs): - """ - Return a system_message object. - - Raise an exception or generate a warning if appropriate. 
- """ - # `message` can be a `string`, `unicode`, or `Exception` instance. - if isinstance(message, Exception): - message = SafeString(message) - - attributes = kwargs.copy() - if 'base_node' in kwargs: - source, line = get_source_line(kwargs['base_node']) - del attributes['base_node'] - if source is not None: - attributes.setdefault('source', source) - if line is not None: - attributes.setdefault('line', line) - # assert source is not None, "node has line- but no source-argument" - if not 'source' in attributes: # 'line' is absolute line number - try: # look up (source, line-in-source) - source, line = self.locator(attributes.get('line')) - # print "locator lookup", kwargs.get('line'), "->", source, line - except AttributeError: - source, line = None, None - if source is not None: - attributes['source'] = source - if line is not None: - attributes['line'] = line - # assert attributes['line'] is not None, (message, kwargs) - # assert attributes['source'] is not None, (message, kwargs) - attributes.setdefault('source', self.source) - - msg = nodes.system_message(message, level=level, - type=self.levels[level], - *children, **attributes) - if self.stream and (level >= self.report_level - or self.debug_flag and level == self.DEBUG_LEVEL - or level >= self.halt_level): - self.stream.write(msg.astext() + '\n') - if level >= self.halt_level: - raise SystemMessage(msg, level) - if level > self.DEBUG_LEVEL or self.debug_flag: - self.notify_observers(msg) - self.max_level = max(level, self.max_level) - return msg - - def debug(self, *args, **kwargs): - """ - Level-0, "DEBUG": an internal reporting issue. Typically, there is no - effect on the processing. Level-0 system messages are handled - separately from the others. - """ - if self.debug_flag: - return self.system_message(self.DEBUG_LEVEL, *args, **kwargs) - - def info(self, *args, **kwargs): - """ - Level-1, "INFO": a minor issue that can be ignored. 
Typically there is - no effect on processing, and level-1 system messages are not reported. - """ - return self.system_message(self.INFO_LEVEL, *args, **kwargs) - - def warning(self, *args, **kwargs): - """ - Level-2, "WARNING": an issue that should be addressed. If ignored, - there may be unpredictable problems with the output. - """ - return self.system_message(self.WARNING_LEVEL, *args, **kwargs) - - def error(self, *args, **kwargs): - """ - Level-3, "ERROR": an error that should be addressed. If ignored, the - output will contain errors. - """ - return self.system_message(self.ERROR_LEVEL, *args, **kwargs) - - def severe(self, *args, **kwargs): - """ - Level-4, "SEVERE": a severe error that must be addressed. If ignored, - the output will contain severe errors. Typically level-4 system - messages are turned into exceptions which halt processing. - """ - return self.system_message(self.SEVERE_LEVEL, *args, **kwargs) - - -class ExtensionOptionError(DataError): pass -class BadOptionError(ExtensionOptionError): pass -class BadOptionDataError(ExtensionOptionError): pass -class DuplicateOptionError(ExtensionOptionError): pass - - -def extract_extension_options(field_list, options_spec): - """ - Return a dictionary mapping extension option names to converted values. - - :Parameters: - - `field_list`: A flat field list without field arguments, where each - field body consists of a single paragraph only. - - `options_spec`: Dictionary mapping known option names to a - conversion function such as `int` or `float`. - - :Exceptions: - - `KeyError` for unknown option names. - - `ValueError` for invalid option values (raised by the conversion - function). - - `TypeError` for invalid option value types (raised by conversion - function). - - `DuplicateOptionError` for duplicate options. - - `BadOptionError` for invalid fields. - - `BadOptionDataError` for invalid option data (missing name, - missing data, bad quotes, etc.). 
- """ - option_list = extract_options(field_list) - option_dict = assemble_option_dict(option_list, options_spec) - return option_dict - -def extract_options(field_list): - """ - Return a list of option (name, value) pairs from field names & bodies. - - :Parameter: - `field_list`: A flat field list, where each field name is a single - word and each field body consists of a single paragraph only. - - :Exceptions: - - `BadOptionError` for invalid fields. - - `BadOptionDataError` for invalid option data (missing name, - missing data, bad quotes, etc.). - """ - option_list = [] - for field in field_list: - if len(field[0].astext().split()) != 1: - raise BadOptionError( - 'extension option field name may not contain multiple words') - name = str(field[0].astext().lower()) - body = field[1] - if len(body) == 0: - data = None - elif len(body) > 1 or not isinstance(body[0], nodes.paragraph) \ - or len(body[0]) != 1 or not isinstance(body[0][0], nodes.Text): - raise BadOptionDataError( - 'extension option field body may contain\n' - 'a single paragraph only (option "%s")' % name) - else: - data = body[0][0].astext() - option_list.append((name, data)) - return option_list - -def assemble_option_dict(option_list, options_spec): - """ - Return a mapping of option names to values. - - :Parameters: - - `option_list`: A list of (name, value) pairs (the output of - `extract_options()`). - - `options_spec`: Dictionary mapping known option names to a - conversion function such as `int` or `float`. - - :Exceptions: - - `KeyError` for unknown option names. - - `DuplicateOptionError` for duplicate options. - - `ValueError` for invalid option values (raised by conversion - function). - - `TypeError` for invalid option value types (raised by conversion - function). 
- """ - options = {} - for name, value in option_list: - convertor = options_spec[name] # raises KeyError if unknown - if convertor is None: - raise KeyError(name) # or if explicitly disabled - if name in options: - raise DuplicateOptionError('duplicate option "%s"' % name) - try: - options[name] = convertor(value) - except (ValueError, TypeError), detail: - raise detail.__class__('(option: "%s"; value: %r)\n%s' - % (name, value, ' '.join(detail.args))) - return options - - -class NameValueError(DataError): pass - - -def decode_path(path): - """ - Ensure `path` is Unicode. Return `nodes.reprunicode` object. - - Decode file/path string in a failsave manner if not already done. - """ - # see also http://article.gmane.org/gmane.text.docutils.user/2905 - if isinstance(path, unicode): - return path - try: - path = path.decode(sys.getfilesystemencoding(), 'strict') - except AttributeError: # default value None has no decode method - return nodes.reprunicode(path) - except UnicodeDecodeError: - try: - path = path.decode('utf-8', 'strict') - except UnicodeDecodeError: - path = path.decode('ascii', 'replace') - return nodes.reprunicode(path) - - -def extract_name_value(line): - """ - Return a list of (name, value) from a line of the form "name=value ...". - - :Exception: - `NameValueError` for invalid input (missing name, missing data, bad - quotes, etc.). 
- """ - attlist = [] - while line: - equals = line.find('=') - if equals == -1: - raise NameValueError('missing "="') - attname = line[:equals].strip() - if equals == 0 or not attname: - raise NameValueError( - 'missing attribute name before "="') - line = line[equals+1:].lstrip() - if not line: - raise NameValueError( - 'missing value after "%s="' % attname) - if line[0] in '\'"': - endquote = line.find(line[0], 1) - if endquote == -1: - raise NameValueError( - 'attribute "%s" missing end quote (%s)' - % (attname, line[0])) - if len(line) > endquote + 1 and line[endquote + 1].strip(): - raise NameValueError( - 'attribute "%s" end quote (%s) not followed by ' - 'whitespace' % (attname, line[0])) - data = line[1:endquote] - line = line[endquote+1:].lstrip() - else: - space = line.find(' ') - if space == -1: - data = line - line = '' - else: - data = line[:space] - line = line[space+1:].lstrip() - attlist.append((attname.lower(), data)) - return attlist - -def new_reporter(source_path, settings): - """ - Return a new Reporter object. - - :Parameters: - `source` : string - The path to or description of the source text of the document. - `settings` : optparse.Values object - Runtime settings. - """ - reporter = Reporter( - source_path, settings.report_level, settings.halt_level, - stream=settings.warning_stream, debug=settings.debug, - encoding=settings.error_encoding, - error_handler=settings.error_encoding_error_handler) - return reporter - -def new_document(source_path, settings=None): - """ - Return a new empty document object. - - :Parameters: - `source_path` : string - The path to or description of the source text of the document. - `settings` : optparse.Values object - Runtime settings. If none are provided, a default core set will - be used. If you will use the document object with any Docutils - components, you must provide their default settings as well. 
For - example, if parsing, at least provide the parser settings, - obtainable as follows:: - - settings = docutils.frontend.OptionParser( - components=(docutils.parsers.rst.Parser,) - ).get_default_values() - """ - from docutils import frontend - if settings is None: - settings = frontend.OptionParser().get_default_values() - source_path = decode_path(source_path) - reporter = new_reporter(source_path, settings) - document = nodes.document(settings, reporter, source=source_path) - document.note_source(source_path, -1) - return document - -def clean_rcs_keywords(paragraph, keyword_substitutions): - if len(paragraph) == 1 and isinstance(paragraph[0], nodes.Text): - textnode = paragraph[0] - for pattern, substitution in keyword_substitutions: - match = pattern.search(textnode) - if match: - paragraph[0] = nodes.Text(pattern.sub(substitution, textnode)) - return - -def relative_path(source, target): - """ - Build and return a path to `target`, relative to `source` (both files). - - If there is no common prefix, return the absolute path to `target`. - """ - source_parts = os.path.abspath(source or 'dummy_file').split(os.sep) - target_parts = os.path.abspath(target).split(os.sep) - # Check first 2 parts because '/dir'.split('/') == ['', 'dir']: - if source_parts[:2] != target_parts[:2]: - # Nothing in common between paths. - # Return absolute path, using '/' for URLs: - return '/'.join(target_parts) - source_parts.reverse() - target_parts.reverse() - while (source_parts and target_parts - and source_parts[-1] == target_parts[-1]): - # Remove path components in common: - source_parts.pop() - target_parts.pop() - target_parts.reverse() - parts = ['..'] * (len(source_parts) - 1) + target_parts - return '/'.join(parts) - -def get_stylesheet_reference(settings, relative_to=None): - """ - Retrieve a stylesheet reference from the settings object. - - Deprecated. 
Use get_stylesheet_reference_list() instead to - enable specification of multiple stylesheets as a comma-separated - list. - """ - if settings.stylesheet_path: - assert not settings.stylesheet, ( - 'stylesheet and stylesheet_path are mutually exclusive.') - if relative_to == None: - relative_to = settings._destination - return relative_path(relative_to, settings.stylesheet_path) - else: - return settings.stylesheet - -# Return 'stylesheet' or 'stylesheet_path' arguments as list. -# -# The original settings arguments are kept unchanged: you can test -# with e.g. ``if settings.stylesheet_path:`` -# -# Differences to ``get_stylesheet_reference``: -# * return value is a list -# * no re-writing of the path (and therefore no optional argument) -# (if required, use ``utils.relative_path(source, target)`` -# in the calling script) -def get_stylesheet_list(settings): - """ - Retrieve list of stylesheet references from the settings object. - """ - assert not (settings.stylesheet and settings.stylesheet_path), ( - 'stylesheet and stylesheet_path are mutually exclusive.') - if settings.stylesheet_path: - sheets = settings.stylesheet_path.split(",") - elif settings.stylesheet: - sheets = settings.stylesheet.split(",") - else: - sheets = [] - # strip whitespace (frequently occuring in config files) - return [sheet.strip(u' \t\n') for sheet in sheets] - -def get_trim_footnote_ref_space(settings): - """ - Return whether or not to trim footnote space. - - If trim_footnote_reference_space is not None, return it. - - If trim_footnote_reference_space is None, return False unless the - footnote reference style is 'superscript'. - """ - if settings.trim_footnote_reference_space is None: - return hasattr(settings, 'footnote_references') and \ - settings.footnote_references == 'superscript' - else: - return settings.trim_footnote_reference_space - -def get_source_line(node): - """ - Return the "source" and "line" attributes from the `node` given or from - its closest ancestor. 
- """ - while node: - if node.source or node.line: - return node.source, node.line - node = node.parent - return None, None - -def escape2null(text): - """Return a string with escape-backslashes converted to nulls.""" - parts = [] - start = 0 - while 1: - found = text.find('\\', start) - if found == -1: - parts.append(text[start:]) - return ''.join(parts) - parts.append(text[start:found]) - parts.append('\x00' + text[found+1:found+2]) - start = found + 2 # skip character after escape - -def unescape(text, restore_backslashes=0): - """ - Return a string with nulls removed or restored to backslashes. - Backslash-escaped spaces are also removed. - """ - if restore_backslashes: - return text.replace('\x00', '\\') - else: - for sep in ['\x00 ', '\x00\n', '\x00']: - text = ''.join(text.split(sep)) - return text - -east_asian_widths = {'W': 2, # Wide - 'F': 2, # Full-width (wide) - 'Na': 1, # Narrow - 'H': 1, # Half-width (narrow) - 'N': 1, # Neutral (not East Asian, treated as narrow) - 'A': 1} # Ambiguous (s/b wide in East Asian context, - # narrow otherwise, but that doesn't work) -"""Mapping of result codes from `unicodedata.east_asian_widt()` to character -column widths.""" - -def column_width(text): - """Return the column width of text. - - Correct ``len(text)`` for wide East Asian and combining Unicode chars. - """ - if isinstance(text, str) and sys.version_info < (3,0): - return len(text) - combining_correction = sum([-1 for c in text - if unicodedata.combining(c)]) - try: - width = sum([east_asian_widths[unicodedata.east_asian_width(c)] - for c in text]) - except AttributeError: # east_asian_width() New in version 2.4. 
- width = len(text) - return width + combining_correction - -def uniq(L): - r = [] - for item in L: - if not item in r: - r.append(item) - return r - -# by Li Daobing http://code.activestate.com/recipes/190465/ -# since Python 2.6 there is also itertools.combinations() -def unique_combinations(items, n): - """Return r-length tuples, in sorted order, no repeated elements""" - if n==0: yield [] - else: - for i in xrange(len(items)-n+1): - for cc in unique_combinations(items[i+1:],n-1): - yield [items[i]]+cc - -def normalize_language_tag(tag): - """Return a list of normalized combinations for a `BCP 47` language tag. - - Example: - - >>> normalize_language_tag('de-AT-1901') - ['de_at_1901', 'de_at', 'de_1901', 'de'] - """ - # normalize: - tag = tag.lower().replace('-','_') - # find all combinations of subtags - taglist = [] - base_tag= tag.split('_')[:1] - subtags = tag.split('_')[1:] - # print base_tag, subtags - for n in range(len(subtags), 0, -1): - for tags in unique_combinations(subtags, n): - # print tags - taglist.append('_'.join(base_tag + tags)) - taglist += base_tag - return taglist - -class DependencyList: - - """ - List of dependencies, with file recording support. - - Note that the output file is not automatically closed. You have - to explicitly call the close() method. - """ - - def __init__(self, output_file=None, dependencies=[]): - """ - Initialize the dependency list, automatically setting the - output file to `output_file` (see `set_output()`) and adding - all supplied dependencies. - """ - self.set_output(output_file) - for i in dependencies: - self.add(i) - - def set_output(self, output_file): - """ - Set the output file and clear the list of already added - dependencies. - - `output_file` must be a string. The specified file is - immediately overwritten. - - If output_file is '-', the output will be written to stdout. - If it is None, no file output is done when calling add(). 
- """ - self.list = [] - if output_file == '-': - self.file = sys.stdout - elif output_file: - self.file = open(output_file, 'w') - else: - self.file = None - - def add(self, *filenames): - """ - If the dependency `filename` has not already been added, - append it to self.list and print it to self.file if self.file - is not None. - """ - for filename in filenames: - if not filename in self.list: - self.list.append(filename) - if self.file is not None: - print >>self.file, filename - - def close(self): - """ - Close the output file. - """ - if self.file not in (sys.stdout, sys.stderr): - self.file.close() - self.file = None - - def __repr__(self): - if self.file: - output_file = self.file.name - else: - output_file = None - return '%s(%r, %s)' % (self.__class__.__name__, output_file, self.list) Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7317 2012-01-19 11:55:26Z milde $ +# $Id: __init__.py 7648 2013-04-18 07:36:22Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. 
@@ -8,11 +8,14 @@ __docformat__ = 'reStructuredText' +import os.path +import sys -import os.path import docutils from docutils import languages, Component from docutils.transforms import universal +if sys.version_info < (2,5): + from docutils._compat import __import__ class Writer(Component): @@ -130,5 +133,8 @@ writer_name = writer_name.lower() if writer_name in _writer_aliases: writer_name = _writer_aliases[writer_name] - module = __import__(writer_name, globals(), locals()) + try: + module = __import__(writer_name, globals(), locals(), level=1) + except ImportError: + module = __import__(writer_name, globals(), locals(), level=0) return module.Writer Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/docutils_xml.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/docutils_xml.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/docutils_xml.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: docutils_xml.py 7315 2012-01-18 10:16:20Z milde $ +# $Id: docutils_xml.py 7497 2012-08-16 15:17:29Z milde $ # Author: David Goodger, Paul Tremblay, Guenter Milde # Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. @@ -11,6 +11,19 @@ __docformat__ = 'reStructuredText' import sys + +# Work around broken PyXML and obsolete python stdlib behaviour. (The stdlib +# replaces its own xml module with PyXML if the latter is installed. However, +# PyXML is no longer maintained and partially incompatible/buggy.) Reverse +# the order in which xml module and submodules are searched to import stdlib +# modules if they exist and PyXML modules if they do not exist in the stdlib. 
+# +# See http://sourceforge.net/tracker/index.php?func=detail&aid=3552403&group_id=38414&atid=422030 +# and http://lists.fedoraproject.org/pipermail/python-devel/2012-July/000406.html +import xml +if "_xmlplus" in xml.__path__[0]: # PyXML sub-module + xml.__path__.reverse() # If both are available, prefer stdlib over PyXML + import xml.sax.saxutils from StringIO import StringIO Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7328 2012-01-27 08:41:35Z milde $ +# $Id: __init__.py 7753 2014-06-24 14:52:59Z milde $ # Author: David Goodger # Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. 
@@ -22,7 +22,7 @@ import re import urllib try: # check for the Python Imaging Library - import PIL + import PIL.Image except ImportError: try: # sometimes PIL modules are put in PYTHONPATH's root import Image @@ -32,11 +32,10 @@ PIL = None import docutils from docutils import frontend, nodes, utils, writers, languages, io -from docutils.error_reporting import SafeString +from docutils.utils.error_reporting import SafeString from docutils.transforms import writer_aux -from docutils.math import unichar2tex, pick_math_environment -from docutils.math.latex2mathml import parse_latex_math -from docutils.math.math2html import math2html +from docutils.utils.math import unichar2tex, pick_math_environment, math2html +from docutils.utils.math.latex2mathml import parse_latex_math class Writer(writers.Writer): @@ -44,11 +43,9 @@ """Formats this writer supports.""" default_stylesheet = 'html4css1.css' + default_stylesheet_dirs = ['.', utils.relative_path( + os.path.join(os.getcwd(), 'dummy'), os.path.dirname(__file__))] - default_stylesheet_path = utils.relative_path( - os.path.join(os.getcwd(), 'dummy'), - os.path.join(os.path.dirname(__file__), default_stylesheet)) - default_template = 'template.txt' default_template_path = utils.relative_path( @@ -62,17 +59,20 @@ % default_template_path, ['--template'], {'default': default_template_path, 'metavar': '<file>'}), - ('Specify comma separated list of stylesheet URLs. ' + ('Comma separated list of stylesheet URLs. ' 'Overrides previous --stylesheet and --stylesheet-path settings.', ['--stylesheet'], - {'metavar': '<URL>', 'overrides': 'stylesheet_path'}), - ('Specify comma separated list of stylesheet paths. ' - 'With --link-stylesheet, ' + {'metavar': '<URL[,URL,...]>', 'overrides': 'stylesheet_path', + 'validator': frontend.validate_comma_separated_list}), + ('Comma separated list of stylesheet paths. ' + 'Relative paths are expanded if a matching file is found in ' + 'the --stylesheet-dirs. 
With --link-stylesheet, ' 'the path is rewritten relative to the output HTML file. ' - 'Default: "%s"' % default_stylesheet_path, + 'Default: "%s"' % default_stylesheet, ['--stylesheet-path'], - {'metavar': '<file>', 'overrides': 'stylesheet', - 'default': default_stylesheet_path}), + {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet', + 'validator': frontend.validate_comma_separated_list, + 'default': [default_stylesheet]}), ('Embed the stylesheet(s) in the output HTML file. The stylesheet ' 'files must be accessible during processing. This is the default.', ['--embed-stylesheet'], @@ -82,6 +82,13 @@ 'Default: embed stylesheets.', ['--link-stylesheet'], {'dest': 'embed_stylesheet', 'action': 'store_false'}), + ('Comma-separated list of directories where stylesheets are found. ' + 'Used by --stylesheet-path when expanding relative path arguments. ' + 'Default: "%s"' % default_stylesheet_dirs, + ['--stylesheet-dirs'], + {'metavar': '<dir[,dir,...]>', + 'validator': frontend.validate_comma_separated_list, + 'default': default_stylesheet_dirs}), ('Specify the initial header level. Default is 1 for "<h1>". ' 'Does not affect document title & subtitle (see --no-doc-title).', ['--initial-header-level'], @@ -133,9 +140,9 @@ ['--table-style'], {'default': ''}), ('Math output format, one of "MathML", "HTML", "MathJax" ' - 'or "LaTeX". Default: "MathJax"', + 'or "LaTeX". Default: "HTML math.css"', ['--math-output'], - {'default': 'MathJax'}), + {'default': 'HTML math.css'}), ('Omit the XML declaration. 
Use with caution.', ['--no-xml-declaration'], {'dest': 'xml_declaration', 'default': 1, 'action': 'store_false', @@ -147,8 +154,6 @@ settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'} - relative_path_settings = ('stylesheet_path',) - config_section = 'html4css1 writer' config_section_dependencies = ('writers',) @@ -253,7 +258,7 @@ generator = ('<meta name="generator" content="Docutils %s: ' 'http://docutils.sourceforge.net/" />\n') - + # Template for the MathJax script in the header: mathjax_script = '<script type="text/javascript" src="%s"></script>\n' # The latest version of MathJax from the distributed server: @@ -261,12 +266,7 @@ # __http://www.mathjax.org/download/mathjax-cdn-terms-of-service/ mathjax_url = ('http://cdn.mathjax.org/mathjax/latest/MathJax.js?' 'config=TeX-AMS-MML_HTMLorMML') - # TODO: make this configurable: - # - # a) as extra option or - # b) appended to math-output="MathJax"? - # - # If b), which delimiter/delimter-set (':', ',', ' ')? + # may be overwritten by custom URL appended to "mathjax" stylesheet_link = '<link rel="stylesheet" href="%s" type="text/css" />\n' embedded_stylesheet = '<style type="text/css">\n\n%s\n</style>\n' @@ -288,7 +288,7 @@ # encoding not interpolated: self.html_prolog.append(self.xml_declaration) self.head = self.meta[:] - self.stylesheet = [self.stylesheet_call(path) + self.stylesheet = [self.stylesheet_call(path) for path in utils.get_stylesheet_list(settings)] self.body_prefix = ['</head>\n<body>\n'] # document title, subtitle display @@ -300,13 +300,17 @@ self.body_suffix = ['</body>\n</html>\n'] self.section_level = 0 self.initial_header_level = int(settings.initial_header_level) - self.math_output = settings.math_output.lower() + + self.math_output = settings.math_output.split() + self.math_output_options = self.math_output[1:] + self.math_output = self.math_output[0].lower() + # A heterogenous stack used in conjunction with the tree traversal. 
# Make sure that the pops correspond to the pushes: self.context = [] self.topic_classes = [] self.colspecs = [] - self.compact_p = 1 + self.compact_p = True self.compact_simple = False self.compact_field_list = False self.in_docinfo = False @@ -322,7 +326,7 @@ self.in_document_title = 0 # len(self.body) or 0 self.in_mailto = False self.author_in_authors = False - self.math_header = '' + self.math_header = [] def astext(self): return ''.join(self.head_prefix + self.head @@ -373,8 +377,7 @@ if self.settings.embed_stylesheet: try: content = io.FileInput(source_path=path, - encoding='utf-8', - handle_io_errors=False).read() + encoding='utf-8').read() self.settings.record_dependencies.add(path) except IOError, err: msg = u"Cannot embed stylesheet '%s': %s." % ( @@ -399,19 +402,19 @@ ids = [] for (name, value) in attributes.items(): atts[name.lower()] = value - classes = node.get('classes', []) - if 'class' in atts: - classes.append(atts.pop('class')) - # move language specification to 'lang' attribute - languages = [cls for cls in classes - if cls.startswith('language-')] + classes = [] + languages = [] + # unify class arguments and move language specification + for cls in node.get('classes', []) + atts.pop('class', '').split() : + if cls.startswith('language-'): + languages.append(cls[9:]) + elif cls.strip() and cls not in classes: + classes.append(cls) if languages: # attribute name is 'lang' in XHTML 1.0 but 'xml:lang' in 1.1 - atts[self.lang_attribute] = languages[0][9:] - classes.pop(classes.index(languages[0])) - classes = ' '.join(classes).strip() + atts[self.lang_attribute] = languages[0] if classes: - atts['class'] = classes + atts['class'] = ' '.join(classes) assert 'id' not in atts ids.extend(node.get('ids', [])) if 'ids' in atts: @@ -603,7 +606,13 @@ '</tbody>\n</table>\n') def visit_citation_reference(self, node): - href = '#' + node['refid'] + href = '#' + if 'refid' in node: + href += node['refid'] + elif 'refname' in node: + href += 
self.document.nameids[node['refname']] + # else: # TODO system message (or already in the transform)? + # 'Citation reference missing.' self.body.append(self.starttag( node, 'a', '[', CLASS='citation-reference', href=href)) @@ -762,7 +771,10 @@ self.meta.insert(0, self.content_type % self.settings.output_encoding) self.head.insert(0, self.content_type % self.settings.output_encoding) if self.math_header: - self.head.append(self.math_header) + if self.math_output == 'mathjax': + self.head.extend(self.math_header) + else: + self.stylesheet.extend(self.math_header) # skip content-type meta tag with interpolated charset value: self.html_head.extend(self.head[1:]) self.body_prefix.append(self.starttag(node, 'div', CLASS='document')) @@ -894,7 +906,8 @@ and len(node.astext()) > self.settings.field_name_limit): atts['colspan'] = 2 self.context.append('</tr>\n' - + self.starttag(node.parent, 'tr', '') + + self.starttag(node.parent, 'tr', '', + CLASS='field') + '<td> </td>') else: self.context.append('') @@ -1034,9 +1047,9 @@ self.settings.record_dependencies.add( imagepath.replace('\\', '/')) if 'width' not in atts: - atts['width'] = str(img.size[0]) + atts['width'] = '%dpx' % img.size[0] if 'height' not in atts: - atts['height'] = str(img.size[1]) + atts['height'] = '%dpx' % img.size[1] del img for att_name in 'width', 'height': if att_name in atts: @@ -1119,7 +1132,13 @@ self.body.append('</li>\n') def visit_literal(self, node): - """Process text to prevent tokens from wrapping.""" + # special case: "code" role + classes = node.get('classes', []) + if 'code' in classes: + # filter 'code' from class arguments + node['classes'] = [cls for cls in classes if cls != 'code'] + self.body.append(self.starttag(node, 'code', '')) + return self.body.append( self.starttag(node, 'tt', '', CLASS='docutils literal')) text = node.astext() @@ -1142,6 +1161,10 @@ # Content already processed: raise nodes.SkipNode + def depart_literal(self, node): + # skipped unless literal element is from 
"code" role: + self.body.append('</code>') + def visit_literal_block(self, node): self.body.append(self.starttag(node, 'pre', CLASS='literal-block')) @@ -1149,10 +1172,16 @@ self.body.append('\n</pre>\n') def visit_math(self, node, math_env=''): + # If the method is called from visit_math_block(), math_env != ''. + # As there is no native HTML math support, we provide alternatives: # LaTeX and MathJax math_output modes simply wrap the content, # HTML and MathML math_output modes also convert the math_code. - # If the method is called from visit_math_block(), math_env != ''. + if self.math_output not in ('mathml', 'html', 'mathjax', 'latex'): + self.document.reporter.error( + 'math-output format "%s" not supported ' + 'falling back to "latex"'% self.math_output) + self.math_output = 'latex' # # HTML container tags = {# math_output: (block, inline, class-arguments) @@ -1180,10 +1209,18 @@ # settings and conversion if self.math_output in ('latex', 'mathjax'): math_code = self.encode(math_code) - if self.math_output == 'mathjax': - self.math_header = self.mathjax_script % self.mathjax_url + if self.math_output == 'mathjax' and not self.math_header: + if self.math_output_options: + self.mathjax_url = self.math_output_options[0] + self.math_header = [self.mathjax_script % self.mathjax_url] elif self.math_output == 'html': - math_code = math2html(math_code) + if self.math_output_options and not self.math_header: + self.math_header = [self.stylesheet_call( + utils.find_file_in_dirs(s, self.settings.stylesheet_dirs)) + for s in self.math_output_options[0].split(',')] + # TODO: fix display mode in matrices and fractions + math2html.DocumentParameters.displaymode = (math_env != '') + math_code = math2html.math2html(math_code) elif self.math_output == 'mathml': self.doctype = self.doctype_mathml self.content_type = self.content_type_mathml @@ -1196,7 +1233,7 @@ self.body.append(self.starttag(node, 'p')) self.body.append(u','.join(err.args)) self.body.append('</p>\n') - 
self.body.append(self.starttag(node, 'pre', + self.body.append(self.starttag(node, 'pre', CLASS='literal-block')) self.body.append(self.encode(math_code)) self.body.append('\n</pre>\n') @@ -1204,12 +1241,16 @@ raise nodes.SkipNode # append to document body if tag: - self.body.append(self.starttag(node, tag, CLASS=clsarg)) + self.body.append(self.starttag(node, tag, + suffix='\n'*bool(math_env), + CLASS=clsarg)) self.body.append(math_code) + if math_env: # block mode (equation, display) + self.body.append('\n') + if tag: + self.body.append('</%s>' % tag) if math_env: self.body.append('\n') - if tag: - self.body.append('</%s>\n' % tag) # Content already processed: raise nodes.SkipNode @@ -1305,13 +1346,13 @@ if (isinstance(node.parent, nodes.document) or isinstance(node.parent, nodes.compound)): # Never compact paragraphs in document or compound. - return 0 + return False for key, value in node.attlist(): if (node.is_not_default(key) and not (key == 'classes' and value in ([], ['first'], ['last'], ['first', 'last']))): # Attribute which needs to survive. 
- return 0 + return False first = isinstance(node.parent[0], nodes.label) # skip label for child in node.parent.children[first:]: # only first paragraph can be compact @@ -1319,14 +1360,14 @@ continue if child is node: break - return 0 + return False parent_length = len([n for n in node.parent if not isinstance( n, (nodes.Invisible, nodes.label))]) if ( self.compact_simple or self.compact_field_list or self.compact_p and parent_length == 1): - return 1 - return 0 + return True + return False def visit_paragraph(self, node): if self.should_be_compact_paragraph(node): @@ -1510,11 +1551,14 @@ self.body.append('</div>\n') def visit_table(self, node): + self.context.append(self.compact_p) + self.compact_p = True classes = ' '.join(['docutils', self.settings.table_style]).strip() self.body.append( self.starttag(node, 'table', CLASS=classes, border="1")) def depart_table(self, node): + self.compact_p = self.context.pop() self.body.append('</table>\n') def visit_target(self, node): Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/html4css1.css =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/html4css1.css 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/html4css1.css 2014-09-29 12:39:28 UTC (rev 756) @@ -1,6 +1,6 @@ /* :Author: David Goodger (goodger@python.org) -:Id: $Id: html4css1.css 7056 2011-06-17 10:50:48Z milde $ +:Id: $Id: html4css1.css 7614 2013-02-21 15:55:51Z milde $ :Copyright: This stylesheet has been placed in the public domain. Default cascading style sheet for the HTML output of Docutils. 
@@ -68,7 +68,7 @@ div.attention p.admonition-title, div.caution p.admonition-title, div.danger p.admonition-title, div.error p.admonition-title, -div.warning p.admonition-title { +div.warning p.admonition-title, .code .error { color: red ; font-weight: bold ; font-family: sans-serif } @@ -240,10 +240,19 @@ margin-top: 0 ; font: inherit } -pre.literal-block, pre.doctest-block, pre.math { +pre.literal-block, pre.doctest-block, pre.math, pre.code { margin-left: 2em ; margin-right: 2em } +pre.code .ln { color: grey; } /* line numbers */ +pre.code, code { background-color: #eeeeee } +pre.code .comment, code .comment { color: #5C6576 } +pre.code .keyword, code .keyword { color: #3B0D06; font-weight: bold } +pre.code .literal.string, code .literal.string { color: #0C5404 } +pre.code .name.builtin, code .name.builtin { color: #352B84 } +pre.code .deleted, code .deleted { background-color: #DEB0A1} +pre.code .inserted, code .inserted { background-color: #A3D289} + span.classifier { font-family: sans-serif ; font-style: oblique } @@ -295,6 +304,21 @@ white-space: nowrap ; padding-left: 0 } +/* "booktabs" style (no vertical lines) */ +table.docutils.booktabs { + border: 0px; + border-top: 2px solid; + border-bottom: 2px solid; + border-collapse: collapse; +} +table.docutils.booktabs * { + border: 0px; +} +table.docutils.booktabs th { + border-bottom: thin solid; + text-align: left; +} + h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { font-size: 100% } Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/math.css =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/math.css 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/html4css1/math.css 2014-09-29 12:39:28 UTC (rev 756) @@ -21,7 +21,7 @@ /* Formulas */ .formula { text-align: center; - font-family: "DejaVu Serif", 
serif; + font-family: "Droid Serif", "DejaVu Serif", "STIX", serif; margin: 1.2em 0; } span.formula { @@ -113,9 +113,11 @@ vertical-align: middle; } span.symbol { + line-height: 125%; font-size: 125%; } span.bigsymbol { + line-height: 150%; font-size: 150%; } span.largesymbol { @@ -139,10 +141,10 @@ } .limit { display: table-row; - line-height: 95%; + line-height: 99%; } sup.limit, sub.limit { - line-height: 150%; + line-height: 100%; } span.symbolover { display: inline-block; Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,5 @@ -# .. coding: utf8 -# $Id: __init__.py 7389 2012-03-30 11:58:21Z milde $ +# .. coding: utf-8 +# $Id: __init__.py 7745 2014-02-28 14:15:59Z milde $ # Author: Engelbert Gruber, Günter Milde # Maintainer: docutils-develop@lists.sourceforge.net # Copyright: This module has been placed in the public domain. @@ -24,15 +24,10 @@ except ImportError: import docutils.utils.roman as roman from docutils import frontend, nodes, languages, writers, utils, io -from docutils.error_reporting import SafeString +from docutils.utils.error_reporting import SafeString from docutils.transforms import writer_aux -from docutils.math import pick_math_environment, unichar2tex +from docutils.utils.math import pick_math_environment, unichar2tex -# compatibility module for Python 2.3 -if not hasattr(string, 'Template'): - import docutils._string_template_compat - string.Template = docutils._string_template_compat.Template - class Writer(writers.Writer): supported = ('latex','latex2e') @@ -92,12 +87,16 @@ '".sty" or omitted and with \\input else. 
' ' Overrides previous --stylesheet and --stylesheet-path settings.', ['--stylesheet'], - {'default': '', 'metavar': '<file>', - 'overrides': 'stylesheet_path'}), - ('Like --stylesheet, but the path is rewritten ' - 'relative to the output file. ', + {'default': '', 'metavar': '<file[,file,...]>', + 'overrides': 'stylesheet_path', + 'validator': frontend.validate_comma_separated_list}), + ('Comma separated list of LaTeX packages/stylesheets. ' + 'Relative paths are expanded if a matching file is found in ' + 'the --stylesheet-dirs. With --link-stylesheet, ' + 'the path is rewritten relative to the output *.tex file. ', ['--stylesheet-path'], - {'metavar': '<file>', 'overrides': 'stylesheet'}), + {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet', + 'validator': frontend.validate_comma_separated_list}), ('Link to the stylesheet(s) in the output file. (default)', ['--link-stylesheet'], {'dest': 'embed_stylesheet', 'action': 'store_false'}), @@ -106,6 +105,13 @@ ['--embed-stylesheet'], {'default': 0, 'action': 'store_true', 'validator': frontend.validate_boolean}), + ('Comma-separated list of directories where stylesheets are found. ' + 'Used by --stylesheet-path when expanding relative path arguments. ' + 'Default: "."', + ['--stylesheet-dirs'], + {'metavar': '<dir[,dir,...]>', + 'validator': frontend.validate_comma_separated_list, + 'default': ['.']}), ('Customization by LaTeX code in the preamble. 
' 'Default: select PDF standard fonts (Times, Helvetica, Courier).', ['--latex-preamble'], @@ -211,8 +217,6 @@ settings_defaults = {'sectnum_depth': 0 # updated by SectNum transform } - relative_path_settings = ('stylesheet_path',) - config_section = 'latex2e writer' config_section_dependencies = ('writers',) @@ -231,14 +235,11 @@ # Override parent method to add latex-specific transforms def get_transforms(self): - # call the parent class' method - transform_list = writers.Writer.get_transforms(self) - # print transform_list + return writers.Writer.get_transforms(self) + [ # Convert specific admonitions to generic one - transform_list.append(writer_aux.Admonitions) + writer_aux.Admonitions, # TODO: footnote collection transform - # transform_list.append(footnotes.collect) - return transform_list + ] def translate(self): visitor = self.translator_class(self.document) @@ -299,29 +300,29 @@ 'cy': 'welsh', 'da': 'danish', 'de': 'ngerman', # new spelling (de_1996) - 'de_1901': 'german', # old spelling - 'de_at': 'naustrian', - 'de_at_1901': 'austrian', + 'de-1901': 'german', # old spelling + 'de-AT': 'naustrian', + 'de-AT-1901': 'austrian', 'dsb': 'lowersorbian', 'el': 'greek', # monotonic (el-monoton) - 'el_polyton': 'polutonikogreek', + 'el-polyton': 'polutonikogreek', 'en': 'english', # TeX' default language - 'en_au': 'australian', - 'en_ca': 'canadian', - 'en_gb': 'british', - 'en_nz': 'newzealand', - 'en_us': 'american', - 'eo': 'esperanto', # '^' is active + 'en-AU': 'australian', + 'en-CA': 'canadian', + 'en-GB': 'british', + 'en-NZ': 'newzealand', + 'en-US': 'american', + 'eo': 'esperanto', 'es': 'spanish', 'et': 'estonian', 'eu': 'basque', # 'fa': 'farsi', 'fi': 'finnish', 'fr': 'french', - 'fr_ca': 'canadien', + 'fr-CA': 'canadien', 'ga': 'irish', # Irish Gaelic # 'grc': # Ancient Greek - 'grc_ibycus': 'ibycus', # Ibycus encoding + 'grc-ibycus': 'ibycus', # Ibycus encoding 'gl': 'galician', 'he': 'hebrew', 'hr': 'croatian', @@ -341,57 +342,85 @@ 'nb': 
'norsk', # Norwegian Bokmal 'nl': 'dutch', 'nn': 'nynorsk', # Norwegian Nynorsk - 'no': 'norsk', # Norwegian Bokmal + 'no': 'norsk', # Norwegian (Bokmal) 'pl': 'polish', 'pt': 'portuges', - 'pt_br': 'brazil', + 'pt-BR': 'brazil', 'ro': 'romanian', - 'ru': 'russian', # '"' is active + 'ru': 'russian', 'se': 'samin', # North Sami - # sh-cyrl: Serbo-Croatian, Cyrillic script - 'sh-latn': 'serbian', # Serbo-Croatian, Latin script + 'sh-Cyrl': 'serbianc', # Serbo-Croatian, Cyrillic script + 'sh-Latn': 'serbian', # Serbo-Croatian, Latin script see also 'hr' 'sk': 'slovak', 'sl': 'slovene', 'sq': 'albanian', - # 'sr-cyrl': Serbian, Cyrillic script (sr-cyrl) - 'sr-latn': 'serbian', # Serbian, Latin script, " active. + 'sr': 'serbianc', # Serbian, Cyrillic script (contributed) + 'sr-Latn': 'serbian', # Serbian, Latin script 'sv': 'swedish', # 'th': 'thai', 'tr': 'turkish', 'uk': 'ukrainian', 'vi': 'vietnam', - # zh-latn: Chinese Pinyin + # zh-Latn: Chinese Pinyin } + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + warn_msg = 'Language "%s" not supported by LaTeX (babel)' + # "Active characters" are shortcuts that start a LaTeX macro and may need + # escaping for literals use. Characters that prevent literal use (e.g. + # starting accent macros like "a -> ä) will be deactivated if one of the + # defining languages is used in the document. + # Special cases: + # ~ (tilde) -- used in estonian, basque, galician, and old versions of + # spanish -- cannot be deactivated as it denotes a no-break space macro, + # " (straight quote) -- used in albanian, austrian, basque + # brazil, bulgarian, catalan, czech, danish, dutch, estonian, + # finnish, galician, german, icelandic, italian, latin, naustrian, + # ngerman, norsk, nynorsk, polish, portuges, russian, serbian, slovak, + # slovene, spanish, swedish, ukrainian, and uppersorbian -- + # is escaped as ``\textquotedbl``. 
+ active_chars = {# TeX/Babel-name: active characters to deactivate + # 'breton': ':;!?' # ensure whitespace + # 'esperanto': '^', + # 'estonian': '~"`', + # 'french': ':;!?' # ensure whitespace + 'galician': '.<>', # also '~"' + # 'magyar': '`', # for special hyphenation cases + 'spanish': '.<>', # old versions also '~' + # 'turkish': ':!=' # ensure whitespace + } + def __init__(self, language_code, reporter=None): self.reporter = reporter self.language = self.language_name(language_code) self.otherlanguages = {} - self.quote_index = 0 - self.quotes = ('``', "''") - # language dependent configuration: - # double quotes are "active" in some languages (e.g. German). - self.literal_double_quote = u'"' - if self.language in ('ngerman', 'german', 'austrian', 'naustrian', - 'russian'): - self.quotes = (r'\glqq{}', r'\grqq{}') - self.literal_double_quote = ur'\dq{}' - if self.language == 'french': - self.quotes = (r'\og{}', r'\fg{}') - if self.language == 'italian': - self.literal_double_quote = ur'{\char`\"}' def __call__(self): """Return the babel call with correct options and settings""" - languages = self.otherlanguages.keys() + languages = sorted(self.otherlanguages.keys()) languages.append(self.language or 'english') self.setup = [r'\usepackage[%s]{babel}' % ','.join(languages)] - if 'spanish' in languages: - # reset active chars to the original meaning: - self.setup.append( - r'\addto\shorthandsspanish{\spanishdeactivate{."~<>}}') - # or prepend r'\def\spanishoptions{es-noshorthands}' + # Deactivate "active characters" + shorthands = [] + for c in ''.join([self.active_chars.get(l, '') for l in languages]): + if c not in shorthands: + shorthands.append(c) + if shorthands: + self.setup.append(r'\AtBeginDocument{\shorthandoff{%s}}' + % ''.join(shorthands)) + # Including '~' in shorthandoff prevents its use as no-break space + if 'galician' in languages: + self.setup.append(r'\deactivatetilden % restore ~ in Galician') + if 'estonian' in languages: + 
self.setup.extend([r'\makeatletter', + r' \addto\extrasestonian{\bbl@deactivate{~}}', + r'\makeatother']) + if 'basque' in languages: + self.setup.extend([r'\makeatletter', + r' \addto\extrasbasque{\bbl@deactivate{~}}', + r'\makeatother']) if (languages[-1] == 'english' and 'french' in self.otherlanguages.keys()): self.setup += ['% Prevent side-effects if French hyphenation ' @@ -401,20 +430,6 @@ r'\noextrasfrench}' % self.language] return '\n'.join(self.setup) - def next_quote(self): - q = self.quotes[self.quote_index] - self.quote_index = (self.quote_index+1) % 2 - return q - - def quote_quotes(self,text): - t = None - for part in text.split('"'): - if t == None: - t = part - else: - t += self.next_quote() + part - return t - def language_name(self, language_code): """Return TeX language name for `language_code`""" for tag in utils.normalize_language_tag(language_code): @@ -427,8 +442,7 @@ return '' def get_language(self): - """Return `self.language` (for backwards compatibility with Sphinx). 
- """ + # Obsolete, kept for backwards compatibility with Sphinx return self.language @@ -466,7 +480,7 @@ PreambleCmds.abstract = r""" % abstract title -\providecommand*{\DUtitleabstract}[1]{\centerline{\textbf{#1}}}""" +\providecommand*{\DUtitleabstract}[1]{\centering\textbf{#1}}""" PreambleCmds.admonition = r""" % admonition (specially marked topic) @@ -548,8 +562,14 @@ \usepackage{graphicx} \else \usepackage[pdftex]{graphicx} -\fi'))""" +\fi""" +PreambleCmds.highlight_rules = r"""% basic code highlight: +\providecommand*\DUrolecomment[1]{\textcolor[rgb]{0.40,0.40,0.40}{#1}} +\providecommand*\DUroledeleted[1]{\textcolor[rgb]{0.40,0.40,0.40}{#1}} +\providecommand*\DUrolekeyword[1]{\textbf{#1}} +\providecommand*\DUrolestring[1]{\textit{#1}}""" + PreambleCmds.inline = r""" % inline markup (custom roles) % \DUrole{#1}{#2} tries \DUrole#1{#2} @@ -638,6 +658,10 @@ % subtitle (for topic/sidebar) \providecommand*{\DUsubtitle}[2][class-arg]{\par\emph{#2}\smallskip}""" +PreambleCmds.documentsubtitle = r""" +% subtitle (in document title) +\providecommand*{\DUdocumentsubtitle}[1]{{\large #1}}""" + PreambleCmds.table = r"""\usepackage{longtable,ltcaption,array} \setlength{\extrarowheight}{2pt} \newlength{\DUtablewidth} % internal use in tables""" @@ -699,6 +723,8 @@ ord('\\'): ur'\textbackslash{}', ord('{'): ur'\{', ord('}'): ur'\}', + # straight double quotes are 'active' in many languages + ord('"'): ur'\textquotedbl{}', # Square brackets are ordinary chars and cannot be escaped with '\', # so we put them in a group '{[}'. (Alternative: ensure that all # macros with optional arguments are terminated with {} and text @@ -707,7 +733,8 @@ # group, e.g. ``\item[{\hyperref[label]{text}}]``. 
ord('['): ur'{[}', ord(']'): ur'{]}', - # the soft hyphen is unknown in 8-bit text and not properly handled by XeTeX + # the soft hyphen is unknown in 8-bit text + # and not properly handled by XeTeX 0x00AD: ur'\-', # SOFT HYPHEN } # Unicode chars that are not recognized by LaTeX's utf8 encoding @@ -725,6 +752,8 @@ } # Unicode chars that are recognized by LaTeX's utf8 encoding utf8_supported_unicode = { + 0x00AB: ur'\guillemotleft', # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK + 0x00bb: ur'\guillemotright', # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK 0x200C: ur'\textcompwordmark', # ZERO WIDTH NON-JOINER 0x2013: ur'\textendash{}', 0x2014: ur'\textemdash{}', @@ -764,7 +793,7 @@ 0x00b3: ur'\textthreesuperior{}', # ³ SUPERSCRIPT THREE 0x00b4: ur'\textasciiacute{}', # ´ ACUTE ACCENT 0x00b5: ur'\textmu{}', # µ MICRO SIGN - 0x00b6: ur'\textparagraph{}', # ¶ PILCROW SIGN # not equal to \textpilcrow + 0x00b6: ur'\textparagraph{}', # ¶ PILCROW SIGN # != \textpilcrow 0x00b9: ur'\textonesuperior{}', # ¹ SUPERSCRIPT ONE 0x00ba: ur'\textordmasculine{}', # º MASCULINE ORDINAL INDICATOR 0x00bc: ur'\textonequarter{}', # 1/4 FRACTION @@ -772,7 +801,7 @@ 0x00be: ur'\textthreequarters{}', # 3/4 FRACTION 0x00d7: ur'\texttimes{}', # × MULTIPLICATION SIGN 0x00f7: ur'\textdiv{}', # ÷ DIVISION SIGN - # + # others 0x0192: ur'\textflorin{}', # LATIN SMALL LETTER F WITH HOOK 0x02b9: ur'\textasciiacute{}', # MODIFIER LETTER PRIME 0x02ba: ur'\textacutedbl{}', # MODIFIER LETTER DOUBLE PRIME @@ -1256,9 +1285,19 @@ self.requirements['_inputenc'] = (r'\usepackage[%s]{inputenc}' % self.latex_encoding) # TeX font encoding - if self.font_encoding and not self.is_xetex: - self.requirements['_fontenc'] = (r'\usepackage[%s]{fontenc}' % - self.font_encoding) + if not self.is_xetex: + if self.font_encoding: + self.requirements['_fontenc'] = (r'\usepackage[%s]{fontenc}' % + self.font_encoding) + # ensure \textquotedbl is defined: + for enc in self.font_encoding.split(','): + enc = enc.strip() + if enc == 
'OT1': + self.requirements['_textquotedblOT1'] = ( + r'\DeclareTextSymbol{\textquotedbl}{OT1}{`\"}') + elif enc not in ('T1', 'T2A', 'T2B', 'T2C', 'T4', 'T5'): + self.requirements['_textquotedbl'] = ( + r'\DeclareTextSymbolDefault{\textquotedbl}{T1}') # page layout with typearea (if there are relevant document options) if (settings.documentclass.find('scr') == -1 and (self.documentoptions.find('DIV') != -1 or @@ -1288,7 +1327,7 @@ ## len(self.d_class.sections)) # Section numbering - if not self.settings.sectnum_xform: # section numbering by Docutils + if settings.sectnum_xform: # section numbering by Docutils PreambleCmds.secnumdepth = r'\setcounter{secnumdepth}{0}' else: # section numbering by LaTeX: secnumdepth = settings.sectnum_depth @@ -1338,8 +1377,7 @@ path = base + '.sty' # ensure extension try: content = io.FileInput(source_path=path, - encoding='utf-8', - handle_io_errors=False).read() + encoding='utf-8').read() self.settings.record_dependencies.add(path) except IOError, err: msg = u"Cannot embed stylesheet '%s':\n %s." % ( @@ -1440,10 +1478,6 @@ table[ord('>')] = ur'\textgreater{}' if self.insert_non_breaking_blanks: table[ord(' ')] = ur'~' - if self.literal: - # double quotes are 'active' in some languages - # TODO: use \textquotedbl if font encoding starts with T? 
- table[ord('"')] = self.babel.literal_double_quote # Unicode replacements for 8-bit tex engines (not required with XeTeX/LuaTeX): if not self.is_xetex: table.update(CharMaps.unsupported_unicode) @@ -1480,8 +1514,6 @@ if not line.lstrip(): lines[i] += '~' text = (r'\\' + '\n').join(lines) - if not self.literal: - text = self.babel.quote_quotes(text) if self.literal and not self.insert_non_breaking_blanks: # preserve runs of spaces but allow wrapping text = text.replace(' ', ' ~') @@ -1500,7 +1532,7 @@ ## return head + '\n' + body def is_inline(self, node): - """Check whether a node represents an inline element""" + """Check whether a node represents an inline or block-level element""" return isinstance(node.parent, nodes.TextElement) def append_hypertargets(self, node): @@ -1596,8 +1628,12 @@ self.out.append( '%\n\\begin{list}{}{}\n' ) else: self.out.append( '%\n\\begin{itemize}\n' ) + # if node['classes']: + # self.visit_inline(node) def depart_bullet_list(self, node): + # if node['classes']: + # self.depart_inline(node) if self.is_toc_list: self.out.append( '\n\\end{list}\n' ) else: @@ -1624,7 +1660,7 @@ self.out.append('}') def visit_caption(self, node): - self.out.append( '\\caption{' ) + self.out.append('\n\\caption{') def depart_caption(self, node): self.out.append('}\n') @@ -1871,7 +1907,7 @@ title = [''.join(self.title)] + self.title_labels if self.subtitle: title += [r'\\ % subtitle', - r'\large{%s}' % ''.join(self.subtitle) + r'\DUdocumentsubtitle{%s}' % ''.join(self.subtitle) ] + self.subtitle_labels self.titledata.append(r'\title{%s}' % '%\n '.join(title)) # \author (empty \author prevents warning with \maketitle) @@ -1947,6 +1983,7 @@ count = node['morerows'] + 1 self.active_table.set_rowspan( self.active_table.get_entry_number()-1,count) + # TODO why does multirow end on % ? 
needs to be checked for below self.out.append('\\multirow{%d}{%s}{%%' % (count,self.active_table.get_column_width())) self.context.append('}') @@ -1970,9 +2007,13 @@ # header / not header if isinstance(node.parent.parent, nodes.thead): + if self.out[-1].endswith("%"): + self.out.append("\n") self.out.append('\\textbf{%') self.context.append('}') elif self.active_table.is_stub_column(): + if self.out[-1].endswith("%"): + self.out.append("\n") self.out.append('\\textbf{') self.context.append('}') else: @@ -2094,21 +2135,18 @@ def visit_figure(self, node): self.requirements['float_settings'] = PreambleCmds.float_settings - # ! the 'align' attribute should set "outer alignment" ! - # For "inner alignment" use LaTeX default alignment (similar to HTML) - ## if ('align' not in node.attributes or - ## node.attributes['align'] == 'center'): - ## align = '\n\\centering' - ## align_end = '' - ## else: - ## # TODO non vertical space for other alignments. - ## align = '\\begin{flush%s}' % node.attributes['align'] - ## align_end = '\\end{flush%s}' % node.attributes['align'] - ## self.out.append( '\\begin{figure}%s\n' % align ) - ## self.context.append( '%s\\end{figure}\n' % align_end ) - self.out.append('\\begin{figure}') + # The 'align' attribute sets the "outer alignment", + # for "inner alignment" use LaTeX default alignment (similar to HTML) + alignment = node.attributes.get('align', 'center') + if alignment != 'center': + # The LaTeX "figure" environment always uses the full textwidth, + # so "outer alignment" is ignored. Just write a comment. + # TODO: use the wrapfigure environment? 
+ self.out.append('\n\\begin{figure} %% align = "%s"\n' % alignment) + else: + self.out.append('\n\\begin{figure}\n') if node.get('ids'): - self.out += ['\n'] + self.ids_to_labels(node) + self.out += self.ids_to_labels(node) + ['\n'] def depart_figure(self, node): self.out.append('\\end{figure}\n') @@ -2262,7 +2300,6 @@ pre = [] post = [] include_graphics_options = [] - display_style = ('block-', 'inline-')[self.is_inline(node)] align_codes = { # inline images: by default latex aligns the bottom. 'bottom': ('', ''), @@ -2273,6 +2310,7 @@ 'left': (r'\noindent{', r'\hfill}'), 'right': (r'\noindent{\hfill', '}'),} if 'align' in attrs: + # TODO: warn or ignore non-applicable alignment settings? try: align_code = align_codes[attrs['align']] pre.append(align_code[0]) @@ -2288,7 +2326,8 @@ if 'width' in attrs: include_graphics_options.append('width=%s' % self.to_latex_length(attrs['width'])) - if not self.is_inline(node): + if not (self.is_inline(node) or + isinstance(node.parent, nodes.figure)): pre.append('\n') post.append('\n') pre.reverse() @@ -2304,27 +2343,18 @@ self.out += self.ids_to_labels(node) + ['\n'] def visit_inline(self, node): # <span>, i.e. 
custom roles - # Make a copy to keep ``node['classes']`` True if a - # language argument is popped (used in conditional calls of - # depart_inline()): - classes = node['classes'][:] - self.context.append('}' * len(classes)) - # handle language specification: - language_tags = [cls for cls in classes - if cls.startswith('language-')] - if language_tags: - language = self.babel.language_name(language_tags[0][9:]) - if language: - self.babel.otherlanguages[language] = True - self.out.append(r'\otherlanguage{%s}{' % language) - classes.pop(classes.index(language_tags[0])) - if not classes: - return - # mark up for styling with custom macros - if 'align-center' in classes: - self.fallbacks['align-center'] = PreambleCmds.align_center - self.fallbacks['inline'] = PreambleCmds.inline - self.out += [r'\DUrole{%s}{' % cls for cls in classes] + self.context.append('}' * len(node['classes'])) + for cls in node['classes']: + if cls == 'align-center': + self.fallbacks['align-center'] = PreambleCmds.align_center + if cls.startswith('language-'): + language = self.babel.language_name(cls[9:]) + if language: + self.babel.otherlanguages[language] = True + self.out.append(r'\foreignlanguage{%s}{' % language) + else: + self.fallbacks['inline'] = PreambleCmds.inline + self.out.append(r'\DUrole{%s}{' % cls) def depart_inline(self, node): self.out.append(self.context.pop()) @@ -2376,6 +2406,10 @@ def visit_literal(self, node): self.literal = True + if 'code' in node['classes'] and ( + self.settings.syntax_highlight != 'none'): + self.requirements['color'] = PreambleCmds.color + self.fallbacks['code'] = PreambleCmds.highlight_rules self.out.append('\\texttt{') if node['classes']: self.visit_inline(node) @@ -2417,7 +2451,7 @@ if not self.active_table.is_open(): # no quote inside tables, to avoid vertical space between # table border and literal block. - # BUG: fails if normal text preceeds the literal block. + # BUG: fails if normal text precedes the literal block. 
self.out.append('%\n\\begin{quote}') self.context.append('\n\\end{quote}\n') else: @@ -2433,6 +2467,10 @@ self.literal = True self.insert_newline = True self.insert_non_breaking_blanks = True + if 'code' in node['classes'] and ( + self.settings.syntax_highlight != 'none'): + self.requirements['color'] = PreambleCmds.color + self.fallbacks['code'] = PreambleCmds.highlight_rules self.out.append('{\\ttfamily \\raggedright \\noindent\n') def depart_literal_block(self, node): @@ -2712,6 +2750,7 @@ def visit_subtitle(self, node): if isinstance(node.parent, nodes.document): self.push_output_collector(self.subtitle) + self.fallbacks['documentsubtitle'] = PreambleCmds.documentsubtitle self.subtitle_labels += self.ids_to_labels(node, set_anchor=False) # section subtitle: "starred" (no number, not in ToC) elif isinstance(node.parent, nodes.section): @@ -2879,7 +2918,8 @@ self.context.append('') # Section title else: - self.requirements['secnumdepth'] = PreambleCmds.secnumdepth + if hasattr(PreambleCmds, 'secnumdepth'): + self.requirements['secnumdepth'] = PreambleCmds.secnumdepth section_name = self.d_class.section(self.section_level) self.out.append('\n\n') # System messages heading in red: Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/xelatex.tex =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/xelatex.tex 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/latex2e/xelatex.tex 2014-09-29 12:39:28 UTC (rev 756) @@ -1,7 +1,7 @@ $head_prefix% generated by Docutils <http://docutils.sourceforge.net/> % rubber: set program xelatex \usepackage[no-sscript]{xltxtra} % loads fixltx2e, metalogo, xunicode, fontspec -\defaultfontfeatures{Scale=MatchLowercase} +% \defaultfontfeatures{Scale=MatchLowercase} $requirements %%% Custom LaTeX preamble $latex_preamble Modified: 
trunk/docutils/src/main/resources/docutils/docutils/writers/manpage.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/manpage.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/manpage.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# $Id: manpage.py 7321 2012-01-20 06:50:44Z grubert $ +# $Id: manpage.py 7628 2013-03-09 10:19:35Z grubert $ # Author: Engelbert Gruber <grubert@users.sourceforge.net> # Copyright: This module is put into the public domain. @@ -57,6 +57,7 @@ DEFINITION_LIST_INDENT = 7 OPTION_LIST_INDENT = 7 BLOCKQOUTE_INDENT = 3.5 +LITERAL_BLOCK_INDENT = 3.5 # Define two macros so man/roff can calculate the # indent/unindent margins by itself @@ -107,7 +108,7 @@ self.output = visitor.astext() -class Table: +class Table(object): def __init__(self): self._rows = [] self._options = ['center'] @@ -245,14 +246,14 @@ def ensure_eol(self): """Ensure the last line in body is terminated by new line.""" - if self.body[-1][-1] != '\n': + if len(self.body) > 0 and self.body[-1][-1] != '\n': self.body.append('\n') def astext(self): """Return the final formatted document as a string.""" if not self.header_written: # ensure we get a ".TH" as viewers require it. - self.head.append(self.header()) + self.append_header() # filter body for i in xrange(len(self.body)-1, 0, -1): # remove superfluous vertical gaps. @@ -288,10 +289,10 @@ text = text.replace(in_char, out_markup) # unicode text = self.deunicode(text) + # prevent interpretation of "." at line start + if text.startswith('.'): + text = '\\&' + text if self._in_literal: - # prevent interpretation of "." 
at line start - if text[0] == '.': - text = '\\&' + text text = text.replace('\n.', '\n\\&.') self.body.append(text) @@ -299,7 +300,7 @@ pass def list_start(self, node): - class enum_char: + class enum_char(object): enum_style = { 'bullet' : '\\(bu', 'emdash' : '\\(em', @@ -376,8 +377,8 @@ # .TH title_upper section date source manual if self.header_written: return - self.body.append(self.header()) - self.body.append(MACRO_DEF) + self.head.append(self.header()) + self.head.append(MACRO_DEF) self.header_written = 1 def visit_address(self, node): @@ -387,12 +388,27 @@ pass def visit_admonition(self, node, name=None): + # + # Make admonitions a simple block quote + # with a strong heading + # + # Using .IP/.RE doesn't preserve indentation + # when admonitions contain bullets, literal, + # and/or block quotes. + # if name: - self.body.append('.IP %s\n' % - self.language.labels.get(name, name)) + # .. admonition:: has no name + self.body.append('.sp\n') + name = '%s%s:%s\n' % ( + self.defs['strong'][0], + self.language.labels.get(name, name).upper(), + self.defs['strong'][1], + ) + self.body.append(name) + self.visit_block_quote(node) def depart_admonition(self, node): - self.body.append('.RE\n') + self.depart_block_quote(node) def visit_attention(self, node): self.visit_admonition(node, 'attention') @@ -557,7 +573,7 @@ def visit_document(self, node): # no blank line between comment and header. - self.body.append(self.comment(self.document_start).rstrip()+'\n') + self.head.append(self.comment(self.document_start).rstrip()+'\n') # writing header is postboned self.header_written = 0 @@ -799,12 +815,18 @@ self.body.append(self.defs['literal'][1]) def visit_literal_block(self, node): + # BUG/HACK: indent alway uses the _last_ indention, + # thus we need two of them. 
+ self.indent(LITERAL_BLOCK_INDENT) + self.indent(0) self.body.append(self.defs['literal_block'][0]) self._in_literal = True def depart_literal_block(self, node): self._in_literal = False self.body.append(self.defs['literal_block'][1]) + self.dedent() + self.dedent() def visit_math(self, node): self.document.reporter.warning('"math" role not supported', @@ -876,7 +898,7 @@ def visit_option(self, node): # each form of the option will be presented separately if self.context[-1] > 0: - self.body.append(', ') + self.body.append('\\fP,\\fB ') if self.context[-3] == '.BI': self.body.append('\\') self.body.append(' ') Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/odf_odt/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/odf_odt/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/odf_odt/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7385 2012-03-20 00:22:38Z dkuhlman $ +# $Id: __init__.py 7717 2013-08-21 22:01:21Z milde $ # Author: Dave Kuhlman <dkuhlman@rexx.com> # Copyright: This module has been placed in the public domain. @@ -21,13 +21,10 @@ import time import re import StringIO -import inspect -import imp import copy import urllib2 import docutils from docutils import frontend, nodes, utils, writers, languages -from docutils.parsers import rst from docutils.readers import standalone from docutils.transforms import references @@ -64,12 +61,16 @@ except ImportError, exp: pygments = None -# -# Is the PIL imaging library installed? 
+# check for the Python Imaging Library try: - import Image -except ImportError, exp: - Image = None + import PIL.Image +except ImportError: + try: # sometimes PIL modules are put in PYTHONPATH's root + import Image + class PIL(object): pass # dummy wrapper + PIL.Image = Image + except ImportError: + PIL = None ## import warnings ## warnings.warn('importing IPShellEmbed', UserWarning) @@ -87,16 +88,20 @@ # that support for the ability to get the parent of an element. # if WhichElementTree == 'elementtree': - class _ElementInterfaceWrapper(etree._ElementInterface): + import weakref + _parents = weakref.WeakKeyDictionary() + if isinstance(etree.Element, type): + _ElementInterface = etree.Element + else: + _ElementInterface = etree._ElementInterface + class _ElementInterfaceWrapper(_ElementInterface): def __init__(self, tag, attrib=None): - etree._ElementInterface.__init__(self, tag, attrib) - if attrib is None: - attrib = {} - self.parent = None + _ElementInterface.__init__(self, tag, attrib) + _parents[self] = None def setparent(self, parent): - self.parent = parent + _parents[self] = parent def getparent(self): - return self.parent + return _parents[self] # @@ -133,7 +138,7 @@ 'oooc': 'http://openoffice.org/2004/calc', 'ooow': 'http://openoffice.org/2004/writer', 'presentation': 'urn:oasis:names:tc:opendocument:xmlns:presentation:1.0', - + 'script': 'urn:oasis:names:tc:opendocument:xmlns:script:1.0', 'style': 'urn:oasis:names:tc:opendocument:xmlns:style:1.0', 'svg': 'urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0', @@ -531,7 +536,7 @@ 'stylesheet_path', ) - config_section = 'opendocument odf writer' + config_section = 'odf_odt writer' config_section_dependencies = ( 'writers', ) @@ -943,7 +948,46 @@ }) el.text = text self.body_text_element.insert(0, el) + el = self.find_first_text_p(self.body_text_element) + if el is not None: + self.attach_page_style(el) + def find_first_text_p(self, el): + """Search the generated doc and return the first <text:p> 
element. + """ + if ( + el.tag == 'text:p' or + el.tag == 'text:h' + ): + return el + elif el.getchildren(): + for child in el.getchildren(): + el1 = self.find_first_text_p(child) + if el1 is not None: + return el1 + return None + else: + return None + + def attach_page_style(self, el): + """Attach the default page style. + + Create an automatic-style that refers to the current style + of this element and that refers to the default page style. + """ + current_style = el.get('text:style-name') + style_name = 'P1003' + el1 = SubElement( + self.automatic_styles, 'style:style', attrib={ + 'style:name': style_name, + 'style:master-page-name': "rststyle-pagedefault", + 'style:family': "paragraph", + }, nsdict=SNSD) + if current_style: + el1.set('style:parent-style-name', current_style) + el.set('text:style-name', style_name) + + def rststyle(self, name, parameters=( )): """ Returns the style name to use for the given style. @@ -996,7 +1040,14 @@ if master_el is None: return path = '{%s}master-page' % (SNSD['style'], ) - master_el = master_el.find(path) + master_el_container = master_el.findall(path) + master_el = None + target_attrib = '{%s}name' % (SNSD['style'], ) + target_name = self.rststyle('pagedefault') + for el in master_el_container: + if el.get(target_attrib) == target_name: + master_el = el + break if master_el is None: return el1 = master_el @@ -1450,29 +1501,6 @@ def default_departure(self, node): self.document.reporter.warning('missing depart_%s' % (node.tagname, )) -## def add_text_to_element(self, text): -## # Are we in a citation. If so, add text to current element, not -## # to children. -## # Are we in mixed content? If so, add the text to the -## # etree tail of the previous sibling element. 
-## if not self.in_citation and len(self.current_element.getchildren()) > 0: -## if self.current_element.getchildren()[-1].tail: -## self.current_element.getchildren()[-1].tail += text -## else: -## self.current_element.getchildren()[-1].tail = text -## else: -## if self.current_element.text: -## self.current_element.text += text -## else: -## self.current_element.text = text -## -## def visit_Text(self, node): -## # Skip nodes whose text has been processed in parent nodes. -## if isinstance(node.parent, docutils.nodes.literal_block): -## return -## text = node.astext() -## self.add_text_to_element(text) - def visit_Text(self, node): # Skip nodes whose text has been processed in parent nodes. if isinstance(node.parent, docutils.nodes.literal_block): @@ -1991,21 +2019,27 @@ elif self.citation_id is not None: el = self.append_p('textbody') self.set_current_element(el) - el.text = '[' if self.settings.create_links: + el0 = SubElement(el, 'text:span') + el0.text = '[' el1 = self.append_child('text:reference-mark-start', attrib={ 'text:name': '%s' % (self.citation_id, ), }) + else: + el.text = '[' def depart_label(self, node): if isinstance(node.parent, docutils.nodes.footnote): pass elif self.citation_id is not None: - self.current_element.text += ']' if self.settings.create_links: el = self.append_child('text:reference-mark-end', attrib={ 'text:name': '%s' % (self.citation_id, ), }) + el0 = SubElement(self.current_element, 'text:span') + el0.text = ']' + else: + self.current_element.text += ']' self.set_to_parent() def visit_generated(self, node): @@ -2123,9 +2157,9 @@ height = self.get_image_width_height(node, 'height') dpi = (72, 72) - if Image is not None and source in self.image_dict: + if PIL is not None and source in self.image_dict: filename, destination = self.image_dict[source] - imageobj = Image.open(filename, 'r') + imageobj = PIL.Image.open(filename, 'r') dpi = imageobj.info.get('dpi', dpi) # dpi information can be (xdpi, ydpi) or xydpi try: iter(dpi) @@ 
-2223,8 +2257,6 @@ 'draw:z-index': '0', } attrib['svg:width'] = width - # dbg - #attrib['svg:height'] = height el3 = SubElement(current_element, 'draw:frame', attrib=attrib) attrib = {} el4 = SubElement(el3, 'draw:text-box', attrib=attrib) Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 6328 2010-05-23 21:20:29Z gbrandl $ +# $Id: __init__.py 7630 2013-03-15 22:27:04Z milde $ # Author: David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -52,8 +52,7 @@ settings_default_overrides = {'stylesheet_path': default_stylesheet_path, 'template': default_template_path,} - relative_path_settings = (html4css1.Writer.relative_path_settings - + ('template',)) + relative_path_settings = ('template',) config_section = 'pep_html writer' config_section_dependencies = ('writers', 'html4css1 writer') Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/pep.css =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/pep.css 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/pep_html/pep.css 2014-09-29 12:39:28 UTC (rev 756) @@ -1,7 +1,7 @@ /* :Author: David Goodger :Contact: goodger@python.org -:date: $Date: 2006-05-21 22:44:42 +0200 (Son, 21 Mai 2006) $ +:date: $Date: 2006-05-21 22:44:42 +0200 (Son, 21. Mai 2006) $ :version: $Revision: 4564 $ :copyright: This stylesheet has been placed in the public domain. 
Modified: trunk/docutils/src/main/resources/docutils/docutils/writers/s5_html/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/s5_html/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/s5_html/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,4 +1,4 @@ -# $Id: __init__.py 7320 2012-01-19 22:33:02Z milde $ +# $Id: __init__.py 7720 2013-09-05 12:54:56Z milde $ # Authors: Chris Liechti <cliechti@gmx.net>; # David Goodger <goodger@python.org> # Copyright: This module has been placed in the public domain. @@ -285,23 +285,29 @@ self.html_prolog.append(self.doctype) self.meta.insert(0, self.content_type % self.settings.output_encoding) self.head.insert(0, self.content_type % self.settings.output_encoding) - + if self.math_header: + if self.math_output == 'mathjax': + self.head.extend(self.math_header) + else: + self.stylesheet.extend(self.math_header) + # skip content-type meta tag with interpolated charset value: + self.html_head.extend(self.head[1:]) + self.fragment.extend(self.body) + # special S5 code up to the next comment line header = ''.join(self.s5_header) footer = ''.join(self.s5_footer) title = ''.join(self.html_title).replace('<h1 class="title">', '<h1>') layout = self.layout_template % {'header': header, 'title': title, 'footer': footer} - self.fragment.extend(self.body) self.body_prefix.extend(layout) self.body_prefix.append('<div class="presentation">\n') self.body_prefix.append( self.starttag({'classes': ['slide'], 'ids': ['slide0']}, 'div')) if not self.section_count: self.body.append('</div>\n') + # self.body_suffix.insert(0, '</div>\n') - # skip content-type meta tag with interpolated charset value: - self.html_head.extend(self.head[1:]) self.html_body.extend(self.body_prefix[1:] + self.body_pre_docinfo + self.docinfo + self.body + self.body_suffix[:-1]) Modified: 
trunk/docutils/src/main/resources/docutils/docutils/writers/xetex/__init__.py =================================================================== --- trunk/docutils/src/main/resources/docutils/docutils/writers/xetex/__init__.py 2014-09-29 12:38:55 UTC (rev 755) +++ trunk/docutils/src/main/resources/docutils/docutils/writers/xetex/__init__.py 2014-09-29 12:39:28 UTC (rev 756) @@ -1,9 +1,9 @@ #!/usr/bin/env python -# -*- coding: utf8 -*- +# -*- coding: utf-8 -*- # :Author: Günter Milde <milde@users.sourceforge.net> -# :Revision: $Revision: 7389 $ -# :Date: $Date: 2012-03-30 13:58:21 +0200 (Fre, 30 Mär 2012) $ +# :Revision: $Revision: 7668 $ +# :Date: $Date: 2013-06-04 14:46:30 +0200 (Die, 04. Jun 2013) $ # :Copyright: © 2010 Günter Milde. # :License: Released under the terms of the `2-Clause BSD license`_, in short: # @@ -42,7 +42,7 @@ r'% Linux Libertine (free, wide coverage, not only for Linux)', r'\setmainfont{Linux Libertine O}', r'\setsansfont{Linux Biolinum O}', - r'\setmonofont[HyphenChar=None]{DejaVu Sans Mono}', + r'\setmonofont[HyphenChar=None,Scale=MatchLowercase]{DejaVu Sans Mono}', ]) config_section = 'xetex writer' @@ -76,30 +76,33 @@ # code Polyglossia-name comment 'cop': 'coptic', 'de': 'german', # new spelling (de_1996) - 'de_1901': 'ogerman', # old spelling + 'de-1901': 'ogerman', # old spelling 'dv': 'divehi', # Maldivian 'dsb': 'lsorbian', - 'el_polyton': 'polygreek', + 'el-polyton': 'polygreek', 'fa': 'farsi', 'grc': 'ancientgreek', 'hsb': 'usorbian', - 'sh-cyrl': 'serbian', # Serbo-Croatian, Cyrillic script - 'sh-latn': 'croatian', # Serbo-Croatian, Latin script + 'sh-Cyrl': 'serbian', # Serbo-Croatian, Cyrillic script + 'sh-Latn': 'croatian', # Serbo-Croatian, Latin script 'sq': 'albanian', - 'sr': 'serbian', # Cyrillic script (sr-cyrl) + 'sr': 'serbian', # Cyrillic script (sr-Cyrl) 'th': 'thai', 'vi': 'vietnamese', - # zh-latn: ??? # Chinese Pinyin + # zh-Latn: ??? 
# Chinese Pinyin }) + # normalize (downcase) keys + language_codes = dict([(k.lower(), v) for (k,v) in language_codes.items()]) + # Languages without Polyglossia support: for key in ('af', # 'afrikaans', - 'de_at', # 'naustrian', - 'de_at_1901', # 'austrian', - 'fr_ca', # 'canadien', - 'grc_ibycus', # 'ibycus', (Greek Ibycus encoding) - 'sr-latn', # 'serbian script=latin' + 'de-AT', # 'naustrian', + 'de-AT-1901', # 'austrian', + 'fr-CA', # 'canadien', + 'grc-ibycus', # 'ibycus', (Greek Ibycus encoding) + 'sr-Latn', # 'serbian script=latin' ): - del(language_codes[key]) + del(language_codes[key.lower()]) def __init__(self, language_code, reporter): self.language_code = language_code @@ -118,7 +121,7 @@ r'\setdefaultlanguage{%s}' % self.language] if self.otherlanguages: setup.append(r'\setotherlanguages{%s}' % - ','.join(self.otherlanguages.keys())) + ','.join(sorted(self.otherlanguages.keys()))) return '\n'.join(setup)
participants (1)
-
echatellier@users.nuiton.org