diff options
Diffstat (limited to 'lib/jinja2/_markupsafe')
| -rw-r--r-- | lib/jinja2/_markupsafe/__init__.py | 225 | ||||
| -rw-r--r-- | lib/jinja2/_markupsafe/_bundle.py | 49 | ||||
| -rw-r--r-- | lib/jinja2/_markupsafe/_constants.py | 267 | ||||
| -rw-r--r-- | lib/jinja2/_markupsafe/_native.py | 45 | ||||
| -rw-r--r-- | lib/jinja2/_markupsafe/tests.py | 80 | 
5 files changed, 666 insertions, 0 deletions
| diff --git a/lib/jinja2/_markupsafe/__init__.py b/lib/jinja2/_markupsafe/__init__.py new file mode 100644 index 000000000..ec7bd572d --- /dev/null +++ b/lib/jinja2/_markupsafe/__init__.py @@ -0,0 +1,225 @@ +# -*- coding: utf-8 -*- +""" +    markupsafe +    ~~~~~~~~~~ + +    Implements a Markup string. + +    :copyright: (c) 2010 by Armin Ronacher. +    :license: BSD, see LICENSE for more details. +""" +import re +from itertools import imap + + +__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent'] + + +_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)') +_entity_re = re.compile(r'&([^;]+);') + + +class Markup(unicode): +    r"""Marks a string as being safe for inclusion in HTML/XML output without +    needing to be escaped.  This implements the `__html__` interface a couple +    of frameworks and web applications use.  :class:`Markup` is a direct +    subclass of `unicode` and provides all the methods of `unicode` just that +    it escapes arguments passed and always returns `Markup`. + +    The `escape` function returns markup objects so that double escaping can't +    happen. + +    The constructor of the :class:`Markup` class can be used for three +    different things:  When passed an unicode object it's assumed to be safe, +    when passed an object with an HTML representation (has an `__html__` +    method) that representation is used, otherwise the object passed is +    converted into a unicode string and then assumed to be safe: + +    >>> Markup("Hello <em>World</em>!") +    Markup(u'Hello <em>World</em>!') +    >>> class Foo(object): +    ...  def __html__(self): +    ...   return '<a href="#">foo</a>' +    ...  
+    >>> Markup(Foo()) +    Markup(u'<a href="#">foo</a>') + +    If you want object passed being always treated as unsafe you can use the +    :meth:`escape` classmethod to create a :class:`Markup` object: + +    >>> Markup.escape("Hello <em>World</em>!") +    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!') + +    Operations on a markup string are markup aware which means that all +    arguments are passed through the :func:`escape` function: + +    >>> em = Markup("<em>%s</em>") +    >>> em % "foo & bar" +    Markup(u'<em>foo &amp; bar</em>') +    >>> strong = Markup("<strong>%(text)s</strong>") +    >>> strong % {'text': '<blink>hacker here</blink>'} +    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>') +    >>> Markup("<em>Hello</em> ") + "<foo>" +    Markup(u'<em>Hello</em> &lt;foo&gt;') +    """ +    __slots__ = () + +    def __new__(cls, base=u'', encoding=None, errors='strict'): +        if hasattr(base, '__html__'): +            base = base.__html__() +        if encoding is None: +            return unicode.__new__(cls, base) +        return unicode.__new__(cls, base, encoding, errors) + +    def __html__(self): +        return self + +    def __add__(self, other): +        if hasattr(other, '__html__') or isinstance(other, basestring): +            return self.__class__(unicode(self) + unicode(escape(other))) +        return NotImplemented + +    def __radd__(self, other): +        if hasattr(other, '__html__') or isinstance(other, basestring): +            return self.__class__(unicode(escape(other)) + unicode(self)) +        return NotImplemented + +    def __mul__(self, num): +        if isinstance(num, (int, long)): +            return self.__class__(unicode.__mul__(self, num)) +        return NotImplemented +    __rmul__ = __mul__ + +    def __mod__(self, arg): +        if isinstance(arg, tuple): +            arg = tuple(imap(_MarkupEscapeHelper, arg)) +        else: +            arg = _MarkupEscapeHelper(arg) +        return self.__class__(unicode.__mod__(self, arg)) 
+ +    def __repr__(self): +        return '%s(%s)' % ( +            self.__class__.__name__, +            unicode.__repr__(self) +        ) + +    def join(self, seq): +        return self.__class__(unicode.join(self, imap(escape, seq))) +    join.__doc__ = unicode.join.__doc__ + +    def split(self, *args, **kwargs): +        return map(self.__class__, unicode.split(self, *args, **kwargs)) +    split.__doc__ = unicode.split.__doc__ + +    def rsplit(self, *args, **kwargs): +        return map(self.__class__, unicode.rsplit(self, *args, **kwargs)) +    rsplit.__doc__ = unicode.rsplit.__doc__ + +    def splitlines(self, *args, **kwargs): +        return map(self.__class__, unicode.splitlines(self, *args, **kwargs)) +    splitlines.__doc__ = unicode.splitlines.__doc__ + +    def unescape(self): +        r"""Unescape markup again into an unicode string.  This also resolves +        known HTML4 and XHTML entities: + +        >>> Markup("Main &raquo; <em>About</em>").unescape() +        u'Main \xbb <em>About</em>' +        """ +        from jinja2._markupsafe._constants import HTML_ENTITIES +        def handle_match(m): +            name = m.group(1) +            if name in HTML_ENTITIES: +                return unichr(HTML_ENTITIES[name]) +            try: +                if name[:2] in ('#x', '#X'): +                    return unichr(int(name[2:], 16)) +                elif name.startswith('#'): +                    return unichr(int(name[1:])) +            except ValueError: +                pass +            return u'' +        return _entity_re.sub(handle_match, unicode(self)) + +    def striptags(self): +        r"""Unescape markup into an unicode string and strip all tags.  This +        also resolves known HTML4 and XHTML entities.  
Whitespace is +        normalized to one: + +        >>> Markup("Main »  <em>About</em>").striptags() +        u'Main \xbb About' +        """ +        stripped = u' '.join(_striptags_re.sub('', self).split()) +        return Markup(stripped).unescape() + +    @classmethod +    def escape(cls, s): +        """Escape the string.  Works like :func:`escape` with the difference +        that for subclasses of :class:`Markup` this function would return the +        correct subclass. +        """ +        rv = escape(s) +        if rv.__class__ is not cls: +            return cls(rv) +        return rv + +    def make_wrapper(name): +        orig = getattr(unicode, name) +        def func(self, *args, **kwargs): +            args = _escape_argspec(list(args), enumerate(args)) +            _escape_argspec(kwargs, kwargs.iteritems()) +            return self.__class__(orig(self, *args, **kwargs)) +        func.__name__ = orig.__name__ +        func.__doc__ = orig.__doc__ +        return func + +    for method in '__getitem__', 'capitalize', \ +                  'title', 'lower', 'upper', 'replace', 'ljust', \ +                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \ +                  'translate', 'expandtabs', 'swapcase', 'zfill': +        locals()[method] = make_wrapper(method) + +    # new in python 2.5 +    if hasattr(unicode, 'partition'): +        partition = make_wrapper('partition'), +        rpartition = make_wrapper('rpartition') + +    # new in python 2.6 +    if hasattr(unicode, 'format'): +        format = make_wrapper('format') + +    # not in python 3 +    if hasattr(unicode, '__getslice__'): +        __getslice__ = make_wrapper('__getslice__') + +    del method, make_wrapper + + +def _escape_argspec(obj, iterable): +    """Helper for various string-wrapped functions.""" +    for key, value in iterable: +        if hasattr(value, '__html__') or isinstance(value, basestring): +            obj[key] = escape(value) +    return obj + + +class 
_MarkupEscapeHelper(object): +    """Helper for Markup.__mod__""" + +    def __init__(self, obj): +        self.obj = obj + +    __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x]) +    __str__ = lambda s: str(escape(s.obj)) +    __unicode__ = lambda s: unicode(escape(s.obj)) +    __repr__ = lambda s: str(escape(repr(s.obj))) +    __int__ = lambda s: int(s.obj) +    __float__ = lambda s: float(s.obj) + + +# we have to import it down here as the speedups and native +# modules imports the markup type which is define above. +try: +    from jinja2._markupsafe._speedups import escape, escape_silent, soft_unicode +except ImportError: +    from jinja2._markupsafe._native import escape, escape_silent, soft_unicode diff --git a/lib/jinja2/_markupsafe/_bundle.py b/lib/jinja2/_markupsafe/_bundle.py new file mode 100644 index 000000000..e694faf23 --- /dev/null +++ b/lib/jinja2/_markupsafe/_bundle.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- +""" +    jinja2._markupsafe._bundle +    ~~~~~~~~~~~~~~~~~~~~~~~~~~ + +    This script pulls in markupsafe from a source folder and +    bundles it with Jinja2.  It does not pull in the speedups +    module though. + +    :copyright: Copyright 2010 by the Jinja team, see AUTHORS. +    :license: BSD, see LICENSE for details. 
+""" +import sys +import os +import re + + +def rewrite_imports(lines): +    for idx, line in enumerate(lines): +        new_line = re.sub(r'(import|from)\s+markupsafe\b', +                          r'\1 jinja2._markupsafe', line) +        if new_line != line: +            lines[idx] = new_line + + +def main(): +    if len(sys.argv) != 2: +        print 'error: only argument is path to markupsafe' +        sys.exit(1) +    basedir = os.path.dirname(__file__) +    markupdir = sys.argv[1] +    for filename in os.listdir(markupdir): +        if filename.endswith('.py'): +            f = open(os.path.join(markupdir, filename)) +            try: +                lines = list(f) +            finally: +                f.close() +            rewrite_imports(lines) +            f = open(os.path.join(basedir, filename), 'w') +            try: +                for line in lines: +                    f.write(line) +            finally: +                f.close() + + +if __name__ == '__main__': +    main() diff --git a/lib/jinja2/_markupsafe/_constants.py b/lib/jinja2/_markupsafe/_constants.py new file mode 100644 index 000000000..919bf03c5 --- /dev/null +++ b/lib/jinja2/_markupsafe/_constants.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- +""" +    markupsafe._constants +    ~~~~~~~~~~~~~~~~~~~~~ + +    Highlevel implementation of the Markup string. + +    :copyright: (c) 2010 by Armin Ronacher. +    :license: BSD, see LICENSE for more details. 
+""" + + +HTML_ENTITIES = { +    'AElig': 198, +    'Aacute': 193, +    'Acirc': 194, +    'Agrave': 192, +    'Alpha': 913, +    'Aring': 197, +    'Atilde': 195, +    'Auml': 196, +    'Beta': 914, +    'Ccedil': 199, +    'Chi': 935, +    'Dagger': 8225, +    'Delta': 916, +    'ETH': 208, +    'Eacute': 201, +    'Ecirc': 202, +    'Egrave': 200, +    'Epsilon': 917, +    'Eta': 919, +    'Euml': 203, +    'Gamma': 915, +    'Iacute': 205, +    'Icirc': 206, +    'Igrave': 204, +    'Iota': 921, +    'Iuml': 207, +    'Kappa': 922, +    'Lambda': 923, +    'Mu': 924, +    'Ntilde': 209, +    'Nu': 925, +    'OElig': 338, +    'Oacute': 211, +    'Ocirc': 212, +    'Ograve': 210, +    'Omega': 937, +    'Omicron': 927, +    'Oslash': 216, +    'Otilde': 213, +    'Ouml': 214, +    'Phi': 934, +    'Pi': 928, +    'Prime': 8243, +    'Psi': 936, +    'Rho': 929, +    'Scaron': 352, +    'Sigma': 931, +    'THORN': 222, +    'Tau': 932, +    'Theta': 920, +    'Uacute': 218, +    'Ucirc': 219, +    'Ugrave': 217, +    'Upsilon': 933, +    'Uuml': 220, +    'Xi': 926, +    'Yacute': 221, +    'Yuml': 376, +    'Zeta': 918, +    'aacute': 225, +    'acirc': 226, +    'acute': 180, +    'aelig': 230, +    'agrave': 224, +    'alefsym': 8501, +    'alpha': 945, +    'amp': 38, +    'and': 8743, +    'ang': 8736, +    'apos': 39, +    'aring': 229, +    'asymp': 8776, +    'atilde': 227, +    'auml': 228, +    'bdquo': 8222, +    'beta': 946, +    'brvbar': 166, +    'bull': 8226, +    'cap': 8745, +    'ccedil': 231, +    'cedil': 184, +    'cent': 162, +    'chi': 967, +    'circ': 710, +    'clubs': 9827, +    'cong': 8773, +    'copy': 169, +    'crarr': 8629, +    'cup': 8746, +    'curren': 164, +    'dArr': 8659, +    'dagger': 8224, +    'darr': 8595, +    'deg': 176, +    'delta': 948, +    'diams': 9830, +    'divide': 247, +    'eacute': 233, +    'ecirc': 234, +    'egrave': 232, +    'empty': 8709, +    'emsp': 8195, +    'ensp': 8194, +    'epsilon': 949, 
+    'equiv': 8801, +    'eta': 951, +    'eth': 240, +    'euml': 235, +    'euro': 8364, +    'exist': 8707, +    'fnof': 402, +    'forall': 8704, +    'frac12': 189, +    'frac14': 188, +    'frac34': 190, +    'frasl': 8260, +    'gamma': 947, +    'ge': 8805, +    'gt': 62, +    'hArr': 8660, +    'harr': 8596, +    'hearts': 9829, +    'hellip': 8230, +    'iacute': 237, +    'icirc': 238, +    'iexcl': 161, +    'igrave': 236, +    'image': 8465, +    'infin': 8734, +    'int': 8747, +    'iota': 953, +    'iquest': 191, +    'isin': 8712, +    'iuml': 239, +    'kappa': 954, +    'lArr': 8656, +    'lambda': 955, +    'lang': 9001, +    'laquo': 171, +    'larr': 8592, +    'lceil': 8968, +    'ldquo': 8220, +    'le': 8804, +    'lfloor': 8970, +    'lowast': 8727, +    'loz': 9674, +    'lrm': 8206, +    'lsaquo': 8249, +    'lsquo': 8216, +    'lt': 60, +    'macr': 175, +    'mdash': 8212, +    'micro': 181, +    'middot': 183, +    'minus': 8722, +    'mu': 956, +    'nabla': 8711, +    'nbsp': 160, +    'ndash': 8211, +    'ne': 8800, +    'ni': 8715, +    'not': 172, +    'notin': 8713, +    'nsub': 8836, +    'ntilde': 241, +    'nu': 957, +    'oacute': 243, +    'ocirc': 244, +    'oelig': 339, +    'ograve': 242, +    'oline': 8254, +    'omega': 969, +    'omicron': 959, +    'oplus': 8853, +    'or': 8744, +    'ordf': 170, +    'ordm': 186, +    'oslash': 248, +    'otilde': 245, +    'otimes': 8855, +    'ouml': 246, +    'para': 182, +    'part': 8706, +    'permil': 8240, +    'perp': 8869, +    'phi': 966, +    'pi': 960, +    'piv': 982, +    'plusmn': 177, +    'pound': 163, +    'prime': 8242, +    'prod': 8719, +    'prop': 8733, +    'psi': 968, +    'quot': 34, +    'rArr': 8658, +    'radic': 8730, +    'rang': 9002, +    'raquo': 187, +    'rarr': 8594, +    'rceil': 8969, +    'rdquo': 8221, +    'real': 8476, +    'reg': 174, +    'rfloor': 8971, +    'rho': 961, +    'rlm': 8207, +    'rsaquo': 8250, +    'rsquo': 8217, +    
'sbquo': 8218, +    'scaron': 353, +    'sdot': 8901, +    'sect': 167, +    'shy': 173, +    'sigma': 963, +    'sigmaf': 962, +    'sim': 8764, +    'spades': 9824, +    'sub': 8834, +    'sube': 8838, +    'sum': 8721, +    'sup': 8835, +    'sup1': 185, +    'sup2': 178, +    'sup3': 179, +    'supe': 8839, +    'szlig': 223, +    'tau': 964, +    'there4': 8756, +    'theta': 952, +    'thetasym': 977, +    'thinsp': 8201, +    'thorn': 254, +    'tilde': 732, +    'times': 215, +    'trade': 8482, +    'uArr': 8657, +    'uacute': 250, +    'uarr': 8593, +    'ucirc': 251, +    'ugrave': 249, +    'uml': 168, +    'upsih': 978, +    'upsilon': 965, +    'uuml': 252, +    'weierp': 8472, +    'xi': 958, +    'yacute': 253, +    'yen': 165, +    'yuml': 255, +    'zeta': 950, +    'zwj': 8205, +    'zwnj': 8204 +} diff --git a/lib/jinja2/_markupsafe/_native.py b/lib/jinja2/_markupsafe/_native.py new file mode 100644 index 000000000..7b95828ec --- /dev/null +++ b/lib/jinja2/_markupsafe/_native.py @@ -0,0 +1,45 @@ +# -*- coding: utf-8 -*- +""" +    markupsafe._native +    ~~~~~~~~~~~~~~~~~~ + +    Native Python implementation the C module is not compiled. + +    :copyright: (c) 2010 by Armin Ronacher. +    :license: BSD, see LICENSE for more details. +""" +from jinja2._markupsafe import Markup + + +def escape(s): +    """Convert the characters &, <, >, ' and " in string s to HTML-safe +    sequences.  Use this if you need to display text that might contain +    such characters in HTML.  Marks return value as markup string. +    """ +    if hasattr(s, '__html__'): +        return s.__html__() +    return Markup(unicode(s) +        .replace('&', '&amp;') +        .replace('>', '&gt;') +        .replace('<', '&lt;') +        .replace("'", '&#39;') +        .replace('"', '&#34;') +    ) + + +def escape_silent(s): +    """Like :func:`escape` but converts `None` into an empty +    markup string. 
+    """ +    if s is None: +        return Markup() +    return escape(s) + + +def soft_unicode(s): +    """Make a string unicode if it isn't already.  That way a markup +    string is not converted back to unicode. +    """ +    if not isinstance(s, unicode): +        s = unicode(s) +    return s diff --git a/lib/jinja2/_markupsafe/tests.py b/lib/jinja2/_markupsafe/tests.py new file mode 100644 index 000000000..c1ce3943a --- /dev/null +++ b/lib/jinja2/_markupsafe/tests.py @@ -0,0 +1,80 @@ +import gc +import unittest +from jinja2._markupsafe import Markup, escape, escape_silent + + +class MarkupTestCase(unittest.TestCase): + +    def test_markup_operations(self): +        # adding two strings should escape the unsafe one +        unsafe = '<script type="application/x-some-script">alert("foo");</script>' +        safe = Markup('<em>username</em>') +        assert unsafe + safe == unicode(escape(unsafe)) + unicode(safe) + +        # string interpolations are safe to use too +        assert Markup('<em>%s</em>') % '<bad user>' == \ +               '<em>&lt;bad user&gt;</em>' +        assert Markup('<em>%(username)s</em>') % { +            'username': '<bad user>' +        } == '<em>&lt;bad user&gt;</em>' + +        # an escaped object is markup too +        assert type(Markup('foo') + 'bar') is Markup + +        # and it implements __html__ by returning itself +        x = Markup("foo") +        assert x.__html__() is x + +        # it also knows how to treat __html__ objects +        class Foo(object): +            def __html__(self): +                return '<em>awesome</em>' +            def __unicode__(self): +                return 'awesome' +        assert Markup(Foo()) == '<em>awesome</em>' +        assert Markup('<strong>%s</strong>') % Foo() == \ +               '<strong><em>awesome</em></strong>' + +        # escaping and unescaping +        assert escape('"<>&\'') == '&#34;&lt;&gt;&amp;&#39;' +        assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar" +        assert 
Markup("&lt;test&gt;").unescape() == "<test>" + +    def test_all_set(self): +        import jinja2._markupsafe as markup +        for item in markup.__all__: +            getattr(markup, item) + +    def test_escape_silent(self): +        assert escape_silent(None) == Markup() +        assert escape(None) == Markup(None) +        assert escape_silent('<foo>') == Markup(u'&lt;foo&gt;') + + +class MarkupLeakTestCase(unittest.TestCase): + +    def test_markup_leaks(self): +        counts = set() +        for count in xrange(20): +            for item in xrange(1000): +                escape("foo") +                escape("<foo>") +                escape(u"foo") +                escape(u"<foo>") +            counts.add(len(gc.get_objects())) +        assert len(counts) == 1, 'ouch, c extension seems to leak objects' + + +def suite(): +    suite = unittest.TestSuite() +    suite.addTest(unittest.makeSuite(MarkupTestCase)) + +    # this test only tests the c extension +    if not hasattr(escape, 'func_code'): +        suite.addTest(unittest.makeSuite(MarkupLeakTestCase)) + +    return suite + + +if __name__ == '__main__': +    unittest.main(defaultTest='suite') | 
