heroes-renaissance/pyglet/text/formats/html.py

# ----------------------------------------------------------------------------
# pyglet
# Copyright (c) 2006-2008 Alex Holkner
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions 
# are met:
#
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright 
#    notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
#  * Neither the name of pyglet nor the names of its
#    contributors may be used to endorse or promote products
#    derived from this software without specific prior written
#    permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
# ----------------------------------------------------------------------------

'''Decode HTML into attributed text.

A subset of HTML 4.01 Transitional is implemented.  The following elements are
supported fully::

    B BLOCKQUOTE BR CENTER CODE DD DIR DL EM FONT H1 H2 H3 H4 H5 H6 I IMG KBD
    LI MENU OL P PRE Q SAMP STRONG SUB SUP TT U UL VAR 

The mark (bullet or number) of a list item is separated from the body of the
list item with a tab, as the pyglet document model does not allow
out-of-stream text.  This means lists display as expected, but behave a little
oddly if edited.

No CSS styling is supported.
'''

__docformat__ = 'restructuredtext'
__version__ = '$Id: $'

import HTMLParser
import htmlentitydefs
import os
import re

import pyglet
from pyglet.text.formats import structured

def _hex_color(val):
    '''Unpack a ``0xRRGGBB`` integer into an ``[r, g, b, a]`` list.

    Each colour channel is taken from one byte of `val`; the alpha
    component is always 255.  A fresh list is returned on every call.
    '''
    return [(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff, 255]

# The sixteen colour names defined by HTML 4.01, mapped to RGBA lists.
_color_names = {
    'black':    _hex_color(0x000000),
    'silver':   _hex_color(0xc0c0c0),
    'gray':     _hex_color(0x808080),
    'white':    _hex_color(0xffffff),
    'maroon':   _hex_color(0x800000),
    'red':      _hex_color(0xff0000),
    'purple':   _hex_color(0x800080),
    'fuchsia':  _hex_color(0xff00ff),
    'green':    _hex_color(0x008000),
    'lime':     _hex_color(0x00ff00),
    'olive':    _hex_color(0x808000),
    'yellow':   _hex_color(0xffff00),
    'navy':     _hex_color(0x000080),
    'blue':     _hex_color(0x0000ff),
    'teal':     _hex_color(0x008080),
    'aqua':     _hex_color(0x00ffff),
    # Historical misspelling of "fuchsia"; the key is kept so documents
    # that relied on it still resolve to a colour.
    'fucsia':   _hex_color(0xff00ff),
}

def _parse_color(value):
    '''Translate an HTML colour attribute value into an RGBA list.

    A value starting with ``#`` has the rest of the string read as a
    hexadecimal number and unpacked with `_hex_color`.  Any other value is
    lower-cased and looked up in `_color_names`.

    :raise ValueError: if the hexadecimal digits cannot be parsed, or the
        name is not a key of `_color_names`.
    '''
    if not value.startswith('#'):
        name = value.lower()
        if name in _color_names:
            return _color_names[name]
        raise ValueError()
    return _hex_color(int(value[1:], 16))

# Matches a run of space, tab, form feed, zero-width space, CR or LF.
# (re.DOTALL only changes the meaning of '.', which this pattern does not
# use; the flag is retained as-is.)
_whitespace_re = re.compile(u'[\u0020\u0009\u000c\u200b\r\n]+', re.DOTALL)

# Elements whose start tag switches the decoder into "metadata" mode.
_metadata_elements = ['head', 'title']

# Elements that begin on a new line of the decoded text.
_block_elements = [
    'p',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
    'ul', 'ol', 'dir', 'menu',
    'pre', 'dl', 'div', 'center',
    'noscript', 'noframes', 'blockquote', 'form',
    'isindex', 'hr', 'table', 'fieldset', 'address',
    # Not block elements strictly speaking, but list items are treated
    # as blocks here:
    'li', 'dd', 'dt',
]

# Elements that may stay open when a block element starts inside them;
# anything else on the element stack is closed first.
_block_containers = [
    '_top_block',
    'body', 'div', 'center', 'object', 'applet',
    'blockquote', 'ins', 'del', 'dd', 'li', 'form',
    'fieldset', 'button', 'th', 'td', 'iframe', 'noscript',
    'noframes',
    # Not strictly correct, but required because list items are treated
    # as blocks:
    'ul', 'ol', 'dir', 'menu', 'dl',
]


class HTMLDecoder(HTMLParser.HTMLParser, structured.StructuredTextDecoder):
    '''Decoder for HTML documents.

    The ``HTMLParser`` callbacks defined here (`handle_starttag`,
    `handle_endtag`, `handle_data`, `handle_entityref`, `handle_charref`)
    turn the tag stream into text and style runs by calling ``add_text``,
    ``add_element``, ``push_style`` and ``pop_style``.  Those methods, and
    the ``current_style`` and ``list_stack`` attributes, are not defined in
    this class; they are presumably provided by
    `structured.StructuredTextDecoder`.
    '''
    #: Default style attributes for unstyled text in the HTML document.
    #:
    #: :type: dict
    default_style = {
        'font_name': 'Times New Roman',
        'font_size': 12,
        'margin_bottom': '12pt',
    }

    #: Map HTML font sizes to actual font sizes, in points.
    #: 
    #: :type: dict
    font_sizes = {
        1: 8,
        2: 10,
        3: 12,
        4: 14,
        5: 18,
        6: 24,
        7: 48
    }

    def decode_structured(self, text, location):
        '''Reset the per-document parser state, then parse `text`.

        :Parameters:
            `text` : str
                HTML source to decode.
            `location` : object
                Stored on the decoder; `get_image` calls its
                ``open(filename)`` method to read image files.

        '''
        self.location = location
        # HTML font size index (a key of `font_sizes`) currently in effect;
        # one entry is pushed per open FONT element.
        self._font_size_stack = [3]
        # Placeholder builder that is never popped, so an LI outside of any
        # list still finds a builder at list_stack[-1].
        self.list_stack.append(structured.UnorderedListBuilder({}))
        self.strip_leading_space = True
        self.block_begin = True
        self.need_block_begin = False
        self.element_stack = ['_top_block']
        self.in_metadata = False
        self.in_pre = False

        self.push_style('_default', self.default_style)

        self.feed(text)
        self.close()

    def get_image(self, filename):
        '''Load the image referenced by an IMG element.

        The file is opened through ``self.location.open(filename)`` and
        handed to `pyglet.image.load`; the result of that call is returned.
        '''
        return pyglet.image.load(filename, file=self.location.open(filename))

    def prepare_for_data(self):
        '''Emit the paragraph break owed by a previously closed block.

        Closing a block element only records that a break is needed; the
        newline is written here, immediately before the next content.
        '''
        if self.need_block_begin:
            self.add_text('\n')
            self.block_begin = True
            self.need_block_begin = False

    def handle_data(self, data):
        '''Add character data to the document.

        Data inside metadata elements is discarded and data inside PRE is
        added verbatim.  Otherwise runs of whitespace are collapsed to a
        single space, and leading whitespace is removed at the start of a
        block or after text that already ended with a space.
        '''
        if self.in_metadata:
            return

        if self.in_pre:
            self.add_text(data)
            return

        data = _whitespace_re.sub(' ', data)
        if data.strip():
            self.prepare_for_data()
            if self.block_begin or self.strip_leading_space:
                data = data.lstrip()
                self.block_begin = False
            self.add_text(data)
        elif data and not (self.block_begin or self.need_block_begin or
                           self.strip_leading_space):
            # Whitespace-only data between two pieces of inline content
            # (e.g. "<b>a</b> <i>b</i>") is significant; without this the
            # two words would be joined.
            self.add_text(data)
        self.strip_leading_space = data.endswith(' ')

    def _clamp_font_size(self, size):
        '''Limit an HTML font size index to the range of `font_sizes` keys.
        '''
        if not self.font_sizes:
            return size
        smallest = min(self.font_sizes)
        largest = max(self.font_sizes)
        return max(smallest, min(size, largest))

    def _parse_dimension(self, value):
        '''Return `value` as an int, or None if it is missing or is not a
        plain integer (for example ``"50%"``).
        '''
        if not value:
            return None
        try:
            return int(value)
        except ValueError:
            return None

    def _apply_font_attrs(self, attrs, style):
        '''Fill `style` from the attributes of a FONT element.

        Exactly one entry is pushed onto the font size stack, whether or
        not a ``size`` attribute is present, so that closing the element
        can always pop one.
        '''
        if 'face' in attrs:
            # Comma-separated list of candidate font names.
            faces = [face.strip() for face in attrs['face'].split(',')]
            faces = [face for face in faces if face]
            if faces:
                style['font_name'] = faces

        if 'size' in attrs:
            size = attrs['size'].strip()
            try:
                if size.startswith('+'):
                    size = self._font_size_stack[-1] + int(size[1:])
                elif size.startswith('-'):
                    size = self._font_size_stack[-1] - int(size[1:])
                else:
                    size = int(size)
            except ValueError:
                size = 3
            # Out-of-range sizes (e.g. "+5" on top of 3) use the nearest
            # defined size instead of being ignored.
            size = self._clamp_font_size(size)
            if size in self.font_sizes:
                style['font_size'] = self.font_sizes[size]
        else:
            size = self._font_size_stack[-1]
        self._font_size_stack.append(size)

        if 'color' in attrs:
            try:
                style['color'] = _parse_color(attrs['color'])
            except ValueError:
                # Unrecognised colour: keep the current colour.
                pass

    def handle_starttag(self, tag, case_attrs):
        '''Open an element: update the block state, push its style and
        perform any element-specific action (line break, list mark, image).

        Start tags are ignored entirely while inside a metadata element.
        '''
        if self.in_metadata:
            return

        element = tag.lower()
        # Attributes written without a value are reported with a value of
        # None; none of the attributes used below is meaningful without a
        # value, so such attributes are treated as absent.
        attrs = {}
        for key, value in case_attrs:
            if value is not None:
                attrs[key.lower()] = value

        if element in _metadata_elements:
            self.in_metadata = True
        elif element in _block_elements:
            # Pop off elements until we get to a block container.
            while self.element_stack[-1] not in _block_containers:
                self.handle_endtag(self.element_stack[-1])
            if not self.block_begin:
                self.add_text('\n')
                self.block_begin = True
                self.need_block_begin = False
        self.element_stack.append(element)

        style = {}
        if element in ('b', 'strong'):
            style['bold'] = True
        elif element in ('i', 'em', 'var'):
            style['italic'] = True
        elif element in ('tt', 'code', 'samp', 'kbd'):
            style['font_name'] = 'Courier New'
        elif element == 'u':
            # The underline takes the current text colour, or black.
            color = self.current_style.get('color')
            if color is None:
                color = [0, 0, 0, 255]
            style['underline'] = color
        elif element == 'font':
            self._apply_font_attrs(attrs, style)
        elif element in ('sup', 'sub'):
            # One size step smaller than the surrounding text, but never
            # below the smallest defined size.
            size = self._clamp_font_size(self._font_size_stack[-1] - 1)
            if size in self.font_sizes:
                style['font_size'] = self.font_sizes[size]
            if element == 'sup':
                style['baseline'] = '3pt'
            else:
                style['baseline'] = '-3pt'
        elif element == 'h1':
            style['font_size'] = 24
            style['bold'] = True
            style['align'] = 'center'
        elif element == 'h2':
            style['font_size'] = 18
            style['bold'] = True
        elif element == 'h3':
            style['font_size'] = 16
            style['bold'] = True
        elif element == 'h4':
            style['font_size'] = 14
            style['bold'] = True
        elif element == 'h5':
            style['font_size'] = 12
            style['bold'] = True
        elif element == 'h6':
            style['font_size'] = 12
            style['italic'] = True
        elif element == 'br':
            # U+2028 LINE SEPARATOR: a line break, as opposed to the '\n'
            # written between blocks.
            self.add_text(u'\u2028')
            self.strip_leading_space = True
        elif element == 'p':
            align = attrs.get('align', '').lower()
            if align in ('left', 'center', 'right'):
                style['align'] = align
        elif element == 'center':
            style['align'] = 'center'
        elif element == 'pre':
            style['font_name'] = 'Courier New'
            style['margin_bottom'] = 0
            self.in_pre = True
        elif element == 'blockquote':
            left_margin = self.current_style.get('margin_left') or 0
            right_margin = self.current_style.get('margin_right') or 0
            style['margin_left'] = left_margin + 60
            style['margin_right'] = right_margin + 60
        elif element == 'q':
            self.handle_data(u'\u201c')
        elif element == 'ol':
            try:
                start = int(attrs.get('start', 1))
            except ValueError:
                start = 1
            number_format = attrs.get('type', '1') + '.'
            builder = structured.OrderedListBuilder(start, number_format)
            builder.begin(self, style)
            self.list_stack.append(builder)
        elif element in ('ul', 'dir', 'menu'):
            list_type = attrs.get('type', 'disc').lower()
            if list_type == 'circle':
                mark = u'\u25cb'
            elif list_type == 'square':
                mark = u'\u25a1'
            else:
                mark = u'\u25cf'
            builder = structured.UnorderedListBuilder(mark)
            builder.begin(self, style)
            self.list_stack.append(builder)
        elif element == 'li':
            self.list_stack[-1].item(self, style)
            self.strip_leading_space = True
        elif element == 'dl':
            style['margin_bottom'] = 0
        elif element == 'dd':
            left_margin = self.current_style.get('margin_left') or 0
            style['margin_left'] = left_margin + 30
        elif element == 'img':
            src = attrs.get('src')
            image = None
            if src:
                image = self.get_image(src)
            if image:
                width = self._parse_dimension(attrs.get('width'))
                height = self._parse_dimension(attrs.get('height'))
                self.prepare_for_data()
                self.add_element(structured.ImageElement(image, width, height))
                self.strip_leading_space = False

        self.push_style(element, style)

    def _release_element(self, element):
        '''Undo the decoder state that opening `element` set up.

        Called for every element removed from the element stack, including
        those closed implicitly by the end tag of an enclosing element.
        '''
        if element == 'font':
            if len(self._font_size_stack) > 1:
                self._font_size_stack.pop()
        elif element == 'pre':
            self.in_pre = False
        elif element in ('ul', 'ol', 'dir', 'menu'):
            # Never pop the placeholder builder at the bottom of the stack.
            if len(self.list_stack) > 1:
                self.list_stack.pop()

    def handle_endtag(self, tag):
        '''Close an element, together with any elements still open inside
        it.  End tags with no matching open element are ignored.
        '''
        element = tag.lower()
        if element not in self.element_stack:
            return

        self.pop_style(element)
        while True:
            closed = self.element_stack.pop()
            self._release_element(closed)
            if closed == element:
                break

        if element in _metadata_elements:
            self.in_metadata = False
        elif element in _block_elements:
            # The newline is deferred until more content arrives; see
            # prepare_for_data().
            self.block_begin = False
            self.need_block_begin = True

        if element == 'q':
            self.handle_data(u'\u201d')

    def handle_entityref(self, name):
        '''Add the character for a named entity such as ``&amp;``.

        Names missing from ``htmlentitydefs.name2codepoint`` are dropped.
        '''
        codepoint = htmlentitydefs.name2codepoint.get(name)
        if codepoint is not None:
            self.handle_data(unichr(codepoint))
    
    def handle_charref(self, name):
        '''Add the character for a numeric reference such as ``&#65;`` or
        ``&#x41;``.  References that do not denote a representable
        character are dropped.
        '''
        name = name.lower()
        try:
            if name.startswith('x'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            char = unichr(codepoint)
        except (ValueError, OverflowError):
            return
        self.handle_data(char)
initial version 2008-11-23 20:07:47 +00:00			`# ----------------------------------------------------------------------------`
			`# pyglet`
			`# Copyright (c) 2006-2008 Alex Holkner`
			`# All rights reserved.`
			`#`
			`# Redistribution and use in source and binary forms, with or without`
			`# modification, are permitted provided that the following conditions`
			`# are met:`
			`#`
			`# * Redistributions of source code must retain the above copyright`
			`# notice, this list of conditions and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above copyright`
			`# notice, this list of conditions and the following disclaimer in`
			`# the documentation and/or other materials provided with the`
			`# distribution.`
			`# * Neither the name of pyglet nor the names of its`
			`# contributors may be used to endorse or promote products`
			`# derived from this software without specific prior written`
			`# permission.`
			`#`
			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS`
			`# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE`
			`# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,`
			`# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,`
			`# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;`
			`# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER`
			`# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT`
			`# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN`
			`# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE`
			`# POSSIBILITY OF SUCH DAMAGE.`
			`# ----------------------------------------------------------------------------`

			`'''Decode HTML into attributed text.`

			`A subset of HTML 4.01 Transitional is implemented. The following elements are`
			`supported fully::`

			`B BLOCKQUOTE BR CENTER CODE DD DIR DL EM FONT H1 H2 H3 H4 H5 H6 I IMG KBD`
			`LI MENU OL P PRE Q SAMP STRONG SUB SUP TT U UL VAR`

			`The mark (bullet or number) of a list item is separated from the body of the`
			`list item with a tab, as the pyglet document model does not allow`
			`out-of-stream text. This means lists display as expected, but behave a little`
			`oddly if edited.`

			`No CSS styling is supported.`
			`'''`

			`__docformat__ = 'restructuredtext'`
			`__version__ = '$Id: $'`

			`import HTMLParser`
			`import htmlentitydefs`
			`import os`
			`import re`

			`import pyglet`
			`from pyglet.text.formats import structured`

			`def _hex_color(val):`
			`return [(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff, 255]`

			`_color_names = {`
			`'black': _hex_color(0x000000),`
			`'silver': _hex_color(0xc0c0c0),`
			`'gray': _hex_color(0x808080),`
			`'white': _hex_color(0xffffff),`
			`'maroon': _hex_color(0x800000),`
			`'red': _hex_color(0xff0000),`
			`'purple': _hex_color(0x800080),`
			`'fucsia': _hex_color(0x008000),`
			`'green': _hex_color(0x00ff00),`
			`'lime': _hex_color(0xffff00),`
			`'olive': _hex_color(0x808000),`
			`'yellow': _hex_color(0xff0000),`
			`'navy': _hex_color(0x000080),`
			`'blue': _hex_color(0x0000ff),`
			`'teal': _hex_color(0x008080),`
			`'aqua': _hex_color(0x00ffff),`
			`}`

			`def _parse_color(value):`
			`if value.startswith('#'):`
			`return _hex_color(int(value[1:], 16))`
			`else:`
			`try:`
			`return _color_names[value.lower()]`
			`except KeyError:`
			`raise ValueError()`

			`_whitespace_re = re.compile(u'[\u0020\u0009\u000c\u200b\r\n]+', re.DOTALL)`

			`_metadata_elements = ['head', 'title']`

			`_block_elements = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',`
			`'ul', 'ol', 'dir', 'menu',`
			`'pre', 'dl', 'div', 'center',`
			`'noscript', 'noframes', 'blockquote', 'form',`
			`'isindex', 'hr', 'table', 'fieldset', 'address',`
			`# Incorrect, but we treat list items as blocks:`
			`'li', 'dd', 'dt', ]`


			`_block_containers = ['_top_block',`
			`'body', 'div', 'center', 'object', 'applet',`
			`'blockquote', 'ins', 'del', 'dd', 'li', 'form',`
			`'fieldset', 'button', 'th', 'td', 'iframe', 'noscript',`
			`'noframes',`
			`# Incorrect, but we treat list items as blocks:`
			`'ul', 'ol', 'dir', 'menu', 'dl']`


			`class HTMLDecoder(HTMLParser.HTMLParser, structured.StructuredTextDecoder):`
			`'''Decoder for HTML documents.`
			`'''`
			`#: Default style attributes for unstyled text in the HTML document.`
			`#:`
			`#: :type: dict`
			`default_style = {`
			`'font_name': 'Times New Roman',`
			`'font_size': 12,`
			`'margin_bottom': '12pt',`
			`}`

			`#: Map HTML font sizes to actual font sizes, in points.`
			`#:`
			`#: :type: dict`
			`font_sizes = {`
			`1: 8,`
			`2: 10,`
			`3: 12,`
			`4: 14,`
			`5: 18,`
			`6: 24,`
			`7: 48`
			`}`

			`def decode_structured(self, text, location):`
			`self.location = location`
			`self._font_size_stack = [3]`
			`self.list_stack.append(structured.UnorderedListBuilder({}))`
			`self.strip_leading_space = True`
			`self.block_begin = True`
			`self.need_block_begin = False`
			`self.element_stack = ['_top_block']`
			`self.in_metadata = False`
			`self.in_pre = False`

			`self.push_style('_default', self.default_style)`

			`self.feed(text)`
			`self.close()`

			`def get_image(self, filename):`
			`return pyglet.image.load(filename, file=self.location.open(filename))`

			`def prepare_for_data(self):`
			`if self.need_block_begin:`
			`self.add_text('\n')`
			`self.block_begin = True`
			`self.need_block_begin = False`

			`def handle_data(self, data):`
			`if self.in_metadata:`
			`return`

			`if self.in_pre:`
			`self.add_text(data)`
			`else:`
			`data = _whitespace_re.sub(' ', data)`
			`if data.strip():`
			`self.prepare_for_data()`
			`if self.block_begin or self.strip_leading_space:`
			`data = data.lstrip()`
			`self.block_begin = False`
			`self.add_text(data)`
			`self.strip_leading_space = data.endswith(' ')`

			`def handle_starttag(self, tag, case_attrs):`
			`if self.in_metadata:`
			`return`

			`element = tag.lower()`
			`attrs = {}`
			`for key, value in case_attrs:`
			`attrs[key.lower()] = value`

			`if element in _metadata_elements:`
			`self.in_metadata = True`
			`elif element in _block_elements:`
			`# Pop off elements until we get to a block container.`
			`while self.element_stack[-1] not in _block_containers:`
			`self.handle_endtag(self.element_stack[-1])`
			`if not self.block_begin:`
			`self.add_text('\n')`
			`self.block_begin = True`
			`self.need_block_begin = False`
			`self.element_stack.append(element)`

			`style = {}`
			`if element in ('b', 'strong'):`
			`style['bold'] = True`
			`elif element in ('i', 'em', 'var'):`
			`style['italic'] = True`
			`elif element in ('tt', 'code', 'samp', 'kbd'):`
			`style['font_name'] = 'Courier New'`
			`elif element == 'u':`
			`color = self.current_style.get('color')`
			`if color is None:`
			`color = [0, 0, 0, 255]`
			`style['underline'] = color`
			`elif element == 'font':`
			`if 'face' in attrs:`
			`style['font_name'] = attrs['face'].split(',')`
			`if 'size' in attrs:`
			`size = attrs['size']`
			`try:`
			`if size.startswith('+'):`
			`size = self._font_size_stack[-1] + int(size[1:])`
			`elif size.startswith('-'):`
			`size = self._font_size_stack[-1] - int(size[1:])`
			`else:`
			`size = int(size)`
			`except ValueError:`
			`size = 3`
			`self._font_size_stack.append(size)`
			`if size in self.font_sizes:`
			`style['font_size'] = self.font_sizes.get(size, 3)`
			`else:`
			`self._font_size_stack.append(self._font_size_stack[-1])`
			`if 'color' in attrs:`
			`try:`
			`style['color'] = _parse_color(attrs['color'])`
			`except ValueError:`
			`pass`
			`elif element == 'sup':`
			`size = self._font_size_stack[-1] - 1`
			`style['font_size'] = self.font_sizes.get(size, 1)`
			`style['baseline'] = '3pt'`
			`elif element == 'sub':`
			`size = self._font_size_stack[-1] - 1`
			`style['font_size'] = self.font_sizes.get(size, 1)`
			`style['baseline'] = '-3pt'`
			`elif element == 'h1':`
			`style['font_size'] = 24`
			`style['bold'] = True`
			`style['align'] = 'center'`
			`elif element == 'h2':`
			`style['font_size'] = 18`
			`style['bold'] = True`
			`elif element == 'h3':`
			`style['font_size'] = 16`
			`style['bold'] = True`
			`elif element == 'h4':`
			`style['font_size'] = 14`
			`style['bold'] = True`
			`elif element == 'h5':`
			`style['font_size'] = 12`
			`style['bold'] = True`
			`elif element == 'h6':`
			`style['font_size'] = 12`
			`style['italic'] = True`
			`elif element == 'br':`
			`self.add_text(u'\u2028')`
			`self.strip_leading_space = True`
			`elif element == 'p':`
			`if attrs.get('align') in ('left', 'center', 'right'):`
			`style['align'] = attrs['align']`
			`elif element == 'center':`
			`style['align'] = 'center'`
			`elif element == 'pre':`
			`style['font_name'] = 'Courier New'`
			`style['margin_bottom'] = 0`
			`self.in_pre = True`
			`elif element == 'blockquote':`
			`left_margin = self.current_style.get('margin_left') or 0`
			`right_margin = self.current_style.get('margin_right') or 0`
			`style['margin_left'] = left_margin + 60`
			`style['margin_right'] = right_margin + 60`
			`elif element == 'q':`
			`self.handle_data(u'\u201c')`
			`elif element == 'ol':`
			`try:`
			`start = int(attrs.get('start', 1))`
			`except ValueError:`
			`start = 1`
			`format = attrs.get('type', '1') + '.'`
			`builder = structured.OrderedListBuilder(start, format)`
			`builder.begin(self, style)`
			`self.list_stack.append(builder)`
			`elif element in ('ul', 'dir', 'menu'):`
			`type = attrs.get('type', 'disc').lower()`
			`if type == 'circle':`
			`mark = u'\u25cb'`
			`elif type == 'square':`
			`mark = u'\u25a1'`
			`else:`
			`mark = u'\u25cf'`
			`builder = structured.UnorderedListBuilder(mark)`
			`builder.begin(self, style)`
			`self.list_stack.append(builder)`
			`elif element == 'li':`
			`self.list_stack[-1].item(self, style)`
			`self.strip_leading_space = True`
			`elif element == 'dl':`
			`style['margin_bottom'] = 0`
			`elif element == 'dd':`
			`left_margin = self.current_style.get('margin_left') or 0`
			`style['margin_left'] = left_margin + 30`
			`elif element == 'img':`
			`image = self.get_image(attrs.get('src'))`
			`if image:`
			`width = attrs.get('width')`
			`if width:`
			`width = int(width)`
			`height = attrs.get('height')`
			`if height:`
			`height = int(height)`
			`self.prepare_for_data()`
			`self.add_element(structured.ImageElement(image, width, height))`
			`self.strip_leading_space = False`

			`self.push_style(element, style)`

			`def handle_endtag(self, tag):`
			`element = tag.lower()`
			`if element not in self.element_stack:`
			`return`

			`self.pop_style(element)`
			`while self.element_stack.pop() != element:`
			`pass`

			`if element in _metadata_elements:`
			`self.in_metadata = False`
			`elif element in _block_elements:`
			`self.block_begin = False`
			`self.need_block_begin = True`

			`if element == 'font' and len(self._font_size_stack) > 1:`
			`self._font_size_stack.pop()`
			`elif element == 'pre':`
			`self.in_pre = False`
			`elif element == 'q':`
			`self.handle_data(u'\u201d')`
			`elif element in ('ul', 'ol'):`
			`if len(self.list_stack) > 1:`
			`self.list_stack.pop()`

			`def handle_entityref(self, name):`
			`if name in htmlentitydefs.name2codepoint:`
			`self.handle_data(unichr(htmlentitydefs.name2codepoint[name]))`

			`def handle_charref(self, name):`
			`name = name.lower()`
			`try:`
			`if name.startswith('x'):`
			`self.handle_data(unichr(int(name[1:], 16)))`
			`else:`
			`self.handle_data(unichr(int(name)))`
			`except ValueError:`
			`pass`