# -*- coding: utf-8 -*- """Infoset serialization format styles. This modules provides methods assisting the serialization module in formatting the text content of serialized infosets. The methods for "educating" and "stupefying" typographic characters have been inspired by John Gruber's "SmartyPants" project (http://daringfireball.net/projects/smartypants/, see also http://web.chad.org/projects/smartypants.py/). """ __revision__ = "$Rev: 492 $" __date__ = "$Date: 2007-07-06 21:38:45 -0400 (Fri, 06 Jul 2007) $" __author__ = "Christoph Zwerschke (cito@online.de)" __copyright__ = "Copyright 2006, Christoph Zwerschke" __license__ = "MIT " import re __all__ = ['Format', 'output_formats'] class Format(object): """Formatting details for Serializers.""" # Default values for some parameters: wrap = 80 indent = '\t' min_level, max_level = 1, 8 tabsize = 8 apostrophe = u'\u2019' squotes = u'\u2018\u2019' dquotes = u'\u201c\u201d' dashes = u'\u2013\u2014' ellipsis = u'\u2026' # Regular expressions used by the Format class: re_whitespace = re.compile(r'[ \t\n\r]+') re_leading_blanks = re.compile(r'^[ \t]+', re.MULTILINE) re_trailing_blanks = re.compile(r'[ \t]+$', re.MULTILINE) re_duplicate_blanks = re.compile(r'[ \t]{2,}') re_duplicate_newlines = re.compile(r'\n[ \t\n\r]*\n') re_whitespace_with_newline = re.compile(r'[ \t]*\n[ \t\n\r]*') re_indentation = re.compile(r'\n[ \t]*') re_squotes = re.compile(r"'") re_dquotes = re.compile(r'"') re_sbackticks = re.compile(r"`") re_dbackticks = re.compile(r"(? width > 0: t.append('\n') offset = indent + len(word) t.append(word) for word in s: offset += len(word) + 1 if offset <= width: t.append(' ') else: t.append('\n') offset = indent + len(word) t.append(word) return ''.join(t) # Auxiliary functions for indentation and word wrapping def indent_width(indent, tabsize=tabsize): """Calculate width of indentation.""" if indent.startswith('\t'): width = len(indent) indent = indent.lstrip('\t') width -= len(indent) width *= tabsize width += len(indent) else: width = len(indent) return width indent_width = staticmethod(indent_width) def new_offset(s, offset=0): """Calculate new offset after appending a string.""" n = s.rfind('\n') if n < 0: offset += len(s) else: offset = Format.indent_width(s[n+1:]) return offset new_offset = staticmethod(new_offset) # create some predefined serialization formats... output_formats = { 'default': Format(no_empty_lines=True), 'straight': Format(), 'compact': Format(simple_whitespace=True), 'newlines': Format(simple_whitespace=True, indent=''), 'pretty': Format(simple_whitespace=True, indent='\t'), 'wrap': Format(wrap=True, indent=''), 'nice': Format(no_empty_lines=True, nice=True), 'ugly': Format(no_empty_lines=True, ugly=True), 'named': Format(no_empty_lines=True, named=True), 'compact+named': Format(simple_whitespace=True, named=True), 'newlines+named': Format(simple_whitespace=True, indent='', named=True), 'pretty+named': Format(simple_whitespace=True, indent='\t', named=True), 'wrap+named': Format(wrap=True, indent='', named=True), 'compact+nice': Format(simple_whitespace=True, nice=True), 'newlines+nice': Format(simple_whitespace=True, indent='', nice=True), 'pretty+nice': Format(simple_whitespace=True, indent='\t', nice=True), 'wrap+nice': Format(wrap=True, indent='', nice=True), 'nice+named': Format(no_empty_lines=True, nice=True, named=True), 'compact+named+nice': Format(simple_whitespace=True, nice=True, named=True), 'newlines+named+nice': Format(simple_whitespace=True, indent='', nice=True), 'pretty+named+nice': Format(simple_whitespace=True, indent='\t', nice=True, named=True), 'wrap+named+nice': Format(wrap=True, indent='', nice=True, named=True), }