0001"""Kid Parser
0002
0003Parses Kid embedded XML to Python source code.
0004"""
0005
0006from __future__ import generators
0007
0008__revision__ = "$Rev: 346 $"
0009__date__ = "$Date: 2006-06-20 11:09:56 +0000 (Tue, 20 Jun 2006) $"
0010__author__ = "Ryan Tomayko (rtomayko@gmail.com)"
0011__copyright__ = "Copyright 2004-2005, Ryan Tomayko"
0012__license__ = "MIT <http://www.opensource.org/licenses/mit-license.php>"
0013
0014import re
0015from kid.pull import *
0016from kid.et import namespaces
0017from kid import Namespace
0018
0019
# XML namespace URI under which the template directive attributes live.
KID_XMLNS = "http://purl.org/kid/ns#"
KID_PREFIX = 'py'
# Namespace helper used below to build one name per directive attribute
# (presumably namespace-qualified names -- confirm against kid.Namespace).
kidns = Namespace(KID_XMLNS)
QNAME_FOR = kidns['for']
QNAME_IF = kidns['if']
QNAME_DEF = kidns['def']
QNAME_SLOT = kidns['slot']
QNAME_CONTENT = kidns['content']
QNAME_REPLACE = kidns['replace']
QNAME_MATCH = kidns['match']
QNAME_STRIP = kidns['strip']
QNAME_ATTRIBUTES = kidns['attrs']
QNAME_EXTENDS = kidns['extends']
QNAME_LAYOUT = kidns['layout']


# 'omit' is only consulted by KidParser.strip_proc when no 'strip' attribute
# is present; 'repeat' is not referenced by the code visible in this module.
QNAME_OMIT = kidns['omit']
QNAME_REPEAT = kidns['repeat']


# Processing-instruction targets whose data KidParser.proc_stream inserts
# into the generated module as Python code.
KID_PI = 'python'
KID_ALT_PI = 'py'
KID_OLD_PI = 'kid'
0043
def parse(source, encoding=None, filename=None):
    """Translate Kid template `source` into Python source code.

    source -- the template, in whatever form `document()` accepts.
    encoding -- optional encoding, passed to both `document()` and the
                parser.
    filename -- optional file name, passed to `document()`.

    Returns the string produced by `KidParser.parse()`.
    """
    events = document(source, encoding=encoding, filename=filename)
    return KidParser(events, encoding).parse()
0047
def parse_file(filename, encoding=None):
    """Parse the named template file and return the generated source.

    filename -- the name of the file; it is opened in binary mode.
    encoding -- optional encoding handed through to `parse()`.

    The file is closed again even when parsing raises.
    """
    template_file = open(filename, 'rb')
    try:
        result = parse(template_file, encoding, filename=filename)
    finally:
        template_file.close()
    return result
0061
class KidParser(object):
    """Turn a stream of template parse events into Python module source.

    The generated module is assembled from several `CodeGenerator`
    buffers which `parse()` concatenates in a fixed order:

    module_code     -- module-level prologue (imports, defaults, helpers).
    module_defs     -- one module-level wrapper function per named def.
    class_code      -- the ``class Template(...)`` header.
    expand_code     -- the ``initialize`` and ``_pull`` methods.
    inst_defs       -- further methods (defs, match functions, layout).
    end_module_code -- module epilogue.

    Each element is passed through a chain of directive handlers, every
    one of which removes its own attribute and delegates to the next:
    def_proc -> match_proc -> for_proc -> if_proc -> replace_proc ->
    strip_proc (which uses content_proc and attrib_proc).
    """

    def __init__(self, stream, encoding=None):
        """Store the event stream and create the empty code buffers.

        stream -- iterable of ``(event, item)`` pairs; it must also offer
                  an ``eat()`` method (used by `strip_proc`).
        encoding -- default encoding written into the generated module;
                    'utf-8' when not given.
        """
        self.stream = stream
        self.encoding = encoding or 'utf-8'
        # Element nesting depth; 0 while outside of any element.
        self.depth = 0
        self.module_code = CodeGenerator()
        self.class_code = CodeGenerator()
        self.expand_code = CodeGenerator(level=1)
        self.end_module_code = CodeGenerator()
        self.module_defs = []
        self.inst_defs = []

    def parse(self):
        """Process the whole stream and return the generated source text."""
        self.begin()
        self.proc_stream(self.module_code)
        self.end()
        parts = []
        parts += self.module_code.code
        for c in self.module_defs:
            parts += c.code
        parts += self.class_code.code
        parts += self.expand_code.code
        for c in self.inst_defs:
            parts += c.code
        parts += self.end_module_code.code
        return '\n'.join(parts)

    def begin(self):
        """Emit the fixed prologue of the generated module.

        Also opens the ``_pull`` method in `expand_code`; that block is
        left open on purpose and is closed by `end()`.
        """
        code = self.module_code
        code.line('from __future__ import generators')
        code.line('import kid')
        code.line('from kid.template_util import *')
        code.line('import kid.template_util as template_util')
        code.line('_def_names = []')

        # Module-level defaults; later lines emitted into module_code
        # (see XML_DECL / DOCTYPE handling in proc_stream) rebind them.
        code.line('encoding = "%s"' % self.encoding)
        code.line('doctype = None')
        code.line('omit_namespaces = [kid.KID_XMLNS]')
        code.line('layout_params = {}')

        # Module-level convenience functions.
        code.line('def pull(**kw): return Template(**kw).pull()')
        code.line("def generate(encoding=encoding, fragment=0, output=None, **kw): "
                  "return Template(**kw).generate(encoding=encoding, fragment=fragment, output=output)")
        code.line("def serialize(encoding=encoding, fragment=0, output=None, **kw): "
                  "return Template(**kw).serialize(encoding=encoding, fragment=fragment, output=output)")
        code.line("def write(file, encoding=encoding, fragment=0, output=None, **kw): "
                  "return Template(**kw).write(file, encoding=encoding, fragment=fragment, output=output)")
        code.line('BaseTemplate = kid.BaseTemplate')
        code.line('def initialize(template): pass')

        # Methods of the Template class.
        code = self.expand_code
        code.start_block('def initialize(self):')
        code.line('rslt = initialize(self)')
        code.line('if rslt != 0: super(Template, self).initialize()')
        code.end_block()
        code.start_block('def _pull(self):')
        code.line("exec template_util.get_locals(self, locals())")
        code.line('current, ancestors = None, []')
        code.line('if doctype: yield (DOCTYPE, doctype)')

        code = self.end_module_code
        code.line('')

    def end(self):
        """Close the ``_pull`` block that `begin()` left open."""
        self.expand_code.end_block()

    def proc_stream(self, code):
        """Consume events until the enclosing element ends, emitting code.

        code -- the `CodeGenerator` receiving the generated lines.

        The loop stops at the first END event that does not belong to a
        comment or processing instruction.  Element START events recurse
        back into this method through `def_proc` ... `strip_proc`.
        """
        for (ev, item) in self.stream:
            if ev == START:
                if item.tag == Comment:
                    text = item.text.lstrip()
                    # Comments starting with '!' are dropped from the output.
                    if text.startswith('!'):
                        continue
                    line = code.line
                    # Only comments starting with '<' or '[' are interpolated.
                    if text.startswith('<') or text.startswith('['):
                        sub = interpolate(item.text)
                        if isinstance(sub, list):
                            text = "''.join([unicode(o) for o in %r])" % sub
                        else:
                            text = repr(sub)
                    else:
                        text = repr(item.text)
                    line('_e = Comment(%s)' % text)
                    line('yield (START, _e); yield (END, _e); del _e')
                elif item.tag == ProcessingInstruction:
                    if ' ' in item.text.strip():
                        (name, data) = item.text.split(' ', 1)
                    else:
                        (name, data) = (item.text, '')
                    if name in (KID_PI, KID_ALT_PI, KID_OLD_PI):
                        # Template code: copy it into the generated source.
                        if data:
                            code.insert_block(data)
                    else:
                        # Any other instruction is re-emitted; outside of
                        # all elements it goes into the _pull method.
                        if self.depth:
                            target = code
                        else:
                            target = self.expand_code
                        target.line('_e = ProcessingInstruction(%r, %r)' % (name, data))
                        target.line('yield (START, _e); yield (END, _e); del _e')
                else:
                    layout = None
                    if code is self.module_code:
                        # First element met while still writing module-level
                        # code: emit the Template class header and switch to
                        # writing the body of _pull.
                        layout = item.get(QNAME_LAYOUT)
                        if layout is not None:
                            del item.attrib[QNAME_LAYOUT]
                        decl = ['class Template(']
                        extends = item.get(QNAME_EXTENDS)
                        parts = []
                        if extends is not None:
                            del item.attrib[QNAME_EXTENDS]
                            for c in extends.split(','):
                                parts.append('template_util.get_base_class(%s, __file__)' % c)
                        parts.append('BaseTemplate')
                        decl.append(','.join(parts))
                        decl.append('):')
                        code = self.class_code
                        code.start_block(''.join(decl))
                        code.line('_match_templates = []')
                        code = self.expand_code
                        del decl, parts
                    self.def_proc(item, item.attrib, code)
                    if layout is not None:
                        # Emit a second _pull method that delegates to the
                        # layout template; it is appended after the first.
                        old_code = code
                        code = CodeGenerator(level=1)
                        code.start_block("def _pull(self):")
                        code.line('exec template_util.get_locals(self, locals())')
                        code.line('kw = dict(layout_params)')
                        code.line('kw.update(dict([(name, getattr(self, name)) for name in _def_names]))')
                        code.line('kw.update(self.__dict__)')
                        code.line('kw.pop("assume_encoding", None)')
                        code.line('kw.pop("_layout_classes", None)')
                        code.line('temp = template_util.get_base_class(%s, __file__)(**kw)' % layout)
                        code.line('temp._match_templates = self._match_templates + temp._match_templates')
                        code.line('return temp._pull()')
                        code.end_block()
                        self.inst_defs.append(code)
                        code = old_code
            elif ev == END and item.tag not in (ProcessingInstruction, Comment):
                break
            elif ev == TEXT:
                self.text_interpolate(item, code)
            elif ev == XML_DECL and item[1] is not None:
                self.module_code.line('encoding = %r' % item[1])
            elif ev == DOCTYPE:
                self.module_code.line('doctype = (%r, %r, %r)' % item)

    def def_proc(self, item, attrib, code):
        """Handle the 'def' / 'slot' directive, then continue the chain.

        When present, the element body is generated into a new method
        (appended to `inst_defs`) and a module-level wrapper function is
        appended to `module_defs`.  For 'slot', a call to the new method
        is additionally emitted at the original position.
        """
        attr_name = QNAME_DEF
        decl = attrib.get(attr_name)
        if decl is None:
            attr_name = QNAME_SLOT
            decl = attrib.get(attr_name)
        if decl is not None:
            del attrib[attr_name]
            old_code = code
            # A bare name is treated as a function without arguments.
            if '(' not in decl:
                decl = decl + '()'
            name, args = decl.split('(', 1)
            pos = args.rfind(')')
            args = args[0:pos].strip()
            if args:
                self_ = 'self, '
            else:
                self_ = 'self'
            class_decl = '%s(%s%s)' % (name, self_, args)

            # Module-level function forwarding to a fresh Template.
            code = CodeGenerator()
            code.start_block('def %s(*args, **kw):' % name)
            code.line('return Template().%s(*args, **kw)' % name)
            code.end_block()
            code.line('_def_names.append("%s")' % name)
            self.module_defs.append(code)

            # Instance method holding the element's generated body.
            code = CodeGenerator(level=1)
            code.start_block('def %s:' % class_decl)
            code.line('exec template_util.get_locals(self, locals())')
            code.line('current, ancestors = None, []')
            self.inst_defs.append(code)
            self.match_proc(item, attrib, code)
            code.end_block()
            if attr_name == QNAME_SLOT:
                old_code.line('for _e in template_util.generate_content(self.%s()): yield _e' % name)
        else:
            self.match_proc(item, attrib, code)

    def match_proc(self, item, attrib, code):
        """Handle the 'match' directive, then continue the chain.

        When present, the element body is generated into a separate
        ``_match_func`` method which is registered, together with a lambda
        built from the match expression, in ``_match_templates``.
        """
        expr = attrib.get(QNAME_MATCH)
        if expr is not None:
            del attrib[QNAME_MATCH]
            code = CodeGenerator(level=1)
            code.start_block('def _match_func(self, item, apply):')
            code.line('exec template_util.get_locals(self, locals())')
            code.line('current, ancestors = None, []')
            self.for_proc(item, attrib, code)
            code.end_block()
            code.line('_match_templates.append((lambda item: %s, _match_func))' % expr)
            self.inst_defs.append(code)
        else:
            self.for_proc(item, attrib, code)

    def for_proc(self, item, attrib, code):
        """Wrap the element in a ``for`` block when 'for' is present."""
        expr = attrib.get(QNAME_FOR)
        if expr is not None:
            code.start_block('for %s:' % expr)
            del attrib[QNAME_FOR]
            self.if_proc(item, attrib, code)
            code.end_block()
        else:
            self.if_proc(item, attrib, code)

    def if_proc(self, item, attrib, code):
        """Wrap the element in an ``if`` block when 'if' is present."""
        expr = attrib.get(QNAME_IF)
        if expr is not None:
            code.start_block('if %s:' % expr)
            del attrib[QNAME_IF]
            self.replace_proc(item, attrib, code)
            code.end_block()
        else:
            self.replace_proc(item, attrib, code)

    def replace_proc(self, item, attrib, code):
        """Rewrite 'replace' as an unconditional 'strip' plus 'content'."""
        expr = attrib.get(QNAME_REPLACE)
        if expr is not None:
            del attrib[QNAME_REPLACE]
            attrib[QNAME_STRIP] = ""
            attrib[QNAME_CONTENT] = expr
        self.strip_proc(item, attrib, code)

    def strip_proc(self, item, attrib, code):
        """Emit the element's start, its content, and its end.

        The 'strip' attribute (or 'omit' when 'strip' is absent) decides
        whether the start/end events are emitted: an empty value always
        suppresses them, a non-empty value makes them conditional on the
        expression being false, and no attribute emits them always.
        """
        has_content = self.content_proc(item, attrib, code)
        expr, attr = (attrib.get(QNAME_STRIP), QNAME_STRIP)
        if expr is None:
            expr, attr = (attrib.get(QNAME_OMIT), QNAME_OMIT)
        start_block, end_block = (code.start_block, code.end_block)
        line = code.line
        if expr is not None:
            del attrib[attr]
            # An empty expression means "always strip": no start code at all.
            if expr != '':
                start_block("if not (%s):" % expr)
                self.attrib_proc(item, attrib, code)
                end_block()
        else:
            self.attrib_proc(item, attrib, code)
        if has_content:
            code.start_block(
                'for _e in template_util.generate_content(_cont, current):')
            line('yield _e')
            line('del _e')
            code.end_block()
            # Presumably skips the element's own children in the stream,
            # since the content expression replaces them -- confirm
            # against the stream implementation.
            self.stream.eat()
        else:
            self.depth += 1
            self.proc_stream(code)
            self.depth -= 1
        if expr:
            start_block("if not (%s):" % expr)
            line('yield (END, current)')
            line('current = ancestors.pop(0)')
            end_block()
        elif expr != '':
            # No strip attribute at all (expr is None): always emit the end.
            line('yield (END, current)')
            line('current = ancestors.pop(0)')

    def attrib_proc(self, item, attrib, code):
        """Emit the code that creates the element and yields its START.

        Attribute values are run through `interpolate()`; the 'attrs'
        directive, when present, is merged in at run time through
        ``template_util.update_dict``.
        """
        line = code.line
        need_interpolation = 0
        names = namespaces(item, remove=1)
        for (k, v) in attrib.items():
            sub = interpolate(v)
            # Only existing keys are re-bound, so the dict size is stable.
            if sub is not v:
                attrib[k] = sub
                if isinstance(sub, list):
                    need_interpolation = 1
        expr = attrib.get(QNAME_ATTRIBUTES)

        if expr is not None:
            del attrib[QNAME_ATTRIBUTES]
            attr_text = 'template_util.update_dict(%r, "%s", globals(), locals())' % (attrib, expr.replace('"', '\\\"'))
            attr_text = 'template_util.make_attrib(%s,self._get_assume_encoding())' % attr_text
        else:
            if attrib:
                if need_interpolation:
                    attr_text = 'template_util.make_attrib(%r,self._get_assume_encoding())' % attrib
                else:
                    attr_text = repr(attrib)
            else:
                attr_text = '{}'
        line('ancestors.insert(0, current)')
        line('current = Element(%r, %s)' % (item.tag, attr_text))
        if len(names):
            code.start_block('for _p, _u in %r.items():' % names)
            line('if not _u in omit_namespaces: yield (START_NS, (_p,_u))')
            code.end_block()
        line('yield (START, current)')

    def content_proc(self, item, attrib, code):
        """Emit ``_cont = <expr>`` for the 'content' directive.

        Returns 1 when the directive was present, otherwise None.
        """
        expr = attrib.get(QNAME_CONTENT)
        if expr is not None:
            del attrib[QNAME_CONTENT]
            code.line('_cont = %s' % expr)
            return 1
        return None

    def text_interpolate(self, text, code):
        """Emit code yielding `text`, expanding embedded expressions."""
        line = code.line
        sub = interpolate(text)
        if isinstance(sub, list):
            code.start_block('for _e in %r:' % sub)
            code.line('for _e2 in template_util.generate_content(_e): yield _e2')
            code.end_block()
        else:
            line('yield (TEXT, %r)' % sub)
0385
class SubExpression(list):
    """A list of source-code strings that reprs as a Python list display.

    The items are joined verbatim (they are not repr()'d themselves), so
    the result can be pasted into generated code.
    """

    def __repr__(self):
        joined = ', '.join(self)
        return '[' + joined + ']'
0389
# ``${expression}`` and ``$name.attr`` forms; a ``$`` directly preceded by
# another ``$`` never starts an expression.
_sub_expr = re.compile(r"(?<!\$)\$\{(.+?)\}")
_sub_expr_short = re.compile(r"(?<!\$)\$([a-zA-Z][a-zA-Z0-9_\.]*)")

def interpolate(text):
    """Split `text` into literal text and embedded expressions.

    Returns the text itself (with ``$$`` collapsed to ``$``) when it holds
    no expression.  Otherwise returns a `SubExpression` whose items are
    source strings: expressions verbatim, literal text as its repr().
    Empty literal pieces are left out.
    """
    pieces = _sub_expr.split(text)
    if len(pieces) == 1:
        # No ${...} form at all: the short form decides on its own.
        pieces = _sub_expr_short.split(text)
        if len(pieces) == 1:
            return text.replace('$$', '$')
        scan_short = False
    else:
        # Literal pieces between ${...} may still contain $name forms.
        scan_short = True

    result = SubExpression()
    # re.split() with one group alternates: literal, expression, literal...
    for pos, piece in enumerate(pieces):
        if pos % 2:
            result.append(piece)
            continue
        if not piece:
            continue
        if not scan_short:
            result.append(repr(piece.replace('$$', '$')))
            continue
        for sub_pos, fragment in enumerate(_sub_expr_short.split(piece)):
            if sub_pos % 2:
                result.append(fragment)
            elif fragment:
                result.append(repr(fragment.replace('$$', '$')))
    return result
0421
0422
class CodeGenerator:
    """A simple Python code generator.

    Source lines are collected in the `code` list.  Every line is prefixed
    with `tab` repeated `level` times; `start_block` / `end_block` raise
    and lower the level.
    """

    level = 0
    tab = '\t'

    def __init__(self, code=None, level=0, tab='\t'):
        """Create a generator.

        code -- list to append lines to; a new list when None.  A list
                passed in is used as is, even when it is empty.
        level -- initial indentation level.
        tab -- string used for one level of indentation.
        """
        if code is None:
            code = []
        self.code = code
        if level != self.level:
            self.level = level
        if tab != self.tab:
            self.tab = tab

    def line(self, text):
        """Append `text` as one line at the current indentation level."""
        self.code.append('%s%s' % (self.tab * self.level, text))

    def start_block(self, text):
        """Append `text` (a block header) and indent what follows."""
        self.line(text)
        self.level += 1

    def end_block(self, nblocks=1, with_pass=False):
        """Close `nblocks` blocks, optionally emitting ``pass`` in each."""
        for n in range(nblocks):
            if with_pass:
                self.line('pass')
            self.level -= 1

    def insert_block(self, block):
        """Append a block of Python code given as one string.

        A single line is stripped and appended; several lines are first
        normalized with `_adjust_python_block`.  An empty string appends
        nothing.
        """
        output_line = self.line
        lines = block.splitlines()
        if not lines:
            # ''.splitlines() is empty; there is nothing to insert.
            return
        if len(lines) == 1:
            # Special case: a single line needs no indentation analysis.
            output_line(lines[0].strip())
        else:
            for line in _adjust_python_block(lines, self.tab):
                output_line(line)

    def __str__(self):
        """Return the collected lines joined by newlines, newline-terminated.

        The `code` list itself is left untouched, so repeated calls give
        the same result.
        """
        return '\n'.join(self.code + [''])
0463
0464
0465
0466def _adjust_python_block(lines, tab='\t'):
0467 """Adjust the indentation of a Python block."""
0468 lines = [lines[0].strip()] + [line.rstrip() for line in lines[1:]]
0469 ind = None
0470 for line in lines[1:]:
0471 if line != '':
0472 s = line.lstrip()
0473 if s[0] != '#':
0474 i = len(line) - len(s)
0475 if ind is None or i < ind:
0476 ind = i
0477 if i == 0:
0478 break
0479 if ind is not None or ind != 0:
0480 lines[1:] = [line[:ind].lstrip() + line[ind:]
0481 for line in lines[1:]]
0482 if lines[0] and not lines[0][0] == '#':
0483
0484 try:
0485 compile(lines[0], '<string>', 'exec')
0486
0487 except SyntaxError:
0488 try:
0489 block = '\n'.join(lines) + '\n'
0490 compile(block, '<string>', 'exec')
0491
0492 except IndentationError:
0493
0494 lines2 = lines[:1] + [tab + line for line in lines[1:]]
0495 block = '\n'.join(lines2) + '\n'
0496
0497 compile(block, '<string>', 'exec')
0498 lines = lines2
0499 except:
0500 pass
0501 except:
0502 pass
0503 return lines
0504
0505
# Fallback for Python versions that lack the enumerate() builtin.
try:
    enumerate
except NameError:
    def enumerate(seq):
        """Yield ``(index, element)`` pairs for any iterable `seq`."""
        # Counting by hand avoids len(), so iterators and generators
        # work as well as sequences.
        i = 0
        for elem in seq:
            yield (i, elem)
            i += 1