2012-04-17 18:14:02 +00:00
|
|
|
# Registry mapping a node to the sequential number describe() assigned to it,
# so the same node gets the same number every time it is described.
uids = dict()
|
|
|
|
|
|
|
|
|
2011-06-30 04:46:37 +00:00
|
|
|
def save_to_file(text, filename):
    """Write ``text`` to ``filename`` as UTF-8, preceded by a charset tag.

    A ``<meta>`` tag declaring the UTF-8 charset is written first, followed
    by ``text``. An existing file at ``filename`` is overwritten.

    :param text: content to write. Byte strings are written unchanged;
        anything else is encoded with ``.encode('utf-8')``.
    :param filename: path of the file to create or truncate.
    """
    header = (
        b'\n<meta http-equiv="Content-Type"\n'
        b'content="text/html; charset=UTF-8"\n'
        b'/>'
    )
    if not isinstance(text, bytes):
        text = text.encode('utf-8')
    # Binary mode: the payload is already-encoded bytes, which a text-mode
    # file object rejects on Python 3. The ``with`` block guarantees the
    # handle is closed even if one of the writes raises.
    with open(filename, 'wb') as f:
        f.write(header)
        f.write(text)
|
|
|
|
|
2012-04-17 18:14:02 +00:00
|
|
|
|
2011-06-30 04:46:37 +00:00
|
|
|
def describe(node, depth=2):
    """Return a short, CSS-selector-like description of ``node``.

    The description is the node's tag followed by ``#id`` and ``.class``
    parts when those attributes are set. A leading ``div`` is dropped when
    an id or class follows it. Bare ``tr``, ``td``, ``div`` and ``p`` nodes
    get a two-digit number appended, taken from the module-level ``uids``
    registry so a given node is always numbered the same way.

    :param node: object to describe. Anything without a ``tag`` attribute
        is reported as ``[<its type>]``; otherwise it must provide
        ``get(name, default)`` and ``getparent()``.
    :param depth: how many ancestors to append, each separated by ``' - '``.
    :return: the description string.
    """
    if not hasattr(node, 'tag'):
        return "[%s]" % type(node)

    name = node.tag

    node_id = node.get('id', '')
    if node_id:
        name += '#' + node_id

    # split() collapses runs of whitespace and drops leading/trailing
    # whitespace, so a value such as 'a  b ' yields '.a.b' rather than
    # '.a..b.' with empty segments.
    classes = (node.get('class', '') or '').split()
    if classes:
        name += '.' + '.'.join(classes)

    if name[:4] in ('div#', 'div.'):
        name = name[3:]

    if name in ('tr', 'td', 'div', 'p'):
        uid = uids.get(node)
        if uid is None:
            # First time this node is seen: hand out the next number.
            uid = uids[node] = len(uids) + 1
        name += "%02d" % uid

    if depth:
        parent = node.getparent()
        if parent is not None:
            return name + ' - ' + describe(parent, depth - 1)
    return name
|