# Licensed under a 3-clause BSD style license - see LICENSE.rst
# -*- coding: utf-8 -*-
"""
===============
TracRemote.util
===============
Utility functions and classes for internal use by the TracRemote package.
"""
from html.parser import HTMLParser
from urllib.parse import unquote
import re
from collections import OrderedDict
def CRLF(text):
    """Normalize every line ending in `text` to CRLF.

    CRLF line endings are required by the POST data mime-types
    application/x-www-form-urlencoded and multipart/form-data.

    Parameters
    ----------
    text : :class:`str`
        Text to convert.

    Returns
    -------
    :class:`str`
        Input text with all line endings converted to CRLF.  Any initial
        blank lines are also removed.
    """
    # Collapse CRLF and bare-CR endings to LF first so that no existing
    # line ending is expanded twice.
    unix_text = text.replace("\r\n", "\n").replace("\r", "\n")
    # Dropping leading LFs before the expansion is the same as dropping
    # leading CRLF pairs after it.
    return unix_text.lstrip("\n").replace("\n", "\r\n")
class SimpleAttachmentHTMLParser(HTMLParser):
    """Parse an attachment list page.

    Attachments are only collected from a ``<dl>`` list that appears inside
    the ``<div>`` whose ``id`` equals :attr:`div_id`.

    Attributes
    ----------
    mtimere : Regular Expression
        Regular Expression for extracting modification times.
    div_id : :class:`str`
        The ``id`` of the ``<div>`` that contains the attachment list.
    attachments : :class:`collections.OrderedDict`
        Maps each attachment name to a :class:`dict` with the keys ``size``
        and ``mtime`` and, when they are found, ``author`` and ``comment``.
    current_attachment : :class:`str`
        Name of the attachment most recently started, or ``None`` if no
        attachment has been seen yet.
    """
    mtimere = re.compile(r'/timeline\?from=([0-9T:-]+)&precision=second')

    def __init__(self):
        HTMLParser.__init__(self)
        self.found_div = False
        self.div_id = 'attachments'
        self.found_list = False
        self.found_author = False
        self.found_comment = False
        self.attachments = OrderedDict()
        self.current_attachment = None

    def _current_entry(self):
        """Return the metadata dict of the current attachment, or ``None``."""
        return self.attachments.get(self.current_attachment)

    def _start_anchor(self, dattrs):
        """Start a new attachment or record the current one's mtime."""
        title = dattrs.get('title')
        href = dattrs.get('href')
        if title is None or href is None:
            # Anchors lacking either attribute carry nothing of interest.
            return
        if title == 'View attachment':
            name = href.split('/')[-1]
            self.attachments[name] = {'size': 0, 'mtime': None}
            self.current_attachment = name
            self.found_author = False
            self.found_comment = False
        elif title != 'Download':
            # The 'Download' link is deliberately ignored (may want to grab
            # it someday).  Any other titled link is checked for a timeline
            # URL that encodes the modification time.
            match = self.mtimere.search(unquote(href))
            entry = self._current_entry()
            if match is not None and entry is not None:
                entry['mtime'] = match.group(1)

    def _start_span(self, dattrs):
        """Record the attachment size, or flag an upcoming author name."""
        title = dattrs.get('title')
        entry = self._current_entry()
        if title is not None and entry is not None:
            # The first space-separated word of the title is the size.
            try:
                size = int(title.split(' ')[0])
            except ValueError:
                # Existing behavior: report the unparsable title on stdout.
                print(title)
                size = 0
            entry['size'] = size
        elif dattrs.get('class') == 'trac-author':
            self.found_author = True

    def handle_starttag(self, tag, attrs):
        """Track position in the page and dispatch tags of interest.

        Parameters
        ----------
        tag : :class:`str`
            Name of the tag.
        attrs : :class:`list`
            List of ``(name, value)`` attribute pairs.
        """
        if not self.found_div:
            if tag == 'div':
                self.found_div = dict(attrs).get('id') == self.div_id
            return
        if not self.found_list:
            if tag == 'dl':
                self.found_list = True
            return
        dattrs = dict(attrs)
        if tag == 'a':
            self._start_anchor(dattrs)
        elif tag == 'span':
            self._start_span(dattrs)
        elif tag == 'em':
            #
            # Trac 1.0 and older.
            #
            self.found_author = True
        elif tag == 'dd':
            self.found_comment = True

    def handle_data(self, data):
        """Store `data` as the pending author and/or comment.

        Parameters
        ----------
        data : :class:`str`
            Text found between tags.
        """
        if not (self.found_author or self.found_comment):
            return
        entry = self._current_entry()
        if entry is not None:
            if self.found_author:
                entry['author'] = data.strip()
            if self.found_comment:
                entry['comment'] = data.strip()
        # When no attachment has been started there is nowhere to store the
        # text; drop it instead of raising KeyError.  Either way the pending
        # flags are consumed by this chunk of text.
        self.found_author = False
        self.found_comment = False

    def handle_endtag(self, tag):
        """Leave the attachment list or a comment when its tag closes.

        Parameters
        ----------
        tag : :class:`str`
            Name of the tag.
        """
        if tag == 'dl' and self.found_div:
            self.found_div = False
            self.found_list = False
        if tag == 'dd' and self.found_comment:
            self.found_comment = False
class SimpleIndexHTMLParser(HTMLParser):
    """Parse the Trac TitleIndex page.

    This parser should be capable of handling Trac 1.0-style Index pages
    as well as older versions.

    Page names are collected from links found after an
    ``<h1 id="TitleIndex">`` (until the next ``</ul>``) or after a
    ``<div class="titleindex">`` (until the next ``</div>``).

    Attributes
    ----------
    found_h1 : :class:`str`
        ``'h1'`` or ``'div'`` while inside the corresponding index section,
        otherwise ``None``.
    TitleIndex : :class:`list`
        The part of each collected link that follows ``/wiki/``, in
        document order.
    """

    def __init__(self):
        HTMLParser.__init__(self)
        self.found_h1 = None
        self.h1_attr = {'h1': 'id', 'div': 'class'}
        self.h1_id = {'h1': 'TitleIndex', 'div': 'titleindex'}
        self.TitleIndex = list()

    def handle_starttag(self, tag, attrs):
        """Detect the start of the index and collect wiki links inside it.

        Parameters
        ----------
        tag : :class:`str`
            Name of the tag.
        attrs : :class:`list`
            List of ``(name, value)`` attribute pairs.
        """
        if self.found_h1 is not None:
            if tag == 'a':
                # Look the link up by name rather than assuming that href
                # is the first attribute; anchors with no href, or whose
                # href does not contain '/wiki/', are skipped.
                href = dict(attrs).get('href')
                if href is not None:
                    _, marker, page = href.partition('/wiki/')
                    if marker:
                        self.TitleIndex.append(page)
            return
        if tag == 'h1' or tag == 'div':
            #
            # Search for the id
            #
            try:
                if dict(attrs)[self.h1_attr[tag]] == self.h1_id[tag]:
                    self.found_h1 = tag
            except KeyError:
                pass

    def handle_endtag(self, tag):
        """Leave the index section when its terminating tag closes.

        Parameters
        ----------
        tag : :class:`str`
            Name of the tag.
        """
        if tag == 'ul' and self.found_h1 == 'h1':
            self.found_h1 = None
        if tag == 'div' and self.found_h1 == 'div':
            self.found_h1 = None
class SimpleWikiHTMLParser(HTMLParser):
    """Handle simple forms in Trac documents.  The form is searched
    for certain embedded values.

    With ``search='token'`` the parser looks for the ``__FORM_TOKEN`` input
    of the form with id ``acctmgr_loginform``; with any other value it
    looks for the ``version`` input of the form with id ``edit``.

    Attributes
    ----------
    search_value : :class:`str`
        The embedded value found in the form.  Initially set to ``None``.
    """

    def __init__(self, search='token'):
        HTMLParser.__init__(self)
        self.search = search
        self.search_value = None
        self.found_form = False
        if search == 'token':
            target = ('acctmgr_loginform', '__FORM_TOKEN')
        else:
            target = ('edit', 'version')
        self.form_id, self.input_name = target

    def handle_starttag(self, tag, attrs):
        """Locate the target form, then capture the value of its input.

        Parameters
        ----------
        tag : :class:`str`
            Name of the tag.
        attrs : :class:`list`
            List of ``(name, value)`` attribute pairs.
        """
        if not self.found_form:
            # Still looking for the form: only <form> tags matter, and a
            # form without an id can never be the one we want.
            if tag == 'form':
                self.found_form = dict(attrs).get('id') == self.form_id
            return
        if tag != 'input':
            return
        fields = dict(attrs)
        if fields.get('name') == self.input_name:
            self.search_value = fields['value']

    def handle_endtag(self, tag):
        """Stop searching once the target form is closed.

        Parameters
        ----------
        tag : :class:`str`
            Name of the tag.
        """
        if self.found_form and tag == 'form':
            self.found_form = False