Source code for TracRemote.util

# Licensed under a 3-clause BSD style license - see LICENSE.rst
# -*- coding: utf-8 -*-
"""
===============
TracRemote.util
===============

Utility functions and classes for internal use by the TracRemote package.
"""
from html.parser import HTMLParser
from urllib.parse import unquote
import re
from collections import OrderedDict


def CRLF(text):
    """Convert line endings to CRLF.

    CRLF line endings are required by the POST data mime-types
    application/x-www-form-urlencoded and multipart/form-data.

    Parameters
    ----------
    text : :class:`str`
        Text to convert.  Unix (LF), old Mac (CR) and existing CRLF line
        endings are all accepted.

    Returns
    -------
    :class:`str`
        Input text converted to CRLF line endings.  Any initial blank lines
        are also removed.
    """
    # Collapse every flavour of line ending to LF first so that existing
    # CRLF pairs are not turned into CR CR LF by the final replacement.
    unix_text = text.replace("\r\n", "\n").replace("\r", "\n")
    crlf_text = unix_text.replace("\n", "\r\n")
    # Every CR or LF now belongs to a CRLF pair, so stripping those two
    # characters from the left removes exactly the leading blank lines, in a
    # single pass instead of re-slicing the string once per blank line.
    return crlf_text.lstrip("\r\n")
class SimpleAttachmentHTMLParser(HTMLParser):
    """Parse an attachment list page.

    The parser waits for a ``<div>`` whose ``id`` equals :attr:`div_id`,
    then for the first ``<dl>`` inside it, and from there on collects one
    record per ``<a title="View attachment">`` link until the closing
    ``</dl>``.

    Attributes
    ----------
    mtimere : Regular Expression
        Regular Expression for extracting modification times.
    div_id : :class:`str`
        Value of the ``id`` attribute that marks the attachment ``<div>``.
    attachments : :class:`collections.OrderedDict`
        Maps the attachment name (last path component of the
        "View attachment" link) to a :class:`dict` with the keys ``'size'``
        and ``'mtime'`` and, when found, ``'author'`` and ``'comment'``.
        Only the first text chunk of the author and of the comment is kept.
    current_attachment : :class:`str`
        Key in `attachments` of the record currently being filled in, or
        ``None`` before the first attachment link is seen.
    """
    mtimere = re.compile(r'/timeline\?from=([0-9T:-]+)&precision=second')

    def __init__(self):
        super().__init__()
        self.found_div = False
        self.div_id = 'attachments'
        self.found_list = False
        self.found_author = False
        self.found_comment = False
        self.attachments = OrderedDict()
        self.current_attachment = None

    def _current_entry(self):
        """Return the record being filled in, or ``None`` if there is none."""
        return self.attachments.get(self.current_attachment)

    def _expect_author(self):
        """Arm author capture unless the current record already has one.

        Without the check an ``<em>`` inside a comment would replace the
        real author with the emphasised text.
        """
        entry = self._current_entry()
        if entry is not None and 'author' not in entry:
            self.found_author = True

    def _handle_anchor(self, dattrs):
        """Start a new record or store the modification time of the current one."""
        title = dattrs.get('title')
        href = dattrs.get('href')
        if title is None or href is None:
            # Links without (or with valueless) title/href carry nothing
            # this parser uses.
            return
        if title == 'View attachment':
            name = href.split('/')[-1]
            self.current_attachment = name
            self.attachments[name] = {'size': 0, 'mtime': None}
            self.found_author = False
            self.found_comment = False
        elif title == 'Download':
            # May want to grab this someday.
            pass
        else:
            match = self.mtimere.search(unquote(href))
            entry = self._current_entry()
            if match is not None and entry is not None:
                entry['mtime'] = match.group(1)

    def _handle_span(self, dattrs):
        """Record the size from a titled span, or arm author capture."""
        title = dattrs.get('title')
        entry = self._current_entry()
        if title is not None and entry is not None:
            try:
                size = int(title.split(' ')[0])
            except ValueError:
                # Retained from the original implementation: echo the
                # unexpected title and fall back to a size of zero.
                print(title)
                size = 0
            entry['size'] = size
        elif dattrs.get('class') == 'trac-author':
            self._expect_author()

    def handle_starttag(self, tag, attrs):
        """Track the div/dl context and dispatch tags inside the list."""
        if not self.found_div:
            if tag == 'div':
                # Search for the id.
                self.found_div = dict(attrs).get('id') == self.div_id
            return
        if not self.found_list:
            if tag == 'dl':
                self.found_list = True
            return
        if tag == 'a':
            self._handle_anchor(dict(attrs))
        elif tag == 'span':
            self._handle_span(dict(attrs))
        elif tag == 'em':
            # Trac 1.0 and older.
            self._expect_author()
        elif tag == 'dd':
            self.found_comment = True

    def handle_data(self, data):
        """Store the text that follows an author or comment marker."""
        entry = self._current_entry()
        if entry is None:
            # A marker was seen before any attachment link; there is no
            # record to attach the text to, so drop it instead of raising.
            self.found_author = False
            self.found_comment = False
            return
        text = data.strip()
        if self.found_author:
            entry['author'] = text
            self.found_author = False
        if self.found_comment:
            if text:
                entry['comment'] = text
                self.found_comment = False
            else:
                # Whitespace between <dd> and its first child element is
                # not the comment itself: keep waiting for real text (the
                # closing </dd> ends the wait) but make sure the key exists.
                entry.setdefault('comment', '')

    def handle_endtag(self, tag):
        """Leave the attachment list on ``</dl>``; stop comment capture on ``</dd>``."""
        if tag == 'dl' and self.found_div:
            self.found_div = False
            self.found_list = False
        if tag == 'dd' and self.found_comment:
            self.found_comment = False
class SimpleIndexHTMLParser(HTMLParser):
    """Parse the Trac TitleIndex page.

    This parser should be capable of handling Trac 1.0-style Index pages
    as well as older versions.

    Collection starts at ``<h1 id="TitleIndex">`` (and stops at the next
    ``</ul>``) or at ``<div class="titleindex">`` (and stops at the next
    ``</div>``).  While collecting, every link whose ``href`` contains
    ``/wiki/`` contributes the part of the ``href`` after that marker.

    Attributes
    ----------
    found_h1 : :class:`str`
        ``'h1'`` or ``'div'`` while inside the index, according to the tag
        that opened it; ``None`` otherwise.
    h1_attr : :class:`dict`
        For each opening tag, the attribute that identifies the index.
    h1_id : :class:`dict`
        For each opening tag, the attribute value that identifies the index.
    TitleIndex : :class:`list`
        Page names collected so far, in document order.
    """

    def __init__(self):
        super().__init__()
        self.found_h1 = None
        self.h1_attr = {'h1': 'id', 'div': 'class'}
        self.h1_id = {'h1': 'TitleIndex', 'div': 'titleindex'}
        self.TitleIndex = list()

    def handle_starttag(self, tag, attrs):
        """Collect wiki links inside the index, or detect the start of it."""
        if self.found_h1 is not None:
            if tag == 'a':
                # Look the href up by name: it is not guaranteed to be the
                # first attribute, and anchors without an href (or pointing
                # outside the wiki) must be skipped rather than crash.
                href = dict(attrs).get('href')
                if href:
                    _, marker, page = href.partition('/wiki/')
                    if marker:
                        self.TitleIndex.append(page)
            return
        if tag == 'h1' or tag == 'div':
            # Search for the id.
            dattrs = dict(attrs)
            try:
                is_index = dattrs[self.h1_attr[tag]] == self.h1_id[tag]
            except KeyError:
                is_index = False
            self.found_h1 = tag if is_index else None

    def handle_endtag(self, tag):
        """Stop collecting at the end tag that matches how the index began."""
        if tag == 'ul' and self.found_h1 == 'h1':
            self.found_h1 = None
        if tag == 'div' and self.found_h1 == 'div':
            self.found_h1 = None
class SimpleWikiHTMLParser(HTMLParser):
    """Handle simple forms in Trac documents.

    The form is searched for certain embedded values.

    Parameters
    ----------
    search : :class:`str`, optional
        ``'token'`` (the default) looks for the ``__FORM_TOKEN`` input of
        the form with id ``acctmgr_loginform``; any other value looks for
        the ``version`` input of the form with id ``edit``.

    Attributes
    ----------
    search_value : :class:`str`
        The embedded value found in the form.  Initially set to ``None``.
    """

    def __init__(self, search='token'):
        super().__init__()
        self.found_form = False
        self.search_value = None
        self.search = search
        if search == 'token':
            self.form_id = 'acctmgr_loginform'
            self.input_name = '__FORM_TOKEN'
        else:
            self.form_id = 'edit'
            self.input_name = 'version'

    def handle_starttag(self, tag, attrs):
        """Locate the target form, then pick up the wanted input's value."""
        if not self.found_form:
            if tag == 'form':
                # Search for the id.
                self.found_form = dict(attrs).get('id') == self.form_id
            return
        if tag != 'input':
            return
        dattrs = dict(attrs)
        # An input with the right name but no value attribute leaves
        # search_value untouched instead of raising KeyError.
        if dattrs.get('name') == self.input_name and 'value' in dattrs:
            self.search_value = dattrs['value']

    def handle_endtag(self, tag):
        """Stop searching once the target form is closed."""
        if tag == 'form' and self.found_form:
            self.found_form = False