#
#  equerybts.parser.bugs.py  --  Html parser for equerybts
#                                  + For bugs +
#
#   Copyright (c) 2004 Akinori Hattori, All rights reserved.
#
#   Permission is hereby granted, free of charge, to any person obtaining a
#   copy of this software and associated documentation files (the "Software"),
#   to deal in the Software without restriction, including without limitation
#   the rights to use, copy, modify, merge, publish, distribute, sublicense,
#   and/or sell copies of the Software, and to permit persons to whom
#   the Software is furnished to do so, subject to the following conditions:
#
#   The above copyright notice and this permission notice shall be included in
#   all copies or substantial portions of the Software.
#
#   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
#   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
#   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
#   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
#   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
#   OTHER DEALINGS IN THE SOFTWARE.
#
#  $Id: bugs.py 73 2004-01-26 11:57:38Z hattya $
#

import sgmllib
import string
import re


class BugsParser(sgmllib.SGMLParser):

    def __init__(self, verbose=0):
        sgmllib.SGMLParser.__init__(self, verbose)
        self.__in_table = 0          # count for table
        self.__in_item  = 0          # count for bug info (in td = 1, in b = 2)
        self.__in_div   = 0          # count for div
        self.__in_com   = -1         # count for pre (in pre += 1, in i += 0.1)
        self.__at_sum   = 0          # flag of summary
        self.__at_link  = [0, 0]     # count for a if in pre
        self.__com_key  = ''
        self.__id_re    = re.compile('.+id=(\d+)')
        self.savedata   = ''
        self.fields     = {}
        self.comments   = {}
        self.links      = {}

    def __text_proc(self, text):
        # replace all contiguous spaces and newlines into a single space
        strip_re = re.compile('[\s]+')
        return strip_re.sub(' ', text)

    def __save_link(self, attr):
        # save href attribute of anchor
        if len(attr) == 3:
            for a in attr:
                if a[0] == 'href':
                    self.__at_link[1] += 1
                    key = 'A' + str(self.__at_link[1])
                    id  = self.__id_re.match(a[1])
                    self.links[key] = [id.group(1)]

                elif a[0] == 'title':
                    self.links[key].insert(0, a[1])

        else:
            self.__at_link[0] = 0

    def save_bgn(self):
        self.savedata = ''

    def save_end(self):
        return self.savedata

    def handle_data(self, data):
        if self.__in_item == 2:
            if not self.__at_sum:
                self.savedata += data.strip()

            else:
                self.savedata += data

        elif self.__in_div > 1:
            if self.__in_com > int(self.__in_com):
                self.savedata += data.strip() + '|'

            else:
                if self.__at_link[0]:
                    # mark the link
                    bgn = '#A'  + str(self.__at_link[1]) + '#'
                    end = '#/A' + str(self.__at_link[1]) + '#'
                    self.savedata += bgn + data + end

                else:
                    self.savedata += data

    def start_table(self, attr):
        self.__in_table += 1

        if self.__in_table > 1:
            self.__in_div = 0

    def start_td(self, attr):
        if self.__in_table == 1:
            self.__in_item = 1

            if self.save_end():
                items = string.split(self.save_end(), ':', 1)
                item  = items[0].lower().replace(' ', '-')

                if item == 'opened':
                    date = string.split(items[1])
                    self.fields[item] = {'date': date[0], 'time': date[1], 'tz': date[2]}

                else:
                    self.fields[item] = items[1]

                    if item == 'url':
                        self.__at_sum = 1

            self.save_bgn()

    def end_td(self):
        if self.__in_table == 1:
            self.__in_item = 0

    def start_b(self, attr):
        if self.__in_table == 1:
            self.__in_item += 1

    def start_div(self, attr):
        self.__in_div += 1

    def start_pre(self, attr):
        if self.__in_div > 1:
            self.__in_com += 1
            self.save_bgn()

    def end_pre(self):
        if self.__in_div == 2:
            # description
            self.fields.update({'description': self.save_end()})

        else:
            # additional comment
            self.comments[self.__com_key].update({'com': self.save_end()})

    def start_i(self, attr):
        if self.__in_div:
            self.__com_key = 'com#' + str(self.__in_com + 1)
            self.comments[self.__com_key] = {}
            self.__in_com += 0.1
            self.save_bgn()

    def end_i(self):
        if self.__in_div:
            self.__in_com = int(self.__in_com)
            tmp = self.__text_proc(self.save_end())

            info = string.split(tmp, '|')

            if len(info) == 6:
                name = info[3]
                date = string.split(info[4])

            else:
                name = string.join(info[3:6], '')
                date = string.split(info[6])


            self.comments[self.__com_key].update({'name': name,
                                                  'date': date[0],
                                                  'time': date[1],
                                                  'tz':   date[2]})

    def start_a(self, attr):
        if self.__in_table == 1 and self.__in_div == 2:
            # description
            self.__at_link[0] = 1
            self.__save_link(attr)

        elif self.__in_com > int(self.__in_com):
            # additional comment in i
            if len(attr) == 1:
                mail = string.split(attr[0][1], ':')[1]
                mail = re.sub('&#64;', '@', mail)
                self.comments[self.__com_key].update({'mail': mail})

        elif self.__in_table == 1 and self.__in_com >= 0:
            # additional comment in pre
            self.__at_link[0] = 1
            self.__save_link(attr)

    def end_a(self):
        self.__at_link[0] = 0
