########################################################################
#
# File Name:            pDomletteReader.py
#
# Documentation:        http://docs.4suite.org/pDomletteReader.py.html
#
"""
Reader tools for pDomlette
WWW: http://4suite.org        e-mail: support@4suite.org

Copyright (c) 2001 Fourthought Inc, USA.   All Rights Reserved.
See  http://4suite.org/COPYRIGHT  for license and copyright information
"""

import sys, string, os, urllib
from types import StringType
# Collect the string types this interpreter offers: StringType always,
# plus UnicodeType when the ``types`` module provides it.
stringTypes = [StringType]
try:
    from types import UnicodeType
    stringTypes.append(UnicodeType)
except ImportError, e:
    # ``types`` has no UnicodeType here; stringTypes stays [StringType].
    pass

from xml.sax import saxlib, sax2exts, handler, xmlreader
from Ft.Lib import ReaderBase, ReaderException
from xml.dom.ext.reader.Sax2 import XmlDomGenerator, NsHandler
from xml.parsers import expat
from xml.dom import XML_NAMESPACE, XMLNS_NAMESPACE
from xml.dom.ext import SplitQName
from Ft.Lib import Uri

from pDomlette import Node, Document, Element, Attribute, DocumentFragment, \
     Text, ProcessingInstruction, Comment, NamedNodeMap, ReleaseNode


class Handler(ReaderBase.HandlerBase,
              ReaderBase.NamespaceAdapterMixin,
              ReaderBase.XincludeMixin,
              ReaderBase.Force8BitMixin,
              ReaderBase.DocIndexMixin,
              ReaderBase.StringIndexMixin,
              ReaderBase.PreserveSpaceMixin,
              ):
    """
    Builds a pDomlette node tree from parser callbacks (element start/end,
    character data, comments, processing instructions, entity references).

    PyExpatReader.initParser installs these methods as the pyexpat
    handlers.  The helper methods used throughout (_checkString,
    _docIndex, _handleIncludes, ...) are not defined in this class; they
    are expected to come from the ReaderBase base classes.
    """

    def __init__(self, resolveEntity=None, processIncludes=1,
                 visitedHrefs=None, indexStringValues=1, force8Bit=0,
                 uriResolver=None):
        """
        Forward the construction options to the ReaderBase base classes
        that take them.
        """
        ReaderBase.HandlerBase.__init__(self, uriResolver, resolveEntity)
        ReaderBase.XincludeMixin.__init__(self, processIncludes, visitedHrefs)
        ReaderBase.Force8BitMixin.__init__(self, force8Bit)
        ReaderBase.StringIndexMixin.__init__(self, indexStringValues)

    def initState(self, ownerDoc=None, stripElements=None, refUri=None):
        """
        Prepare for a new parse.

        With an ownerDoc the generated nodes are collected in a new
        DocumentFragment owned by that document; otherwise a fresh
        Document is created, used as both root node and owner document,
        and given refUri.
        """
        if not ownerDoc:
            document = Document()
            self._rootNode = self._ownerDoc = document
            document.refUri = refUri
        else:
            self._ownerDoc = ownerDoc
            #Create a docfrag to hold all the generated nodes.
            self._rootNode = DocumentFragment(ownerDoc)
        ReaderBase.PreserveSpaceMixin._initState(self, 1, stripElements)
        ReaderBase.XincludeMixin._initState(self)
        ReaderBase.DocIndexMixin._initState(self)
        ReaderBase.StringIndexMixin._initState(self)
        ReaderBase.HandlerBase.initState(self)

    def processingInstruction(self, target, data):
        """
        Append a ProcessingInstruction node to the element on top of the
        node stack.  Nothing is built while self._includeDepth is true.
        """
        pi_target = self._checkString(target)
        pi_data = self._checkString(data)
        if not self._includeDepth:
            # Pending character data must become a Text node first so
            # that document order is kept.
            self._completeTextNode()
            node = ProcessingInstruction(self._ownerDoc)
            node.target = pi_target
            node.data = pi_data
            self._docIndex(node)
            self._nodeStack[-1].appendChild(node)

    def comment(self, data):
        """
        Append a Comment node to the element on top of the node stack.
        Nothing is built while self._includeDepth is true.
        """
        text = self._checkString(data)
        if not self._includeDepth:
            self._completeTextNode()
            node = Comment(self._ownerDoc)
            node.data = text
            self._docIndex(node)
            self._nodeStack[-1].appendChild(node)

    def startElement(self, name, attribs):
        """
        Create an Element with its Attribute nodes and push it on the
        node stack.  Returns early, building nothing, when
        _handleIncludes reports a true include depth.
        """
        checked_name = self._checkString(name)
        checked_attribs = self._checkDict(attribs)
        (name, qname, nsattribs) = self._handleStartElementNss(
            checked_name, checked_attribs
            )
        if self._handleIncludes(name, qname, nsattribs):
            return
        (namespace, local, prefix) = ReaderBase.HandlerBase._startElementPrep(
            self, name, qname
            )
        element = Element(self._ownerDoc, namespace, local, prefix)
        self._docIndex(element)
        for qn in nsattribs.getQNames():
            ns_uri = nsattribs.getNameByQName(qn)[0]
            (pfx, lname) = SplitQName(qn)
            attr_node = Attribute(self._ownerDoc, ns_uri, lname, pfx)
            attr_node.value = nsattribs.getValueByQName(qn)
            attr_node.ownerElement = element
            self._docIndex(attr_node)
            # When the local part is 'xmlns' the attribute is stored
            # under the prefix part instead.
            if lname == 'xmlns':
                key = (ns_uri, pfx)
            else:
                key = (ns_uri, lname)
            element.attributes[key] = attr_node
        self._updatePreserveStateStack(element)
        self._nodeStack.append(element)
        self._reset(element)

    def endElement(self, name):
        """
        Close the current element: drop the top namespace entry and,
        unless _popInclude says to skip, flush pending text, pop the
        element off the node stack and append it to its parent.
        """
        self._checkString(name)
        del self._namespaces[-1]
        if not self._popInclude():
            self._completeTextNode()
            finished = self._nodeStack[-1]
            self._popPreserveStateStack()
            del self._nodeStack[-1]
            self._nodeStack[-1].appendChild(finished)

    def characters(self, data):
        """
        Accumulate character data in self._currText; the Text node is
        only created later by _completeTextNode.
        """
        chunk = self._checkString(data)
        if self._checkIncludeDepth():
            return
        self._currText = self._currText + chunk

    def entityRef(self, context, base, sysid, pubid):
        """
        Delegate to HandlerBase.entityRef after passing base, sysid and
        pubid through _checkString.
        """
        checked_base = self._checkString(base)
        checked_sysid = self._checkString(sysid)
        checked_pubid = self._checkString(pubid)
        return ReaderBase.HandlerBase.entityRef(self, context, checked_base,
                                                checked_sysid, checked_pubid)

    def resolveEntity(self, pubid, sysid):
        """
        Delegate to HandlerBase.resolveEntity after passing both
        identifiers through _checkString.
        """
        checked_pubid = self._checkString(pubid)
        checked_sysid = self._checkString(sysid)
        return ReaderBase.HandlerBase.resolveEntity(self, checked_pubid,
                                                    checked_sysid)

    def _completeTextNode(self):
        """
        Turn the accumulated character data into a Text node under the
        top of the node stack, then clear the accumulator.

        No node is created when there is no pending text, when the node
        stack is empty, when the top of the stack is the document node,
        or when the text is whitespace-only and _peekPreserveStateStack
        returns false.
        """
        #Note some parsers don't report ignorable white space properly
        pending = self._currText
        if pending and len(self._nodeStack):
            parent = self._nodeStack[-1]
            if parent.nodeType != Node.DOCUMENT_NODE:
                if self._peekPreserveStateStack() or string.strip(pending):
                    text_node = Text(self._ownerDoc)
                    text_node.data = pending
                    self._docIndex(text_node)
                    parent.appendChild(text_node)
                    self._updateTopNodeStringIndex(pending)
        self._currText = ''


class PyExpatReader(ReaderBase.DomletteReader):
    """
    Reader that builds a pDomlette tree by driving pyexpat directly.

    A new parser and a new HandlerClass instance are created for every
    fromStream call and dropped again when the call finishes.
    """
    HandlerClass = Handler

    def __init__(self, resolveEntity=None, processIncludes=1,
                 visitedHrefs=None, force8Bit=0):
        """
        Remember the options that initParser later hands to HandlerClass.
        force8Bit is passed on to DomletteReader.__init__.
        """
        ReaderBase.DomletteReader.__init__(self, force8Bit)
        self._resolveEntity = resolveEntity
        self._processIncludes = processIncludes
        self._visitedHrefs = visitedHrefs
        return

    def fromStream(self, stream, refUri='', ownerDoc=None,
                   stripElements=None):
        """
        Parse stream and return the handler's root node.

        refUri is set as the parser's base URI and passed to the handler.
        ownerDoc and stripElements are passed to the handler's initState.

        If parsing fails the partially built nodes are released.  When
        expat reports an error code a ReaderException (XML_PARSE_ERROR)
        carrying the error line, column and message is raised; any other
        exception is re-raised unchanged.  The parser and handler are
        dropped whether or not parsing succeeds.
        """
        self.initParser()
        try:
            self.handler.initState(ownerDoc, stripElements, refUri)
            self.parser.SetBase(refUri)
            try:
                self.parser.ParseFile(stream)
            except Exception:
                #FIXME: This makes poor assumptions of the handler: encapsulate.
                if self.handler._rootNode: ReleaseNode(self.handler._rootNode)
                if self.handler._ownerDoc: ReleaseNode(self.handler._ownerDoc)
                if self.parser.ErrorCode:
                    raise ReaderException(ReaderException.XML_PARSE_ERROR,
                                      self.parser.ErrorLineNumber,
                                      self.parser.ErrorColumnNumber,
                                      expat.ErrorString(self.parser.ErrorCode))
                else:
                    raise
            return self.handler.getRootNode()
        finally:
            # Always drop the parser and handler, also on failure: they
            # reference each other (handler.parser and the parser's
            # callbacks), so leaving them set keeps that cycle alive.
            self.killParser()

    def releaseNode(self, node):
        """Release node by calling pDomlette's ReleaseNode on it."""
        ReleaseNode(node)

    def initParser(self):
        """
        Create a fresh handler and expat parser and connect the handler's
        methods to the parser's callbacks.
        """
        self.handler = self.HandlerClass(resolveEntity=self._resolveEntity,
                                         processIncludes=self._processIncludes,
                                         visitedHrefs=self._visitedHrefs,
                                         force8Bit=self._force8Bit)
        self.parser=expat.ParserCreate()
        self.parser.StartElementHandler = self.handler.startElement
        self.parser.EndElementHandler = self.handler.endElement
        self.parser.CharacterDataHandler = self.handler.characters
        self.parser.ProcessingInstructionHandler = self.handler.processingInstruction
        self.parser.CommentHandler = self.handler.comment
        self.parser.ExternalEntityRefHandler = self.handler.entityRef
        # The handler keeps a reference back to the parser.
        self.handler.parser = self.parser
        return

    def killParser(self):
        """Drop the references to the current parser and handler."""
        self.parser = None
        self.handler = None
        return


class Sax2Handler(Handler, XmlDomGenerator):
    """
    SAX2 variant of Handler: builds pDomlette nodes from namespace-aware
    SAX events (startElementNS / endElementNS) and turns SAX error
    callbacks into ReaderException.
    """

    def __init__(self, keepAllWs=0, processIncludes=1,
                 visitedHrefs=None, indexStringValues=1):
        """
        Initialise both bases.  keepAllWs goes to XmlDomGenerator; the
        remaining options go to Handler.
        """
        # Keywords are required here: the first positional parameter of
        # Handler.__init__ is resolveEntity, so passing these values
        # positionally would bind each one to the wrong parameter.
        Handler.__init__(self, processIncludes=processIncludes,
                         visitedHrefs=visitedHrefs,
                         indexStringValues=indexStringValues)
        XmlDomGenerator.__init__(self, keepAllWs)
        return

    def initState(self, ownerDoc=None, stripElements=None, refUri=None):
        """
        Reset the per-parse state, then run NsHandler.initState and
        Handler.initState.
        """
        self._dt = None
        self._xmlDecl = None
        self._orphanedNodes = []
        self._currText = ''
        NsHandler.initState(self, ownerDoc)
        Handler.initState(self, ownerDoc, stripElements, refUri)
        return

    def getRootNode(self):
        """Flush any pending text and return the root node."""
        self._completeTextNode()
        return self._rootNode

    def startElementNS(self, name, qname, attribs):
        """
        Create an Element with its Attribute nodes and push it on the
        node stack.

        name is a (namespace, local) pair; the prefix is taken from
        qname.
        """
        namespace = name[0]
        local = name[1]
        prefix = SplitQName(qname)[0]
        # Called for its effect on handler state only; the return value
        # is not needed here.
        # NOTE(review): Handler.endElement removes an entry from
        # self._namespaces but endElementNS does not -- confirm whether
        # this call adds one that should be removed again.
        self._handleStartElementNss(name, attribs)

        new_element = Element(self._ownerDoc, namespace, local, prefix)
        for attr_qname in attribs.getQNames():
            (attr_ns, attr_local) = attribs.getNameByQName(attr_qname)
            attr_prefix = SplitQName(attr_qname)[0]
            attr = Attribute(self._ownerDoc, attr_ns, attr_local, attr_prefix)
            attr.value = attribs.getValueByQName(attr_qname)
            attr.ownerElement = new_element
            new_element.attributes[(attr_ns, attr_local)] = attr
        self._updatePreserveStateStack(new_element)
        self._nodeStack.append(new_element)
        self._reset(new_element)
        return

    def endElementNS(self, name, qname):
        """
        Flush pending text, pop the finished element off the node stack
        and append it to its parent.
        """
        self._completeTextNode()
        new_element = self._nodeStack[-1]
        self._popPreserveStateStack()
        del self._nodeStack[-1]
        self._nodeStack[-1].appendChild(new_element)
        return

    def _raiseSaxParseError(self, exception):
        """
        Raise a ReaderException (XML_SAX_PARSE_ERROR) built from the
        system id, public id, line, column and message of a SAX parse
        exception.
        """
        pub_id = exception.getPublicId()
        if pub_id:
            pub_id_text = '(public ID "%s")' % pub_id
        else:
            pub_id_text = " "
        raise ReaderException(ReaderException.XML_SAX_PARSE_ERROR,
                          exception.getSystemId(),
                          pub_id_text,
                          exception.getLineNumber(),
                          exception.getColumnNumber(),
                          exception.getMessage())

    def error(self, exception):
        """SAX error callback: always raises ReaderException."""
        self._raiseSaxParseError(exception)

    def fatalError(self, exception):
        """SAX fatal-error callback: always raises ReaderException."""
        self._raiseSaxParseError(exception)


class SaxReader(ReaderBase.DomletteReader):
    """
    Reader that builds a pDomlette tree through a SAX2 parser.

    The parser is either the one supplied by the caller or one obtained
    from the sax2exts factories (validating when validate is true).
    """

    def __init__(self, validate=0, keepAllWs=0, catName=None,
                 saxHandlerClass=Sax2Handler, parser=None,
                 force8Bit=0):
        """
        Store the options used by initParser.  force8Bit is passed on to
        DomletteReader.__init__.
        """
        ReaderBase.DomletteReader.__init__(self, force8Bit)
        self._origParser = parser
        self._validate = validate
        self._keepAllWs = keepAllWs
        self._catName = catName
        self._saxHandlerClass = saxHandlerClass
        return

    def __getinitargs__(self):
        """
        Return the constructor arguments in __init__ order, read from
        the attributes that __init__ stores them under.
        """
        # self._force8Bit is not assigned in this class; initParser
        # already reads it, so it is assumed to be set by
        # DomletteReader.__init__.
        return (self._validate, self._keepAllWs, self._catName,
                self._saxHandlerClass, self._origParser, self._force8Bit)

    def fromStream(self, stream, refUri='', ownerDoc=None,
                   stripElements=None):
        """
        Parse stream and return the handler's root node.

        ownerDoc, stripElements and refUri are passed to the handler's
        initState.  If parsing fails the partially built nodes are
        released and the exception is re-raised.  The parser and handler
        references are dropped whether or not parsing succeeds.
        """
        self.initParser()
        try:
            self.handler.initState(ownerDoc, stripElements, refUri)
            try:
                self.parser.parse(stream)
            except Exception:
                #FIXME: This makes poor assumptions of the handler: encapsulate.
                if self.handler._rootNode: ReleaseNode(self.handler._rootNode)
                if self.handler._ownerDoc: ReleaseNode(self.handler._ownerDoc)
                raise
            return self.handler.getRootNode()
        finally:
            # Drop the parser and handler on failure as well as success.
            self.killParser()

    def releaseNode(self, node):
        """Release node by calling pDomlette's ReleaseNode on it."""
        ReleaseNode(node)

    def initParser(self):
        """
        Select the parser, create the handler and register the handler
        with the parser for content, DTD, error, lexical and (when the
        parser accepts it) declaration events.
        """
        self.parser = (self._origParser
                       or (self._validate
                           and sax2exts.XMLValParserFactory.make_parser())
                       or sax2exts.XMLParserFactory.make_parser())
        if self._catName:
            #set up the catalog, if there is one
            from xml.parsers.xmlproc import catalog
            cat_handler = catalog.SAX_catalog(self._catName, catalog.CatParserFactory())
            self.parser.setEntityResolver(cat_handler)
        self.handler = self._saxHandlerClass(self._keepAllWs)
        if self._force8Bit:
            # NOTE(review): Utf8OnlyHandler is not defined or imported in
            # the visible part of this module -- confirm where it lives.
            self.handler = Utf8OnlyHandler(self.handler)
        self.parser.setContentHandler(self.handler)
        self.parser.setDTDHandler(self.handler)
        self.parser.setErrorHandler(self.handler)
        self.parser.setProperty(handler.property_lexical_handler, self.handler)
        try:
            self.parser.setProperty(handler.property_declaration_handler,
                                    self.handler)
        except (SystemExit, KeyboardInterrupt):
            raise
        except:
            # Best effort: a failure to set the declaration-handler
            # property is deliberately ignored.
            pass
        return

    def killParser(self):
        """Drop the references to the current parser and handler."""
        self.parser = None
        self.handler = None

