#
# Pyrex wrapper for the expat API
#
# Exposes a minimal SAX-style interface: create a parser with CreateParser(),
# assign StartElementHandler / EndElementHandler, then feed it with Parse()
# or ParseFile().
#

import __builtin__
import sys

cdef extern from "expat/expat.h":
    cdef struct XML_ParserStruct
    ctypedef XML_ParserStruct *XML_Parser
    ctypedef char XML_Char
    cdef int XML_Parse(XML_Parser p, char *data, int length, int isFinal)
    cdef XML_Parser XML_ParserCreate(XML_Char *encodingName)
    cdef void XML_ParserFree(XML_Parser p)
    cdef int XML_GetErrorCode(XML_Parser p)
    cdef int XML_GetErrorLineNumber(XML_Parser p)
    cdef int XML_GetErrorColumnNumber(XML_Parser p)
    cdef char *XML_ErrorString(int code)
    cdef void XML_SetStartElementHandler(XML_Parser, void *)
    cdef void XML_SetEndElementHandler(XML_Parser, void *)
    cdef void XML_SetUserData(XML_Parser, void *)
    cdef void *XML_GetBuffer(XML_Parser, int bufsize)
    cdef int XML_ParseBuffer(XML_Parser, int bytes, int final)

cdef extern from "stdio.h":
    cdef struct FILE
    cdef int fread(void *buf, int size, int length, FILE *)
    cdef int ferror(FILE *)

cdef extern from "string.h":
    cdef int strlen(char *string)

cdef extern from "Python.h":
    cdef object PyUnicode_DecodeUTF8(char *str, int l, char *strict)
    cdef void *PyDict_GetItem(object intern_dict, object val)
    cdef FILE *PyFile_AsFile(object file)


class ExpatError(Exception):
    """Raised when expat reports a parse error.

    Attributes:
        message -- human-readable description
        code    -- expat error code
        offset  -- value passed as `offset` (set_error passes the column)
        lineno  -- line number of the error
    """

    def __init__(self, char *msg, int code, int offset, int lineno):
        Exception.__init__(self, msg)
        self.message = msg
        self.code = code
        self.offset = offset
        self.lineno = lineno


cdef class ExpatParser:
    """Extension type owning one expat XML_Parser.

    Handlers are installed by plain attribute assignment
    (parser.StartElementHandler = f); see __setattr__ / handlerInfo.
    """

    cdef XML_Parser _parser
    # sys.exc_info() tuple captured inside a C callback, or None.
    cdef object exception_occurred
    cdef char *encoding
    # Keeps the string that `encoding` points into alive.
    cdef object _encoding_ref
    # Maps raw UTF-8 names to their decoded unicode objects.
    cdef object intern_dict
    cdef object StartElementHandler
    cdef object EndElementHandler

    def __init__(self, encoding):
        """Create the underlying expat parser for the given encoding name.

        Raises MemoryError if expat cannot allocate the parser.
        """
        # Hold a reference first: self.encoding borrows this object's buffer.
        self._encoding_ref = encoding
        self.encoding = encoding
        self._parser = XML_ParserCreate(self.encoding)
        if self._parser == NULL:
            raise MemoryError
        # The callbacks recover `self` from expat's user-data pointer.
        XML_SetUserData(self._parser, <void *>self)
        self.intern_dict = {}

    def __dealloc__(self):
        # Release the C parser; it is NULL if __init__ never ran or failed.
        if self._parser != NULL:
            XML_ParserFree(self._parser)
            self._parser = NULL

    def set_error(self, int code):
        """Raise ExpatError for `code`, annotated with the error position."""
        cdef int lineno, column
        lineno = XML_GetErrorLineNumber(self._parser)
        column = XML_GetErrorColumnNumber(self._parser)
        message = "%s: line %d, column %d" % (
            XML_ErrorString(code), lineno, column)
        # ExpatError's signature is (msg, code, offset, lineno).
        raise ExpatError(message, code, column, lineno)

    cdef _raise_pending(self):
        # Re-raise (and clear) an exception captured inside a C callback.
        if self.exception_occurred is None:
            return
        exc_info = self.exception_occurred
        self.exception_occurred = None
        raise exc_info[0], exc_info[1], exc_info[2]

    cdef ParseFP(self, FILE *fp):
        # Feed a C stdio stream to expat through expat's own buffer.
        # Returns the last XML_ParseBuffer status (0 means parse error).
        cdef int BUF_SIZE, rv, bytes_read
        cdef void *buf
        BUF_SIZE = 2048
        rv = bytes_read = 1
        while rv and bytes_read and (self.exception_occurred is None):
            buf = XML_GetBuffer(self._parser, BUF_SIZE)
            if not buf:
                raise MemoryError
            bytes_read = fread(buf, sizeof(char), BUF_SIZE, fp)
            # fread never returns a negative count; a short read is either
            # EOF or an error, and only ferror() can tell them apart.
            if bytes_read == 0 and ferror(fp):
                raise IOError
            # A zero-length read marks the final chunk.
            rv = XML_ParseBuffer(self._parser, bytes_read, bytes_read == 0)
        return rv

    cdef ParseFileLike(self, readmethod):
        # Feed the parser from a Python read(n) callable until it returns
        # an empty string, which is passed on as the final chunk.
        cdef int BUF_SIZE, rv
        BUF_SIZE = 2048
        rv = 1
        while rv and (self.exception_occurred is None):
            data = readmethod(BUF_SIZE)
            if data:
                rv = self.Parse(data, 0)
            else:
                rv = self.Parse(data, 1)
                break
        return rv

    def ParseFile(self, object f):
        """Parse a whole document from `f`.

        `f` is either a real file object (read through its FILE *) or any
        object with a read(n) method.  Raises TypeError if `f` has no usable
        `read`, re-raises an exception captured from a handler, and raises
        ExpatError on a parse error.  Returns the final parse status.
        """
        cdef int rv
        cdef FILE *fp
        if isinstance(f, file):
            fp = PyFile_AsFile(f)
            rv = self.ParseFP(fp)
        else:
            readmethod = getattr(f, "read", None)
            if not readmethod:
                raise TypeError, \
                    "Argument should have 'read' attribute"
            rv = self.ParseFileLike(readmethod)
        # Handler exceptions take precedence over expat's own status.
        self._raise_pending()
        if not rv:
            self.set_error(XML_GetErrorCode(self._parser))
        return rv

    def Parse(self, char *data, int isFinal=0):
        """Parse(data[, isfinal])

        Parse XML data.  `isfinal` should be true at end of input.
        Re-raises an exception captured from a handler and raises
        ExpatError on a parse error.  Returns expat's status code.
        """
        cdef int result
        result = XML_Parse(self._parser, data, len(data), isFinal)
        self._raise_pending()
        if not result:
            self.set_error(XML_GetErrorCode(self._parser))
        return result

    def sethandler(self, name, function):
        """Install `function` as the handler called `name`.

        Raises AttributeError if `name` is not a key of handlerInfo.
        """
        try:
            handler = handlerInfo[name]
        except KeyError:
            raise AttributeError, "No such attribute %s" % name
        handler(self, function)

    def __setattr__(self, name, value):
        # Attribute assignment from Python is how handlers get installed.
        self.sethandler(name, value)


cdef object string_intern(intern_dict, char *string):
    # Decode a UTF-8 C string to unicode, reusing a previously decoded
    # object from intern_dict when one exists.  A None intern_dict disables
    # the cache.
    cdef void *cached
    if intern_dict is None:
        return PyUnicode_DecodeUTF8(string, strlen(string), "strict")
    key = string
    # PyDict_GetItem returns a borrowed reference, or NULL when missing.
    cached = PyDict_GetItem(intern_dict, key)
    if cached != NULL:
        return <object>cached
    result = PyUnicode_DecodeUTF8(string, strlen(string), "strict")
    intern_dict[key] = result
    return result


cdef void do_StartElement(void *userData, XML_Char *name, XML_Char **attrs):
    # expat start-element callback: builds a list of (name, value) unicode
    # pairs and calls the Python handler.  Exceptions cannot cross expat's
    # C frames, so they are stored on the parser and re-raised later.
    cdef int i
    cdef ExpatParser parser
    parser = <ExpatParser>userData
    if parser.exception_occurred is not None:
        # Keep the first failure; do not run further handlers after it.
        return
    try:
        pyattrs = []
        # attrs is a NULL-terminated array of alternating names and values.
        i = 0
        while attrs[i] != NULL:
            attr_name = string_intern(parser.intern_dict, attrs[i])
            attr_value = PyUnicode_DecodeUTF8(
                attrs[i + 1], strlen(attrs[i + 1]), "strict")
            pyattrs.append((attr_name, attr_value))
            i = i + 2
        # NOTE(review): the element name is passed as a raw byte string
        # here, while do_EndElement passes interned unicode -- confirm
        # which one callers expect before unifying.
        parser.StartElementHandler(name, pyattrs)
    except:
        parser.exception_occurred = sys.exc_info()


def setStartElementHandler(ExpatParser parser, object handler):
    """Register `handler` to be called for every start tag."""
    XML_SetStartElementHandler(parser._parser, <void *>do_StartElement)
    parser.StartElementHandler = handler


cdef void do_EndElement(void *userData, XML_Char *name):
    # expat end-element callback; same exception capture as do_StartElement.
    cdef ExpatParser parser
    parser = <ExpatParser>userData
    if parser.exception_occurred is not None:
        return
    try:
        parser.EndElementHandler(string_intern(parser.intern_dict, name))
    except:
        parser.exception_occurred = sys.exc_info()


def setEndElementHandler(ExpatParser parser, object handler):
    """Register `handler` to be called for every end tag."""
    XML_SetEndElementHandler(parser._parser, <void *>do_EndElement)
    parser.EndElementHandler = handler


# Maps assignable attribute names to the function that installs them.
handlerInfo = {"StartElementHandler": setStartElementHandler,
               "EndElementHandler": setEndElementHandler}


def CreateParser(char *encodingName):
    """Return a new ExpatParser for the given encoding name."""
    return ExpatParser(encodingName)

#def find(f):
#    find_cheeses(callback, f)

#cdef void callback(char *name, void *f):
#    (f)(name)