#!/usr/bin/python -u
#
# This is the API builder, it parses the C sources and builds the
# API formal description in XML.
#
# See Copyright for the status of this software.
#
# daniel@veillard.com
#
import glob
import os
import string
import sys

# Set to a true value to print every token and preprocessor line handled
# by CParser.
debug = 0
# Name of one symbol whose definitions and updates should be printed
# (for instance 'ignorableWhitespaceSAXFunc'); None disables the trace.
debugsym = None

#
# C parser analysis code
#

# Maps a file name to the reason why it is listed here as ignored.
ignored_files = {
    # trio portability layer
    "trio": "too many non standard macros",
    "trio.c": "too many non standard macros",
    "trionan.c": "too many non standard macros",
    "triostr.c": "too many non standard macros",
    # generated or internal headers
    "acconfig.h": "generated portability layer",
    "config.h": "generated portability layer",
    "libxml.h": "internal only",
    # out of memory tester
    "testOOM.c": "out of memory tester",
    "testOOMlib.h": "out of memory tester",
    "testOOMlib.c": "out of memory tester",
    # not integrated / special headers
    "rngparser.c": "not yet integrated",
    "rngparser.h": "not yet integrated",
    "elfgcchack.h": "not a normal header",
    # command line test tools
    "testHTML.c": "test tool",
    "testReader.c": "test tool",
    "testSchemas.c": "test tool",
    "testXPath.c": "test tool",
    "testAutomata.c": "test tool",
    "testModule.c": "test tool",
    "testRegexp.c": "test tool",
    "testThreads.c": "test tool",
    "testC14N.c": "test tool",
    "testRelax.c": "test tool",
    "testSAX.c": "test tool",
    "testURI.c": "test tool",
    # regression tests
    "testapi.c": "generated regression tests",
    "runtest.c": "regression tests program",
    "runsuite.c": "regression tests program",
    "tst.c": "not part of the library",
    "test.c": "not part of the library",
    "testdso.c": "test for dynamid shared libraries",
    "testrecurse.c": "test for entities recursions",
    # internal-only headers
    "xzlib.h": "Internal API only 2.8.0",
    "buf.h": "Internal API only 2.9.0",
    "enc.h": "Internal API only 2.9.0",
    "/save.h": "Internal API only 2.9.0",
    "timsort.h": "Internal header only for xpath.c 2.9.0",
}

# Maps a C identifier to (n, reason).  When CParser.token() meets one of
# these names it drops the name itself and the n tokens following it.
ignored_words = {
    # Windows calling-convention / export keywords
    "WINAPI": (0, "Windows keyword"),
    "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
    "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
    "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
    "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
    "XMLCALL": (0, "Special macro for win32 calls"),
    "XSLTCALL": (0, "Special macro for win32 calls"),
    "XMLCDECL": (0, "Special macro for win32 calls"),
    "EXSLTCALL": (0, "Special macro for win32 calls"),
    "__declspec": (3, "Windows keyword"),
    "__stdcall": (0, "Windows keyword"),
    # plain marker macros
    "ATTRIBUTE_UNUSED": (0, "macro keyword"),
    "LIBEXSLT_PUBLIC": (0, "macro keyword"),
    # macros taking a parenthesised argument list
    "X_IN_Y": (5, "macro function builder"),
    "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
    "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
    "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
    "ATTRIBUTE_NO_SANITIZE": (3, "macro keyword"),
}

def escape(raw):
    """Return raw with the XML special characters replaced by entities.

    The characters &, <, >, ' and " are turned into &amp;, &lt;, &gt;,
    &apos; and &quot; respectively.
    """
    # '&' has to be handled first, otherwise the ampersands introduced by
    # the other replacements would themselves be escaped again.
    replacements = (
        ('&', '&amp;'),
        ('<', '&lt;'),
        ('>', '&gt;'),
        ("'", '&apos;'),
        ('"', '&quot;'),
    )
    for char, entity in replacements:
        raw = raw.replace(char, entity)
    return raw

def uniq(items):
    """Return a list of the distinct elements of items.

    Elements are used as dictionary keys, so they must be hashable.
    """
    return list(dict.fromkeys(items, 1))

class identifier:
    """Record describing one symbol collected while parsing the sources.

    Besides its name, a record keeps the header and module it was found
    in, its type, the line number, two free-form payloads (info and extra),
    a static flag and the list of preprocessor conditionals that were
    active when it was recorded.
    """

    def __init__(self, name, header=None, module=None, type=None, lineno = 0,
                 info=None, extra=None, conditionals = None):
        self.name = name
        self.header = header
        self.module = module
        self.type = type
        self.info = info
        self.extra = extra
        self.lineno = lineno
        self.static = 0
        # Stores a private copy, or None when there are no conditionals.
        self.set_conditionals(conditionals)
        if self.name == debugsym:
            print("=> define %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))

    def __repr__(self):
        r = "%s %s:" % (self.type, self.name)
        if self.static:
            r = r + " static"
        if self.module is not None:
            r = r + " from %s" % (self.module)
        if self.info is not None:
            r = r + " " + repr(self.info)
        if self.extra is not None:
            r = r + " " + repr(self.extra)
        if self.conditionals is not None:
            r = r + " " + repr(self.conditionals)
        return r

    def set_header(self, header):
        self.header = header

    def set_module(self, module):
        self.module = module

    def set_type(self, type):
        self.type = type

    def set_info(self, info):
        self.info = info

    def set_extra(self, extra):
        self.extra = extra

    def set_lineno(self, lineno):
        self.lineno = lineno

    def set_static(self, static):
        self.static = static

    def set_conditionals(self, conditionals):
        """Store a copy of conditionals; None or an empty list becomes None."""
        if conditionals is None or len(conditionals) == 0:
            self.conditionals = None
        else:
            self.conditionals = conditionals[:]

    def get_name(self):
        return self.name

    def get_header(self):
        # Fixed: this used to return self.module.
        return self.header

    def get_module(self):
        return self.module

    def get_type(self):
        return self.type

    def get_info(self):
        return self.info

    def get_lineno(self):
        return self.lineno

    def get_extra(self):
        return self.extra

    def get_static(self):
        return self.static

    def get_conditionals(self):
        return self.conditionals

    def update(self, header, module, type = None, info = None, extra=None,
               conditionals=None):
        """Merge newly found information into this record.

        header and type only fill slots that are still empty.  module also
        replaces a module that is equal to the recorded header.  info,
        extra and conditionals overwrite the stored value whenever they
        are not None.
        """
        if self.name == debugsym:
            print("=> update %s : %s" % (debugsym, (module, type, info,
                                         extra, conditionals)))
        if header is not None and self.header is None:
            # Fixed: the module used to be stored in the header slot.
            self.set_header(header)
        if module is not None and (self.module is None or
                                   self.header == self.module):
            self.set_module(module)
        if type is not None and self.type is None:
            self.set_type(type)
        if info is not None:
            self.set_info(info)
        if extra is not None:
            self.set_extra(extra)
        if conditionals is not None:
            self.set_conditionals(conditionals)

class index:
    """Collection of the identifiers found while parsing.

    identifiers maps every recorded name to its record; the per-kind
    dictionaries (functions, variables, includes, structs, enums, typedefs,
    macros) and references map names to the same records.
    """

    def __init__(self, name = "noname"):
        self.name = name
        self.identifiers = {}
        self.functions = {}
        self.variables = {}
        self.includes = {}
        self.structs = {}
        self.enums = {}
        self.typedefs = {}
        self.macros = {}
        self.references = {}
        self.info = {}

    def _record(self, name, header, module, static, type, lineno, info,
                extra, conditionals):
        """Return the record stored for name, creating it when needed."""
        try:
            d = self.identifiers[name]
            # NOTE(review): the identifier.update() visible in this file
            # takes no lineno argument, so this seven-argument call raises
            # TypeError and an already known name is re-created below
            # instead of being updated.  That long-standing behaviour is
            # kept as is; confirm what is intended before changing the call.
            d.update(header, module, type, lineno, info, extra, conditionals)
        except (KeyError, TypeError):
            d = identifier(name, header, module, type, lineno, info, extra,
                           conditionals)
            self.identifiers[name] = d

        if static == 1:
            d.set_static(1)
        return d

    def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a reference to name; names starting with '__' are skipped.

        Returns the record, or None for a skipped name.
        """
        if name[0:2] == '__':
            return None
        d = self._record(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if name is not None and type is not None:
            self.references[name] = d

        if name == debugsym:
            print("New ref: %s" % (d))

        return d

    def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
        """Record a definition of name and file it under its type.

        Names starting with '__' are skipped (None is returned).  Both
        "function" and "functype" go to self.functions; an unknown type is
        reported on stdout and only stored in self.identifiers.
        """
        if name[0:2] == '__':
            return None
        d = self._record(name, header, module, static, type, lineno, info,
                         extra, conditionals)

        if name is not None and type is not None:
            tables = {
                "function": self.functions,
                "functype": self.functions,
                "variable": self.variables,
                "include": self.includes,
                "struct": self.structs,
                "enum": self.enums,
                "typedef": self.typedefs,
                "macro": self.macros,
            }
            table = tables.get(type)
            if table is None:
                print("Unable to register type ", type)
            else:
                table[name] = d

        if name == debugsym:
            print("New symbol: %s" % (d))

        return d

    def _merge_table(self, label, mine, theirs, drop_macro=False, skip_in=()):
        """Copy the entries of theirs into mine and into self.identifiers.

        label      -- word used in the "redeclared" message
        drop_macro -- remove a macro of the same name first
        skip_in    -- tables whose names must not be overridden
        """
        for id in list(theirs.keys()):
            if drop_macro and id in self.macros:
                del self.macros[id]
            if any(id in table for table in skip_in):
                continue
            if id in mine:
                print("%s %s from %s redeclared in %s" % (
                    label, id, mine[id].header, theirs[id].header))
            else:
                mine[id] = theirs[id]
                self.identifiers[id] = theirs[id]

    def merge(self, idx):
        """Merge the symbols of idx into this index.

        A name already present in the matching table is reported and left
        untouched.
        """
        # macro might be used to override functions or variables
        # definitions
        self._merge_table("function", self.functions, idx.functions,
                          drop_macro=True)
        self._merge_table("variable", self.variables, idx.variables,
                          drop_macro=True)
        self._merge_table("struct", self.structs, idx.structs)
        self._merge_table("typedef", self.typedefs, idx.typedefs)
        # Enums are merged after the macros, so the enum check below only
        # sees the enums that were known before this call.
        self._merge_table("macro", self.macros, idx.macros,
                          skip_in=(self.variables, self.functions,
                                   self.enums))
        self._merge_table("enum", self.enums, idx.enums)

    def merge_public(self, idx):
        """Update the functions already known here with the data from idx.

        Functions of idx that are unknown to this index are ignored.
        """
        for id in list(idx.functions.keys()):
            if id in self.functions:
                up = idx.functions[id]
                # check that function condition agrees with header
                if up.conditionals != self.functions[id].conditionals:
                    print("Header condition differs from Function for %s:" \
                       % id)
                    print("  H: %s" % self.functions[id].conditionals)
                    print("  C: %s" % up.conditionals)
                self.functions[id].update(None, up.module, up.type, up.info, up.extra)
        #     else:
        #         print "Function %s from %s is not declared in headers" % (
        #                id, idx.functions[id].module)
        # TODO: do the same for variables.

    def analyze_dict(self, type, dict):
        """Print how many entries of dict exist and how many are not static."""
        count = len(dict)
        public = sum(1 for id in dict.values() if id.static == 0)
        if count != public:
            print("  %d %s , %d public" % (count, type, public))
        elif count != 0:
            print("  %d public %s" % (count, type))

    def analyze(self):
        """Print the statistics of the main symbol tables."""
        self.analyze_dict("functions", self.functions)
        self.analyze_dict("variables", self.variables)
        self.analyze_dict("structs", self.structs)
        self.analyze_dict("typedefs", self.typedefs)
        self.analyze_dict("macros", self.macros)

class CLexer:
    """A lexer for the C language, tokenize the input by reading and
       analyzing it line by line"""

    # Characters ending a 'name' token.
    _NAME_STOP = " \t(){}:;,+-*/%&!|[]=><"
    # Single character 'sep' tokens.
    _SEPARATORS = "(){}:;,[]"
    # Characters able to start an 'op' token, and those able to extend it
    # to two characters.
    _OP_START = "+-*><=/%&!|."
    _OP_NEXT = "+-*><=/%&!|"

    def __init__(self, input):
        self.input = input
        self.tokens = []
        self.line = ""
        self.lineno = 0
        # Defined up front so that debug() works before the first token.
        self.last = None

    def getline(self):
        """Return the next non-blank line, stripped, or None at end of input.

        Lines ending with a backslash are joined with the following line.
        """
        line = ''
        while line == '':
            line = self.input.readline()
            if not line:
                return None
            self.lineno = self.lineno + 1
            line = line.strip()
            if line == '':
                continue
            while line[-1] == '\\':
                line = line[:-1]
                n = self.input.readline()
                self.lineno = self.lineno + 1
                n = n.strip()
                if not n:
                    break
                else:
                    line = line + n
        return line

    def getlineno(self):
        return self.lineno

    def push(self, token):
        """Put token back at the front of the token queue."""
        self.tokens.insert(0, token)

    def debug(self):
        print("Last token: ", self.last)
        print("Token queue: ", self.tokens)
        print("Line %d end: " % (self.lineno), self.line)

    def _read_string(self, line):
        """Lex a string or character literal starting at line[0].

        Returns ('string', text) without the quotes, or None when the
        input ends before the closing quote.
        """
        end = line[0]
        line = line[1:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == end:
                    self.line = line[i+1:]
                    line = line[:i]
                    found = 1
                    break
                if line[i] == '\\':
                    # skip the escaped character
                    i = i + 1
                i = i + 1
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('string', tok)
        return self.last

    def _read_block_comment(self, line):
        """Lex a /* ... */ comment; line starts with the opening marker.

        Returns ('comment', text) with the lines joined by newlines, or
        None when the input ends before the comment is closed.
        """
        line = line[2:]
        found = 0
        tok = ""
        while found == 0:
            i = 0
            l = len(line)
            while i < l:
                if line[i] == '*' and i+1 < l and line[i+1] == '/':
                    self.line = line[i+2:]
                    # NOTE(review): this also drops the character in front
                    # of the closing marker, and for i == 0 it keeps the
                    # leading '*' (line[:-1]).  The comment parsers of this
                    # file consume that exact text, so it is left untouched.
                    line = line[:i-1]
                    found = 1
                    break
                i = i + 1
            if tok != "":
                tok = tok + "\n"
            tok = tok + line
            if found == 0:
                line = self.getline()
                if line is None:
                    return None
        self.last = ('comment', tok)
        return self.last

    def _cut_code(self, line):
        """Return the part of line before the next comment or literal.

        The remainder is stored in self.line for the next token() round.
        """
        for i, c in enumerate(line):
            if c == '"' or c == "'" or \
               (c == '/' and line[i+1:i+2] in ('/', '*')):
                self.line = line[i:]
                return line[:i]
        return line

    def _tokenize(self, line):
        """Append the tokens of a comment-free, literal-free line."""
        l = len(line)
        i = 0
        while i < l:
            c = line[i]
            if c == ' ' or c == '\t':
                i = i + 1
                continue
            if c in self._SEPARATORS:
                self.tokens.append(('sep', c))
                i = i + 1
                continue
            if c in self._OP_START:
                if line[i:i+3] == '...':
                    self.tokens.append(('name', '...'))
                    i = i + 3
                    continue
                j = i + 1
                if j < l and line[j] in self._OP_NEXT:
                    self.tokens.append(('op', line[i:j+1]))
                    i = j + 1
                else:
                    self.tokens.append(('op', c))
                    i = i + 1
                continue
            # Anything else starts a name running up to the next blank,
            # separator or operator character.
            s = i
            while i < l and line[i] not in self._NAME_STOP:
                i = i + 1
            self.tokens.append(('name', line[s:i]))

    def token(self):
        """Return the next token as a (kind, text) tuple, None at the end.

        kind is one of 'preproc', 'string', 'comment', 'name', 'sep' or
        'op'.  A line starting with '#' is split on whitespace into
        'preproc' tokens.
        """
        while self.tokens == []:
            if self.line == "":
                line = self.getline()
            else:
                line = self.line
                self.line = ""
            if line is None:
                return None

            if line[0] == '#':
                self.tokens = [('preproc', word) for word in line.split()]
                break
            if line[0] == '"' or line[0] == "'":
                return self._read_string(line)
            if line[0:2] == '/*':
                return self._read_block_comment(line)
            if line[0:2] == '//':
                self.last = ('comment', line[2:])
                return self.last
            self._tokenize(self._cut_code(line))

        tok = self.tokens[0]
        self.tokens = self.tokens[1:]
        self.last = tok
        return tok

class CParser:

"""The C module parser"""
def __init__(self, filename, idx = None):
    """Open filename and prepare the lexer, the index and the parser state.

    idx is the index that receives the symbols; a new one is created when
    it is None.
    """
    self.filename = filename
    # Headers are the files whose name is longer than two characters and
    # ends in ".h".
    is_header = len(filename) > 2 and filename[-2:] == '.h'
    self.is_header = 1 if is_header else 0
    self.input = open(filename)
    self.lexer = CLexer(self.input)
    self.index = index() if idx is None else idx
    # comment tracking
    self.top_comment = ""
    self.last_comment = ""
    self.comment = None
    # flags
    self.collect_ref = 0
    self.no_error = 0
    # stacks maintained while handling preprocessor conditionals
    self.conditionals = []
    self.defines = []

def collect_references(self):
    self.collect_ref = 1

def stop_error(self):
    self.no_error = 1

def start_error(self):
    self.no_error = 0

def lineno(self):
    return self.lexer.getlineno()

def index_add(self, name, module, static, type, info=None, extra = None):
    if self.is_header == 1:
        self.index.add(name, module, module, static, type, self.lineno(),
                       info, extra, self.conditionals)
    else:
        self.index.add(name, None, module, static, type, self.lineno(),
                       info, extra, self.conditionals)

def index_add_ref(self, name, module, static, type, info=None,
                  extra = None):
    if self.is_header == 1:
        self.index.add_ref(name, module, module, static, type,
                           self.lineno(), info, extra, self.conditionals)
    else:
        self.index.add_ref(name, None, module, static, type, self.lineno(),
                           info, extra, self.conditionals)

def warning(self, msg):
    if self.no_error:
        return
    print(msg)

def error(self, msg, token=-1):
    if self.no_error:
        return

    print("Parse Error: " + msg)
    if token != -1:
        print("Got token ", token)
    self.lexer.debug()
    sys.exit(1)

def debug(self, msg, token=-1):
    print("Debug: " + msg)
    if token != -1:
        print("Got token ", token)
    self.lexer.debug()

def parseTopComment(self, comment):
    res = {}
    lines = comment.split("\n")
    item = None
    for line in lines:
        while line != "" and (line[0] == ' ' or line[0] == '\t'):
            line = line[1:]
        while line != "" and line[0] == '*':
            line = line[1:]
        while line != "" and (line[0] == ' ' or line[0] == '\t'):
            line = line[1:]
        try:
            (it, line) = line.split(":", 1)
            item = it
            while line != "" and (line[0] == ' ' or line[0] == '\t'):
                line = line[1:]
            if item in res:
                res[item] = res[item] + " " + line
            else:
                res[item] = line
        except:
            if item != None:
                if item in res:
                    res[item] = res[item] + " " + line
                else:
                    res[item] = line
    self.index.info = res

def parseComment(self, token):
    if self.top_comment == "":
        self.top_comment = token[1]
    if self.comment == None or token[1][0] == '*':
        self.comment = token[1];
    else:
        self.comment = self.comment + token[1]
    token = self.lexer.token()

    if self.comment.find("DOC_DISABLE") != -1:
        self.stop_error()

    if self.comment.find("DOC_ENABLE") != -1:
        self.start_error()

    return token

#
# Parse a comment block associated with a typedef
#
def parseTypeComment(self, name, quiet = 0):
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment == None:
        if not quiet:
            self.warning("Missing comment for type %s" % (name))
        return((args, desc))
    if self.comment[0] != '*':
        if not quiet:
            self.warning("Missing * in type comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if lines[0] != "* %s:" % (name):
        if not quiet:
            self.warning("Misformatted type comment for %s" % (name))
            self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
        return((args, desc))
    del lines[0]
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        desc = desc + " " + l
        del lines[0]

    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            self.warning("Type comment for %s lack description of the macro" % (name))

    return(desc)
#
# Parse a comment block associated with a macro
#
def parseMacroComment(self, name, quiet = 0):
    if name[0:2] == '__':
        quiet = 1

    args = []
    desc = ""

    if self.comment == None:
        if not quiet:
            self.warning("Missing comment for macro %s" % (name))
        return((args, desc))
    if self.comment[0] != '*':
        if not quiet:
            self.warning("Missing * in macro comment for %s" % (name))
        return((args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if lines[0] != "* %s:" % (name):
        if not quiet:
            self.warning("Misformatted macro comment for %s" % (name))
            self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
        return((args, desc))
    del lines[0]
    while lines[0] == '*':
        del lines[0]
    while len(lines) > 0 and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
            desc=desc.strip()
            arg=arg.strip()
        except:
            if not quiet:
                self.warning("Misformatted macro comment for %s" % (name))
                self.warning("  problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        del lines[0]
        l = lines[0].strip()
        while len(l) > 2 and l[0:3] != '* @':
            while l[0] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        args.append((arg, desc))
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        desc = desc + " " + l
        del lines[0]

    desc = desc.strip()

    if quiet == 0:
        if desc == "":
            self.warning("Macro comment for %s lack description of the macro" % (name))

    return((args, desc))

 #
 # Parse a comment block and merge the information found in the
 # parameter descriptions; finally return a block as complete
 # as possible
 #
def mergeFunctionComment(self, name, description, quiet = 0):
    if name == 'main':
        quiet = 1
    if name[0:2] == '__':
        quiet = 1

    (ret, args) = description
    desc = ""
    retdesc = ""

    if self.comment == None:
        if not quiet:
            self.warning("Missing comment for function %s" % (name))
        return(((ret[0], retdesc), args, desc))
    if self.comment[0] != '*':
        if not quiet:
            self.warning("Missing * in function comment for %s" % (name))
        return(((ret[0], retdesc), args, desc))
    lines = self.comment.split('\n')
    if lines[0] == '*':
        del lines[0]
    if lines[0] != "* %s:" % (name):
        if not quiet:
            self.warning("Misformatted function comment for %s" % (name))
            self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
        return(((ret[0], retdesc), args, desc))
    del lines[0]
    while lines[0] == '*':
        del lines[0]
    nbargs = len(args)
    while len(lines) > 0 and lines[0][0:3] == '* @':
        l = lines[0][3:]
        try:
            (arg, desc) = l.split(':', 1)
            desc=desc.strip()
            arg=arg.strip()
        except:
            if not quiet:
                self.warning("Misformatted function comment for %s" % (name))
                self.warning("  problem with '%s'" % (lines[0]))
            del lines[0]
            continue
        del lines[0]
        l = lines[0].strip()
        while len(l) > 2 and l[0:3] != '* @':
            while l[0] == '*':
                l = l[1:]
            desc = desc + ' ' + l.strip()
            del lines[0]
            if len(lines) == 0:
                break
            l = lines[0]
        i = 0
        while i < nbargs:
            if args[i][1] == arg:
                args[i] = (args[i][0], arg, desc)
                break;
            i = i + 1
        if i >= nbargs:
            if not quiet:
                self.warning("Unable to find arg %s from function comment for %s" % (
                   arg, name))
    while len(lines) > 0 and lines[0] == '*':
        del lines[0]
    desc = ""
    while len(lines) > 0:
        l = lines[0]
        while len(l) > 0 and l[0] == '*':
            l = l[1:]
        l = l.strip()
        if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
            try:
                l = l.split(' ', 1)[1]
            except:
                l = ""
            retdesc = l.strip()
            del lines[0]
            while len(lines) > 0:
                l = lines[0]
                while len(l) > 0 and l[0] == '*':
                    l = l[1:]
                l = l.strip()
                retdesc = retdesc + " " + l
                del lines[0]
        else:
            desc = desc + " " + l
            del lines[0]

    retdesc = retdesc.strip()
    desc = desc.strip()

    if quiet == 0:
         #
         # report missing comments
         #
        i = 0
        while i < nbargs:
            if args[i][2] == None and args[i][0] != "void" and \
               ((args[i][1] != None) or (args[i][1] == '')):
                self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
            i = i + 1
        if retdesc == "" and ret[0] != "void":
            self.warning("Function comment for %s lacks description of return value" % (name))
        if desc == "":
            self.warning("Function comment for %s lacks description of the function" % (name))

    return(((ret[0], retdesc), args, desc))

def parsePreproc(self, token):
    """Handle a preprocessor directive and return the token following it.

    token is the 'preproc' token holding the directive name.  #include
    and #define are recorded in the index, conditionals maintain
    self.defines and self.conditionals, and the rest of any other
    directive line is skipped.
    """
    if debug:
        print("=> preproc ", token, self.lexer.tokens)
    name = token[1]
    if name == "#include":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
            self.index_add(token[1], self.filename, not self.is_header,
                            "include")
            return self.lexer.token()
        return token
    if name == "#define":
        token = self.lexer.token()
        if token is None:
            return None
        if token[0] == 'preproc':
             # TODO macros with arguments
            name = token[1]
            # skip the macro body
            token = self.lexer.token()
            while token is not None and token[0] == 'preproc' and \
                  token[1][0] != '#':
                token = self.lexer.token()
            name = name.split('(')[0]
            info = self.parseMacroComment(name, not self.is_header)
            self.index_add(name, self.filename, not self.is_header,
                            "macro", info)
            return token

    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    # We process conditionals (i.e. tokens from #ifdef, #ifndef,
    # #if, #else and #endif) for headers and mainline code,
    # store the ones from the header in libxml2-api.xml, and later
    # (in the routine merge_public) verify that the two (header and
    # mainline code) agree.
    #
    # There is a small problem with processing the headers. Some of
    # the variables are not concerned with enabling / disabling of
    # library functions (e.g. '__XML_PARSER_H__'), and we don't want
    # them to be included in libxml2-api.xml, or involved in
    # the check between the header and the mainline code.  To
    # accomplish this, we ignore any conditional which doesn't include
    # the string 'ENABLED'
    #
    if name == "#ifdef" or name == "#ifndef":
        pending = self.lexer.tokens
        # A directive without operand pushes an empty entry so that the
        # matching #endif still pops the right level.
        apstr = pending[0][1] if pending else ""
        self.defines.append(apstr)
        if apstr.find('ENABLED') != -1:
            if name == "#ifdef":
                self.conditionals.append("defined(%s)" % apstr)
            else:
                self.conditionals.append("!defined(%s)" % apstr)
    elif name == "#if":
        apstr = " ".join(tok[1] for tok in self.lexer.tokens)
        self.defines.append(apstr)
        if apstr.find('ENABLED') != -1:
            self.conditionals.append(apstr)
    elif name == "#else":
        if self.conditionals != [] and self.defines != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
    elif name == "#endif":
        if self.conditionals != [] and self.defines != [] and \
           self.defines[-1].find('ENABLED') != -1:
            self.conditionals = self.conditionals[:-1]
        self.defines = self.defines[:-1]
    # skip what is left of the directive line
    token = self.lexer.token()
    while token is not None and token[0] == 'preproc' and \
        token[1][0] != '#':
        token = self.lexer.token()
    return token

 #
 # token acquisition on top of the lexer; it internally handles
 # preprocessor lines and comments since they are logically not part
 # of the program structure.
 #
def token(self):
    """Return the next token the parser should see, or None at end of input.

    Tokens are pulled from self.lexer.  'comment' and 'preproc' tokens are
    handed to parseComment / parsePreproc, whose return value becomes the
    next candidate.  A few names get special treatment:

    * ``__const`` is reported as the plain ``const`` name token;
    * ``__attribute`` discards tokens up to the next one whose text is
      ``;`` and returns that token (or None if input runs out);
    * names listed in ``ignored_words`` are dropped together with the
      number of following tokens recorded in their table entry.
    """
    global ignored_words

    tok = self.lexer.token()
    while tok is not None:
        kind = tok[0]
        if kind == 'comment':
            tok = self.parseComment(tok)
            continue
        if kind == 'preproc':
            tok = self.parsePreproc(tok)
            continue
        if kind == "name":
            word = tok[1]
            if word == "__const":
                return ("name", "const")
            if word == "__attribute":
                tok = self.lexer.token()
                while tok is not None and tok[1] != ";":
                    tok = self.lexer.token()
                return tok
            if word in ignored_words:
                (skip, _why) = ignored_words[word]
                # Drop the `skip` tokens attached to the ignored word ...
                for _ in range(skip):
                    tok = self.lexer.token()
                # ... then move on to the token that follows them.
                tok = self.lexer.token()
                continue
        if debug:
            print("=> ", tok)
        return tok
    return None

 #
 # Parse a typedef, it records the type and its name.
 #
def parseTypedef(self, token):
    """Parse the declarators of a typedef and register each declared name.

    `token` is the first token of the aliased type.  The type itself is
    parsed by parseType(); every following name is then recorded through
    index_add() as a "functype" (when parseType() left a signature), a
    "struct" (when the base type is the bare word "struct") or a plain
    "typedef".  Returns the token following the closing ';', the offending
    token after a reported error, or None when the input ends.
    """
    if token is None:
        return None
    token = self.parseType(token)
    if token is None:
        self.error("parsing typedef")
        return None

    base_type = self.type
    cur_type = base_type
    while token is not None:
        if token[0] != "name":
            self.error("parsing typedef: expecting a name")
            return token

        alias = token[1]
        sig = self.signature
        if sig is not None:
            # Function typedef: keep only the return type part.
            cur_type = cur_type.split('(')[0]
            desc = self.mergeFunctionComment(alias,
                                             ((cur_type, None), sig), 1)
            self.index_add(alias, self.filename, not self.is_header,
                           "functype", desc)
        elif base_type == "struct":
            self.index_add(alias, self.filename, not self.is_header,
                           "struct", cur_type)
            # Further declarators refer to the struct by this name.
            base_type = "struct " + alias
        else:
            # TODO report missing or misformatted comments
            info = self.parseTypeComment(alias, 1)
            self.index_add(alias, self.filename, not self.is_header,
                           "typedef", cur_type, info)
        token = self.token()

        if token is not None and token[0] == 'sep':
            if token[1] == ',':
                # Next declarator: restart from the base type and pick up
                # any leading operators (e.g. '*').
                cur_type = base_type
                token = self.token()
                while token is not None and token[0] == "op":
                    cur_type = cur_type + token[1]
                    token = self.token()
                continue
            if token[1] == ';':
                break
        elif token is not None and token[0] == 'name':
            cur_type = base_type
            continue
        self.error("parsing typedef: expecting ';'", token)
        return token
    token = self.token()
    return token

 #
 # Parse a C code block (used for function bodies): consume tokens up to
 # and including the balancing '}'.
 #
def parseBlock(self, token):
    """Skip a C code block up to and including its balancing '}'.

    `token` is the first token inside the block.  Nested '{' blocks are
    handled recursively.  When self.collect_ref is 1, names are inspected
    on the way and recorded through index_add_ref():

    * ``xml...`` followed by '(' is recorded as a "function" reference;
    * ``xml...`` followed by a name and then ';', ',' or '=' is recorded
      as a "type" reference;
    * ``XML_...`` and ``LIBXML_...`` names are recorded as "typedef".

    Returns the token following the closing '}', or None when the input
    ends before the block is closed.
    """
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.comment = None
            token = self.token()
            return token
        elif self.collect_ref == 1:
            oldtok = token
            token = self.token()
            if oldtok[0] != "name":
                continue
            ident = oldtok[1]
            if ident[0:3] == "xml":
                # The lookahead may hit end of input; the original code
                # indexed None here and raised TypeError.
                if token is None:
                    break
                if token[0] == "sep" and token[1] == "(":
                    self.index_add_ref(ident, self.filename,
                                       0, "function")
                    token = self.token()
                elif token[0] == "name":
                    token = self.token()
                    if token is not None and token[0] == "sep" and \
                       token[1] in (";", ",", "="):
                        self.index_add_ref(ident, self.filename,
                                           0, "type")
            elif ident[0:4] == "XML_" or ident[0:7] == "LIBXML_":
                self.index_add_ref(ident, self.filename,
                                   0, "typedef")
        else:
            token = self.token()
    return token

 #
 # Parse a C struct definition till the balancing }
 #
def parseStruct(self, token):
    """Parse the fields of a C struct up to and including the balancing '}'.

    `token` is the first token after the opening '{'.  Each field is
    parsed with parseType(); the collected (type, name, comment) tuples
    are stored in self.struct_fields.  Nested '{' blocks are skipped with
    parseTypeBlock().  self.type is restored after every field so the
    caller's type string survives the nested parseType() calls.

    Returns the token following the closing '}', or None when the input
    ends first (self.struct_fields is still set in that case).
    """
    fields = []
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            self.struct_fields = fields
            token = self.token()
            return token
        else:
            base_type = self.type
            token = self.parseType(token)
            if token is not None and token[0] == "name":
                fname = token[1]
                token = self.token()
                # Guard against end of input after the field name; the
                # original code indexed None here and raised TypeError.
                if token is not None and token[0] == "sep" and \
                   token[1] == ";":
                    # Reset the comment before fetching the next token so
                    # that only a comment seen while reading past the ';'
                    # is attached to this field.
                    self.comment = None
                    token = self.token()
                    fields.append((self.type, fname, self.comment))
                    self.comment = None
                else:
                    self.error("parseStruct: expecting ;", token)
            elif token is not None and token[0] == "sep" and token[1] == "{":
                token = self.token()
                token = self.parseTypeBlock(token)
                if token is not None and token[0] == "name":
                    token = self.token()
                if token is not None and token[0] == "sep" and token[1] == ";":
                    token = self.token()
                else:
                    self.error("parseStruct: expecting ;", token)
            else:
                self.error("parseStruct: name", token)
                token = self.token()
            self.type = base_type
    self.struct_fields = fields
    return token

 #
 # Parse a C enum block, parse till the balancing }
 #
def parseEnumBlock(self, token):
    """Parse the body of a C enum up to and including the balancing '}'.

    `token` is the first token after the opening '{'.  The result is left
    in self.enums as a list of (name, value, comment) tuples, where value
    is a string: the text following '=' when an initializer is present,
    otherwise the previous value plus one.  The first enumerator without
    an initializer is 0, as in C.  When the previous value cannot be
    converted to an integer a warning is issued and the value is ''.

    The comment attached to an enumerator is whatever self.comment holds
    when the next enumerator (or the closing '}') is reached.

    Returns the token following the closing '}', or None when the input
    ends first.
    """
    self.enums = []
    name = None
    self.comment = None
    comment = ""
    # Start one below zero: the counter is incremented *before* being
    # assigned, and the first implicit enumerator must get 0 (the original
    # started at "0" and therefore numbered it 1).
    value = "-1"
    while token is not None:
        if token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseTypeBlock(token)
        elif token[0] == "sep" and token[1] == "}":
            if name is not None:
                if self.comment is not None:
                    # Stripped like the comments of the other enumerators.
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            token = self.token()
            return token
        elif token[0] == "name":
            if name is not None:
                # Flush the previous enumerator now that its trailing
                # comment (if any) has been seen.
                if self.comment is not None:
                    comment = self.comment.strip()
                    self.comment = None
                self.enums.append((name, value, comment))
            name = token[1]
            comment = ""
            token = self.token()
            if token is not None and token[0] == "op" and \
               token[1][0] == "=":
                # Explicit initializer: collect its text up to ',' or '}'.
                value = ""
                if len(token[1]) > 1:
                    value = token[1][1:]
                token = self.token()
                while token is not None and (token[0] != "sep" or
                      (token[1] != ',' and token[1] != '}')):
                    value = value + token[1]
                    token = self.token()
            else:
                try:
                    value = "%d" % (int(value) + 1)
                except (TypeError, ValueError):
                    self.warning("Failed to compute value of enum %s" % (name))
                    value = ""
            if token is not None and token[0] == "sep" and token[1] == ",":
                token = self.token()
        else:
            token = self.token()
    return token

 #
 # Parse a C definition block (used for structs): consume tokens up to
 # and including the balancing '}'.
 #
def parseTypeBlock(self, token):
    """Skip tokens up to and including the '}' that balances the block.

    `token` is the first token inside the block; nested '{' ... '}' pairs
    are skipped recursively.  Returns the token after the closing '}', or
    None if the input ends first.
    """
    while token is not None:
        is_sep = token[0] == "sep"
        if is_sep and token[1] == "}":
            return self.token()
        opens_nested = is_sep and token[1] == "{"
        token = self.token()
        if opens_nested:
            token = self.parseTypeBlock(token)
    return token

 #
 # Parse a type: the fact that the type name can either occur after
 #    the definition or within the definition makes it a little harder
 #    if inside, the name token is pushed back before returning
 #
def parseType(self, token):
    """Parse a C type starting at `token` and leave its text in self.type.

    Resets self.type, self.struct_fields and self.signature first.  The
    declared name can appear after the type or inside it (function
    pointers, arrays); when it is inside, the token following the
    construct is pushed back on the lexer and the *name* token is
    returned, so callers always receive the name token on success.

    Handled forms: const/unsigned/signed prefixes, long/short with an
    optional second 'long' and optional 'int', struct (body parsed by
    parseStruct), enum (body parsed by parseEnumBlock, each value
    registered through index_add), plain named types, trailing operators
    and 'const', function types (arguments parsed by parseSignature, which
    sets self.signature), arrays and ':' bit-field widths.

    Returns the next token for the caller (normally the declared name),
    the offending token after a reported error, or None at end of input.
    """
    self.type = ""
    self.struct_fields = []
    self.signature = None
    if token is None:
        return token

    def add_word(word):
        # Append one word to self.type, space separated.
        if self.type == "":
            self.type = word
        else:
            self.type = self.type + " " + word

    while token is not None and token[0] == "name" and \
          token[1] in ("const", "unsigned", "signed"):
        add_word(token[1])
        token = self.token()
    if token is None:
        # Input ended after the qualifiers.
        return token

    if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
        first = token[1]
        add_word(first)
        # Look ahead for "long long" and for a trailing "int".  The
        # original tested the current token again (so the 'int' branch
        # was unreachable) and referenced an undefined variable; as a
        # result "long int x" reported 'int' as the declared name.
        peek = self.token()
        if first == "long" and peek is not None and \
           peek[0] == "name" and peek[1] == "long":
            add_word(peek[1])
            peek = self.token()
        if peek is not None and peek[0] == "name" and peek[1] == "int":
            add_word(peek[1])
        elif peek is not None:
            # Not part of the type: give it back for the code below.
            self.lexer.push(peek)

    elif token[0] == "name" and token[1] == "struct":
        add_word(token[1])
        token = self.token()
        nametok = None
        if token is not None and token[0] == "name":
            nametok = token
            token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseStruct(token)
        elif token is not None and token[0] == "op" and token[1] == "*":
            if nametok is None:
                # "struct *" without a tag: nothing to build a type from.
                self.error("struct : expecting name", token)
                return token
            self.type = self.type + " " + nametok[1] + " *"
            token = self.token()
            while token is not None and token[0] == "op" and token[1] == "*":
                self.type = self.type + " *"
                token = self.token()
            if token is not None and token[0] == "name":
                nametok = token
                token = self.token()
            else:
                self.error("struct : expecting name", token)
                return token
        elif token is not None and token[0] == "name" and nametok is not None:
            self.type = self.type + " " + nametok[1]
            return token

        if nametok is not None:
            self.lexer.push(token)
            token = nametok
        return token

    elif token[0] == "name" and token[1] == "enum":
        add_word(token[1])
        self.enums = []
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == "{":
            token = self.token()
            token = self.parseEnumBlock(token)
        else:
            self.error("parsing enum: expecting '{'", token)
        enum_type = None
        if token is not None and token[0] != "name":
            self.lexer.push(token)
            token = ("name", "enum")
        elif token is not None:
            enum_type = token[1]
        for enum in self.enums:
            self.index_add(enum[0], self.filename,
                           not self.is_header, "enum",
                           (enum[1], enum[2], enum_type))
        return token

    elif token[0] == "name":
        add_word(token[1])
    else:
        self.error("parsing type %s: expecting a name" % (self.type),
                   token)
        return token
    token = self.token()
    while token is not None and (token[0] == "op" or
          token[0] == "name" and token[1] == "const"):
        self.type = self.type + " " + token[1]
        token = self.token()

    #
    # if there is a parenthesis here, this means a function type
    #
    if token is not None and token[0] == "sep" and token[1] == '(':
        self.type = self.type + token[1]
        token = self.token()
        while token is not None and token[0] == "op" and token[1] == '*':
            self.type = self.type + token[1]
            token = self.token()
        if token is None or token[0] != "name":
            self.error("parsing function type, name expected", token)
            return token
        self.type = self.type + token[1]
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == ')':
            self.type = self.type + token[1]
            token = self.token()
            if token is not None and token[0] == "sep" and token[1] == '(':
                token = self.token()
                # parseSignature() parses the argument types with
                # parseType(), which clobbers self.type: save/restore it.
                saved_type = self.type
                token = self.parseSignature(token)
                self.type = saved_type
            else:
                self.error("parsing function type, '(' expected", token)
                return token
        else:
            self.error("parsing function type, ')' expected", token)
            return token
        self.lexer.push(token)
        token = nametok
        return token

    #
    # do some lookahead for arrays
    #
    if token is not None and token[0] == "name":
        nametok = token
        token = self.token()
        if token is not None and token[0] == "sep" and token[1] == '[':
            self.type = self.type + nametok[1]
            while token is not None and token[0] == "sep" and token[1] == '[':
                self.type = self.type + token[1]
                token = self.token()
                while token is not None and token[0] != 'sep' and \
                      token[1] != ']' and token[1] != ';':
                    self.type = self.type + token[1]
                    token = self.token()
            if token is not None and token[0] == 'sep' and token[1] == ']':
                self.type = self.type + token[1]
                token = self.token()
            else:
                self.error("parsing array type, ']' expected", token)
                return token
        elif token is not None and token[0] == "sep" and token[1] == ':':
            # remove :12 in case it's a limited int size
            token = self.token()
            token = self.token()
        self.lexer.push(token)
        token = nametok

    return token

 #
 # Parse a signature: '(' has been parsed and we scan the type definition
 #    up to the ')' included
def parseSignature(self, token):
    """Parse a function argument list and store it in self.signature.

    The opening '(' has already been consumed; `token` is the first token
    after it.  Each argument is parsed with parseType() and recorded as a
    (type, name, None) tuple; an argument given as a bare type just before
    the closing ')' is recorded with name None (or "..." when the type is
    "...").  Returns the token following the closing ')'.
    """
    params = []
    if token is not None and token[0] == "sep" and token[1] == ')':
        # Empty argument list.
        self.signature = []
        return self.token()

    while token is not None:
        token = self.parseType(token)
        if token is None:
            break
        if token[0] == "name":
            params.append((self.type, token[1], None))
            token = self.token()
            if token is None:
                break
        elif token[0] == "sep" and token[1] == ')':
            # only the type was provided
            if self.type == "...":
                params.append((self.type, "...", None))
            else:
                params.append((self.type, None, None))
        if token[0] == "sep":
            if token[1] == ',':
                token = self.token()
                continue
            if token[1] == ')':
                token = self.token()
                break
    self.signature = params
    return token

 #
 # Parse a global definition, be it a type, variable or function
 # the extern "C" blocks are a bit nasty and require it to recurse.
 #
def parseGlobal(self, token):
    """Parse one global definition: a typedef, variable(s) or a function.

    `token` is the first token of the definition.  ``extern "C" { ... }``
    blocks are handled by recursing on their content; a leading
    ``static`` is remembered and passed on to index_add() /
    mergeFunctionComment() for functions.  typedefs are delegated to
    parseTypedef(); everything else goes through parseType() and is
    registered with index_add() as "struct", "variable" or "function".
    Initializers are skipped and function bodies are consumed with
    parseBlock().

    Returns the first token that is not part of the definition, or None
    at end of input.
    """
    static = 0
    if token[1] == 'extern':
        token = self.token()
        if token is None:
            return token
        if token[0] == 'string':
            if token[1] == 'C':
                token = self.token()
                if token is None:
                    return token
                if token[0] == 'sep' and token[1] == "{":
                    token = self.token()
                    # Parse everything up to the '}' closing extern "C".
                    while token is not None and (token[0] != 'sep' or
                          token[1] != "}"):
                        if token[0] == 'name':
                            token = self.parseGlobal(token)
                        else:
                            self.error(
                             "token %s %s unexpected at the top level" % (
                                    token[0], token[1]))
                            token = self.parseGlobal(token)
                    token = self.token()
                    return token
            else:
                return token
    elif token[1] == 'static':
        static = 1
        token = self.token()
        if token is None or token[0] != 'name':
            return token

    if token[1] == 'typedef':
        token = self.token()
        return self.parseTypedef(token)

    token = self.parseType(token)
    type_orig = self.type
    if token is None or token[0] != "name":
        return token
    decl_type = type_orig
    self.name = token[1]
    token = self.token()
    while token is not None and (token[0] == "sep" or token[0] == "op"):
        if token[0] == "sep" and token[1] == "[":
            # Array declarator: fold everything up to ';' into the type.
            decl_type = decl_type + token[1]
            token = self.token()
            while token is not None and (token[0] != "sep" or
                  token[1] != ";"):
                decl_type = decl_type + token[1]
                token = self.token()

        if token is not None and token[0] == "op" and token[1] == "=":
            #
            # Skip the initialization of the variable
            #
            token = self.token()
            # token may be None here when the input ends right after '='.
            if token is not None and token[0] == 'sep' and token[1] == '{':
                token = self.token()
                token = self.parseBlock(token)
            else:
                self.comment = None
                while token is not None and (token[0] != "sep" or
                      (token[1] != ';' and token[1] != ',')):
                    token = self.token()
            self.comment = None
            if token is None or token[0] != "sep" or (token[1] != ';' and
               token[1] != ','):
                self.error("missing ';' or ',' after value")

        if token is not None and token[0] == "sep":
            if token[1] == ";":
                self.comment = None
                token = self.token()
                if decl_type == "struct":
                    self.index_add(self.name, self.filename,
                         not self.is_header, "struct", self.struct_fields)
                else:
                    self.index_add(self.name, self.filename,
                         not self.is_header, "variable", decl_type)
                break
            elif token[1] == "(":
                token = self.token()
                token = self.parseSignature(token)
                if token is None:
                    return None
                if token[0] == "sep" and token[1] == ";":
                    # Prototype only.
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), 1)
                    self.index_add(self.name, self.filename, static,
                                    "function", d)
                    token = self.token()
                elif token[0] == "sep" and token[1] == "{":
                    # Definition: register it, then skip the body.
                    d = self.mergeFunctionComment(self.name,
                            ((decl_type, None), self.signature), static)
                    self.index_add(self.name, self.filename, static,
                                    "function", d)
                    token = self.token()
                    token = self.parseBlock(token)
            elif token[1] == ',':
                # Several variables declared at once: register this one
                # and restart from the base type for the next.
                self.comment = None
                self.index_add(self.name, self.filename, static,
                                "variable", decl_type)
                decl_type = type_orig
                token = self.token()
                while token is not None and token[0] == "sep":
                    decl_type = decl_type + token[1]
                    token = self.token()
                if token is not None and token[0] == "name":
                    self.name = token[1]
                    token = self.token()
            else:
                break
        elif token is not None and token[0] == "op" and token[1] != "=":
            # No branch above consumes such an operator; without this
            # break the loop would spin forever on the same token.
            break

    return token

def parse(self):
    """Parse the whole file and return the resulting index.

    Top-level definitions are consumed one by one with parseGlobal();
    once the input is exhausted the file's top comment is processed with
    parseTopComment() and self.index is returned.
    """
    self.warning("Parsing %s" % (self.filename))
    tok = self.token()
    while tok is not None:
        if tok[0] != 'name':
            self.error("token %s %s unexpected at the top level" % (
                   tok[0], tok[1]))
            self.parseGlobal(tok)
            # NOTE(review): this path returns None rather than self.index,
            # and skips parseTopComment(); docBuilder.scanHeaders() passes
            # the result straight to merge() -- confirm this is intended.
            return
        tok = self.parseGlobal(tok)
    self.parseTopComment(self.top_comment)
    return self.index

class docBuilder:

"""A documentation builder"""
def __init__(self, name, directories=['.'], excludes=[]):
    """Set up an empty documentation builder for project `name`.

    directories -- directories that scan() will search for *.c / *.h
    excludes    -- extra substrings of file paths to skip; the keys of
                   the module-level ignored_files table are always added
    """
    self.name = name
    # Copy the argument: the default list object is shared by every call,
    # so storing it directly would alias it across instances.
    self.directories = list(directories)
    self.excludes = list(excludes) + list(ignored_files.keys())
    self.modules = {}
    self.headers = {}
    self.idx = index()
    self.xref = {}
    self.index = {}
    # The libxml2 project uses the shorter 'libxml' as its base name.
    if name == 'libxml2':
        self.basename = 'libxml'
    else:
        self.basename = name

def indexString(self, id, str):
    """Record in self.xref every indexable word of `str` under `id`.

    Punctuation is replaced by spaces and the text is split on
    whitespace.  A word is indexed when it starts with an ASCII letter,
    is at least three characters long and is not 'and' / 'the' (compared
    case-insensitively).  self.xref maps each word to the list of ids it
    was seen with (duplicates included).  Nothing happens when `str` is
    None.
    """
    if str is None:
        return
    for punct in "'\"/*[]()<>&#,.;":
        str = str.replace(punct, ' ')
    for word in str.split():
        # Best effort, as in the rest of the builder: a failure on one
        # word is ignored and the remaining words are still processed.
        try:
            if word[0] not in string.ascii_letters:
                continue
            if len(word) < 3:
                continue
            # TODO: generalize this a bit
            if word.lower() in ('and', 'the'):
                continue
            if word in self.xref:
                self.xref[word].append(id)
            else:
                self.xref[word] = [id]
        except:
            pass

def analyze(self):
    """Print a one-line project summary, then run the index's analyze()."""
    header_count = len(self.headers)
    module_count = len(self.modules)
    print("Project %s : %d headers, %d modules" % (
        self.name, header_count, module_count))
    self.idx.analyze()

def scanHeaders(self):
    """Parse every registered header and merge its index into self.idx.

    The per-file index returned by CParser.parse() is also kept in
    self.headers under the header's path.
    """
    for path in list(self.headers.keys()):
        parsed = CParser(path).parse()
        self.headers[path] = parsed
        self.idx.merge(parsed)

def scanModules(self):
    """Parse every registered C module and merge it into self.idx.

    The per-file index returned by CParser.parse() is kept in
    self.modules under the module's path and folded into the global
    index with merge_public().
    """
    for path in list(self.modules.keys()):
        parsed = CParser(path).parse()
        self.modules[path] = parsed
        self.idx.merge_public(parsed)

def scan(self):
    """Collect the *.c and *.h files of every directory, then parse them.

    Files whose path contains one of the self.excludes substrings are
    reported with a "Skipping" message and left out.  The remaining *.c
    paths become keys of self.modules and the *.h paths keys of
    self.headers (value None until parsed).  Finally scanHeaders() and
    scanModules() are run.
    """
    for directory in self.directories:
        for pattern, registry in (("/*.c", self.modules),
                                  ("/*.h", self.headers)):
            for path in glob.glob(directory + pattern):
                if any(path.find(excl) != -1 for excl in self.excludes):
                    print("Skipping %s" % path)
                else:
                    registry[path] = None
    self.scanHeaders()
    self.scanModules()

def modulename_file(self, file):
    """Return the base name of `file` with a trailing '.h' or '.c' removed."""
    base = os.path.basename(file)
    if base[-2:] in ('.h', '.c'):
        return base[:-2]
    return base

def serialize_enum(self, output, name):
    """Write the <enum .../> element describing enum value `name`.

    The entry's info, when present, is indexed as (value, comment, type):
    the value is written evaluated when it is a valid Python expression
    and verbatim otherwise, followed by the optional type and the
    escaped comment.
    """
    entry = self.idx.enums[name]
    output.write("    <enum name='%s' file='%s'" % (
        name, self.modulename_file(entry.header)))
    info = entry.info
    if info != None:
        raw_value, comment, enum_type = info[0], info[1], info[2]
        if raw_value not in (None, ''):
            # NOTE(review): eval() runs text taken from the parsed C
            # sources; acceptable only while those sources are trusted.
            try:
                val = eval(raw_value)
            except:
                val = raw_value
            output.write(" value='%s'" % (val))
        if enum_type not in (None, ''):
            output.write(" type='%s'" % enum_type)
        if comment not in (None, ''):
            output.write(" info='%s'" % escape(comment))
    output.write("/>\n")

def serialize_macro(self, output, name):
    """Write the <macro> element describing macro `name`.

    The entry's info, when present, is unpacked as (args, description),
    where args is a sequence of (arg_name, arg_description) pairs.  The
    description and every argument description are also fed to
    indexString() under the macro's name.  Serialization of the info is
    best effort: if it fails part-way the element is still closed.
    """
    entry = self.idx.macros[name]
    output.write("    <macro name='%s' file='%s'>\n" % (name,
                 self.modulename_file(entry.header)))
    if entry.info is not None:
        try:
            (args, desc) = entry.info
            if desc is not None and desc != "":
                output.write("      <info>%s</info>\n" % (escape(desc)))
                self.indexString(name, desc)
            for arg in args:
                # Distinct local names: the original reused `name` here,
                # so argument descriptions were cross-referenced under
                # the argument's name instead of the macro's.
                (arg_name, arg_desc) = arg
                if arg_desc is not None and arg_desc != "":
                    output.write("      <arg name='%s' info='%s'/>\n" % (
                                 arg_name, escape(arg_desc)))
                    self.indexString(name, arg_desc)
                else:
                    output.write("      <arg name='%s'/>\n" % (arg_name))
        except Exception:
            # Deliberately best effort: malformed info must not abort the
            # whole serialization.
            pass
    output.write("    </macro>\n")

def serialize_typedef(self, output, name):
    """Write the <struct> or <typedef> element for typedef `name`.

    A typedef whose type text starts with 'struct ' is written as a
    <struct>; when the struct it names is known and its info is a tuple
    or list, one <field> per entry is emitted (each entry indexed as
    (type, name, description)) and the description is passed to
    indexString() under the struct's name.  Any other typedef is written
    as a <typedef>, with an <info> child when the entry carries a
    non-empty `extra` description.
    """
    entry = self.idx.typedefs[name]
    if entry.info[0:7] != 'struct ':
        output.write("    <typedef name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(entry.header), entry.info))
        try:
            desc = entry.extra
            if desc != None and desc != "":
                output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                output.write("    </typedef>\n")
            else:
                output.write("/>\n")
        except:
            # No usable description: close the element as empty.
            output.write("/>\n")
        return

    output.write("    <struct name='%s' file='%s' type='%s'" % (
             name, self.modulename_file(entry.header), entry.info))
    struct_name = entry.info[7:]
    known = self.idx.structs
    if struct_name not in known or \
       type(known[struct_name].info) not in (tuple, list):
        output.write("/>\n")
        return

    output.write(">\n")
    try:
        for field in known[struct_name].info:
            desc = field[2]
            self.indexString(struct_name, desc)
            desc = '' if desc == None else escape(desc)
            output.write("      <field name='%s' type='%s' info='%s'/>\n" % (
                field[1], field[0], desc))
    except:
        print("Failed to serialize struct %s" % (struct_name))
    output.write("    </struct>\n")

def serialize_variable(self, output, name):
    """Write the <variable .../> element for global variable `name`.

    The type attribute is emitted only when the entry carries an info
    value.
    """
    entry = self.idx.variables[name]
    if entry.info is None:
        output.write("    <variable name='%s' file='%s'/>\n" % (
                name, self.modulename_file(entry.header)))
        return
    output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
            name, self.modulename_file(entry.header), entry.info))

def serialize_function(self, output, name):
    """Write the element describing function (or function type) `name`.

    The element is named after the entry's `type` attribute and carries
    the header and module file names.  Optional compilation conditionals
    are joined with an XML-escaped '&&' into a <cond> child.  The entry's
    info is unpacked as (return, params, description) and written as
    <info>, <return> and <arg> children; descriptions are also handed to
    indexString() under the function's name.  A failure while writing
    the info is reported on stdout and the element is still closed.
    """
    func = self.idx.functions[name]
    if name == debugsym:
        print("=>", func)

    output.write("    <%s name='%s' file='%s' module='%s'>\n" % (func.type,
                 name, self.modulename_file(func.header),
                 self.modulename_file(func.module)))
    #
    # Processing of conditionals modified by Bill 1/1/05
    #
    if func.conditionals != None:
        joined = ""
        for cond in func.conditionals:
            if joined != "":
                joined += " &amp;&amp; "
            joined += cond
        output.write("      <cond>%s</cond>\n"% (joined))
    try:
        (ret, params, desc) = func.info
        undocumented = desc == None or desc == ''
        # xmlThrDef* and xmlDllMain are not expected to be documented.
        if undocumented and \
           name[0:9] != "xmlThrDef" and name != "xmlDllMain":
            print("%s %s from %s has no description" % (func.type, name,
                   self.modulename_file(func.module)))

        output.write("      <info>%s</info>\n" % (escape(desc)))
        self.indexString(name, desc)

        ret_type = ret[0]
        if ret_type != None:
            if ret_type == "void":
                output.write("      <return type='void'/>\n")
            else:
                output.write("      <return type='%s' info='%s'/>\n" % (
                         ret_type, escape(ret[1])))
                self.indexString(name, ret[1])

        for param in params:
            if param[0] == 'void':
                continue
            if param[2] == None:
                output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                continue
            output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
            self.indexString(name, param[2])
    except:
        print("Failed to save function %s info: " % name, repr(func.info))
    output.write("    </%s>\n" % (func.type))

def serialize_exports(self, output, file):
    """Write the <file> element listing every symbol exported by `file`.

    `file` must be a key of self.headers.  The header's Summary,
    Description and Author entries are written first (a missing one is
    reported on stdout), plus a <deprecated/> marker when the
    description contains "DEPRECATED".  Symbols are then listed by
    kind -- macro, enum, typedef, struct, variable, function -- each
    group sorted by name.
    """
    module = self.modulename_file(file)
    output.write("    <file name='%s'>\n" % (module))
    exported = self.headers[file]
    if exported.info != None:
        for data in ('Summary', 'Description', 'Author'):
            try:
                output.write("     <%s>%s</%s>\n" % (
                             data.lower(),
                             escape(exported.info[data]),
                             data.lower()))
            except:
                print("Header %s lacks a %s description" % (module, data))
        if 'Description' in exported.info:
            if exported.info['Description'].find("DEPRECATED") != -1:
                output.write("     <deprecated/>\n")

    for sym in uniq(sorted(exported.macros.keys())):
        # Macros are sometime used to masquerade other types.
        if sym in exported.functions or sym in exported.variables or \
           sym in exported.typedefs or sym in exported.structs or \
           sym in exported.enums:
            continue
        output.write("     <exports symbol='%s' type='macro'/>\n" % (sym))

    for kind in ('enum', 'typedef', 'struct', 'variable', 'function'):
        # The index keeps each kind in an attribute named '<kind>s'.
        table = getattr(exported, kind + 's')
        for sym in uniq(sorted(table.keys())):
            output.write("     <exports symbol='%s' type='%s'/>\n" % (
                sym, kind))
    output.write("    </file>\n")

def serialize_xrefs_files(self, output):
    """Write one <file> element per header listing every symbol it holds.

    Headers are visited in sorted key order.  For each one, the names of
    its functions, variables, macros, typedefs, structs and enums are
    merged, de-duplicated with uniq() and written sorted as <ref>
    entries to output.
    """
    for path in sorted(self.headers.keys()):
        output.write("    <file name='%s'>\n" % (self.modulename_file(path)))
        header = self.headers[path]
        names = []
        for table in (header.functions, header.variables, header.macros,
                      header.typedefs, header.structs, header.enums):
            names.extend(table.keys())
        for name in sorted(uniq(names)):
            output.write("      <ref name='%s'/>\n" % (name))
        output.write("    </file>\n")

def serialize_xrefs_functions(self, output):
    """Write, per parameter type, the functions taking that type.

    Every function in self.idx.functions whose info unpacks as
    (ret, params, desc) contributes its name to the list kept for the
    first item of each parameter, except 'void'.  One <type> element is
    then written to output per collected type, in sorted order, skipping
    '', 'void', 'int', 'char *' and 'const char *'.
    """
    users = {}
    for name, func in list(self.idx.functions.items()):
        # Functions whose info cannot be unpacked or walked are ignored;
        # anything recorded before the failure is kept.
        try:
            (ret, params, desc) = func.info
            for param in params:
                ptype = param[0]
                if ptype == 'void':
                    continue
                users.setdefault(ptype, []).append(name)
        except:
            pass

    skipped = ('', 'void', "int", "char *", "const char *")
    for ptype in sorted(users.keys()):
        if ptype in skipped:
            continue
        output.write("    <type name='%s'>\n" % (ptype))
        # A function using the type for several parameters is listed
        # several times; the list is sorted, so dropping names equal to
        # the previous one removes the duplicates.
        previous = ''
        for name in sorted(users[ptype]):
            if name == previous:
                continue
            output.write("      <ref name='%s'/>\n" % (name))
            previous = name
        output.write("    </type>\n")

def serialize_xrefs_constructors(self, output):
    """Write, per return type, the functions returning that type.

    Every function in self.idx.functions whose info unpacks as
    (ret, params, desc) is filed under ret[0] unless that is "void".
    One <type> element is then written to output per collected type, in
    sorted order, skipping '', 'void', 'int', 'char *' and
    'const char *'; the function names inside are sorted.
    """
    makers = {}
    for name, func in list(self.idx.functions.items()):
        # Functions whose info cannot be unpacked are silently ignored.
        try:
            (ret, params, desc) = func.info
            rtype = ret[0]
            if rtype != "void":
                makers.setdefault(rtype, []).append(name)
        except:
            pass

    skipped = ('', 'void', "int", "char *", "const char *")
    for rtype in sorted(makers.keys()):
        if rtype in skipped:
            continue
        output.write("    <type name='%s'>\n" % (rtype))
        for name in sorted(makers[rtype]):
            output.write("      <ref name='%s'/>\n" % (name))
        output.write("    </type>\n")

def serialize_xrefs_alpha(self, output):
    """Write the identifiers grouped by their first character.

    The keys of self.idx.identifiers are sorted and each run of names
    sharing the same first character is written to output as one
    <letter> element holding a <ref> per name.
    """
    # Collect the runs first: [(letter, [names...]), ...] in sorted order.
    groups = []
    for name in sorted(self.idx.identifiers.keys()):
        if not groups or groups[-1][0] != name[0]:
            groups.append((name[0], []))
        groups[-1][1].append(name)

    for letter, names in groups:
        output.write("    <letter name='%s'>\n" % (letter))
        for name in names:
            output.write("      <ref name='%s'/>\n" % (name))
        output.write("    </letter>\n")

def serialize_xrefs_references(self, output):
    """Write one <reference> element per identifier, in sorted order.

    The href is built as html/<basename>-<module>.html#<identifier>,
    where <module> is self.modulename_file() applied to the header
    recorded on the identifier.
    """
    identifiers = self.idx.identifiers
    for name in sorted(identifiers.keys()):
        module = self.modulename_file(identifiers[name].header)
        href = 'html/' + self.basename + '-' + module + '.html#' + name
        output.write("    <reference name='%s' href='%s'/>\n" % (name, href))

def serialize_xrefs_index(self, output):
    """Write the word index of self.xref, split into chunks.

    Words (the keys of self.xref) are visited in sorted order; a word
    with more than 30 entries is left out.  Words are grouped in
    <letter> elements by first character, and letters are grouped in
    <chunk> elements: a new chunk is started at a letter boundary once
    more than 200 <ref> entries were written in the current one.  A
    trailing <chunks> element lists each chunk with its first and last
    letter.  The entry lists stored in self.xref are sorted in place.
    """
    xref = self.xref
    current = None      # first character of the <letter> currently open
    refs = 0            # <ref> entries written since the chunk was opened
    opened = 0          # number of <chunk> elements opened so far
    summary = []        # [chunk name, first letter, last letter]
    for word in sorted(xref.keys()):
        if len(xref[word]) > 30:
            continue
        initial = word[0]
        if initial != current:
            # Letter boundary: close what is open, maybe roll the chunk.
            start_chunk = current is None
            if current is not None:
                output.write("      </letter>\n")
                if refs > 200:
                    output.write("    </chunk>\n")
                    summary.append(["chunk%s" % (opened - 1),
                                    chunk_start, current])
                    refs = 0
                    start_chunk = True
            if start_chunk:
                output.write("    <chunk name='chunk%s'>\n" % (opened))
                chunk_start = initial
                opened += 1
            current = initial
            output.write("      <letter name='%s'>\n" % (current))

        output.write("        <word name='%s'>\n" % (word))
        entries = xref[word]
        entries.sort()
        # The list is sorted, so skipping an entry equal to the last one
        # written removes duplicates.
        last = None
        for entry in entries:
            if last == entry:
                continue
            last = entry
            output.write("          <ref name='%s'/>\n" % (entry))
            refs += 1
        output.write("        </word>\n")

    if current is None:
        # Nothing was written at all: no chunk to close, no summary.
        return
    output.write("      </letter>\n")
    output.write("    </chunk>\n")
    # The final chunk is only listed when it received at least one <ref>.
    if refs != 0:
        summary.append(["chunk%s" % (opened - 1), chunk_start, current])
    output.write("    <chunks>\n")
    for name, start, end in summary:
        output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                     name, start, end))
    output.write("    </chunks>\n")

def serialize_xrefs(self, output):
    """Write every cross-reference section to output.

    Each section is an opening tag, the content produced by the matching
    serialize_xrefs_* method, and the closing tag, in the order:
    references, alpha, constructors, functions, files, index.
    """
    sections = (
        ("  <references>\n", self.serialize_xrefs_references,
         "  </references>\n"),
        ("  <alpha>\n", self.serialize_xrefs_alpha,
         "  </alpha>\n"),
        ("  <constructors>\n", self.serialize_xrefs_constructors,
         "  </constructors>\n"),
        ("  <functions>\n", self.serialize_xrefs_functions,
         "  </functions>\n"),
        ("  <files>\n", self.serialize_xrefs_files,
         "  </files>\n"),
        ("  <index>\n", self.serialize_xrefs_index,
         "  </index>\n"),
    )
    for opening, emit, closing in sections:
        output.write(opening)
        emit(output)
        output.write(closing)

def serialize(self):
    """Write the API description and the cross references as XML files.

    Two files are created in the current directory:

    - <name>-api.xml: the <files> section (one serialize_exports() call
      per header, in sorted order) followed by the <symbols> section
      holding macros, enums, typedefs, variables and functions, each
      kind in sorted name order;
    - <name>-refs.xml: the output of serialize_xrefs() wrapped in an
      <apirefs> element.

    Each file is closed even when one of the serializers raises; the
    exception itself is propagated unchanged.
    """
    # NOTE(review): the XML declaration announces ISO-8859-1 while the
    # files are opened with the platform default encoding - confirm the
    # two agree wherever this script is run.
    filename = "%s-api.xml" % self.name
    print("Saving XML description %s" % (filename))
    with open(filename, "w") as output:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<api name='%s'>\n" % self.name)
        output.write("  <files>\n")
        for file in sorted(self.headers.keys()):
            self.serialize_exports(output, file)
        output.write("  </files>\n")
        output.write("  <symbols>\n")
        # Symbol kinds in the order they appear in the output; structs
        # have no entry of their own in this section.
        for symbols, emit in (
                (self.idx.macros, self.serialize_macro),
                (self.idx.enums, self.serialize_enum),
                (self.idx.typedefs, self.serialize_typedef),
                (self.idx.variables, self.serialize_variable),
                (self.idx.functions, self.serialize_function)):
            for symbol in sorted(symbols.keys()):
                emit(output, symbol)
        output.write("  </symbols>\n")
        output.write("</api>\n")

    filename = "%s-refs.xml" % self.name
    print("Saving XML Cross References %s" % (filename))
    with open(filename, "w") as output:
        output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
        output.write("<apirefs name='%s'>\n" % self.name)
        self.serialize_xrefs(output)
        output.write("</apirefs>\n")

def rebuild():
    """Rebuild the XML API description for the source tree found nearby.

    The module is guessed from the files present: parser.c in the
    current directory or in the parent directory selects libxml2,
    ../libxslt/transform.c selects libxslt.  The matching docBuilder is
    scanned, analyzed and serialized.  When ../libexslt/exslt.c exists,
    a libexslt description is built the same way afterwards.

    Returns the main builder, or None when no module could be guessed.
    """
    if glob.glob("parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", [".", "."],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c"):
        print("Rebuilding API description for libxml2")
        builder = docBuilder("libxml2", ["..", "../include/libxml"],
                             ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        print("Rebuilding API description for libxslt")
        builder = docBuilder("libxslt", ["../libxslt"],
                             ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize()

    # The extension library, when present, gets its own description; it
    # is not part of the returned value.
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize()
    return builder

#
# for debugging the parser
#
def parse(filename):
    """Run CParser on filename and return the result of its parse()."""
    return CParser(filename).parse()

# Script entry point: with an argument, parse that single file with
# debugging enabled; without one, rebuild the whole API description.
if __name__ == "__main__":
    if len(sys.argv) > 1:
        debug = 1
        parse(sys.argv[1])
    else:
        rebuild()