python解析xml文件操作实例
本文实例讲述了python解析xml文件操作的实现方法。分享给大家供大家参考。具体方法如下:
xml文件内容如下:
<?xml version="1.0"?> <!--Simplexmldocument__chapter8--> <book> <title> samplexmlthing </title> <author> <name> <first> ma </first> <last> xiaoju </last> </name> <affiliation> SpringsWidgets,Inc. </affiliation> </author> <chapter number="1"> <title> First </title> <para> Ithinkwidgetsaregreate.Youshouldbuylotsofthemforom <company> SpirngyWidgts,Inc </company> </para> </chapter> </book>
python代码:
import re
import textwrap
from xml.dom import minidom, Node
class SampleScanner:
    """Walk a parsed ``<book>`` document and print selected fields.

    Constructing an instance performs the whole scan: each top-level
    ``<book>`` element of the document is passed to ``handle_book``, and
    the ``handle_*`` methods print what they find to standard output.
    Nothing is stored on the instance.
    """

    # Compiled once for the class; matches any run of whitespace.
    _WHITESPACE = re.compile(r"\s+")

    def __init__(self, doc):
        """Scan *doc* and print its contents.

        Args:
            doc: the ``minidom.Document`` to scan.

        Raises:
            AssertionError: if *doc* is not a ``minidom.Document``.
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Print the title of a ``<book>`` and dispatch its author/chapters.

        Args:
            node: the ``<book>`` element.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            # A single-argument print(...) produces the same output on
            # Python 2 and Python 3.
            if child.tagName == "title":
                print("Booktitileis: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a chapter's number and title, then process its paragraphs.

        Args:
            node: a ``<chapter>`` element.
        """
        print("number: %s" % node.getAttribute("number"))
        # getElementsByTagName searches all descendants, so the text of
        # every nested <title> ends up in the printed value.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of the ``<company>`` elements inside a paragraph.

        Args:
            node: a ``<para>`` element.
        """
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the author's name parts and affiliation.

        Args:
            node: an ``<author>`` element.
        """
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` parts of an author name.

        A missing part is printed as an empty string.

        Args:
            node: a ``<name>`` element.
        """
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)
        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text found under *nodelist* with whitespace removed.

        Text nodes contribute their ``wholeText``; any other node that has
        children is searched recursively.

        Args:
            nodelist: an iterable of DOM nodes.

        Returns:
            The concatenated text with every run of whitespace deleted.
        """
        parts = []
        for node in nodelist:
            if node.nodeType == Node.TEXT_NODE:
                parts.append(node.wholeText)
            # hasChildNodes is a method; the bare attribute is always truthy.
            elif node.hasChildNodes():
                parts.append(self.gettext(node.childNodes))
        # NOTE(review): this deletes *all* whitespace, including the spaces
        # between words - confirm that is intended rather than collapsing
        # each run to a single space.
        return self._WHITESPACE.sub("", "".join(parts))
if __name__ == "__main__":
    # Parse the sample file from the current directory. Constructing the
    # scanner walks the document and prints the results, so the instance
    # itself does not need to be kept.
    SampleScanner(minidom.parse("simple.xml"))
希望本文所述对大家的Python程序设计有所帮助。