用Python去解析XML报文 V1.1
本文演示如何用Python解析XML报文。下面先给出待解析的XML报文(节选),随后是完整的Python源码。
[*]
<?xml version="1.0" encoding="iso8859-1"?>
<view id="CCBSA_v" msgType="1">
  <viewElem id="jrcfwq" dispName="集群">
    <viewElem id="fwqsl" dispName="服务器数量" value="1"/>
    <viewElem id="Master" dispName="主控节点" value="STAR"/>
    <viewElem id="Server">
      <viewElem id="SrvName" value="01" dispName="STAR">
        <viewElem id="Plat" dispName="平台信息">
          <viewElem id="PlatStat" dispName="平台状态">
            <viewElem id="MchPort" dispName="机器端口" value="12000"/>
            <viewElem id="MchState" dispName="机器状态" value="在线"/>
            <viewElem id="AllProc" dispName="总进程数" value="0"/>
            <viewElem id="EslProc" dispName="平台进程数" value="29"/>
            <viewElem id="SemNum" dispName="信号灯数" value="104"/>
            <viewElem id="DrqSem" dispName="DRQ使用信号灯" value="53"/>
          </viewElem>
          <viewElem id="CpuStat" dispName="CPU状态">
            <viewElem id="CpuIdle" dispName="idle" value="0"/>
            <viewElem id="CpuUser" dispName="user" value="0"/>
            <viewElem id="CpuNice" dispName="nice" value="0"/>
            <viewElem id="CpuSys" dispName="sys" value="0"/>
            <viewElem id="CpuWio" dispName="wio" value="0"/>
          </viewElem>
          <viewElem id="SysInfo" dispName="操作系统信息">
            <viewElem id="CpuNum" dispName="CPU颗数" value="0"/>
            <viewElem id="PhyMem" dispName="物理内存(K)" value="0"/>
            <viewElem id="VirtMem" dispName="虚拟内存(K)" value="0"/>
            <viewElem id="UsedMem" dispName="已用内存(K)" value="0"/>
            <viewElem id="exit_freeMem" dispName="可用内存(K)" value="0"/>
            <viewElem id="IFaceNum" dispName="网络接口数" value="0"/>
          </viewElem>
          <viewElem id="ProjInfo" dispName="项目信息">
            <viewElem id="ShmSize" dispName="部署共享内存大小(k)" value="15251168"/>
            <viewElem id="ShmKeep" dispName="部署共享内存剩余(k)" value="16769072"/>
            <viewElem id="BcbSize" dispName="交换共享内存大小(k)" value="6304"/>
            <viewElem id="BcbUsed" dispName="已用交换共享内存(k)" value="2208"/>
            <viewElem id="BcbMax" dispName="最大交换共享内存(k)" value="70816"/>
            <viewElem id="ChkNum" dispName="当前已用缓冲区段数" value="1"/>
            <viewElem id="DrqMsg" dispName="缓存消息数" value="0"/>
            <viewElem id="MsqNum" dispName="队列中的消息数" value="0"/>
            <viewElem id="TranNum" dispName="本机交易统计" value="322047"/>
            <viewElem id="DTAMPID" dispName="DTA管理服务进程号" value="3727372"/>
            <viewElem id="RCYCLPID" dispName="垃圾回收服务进程号" value="3739830"/>
            <viewElem id="SYNCPID" dispName="多机同步服务进程号" value="3670122"/>
            <viewElem id="ISSUEPID" dispName="多机发布服务进程号" value="2871348"/>
            <viewElem id="MACHSVRPID" dispName="多机交换服务进程号" value="0"/>
            <viewElem id="MONCPID" dispName="多机监控同步服务进程号" value="0"/>
            <viewElem id="MONSPID" dispName="多机监控发布服务进程号" value="0"/>
            <viewElem id="DTANUM" dispName="适配器个数" value="12"/>
....
....
......
import io
from os.path import join
from sys import stderr
from xml.dom import minidom
[*]
__version__ = "V1.1"

# ---------------------------------------------------------------------------
# Definitions for types and maps
# ---------------------------------------------------------------------------

# Codes written to the data-type column of a node record.
dataTypMap = {
    "integer": 0, "float": 1, "boolean": 2,
    "char": 3, "string": 4, "void": 5, "double": 6,
    "long": 7, "object": 8, "datetime": 9,
}
# Codes written to the node-type column: an element that has child nodes is
# a NODE, every other element is a LEAF.
nodeTypMap = {"LEAF": 0, "NODE": 2}
# Icon path written for each node-type code.
nodeImgMap = {
    nodeTypMap["LEAF"]: "/images/leaf.gif",
    nodeTypMap["NODE"]: "/images/node.gif",
}
# Selector codes for the four kinds of output record.
whoMap = {"Views": 0, "Nodes": 1, "NodeViews": 2, "NodesRel": 3}


class TIndicatorData:
    """Convert an XML indicator tree into four pipe-delimited text files.

    Every element of the document is expected to carry an ``id`` attribute.
    The tree is walked once per record kind ("Nodes", "Views", "NodeViews",
    "NodesRel"); an ``id`` that has already been seen during a walk is
    skipped together with its subtree.  The collected records are written,
    GB2312-encoded, into the destination directory.

    The code is kept free of syntax that is specific to Python 2 or
    Python 3 so that it runs on either.
    """

    # (record kind, output file name) in the order the files are written.
    __outputFiles = (
        ("Nodes", "isac_mnt_basenode.txt"),
        ("Views", "isac_mnt_view.txt"),
        ("NodeViews", "isac_mnt_basenode_view.txt"),
        ("NodesRel", "isac_mnt_basenode_rel.txt"),
    )

    def __init__(self, XML, dest):
        """Parse the XML file and remember the output directory.

        XML  -- path of the XML document to parse.
        dest -- directory that receives the generated text files.

        Surrounding whitespace is trimmed from both paths.  Errors raised
        by ``minidom.parse`` (missing file, malformed XML) propagate.
        """
        self.__xmldoc = minidom.parse(XML.strip())
        self.__root = self.__xmldoc.documentElement
        self.__destPath = dest.strip()
        self.__seenIds = set()  # ids already emitted during the current walk
        self.__outputData = self.__emptyOutput()
    # end def

    def __emptyOutput(self):
        """Return a fresh mapping of record kind -> empty list of records."""
        return dict((name, []) for name in whoMap)

    def GenIndicators(self):
        """Collect all four record kinds and write them to their files.

        The tree is walked completely before any file is opened, so a
        failure while collecting leaves existing output files untouched.
        The name of each file is printed after it has been written.
        """
        # Start from empty buffers so that calling this method again does
        # not append a second copy of every record.
        self.__outputData = self.__emptyOutput()
        for name, _ in self.__outputFiles:
            self.__retrieveIndViewData(name)

        for name, fileName in self.__outputFiles:
            # io.open behaves the same on Python 2 and 3; the "with" block
            # closes the handle even when the write fails.
            with io.open(join(self.__destPath, fileName), "w",
                         encoding="GB2312") as handle:
                self.__write(handle, name)
    # end def

    def __write(self, fileHandle, outputDataName):
        """Write the records of one kind to an open text-mode handle.

        fileHandle     -- writable text file object; ``None`` is reported
                          on stderr and otherwise ignored.
        outputDataName -- key of the record kind in the output buffers.
        """
        if fileHandle is None:
            stderr.write("Invalid file handle corresponding to %s\n"
                         % outputDataName)
            return
        records = self.__outputData[outputDataName]
        if records:
            fileHandle.write("".join(records))
        print(fileHandle.name)

    def __retrieveIndViewData(self, whoMapKeyName):
        """Walk the whole tree collecting records of the named kind."""
        self.__getNodes(self.__root, whoMap[whoMapKeyName])
        self.__clearNodesList()

    def __clearNodesList(self):
        """Forget the ids seen so far so the next walk starts clean."""
        self.__seenIds = set()

    def __getNodePaths(self, ANode, isDynNode=True, initPath=""):
        """Print the slash-separated path of every non-leaf descendant.

        ANode     -- element to start from.
        isDynNode -- when true, an element's ``value`` attribute (if it has
                     one) is used as its path segment instead of its ``id``.
        initPath  -- path prefix; when blank it is derived from ANode's id.

        Returns ``initPath`` unchanged when ANode is ``None`` or has no
        child nodes; otherwise prints the paths and returns ``None``.
        """
        if ANode is None or not ANode.hasChildNodes():
            return initPath

        if initPath.strip() == "":
            initPath = "/%s" % ANode.getAttribute("id").strip()

        for elem in ANode.childNodes:
            # Skip text/comment nodes and leaf elements.
            if (not elem.localName) or (not elem.hasChildNodes()):
                continue

            if isDynNode and elem.hasAttribute("value"):
                segment = elem.getAttribute("value").strip()
            else:
                segment = elem.getAttribute("id").strip()
            idPath = "%s/%s" % (initPath, segment)

            print(idPath)
            self.__getNodePaths(elem, isDynNode, idPath)
        # end for
    # end def

    def __getNodeType(self, ANode):
        """Return the NODE code if ANode has child nodes, else LEAF."""
        if ANode.hasChildNodes():
            return nodeTypMap["NODE"]
        return nodeTypMap["LEAF"]
    # end def

    def __getDisplayName(self, ANode):
        """Return the display name of ANode, falling back to its id.

        The ``dispName`` text is re-encoded as ISO8859 and decoded as
        GB2312, which recovers GB2312 text from a document whose header
        declares ISO8859.  When that round trip is impossible the
        attribute text is returned as parsed.
        """
        if not ANode.hasAttribute("dispName"):
            return ANode.getAttribute("id").strip()
        raw = ANode.getAttribute("dispName").strip()
        try:
            return raw.encode("ISO8859").decode("GB2312")
        except UnicodeError:
            return raw

    def __outputNodes(self, ANode):
        """Append the "Nodes" record describing ANode.

        A leaf with a ``value`` attribute is typed integer (digits only),
        float (accepted by ``float()``) or string; every other element is
        typed void.
        """
        if ANode is None:
            return

        nodeType = self.__getNodeType(ANode)
        dataType = dataTypMap["void"]
        if nodeType == nodeTypMap["LEAF"] and ANode.hasAttribute("value"):
            dataValue = ANode.getAttribute("value").strip()
            if dataValue.isdigit():
                dataType = dataTypMap["integer"]
            else:
                try:
                    float(dataValue)
                except ValueError:
                    dataType = dataTypMap["string"]
                else:
                    dataType = dataTypMap["float"]

        dispName = self.__getDisplayName(ANode)

        # Fixed values for the trailing columns of the record.
        ProcType = 0
        PrstType = 1
        UnitName = ''
        initValue = '0'
        updateTime = ''
        self.__outputData["Nodes"].append(
            "%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s|\n" % (
                ANode.getAttribute("id").strip(),
                dispName, dispName, nodeImgMap[nodeType],
                nodeType, dataType, ProcType, PrstType,
                UnitName, initValue, updateTime))
    # end def

    def __outputViews(self, ANode):
        """Append the "Views" record for ANode; leaves produce nothing."""
        if ANode is None or self.__getNodeType(ANode) != nodeTypMap["NODE"]:
            return

        nodeID = ANode.getAttribute("id").strip()
        viewName = self.__getDisplayName(ANode)
        self.__outputData["Views"].append(
            "%s|%s|%s|\n" % (nodeID, viewName, nodeID))
    # end def

    def __outputNodeViews(self, ANode):
        """Append the "NodeViews" record pairing ANode with a view id.

        A NODE is paired with its own id, a LEAF with its parent's id.
        """
        if ANode is None:
            return

        nodeID = ANode.getAttribute("id").strip()
        if self.__getNodeType(ANode) == nodeTypMap["NODE"]:
            viewID = nodeID
        elif ANode.parentNode.nodeType == ANode.DOCUMENT_NODE:
            # A childless root has no parent element to take an id from,
            # so no record is produced for it.
            return
        else:
            viewID = ANode.parentNode.getAttribute("id").strip()
        self.__outputData["NodeViews"].append("%s|%s|\n" % (nodeID, viewID))
    # end def

    def __outputNodesRel(self, ANode):
        """Append the "NodesRel" (parent id, child id) record for ANode.

        The document element has no parent element and is skipped.
        """
        if ANode is None or ANode.parentNode.nodeType == ANode.DOCUMENT_NODE:
            return

        self.__outputData["NodesRel"].append(
            "%s|%s|\n" % (ANode.parentNode.getAttribute("id").strip(),
                          ANode.getAttribute("id").strip()))
    # end def

    def __handleByType(self, ANode, Who):
        """Determine which kind of data should be output for ANode."""
        if ANode is None:
            return

        if Who == whoMap["Nodes"]:
            self.__outputNodes(ANode)
        elif Who == whoMap["Views"]:
            self.__outputViews(ANode)
        elif Who == whoMap["NodeViews"]:
            self.__outputNodeViews(ANode)
        elif Who == whoMap["NodesRel"]:
            self.__outputNodesRel(ANode)
    # end def

    def __getNodes(self, ANode, Who):
        """Walk ANode's subtree depth-first, emitting records of kind Who.

        Child elements without an ``id`` attribute are reported on stderr
        and skipped; an element whose id was already seen during this walk
        is skipped together with its subtree.
        """
        if ANode is None:
            return

        # The document element is nobody's child, so it is handled here
        # rather than in the loop below.
        if ANode.parentNode.nodeType == ANode.DOCUMENT_NODE:
            if not ANode.hasAttribute("id"):
                stderr.write(
                    "Invalid tag without an attribute named 'id' FOUND!\n")
                return
            self.__handleByType(ANode, Who)

        for elem in ANode.childNodes:
            if not elem.localName:
                continue  # text, comment, ... -- not an element

            if not elem.hasAttribute("id"):
                stderr.write(
                    "Invalid tag without an attribute named 'id' FOUND!\n")
                continue

            tagID = elem.getAttribute("id").strip()
            if tagID in self.__seenIds:
                continue
            self.__seenIds.add(tagID)

            self.__handleByType(elem, Who)
            self.__getNodes(elem, Who)
    # end def
# end class
[*]
if __name__ == "__main__":
    # CAUTION: Please set encoding by ISO8859 in XML head!
    #
    # Usage: script [xml_file [dest_dir]]
    # Both arguments are optional; the original hard-coded locations are
    # used for whichever one is omitted.
    import sys

    xmlFile = r"D:/Documents/Construction_Bank/SH/CCBSA/Design/ccbsa_ind21.xml"
    destDir = r"D:/Temp"
    if len(sys.argv) > 1:
        xmlFile = sys.argv[1]
    if len(sys.argv) > 2:
        destDir = sys.argv[2]

    ind = TIndicatorData(xmlFile, destDir)
    ind.GenIndicators()
    print("\nMISSION COMPLETED.")
[*]
[*]
[*]#ENDOFFILE.
[*]
页:
[1]