Python XML Parsing Types
Python provides several libraries for parsing and processing XML documents. In this tutorial, we will discuss how to parse XML documents using Python.
1. Parsing XML Documents with xml.etree.ElementTree:
The xml.etree.ElementTree module provides a simple and efficient way to parse and manipulate XML documents in Python. Here is an example of parsing an XML document using the ElementTree module:
# Parse an XML file with the standard-library ElementTree API and walk it.
import xml.etree.ElementTree as ET

# ET.parse() reads the file and returns an ElementTree; getroot() yields the
# top-level element.
tree = ET.parse('example.xml')
root = tree.getroot()

# print the tag and attributes of the root element
print(root.tag)
print(root.attrib)

# iterate over the child elements of the root element
for child in root:
    print(child.tag, child.attrib)

# find elements by tag name
for country in root.findall('country'):
    rank = country.find('rank').text
    name = country.get('name')
    print(name, rank)
# Parse an XML file with lxml's etree module and walk it.
from lxml import etree

tree = etree.parse('example.xml')
root = tree.getroot()

# print the tag and attributes of the root element
print(root.tag)
print(root.attrib)

# iterate over the child elements of the root element
for child in root:
    print(child.tag, child.attrib)

# find elements by tag name
for country in root.findall('country'):
    rank = country.find('rank').text
    name = country.get('name')
    print(name, rank)
# Parse an XML file into a DOM tree with xml.dom.minidom and walk it.
import xml.dom.minidom

# parse the XML document
dom = xml.dom.minidom.parse('example.xml')
root = dom.documentElement

# print the tag and the "name" attribute of the root element.
# getAttribute() returns '' when the attribute is absent, whereas
# attributes['name'] raises KeyError.
print(root.tagName)
print(root.getAttribute('name'))

# iterate over the child elements of the root element
for child in root.childNodes:
    # skip every node that is not an element
    if child.nodeType == xml.dom.minidom.Node.ELEMENT_NODE:
        print(child.tagName, child.getAttribute('name'))

# find elements by tag name
countries = root.getElementsByTagName('country')
for country in countries:
    # the first child node of the first <rank> element carries its text
    rank = country.getElementsByTagName('rank')[0].childNodes[0].data
    name = country.getAttribute('name')
    print(name, rank)
import xml.sax


class MyHandler(xml.sax.ContentHandler):
    """Collect the text content of every <title> element in a document.

    After parsing, ``titles`` holds one string per <title> element that
    contained character data, in document order.
    """

    def __init__(self):
        super().__init__()
        self.current_element = ""
        self.in_title = False
        self.titles = []
        # Pieces of the <title> currently being read; see characters().
        self._title_chunks = []

    def startElement(self, name, attrs):
        """Record the element name and start buffering if it is a <title>."""
        self.current_element = name
        if name == "title":
            self.in_title = True
            self._title_chunks = []

    def endElement(self, name):
        """On </title>, store the buffered text as a single entry."""
        if name == "title":
            self.in_title = False
            # A <title> with no character data adds no entry.
            if self._title_chunks:
                self.titles.append("".join(self._title_chunks))
                self._title_chunks = []

    def characters(self, content):
        """Buffer character data reported inside a <title> element.

        A SAX parser may report one run of text in several calls, so the
        pieces are joined in endElement() instead of being appended to
        ``titles`` one by one.
        """
        if self.in_title:
            self._title_chunks.append(content)


if __name__ == "__main__":
    handler = MyHandler()
    parser = xml.sax.make_parser()
    parser.setContentHandler(handler)
    parser.parse("example.xml")
    print(handler.titles)
# Download an XML document over HTTP and parse it with ElementTree.
import urllib.request
import xml.etree.ElementTree as ET

url = 'https://example.com/data.xml'

# The context manager closes the HTTP response even if read() raises.
with urllib.request.urlopen(url) as response:
    # Keep the payload as bytes so the XML parser can apply the encoding
    # declared in the document instead of assuming UTF-8.
    xml_data = response.read()

# NOTE: the standard-library XML parsers are not hardened against
# maliciously constructed documents; only parse data from trusted sources.
root = ET.fromstring(xml_data)
# Look up a namespaced element with ElementTree.
import xml.etree.ElementTree as ET

xml_string = '<root xmlns:ns="http://example.com/ns"><ns:child>value</ns:child></root>'
root = ET.fromstring(xml_string)

# Map the prefix used in the search path to the namespace URI it stands for.
ns = {'ns': 'http://example.com/ns'}
child = root.find('ns:child', ns)
print(child.text)  # output: value
# Select every matching child element with a path expression.
import xml.etree.ElementTree as ET

xml_string = '<root><child>value1</child><child>value2</child></root>'
root = ET.fromstring(xml_string)

# './child' matches the <child> elements directly under root.
values = root.findall('./child')
for value in values:
    print(value.text)  # output: value1, then value2 (one per line)
# Change an element's text and serialise the tree back to a string.
import xml.etree.ElementTree as ET

xml_string = '<root><child>old value</child></root>'
root = ET.fromstring(xml_string)

child = root.find('child')
child.text = 'new value'

# ET.tostring() returns bytes by default; decode() turns it into str.
xml_string = ET.tostring(root).decode()
print(xml_string)  # output: <root><child>new value</child></root>
# Parse an XML file into nested dictionaries with xmltodict.
import xmltodict

# parse the XML file
with open('example.xml') as fd:
    # force_list keeps 'item' a list even when the file holds a single
    # <item>; without it xmltodict returns a bare dict for that case and the
    # loops below would iterate over its keys.
    doc = xmltodict.parse(fd.read(), force_list=('item',))

# get the root element
root = doc['root']
items = root['items']['item']

# iterate over child elements
for child in items:
    print(child['name'], child['description'])

# find items whose <price> text equals '9.99'
for item in items:
    if item['price'] == '9.99':
        print(item['name'])
Labels: best practices, programming, python xml
0 Comments:
Post a Comment
Note: only a member of this blog may post a comment.
<< Home