codex/files-formats/xml

docs/codex/files-formats/xml_parsing.cat
#!/usr/bin/env catnip
# Parsing XML avec xml.etree.ElementTree
# Module stdlib pour lire et naviguer dans des documents XML

ET = import('xml.etree.ElementTree')

# Data model

# Book: one catalog entry with five fields (id, title, author, year, category).
# In this script the fields are filled by book_from_xml with the raw XML
# attribute values and element texts, so `year` is converted with int()
# where a number is needed.
struct Book {
    id; title; author; year; category;

    # Returns a one-line summary: "Book#<id> '<title>' (<author>, <year>) [<category>]".
    display(self) => {
        f"Book#{self.id} '{self.title}' ({self.author}, {self.year}) [{self.category}]"
    }
}

# Build a Book from an XML element

# Reads the 'id' and 'category' attributes of `node` plus the text of its
# <title>, <author> and <year> children, in that order, and passes them to
# the Book constructor.
# Returns the Book, or None if anything raises during extraction (for
# example a missing child: find() then yields None and `.text` fails).
book_from_xml = (node) => {
    try {
        book_id = node.get('id')
        title = node.find('title').text
        author = node.find('author').text
        year = node.find('year').text
        category = node.get('category')
        Book(book_id, title, author, year, category)
    } except {
        # Deliberate best-effort: any failure maps to None for the caller to test.
        _ => { None }
    }
}

# XML data

# Sample catalog used by the rest of the script: three <book> elements, each
# with `id` and `category` attributes and <title>, <author>, <year> children.
# The XML declaration directly follows the opening quote because the parser
# requires it to be the very first thing in the document.
xml_string = '<?xml version="1.0"?>
<catalog>
    <book id="1" category="fiction">
        <title>The Great Gatsby</title>
        <author>F. Scott Fitzgerald</author>
        <year>1925</year>
    </book>
    <book id="2" category="science">
        <title>A Brief History of Time</title>
        <author>Stephen Hawking</author>
        <year>1988</year>
    </book>
    <book id="3" category="fiction">
        <title>1984</title>
        <author>George Orwell</author>
        <year>1949</year>
    </book>
</catalog>
'

# Malformed XML

# Feeds the parser a document whose tags are never closed and prints the
# caught error instead of letting it abort the script.
# NOTE(review): in CPython, ElementTree reports malformed input as ParseError
# (a SyntaxError subclass). Confirm that Catnip surfaces it as RuntimeError;
# otherwise the arm below will not match and the error will propagate.
print("⇒ XML malformé")
try {
    ET.fromstring("<broken><no-close>")
} except {
    e: RuntimeError => { print(f"  Attrapé: {e}") }
}

# Parsing

# Parse the sample catalog, keep the root element in `root` for the sections
# below, then report the root tag and the number of direct children.
print()
print("⇒ Parsing XML")
root = ET.fromstring(xml_string)
root_tag = root.tag
child_count = len(root)
print(f"  Tag racine: {root_tag}")
print(f"  Nombre d'enfants: {child_count}")

# Conversion to structs

print()
print("⇒ Conversion en structs Book")
# Accumulate into `books` every child of the root that converts cleanly;
# book_from_xml yields None for a child it cannot convert, and those are skipped.
books = list()
for element in root {
    parsed = book_from_xml(element)
    if parsed != None {
        books = books + list(parsed)
    }
}
# Each struct is interpolated directly, so this prints its default string
# form rather than the result of display().
for entry in books {
    print(f"  {entry}")
}

# Category filtering with match

print()
print("⇒ Filtrage par catégorie (match)")
# Map each book's raw category onto a display label; any category other
# than the two listed falls through to the wildcard arm.
for entry in books {
    category_label = match entry.category {
        'science' => { "Science" }
        'fiction' => { "Roman" }
        _ => { "Autre" }
    }
    print(f"  [{category_label}] {entry.title}")
}

# Searching with findall

print()
print("⇒ Recherche avec findall")
# The './/title' path selects <title> elements at any depth below the root.
titles = root.findall('.//title')
print("  Tous les titres:")
for title_node in titles {
    title_text = title_node.text
    print(f"    - {title_text}")
}

# Attributes

print()
print("⇒ Attributs")
# `attrib` is the element's attribute mapping; print it as-is for each child.
for element in root {
    print(f"  Book: {element.attrib}")
}

# Tree navigation

print()
print("⇒ Navigation dans l'arbre")
# Walk the direct children of the root and print id, title and author of
# each one that converts to a Book.
for node in root {
    book = book_from_xml(node)
    # book_from_xml returns None when extraction fails; skip such nodes
    # instead of failing on attribute access, as the conversion loop does.
    if book != None {
        print(f"  Book id:{book.id}")
        print(f"    Titre: {book.title}")
        print(f"    Auteur: {book.author}")
    }
}

# Books per decade via broadcasting

print()
print("⇒ Livres par décennie")
# Apply the lambda to every element of `books`: convert the year text to an
# integer once, round it down to the start of its decade, and format one
# output line per book.
decades = books.[(entry) => {
    year = int(entry.year)
    decade = year - year % 10
    f"  {entry.title} → {decade}s"
}]
for line in decades {
    print(line)
}