# codex/files-formats/regex_patterns.cat
# Pattern matching with regular expressions (re)
# The re module is used to search for and extract patterns in text
#
# Run: catnip -m re regex_patterns.cat

re = import("re")

# Test data (simplified log lines)

# Every line starts with a date, a time and a level word. The two
# "INFO User ..." lines additionally end with "from <dotted address>".
log1 = "2024-03-15 10:23:45 INFO User alice from 192.168.1.100"
log2 = "2024-03-15 10:24:12 ERROR Connection timeout"
log3 = "2024-03-15 10:25:01 WARN Memory at 85%"
log4 = "2024-03-15 10:26:33 INFO User bob from 10.0.0.42"
log5 = "2024-03-15 10:27:55 ERROR Database deadlock"
log_lines = list(log1, log2, log3, log4, log5)

# Sample addresses; "invalid" deliberately contains no "@".
emails = list("alice@example.com", "bob@company.co.uk", "invalid", "test@sub.org")

# Simple search

print("⇒ Recherche simple (search)")
# Print every log line in which the literal text "ERROR" is found.
for line in log_lines {
    m = re.search("ERROR", line)
    # Only lines for which the search produced a match are reported.
    if m {
        print("  Erreur trouvée:", line)
    }
}

# Extraction with capture groups

print("\n⇒ Extraction de dates et niveaux (groupes)")
# Three positional groups separated by single spaces:
#   group 1: date  (4 digits - 2 digits - 2 digits)
#   group 2: time  (2 digits : 2 digits : 2 digits)
#   group 3: the word that follows (the log level in the test data)
pattern = "(\\d{4}-\\d{2}-\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (\\w+)"
for line in log_lines {
    m = re.search(pattern, line)
    if m {
        # Group 2 (the time) is captured but not printed here.
        print("  Date:", m.group(1), "| Level:", m.group(3))
    }
}

# Named groups

print("\n⇒ Groupes nommés")
# "user": the word immediately before " from "; "ip": the run of digits and
# dots immediately after it. Groups are then read by name instead of index.
ip_pattern = "(?P<user>\\w+) from (?P<ip>[\\d.]+)"
for line in log_lines {
    m = re.search(ip_pattern, line)
    # Lines without a "<word> from <digits/dots>" part are skipped.
    if m {
        print("  User:", m.group("user"), "| IP:", m.group("ip"))
    }
}

# Email validation

print("\n⇒ Validation d'emails")
# Anchored with ^ and $ so the whole string must fit the shape:
#   one or more word characters, dots or hyphens, then "@",
#   one or more word characters, dots or hyphens, then ".",
#   and finally at least two word characters.
# This is a simple shape check for the demo, not a full address validator.
email_pattern = "^[\\w.-]+@[\\w.-]+\\.\\w{2,}$"
for email in emails {
    # Tag each address [OK] or [KO] depending on whether the pattern matched.
    if re.match(email_pattern, email) {
        print("  [OK]", email)
    } else {
        print("  [KO]", email)
    }
}

# findall: every match in the text

text = "Contacts: alice@a.com, bob@b.org, charlie@c.net"

print("\n⇒ Extraction de tous les emails (findall)")
# Unanchored variant of an email-shaped pattern, so it can pick addresses out
# of the middle of a longer string. The pattern has no capture groups and
# each result is printed as-is.
found = re.findall("[\\w.-]+@[\\w.-]+\\.\\w+", text)
for email in found {
    print("  -", email)
}

# Substitution (sub)

print("\n⇒ Anonymisation d'IPs (sub)")
for line in log_lines {
    # Replace every "digits.digits.digits.digits" sequence with a fixed
    # placeholder.
    anonymized = re.sub("\\d+\\.\\d+\\.\\d+\\.\\d+", "XXX.XXX.XXX.XXX", line)
    # Print only the lines that contain the placeholder; none of the test
    # lines contain "XXX" on their own, so these are the substituted ones.
    if anonymized.find("XXX") >= 0 {
        print("  ", anonymized)
    }
}

# Split with a pattern

# The items are separated by a mix of ";" and ",".
data = "pomme;orange,banane;kiwi,mangue"

print("\n⇒ Split avec plusieurs séparateurs")
# The character class [;,] makes either separator a split point.
fruits = re.split("[;,]", data)
for fruit in fruits {
    print("  -", fruit)
}

# Pattern compilation

print("\n⇒ Pattern compilé (performance)")
# Compile once, outside the loop, and reuse the compiled object for each line.
# Group 1 captures everything between "ERROR" plus following whitespace and
# the end of the string.
error_re = re.compile("ERROR\\s+(.+)$")
for line in log_lines {
    m = error_re.search(line)
    if m {
        print("  Message:", m.group(1))
    }
}

# Flags: case-insensitive matching

print("\n⇒ Recherche insensible à la casse")
# Four spellings of the same word that differ only in letter case.
text_mixed = "Python PYTHON python PyThOn"
# The flag is passed as the third argument to findall.
matches = re.findall("python", text_mixed, re.IGNORECASE)
print("  Trouvés:", len(matches), "occurrences")

# Extracting structure

print("\n⇒ Parsing de clé=valeur")
config = "host=localhost port=5432 user=admin"
# Two capture groups around "=": the key word and the value word.
pairs = re.findall("(\\w+)=(\\w+)", config)
for pair in pairs {
    # Each result is indexed positionally: [0] is the key, [1] is the value.
    print("  ", pair[0], "=>", pair[1])
}