#!/usr/bin/env catnip
# Pattern matching avec les expressions régulières (re)
# Le module re permet de rechercher et extraire des motifs dans du texte
re = import('re')
# Data model for patterns and log entries.
#
# Pattern pairs a short label with a regular-expression source string.
#   name          - label shown by display()
#   regex         - pattern string handed to the re module
#   apply(text)   - returns re.findall(self.regex, text)
#   display()     - returns the string "Pattern(<name>)"
struct Pattern {
name; regex;
apply(self, text) => { re.findall(self.regex, text) }
display(self) => { f"Pattern({self.name})" }
}
# LogEntry holds the fields of one log line.
#   date, time, level, message - the values rendered by display()
#   source                     - extra field; not part of display() output
#   display()                  - returns "[<level>] <date> <time> - <message>"
struct LogEntry {
date; time; level; message; source;
display(self) => { f"[{self.level}] {self.date} {self.time} - {self.message}" }
}
# Reusable patterns
# log: four capture groups in order - date (4-2-2 digits separated by "-"),
# time (2:2:2 digits), one word, and the rest of the line.
log_pattern = Pattern("log", "(\\d{4}-\\d{2}-\\d{2}) (\\d{2}:\\d{2}:\\d{2}) (\\w+) (.+)")
# ip: two named groups, `user` and `ip`, on either side of the literal " from ".
ip_pattern = Pattern("ip", "(?P<user>\\w+) from (?P<ip>[\\d.]+)")
# email: no capture group.
email_pattern = Pattern("email", "[\\w.-]+@[\\w.-]+\\.\\w{2,}")
# kv: two capture groups, the word before "=" and the word after it.
kv_pattern = Pattern("kv", "(\\w+)=(\\w+)")
# Test data
# Sample log lines laid out as "<date> <time> <LEVEL> <message>"; two of the
# messages contain "User <name> from <address>".
log_lines = list(
"2024-03-15 10:23:45 INFO User alice from 192.168.1.100",
"2024-03-15 10:24:12 ERROR Connection timeout",
"2024-03-15 10:25:01 WARN Memory at 85%",
"2024-03-15 10:26:33 INFO User bob from 10.0.0.42",
"2024-03-15 10:27:55 ERROR Database deadlock",
)
# Sample addresses; "invalid" contains no "@".
emails = list("alice@example.com", "bob@company.co.uk", "invalid", "test@sub.org")
# Structured log parsing
# parse_log(line): search `line` with log_pattern.regex and build a LogEntry
# from the four captured groups. Yields None when the pattern is not found.
parse_log = (line) => {
    hit = re.search(log_pattern.regex, line)
    if hit {
        date_part = hit.group(1)
        time_part = hit.group(2)
        level = hit.group(3)
        body = hit.group(4)
        # The message text is scanned for "from <address>"; when present, the
        # address becomes the entry's source, otherwise the source is None.
        origin = re.search("from ([\\d.]+)", body)
        source = if origin { origin.group(1) } else { None }
        LogEntry(date_part, time_part, level, body, source)
    } else {
        None
    }
}
print("⇒ Parsing structuré des logs")
# Parse every sample line; parse_log yields None for a line it cannot match,
# so those results are skipped when printing.
entries = log_lines.[(line) => { parse_log(line) }]
for e in entries {
    if e != None {
        # Render through LogEntry.display() explicitly so the printed text
        # does not depend on how an f-string stringifies a struct value.
        print(f" {e.display()}")
    }
}
# Classification by level using match
print()
print("⇒ Classification par niveau (match)")
# classify(entry): return entry.message prefixed with a bracketed tag chosen
# from entry.level (ERROR, WARN, INFO; any other level gets "?").
classify = (entry) => {
    tag = match entry.level {
        'ERROR' => { "CRITICAL" }
        'WARN' => { "ALERTE" }
        'INFO' => { "OK" }
        _ => { "?" }
    }
    f" [{tag}] {entry.message}"
}
# Print the classification of every entry that is not None.
for entry in entries {
    if entry != None {
        label = classify(entry)
        print(label)
    }
}
# Simple search
print()
print("⇒ Recherche simple (search)")
# Report each log line in which re.search finds the literal text "ERROR".
for line in log_lines {
    if re.search("ERROR", line) {
        print(f" Erreur trouvée: {line}")
    }
}
# Extraction with named groups
print()
print("⇒ Groupes nommés (IPs)")
for line in log_lines {
    hit = re.search(ip_pattern.regex, line)
    if hit {
        # Groups are read by the names declared in ip_pattern's regex.
        user = hit.group('user')
        ip = hit.group('ip')
        print(f" User: {user} | IP: {ip}")
    }
}
# Email validation via broadcasting
print()
print("⇒ Validation d'emails")
# validate(email): return " [OK] <email>" when the whole string is an email
# address according to email_pattern.regex, otherwise " [KO] <email>".
validate = (email) => {
    # re.fullmatch must consume the entire string, so a trailing newline is
    # rejected (re.match with "$" would accept it). The regex is shared with
    # email_pattern instead of being repeated here.
    if re.fullmatch(email_pattern.regex, email) {
        f" [OK] {email}"
    } else {
        f" [KO] {email}"
    }
}
# Broadcast over the sample addresses; only the printing matters here.
emails.[(e) => { print(validate(e)) }]
# findall through Pattern.apply
print()
print("⇒ Extraction de tous les emails (findall)")
text = "Contacts: alice@a.com, bob@b.org, charlie@c.net"
found = email_pattern.apply(text)
# Print one bullet line per item returned by apply.
for email in found {
    bullet = f" - {email}"
    print(bullet)
}
# Substitution (sub)
print()
print("⇒ Anonymisation d'IPs (sub)")
# anonymize(line): return `line` with every run of four dot-separated digit
# groups replaced by the fixed mask "XXX.XXX.XXX.XXX".
anonymize = (line) => {
    dotted_quad = "\\d+\\.\\d+\\.\\d+\\.\\d+"
    mask = "XXX.XXX.XXX.XXX"
    re.sub(dotted_quad, mask, line)
}
# Anonymize every sample line, then show only the lines in which the text
# "XXX" occurs (find returns a non-negative index in that case).
anonymized = log_lines.[(line) => { anonymize(line) }]
for line in anonymized {
    position = line.find("XXX")
    if position >= 0 {
        print(f" {line}")
    }
}
# Split on a pattern
print()
print("⇒ Split avec plusieurs séparateurs")
data = "pomme;orange,banane;kiwi,mangue"
# The character class splits on either ";" or ",".
fruits = re.split("[;,]", data)
for fruit in fruits {
    item = f" - {fruit}"
    print(item)
}
# Compiled pattern
print()
print("⇒ Pattern compilé (performance)")
# Compile once, then reuse the compiled object for every line.
error_re = re.compile("ERROR\\s+(.+)$")
for line in log_lines {
    hit = error_re.search(line)
    if hit {
        detail = hit.group(1)
        print(f" Message: {detail}")
    }
}
# Flags: case-insensitive matching
print()
print("⇒ Recherche insensible à la casse")
text_mixed = "Python PYTHON python PyThOn"
# re.IGNORECASE is passed as the flags argument of findall.
matches = re.findall("python", text_mixed, re.IGNORECASE)
count = len(matches)
print(f" Trouvés: {count} occurrences")
# key=value extraction through Pattern.apply
print()
print("⇒ Parsing de clé=valeur")
config = "host=localhost port=5432 user=admin"
pairs = kv_pattern.apply(config)
# Each item is indexed as a pair: position 0 is the key, position 1 the value.
for pair in pairs {
    key = pair[0]
    value = pair[1]
    print(f" {key} => {value}")
}