codex/data-analytics/numpy

docs/codex/data-analytics/numpy_climate.cat
#!/usr/bin/env catnip
# Analyse comparative des températures globales (GISTEMP vs GCAG)
# Dataset: anomalies annuelles moyennes, deux sources indépendantes
# Sources: NASA GISTEMP (1880-2023) + NOAA GCAG (1850-2024)
#   https://github.com/datasets/global-temp

numpy = import('numpy')
import('pathlib', 'Path')

# Load the data (3 columns: Source, Year, Mean)

# The CSV is looked up in a "data" directory next to META.file
# (presumably the path of this script — TODO confirm).
data_dir = Path(META.file).parent / "data"
csv_path = str(data_dir / "global-temp_annual.csv")
# The file is parsed once per column so that each column gets its own dtype:
# column 0 as strings of at most 10 characters ("U10"), columns 1 and 2 with
# genfromtxt's default numeric dtype. skip_header=1 drops the header row.
sources = numpy.genfromtxt(csv_path, delimiter=",", skip_header=1, usecols=0, dtype="U10")
years_all = numpy.genfromtxt(csv_path, delimiter=",", skip_header=1, usecols=1)
means_all = numpy.genfromtxt(csv_path, delimiter=",", skip_header=1, usecols=2)

# Split the rows by source

# Boolean masks selecting the rows of each source. The comparison is
# case-sensitive, so each label must match the CSV's Source column exactly.
# NOTE(review): "GISTEMP" is uppercase while "gcag" is lowercase — confirm
# that both spellings match the data file.
gi_mask = numpy.char.equal(sources, "GISTEMP")
gc_mask = numpy.char.equal(sources, "gcag")

gi_years = years_all[gi_mask]
gi_means = means_all[gi_mask]
gc_years = years_all[gc_mask]
gc_means = means_all[gc_mask]

# The first and last entries are reported as the covered range; this assumes
# the rows of each source are in chronological order — TODO confirm.
print("⇒ Dataset Annual Temperature Anomalies")
print(f"  GISTEMP: {int(gi_years[0])}-{int(gi_years[-1])} ({len(gi_years)} années)")
print(f"  GCAG:    {int(gc_years[0])}-{int(gc_years[-1])} ({len(gc_years)} années)")

# Per-source statistics

# Summary statistics for one data source. Fields, in constructor order:
# name, mean, std, min_val, max_val. Values are stored and printed as given;
# no rounding or unit conversion happens here.
# display() prints the source name followed by its mean, standard deviation
# and [min, max] range, each labelled in °C.
struct SourceStats {
    name; mean; std; min_val; max_val

    display(self) => {
        print(f"  {self.name}")
        print(f"    Moyenne:    {self.mean} °C")
        print(f"    Écart-type: {self.std} °C")
        print(f"    Plage:      [{self.min_val}, {self.max_val}] °C")
    }
}

# Build a SourceStats record for `data`, labelled `name`.
# Mean, standard deviation, minimum and maximum are each computed with numpy
# and rounded to 3 decimals before being stored.
make_stats = (name, data) => {
    mean_val = round(numpy.mean(data), 3)
    std_val = round(numpy.std(data), 3)
    low = round(numpy.min(data), 3)
    high = round(numpy.max(data), 3)
    SourceStats(name, mean_val, std_val, low, high)
}

# Print the summary statistics of each source over its full series.
print()
print("⇒ Statistiques par source")
make_stats("GISTEMP", gi_means).display()
make_stats("GCAG", gc_means).display()

# Divergence between the sources over their overlapping period
# (1880-2023 with the dataset described in the file header)

# Bounds of the overlap: the later of the two first years and the earlier of
# the two last years. Both series are restricted to it, so the subtraction
# below keeps working even if GISTEMP extends beyond GCAG on either side.
common_start = numpy.maximum(gi_years[0], gc_years[0])
common_end = numpy.minimum(gi_years[-1], gc_years[-1])

gi_common_mask = numpy.logical_and(
    numpy.greater_equal(gi_years, common_start),
    numpy.less_equal(gi_years, common_end),
)
gc_common_mask = numpy.logical_and(
    numpy.greater_equal(gc_years, common_start),
    numpy.less_equal(gc_years, common_end),
)
gi_common = gi_means[gi_common_mask]
gc_common = gc_means[gc_common_mask]
common_years = gi_years[gi_common_mask]

# Element-wise GISTEMP - GCAG; assumes both sources have one row per year
# inside the overlap, in the same order.
divergence = numpy.subtract(gi_common, gc_common)

print()
print(f"⇒ Divergence GISTEMP - GCAG ({int(common_years[0])}-{int(common_years[-1])})")
print(f"  Divergence moyenne:  {round(numpy.mean(divergence), 4)} °C")
# The largest gap is measured on absolute values so that either sign counts.
abs_div = numpy.abs(divergence)
print(f"  Divergence max:      {round(numpy.max(abs_div), 4)} °C")
print(f"  Année de max écart:  {int(common_years[numpy.argmax(abs_div)])}")

# Per-period analysis using a struct

# One time period with the mean of each source and their difference.
# Fields, in constructor order: start, end, gi_mean, gc_mean, diff.
# display() builds and returns the formatted summary line; unlike
# SourceStats.display it does not print, so the caller prints the result.
struct Period {
    start; end; gi_mean; gc_mean; diff

    display(self) => {
        f"  {self.start}-{self.end} : GI={self.gi_mean} / GC={self.gc_mean} (Δ {self.diff})"
    }
}

# Build a Period for the years start..end (both bounds inclusive).
# Each source is averaged over its own rows falling in that range and rounded
# to 3 decimals; the stored difference is GISTEMP minus GCAG, computed from
# the already-rounded means and rounded again to 3 decimals.
period_stats = (start, end) => {
    gi_in_period = numpy.logical_and(
        numpy.greater_equal(gi_years, start),
        numpy.less_equal(gi_years, end),
    )
    gc_in_period = numpy.logical_and(
        numpy.greater_equal(gc_years, start),
        numpy.less_equal(gc_years, end),
    )
    gi_avg = round(numpy.mean(gi_means[gi_in_period]), 3)
    gc_avg = round(numpy.mean(gc_means[gc_in_period]), 3)
    gap = round(gi_avg - gc_avg, 3)
    Period(start, end, gi_avg, gc_avg, gap)
}

# Four consecutive, non-overlapping periods; the bounds are hard-coded and
# the last one stops at 2023.
periods = list(
    period_stats(1880, 1920),
    period_stats(1921, 1960),
    period_stats(1961, 2000),
    period_stats(2001, 2023),
)

print()
print("⇒ Évolution par période (GISTEMP vs GCAG)")
# Presumably applies the lambda to every element of `periods`, printing the
# line returned by each Period's display() — TODO confirm `.[...]` semantics.
periods.[(p) => { print(p.display()) }]

# Extreme years (GISTEMP used as the reference)

# Indices that sort the GISTEMP anomalies in ascending order.
sorted_idx = numpy.argsort(gi_means)

print()
print("⇒ Top 5 années les plus chaudes (GISTEMP)")
# The last five sorted indices are the warmest years; flipping them lists the
# warmest first.
top5 = numpy.flip(sorted_idx[-5:])
for i in top5 {
    print(f"  {int(gi_years[i])} : {round(gi_means[i], 3)} °C")
}

print()
print("⇒ Top 5 années les plus froides (GISTEMP)")
# The first five sorted indices are the coldest years, coldest first.
bottom5 = sorted_idx[:5]
for i in bottom5 {
    print(f"  {int(gi_years[i])} : {round(gi_means[i], 3)} °C")
}

# Classification of anomalies with match (applied to the mean of both sources)

# Map an anomaly value to a label. Arms run from most negative to most
# positive:
#   value < -0.3        -> "forte négative"
#   -0.3 <= value < 0   -> "négative"
#   value > 0.5         -> "forte positive"
#   0 < value <= 0.5    -> "positive"
#   anything else       -> "neutre" (exactly 0, or a value no guard matches)
classify = (value) => {
    match True {
        _ if value < -0.3 => { "forte négative" }
        _ if value < 0    => { "négative" }
        _ if value > 0.5  => { "forte positive" }
        _ if value > 0    => { "positive" }
        _                 => { "neutre" }
    }
}

print()
print("⇒ Classification récente (moyenne des sources)")
# Classify the last 5 years of the common period, using the mean of the two
# sources (rounded to 3 decimals) as the value to label.
nc = len(common_years)
for i in range(nc - 5, nc) {
    avg = round((gi_common[i] + gc_common[i]) / 2, 3)
    label = classify(avg)
    print(f"  {int(common_years[i])} : {avg} °C → {label}")
}

# Compared linear trends

# Least-squares slope of ys against xs: the sum of the products of the
# centred values divided by the sum of the squared centred xs.
# No guard is applied when all xs are equal (the denominator is then 0).
linear_slope = (xs, ys) => {
    dx = numpy.subtract(xs, numpy.mean(xs))
    dy = numpy.subtract(ys, numpy.mean(ys))
    cross_sum = numpy.sum(numpy.multiply(dx, dy))
    dx_sq_sum = numpy.sum(numpy.power(dx, 2))
    cross_sum / dx_sq_sum
}

# Each slope is fitted on the full series of its own source, so the two
# trends do not cover the same years when the sources span different ranges.
gi_slope = linear_slope(gi_years, gi_means)
gc_slope = linear_slope(gc_years, gc_means)

print()
print("⇒ Tendances linéaires")
# Slopes are per year; multiplying by 100 expresses them per century.
print(f"  GISTEMP: {round(gi_slope * 100, 3)} °C/siècle")
print(f"  GCAG:    {round(gc_slope * 100, 3)} °C/siècle")

# Intercept chosen so the fitted line passes through (mean year, mean
# anomaly); the projections extrapolate that line to 2050 and 2100.
gi_intercept = numpy.mean(gi_means) - gi_slope * numpy.mean(gi_years)
print(f"  Projection GISTEMP 2050: {round(gi_slope * 2050 + gi_intercept, 2)} °C")
print(f"  Projection GISTEMP 2100: {round(gi_slope * 2100 + gi_intercept, 2)} °C")

# Compared moving average (10-year window)

window = 10
# Uniform kernel: convolving with it yields the mean of `window` consecutive
# values. mode="valid" keeps only positions where the window fits entirely.
kernel = numpy.ones(window) / window
gi_ma = numpy.convolve(gi_means, kernel, mode="valid")
gc_ma = numpy.convolve(gc_means, kernel, mode="valid")

print()
print(f"⇒ Moyenne mobile ({window} ans)")
# The last value of each series averages that source's final `window` entries.
# NOTE(review): if the sources end in different years (the file header gives
# 2023 vs 2024), these two values cover different decades — confirm intent.
print(f"  GISTEMP dernière valeur: {round(gi_ma[-1], 3)} °C")
print(f"  GCAG dernière valeur:    {round(gc_ma[-1], 3)} °C")

# Broadcasting: conversion to Fahrenheit

# The scalar 1.8 is applied to every element of each array. Only the scale
# factor is applied (no +32 offset); per the file header the values are
# anomalies, i.e. temperature differences.
gi_f = gi_common * 1.8
gc_f = gc_common * 1.8

print()
print("⇒ Anomalies récentes en Fahrenheit")
# Last 4 years of the common period; `nc` is the length of common_years,
# assigned in the classification section earlier in the script.
for i in range(nc - 4, nc) {
    print(f"  {int(common_years[i])} : GI={round(gi_f[i], 3)} / GC={round(gc_f[i], 3)} °F")
}

# Counting: years above the threshold

threshold = 0.5

# First year in `years` whose anomaly in `means` exceeds `limit`, or the
# string "aucune" when no year does. The explicit check is needed because
# numpy.argmax returns 0 on an all-False mask, which would otherwise report
# the first year of the series as the first exceedance.
first_above = (years, means, limit) => {
    above = numpy.greater(means, limit)
    match True {
        _ if numpy.any(above) => { int(years[numpy.argmax(above)]) }
        _                     => { "aucune" }
    }
}

# Summing a boolean mask counts its True entries.
gi_above = int(numpy.sum(numpy.greater(gi_means, threshold)))
gc_above = int(numpy.sum(numpy.greater(gc_means, threshold)))
gi_first = first_above(gi_years, gi_means, threshold)
gc_first = first_above(gc_years, gc_means, threshold)

print()
print(f"⇒ Années avec anomalie > {threshold} °C")
print(f"  GISTEMP: {gi_above} années (première: {gi_first})")
print(f"  GCAG:    {gc_above} années (première: {gc_first})")