Added Weather API

Author: Pranav Malladi
Date: 2025-09-27 18:13:53 -04:00
parent 2471610d80
commit 629444c382
22 changed files with 629 additions and 308 deletions

@@ -0,0 +1,193 @@
import argparse
import json
import os
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
# Minimal helper: try to reconstruct the model if checkpoint stores config, else attempt full-model load.
def load_checkpoint(checkpoint_path, model_builder=None, device="cpu"):
ckpt = torch.load(checkpoint_path, map_location=device)
# if checkpoint contains state_dict + model_config, try to rebuild using models.create_model
if isinstance(ckpt, dict) and "model_state_dict" in ckpt:
builder = model_builder
if builder is None:
try:
from models import create_model as _create_model
                builder = lambda cfg: _create_model(
                    device=device,
                    model_type=cfg.get("model_type", "mlp"),
                    input_dim=cfg.get("input_dim"),
                    num_classes=cfg.get("num_classes"),
                    hidden_dims=cfg.get("hidden_dims"),
                )
except Exception:
builder = None
if builder is not None and "model_config" in ckpt:
model = builder(ckpt.get("model_config", {}))
model.load_state_dict(ckpt["model_state_dict"])
model.to(device).eval()
meta = {k: v for k, v in ckpt.items() if k not in ("model_state_dict",)}
return model, meta
        else:
            # ckpt is a plain state-dict dictionary at this point, so there is no full
            # model object to fall back on; fail with guidance instead of an AttributeError.
            raise RuntimeError(
                "Checkpoint contains model_state_dict but cannot reconstruct model; provide model_builder."
            )
else:
# maybe the full model object was saved
try:
model = ckpt
model.to(device).eval()
return model, {}
except Exception as e:
raise RuntimeError(f"Can't load checkpoint automatically: {e}")
def prepare_features(df, feature_cols=None):
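    """
    Extract a float32 feature matrix from `df`.

    When `feature_cols` is None, any column whose name ends with "label" is
    treated as a target and dropped (a heuristic assumed by this helper).
    """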
if feature_cols is None:
# assume all columns except label are features
return df.drop(columns=[c for c in df.columns if c.endswith("label")], errors='ignore').values.astype(np.float32)
return df[feature_cols].values.astype(np.float32)
def plot_sample(x, true_label, pred_label):
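    """
    Best-effort visualization of a single sample: square 1-D vectors are shown as
    grayscale images, very small vectors as bar charts, longer vectors as a line
    plot of the first 200 dimensions, 2-D arrays as heatmaps, and anything
    higher-dimensional is summarized with mean/std instead of a plot.
    """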
x = np.asarray(x)
title = f"true: {true_label} pred: {pred_label}"
if x.ndim == 1:
n = x.size
sq = int(np.sqrt(n))
if sq * sq == n:
plt.imshow(x.reshape(sq, sq), cmap="gray")
plt.title(title)
plt.axis("off")
plt.show()
return
if x.size <= 3:
plt.bar(range(x.size), x)
plt.title(title)
plt.show()
return
# fallback: plot first 200 dims as line
plt.plot(x[:200])
plt.title(title + " (first 200 dims)")
plt.show()
return
elif x.ndim == 2:
plt.imshow(x, aspect='auto')
plt.title(title)
plt.show()
return
else:
print("Sample too high-dim to plot, printing summary:")
print("mean", x.mean(), "std", x.std())
def main():
p = argparse.ArgumentParser()
p.add_argument("--checkpoint", required=True, help="Path to saved checkpoint (.pt)")
p.add_argument("--data", required=True, help="CSV with features and optional label column")
p.add_argument("--label-col", default=None, help="Original label column name in CSV (if present)")
p.add_argument("--batch-size", type=int, default=256)
p.add_argument("--sample-index", type=int, default=0, help="Index of a sample to plot")
p.add_argument("--plot", action="store_true")
p.add_argument("--device", default="cpu")
args = p.parse_args()
device = args.device
# If your project has a known model class, replace model_builder with a lambda that instantiates it.
model_builder = None
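    # Hypothetical example (class and module names are assumptions, not part of this commit):
    #   from models import WeatherMLP
    #   model_builder = lambda cfg: WeatherMLP(input_dim=cfg["input_dim"],
    #                                          num_classes=cfg["num_classes"])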
    # load checkpoint
    model, meta = load_checkpoint(args.checkpoint, model_builder=model_builder, device=device)
    # load the CSV (the rest of main() reads features and optional labels from this DataFrame)
    df = pd.read_csv(args.data)
# try to discover preprocess_meta and label_info
ckpt_dir = os.path.dirname(args.checkpoint)
preprocess_meta = None
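    # The preprocess meta file is assumed (from the keys read below) to be an .npz saved
    # at training time with "feature_columns", "means", and "stds" arrays.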
    meta_preprocess_path = None
    if isinstance(meta, dict) and meta.get("preprocess_meta"):
        meta_preprocess_path = os.path.join(ckpt_dir, meta["preprocess_meta"])
if meta_preprocess_path and os.path.exists(meta_preprocess_path):
try:
import numpy as _np
d = _np.load(meta_preprocess_path, allow_pickle=True)
preprocess_meta = {
"feature_columns": [str(x) for x in d["feature_columns"].tolist()],
"means": d["means"].astype(np.float32),
"stds": d["stds"].astype(np.float32),
}
print(f"Loaded preprocess meta from {meta_preprocess_path}")
except Exception:
preprocess_meta = None
# prefer label_col from CSV, otherwise load saved assignments if present
y_true = None
if args.label_col and args.label_col in df.columns:
y_true = df[args.label_col].values
else:
# check label_info from checkpoint dir
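        # Assumed label_info schema (mirrors the keys read below; not otherwise verified):
        #   {"assignments": [0, 2, 1, ...]}             inline labels, or
        #   {"assignments_file": "assignments.npz"}     an .npz holding an "assignments" array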
label_info_path = os.path.join(ckpt_dir, "label_info.json")
label_info = {}
if os.path.exists(label_info_path):
with open(label_info_path, "r") as f:
label_info = json.load(f)
elif isinstance(meta, dict) and "label_info" in meta:
label_info = meta["label_info"]
if "assignments" in label_info:
y_true = np.array(label_info["assignments"])
elif "assignments_file" in label_info:
try:
import numpy as _np
arr = _np.load(os.path.join(ckpt_dir, label_info["assignments_file"]))
y_true = arr["assignments"]
except Exception:
pass
# prepare features: if preprocess_meta is present use its feature_columns and scaling
if preprocess_meta is not None:
feature_cols = preprocess_meta["feature_columns"]
feature_df = df[feature_cols]
X = feature_df.values.astype(np.float32)
# apply scaling
means = preprocess_meta["means"]
stds = preprocess_meta["stds"]
stds[stds == 0] = 1.0
X = (X - means) / stds
else:
if args.label_col and args.label_col in df.columns:
feature_df = df.drop(columns=[args.label_col])
else:
feature_df = df.select_dtypes(include=[np.number])
X = feature_df.values.astype(np.float32)
# create DataLoader-like batching for inference
model.to(device)
model.eval()
preds = []
with torch.no_grad():
for i in range(0, X.shape[0], args.batch_size):
batch = torch.from_numpy(X[i:i+args.batch_size]).to(device)
out = model(batch) # adapt if your model returns (logits, ...)
if isinstance(out, (tuple, list)):
out = out[0]
probs = F.softmax(out, dim=1) if out.dim() == 2 else out
pred = probs.argmax(dim=1).cpu().numpy()
preds.append(pred)
preds = np.concatenate(preds, axis=0)
if y_true is not None:
acc = accuracy_score(y_true, preds)
print(f"Accuracy: {acc:.4f}")
print("Classification report:")
print(classification_report(y_true, preds, zero_division=0))
else:
print("Predictions computed but no true labels available to compute accuracy.")
print("First 20 predictions:", preds[:20])
if args.plot:
idx = args.sample_index
if idx < 0 or idx >= X.shape[0]:
print("sample-index out of range")
return
sample_x = X[idx]
true_label = y_true[idx] if y_true is not None else None
pred_label = preds[idx]
plot_sample(sample_x, true_label, pred_label)
if __name__ == "__main__":
main()
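# Example invocation (script and file names are illustrative only):
#   python evaluate_checkpoint.py --checkpoint checkpoints/best.pt \
#       --data data/weather_test.csv --label-col label --plot --sample-index 5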