Added Weather API

Author: Pranav Malladi
Date: 2025-09-27 18:13:53 -04:00
parent 2471610d80
commit 629444c382
22 changed files with 629 additions and 308 deletions

@@ -0,0 +1,193 @@
import argparse
import json
import os
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
# Minimal helper: try to reconstruct the model if checkpoint stores config, else attempt full-model load.
def load_checkpoint(checkpoint_path, model_builder=None, device="cpu"):
ckpt = torch.load(checkpoint_path, map_location=device)
# if checkpoint contains state_dict + model_config, try to rebuild using models.create_model
if isinstance(ckpt, dict) and "model_state_dict" in ckpt:
builder = model_builder
if builder is None:
try:
from models import create_model as _create_model
                builder = lambda cfg: _create_model(
                    device=device,
                    model_type=cfg.get("model_type", "mlp"),
                    input_dim=cfg.get("input_dim"),
                    num_classes=cfg.get("num_classes"),
                    hidden_dims=cfg.get("hidden_dims"),
                )
except Exception:
builder = None
if builder is not None and "model_config" in ckpt:
model = builder(ckpt.get("model_config", {}))
model.load_state_dict(ckpt["model_state_dict"])
model.to(device).eval()
meta = {k: v for k, v in ckpt.items() if k not in ("model_state_dict",)}
return model, meta
        else:
            # ckpt is a plain state-dict dictionary at this point, so there is no full
            # model object to fall back on; fail with guidance instead of an AttributeError.
            raise RuntimeError(
                "Checkpoint contains model_state_dict but cannot reconstruct model; provide model_builder."
            )
else:
# maybe the full model object was saved
try:
model = ckpt
model.to(device).eval()
return model, {}
except Exception as e:
raise RuntimeError(f"Can't load checkpoint automatically: {e}")
def prepare_features(df, feature_cols=None):
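    """
    Extract a float32 feature matrix from `df`.

    When `feature_cols` is None, any column whose name ends with "label" is
    treated as a target and dropped (a heuristic assumed by this helper).
    """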
if feature_cols is None:
# assume all columns except label are features
return df.drop(columns=[c for c in df.columns if c.endswith("label")], errors='ignore').values.astype(np.float32)
return df[feature_cols].values.astype(np.float32)
def plot_sample(x, true_label, pred_label):
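    """
    Best-effort visualization of a single sample: square 1-D vectors are shown as
    grayscale images, very small vectors as bar charts, longer vectors as a line
    plot of the first 200 dimensions, 2-D arrays as heatmaps, and anything
    higher-dimensional is summarized with mean/std instead of a plot.
    """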
x = np.asarray(x)
title = f"true: {true_label} pred: {pred_label}"
if x.ndim == 1:
n = x.size
sq = int(np.sqrt(n))
if sq * sq == n:
plt.imshow(x.reshape(sq, sq), cmap="gray")
plt.title(title)
plt.axis("off")
plt.show()
return
if x.size <= 3:
plt.bar(range(x.size), x)
plt.title(title)
plt.show()
return
# fallback: plot first 200 dims as line
plt.plot(x[:200])
plt.title(title + " (first 200 dims)")
plt.show()
return
elif x.ndim == 2:
plt.imshow(x, aspect='auto')
plt.title(title)
plt.show()
return
else:
print("Sample too high-dim to plot, printing summary:")
print("mean", x.mean(), "std", x.std())
def main():
p = argparse.ArgumentParser()
p.add_argument("--checkpoint", required=True, help="Path to saved checkpoint (.pt)")
p.add_argument("--data", required=True, help="CSV with features and optional label column")
p.add_argument("--label-col", default=None, help="Original label column name in CSV (if present)")
p.add_argument("--batch-size", type=int, default=256)
p.add_argument("--sample-index", type=int, default=0, help="Index of a sample to plot")
p.add_argument("--plot", action="store_true")
p.add_argument("--device", default="cpu")
args = p.parse_args()
device = args.device
# If your project has a known model class, replace model_builder with a lambda that instantiates it.
model_builder = None
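    # Hypothetical example (class and module names are assumptions, not part of this commit):
    #   from models import WeatherMLP
    #   model_builder = lambda cfg: WeatherMLP(input_dim=cfg["input_dim"],
    #                                          num_classes=cfg["num_classes"])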
    # load checkpoint
    model, meta = load_checkpoint(args.checkpoint, model_builder=model_builder, device=device)
    # load the CSV (the rest of main() reads features and optional labels from this DataFrame)
    df = pd.read_csv(args.data)
# try to discover preprocess_meta and label_info
ckpt_dir = os.path.dirname(args.checkpoint)
preprocess_meta = None
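    # The preprocess meta file is assumed (from the keys read below) to be an .npz saved
    # at training time with "feature_columns", "means", and "stds" arrays.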
    meta_preprocess_path = None
    if isinstance(meta, dict) and meta.get("preprocess_meta"):
        meta_preprocess_path = os.path.join(ckpt_dir, meta["preprocess_meta"])
if meta_preprocess_path and os.path.exists(meta_preprocess_path):
try:
import numpy as _np
d = _np.load(meta_preprocess_path, allow_pickle=True)
preprocess_meta = {
"feature_columns": [str(x) for x in d["feature_columns"].tolist()],
"means": d["means"].astype(np.float32),
"stds": d["stds"].astype(np.float32),
}
print(f"Loaded preprocess meta from {meta_preprocess_path}")
except Exception:
preprocess_meta = None
# prefer label_col from CSV, otherwise load saved assignments if present
y_true = None
if args.label_col and args.label_col in df.columns:
y_true = df[args.label_col].values
else:
# check label_info from checkpoint dir
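        # Assumed label_info schema (mirrors the keys read below; not otherwise verified):
        #   {"assignments": [0, 2, 1, ...]}             inline labels, or
        #   {"assignments_file": "assignments.npz"}     an .npz holding an "assignments" array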
label_info_path = os.path.join(ckpt_dir, "label_info.json")
label_info = {}
if os.path.exists(label_info_path):
with open(label_info_path, "r") as f:
label_info = json.load(f)
elif isinstance(meta, dict) and "label_info" in meta:
label_info = meta["label_info"]
if "assignments" in label_info:
y_true = np.array(label_info["assignments"])
elif "assignments_file" in label_info:
try:
import numpy as _np
arr = _np.load(os.path.join(ckpt_dir, label_info["assignments_file"]))
y_true = arr["assignments"]
except Exception:
pass
# prepare features: if preprocess_meta is present use its feature_columns and scaling
if preprocess_meta is not None:
feature_cols = preprocess_meta["feature_columns"]
feature_df = df[feature_cols]
X = feature_df.values.astype(np.float32)
# apply scaling
means = preprocess_meta["means"]
stds = preprocess_meta["stds"]
stds[stds == 0] = 1.0
X = (X - means) / stds
else:
if args.label_col and args.label_col in df.columns:
feature_df = df.drop(columns=[args.label_col])
else:
feature_df = df.select_dtypes(include=[np.number])
X = feature_df.values.astype(np.float32)
# create DataLoader-like batching for inference
model.to(device)
model.eval()
preds = []
with torch.no_grad():
for i in range(0, X.shape[0], args.batch_size):
batch = torch.from_numpy(X[i:i+args.batch_size]).to(device)
out = model(batch) # adapt if your model returns (logits, ...)
if isinstance(out, (tuple, list)):
out = out[0]
probs = F.softmax(out, dim=1) if out.dim() == 2 else out
pred = probs.argmax(dim=1).cpu().numpy()
preds.append(pred)
preds = np.concatenate(preds, axis=0)
if y_true is not None:
acc = accuracy_score(y_true, preds)
print(f"Accuracy: {acc:.4f}")
print("Classification report:")
print(classification_report(y_true, preds, zero_division=0))
else:
print("Predictions computed but no true labels available to compute accuracy.")
print("First 20 predictions:", preds[:20])
if args.plot:
idx = args.sample_index
if idx < 0 or idx >= X.shape[0]:
print("sample-index out of range")
return
sample_x = X[idx]
true_label = y_true[idx] if y_true is not None else None
pred_label = preds[idx]
plot_sample(sample_x, true_label, pred_label)
if __name__ == "__main__":
main()
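# Example invocation (script and file names are illustrative only):
#   python evaluate_checkpoint.py --checkpoint checkpoints/best.pt \
#       --data data/weather_test.csv --label-col label --plot --sample-index 5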