// agent

Private Spending Auditor Agent

by ozzo · Jun 09, 2026 Public

Choose how to run this agent

Requires an API key and an AgentOp account.

1 downloads
0 forks
0.0 rating

Description

Upload a bank or card statement CSV and get a full spending audit — automatic categorization, forgotten-subscription detection, hidden-fee hunting, charts, and AI savings recommendations with follow-up Q&A. 100% in-browser: your financial data never leaves your device.

Source Code

agent.py
import io
import json
import re

import numpy as np
import pandas as pd

# ── Tunables (template variables) ─────────────────────────────────────────
MAX_PROMPT_ITEMS = [[[MAX_PROMPT_ITEMS|8]]]

# ── Module state (persists for the session) ───────────────────────────────
_state = {"df": None, "filename": None, "analysis": None, "sign_note": ""}

_CATEGORY_RULES = [
    ("Income", ["salary", "payroll", "paycheck", "direct deposit", "dividend",
                "interest paid", "refund", "reimburs", "cashback", "maas", "maa\u015f"]),
    ("Fees & Charges", ["fee", "overdraft", "interest charge", "penalty",
                        "service charge", "maintenance charge", "commission",
                        "fx charge", "late charge", "atm charge"]),
    ("Subscriptions & Streaming", ["netflix", "spotify", "hulu", "disney",
        "youtube premium", "apple.com/bill", "icloud", "prime video", "hbo",
        "paramount", "audible", "patreon", "subscription", "membership",
        "adobe", "dropbox", "openai", "chatgpt", "notion", "canva", "gym",
        "fitness", "kindle"]),
    ("Groceries", ["grocery", "supermarket", "whole foods", "trader joe",
        "aldi", "lidl", "kroger", "safeway", "costco", "walmart", "tesco",
        "migros", "carrefour", "market"]),
    ("Dining & Delivery", ["restaurant", "cafe", "coffee", "starbucks",
        "mcdonald", "burger", "pizza", "doordash", "ubereats", "uber eats",
        "grubhub", "deliveroo", "yemeksepeti", "getir", "bakery", "diner",
        "kebab", "sushi"]),
    ("Transport", ["uber", "lyft", "taxi", "fuel", "shell", "chevron",
        "parking", "metro", "transit", "train", "toll", "gas station"]),
    ("Utilities & Bills", ["electric", "water bill", "gas bill", "internet",
        "comcast", "verizon", "t-mobile", "vodafone", "utility", "phone bill",
        "insurance", "rent", "mortgage", "council tax"]),
    ("Shopping", ["amazon", "ebay", "etsy", "target", "best buy", "ikea",
        "zara", "h&m", "nike", "decathlon", "store", "shop"]),
    ("Health", ["pharmacy", "cvs", "walgreens", "doctor", "dental", "clinic",
        "hospital", "medical", "optician"]),
    ("Entertainment", ["cinema", "movie", "theater", "concert", "ticket",
        "steam", "playstation", "xbox", "nintendo", "game"]),
    ("Travel", ["airline", "airbnb", "hotel", "booking.com", "expedia",
        "flight", "hostel", "ryanair", "easyjet"]),
    ("Transfers & Cash", ["transfer", "zelle", "venmo", "paypal", "wise",
        "revolut", "withdrawal", "atm "]),
]


def _categorize(description: str) -> str:
    d = " " + str(description).lower() + " "
    for cat, keywords in _CATEGORY_RULES:
        for kw in keywords:
            if kw in d:
                return cat
    return "Other"


def _to_number(series: pd.Series) -> pd.Series:
    s = series.astype(str).str.strip()
    s = s.str.replace(r"\((.+)\)", r"-\1", regex=True)
    s = s.str.replace(r"[^\d\-.,]", "", regex=True)

    def conv(x):
        if not isinstance(x, str) or x in ("", "-", ".", ","):
            return np.nan
        if "," in x and "." in x:
            if x.rfind(".") > x.rfind(","):
                x = x.replace(",", "")
            else:
                x = x.replace(".", "").replace(",", ".")
        elif "," in x:
            head, _, tail = x.rpartition(",")
            if len(tail) == 2:
                x = head.replace(",", "") + "." + tail
            else:
                x = x.replace(",", "")
        try:
            return float(x)
        except ValueError:
            return np.nan

    return s.map(conv)


def _parse_dates(series: pd.Series) -> pd.Series:
    import warnings
    warnings.filterwarnings("ignore", message="Could not infer format")
    parsed = pd.to_datetime(series, errors="coerce", dayfirst=False)
    if parsed.isna().mean() > 0.3:
        retry = pd.to_datetime(series, errors="coerce", dayfirst=True)
        if retry.isna().mean() < parsed.isna().mean():
            parsed = retry
    return parsed


def _pick_columns(df: pd.DataFrame) -> dict:
    cols = {c: str(c).strip().lower() for c in df.columns}
    date_col, desc_col, amount_col = None, None, None
    debit_col, credit_col = None, None

    for c, name in cols.items():
        if debit_col is None and any(k in name for k in ("debit", "withdraw", "money out", "paid out", "borc", "bor\u00e7")):
            debit_col = c
        if credit_col is None and any(k in name for k in ("credit", "deposit", "money in", "paid in", "alacak", "yatan")):
            credit_col = c

    best_ratio = 0.0
    for c in df.columns:
        if pd.api.types.is_numeric_dtype(df[c]):
            continue
        ratio = _parse_dates(df[c].astype(str)).notna().mean()
        if ratio > best_ratio and ratio > 0.5:
            best_ratio, date_col = ratio, c

    for c, name in cols.items():
        if amount_col is None and name in ("amount", "amt", "value", "tutar", "transaction amount", "betrag"):
            amount_col = c
    if amount_col is None and not (debit_col is not None and credit_col is not None):
        best_num = 0.0
        for c in df.columns:
            if c == date_col:
                continue
            ratio = _to_number(df[c]).notna().mean()
            if ratio > best_num and ratio > 0.6:
                best_num, amount_col = ratio, c

    best_len = 0.0
    for c in df.columns:
        if c in (date_col, amount_col, debit_col, credit_col):
            continue
        if pd.api.types.is_numeric_dtype(df[c]):
            continue
        avg_len = df[c].astype(str).str.len().mean()
        if avg_len > best_len:
            best_len, desc_col = avg_len, c

    return {"date": date_col, "desc": desc_col, "amount": amount_col,
            "debit": debit_col, "credit": credit_col}


def load_csv_data(csv_text: str, filename: str) -> str:
    """Parse a bank statement CSV into normalized transactions.

    Called from JavaScript only (this name is excluded from LLM tools).

    Args:
        csv_text: Raw CSV file contents.
        filename: Original file name, for display.

    Returns:
        JSON string with ok flag, row count and detected columns, or an error.
    """
    try:
        raw = pd.read_csv(io.StringIO(csv_text), sep=None, engine="python")
    except Exception as exc:
        return json.dumps({"ok": False, "error": "Could not parse CSV: " + str(exc)})
    if raw.empty or len(raw.columns) < 2:
        return json.dumps({"ok": False, "error": "The file has no usable rows or columns."})

    picks = _pick_columns(raw)
    if picks["date"] is None or picks["desc"] is None:
        return json.dumps({"ok": False, "error":
            "Could not detect date and description columns. "
            "Export a standard CSV from your bank and try again."})

    df = pd.DataFrame()
    df["date"] = _parse_dates(raw[picks["date"]])
    df["description"] = raw[picks["desc"]].astype(str).str.strip()

    sign_note = ""
    if picks["debit"] is not None and picks["credit"] is not None:
        debit = _to_number(raw[picks["debit"]]).fillna(0.0).abs()
        credit = _to_number(raw[picks["credit"]]).fillna(0.0).abs()
        df["spend"] = debit
        df["income"] = credit
        sign_note = "Separate debit/credit columns detected."
    elif picks["amount"] is not None:
        amount = _to_number(raw[picks["amount"]])
        df = df[amount.notna()].copy()
        amount = amount.dropna()
        if (amount < 0).any():
            df["spend"] = np.where(amount < 0, -amount, 0.0)
            df["income"] = np.where(amount > 0, amount, 0.0)
            sign_note = "Negative amounts treated as spending."
        else:
            cats = df["description"].map(_categorize)
            df["spend"] = np.where(cats != "Income", amount, 0.0)
            df["income"] = np.where(cats == "Income", amount, 0.0)
            sign_note = "All amounts positive: rows matching income keywords treated as income."
    else:
        return json.dumps({"ok": False, "error": "Could not detect an amount column."})

    df = df[df["date"].notna() & (df["description"] != "")].copy()
    if df.empty:
        return json.dumps({"ok": False, "error": "No valid transactions found after parsing."})

    df["category"] = df["description"].map(_categorize)
    df = df.sort_values("date").reset_index(drop=True)

    _state["df"] = df
    _state["filename"] = filename
    _state["sign_note"] = sign_note
    _state["analysis"] = _compute_analysis(df)

    return json.dumps({"ok": True, "rows": int(len(df)), "note": sign_note,
                       "columns": {k: str(v) for k, v in picks.items() if v is not None}})


def _merchant_key(description: str) -> str:
    d = re.sub(r"[0-9#*/]+", " ", str(description).lower())
    d = re.sub(r"[^a-z&. ]", " ", d)
    tokens = [t for t in d.split() if len(t) > 2][:3]
    return " ".join(tokens)


def _detect_recurring(df: pd.DataFrame) -> list:
    spend = df[df["spend"] > 0]
    found = []
    for key, grp in spend.groupby(spend["description"].map(_merchant_key)):
        if not key or len(grp) < 2:
            continue
        amounts = grp["spend"].to_numpy()
        mean_amt = float(amounts.mean())
        if mean_amt <= 0:
            continue
        cv = float(amounts.std() / mean_amt) if len(amounts) > 1 else 0.0
        dates = grp["date"].sort_values().to_numpy()
        gaps = np.diff(dates).astype("timedelta64[D]").astype(int) if len(dates) > 1 else []
        median_gap = float(np.median(gaps)) if len(gaps) else 0.0
        if 25 <= median_gap <= 35:
            cadence, monthly = "monthly", mean_amt
        elif 5 <= median_gap <= 9:
            cadence, monthly = "weekly", mean_amt * 4.33
        elif 350 <= median_gap <= 380:
            cadence, monthly = "annual", mean_amt / 12.0
        else:
            cadence, monthly = "irregular", 0.0
        identical = bool(np.allclose(amounts, amounts[0]))
        strong = (len(grp) >= 3 and cv < 0.25 and cadence != "irregular") or \
                 (len(grp) >= 2 and identical and cadence != "irregular")
        if strong:
            found.append({
                "merchant": grp["description"].iloc[-1][:48],
                "count": int(len(grp)),
                "cadence": cadence,
                "avg_amount": round(mean_amt, 2),
                "monthly_cost": round(monthly, 2),
                "annual_cost": round(monthly * 12.0, 2),
                "last_charged": str(pd.Timestamp(dates[-1]).date()),
            })
    found.sort(key=lambda r: -r["monthly_cost"])
    return found


def _compute_analysis(df: pd.DataFrame) -> dict:
    by_cat = (df.groupby("category")["spend"].sum()
                .sort_values(ascending=False))
    by_cat = by_cat[by_cat > 0]
    fees = df[(df["category"] == "Fees & Charges") & (df["spend"] > 0)]
    fee_rows = [{"date": str(r["date"].date()), "description": r["description"][:60],
                 "amount": round(float(r["spend"]), 2)} for _, r in fees.iterrows()]
    top_merchants = (df[df["spend"] > 0].groupby(df["description"].str[:40])["spend"]
                     .sum().sort_values(ascending=False).head(10))
    return {
        "filename": _state.get("filename") or "",
        "date_from": str(df["date"].min().date()),
        "date_to": str(df["date"].max().date()),
        "transactions": int(len(df)),
        "total_spend": round(float(df["spend"].sum()), 2),
        "total_income": round(float(df["income"].sum()), 2),
        "by_category": {k: round(float(v), 2) for k, v in by_cat.items()},
        "subscriptions": _detect_recurring(df),
        "fees": fee_rows,
        "fees_total": round(float(fees["spend"].sum()), 2),
        "top_merchants": {k: round(float(v), 2) for k, v in top_merchants.items()},
        "sign_note": _state.get("sign_note") or "",
    }


def _no_data() -> str:
    return json.dumps({"error": "No statement loaded yet. Ask the user to upload a CSV first."})


def _full_analysis_json() -> str:
    if _state["analysis"] is None:
        return _no_data()
    return json.dumps(_state["analysis"])


def _audit_context_json() -> str:
    a = _state["analysis"]
    if a is None:
        return _no_data()
    compact = {
        "period": a["date_from"] + " to " + a["date_to"],
        "total_spend": a["total_spend"],
        "total_income": a["total_income"],
        "top_categories": dict(list(a["by_category"].items())[:MAX_PROMPT_ITEMS]),
        "subscriptions": a["subscriptions"][:MAX_PROMPT_ITEMS],
        "fees_total": a["fees_total"],
        "fees_count": len(a["fees"]),
        "top_merchants": dict(list(a["top_merchants"].items())[:5]),
    }
    return json.dumps(compact)


def _fig_to_img() -> str:
    import base64
    import matplotlib.pyplot as plt
    buf = io.BytesIO()
    plt.savefig(buf, format="png", dpi=110, bbox_inches="tight")
    plt.close("all")
    b64 = base64.b64encode(buf.getvalue()).decode("ascii")
    return '<img alt="chart" style="max-width:100%" src="data:image/png;base64,' + b64 + '" />'


def _chart_category_breakdown() -> str:
    if _state["analysis"] is None:
        return ""
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    cats = dict(list(_state["analysis"]["by_category"].items())[:8])
    if not cats:
        return ""
    labels = list(cats.keys())[::-1]
    values = list(cats.values())[::-1]
    plt.figure(figsize=(6.4, 3.6))
    plt.barh(labels, values, color="#1E3A8A")
    plt.title("Spending by category")
    plt.xlabel("Amount")
    return _fig_to_img()


def _chart_monthly_trend() -> str:
    df = _state["df"]
    if df is None:
        return ""
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    span_days = (df["date"].max() - df["date"].min()).days
    freq = "W" if span_days < 35 else "MS"
    series = df.set_index("date")["spend"].resample(freq).sum()
    labels = [d.strftime("%d %b" if freq == "W" else "%b %Y") for d in series.index]
    plt.figure(figsize=(6.4, 3.2))
    plt.bar(labels, series.to_numpy(), color="#CA8A04")
    plt.title("Spending over time" + (" (weekly)" if freq == "W" else " (monthly)"))
    plt.xticks(rotation=45, ha="right")
    return _fig_to_img()


# ── LLM tools ──────────────────────────────────────────────────────────────
def get_spending_summary() -> str:
    """Get the spending summary: period, totals and amount per category.

    Args:

    Returns:
        JSON string with period, total_spend, total_income and by_category.
    """
    a = _state["analysis"]
    if a is None:
        return _no_data()
    return json.dumps({
        "period": a["date_from"] + " to " + a["date_to"],
        "transactions": a["transactions"],
        "total_spend": a["total_spend"],
        "total_income": a["total_income"],
        "by_category": a["by_category"],
    })


def get_subscriptions() -> str:
    """List detected recurring charges and subscriptions with their costs.

    Args:

    Returns:
        JSON string with a list of merchant, cadence, monthly_cost, annual_cost.
    """
    a = _state["analysis"]
    if a is None:
        return _no_data()
    return json.dumps({"subscriptions": a["subscriptions"],
                       "monthly_total": round(sum(s["monthly_cost"] for s in a["subscriptions"]), 2)})


def get_fees() -> str:
    """List bank fees, penalty and interest charges found in the statement.

    Args:

    Returns:
        JSON string with fee transactions and the fees_total.
    """
    a = _state["analysis"]
    if a is None:
        return _no_data()
    return json.dumps({"fees": a["fees"], "fees_total": a["fees_total"]})


def search_transactions(keyword: str) -> str:
    """Search transactions whose description contains a keyword.

    Args:
        keyword: Merchant name or word to search for, e.g. 'uber' or 'netflix'.

    Returns:
        JSON string with matching transactions, their count and total spend.
    """
    df = _state["df"]
    if df is None:
        return _no_data()
    kw = str(keyword).strip().lower()
    if not kw:
        return json.dumps({"error": "Empty keyword."})
    hits = df[df["description"].str.lower().str.contains(re.escape(kw), na=False)]
    rows = [{"date": str(r["date"].date()), "description": r["description"][:60],
             "spend": round(float(r["spend"]), 2), "income": round(float(r["income"]), 2)}
            for _, r in hits.head(25).iterrows()]
    return json.dumps({"keyword": kw, "matches": int(len(hits)),
                       "total_spend": round(float(hits["spend"].sum()), 2), "transactions": rows})

More by ozzo

ATS Resume Optimizer Agent

The ATS Resume Optimizer Agent helps you tailor your resume to specific job postings and stand out …

Contract Plain-Language Explainer Agent

Contracts are written by lawyers, for lawyers — but you’re the one signing them. The Contract Plain…

Data Analysis Agent

The Data Analysis Agent is a powerful, browser-based AI agent built on AgentOp that lets you explor…