RealEstateAI/app.py at main · FritzLD/RealEstateAI · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
"""
RealEstateAI – Streamlit Application
Run: streamlit run app.py

Tab 1 – Market Dashboard  : live KPIs + key charts
Tab 2 – AI Agent Chat     : conversational real estate analyst
Tab 3 – Forecasts & Analysis : 12-month outlook, refi windows, model evaluation
"""

from __future__ import annotations

import sys
from pathlib import Path

# Put this file's directory on sys.path so the `src` package can be imported
sys.path.insert(0, str(Path(__file__).parent))

import streamlit as st

from src import config
from src.data_loader import RealEstateDataLoader, load_reference_documents
from src.forecasting import MarketForecaster, load_saved_forecasts
from src.knowledge_base import KnowledgeBase
from src.llm_chain import RealEstateChain
from src.refi_analysis import RefiAnalyzer
from src.retriever import RealEstateRetriever
from src.visualizations import RealEstateVisualizer
from src.rate_service import build_pmms_context

# ── Page config ───────────────────────────────────────────────────────────────

# Page-level settings; this is the first Streamlit call the script makes.
_PAGE_SETTINGS = {
    "page_title": config.APP_TITLE,
    "page_icon": config.APP_ICON,
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# CSS override injected as raw HTML: bigger tab labels and extra padding
# around each tab.
_TAB_CSS = """
    <style>
    /* Larger tab labels and emoji icons */
    .stTabs [data-baseweb="tab-list"] button p {
        font-size: 1.15rem;
        font-weight: 500;
    }
    .stTabs [data-baseweb="tab"] {
        padding-top: 10px;
        padding-bottom: 10px;
        padding-left: 20px;
        padding-right: 20px;
    }
    </style>
    """
st.markdown(_TAB_CSS, unsafe_allow_html=True)

# ── System initialisation (cached) ───────────────────────────────────────────

@st.cache_resource(show_spinner="Loading market data and building knowledge base…")
def build_system(api_key: str, model_name: str) -> dict:
    """Construct every long-lived component the app uses and bundle them.

    The function is wrapped in ``st.cache_resource``, so Streamlit reuses the
    returned dict for later calls made with the same arguments instead of
    rebuilding everything.

    Args:
        api_key: Key handed to both the knowledge base and the LLM chain.
        model_name: Name of the chat model the LLM chain should use.

    Returns:
        A dict with the keys ``"loader"``, ``"forecaster"``, ``"refi"``,
        ``"visualizer"``, ``"chain"`` and ``"summary"``.
    """
    # Data-driven components all share the loader's DataFrame.
    loader = RealEstateDataLoader()
    forecaster = MarketForecaster(loader.df, loader.exog_cols)
    refi = RefiAnalyzer(loader.df)
    visualizer = RealEstateVisualizer(loader.df)
    summary = loader.get_market_summary()

    # RAG knowledge base built from generated market documents plus the
    # reference documents.
    kb = KnowledgeBase(api_key=api_key)
    market_docs = loader.generate_knowledge_documents()
    ref_docs = load_reference_documents()
    # Called for its effect on ``kb``; the returned store itself is not needed
    # here. Presumably ``get_retriever`` relies on the store having been
    # created first — confirm in src.knowledge_base.
    kb.get_or_create(market_docs + ref_docs)
    base_retriever = kb.get_retriever(k=config.TOP_K_RETRIEVAL)

    # Hybrid retriever: wraps the vector retriever together with the loader
    # and the forecaster.
    hybrid_retriever = RealEstateRetriever(
        vector_retriever=base_retriever,
        loader=loader,
        forecaster=forecaster,
    )

    # LLM chain that answers questions through the hybrid retriever.
    chain = RealEstateChain(
        api_key=api_key,
        model_name=model_name,
        retriever=hybrid_retriever,
    )

    return {
        "loader": loader,
        "forecaster": forecaster,
        "refi": refi,
        "visualizer": visualizer,
        "chain": chain,
        "summary": summary,
    }


# ── Sidebar ───────────────────────────────────────────────────────────────────

def render_sidebar() -> str:
    """Draw the sidebar and return the model picked in its "Model" selector.

    The sidebar shows, top to bottom: an optional branding photo, the app
    title, the contact card, the model selector and, once a built system is
    stored in ``st.session_state``, a snapshot of headline market metrics.
    """
    sidebar = st.sidebar
    assets_dir = Path(__file__).parent / "assets"

    # Branding photo: profile.jpg wins, profile.png is the fallback; when
    # neither file exists no image is drawn.
    photo = next(
        (
            candidate
            for candidate in (assets_dir / "profile.jpg", assets_dir / "profile.png")
            if candidate.exists()
        ),
        None,
    )
    if photo is not None:
        sidebar.image(str(photo), use_column_width=True)

    sidebar.title(f"{config.APP_ICON} RealEstateAI")
    sidebar.markdown("**Dayton MSA Market Intelligence**")

    # Contact card (markdown).
    contact_card = """
**Frederick Duff MBA**
Senior Mortgage Banker / Data Scientist

📧 [FDuff@QueenCitymortgage.net](mailto:FDuff@QueenCitymortgage.net)
📞 (513) 445-9811 &nbsp;|&nbsp; (502) 345-0682
🪪 NMLS 835831
📍 Licensed in Ohio, Kentucky & Florida
🌐 [Apply Online](https://www.pre-qualifymymortgage.com)
    Queen City Mortgage is a Equal Housing Lender.
        """
    sidebar.markdown(contact_card)
    sidebar.divider()

    model_choices = ["gpt-4o-mini", "gpt-4o", "gpt-4-turbo"]
    model = sidebar.selectbox("Model", model_choices, index=0)

    # The snapshot needs the summary of an already-built system.
    if "system" not in st.session_state:
        return model

    snapshot = st.session_state.system["summary"]
    sidebar.divider()
    sidebar.markdown("**Market Snapshot**")
    sidebar.metric("Data Through", snapshot["data_through"])
    sidebar.metric("Active Listings", f"{snapshot['current_active']:,}")
    sidebar.metric("Monthly Sales", f"{snapshot['current_sales']:,}")
    sidebar.metric("Disparity %", f"{snapshot['disparity_pct']}%")
    sidebar.metric("30-Yr Rate", f"{snapshot['current_rate']}%")
    sidebar.metric("YoY Sales Δ", f"{snapshot['yoy_sales_chg']:+}%")
    return model


# ── Tab 1: Market Dashboard ───────────────────────────────────────────────────

def render_dashboard(sys: dict) -> None:
    """Render the Market Dashboard tab.

    Draws five KPI tiles from ``sys["summary"]``, an explanatory note about
    the Market Disparity % figure, and the charts produced by
    ``sys["visualizer"]``.

    Args:
        sys: System dict; this function reads its ``"summary"`` and
            ``"visualizer"`` entries. (The parameter name shadows the
            ``sys`` module inside this function.)
    """
    st.header("Market Dashboard")

    summary = sys["summary"]
    kpi_cols = st.columns(5)
    kpi_cols[0].metric(
        "Active Listings",
        f"{summary['current_active']:,}",
        f"{summary['yoy_active_chg']:+}% YoY",
    )
    kpi_cols[1].metric(
        "Monthly Sales",
        f"{summary['current_sales']:,}",
        f"{summary['yoy_sales_chg']:+}% YoY",
    )

    # Distance of the current disparity reading from the historical mean.
    disparity = summary["disparity_pct"]
    baseline = summary["balanced_threshold_pct"]
    gap = disparity - baseline
    kpi_cols[2].metric(
        "Market Disparity %",
        f"{disparity}%",
        delta=f"{gap:+.1f}% vs hist. mean ({baseline}%)",
        delta_color="off",
    )
    kpi_cols[3].metric("30-Yr Mortgage Rate", f"{summary['current_rate']}%")
    kpi_cols[4].metric("12-Mo Avg Sales", f"{summary['avg_sales_12mo']:,.0f}")

    # Market Disparity definition plus a one-line reading of the current gap;
    # a gap within ±2 of the mean is reported as "near the historical mean".
    market_state = summary["market_condition"]
    if gap < -2:
        trend_note = (
            f"Currently **{abs(gap):.1f}% below** the historical mean — trending toward a **Seller's Market**."
        )
    elif gap > 2:
        trend_note = (
            f"Currently **{abs(gap):.1f}% above** the historical mean — trending toward a **Buyer's Market**."
        )
    else:
        trend_note = (
            "Currently **near the historical mean** — market is approaching **Balanced** conditions."
        )

    st.info(
        f"**📊 What is Market Disparity %?**  "
        f"Market Disparity % = (Active Listings − Monthly Sales) ÷ Active Listings. "
        f"It measures how much unsold inventory exists relative to total listings. "
        f"A **lower %** means sales are absorbing more of the available supply — a **Seller's Market**. "
        f"A **higher %** means inventory is building faster than it sells — a **Buyer's Market**. "
        f"The Dayton MSA historical mean is **{baseline}%**, which represents a **Balanced Market** for this area. "
        f"Current reading: **{disparity}% ({market_state})**. {trend_note}"
    )

    charts = sys["visualizer"]
    st.plotly_chart(charts.active_vs_sales_trend(), use_container_width=True)

    # Two half-width charts side by side.
    left_half, right_half = st.columns(2)
    with left_half:
        st.plotly_chart(charts.disparity_chart(), use_container_width=True)
    with right_half:
        st.plotly_chart(charts.mortgage_rate_history(), use_container_width=True)

    # Remaining full-width charts, in display order.
    for build_chart in (
        charts.yoy_sales_comparison,
        charts.economic_indicators_panel,
        charts.correlation_heatmap,
    ):
        st.plotly_chart(build_chart(), use_container_width=True)


# ── Tab 2: AI Agent Chat ──────────────────────────────────────────────────────

def render_chat(sys: dict) -> None:
    """Render the AI Analyst chat tab.

    Replays the transcript stored in ``st.session_state.messages``, sends any
    new prompt to ``sys["chain"]`` together with the live rate context, and
    offers a button that clears both the transcript and the chain's history.

    Args:
        sys: System dict; this function reads its ``"chain"`` entry. (The
            parameter name shadows the ``sys`` module inside this function.)
    """
    import uuid  # local import: only needed to mint the per-session history id

    st.header("AI Real Estate Analyst")
    st.caption(
        "Ask about the Dayton MSA market, forecasts, mortgage rate trends , refinancing "
        "opportunities, seasonal patterns, or anything in the data."
    )

    chain = sys["chain"]

    # Initialise chat history
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # The chain is created by the ``st.cache_resource``-cached build_system(),
    # so the same chain object can serve several browser sessions. Using one
    # constant session id for everybody would let visitors share (and clear)
    # each other's conversation memory, so each Streamlit session gets its own
    # id. NOTE(review): assumes the chain keys its history by ``session_id`` —
    # confirm in src.llm_chain.
    if "chat_session_id" not in st.session_state:
        st.session_state.chat_session_id = f"streamlit-{uuid.uuid4().hex}"
    session_id = st.session_state.chat_session_id

    # Replay conversation
    for msg in st.session_state.messages:
        with st.chat_message(msg["role"]):
            st.markdown(msg["content"])

    # Input
    if prompt := st.chat_input("Ask a market question…"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Analysing…"):
                # Rate context is rebuilt for every question and passed
                # alongside the prompt.
                live_rate_context = build_pmms_context()

                answer = chain.ask(
                    prompt,
                    session_id=session_id,
                    live_rate_context=live_rate_context,
                )

            st.markdown(answer)

        st.session_state.messages.append({"role": "assistant", "content": answer})

    # Clear button (only shown once there is something to clear)
    if st.session_state.messages:
        if st.button("Clear conversation", key="clear_chat"):
            # Reset the chain-side history and the on-screen transcript, then
            # rerun so the emptied chat is drawn immediately.
            chain.clear_history(session_id)
            st.session_state.messages = []
            st.rerun()


# ── Tab 3: Forecasts & Analysis ───────────────────────────────────────────────

@st.cache_resource(show_spinner="Loading forecast data…")
def run_forecasts(_forecaster: MarketForecaster, _refi: RefiAnalyzer):
    """Return forecast data, preferring the pre-computed file over live fitting.

    The refi windows are computed first in either case. If
    ``load_saved_forecasts()`` returns something truthy, that object is
    returned and the four live-forecast slots are ``None``. Otherwise the
    SARIMAX sales and active-listing forecasts are produced by ``_forecaster``
    and the first slot is ``None``.

    Returns:
        A 6-tuple ``(saved, fc_sales, ci_sales, fc_active, ci_active, refi_df)``.
    """
    refi_windows = _refi.find_refi_windows()

    precomputed = load_saved_forecasts()
    if not precomputed:
        # Nothing saved yet: fit the two SARIMAX forecasts on the spot.
        sales_fc, sales_ci = _forecaster.forecast_sarimax_sales()
        active_fc, active_ci = _forecaster.forecast_sarimax_active()
        return None, sales_fc, sales_ci, active_fc, active_ci, refi_windows

    # Saved forecasts are available: no live fitting needed.
    return precomputed, None, None, None, None, refi_windows


def _saved_forecast_frames(entry: dict) -> tuple:
    """Turn one saved model entry into ``(forecast_series, interval_frame)``.

    Args:
        entry: Mapping with ``"dates"``, ``"forecast"``, ``"lower"`` and
            ``"upper"`` sequences.

    Returns:
        A pandas Series named ``"forecast"`` and a DataFrame with ``"lower"``
        and ``"upper"`` columns, both indexed by the parsed dates.
    """
    import pandas as pd  # kept local: only the pre-computed path needs pandas

    dates = pd.to_datetime(entry["dates"])
    forecast = pd.Series(entry["forecast"], index=dates, name="forecast")
    interval = pd.DataFrame(
        {"lower": entry["lower"], "upper": entry["upper"]}, index=dates
    )
    return forecast, interval


def render_forecasts(sys: dict) -> None:
    """Render the Forecasts & Analysis tab.

    Shows either the pre-computed forecasts (with model selector and MSE
    comparison) or the live SARIMAX fallback, followed by the refinancing
    opportunity section driven by a threshold slider.

    Args:
        sys: System dict; this function reads its ``"forecaster"``,
            ``"refi"`` and ``"visualizer"`` entries. (The parameter name
            shadows the ``sys`` module inside this function.)
    """
    st.header("Forecasts & Analysis")

    forecaster = sys["forecaster"]
    refi = sys["refi"]
    viz = sys["visualizer"]

    with st.spinner("Loading forecasts…"):
        # The refi table returned last is not used here: the refi section
        # below recomputes it for whatever threshold the slider holds.
        saved, fc_sales, ci_sales, fc_active, ci_active, _ = run_forecasts(
            forecaster, refi
        )

    if saved:
        # ── Pre-computed path ─────────────────────────────────────────────────
        st.success(
            f"📂 Forecasts loaded from pre-computed file "
            f"(generated {saved.get('generated_at','')[:10]}, "
            f"data through **{saved.get('data_through','')}**)"
        )

        best = saved.get("best_model", "")
        mse_scores = saved.get("mse_scores", {})
        # Tolerate a saved file without a "models" entry everywhere, not just
        # in the selector below.
        models = saved.get("models") or {}

        # ── Best model callout ────────────────────────────────────────────────
        if best and mse_scores:
            ca, cb, cc = st.columns(3)
            ca.metric("🏆 Best Forecast Model", best)
            cb.metric("Best Model Test MSE", f"{mse_scores.get(best, 0):,.0f}")
            cc.metric("Models Compared", str(len(mse_scores)))

        # ── Model selector ────────────────────────────────────────────────────
        # "SARIMAX_Active" is left out of the selector; it gets its own chart
        # further down.
        sales_model_names = [name for name in models if name != "SARIMAX_Active"]
        view_options = ["Best Model (auto)", "All Models"] + sales_model_names
        selected_view = st.selectbox("Select forecast view:", view_options, index=0)

        if selected_view == "Best Model (auto)":
            # Without a recorded best model, fall back to showing everything.
            chart_model = best or "All Models"
        else:
            # Either the literal "All Models" or a specific model name.
            chart_model = selected_view

        st.plotly_chart(
            viz.all_models_forecast_chart(saved, selected_model=chart_model),
            use_container_width=True,
        )

        # SARIMAX sales / active-listing charts, each drawn only if saved.
        col1, col2 = st.columns(2)
        with col1:
            if "SARIMAX" in models:
                fc, ci = _saved_forecast_frames(models["SARIMAX"])
                st.plotly_chart(viz.sales_forecast_chart(fc, ci), use_container_width=True)
        with col2:
            if "SARIMAX_Active" in models:
                fc, ci = _saved_forecast_frames(models["SARIMAX_Active"])
                st.plotly_chart(viz.active_forecast_chart(fc, ci), use_container_width=True)

        if mse_scores:
            st.plotly_chart(
                viz.model_comparison_chart(mse_scores), use_container_width=True
            )
            if best:
                st.caption(
                    f"✅ **Best model: {best}** "
                    f"(MSE {mse_scores.get(best, 0):,.0f} on held-out 12-month test set). "
                    "MSE computed locally — not on Streamlit Cloud."
                )

    else:
        # ── Live fallback: SARIMAX only ───────────────────────────────────────
        st.info(
            "⚡ No pre-computed forecasts found. Showing live SARIMAX only. "
            "Run `python scripts/run_monthly_forecast.py` locally to enable all 6 models."
        )
        col1, col2 = st.columns(2)
        with col1:
            st.plotly_chart(viz.sales_forecast_chart(fc_sales, ci_sales), use_container_width=True)
        with col2:
            st.plotly_chart(viz.active_forecast_chart(fc_active, ci_active), use_container_width=True)

        st.subheader("Model Accuracy Comparison")
        st.caption("Compares SARIMAX vs Prophet on a held-out 12-month test set.")
        # Evaluation only runs on demand, when the button is pressed.
        if st.button("▶ Run Model Comparison", key="run_eval"):
            with st.spinner("Evaluating models…"):
                mse_dict = forecaster.evaluate_models("Sales")
            if mse_dict:
                st.plotly_chart(viz.model_comparison_chart(mse_dict), use_container_width=True)

    # ── Refi analysis ─────────────────────────────────────────────────────────
    st.subheader("Refinancing Opportunity Analysis")
    threshold = st.slider(
        "Rate threshold above current rate (pp)",
        min_value=0.25, max_value=2.0, value=config.REFI_THRESHOLD_PP, step=0.25,
    )
    refi_df_custom = refi.find_refi_windows(threshold_pp=threshold)
    st.plotly_chart(
        viz.refi_opportunity_windows(refi_df_custom, threshold_pp=threshold),
        use_container_width=True,
    )

    if not refi_df_custom.empty:
        # Display label for each configured loan amount's savings column.
        savings_labels = {
            amount: f"${amount:,} Loan Savings/Mo"
            for amount in config.REFI_LOAN_AMOUNTS
        }
        column_names = {
            "date": "Month", "rate": "Rate (%)", "excess_pp": "Excess (pp)",
            **{
                f"savings_{int(amount / 1000)}k": label
                for amount, label in savings_labels.items()
            },
        }
        number_formats = {
            "Rate (%)": "{:.2f}",
            "Excess (pp)": "{:.2f}",
            **{label: "${:,.0f}" for label in savings_labels.values()},
        }
        st.dataframe(
            refi_df_custom.rename(columns=column_names).style.format(number_formats),
            use_container_width=True,
        )

    # Narrative summary produced by the refi analyzer for the same threshold.
    st.subheader("Refi Summary")
    st.info(refi.generate_refi_summary(threshold_pp=threshold))


# ── Main ──────────────────────────────────────────────────────────────────────

def main() -> None:
    """Entry point: draw the sidebar, build (or reuse) the system, render tabs.

    Stops the script with an on-page error when no OpenAI API key is
    configured or when building the system fails.
    """
    model = render_sidebar()

    # Resolve API key from environment / Streamlit secrets (never from user input)
    api_key = config.OPENAI_API_KEY

    if not api_key:
        st.error(
            "⚠️ No OpenAI API key found.  "
            "Add `OPENAI_API_KEY` to your `.env` file (local) or "
            "Streamlit Cloud **Secrets** (deployed)."
        )
        st.stop()

    # Build the system on first use and again whenever the sidebar model
    # changes. Checking only for the presence of "system" would pin the
    # session to the model selected on its very first run, making the
    # selector a no-op. build_system() is cached per (api_key, model), so
    # switching back to an earlier model does not redo the work.
    needs_build = (
        "system" not in st.session_state
        or st.session_state.get("system_model") != model
    )
    if needs_build:
        try:
            built = build_system(api_key, model)
        except Exception as e:
            st.error(f"Initialisation failed: {e}")
            st.stop()
        else:
            st.session_state.system = built
            st.session_state.system_model = model

    system = st.session_state.system

    tab1, tab2, tab3 = st.tabs(["📊 Market Dashboard", "🤖 AI Analyst Chat", "📈 Forecasts & Analysis"])
    with tab1:
        render_dashboard(system)
    with tab2:
        render_chat(system)
    with tab3:
        render_forecasts(system)


if __name__ == "__main__":
    main()