import json
import pandas as pd
from datetime import date

raw = json.load(open("data/market/tsla_raw.json"))
info = raw.get("info", {})

# yfinance导出的dict结构转DataFrame（列是日期）
def dict_to_df(d):
    if not d:
        return pd.DataFrame()
    df = pd.DataFrame(d)
    # 统一索引为指标名、列为日期
    if isinstance(df.index, pd.RangeIndex):
        pass
    return df

qf = pd.DataFrame(raw.get("quarterly_financials", {}))
qb = pd.DataFrame(raw.get("quarterly_balance_sheet", {}))
qc = pd.DataFrame(raw.get("quarterly_cashflow", {}))

# 某些版本是“行=指标，列=日期”；某些反过来，做兼容
def normalize(df):
    if df.empty:
        return df
    # 如果列看起来像指标名，就转置
    sample_cols = [str(c).lower() for c in df.columns[:5]]
    metric_hints = ["total revenue","operating income","gross profit","cash","inventory","free cash flow"]
    if any(any(h in c for h in metric_hints) for c in sample_cols):
        return df.T
    return df

qf = normalize(qf)
qb = normalize(qb)
qc = normalize(qc)

# 统一日期列排序（最近在前）
def sort_cols(df):
    if df.empty: return df
    cols = []
    for c in df.columns:
        try:
            cols.append(pd.to_datetime(str(c)))
        except:
            cols.append(pd.NaT)
    order = sorted(range(len(cols)), key=lambda i: (pd.isna(cols[i]), cols[i]), reverse=True)
    return df.iloc[:, order]

qf = sort_cols(qf)
qb = sort_cols(qb)
qc = sort_cols(qc)

def pick(df, candidates):
    if df.empty: return pd.Series(dtype="float64")
    low_idx = {str(i).lower(): i for i in df.index}
    for c in candidates:
        for k,v in low_idx.items():
            if c in k:
                s = pd.to_numeric(df.loc[v], errors="coerce")
                s.name = v
                return s
    return pd.Series(dtype="float64")

rev = pick(qf, ["total revenue","revenue"])
gross = pick(qf, ["gross profit"])
op_income = pick(qf, ["operating income"])
net_income = pick(qf, ["net income"])
ocf = pick(qc, ["operating cash flow","cash from operations"])
fcf = pick(qc, ["free cash flow"])
cash = pick(qb, ["cash and cash equivalents","cash cash equivalents and short term investments","cash"])
inv = pick(qb, ["inventory"])

def to_df(name, s):
    if s.empty: return pd.DataFrame(columns=["Quarter", name])
    out = pd.DataFrame({"Quarter":[str(i)[:10] for i in s.index], name:s.values})
    return out.head(8)

# 组装近8季度表
quarters = []
for s in [rev,gross,op_income,net_income,ocf,fcf,cash,inv]:
    if not s.empty:
        quarters = [str(i)[:10] for i in s.index][:8]
        break

tbl = pd.DataFrame({"Quarter": quarters})
def add_metric(tbl, label, s):
    if s.empty:
        tbl[label] = None
    else:
        mp = {str(i)[:10]:v for i,v in s.items()}
        tbl[label] = tbl["Quarter"].map(mp)
    return tbl

tbl = add_metric(tbl, "Revenue", rev)
tbl = add_metric(tbl, "GrossProfit", gross)
tbl = add_metric(tbl, "OperatingIncome", op_income)
tbl = add_metric(tbl, "NetIncome", net_income)
tbl = add_metric(tbl, "OperatingCashFlow", ocf)
tbl = add_metric(tbl, "FreeCashFlow", fcf)
tbl = add_metric(tbl, "Cash", cash)
tbl = add_metric(tbl, "Inventory", inv)

# 计算QoQ/YoY（基于Revenue/FCF示例）
def pct(a,b):
    if pd.isna(a) or pd.isna(b) or b==0: return None
    return (a-b)/b

tbl["Revenue_QoQ"] = tbl["Revenue"].astype("float64").pct_change(-1)
tbl["Revenue_YoY"] = tbl["Revenue"].astype("float64").pct_change(-4)
tbl["FCF_QoQ"] = tbl["FreeCashFlow"].astype("float64").pct_change(-1)
tbl["FCF_YoY"] = tbl["FreeCashFlow"].astype("float64").pct_change(-4)

def fmt(x):
    if pd.isna(x): return "N/A"
    if abs(x) < 1 and isinstance(x,float): return f"{x:.2%}"
    try:
        return f"{x:,.0f}"
    except:
        return str(x)

# 输出markdown
lines = []
lines.append(f"# TSLA 完整版数据调研报告\\n")
lines.append(f"- 日期：{date.today()}\\n- 说明：自动拉取公开数据生成（非投资建议）\\n")
lines.append("## 1) 当前估值与行情快照")
for k,label in [
    ("marketCap","市值"),
    ("forwardPE","Forward PE"),
    ("trailingPE","Trailing PE"),
    ("priceToSalesTrailing12Months","PS(TTM)"),
    ("beta","Beta"),
]:
    v = info.get(k, "N/A")
    lines.append(f"- {label}: {v}")
lines.append("")

lines.append("## 2) 近8季度关键财务（自动抽取）")
if tbl.empty:
    lines.append("- 未抽取到季度表，请检查 `data/market/tsla_raw.json`")
else:
    md_tbl = tbl.copy()
    for c in md_tbl.columns:
        md_tbl[c] = md_tbl[c].map(fmt)
    lines.append(md_tbl.to_markdown(index=False))
lines.append("")

lines.append("## 3) 关键解读")
lines.append("- 优先关注 Revenue 与 FCF 的 QoQ/YoY 是否同步改善。")
lines.append("- 若 Revenue 修复但 FCF 持续走弱，需警惕利润质量。")
lines.append("- 若 Inventory 持续攀升快于 Revenue，需关注需求与定价压力。")
lines.append("")

lines.append("## 4) 交易触发条件（可执行）")
lines.append("- 加仓触发：连续2个季度 Revenue_YoY 改善 + FCF 转强。")
lines.append("- 减仓触发：Revenue_YoY 走弱且 FCF 连续为负/恶化。")
lines.append("- 观察触发：估值扩张过快但基本面未同步验证。")
lines.append("")

lines.append("## 5) 数据源")
lines.append("- 原始数据：`data/market/tsla_raw.json`")
lines.append("- 生成脚本：`scripts/build_tsla_report_full.py`")

out = "05_reference/tsla-investment-research-full.md"
open(out,"w").write("\\n".join(lines))
print("wrote", out)
