From f9339aa039d912b380ecdd4d9d1a68a798d3b0ec Mon Sep 17 00:00:00 2001 From: ankitsinghh007 Date: Thu, 14 May 2026 01:05:44 +0530 Subject: [PATCH 1/2] level-6: Ankit Kumar Singh --- .../ankitsinghh007/level6/DASHBOARD_URL.txt | 1 + submissions/ankitsinghh007/level6/README.md | 108 ++++ submissions/ankitsinghh007/level6/app.py | 512 ++++++++++++++++++ .../level6/data/factory_capacity.csv | 9 + .../level6/data/factory_production.csv | 69 +++ .../level6/data/factory_workers.csv | 15 + submissions/ankitsinghh007/level6/env.example | 5 + .../ankitsinghh007/level6/requirements.txt | 7 + .../ankitsinghh007/level6/seed_graph.py | 462 ++++++++++++++++ 9 files changed, 1188 insertions(+) create mode 100644 submissions/ankitsinghh007/level6/DASHBOARD_URL.txt create mode 100644 submissions/ankitsinghh007/level6/README.md create mode 100644 submissions/ankitsinghh007/level6/app.py create mode 100644 submissions/ankitsinghh007/level6/data/factory_capacity.csv create mode 100644 submissions/ankitsinghh007/level6/data/factory_production.csv create mode 100644 submissions/ankitsinghh007/level6/data/factory_workers.csv create mode 100644 submissions/ankitsinghh007/level6/env.example create mode 100644 submissions/ankitsinghh007/level6/requirements.txt create mode 100644 submissions/ankitsinghh007/level6/seed_graph.py diff --git a/submissions/ankitsinghh007/level6/DASHBOARD_URL.txt b/submissions/ankitsinghh007/level6/DASHBOARD_URL.txt new file mode 100644 index 000000000..d9140dc5f --- /dev/null +++ b/submissions/ankitsinghh007/level6/DASHBOARD_URL.txt @@ -0,0 +1 @@ +https://ankitsinghh007-factory-graph.streamlit.app diff --git a/submissions/ankitsinghh007/level6/README.md b/submissions/ankitsinghh007/level6/README.md new file mode 100644 index 000000000..4c20c85ba --- /dev/null +++ b/submissions/ankitsinghh007/level6/README.md @@ -0,0 +1,108 @@ +# Factory Knowledge Graph Dashboard — Level 6 + +**Author:** Ankit Kumar Singh · [ankitsinghh007](https://github.com/ankitsinghh007) 
+**Level:** 6 — LifeAtlas Contributor Program +**Deployed:** [DASHBOARD_URL.txt](DASHBOARD_URL.txt) + +--- + +## What this is + +A Neo4j knowledge graph + Streamlit dashboard built from real Swedish steel fabrication +factory data (8 projects, 10 stations, 14 workers, 8 weeks). + +**Dashboard pages:** +1. **Project Overview** — all 8 projects, planned vs actual hours, variance +2. **Station Load** — heatmap and bar charts, overruns highlighted red +3. **Capacity Tracker** — stacked bar with deficit/surplus by week +4. **Worker Coverage** — matrix + single-point-of-failure detection +5. **Forecast (Bonus C)** — week 9 linear extrapolation with confidence band +6. **Self-Test** — 6 automated checks, total score displayed + +--- + +## Setup + +### 1. Neo4j — create a free Aura instance + +1. Go to [neo4j.io/aura](https://neo4j.io/aura) +2. Create a free instance +3. Save the URI, username, and password + +### 2. Environment + +```bash +python -m venv venv +venv\Scripts\activate # Windows +pip install -r requirements.txt + +cp env.example .env +# Edit .env with your Neo4j credentials +``` + +### 3. Copy data files + +```bash +mkdir data +# Copy the 3 CSV files into data/ +# data/factory_production.csv +# data/factory_workers.csv +# data/factory_capacity.csv +``` + +### 4. Seed the graph (run once) + +```bash +python seed_graph.py +# Output shows node/relationship counts +# Run again safely — uses MERGE everywhere (idempotent) + +python seed_graph.py --verify # check counts without re-seeding +``` + +### 5. Run the dashboard + +```bash +streamlit run app.py +# Opens at http://localhost:8501 +``` + +--- + +## Deploy to Streamlit Cloud + +1. Push this folder to a GitHub repo +2. Go to [share.streamlit.io](https://share.streamlit.io) +3. Connect repo → select `app.py` +4. **Settings → Secrets** → add (TOML format): + ```toml + NEO4J_URI = "neo4j+s://xxxxx.databases.neo4j.io" + NEO4J_USER = "neo4j" + NEO4J_PASSWORD = "your-password" + ``` +5. 
Deploy + +--- + +## Graph Schema + +**9 node labels:** Project, Product, Station, Worker, Week, Etapp, Certification, Capacity, Bottleneck + +**9 relationship types:** +- `(Project)-[:PRODUCES {quantity, unit_factor}]->(Product)` +- `(Project)-[:SCHEDULED_AT {week, planned_hours, actual_hours, variance_pct}]->(Station)` +- `(Project)-[:IN_ETAPP]->(Etapp)` +- `(Worker)-[:WORKS_AT {primary}]->(Station)` +- `(Worker)-[:CAN_COVER]->(Station)` +- `(Worker)-[:HAS_CERTIFICATION]->(Certification)` +- `(Week)-[:HAS_CAPACITY {deficit, total_capacity}]->(Capacity)` +- `(Product)-[:PROCESSED_AT]->(Station)` +- `(Bottleneck)-[:TRIGGERED_AT]->(Station)` + +--- + +## Security + +- `.env` is in `.gitignore` — credentials never in repo +- `seed_graph.py` uses `MERGE` throughout — idempotent, safe to re-run +- Streamlit secrets used for deployment — no hardcoded credentials diff --git a/submissions/ankitsinghh007/level6/app.py b/submissions/ankitsinghh007/level6/app.py new file mode 100644 index 000000000..0a0c49940 --- /dev/null +++ b/submissions/ankitsinghh007/level6/app.py @@ -0,0 +1,512 @@ +""" +app.py — Factory Knowledge Graph Dashboard +Author: Ankit Kumar Singh (ankitsinghh007) +Level 6 — LifeAtlas Contributor Program + +6 pages: Project Overview, Station Load, Capacity Tracker, + Worker Coverage, Forecast (Bonus C), Self-Test + +ALL data from Neo4j — no CSV reads in this file. 
# ── Neo4j connection ──────────────────────────────────────────

@st.cache_resource
def get_driver():
    """Build the Neo4j driver used by every page of the dashboard.

    The three connection settings are read from ``st.secrets`` first. If
    reading any of them raises, all three are taken from environment
    variables instead, each with a local-development default.

    Returns:
        The object returned by ``GraphDatabase.driver`` for the resolved
        URI and ``(user, password)`` auth tuple.
    """
    secret_keys = ("NEO4J_URI", "NEO4J_USER", "NEO4J_PASSWORD")
    try:
        uri, usr, pwd = [st.secrets[key] for key in secret_keys]
    except Exception:
        # Deliberately broad: any failure to read secrets (no secrets file,
        # missing key, ...) switches the whole triple over to env vars.
        env_defaults = {
            "NEO4J_URI": "bolt://localhost:7687",
            "NEO4J_USER": "neo4j",
            "NEO4J_PASSWORD": "password",
        }
        uri, usr, pwd = [
            os.getenv(name, default) for name, default in env_defaults.items()
        ]
    return GraphDatabase.driver(uri, auth=(usr, pwd))


def q(cypher, **params):
    """Run one Cypher statement and return its records as plain dicts.

    Args:
        cypher: The Cypher query text.
        **params: Query parameters, forwarded to ``session.run`` as keywords.

    Returns:
        A list with one ``dict`` per returned record (empty if no records).
    """
    records = []
    with get_driver().session() as session:
        # Convert inside the ``with`` so the result is fully read before
        # the session is closed.
        for record in session.run(cypher, **params):
            records.append(dict(record))
    return records
total_actual, + round((ta-tp)/tp*100,1) AS variance_pct, + stations, products + ORDER BY p.id + """) + + if not rows: + st.error("No data. Run: python seed_graph.py") + st.stop() + + df = pd.DataFrame(rows) + + c1,c2,c3,c4 = st.columns(4) + c1.metric("Projects", len(df)) + c2.metric("Total Planned", f"{df.total_planned.sum():.0f} h") + c3.metric("Total Actual", f"{df.total_actual.sum():.0f} h") + over = (df.variance_pct > 10).sum() + c4.metric("Over Budget >10%", int(over)) + st.divider() + + # Planned vs actual bar chart + fig = go.Figure() + fig.add_bar(x=df.name, y=df.total_planned, name="Planned", marker_color="#2d5a8e") + fig.add_bar(x=df.name, y=df.total_actual, name="Actual", marker_color="#00d4aa") + fig.update_layout(barmode="group", xaxis_tickangle=-25, height=380, + paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)", + font_color="#ccccdd", title="Planned vs Actual by Project") + st.plotly_chart(fig, use_container_width=True) + st.divider() + + # Project cards + for _, r in df.iterrows(): + v = r.variance_pct + badge = "🔴" if v>10 else ("🟡" if v>0 else "🟢") + with st.expander(f"{badge} **{r.id}** — {r.name} | variance {v:+.1f}%"): + a,b,c,d = st.columns(4) + a.metric("Planned", f"{r.total_planned:.0f} h") + b.metric("Actual", f"{r.total_actual:.0f} h") + c.metric("Variance", f"{v:+.1f}%") + d.metric("Stations", r.stations) + st.write(f"**Products:** {', '.join(r.products)} | **Etapp:** {r.etapp}") + + +# ───────────────────────────────────────────────────────────── +# PAGE 2 — STATION LOAD +# ───────────────────────────────────────────────────────────── + +elif page == PAGES[1]: + st.title("⚙️ Station Load") + st.caption("Actual hours per station × week — red = >10% over plan") + + rows = q(""" + MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station) + RETURN s.code AS code, s.name AS sname, r.week AS week, + sum(r.planned_hours) AS planned, + sum(r.actual_hours) AS actual + ORDER BY s.code, r.week + """) + + if not rows: + st.error("No data."); 
st.stop() + + df = pd.DataFrame(rows) + df["label"] = df["code"] + " " + df["sname"] + df["variance"] = ((df["actual"] - df["planned"]) / df["planned"] * 100).round(1) + + # Heatmap + wk = [f"w{i}" for i in range(1,9)] + pivot = df.pivot_table(index="label", columns="week", values="actual", aggfunc="sum").fillna(0) + pivot = pivot.reindex(columns=[w for w in wk if w in pivot.columns]) + + fig = px.imshow(pivot, + color_continuous_scale=[[0,"#0f1117"],[0.4,"#1a4a6b"],[0.75,"#e8a020"],[1,"#ff4b4b"]], + title="Actual Hours Heatmap (station × week)", text_auto=".0f", aspect="auto") + fig.update_layout(height=440, paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", font_color="#ccccdd") + st.plotly_chart(fig, use_container_width=True) + + st.divider() + st.subheader("Overruns — actual > planned by >10%") + over = df[df["variance"] > 10].sort_values("variance", ascending=False) + if over.empty: + st.success("No overruns.") + else: + show = over[["code","sname","week","planned","actual","variance"]].copy() + show.columns = ["Code","Station","Week","Planned h","Actual h","Variance %"] + # Plain dataframe — no styling library needed + st.dataframe(show, use_container_width=True, hide_index=True) + + st.divider() + st.subheader("Total load per station (all weeks)") + tot = df.groupby("label").agg(planned=("planned","sum"), actual=("actual","sum")).reset_index() + fig2 = go.Figure() + fig2.add_bar(x=tot.label, y=tot.planned, name="Planned", marker_color="#2d5a8e") + fig2.add_bar(x=tot.label, y=tot.actual, name="Actual", marker_color="#00d4aa") + fig2.update_layout(barmode="group", xaxis_tickangle=-30, height=360, + paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)", + font_color="#ccccdd") + st.plotly_chart(fig2, use_container_width=True) + + +# ───────────────────────────────────────────────────────────── +# PAGE 3 — CAPACITY TRACKER +# ───────────────────────────────────────────────────────────── + +elif page == PAGES[2]: + st.title("📊 Capacity Tracker") 
+ st.caption("Weekly capacity vs demand — deficit weeks in red") + + rows = q(""" + MATCH (w:Week)-[r:HAS_CAPACITY]->(c:Capacity) + RETURN w.id AS week, c.own_hours AS own, c.hired_hours AS hired, + c.overtime_hours AS ot, c.total_capacity AS cap, + c.total_planned AS planned, c.deficit AS deficit, + c.own_staff AS own_staff, c.hired_staff AS hired_staff + ORDER BY w.id + """) + + if not rows: + st.error("No capacity data."); st.stop() + + df = pd.DataFrame(rows) + def_wks = df[df.deficit < 0] + + c1,c2,c3,c4 = st.columns(4) + c1.metric("Deficit Weeks", len(def_wks)) + c2.metric("Surplus Weeks", len(df) - len(def_wks)) + c3.metric("Total Deficit", f"{def_wks.deficit.sum():.0f} h") + worst = df.loc[df.deficit.idxmin()] + c4.metric("Worst Week", worst["week"], delta=f"{worst.deficit:.0f} h", delta_color="inverse") + st.divider() + + # Stacked bar + demand line + fig = go.Figure() + fig.add_bar(x=df.week, y=df.own, name="Own Staff", marker_color="#2d5a8e") + fig.add_bar(x=df.week, y=df.hired, name="Hired Staff", marker_color="#1a6b3c") + fig.add_bar(x=df.week, y=df.ot, name="Overtime", marker_color="#e8a020") + fig.add_scatter(x=df.week, y=df.planned, mode="lines+markers", + name="Demand", line=dict(color="#ff4b4b", width=2.5, dash="dash"), + marker=dict(size=9)) + fig.update_layout(barmode="stack", title="Capacity vs Demand by Week", + height=400, paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", font_color="#ccccdd") + st.plotly_chart(fig, use_container_width=True) + + # Deficit bar + colors = ["#ff4b4b" if d < 0 else "#00d4aa" for d in df.deficit] + fig2 = go.Figure(go.Bar(x=df.week, y=df.deficit, marker_color=colors, + text=[f"{d:+.0f}h" for d in df.deficit], + textposition="outside")) + fig2.add_hline(y=0, line_color="#555577", line_dash="dot") + fig2.update_layout(title="Deficit / Surplus per Week", height=300, + paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)", + font_color="#ccccdd") + st.plotly_chart(fig2, use_container_width=True) + + 
st.divider() + df["Status"] = df.deficit.apply(lambda d: "🔴 Deficit" if d<0 else "🟢 Surplus") + show = df[["week","own_staff","hired_staff","own","hired","ot","cap","planned","deficit","Status"]] + show.columns = ["Week","Own","Hired","Own h","Hired h","OT h","Capacity","Planned","Deficit","Status"] + st.dataframe(show, use_container_width=True, hide_index=True) + + +# ───────────────────────────────────────────────────────────── +# PAGE 4 — WORKER COVERAGE +# ───────────────────────────────────────────────────────────── + +elif page == PAGES[3]: + st.title("👷 Worker Coverage") + st.caption("Coverage matrix — single-point-of-failure stations flagged") + + workers = q(""" + MATCH (w:Worker) + OPTIONAL MATCH (w)-[:WORKS_AT]->(ps:Station) + OPTIONAL MATCH (w)-[:CAN_COVER]->(cs:Station) + WITH w, collect(DISTINCT ps.code) AS primary_s, + collect(DISTINCT cs.code) AS cover_s + RETURN w.id AS id, w.name AS name, w.role AS role, + w.type AS wtype, w.hours_per_week AS hpw, + primary_s, cover_s + ORDER BY w.id + """) + + stations = q(""" + MATCH (s:Station) + OPTIONAL MATCH (w:Worker)-[:CAN_COVER]->(s) + WITH s, count(w) AS cover_count + RETURN s.code AS code, s.name AS sname, cover_count + ORDER BY s.code + """) + + if not workers or not stations: + st.error("No data."); st.stop() + + df_w = pd.DataFrame(workers) + df_s = pd.DataFrame(stations) + + spf = df_s[df_s.cover_count <= 1] + if not spf.empty: + st.warning(f"⚠️ Single Point of Failure: " + f"{', '.join(spf.code + ' ' + spf.sname)}") + st.divider() + + all_codes = sorted(df_s.code.unique()) + matrix = [] + for _, w in df_w.iterrows(): + row = {"Worker": w["name"], "Role": w["role"], "Type": w.wtype} + for sc in all_codes: + if sc in w.primary_s: row[sc] = "PRIMARY" + elif sc in w.cover_s: row[sc] = "COVER" + else: row[sc] = "" + matrix.append(row) + + df_m = pd.DataFrame(matrix) + + st.subheader("Coverage Matrix") + st.caption("PRIMARY = main station | COVER = can substitute | blank = not assigned") + # Plain 
dataframe — works on all pandas/streamlit versions + st.dataframe(df_m, use_container_width=True, hide_index=True) + st.divider() + + fig = px.bar(df_s, x="code", y="cover_count", + color="cover_count", + color_continuous_scale=[[0,"#ff4b4b"],[0.3,"#e8a020"],[1,"#00d4aa"]], + title="Workers Who Can Cover Each Station", text="cover_count", + labels={"code":"Station","cover_count":"# Workers"}) + fig.add_hline(y=1, line_color="#ff4b4b", line_dash="dot", + annotation_text="SPF threshold") + fig.update_layout(height=360, showlegend=False, + paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)", + font_color="#ccccdd") + st.plotly_chart(fig, use_container_width=True) + + +# ───────────────────────────────────────────────────────────── +# PAGE 5 — FORECAST (Bonus C) +# ───────────────────────────────────────────────────────────── + +elif page == PAGES[4]: + st.title("📈 Week 9 Forecast") + st.caption("Bonus C — linear extrapolation from 8 weeks of actual station load") + + rows = q(""" + MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station) + RETURN s.code AS code, s.name AS sname, + r.week AS week, sum(r.actual_hours) AS actual + ORDER BY s.code, r.week + """) + + if not rows: + st.error("No data."); st.stop() + + df = pd.DataFrame(rows) + df["wnum"] = df.week.str.replace("w","").astype(int) + + forecasts = [] + for sc in df.code.unique(): + sdf = df[df.code==sc].sort_values("wnum") + sname = sdf.sname.iloc[0] + x = sdf.wnum.values + y = sdf.actual.values + slope, intercept = np.polyfit(x, y, 1) + w9 = max(0.0, slope*9 + intercept) + resid = y - (slope*x + intercept) + std = float(np.std(resid)) + forecasts.append({"code":sc,"sname":sname,"slope":round(slope,2), + "w9":round(w9,1),"lower":round(max(0,w9-std),1), + "upper":round(w9+std,1), + "trend":"📈" if slope>2 else ("📉" if slope<-2 else "➡️")}) + + df_f = pd.DataFrame(forecasts).sort_values("w9", ascending=False) + at_risk = df_f[df_f.w9 > 50] + + c1,c2 = st.columns(2) + c1.metric("Stations at Risk (>50h forecast)", 
len(at_risk)) + top = df_f.iloc[0] + c2.metric("Highest Forecast", f"{top.w9:.0f} h — {top.code}") + st.divider() + + colors = ["#ff4b4b" if v>50 else ("#e8a020" if v>30 else "#00d4aa") + for v in df_f.w9] + fig = go.Figure() + fig.add_bar(x=df_f.code, y=df_f.w9, + error_y=dict(type="data", + array=(df_f.upper-df_f.w9).tolist(), + arrayminus=(df_f.w9-df_f.lower).tolist(), + visible=True), + marker_color=colors, + text=df_f.w9.apply(lambda v: f"{v:.0f}h"), + textposition="outside") + fig.add_hline(y=50, line_color="#ff4b4b", line_dash="dot", + annotation_text="Risk threshold") + fig.update_layout(title="Forecast Station Load — Week 9", height=400, + paper_bgcolor="rgba(0,0,0,0)", plot_bgcolor="rgba(0,0,0,0)", + font_color="#ccccdd") + st.plotly_chart(fig, use_container_width=True) + + show = df_f[["code","sname","slope","w9","lower","upper","trend"]].copy() + show.columns = ["Code","Station","Slope h/wk","W9 Forecast","Lower","Upper","Trend"] + st.dataframe(show, use_container_width=True, hide_index=True) + + st.divider() + st.subheader("Historical trends + projection to week 9") + fig2 = go.Figure() + palette = px.colors.qualitative.Set2 + for i, sc in enumerate(df.code.unique()): + sdf = df[df.code==sc].sort_values("wnum") + col = palette[i % len(palette)] + sn = sdf.sname.iloc[0] + fig2.add_scatter(x=sdf.wnum, y=sdf.actual, mode="lines+markers", + name=f"{sc} {sn}", line=dict(color=col)) + xs = list(sdf.wnum) + [9] + slp, icp = np.polyfit(sdf.wnum, sdf.actual, 1) + ys = [slp*xi + icp for xi in xs] + fig2.add_scatter(x=xs, y=ys, mode="lines", showlegend=False, + line=dict(color=col, dash="dot", width=1)) + + fig2.add_vline(x=8.5, line_dash="dash", line_color="#555577", + annotation_text="w9 →") + fig2.update_layout( + xaxis=dict(tickvals=list(range(1,10)), + ticktext=[f"w{i}" for i in range(1,10)]), + title="Station load trends (actual + w9 projection)", + height=450, paper_bgcolor="rgba(0,0,0,0)", + plot_bgcolor="rgba(0,0,0,0)", font_color="#ccccdd") + 
def run_self_test(driver):
    """Run the six self-test checks against the graph behind *driver*.

    Args:
        driver: Object exposing ``session()`` that returns a context manager
            whose value has a ``run(query)`` method (a Neo4j driver).

    Returns:
        A list of ``(label, passed, points)`` tuples in execution order.
        If the initial connectivity probe raises, the list holds only the
        single failed connection entry and no further checks are run.
    """
    results = []

    # Check 1 — connectivity probe; a failure here short-circuits the rest.
    try:
        with driver.session() as session:
            session.run("RETURN 1")
    except Exception as exc:
        results.append((f"Neo4j connection failed: {exc}", False, 3))
        return results
    else:
        results.append(("Neo4j connected", True, 3))

    # Checks 2-5 share one shape: a query returning a single count ``c``
    # that must reach a minimum. Each is worth 3 points.
    count_checks = (
        ("MATCH (n) RETURN count(n) AS c", "nodes", 50),
        ("MATCH ()-[r]->() RETURN count(r) AS c", "relationships", 100),
        ("CALL db.labels() YIELD label RETURN count(label) AS c",
         "node labels", 6),
        ("CALL db.relationshipTypes() YIELD relationshipType "
         "RETURN count(relationshipType) AS c",
         "relationship types", 8),
    )

    with driver.session() as session:
        for cypher, noun, minimum in count_checks:
            found = session.run(cypher).single()["c"]
            results.append((f"{found} {noun} (min: {minimum})", found >= minimum, 3))

        # Check 6 — the variance query must return at least one row where
        # actual hours exceed planned hours by more than 10%.
        overruns = []
        for record in session.run("""
            MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station)
            WHERE r.actual_hours > r.planned_hours * 1.1
            RETURN p.name AS project, s.name AS station,
                   r.planned_hours AS planned, r.actual_hours AS actual
            LIMIT 10
        """):
            overruns.append(dict(record))
        results.append(
            (f"Variance query: {len(overruns)} results", len(overruns) > 0, 5)
        )

    return results
+ st.markdown(f"`{icon} {label:<46} {score}`") + if passed: + total_pts += pts + + st.divider() + pct = int(total_pts / total_max * 100) + if total_pts == total_max: + st.success(f"### SELF-TEST SCORE: {total_pts}/{total_max} ✅ All checks passed!") + else: + st.warning(f"### SELF-TEST SCORE: {total_pts}/{total_max} ({pct}%)") + + # Show graph stats + st.divider() + st.subheader("Graph contents") + try: + with get_driver().session() as s: + labels = [r["label"] for r in + s.run("CALL db.labels() YIELD label RETURN label ORDER BY label")] + rels = [r["relationshipType"] for r in + s.run("CALL db.relationshipTypes() YIELD relationshipType " + "RETURN relationshipType ORDER BY relationshipType")] + c1, c2 = st.columns(2) + with c1: + st.write("**Node labels**") + for l in labels: st.write(f" `{l}`") + with c2: + st.write("**Relationship types**") + for r in rels: st.write(f" `{r}`") + except Exception: + pass + + else: + st.info("Click **Run Self-Test** to validate the graph.") + st.markdown(""" +| Check | Points | +|-------|--------| +| Neo4j connected | 3 | +| Node count ≥ 50 | 3 | +| Relationship count ≥ 100 | 3 | +| 6+ node labels | 3 | +| 8+ relationship types | 3 | +| Variance query returns results | 5 | +| **Total** | **20** | + """) diff --git a/submissions/ankitsinghh007/level6/data/factory_capacity.csv b/submissions/ankitsinghh007/level6/data/factory_capacity.csv new file mode 100644 index 000000000..795ff52f0 --- /dev/null +++ b/submissions/ankitsinghh007/level6/data/factory_capacity.csv @@ -0,0 +1,9 @@ +week,own_staff_count,hired_staff_count,own_hours,hired_hours,overtime_hours,total_capacity,total_planned,deficit +w1,10,2,400,80,0,480,612,-132 +w2,10,2,400,80,40,520,645,-125 +w3,10,2,400,80,0,480,398,82 +w4,10,2,400,80,20,500,550,-50 +w5,10,2,400,80,30,510,480,30 +w6,9,2,360,80,0,440,520,-80 +w7,10,2,400,80,40,520,600,-80 +w8,10,2,400,80,20,500,470,30 \ No newline at end of file diff --git a/submissions/ankitsinghh007/level6/data/factory_production.csv 
b/submissions/ankitsinghh007/level6/data/factory_production.csv new file mode 100644 index 000000000..ca6ce43e1 --- /dev/null +++ b/submissions/ankitsinghh007/level6/data/factory_production.csv @@ -0,0 +1,69 @@ +project_id,project_number,project_name,product_type,unit,quantity,unit_factor,station_code,station_name,etapp,bop,week,planned_hours,actual_hours,completed_units +P01,4501,Stålverket Borås,IQB,meter,600,1.77,011,FS IQB,ET1,BOP1,w1,48.0,45.2,28 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,012,Förmontering IQB,ET1,BOP1,w1,32.0,35.5,25 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,013,Montering IQB,ET1,BOP1,w1,28.0,26.0,22 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,014,Svets o montage IQB,ET1,BOP1,w1,35.0,38.2,20 +P01,4501,Stålverket Borås,SB,styck,40,4.0,018,SB B/F-hall,ET1,BOP1,w1,16.0,14.5,4 +P01,4501,Stålverket Borås,SP,styck,180,2.0,019,SP B/F-hall,ET1,BOP1,w1,12.0,13.0,7 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,011,FS IQB,ET1,BOP1,w2,48.0,50.0,32 +P01,4501,Stålverket Borås,IQB,meter,600,1.77,012,Förmontering IQB,ET1,BOP1,w2,32.0,30.0,28 +P01,4501,Stålverket Borås,IQP,styck,90,2.80,015,Montering IQP,ET1,BOP2,w2,25.0,28.0,9 +P01,4501,Stålverket Borås,SR,styck,8,45.0,021,SR B/F-hall,ET1,BOP2,w2,40.0,42.0,1 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,011,FS IQB,ET1,BOP1,w1,30.0,28.0,20 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,012,Förmontering IQB,ET1,BOP1,w1,22.0,24.5,18 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,013,Montering IQB,ET1,BOP1,w1,18.0,17.0,16 +P02,4502,Kontorshus Mölndal,IQP,styck,70,2.70,015,Montering IQP,ET1,BOP1,w1,19.0,21.0,7 +P02,4502,Kontorshus Mölndal,SD,styck,30,3.00,018,SB B/F-hall,ET1,BOP1,w1,9.0,8.5,3 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,011,FS IQB,ET1,BOP1,w2,30.0,32.0,24 +P02,4502,Kontorshus Mölndal,IQB,meter,350,1.50,014,Svets o montage IQB,ET1,BOP1,w2,25.0,23.0,20 +P02,4502,Kontorshus Mölndal,SP,styck,120,1.75,019,SP B/F-hall,ET1,BOP2,w2,14.0,15.5,8 +P03,4503,Lagerhall 
Jönköping,IQB,meter,900,1.89,011,FS IQB,ET1,BOP1,w1,72.0,70.0,40 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,012,Förmontering IQB,ET1,BOP1,w1,48.0,52.0,35 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,013,Montering IQB,ET1,BOP1,w1,38.0,36.5,30 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,014,Svets o montage IQB,ET1,BOP1,w1,42.0,48.0,28 +P03,4503,Lagerhall Jönköping,SB,styck,60,6.00,018,SB B/F-hall,ET1,BOP1,w1,36.0,38.0,6 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,011,FS IQB,ET1,BOP1,w2,72.0,75.0,45 +P03,4503,Lagerhall Jönköping,IQP,styck,110,2.90,015,Montering IQP,ET1,BOP2,w2,32.0,30.0,11 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,016,Gjutning,ET1,BOP2,w2,28.0,35.0,8 +P03,4503,Lagerhall Jönköping,IQB,meter,900,1.89,017,Målning,ET1,BOP2,w3,24.0,22.0,20 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,011,FS IQB,ET1,BOP1,w1,38.0,36.0,24 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,012,Förmontering IQB,ET1,BOP1,w1,25.0,27.0,20 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,013,Montering IQB,ET1,BOP1,w1,20.0,19.0,18 +P04,4504,Parkering Helsingborg,IQP,styck,55,2.85,015,Montering IQP,ET1,BOP1,w1,16.0,18.0,6 +P04,4504,Parkering Helsingborg,SB,styck,25,7.50,018,SB B/F-hall,ET1,BOP1,w1,19.0,22.0,3 +P04,4504,Parkering Helsingborg,IQB,meter,450,1.65,011,FS IQB,ET1,BOP1,w2,38.0,40.0,28 +P04,4504,Parkering Helsingborg,SP,styck,100,2.00,019,SP B/F-hall,ET1,BOP2,w2,12.0,11.0,6 +P04,4504,Parkering Helsingborg,SR,styck,12,120.0,021,SR B/F-hall,ET1,BOP2,w2,60.0,65.0,1 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,011,FS IQB,ET2,BOP3,w1,95.0,90.0,50 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,012,Förmontering IQB,ET2,BOP3,w1,65.0,68.0,42 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,013,Montering IQB,ET2,BOP3,w1,50.0,48.0,38 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,014,Svets o montage IQB,ET2,BOP3,w1,58.0,62.0,35 +P05,4505,Sjukhus Linköping ET2,IQP,styck,150,2.88,015,Montering IQP,ET2,BOP3,w1,30.0,33.0,10 
+P05,4505,Sjukhus Linköping ET2,SB,styck,50,5.00,018,SB B/F-hall,ET2,BOP3,w1,25.0,28.0,5 +P05,4505,Sjukhus Linköping ET2,SD,styck,45,2.75,018,SB B/F-hall,ET2,BOP3,w1,12.0,11.5,4 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,011,FS IQB,ET2,BOP3,w2,95.0,98.0,55 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,016,Gjutning,ET2,BOP3,w2,35.0,40.0,12 +P05,4505,Sjukhus Linköping ET2,IQB,meter,1200,1.85,017,Målning,ET2,BOP3,w2,28.0,26.0,25 +P05,4505,Sjukhus Linköping ET2,SR,styck,20,274.0,021,SR B/F-hall,ET2,BOP3,w3,120.0,115.0,2 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,011,FS IQB,ET1,BOP1,w2,40.0,38.0,26 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,012,Förmontering IQB,ET1,BOP1,w2,28.0,30.0,22 +P06,4506,Skola Uppsala,IQB,meter,500,1.60,013,Montering IQB,ET1,BOP1,w2,22.0,20.0,18 +P06,4506,Skola Uppsala,IQP,styck,80,2.75,015,Montering IQP,ET1,BOP1,w2,22.0,24.0,8 +P06,4506,Skola Uppsala,SB,styck,35,4.50,018,SB B/F-hall,ET1,BOP1,w2,16.0,18.0,4 +P06,4506,Skola Uppsala,SP,styck,140,1.50,019,SP B/F-hall,ET1,BOP2,w3,14.0,12.0,10 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,011,FS IQB,ET1,BOP1,w1,45.0,42.0,22 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,012,Förmontering IQB,ET1,BOP1,w1,30.0,33.0,18 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,014,Svets o montage IQB,ET1,BOP1,w1,35.0,32.0,16 +P07,4507,Idrottshall Västerås,SB,styck,45,3.50,018,SB B/F-hall,ET1,BOP1,w1,16.0,18.0,5 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,011,FS IQB,ET1,BOP1,w2,45.0,48.0,26 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,016,Gjutning,ET1,BOP2,w2,20.0,22.0,5 +P07,4507,Idrottshall Västerås,HSQ,meter,400,2.05,017,Målning,ET1,BOP2,w3,18.0,16.0,15 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,011,FS IQB,ET1,BOP1,w1,65.0,62.0,36 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,012,Förmontering IQB,ET1,BOP1,w1,42.0,45.0,30 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,013,Montering IQB,ET1,BOP1,w1,35.0,38.0,25 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,014,Svets o montage 
IQB,ET1,BOP1,w1,40.0,44.0,22 +P08,4508,Bro E6 Halmstad,SP,styck,200,2.50,019,SP B/F-hall,ET1,BOP1,w1,20.0,18.0,8 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,011,FS IQB,ET1,BOP1,w2,65.0,68.0,42 +P08,4508,Bro E6 Halmstad,IQP,styck,95,2.93,015,Montering IQP,ET1,BOP2,w2,28.0,30.0,10 +P08,4508,Bro E6 Halmstad,IQB,meter,800,1.80,016,Gjutning,ET1,BOP2,w3,22.0,25.0,8 +P08,4508,Bro E6 Halmstad,SR,styck,15,180.0,021,SR B/F-hall,ET1,BOP2,w3,90.0,85.0,2 \ No newline at end of file diff --git a/submissions/ankitsinghh007/level6/data/factory_workers.csv b/submissions/ankitsinghh007/level6/data/factory_workers.csv new file mode 100644 index 000000000..3110285cc --- /dev/null +++ b/submissions/ankitsinghh007/level6/data/factory_workers.csv @@ -0,0 +1,15 @@ +worker_id,name,role,primary_station,can_cover_stations,certifications,hours_per_week,type +W01,Erik Lindberg,Operator,011,"011,012","MIG/MAG,TIG,ISO 9606",40,permanent +W02,Anna Berg,Operator,011,"011,014","MIG/MAG,TIG",40,permanent +W03,Lars Jensen,Operator,012,"012,013","Surface treatment,CE marking",40,permanent +W04,Maria Stone,Operator,013,"013","Blasting,Surface protection",40,permanent +W05,Johan Peters,Operator,014,"014,015","Hydraulics,Mechanics,Crane",40,permanent +W06,Karen Nilsen,Inspector,015,"015","SIS,SS-EN 1090,NDT",40,permanent +W07,Per Hansen,Operator,016,"016,017","Casting,Formwork",40,permanent +W08,Sofia Arden,Operator,017,"017","Surface treatment,Spray painting",40,permanent +W09,Magnus Stone,Operator,018,"018,019","Sheet metal,Assembly",40,permanent +W10,Elin Frank,Operator,019,"019,018","Assembly,Welding",32,permanent +W11,Victor Elm,Foreman,all,"011,012,013,014,015,016,017,018,019,021","Leadership,CE,ISO 9001",45,permanent +W12,Lena Dale,Quality Manager,015,"015","ISO 9001,SS-EN 1090,Audit",40,permanent +W13,Ahmed Hassan,Operator,011,"011","MIG/MAG",40,hired +W14,Petra Steen,Operator,012,"012,013","Surface treatment",40,hired \ No newline at end of file diff --git 
#!/usr/bin/env python3
"""
seed_graph.py — Factory Knowledge Graph Seeder
Author: Ankit Kumar Singh (ankitsinghh007)
Level 6 — LifeAtlas Contributor Program

Reads all 3 CSV files and populates a Neo4j knowledge graph.
Idempotent: uses MERGE everywhere — safe to run multiple times.

Usage:
    python seed_graph.py
    python seed_graph.py --verify   # run after seeding to check counts
"""

import argparse
import csv
import os
import sys

from dotenv import load_dotenv
from neo4j import GraphDatabase

load_dotenv()

# ── Connection ────────────────────────────────────────────────
NEO4J_URI = os.getenv("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.getenv("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD", "password")

# ── Data paths ────────────────────────────────────────────────
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
PRODUCTION_CSV = os.path.join(DATA_DIR, "factory_production.csv")
WORKERS_CSV = os.path.join(DATA_DIR, "factory_workers.csv")
CAPACITY_CSV = os.path.join(DATA_DIR, "factory_capacity.csv")


def get_driver():
    """Return a Neo4j driver built from NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD."""
    return GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))


# ─────────────────────────────────────────────────────────────
# STEP 0 — Constraints (uniqueness + index)
# ─────────────────────────────────────────────────────────────

CONSTRAINTS = [
    "CREATE CONSTRAINT project_id IF NOT EXISTS FOR (n:Project) REQUIRE n.id IS UNIQUE",
    "CREATE CONSTRAINT product_type IF NOT EXISTS FOR (n:Product) REQUIRE n.type IS UNIQUE",
    "CREATE CONSTRAINT station_code IF NOT EXISTS FOR (n:Station) REQUIRE n.code IS UNIQUE",
    "CREATE CONSTRAINT worker_id IF NOT EXISTS FOR (n:Worker) REQUIRE n.id IS UNIQUE",
    "CREATE CONSTRAINT week_id IF NOT EXISTS FOR (n:Week) REQUIRE n.id IS UNIQUE",
    "CREATE CONSTRAINT etapp_id IF NOT EXISTS FOR (n:Etapp) REQUIRE n.id IS UNIQUE",
    "CREATE CONSTRAINT cert_name IF NOT EXISTS FOR (n:Certification) REQUIRE n.name IS UNIQUE",
]


def create_constraints(session):
    """Run every statement in CONSTRAINTS on *session*.

    A failing statement never aborts the run: the error is printed as a
    warning unless its message says the constraint already exists.
    """
    print("Creating constraints...")
    for statement in CONSTRAINTS:
        try:
            # consume() waits for the statement to finish, so a failure is
            # reported here, against the constraint that caused it.
            session.run(statement).consume()
        except Exception as e:
            # Constraint may already exist — safe to ignore
            if "already exists" not in str(e).lower():
                print(f" Warning: {e}")
    print(" Constraints ready.")


# ─────────────────────────────────────────────────────────────
# STEP 1 — Seed from factory_production.csv
# ─────────────────────────────────────────────────────────────

def seed_production(session):
    """Seed the graph from PRODUCTION_CSV.

    Writes Project, Product, Station, Etapp and Week nodes, then the
    IN_ETAPP, PRODUCES, PROCESSED_AT and SCHEDULED_AT relationships.
    Project and Product properties come from the first row that mentions
    them. PRODUCES properties come from the last row of each
    (project, product) pair. SCHEDULED_AT is merged per
    (project, station, week) and carries planned/actual hours, completed
    units and the variance in percent (0 when planned hours are 0).

    Raises:
        KeyError: a required column is missing from the CSV.
        ValueError: a numeric column cannot be parsed.
    """
    print("\nSeeding from factory_production.csv...")

    projects_seen = {}
    products_seen = {}
    stations_seen = {}
    etapps_seen = {}
    produces = {}      # (project_id, product_type) -> PRODUCES properties
    processed_at = {}  # (product_type, station_code) -> None (ordered set)
    schedules = []

    with open(PRODUCTION_CSV, newline="", encoding="utf-8") as f:
        for row in csv.DictReader(f):
            pid = row["project_id"]
            ptype = row["product_type"]
            scode = row["station_code"]
            etapp = row["etapp"]

            planned = float(row["planned_hours"])
            actual = float(row["actual_hours"])
            unit_factor = float(row["unit_factor"])
            variance = round((actual - planned) / planned * 100, 2) if planned else 0

            # Collect unique nodes — the first row seen defines the properties
            if pid not in projects_seen:
                projects_seen[pid] = {
                    "id": pid,
                    "number": row["project_number"],
                    "name": row["project_name"],
                    "etapp": etapp,
                    "bop": row["bop"],
                }
            if ptype not in products_seen:
                products_seen[ptype] = {
                    "type": ptype,
                    "unit": row["unit"],
                    "unit_factor": unit_factor,
                }
            if scode not in stations_seen:
                stations_seen[scode] = {"code": scode, "name": row["station_name"]}
            if etapp not in etapps_seen:
                etapps_seen[etapp] = {"id": etapp}

            # Later rows overwrite earlier ones, which matches the effect of
            # running MERGE ... SET once per row.
            produces[(pid, ptype)] = {
                "project_id": pid,
                "product_type": ptype,
                "quantity": float(row["quantity"]),
                "unit_factor": unit_factor,
            }
            processed_at[(ptype, scode)] = None

            schedules.append({
                "project_id": pid,
                "station_code": scode,
                "week": row["week"],
                "planned_hours": planned,
                "actual_hours": actual,
                "completed_units": int(row["completed_units"]),
                "variance_pct": variance,
            })

    # ── Write Project nodes ──
    for project in projects_seen.values():
        session.run("""
            MERGE (n:Project {id: $id})
            SET n.number = $number,
                n.name = $name,
                n.etapp = $etapp,
                n.bop = $bop
        """, **project)

    # ── Write Product nodes ──
    for product in products_seen.values():
        session.run("""
            MERGE (n:Product {type: $type})
            SET n.unit = $unit,
                n.unit_factor = $unit_factor
        """, **product)

    # ── Write Station nodes ──
    for station in stations_seen.values():
        session.run("""
            MERGE (n:Station {code: $code})
            SET n.name = $name
        """, **station)

    # ── Write Etapp nodes ──
    for etapp_node in etapps_seen.values():
        session.run("MERGE (n:Etapp {id: $id})", **etapp_node)

    # ── Write Week nodes (from production weeks seen) ──
    for week_id in sorted({s["week"] for s in schedules}):
        session.run("MERGE (n:Week {id: $id})", id=week_id)

    # ── Project -[:IN_ETAPP]-> Etapp (one write per project) ──
    for project in projects_seen.values():
        session.run("""
            MATCH (p:Project {id: $project_id})
            MATCH (e:Etapp {id: $etapp})
            MERGE (p)-[:IN_ETAPP]->(e)
        """, project_id=project["id"], etapp=project["etapp"])

    # ── Project -[:PRODUCES {quantity, unit_factor}]-> Product ──
    for props in produces.values():
        session.run("""
            MATCH (p:Project {id: $project_id})
            MATCH (pr:Product {type: $product_type})
            MERGE (p)-[r:PRODUCES]->(pr)
            SET r.quantity = $quantity,
                r.unit_factor = $unit_factor
        """, **props)

    # ── Product -[:PROCESSED_AT]-> Station ──
    for product_type, station_code in processed_at:
        session.run("""
            MATCH (pr:Product {type: $product_type})
            MATCH (st:Station {code: $station_code})
            MERGE (pr)-[:PROCESSED_AT]->(st)
        """, product_type=product_type, station_code=station_code)

    # ── Project -[:SCHEDULED_AT {week, hours, variance}]-> Station ──
    # NOTE(review): the merge key is (project, station, week); two rows that
    # share it (e.g. different products) overwrite each other — confirm the
    # CSV never contains such rows.
    for schedule in schedules:
        session.run("""
            MATCH (p:Project {id: $project_id})
            MATCH (st:Station {code: $station_code})
            MERGE (p)-[r:SCHEDULED_AT {week: $week}]->(st)
            SET r.planned_hours = $planned_hours,
                r.actual_hours = $actual_hours,
                r.completed_units = $completed_units,
                r.variance_pct = $variance_pct
        """, **schedule)

    print(f" Projects: {len(projects_seen)}")
    print(f" Products: {len(products_seen)}")
    print(f" Stations: {len(stations_seen)}")
    print(f" Etapps: {len(etapps_seen)}")
    print(f" Schedules: {len(schedules)}")


# ─────────────────────────────────────────────────────────────
# STEP 2 — Seed from factory_workers.csv
# ─────────────────────────────────────────────────────────────

def seed_workers(session):
    """Seed Worker and Certification nodes and their relationships from WORKERS_CSV.

    Per worker: a Worker node, WORKS_AT (primary = true) to the primary
    station, CAN_COVER to every station in ``can_cover_stations`` and
    HAS_CERTIFICATION to every listed certification. A primary station of
    ``"all"`` links the worker to every Station node currently in the graph.

    Station nodes are only matched, never created, so a station code that is
    absent from the graph yields no relationship. Run seed_production first.
    """
    print("\nSeeding from factory_workers.csv...")

    with open(WORKERS_CSV, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    for row in rows:
        wid = row["worker_id"]
        prim = row["primary_station"].strip()
        cover = [s.strip() for s in row["can_cover_stations"].split(",") if s.strip()]
        certs = [c.strip() for c in row["certifications"].split(",") if c.strip()]

        # Worker node
        session.run("""
            MERGE (w:Worker {id: $id})
            SET w.name = $name,
                w.role = $role,
                w.hours_per_week = $hpw,
                w.type = $wtype,
                w.primary_station = $prim
        """, id=wid, name=row["name"], role=row["role"],
            hpw=int(row["hours_per_week"]), wtype=row["type"], prim=prim)

        # Primary station — WORKS_AT
        if prim != "all":
            session.run("""
                MATCH (w:Worker {id: $wid})
                MATCH (s:Station {code: $scode})
                MERGE (w)-[r:WORKS_AT]->(s)
                SET r.primary = true
            """, wid=wid, scode=prim)
        else:
            # "all" (the foreman row): link to every station in the graph
            # instead of a hard-coded code list that can drift from the data.
            session.run("""
                MATCH (w:Worker {id: $wid})
                MATCH (s:Station)
                MERGE (w)-[r:WORKS_AT]->(s)
                SET r.primary = true
            """, wid=wid)

        # CAN_COVER stations
        for scode in cover:
            session.run("""
                MATCH (w:Worker {id: $wid})
                MATCH (s:Station {code: $scode})
                MERGE (w)-[:CAN_COVER]->(s)
            """, wid=wid, scode=scode)

        # Certifications
        for cert in certs:
            session.run("""
                MERGE (c:Certification {name: $cert})
            """, cert=cert)
            session.run("""
                MATCH (w:Worker {id: $wid})
                MATCH (c:Certification {name: $cert})
                MERGE (w)-[:HAS_CERTIFICATION]->(c)
            """, wid=wid, cert=cert)

    print(f" Workers seeded: {len(rows)}")


# ─────────────────────────────────────────────────────────────
# STEP 3 — Seed from factory_capacity.csv
# ─────────────────────────────────────────────────────────────

def seed_capacity(session):
    """Seed one Capacity node per row of CAPACITY_CSV and link it to its Week.

    The Week node is merged first, so weeks that never occur in the
    production data still get a node. ``is_deficit`` is true when the CSV
    ``deficit`` value is negative. HAS_CAPACITY repeats deficit,
    total_capacity and total_planned on the relationship.
    """
    print("\nSeeding from factory_capacity.csv...")

    with open(CAPACITY_CSV, newline="", encoding="utf-8") as f:
        rows = list(csv.DictReader(f))

    for row in rows:
        week_id = row["week"]
        total_c = float(row["total_capacity"])
        total_p = float(row["total_planned"])
        deficit = float(row["deficit"])

        # Ensure Week node exists
        session.run("MERGE (w:Week {id: $id})", id=week_id)

        # Capacity node (one per week)
        session.run("""
            MERGE (c:Capacity {week: $week_id})
            SET c.own_hours = $own_h,
                c.hired_hours = $hired_h,
                c.overtime_hours = $ot_h,
                c.total_capacity = $total_c,
                c.total_planned = $total_p,
                c.deficit = $deficit,
                c.own_staff = $own_cnt,
                c.hired_staff = $hired_cnt,
                c.is_deficit = ($deficit < 0)
        """, week_id=week_id,
            own_h=float(row["own_hours"]),
            hired_h=float(row["hired_hours"]),
            ot_h=float(row["overtime_hours"]),
            total_c=total_c, total_p=total_p, deficit=deficit,
            own_cnt=int(row["own_staff_count"]),
            hired_cnt=int(row["hired_staff_count"]))

        # Week -[:HAS_CAPACITY]-> Capacity
        session.run("""
            MATCH (w:Week {id: $week_id})
            MATCH (c:Capacity {week: $week_id})
            MERGE (w)-[r:HAS_CAPACITY]->(c)
            SET r.deficit = $deficit,
                r.total_capacity = $total_c,
                r.total_planned = $total_p
        """, week_id=week_id, deficit=deficit, total_c=total_c, total_p=total_p)

    print(f" Weeks seeded: {len(rows)}")


# ─────────────────────────────────────────────────────────────
# STEP 4 — Compute Bottleneck nodes (derived)
# ─────────────────────────────────────────────────────────────

def seed_bottlenecks(session):
    """Derive Bottleneck nodes from the SCHEDULED_AT relationships.

    One Bottleneck is merged per (station, week) that has at least one
    schedule with actual_hours > planned_hours * 1.1. It stores the summed
    excess hours, the number of overrunning projects, their ids and a
    severity (HIGH above 15 excess hours, MEDIUM above 7, otherwise LOW),
    and is linked to its station with TRIGGERED_AT.

    The printed count covers every Bottleneck node in the graph, including
    nodes merged by earlier runs.
    """
    print("\nComputing Bottleneck nodes...")

    session.run("""
        MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station)
        WHERE r.actual_hours > r.planned_hours * 1.1
        WITH s, r.week AS week,
             count(p) AS overrun_count,
             sum(r.actual_hours - r.planned_hours) AS excess_hours,
             collect(p.id) AS projects
        MERGE (b:Bottleneck {station_code: s.code, week: week})
        SET b.excess_hours = excess_hours,
            b.overrun_count = overrun_count,
            b.projects = projects,
            b.severity = CASE
                WHEN excess_hours > 15 THEN "HIGH"
                WHEN excess_hours > 7 THEN "MEDIUM"
                ELSE "LOW"
            END
        WITH b, s
        MERGE (b)-[:TRIGGERED_AT]->(s)
    """)

    record = session.run("MATCH (b:Bottleneck) RETURN count(b) AS c").single()
    print(f" Bottleneck nodes created: {record['c']}")


# ─────────────────────────────────────────────────────────────
# VERIFY — counts and relationship types
# ─────────────────────────────────────────────────────────────

def verify(session):
    """Print node and relationship counts and three pass/fail checks.

    The checks are: at least 50 nodes, at least 100 relationships, and at
    least one SCHEDULED_AT relationship with actual_hours more than 10%
    above planned_hours.
    """
    print("\n── Verification ─────────────────────────────────────────")

    node_counts = session.run("""
        MATCH (n)
        RETURN labels(n)[0] AS label, count(n) AS count
        ORDER BY count DESC
    """)
    print("\nNode counts:")
    total_nodes = 0
    for r in node_counts:
        # labels(n)[0] is null for an unlabeled node; None cannot take a
        # width format spec, so substitute a printable placeholder.
        label = r["label"] or "(no label)"
        print(f" {label:<16} {r['count']}")
        total_nodes += r["count"]
    print(f" {'TOTAL':<16} {total_nodes}")

    rel_counts = session.run("""
        MATCH ()-[r]->()
        RETURN type(r) AS rel_type, count(r) AS count
        ORDER BY count DESC
    """)
    print("\nRelationship counts:")
    total_rels = 0
    for r in rel_counts:
        print(f" {r['rel_type']:<24} {r['count']}")
        total_rels += r["count"]
    print(f" {'TOTAL':<24} {total_rels}")

    print(f"\n{'✅' if total_nodes >= 50 else '❌'} Nodes >= 50: {total_nodes}")
    print(f"{'✅' if total_rels >= 100 else '❌'} Relationships >= 100: {total_rels}")

    # Check variance query works
    result = session.run("""
        MATCH (p:Project)-[r:SCHEDULED_AT]->(s:Station)
        WHERE r.actual_hours > r.planned_hours * 1.1
        RETURN count(*) AS c
    """)
    var_count = result.single()["c"]
    print(f"{'✅' if var_count > 0 else '❌'} Variance >10% rows: {var_count}")
    print("─────────────────────────────────────────────────────────")


# ─────────────────────────────────────────────────────────────
# MAIN
# ─────────────────────────────────────────────────────────────

def _require_csv_files():
    """Exit with status 1 if any of the three input CSV files is missing."""
    for path, name in [
        (PRODUCTION_CSV, "factory_production.csv"),
        (WORKERS_CSV, "factory_workers.csv"),
        (CAPACITY_CSV, "factory_capacity.csv"),
    ]:
        if not os.path.isfile(path):
            print(f"[ERROR] Cannot find {name} at {path}")
            print(" Place CSV files in a 'data/' folder next to seed_graph.py")
            sys.exit(1)


def main():
    """Command-line entry point: seed the graph, or only verify it with --verify.

    Exits with status 1 when a CSV file is missing (seeding mode only) or
    when the connection test against Neo4j fails.
    """
    parser = argparse.ArgumentParser(description="Seed factory knowledge graph into Neo4j")
    parser.add_argument("--verify", action="store_true", help="Only run verification")
    args = parser.parse_args()

    # --verify only queries the database, so the CSV files are needed for
    # seeding alone.
    if not args.verify:
        _require_csv_files()

    print(f"Connecting to Neo4j at {NEO4J_URI}...")
    driver = get_driver()

    # try/finally closes the driver on every path, including sys.exit()
    # and an exception raised while seeding.
    try:
        try:
            with driver.session() as session:
                session.run("RETURN 1")  # connection test
            print("Connected.\n")
        except Exception as e:
            print(f"[ERROR] Cannot connect to Neo4j: {e}")
            print(" Check NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD in .env")
            sys.exit(1)

        with driver.session() as session:
            if args.verify:
                verify(session)
            else:
                create_constraints(session)
                seed_production(session)
                seed_workers(session)
                seed_capacity(session)
                seed_bottlenecks(session)
                print("\nSeeding complete.")
                verify(session)
    finally:
        driver.close()


if __name__ == "__main__":
    main()
b/submissions/ankitsinghh007/level6/DASHBOARD_URL.txt index d9140dc5f..99b5aeb77 100644 --- a/submissions/ankitsinghh007/level6/DASHBOARD_URL.txt +++ b/submissions/ankitsinghh007/level6/DASHBOARD_URL.txt @@ -1 +1 @@ -https://ankitsinghh007-factory-graph.streamlit.app +https://lpi-developer-kit-wn5ku7ouu7daxccmdewqwj.streamlit.app/ \ No newline at end of file