-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathknowledge.py
More file actions
167 lines (141 loc) · 6.13 KB
/
Copy pathknowledge.py
File metadata and controls
167 lines (141 loc) · 6.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
Knowledge store — loads LPI data into memory and provides keyword search.
Ported from LPI MCP Server knowledge-store.ts.
"""
import json
import os
from dataclasses import dataclass
# Directory containing the JSON data files: the "data" folder located next to
# this module file.
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
@dataclass
class SearchResult:
    """A single ranked hit returned by ``KnowledgeStore.search``.

    The same shape is used for knowledge-base entries and for case studies;
    the per-field comments note which record key each value is taken from.
    """

    id: str  # record "id" ("" when the record has none)
    title: str  # record "title"
    content: str  # "content", falling back to "summary"
    tags: list[str]  # "tags", falling back to "smilePhases"
    source: str  # "source", falling back to "industry"
    score: float  # relevance score; higher ranks first
    tier: str  # record "tier" ("free" when the record has none)
class KnowledgeStore:
def __init__(self):
self.entries: list[dict] = []
self.case_studies: list[dict] = []
self.smile: dict = {}
self._load()
def _load(self):
with open(os.path.join(DATA_DIR, "knowledge-base.json")) as f:
self.entries = json.load(f)
with open(os.path.join(DATA_DIR, "case-studies.json")) as f:
self.case_studies = json.load(f)
with open(os.path.join(DATA_DIR, "smile-framework.json")) as f:
self.smile = json.load(f)
def search(self, query: str, limit: int = 5, include_paid: bool = False) -> list[SearchResult]:
# Clean query: lowercase, strip punctuation for matching
import re
clean_query = re.sub(r'[^\w\s]', '', query.lower())
terms = clean_query.split()
if not terms:
return []
# Detect beginner intent — boost intro content only when no specific topic terms
beginner_signals = {"what", "how", "start", "begin", "getting", "started", "basics", "new", "explain", "introduction", "necessary"}
topic_signals = {"smile", "edge", "interoperability", "ontology", "ant", "mim", "lean", "mvt", "sustainability", "energy", "building", "buildings", "maritime", "methodology", "phase", "phases"}
has_beginner_words = len(beginner_signals.intersection(terms)) >= 1
has_topic_words = len(topic_signals.intersection(terms)) >= 1
is_beginner = has_beginner_words and not has_topic_words
scored: list[tuple[dict, float]] = []
for entry in self.entries:
if not include_paid and entry.get("tier") == "paid":
continue
if entry.get("visibility") != "public":
continue
score = self._score_entry(entry, terms)
# Boost beginner-friendly content only for truly introductory questions
if is_beginner and any(t in entry.get("tags", []) for t in ["beginner", "introduction", "getting-started"]):
score += 10.0
if score > 0:
scored.append((entry, score))
# Also search case studies (free tier only)
for cs in self.case_studies:
if not include_paid and cs.get("tier") == "paid":
continue
score = self._score_case_study(cs, terms)
if score > 0:
scored.append((cs, score))
scored.sort(key=lambda x: x[1], reverse=True)
results = []
for entry, score in scored[:limit]:
results.append(SearchResult(
id=entry.get("id", ""),
title=entry.get("title", ""),
content=entry.get("content") or entry.get("summary", ""),
tags=entry.get("tags", entry.get("smilePhases", [])),
source=entry.get("source", entry.get("industry", "")),
score=score,
tier=entry.get("tier", "free"),
))
return results
# Common words that shouldn't contribute to title/content matching
STOP_WORDS = {"the", "a", "an", "is", "are", "was", "were", "be", "been",
"do", "does", "did", "have", "has", "had", "it", "its",
"to", "of", "in", "for", "on", "with", "at", "by", "from",
"and", "or", "but", "not", "no", "this", "that", "these",
"i", "me", "my", "we", "our", "you", "your", "they", "them",
"even", "also", "just", "very", "so", "too"}
def _score_entry(self, entry: dict, terms: list[str]) -> float:
score = 0.0
title = entry.get("title", "").lower()
content = entry.get("content", "").lower()
tags = " ".join(entry.get("tags", [])).lower()
for term in terms:
if term in self.STOP_WORDS:
continue
if term in title:
score += 3.0
if term in tags:
score += 2.0
if term in content:
score += 1.0
return score
def _score_case_study(self, cs: dict, terms: list[str]) -> float:
score = 0.0
title = cs.get("title", "").lower()
challenge = cs.get("challenge", "").lower()
approach = cs.get("approach", "").lower()
outcome = cs.get("outcome", "").lower()
industry = cs.get("industry", "").lower()
for term in terms:
if term in title:
score += 3.0
if term in industry:
score += 2.0
if term in challenge or term in approach or term in outcome:
score += 1.0
return score
def get_smile_overview(self) -> dict:
return {
"methodology": self.smile.get("methodology", {}),
"phases": [
{"name": p["name"], "order": p["order"], "description": p["description"], "duration": p["duration"]}
for p in self.smile.get("phases", [])
],
"perspectives": self.smile.get("perspectives", []),
"aiJourney": self.smile.get("aiJourney", []),
}
def get_phase_detail(self, phase_id: str) -> dict | None:
for phase in self.smile.get("phases", []):
if phase["id"] == phase_id:
return phase
return None
def has_paid_matches(self, query: str) -> bool:
"""Check if query would match paid-tier content (for CTA triggers)."""
terms = query.lower().split()
for entry in self.entries:
if entry.get("tier") == "paid":
if self._score_entry(entry, terms) > 0:
return True
for cs in self.case_studies:
if cs.get("tier") == "paid":
if self._score_case_study(cs, terms) > 0:
return True
return False
# Singleton: shared module-level instance, created when this module is
# imported. Constructing it reads the JSON data files from DATA_DIR, so the
# import itself performs file I/O and fails if those files cannot be read.
store = KnowledgeStore()