Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,6 @@ pub fn build_phrase_index(repo_path: &str, out_dir: &Path, verbose: bool) -> Res
} else {
let db = Connection::open(&db_path).map_err(|e| format!("db: {}", e))?;
schema::open_existing_db(&db).map_err(|e| format!("pragmas: {}", e))?;
schema::create_new_db(&db).ok(); // create tables if they don't exist
(db, false)
};

Expand Down Expand Up @@ -757,6 +756,8 @@ fn collect_source_files(repo: &Path) -> Vec<String> {
]
.into();

// Cap on how many file paths are collected: the directory walk below breaks
// out once `files` already holds this many entries, and the sorted result is
// truncated to the same bound before being returned.
const MAX_FILES: usize = 500_000;

for entry in WalkDir::new(repo)
.follow_links(false)
.into_iter()
Expand All @@ -775,6 +776,9 @@ fn collect_source_files(repo: &Path) -> Vec<String> {
if !entry.file_type().is_file() {
continue;
}
if files.len() >= MAX_FILES {
break;
}
let fname = entry.file_name().to_string_lossy();
if lock_suffixes.contains(fname.as_ref()) {
continue;
Expand All @@ -798,6 +802,7 @@ fn collect_source_files(repo: &Path) -> Vec<String> {
}
}
files.sort();
files.truncate(MAX_FILES);
files
}

Expand Down
15 changes: 13 additions & 2 deletions src/index/schema.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use rusqlite::Connection;

/// Schema version of the index database.
///
/// `create_new_db` writes this value into SQLite's `PRAGMA user_version`
/// after creating the tables, and `open_existing_db` reads the pragma back
/// and returns an error when the stored value differs from this constant.
pub(crate) const SCHEMA_VERSION: i32 = 1;

pub(crate) fn create_new_db(db: &Connection) -> rusqlite::Result<()> {
db.execute_batch(
"PRAGMA synchronous = OFF;
Expand All @@ -9,7 +11,8 @@ pub(crate) fn create_new_db(db: &Connection) -> rusqlite::Result<()> {
PRAGMA temp_store = MEMORY;
PRAGMA lock_timeout = 5000;",
)?;
create_tables(db)
create_tables(db)?;
db.execute_batch(&format!("PRAGMA user_version = {}", SCHEMA_VERSION))
}

pub(crate) fn open_existing_db(db: &Connection) -> rusqlite::Result<()> {
Expand All @@ -20,7 +23,15 @@ pub(crate) fn open_existing_db(db: &Connection) -> rusqlite::Result<()> {
PRAGMA mmap_size = 268435456;
PRAGMA temp_store = MEMORY;
PRAGMA lock_timeout = 5000;",
)
)?;
let version: i32 = db.query_row("PRAGMA user_version", [], |r| r.get(0))?;
if version != SCHEMA_VERSION {
return Err(rusqlite::Error::InvalidColumnName(format!(
"Schema version mismatch: DB has {}, stria expects {}. Run `stria build` to upgrade.",
version, SCHEMA_VERSION
)));
}
Ok(())
}

fn create_tables(db: &Connection) -> rusqlite::Result<()> {
Expand Down
47 changes: 35 additions & 12 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ fn main() {
.unwrap_or_else(|_| repo_path.to_path_buf());
let out_dir = cache_dir
.as_ref()
.map(|c| Path::new(c).to_path_buf())
.map(|c| std::fs::canonicalize(c).unwrap_or_else(|_| Path::new(c).to_path_buf()))
.unwrap_or_else(|| canonical.join(".stria"));
if !out_dir.join("phrases.sqlite").exists() {
match index::build_phrase_index(
Expand Down Expand Up @@ -123,7 +123,7 @@ fn main() {
.unwrap_or_else(|_| repo_path.to_path_buf());
let out_dir = cache_dir
.as_ref()
.map(|c| Path::new(c).to_path_buf())
.map(|c| std::fs::canonicalize(c).unwrap_or_else(|_| Path::new(c).to_path_buf()))
.unwrap_or_else(|| canonical.join(".stria"));
if !out_dir.join("phrases.sqlite").exists() {
match index::build_phrase_index(
Expand Down Expand Up @@ -155,16 +155,19 @@ fn main() {
fn mcp_server(initial_repo: String, initial_out: &std::path::Path) {
use serde_json::{json, Value};
use std::io::{self, BufRead, Write};
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;

/// Session state of the MCP server; the single instance is wrapped in a
/// `Mutex` right after this definition.
struct ServerState {
/// Repository currently being served; starts as `initial_repo` and is
/// replaced with the canonical repo path after a successful index build.
repo: String,
/// Directory associated with `repo` (starts as `initial_out`); after a
/// build it is set to the `.stria` directory that is checked for
/// `phrases.sqlite`.
cache_dir: std::path::PathBuf,
/// Set to `true` just before `index::build_phrase_index` is called and
/// reset to `false` on both its success and failure paths; while it is
/// `true`, a further build request is answered with an error instead.
/// NOTE(review): the flag is read and then set under two separate lock
/// acquisitions — confirm whether requests can actually run concurrently.
build_in_progress: AtomicBool,
}

let state = Mutex::new(ServerState {
repo: initial_repo,
cache_dir: initial_out.to_path_buf(),
build_in_progress: AtomicBool::new(false),
});

let db_path_of = |cache: &std::path::Path| -> String {
Expand Down Expand Up @@ -578,17 +581,37 @@ fn mcp_server(initial_repo: String, initial_out: &std::path::Path) {
} else {
let out_dir = std::path::Path::new(&canonical).join(".stria");
if !out_dir.join("phrases.sqlite").exists() {
match index::build_phrase_index(&canonical, &out_dir, false) {
Ok(n) => {
let mut st = state.lock().unwrap();
st.repo = canonical;
st.cache_dir = out_dir;
drop(st);
json!({"status": "ok", "phrases": n})
}
Err(e) => {
json!({"error": format!("Index build failed: {}", e)})
// Check if a build is already in progress
let already_building = {
let st = state.lock().unwrap();
st.build_in_progress.load(Ordering::Relaxed)
};
if already_building {
json!({"error": "Index build already in progress for another repo"})
} else {
{
let st = state.lock().unwrap();
st.build_in_progress.store(true, Ordering::Relaxed);
}
let result = match index::build_phrase_index(
&canonical, &out_dir, false,
) {
Ok(n) => {
let mut st = state.lock().unwrap();
st.repo = canonical;
st.cache_dir = out_dir;
st.build_in_progress.store(false, Ordering::Relaxed);
drop(st);
json!({"status": "ok", "phrases": n})
}
Err(e) => {
let st = state.lock().unwrap();
st.build_in_progress.store(false, Ordering::Relaxed);
drop(st);
json!({"error": format!("Index build failed: {}", e)})
}
};
result
}
} else {
let mut st = state.lock().unwrap();
Expand Down
153 changes: 84 additions & 69 deletions src/search/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,39 +309,44 @@ fn _search_phrases(db_path: &str, query: &str, top_n: usize) -> Vec<(String, f64
Ok(q) => q,
Err(_) => continue,
};
let rows_result = exact_q
.query_map([st], |r| {
let fid = r.get::<_, i64>(0)?;
let flags = r.get::<_, Vec<u8>>(1)?;
let line_blob = r.get::<_, Vec<u8>>(2)?;
let doc_len = r.get::<_, f64>(3)?;
let uniq_def = r.get::<_, i64>(4)?;
let total_def = r.get::<_, i64>(5)?;
let comment_ratio = r.get::<_, f64>(6)?;
let overflow_count = r.get::<_, u32>(7)?;
let f = if !flags.is_empty() { flags[0] } else { 0 };
let is_def = schema::unpack_is_def(f);
let zone_int = schema::unpack_zone_int(f);
let base_count = schema::unpack_count(f);
let tf = if base_count >= 31 {
overflow_count as f64
} else {
base_count as f64
};
let first_line = schema::unpack_line_nos(&line_blob) as i32;
Ok((
fid,
tf,
is_def,
zone_int,
doc_len,
uniq_def,
total_def,
comment_ratio,
first_line,
))
})
.unwrap();
let rows_result = exact_q.query_map([st], |r| {
let fid = r.get::<_, i64>(0)?;
let flags = r.get::<_, Vec<u8>>(1)?;
let line_blob = r.get::<_, Vec<u8>>(2)?;
let doc_len = r.get::<_, f64>(3)?;
let uniq_def = r.get::<_, i64>(4)?;
let total_def = r.get::<_, i64>(5)?;
let comment_ratio = r.get::<_, f64>(6)?;
let overflow_count = r.get::<_, u32>(7)?;
let f = if !flags.is_empty() { flags[0] } else { 0 };
let is_def = schema::unpack_is_def(f);
let zone_int = schema::unpack_zone_int(f);
let base_count = schema::unpack_count(f);
let tf = if base_count >= 31 {
overflow_count as f64
} else {
base_count as f64
};
let first_line = schema::unpack_line_nos(&line_blob) as i32;
Ok((
fid,
tf,
is_def,
zone_int,
doc_len,
uniq_def,
total_def,
comment_ratio,
first_line,
))
});
let rows_result = match rows_result {
Ok(rows) => rows,
Err(e) => {
eprintln!("eh:warn: search: exact query_map failed: {}", e);
continue;
}
};
for row in rows_result.filter_map(|r| r.ok()) {
let (
fid,
Expand Down Expand Up @@ -406,26 +411,31 @@ fn _search_phrases(db_path: &str, query: &str, top_n: usize) -> Vec<(String, f64
Err(_) => continue,
};
let mut max_per_file: HashMap<i64, f64> = HashMap::new();
let prefix_rows = prefix_q
.query_map(params![&pattern, st], |r| {
let fid = r.get::<_, i64>(0)?;
let flags = r.get::<_, Vec<u8>>(1)?;
let doc_len = r.get::<_, f64>(2)?;
let uniq_def = r.get::<_, i64>(3)?;
let total_def = r.get::<_, i64>(4)?;
let comment_ratio = r.get::<_, f64>(5)?;
let f = if !flags.is_empty() { flags[0] } else { 0 };
Ok((
fid,
schema::unpack_is_def(f),
schema::unpack_zone_int(f),
doc_len,
uniq_def,
total_def,
comment_ratio,
))
})
.unwrap();
let prefix_result = prefix_q.query_map(params![&pattern, st], |r| {
let fid = r.get::<_, i64>(0)?;
let flags = r.get::<_, Vec<u8>>(1)?;
let doc_len = r.get::<_, f64>(2)?;
let uniq_def = r.get::<_, i64>(3)?;
let total_def = r.get::<_, i64>(4)?;
let comment_ratio = r.get::<_, f64>(5)?;
let f = if !flags.is_empty() { flags[0] } else { 0 };
Ok((
fid,
schema::unpack_is_def(f),
schema::unpack_zone_int(f),
doc_len,
uniq_def,
total_def,
comment_ratio,
))
});
let prefix_rows = match prefix_result {
Ok(rows) => rows,
Err(e) => {
eprintln!("eh:warn: search: prefix query_map failed: {}", e);
continue;
}
};
for row in prefix_rows.filter_map(|r| r.ok()) {
let (fid, is_def, zone_int, doc_len, uniq_def, total_def, comment_ratio) = row;
let tf = 1.0;
Expand Down Expand Up @@ -479,22 +489,27 @@ fn _search_phrases(db_path: &str, query: &str, top_n: usize) -> Vec<(String, f64
Err(_) => continue,
};
let mut max_per_file: HashMap<i64, f64> = HashMap::new();
let sub_rows = sub_q
.query_map(params![&pattern, &excl_prefix, st], |r| {
let fid = r.get::<_, i64>(0)?;
let flags = r.get::<_, Vec<u8>>(1)?;
let doc_len = r.get::<_, f64>(2)?;
let comment_ratio = r.get::<_, f64>(3)?;
let f = if !flags.is_empty() { flags[0] } else { 0 };
Ok((
fid,
schema::unpack_is_def(f),
schema::unpack_zone_int(f),
doc_len,
comment_ratio,
))
})
.unwrap();
let sub_result = sub_q.query_map(params![&pattern, &excl_prefix, st], |r| {
let fid = r.get::<_, i64>(0)?;
let flags = r.get::<_, Vec<u8>>(1)?;
let doc_len = r.get::<_, f64>(2)?;
let comment_ratio = r.get::<_, f64>(3)?;
let f = if !flags.is_empty() { flags[0] } else { 0 };
Ok((
fid,
schema::unpack_is_def(f),
schema::unpack_zone_int(f),
doc_len,
comment_ratio,
))
});
let sub_rows = match sub_result {
Ok(rows) => rows,
Err(e) => {
eprintln!("eh:warn: search: substring query_map failed: {}", e);
continue;
}
};
for row in sub_rows.filter_map(|r| r.ok()) {
let (fid, is_def, zone_int, doc_len, comment_ratio) = row;
let tf = 1.0;
Expand Down
Loading
Loading