-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvectorize.py
More file actions
24 lines (22 loc) · 827 Bytes
/
vectorize.py
File metadata and controls
24 lines (22 loc) · 827 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from langchain.vectorstores.faiss import FAISS
from langchain.document_loaders.unstructured import UnstructuredFileLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
import dotenv
import pickle
def main():
    """Build a FAISS vector store from ``datasets/Q&A.txt`` and pickle it.

    Steps:
      1. Load environment variables from a ``.env`` file (presumably this is
         where the OpenAI credentials come from -- confirm against deployment).
      2. Load the Q&A text file and split it into chunks on blank lines.
      3. Print every chunk, followed by a divider, for manual inspection.
      4. Embed the chunks with ``OpenAIEmbeddings`` and index them with FAISS.
      5. Serialize the resulting vector store to ``datasets/vectorstore.pkl``.
    """
    dotenv.load_dotenv()

    loader = UnstructuredFileLoader("datasets/Q&A.txt")
    splitter = RecursiveCharacterTextSplitter(
        # `separators` takes a list of strings. A bare '\n\n' string is
        # iterated character by character, i.e. treated as two single-newline
        # separators, so the text was split on every line break instead of on
        # blank lines.
        separators=["\n\n"],
        chunk_size=20,
        chunk_overlap=0,
        length_function=len,
    )
    documents = loader.load_and_split(splitter)

    # Dump each chunk with a divider so the split can be checked by eye.
    for doc in documents:
        print(doc.page_content.strip())
        print('-' * 80)

    vectorstore = FAISS.from_documents(documents, OpenAIEmbeddings())

    # NOTE(review): the output stays a pickle so existing readers of this file
    # keep working. Unpickling executes arbitrary code, so the file must only
    # be loaded from a trusted location; consider the vector store's own
    # save/load helpers if all consumers can be migrated.
    with open("datasets/vectorstore.pkl", "wb") as f:
        pickle.dump(vectorstore, f)


if __name__ == "__main__":
    main()