-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathingest.py
More file actions
31 lines (22 loc) · 773 Bytes
/
ingest.py
File metadata and controls
31 lines (22 loc) · 773 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import pandas as pd
from sentence_transformers import SentenceTransformer
import chromadb
# Ingest a question/answer CSV into a persistent ChromaDB collection.
#
# Each row's "Question" text is embedded with a SentenceTransformer model and
# stored next to the row's "Answer" text (as the document), so a later
# similarity search over questions can return the matching answer.

# Load your dataset
df = pd.read_csv("chatbot_agri.csv", encoding='latin1')

# A PersistentClient stores the collection under the given path.
chroma_client = chromadb.PersistentClient(path="db")
collection = chroma_client.get_or_create_collection(name="agri-qa")

# Load model for embeddings
model = SentenceTransformer("all-MiniLM-L6-v2")

# Rows with a missing Question or Answer would reach the encoder / Chroma as
# float NaN instead of text, so they are skipped. The original DataFrame index
# is kept so that ids stay equal to the CSV row positions.
valid_rows = df.dropna(subset=["Question", "Answer"])

# Encode and write in batches: one encode call and one DB write per batch is
# far cheaper than one per row, and a bounded batch stays under Chroma's
# per-call size limit (NOTE(review): 1000 is a conservative guess -- confirm
# against the limit of the installed Chroma version).
BATCH_SIZE = 1000

# Generate embeddings and store in ChromaDB
for start in range(0, len(valid_rows), BATCH_SIZE):
    batch = valid_rows.iloc[start:start + BATCH_SIZE]
    questions = batch["Question"].astype(str).tolist()
    answers = batch["Answer"].astype(str).tolist()
    embeddings = model.encode(questions).tolist()
    # upsert (rather than add) keeps the script re-runnable: ids that already
    # exist from a previous run are overwritten instead of colliding.
    collection.upsert(
        ids=[str(i) for i in batch.index],
        documents=answers,
        metadatas=[{"source": "chatbot_agri.csv"} for _ in range(len(batch))],
        embeddings=embeddings,
    )

# No explicit persist step: the PersistentClient above is responsible for
# writing the collection to the "db" directory.
print("Data has been ingested into ChromaDB.")

# Show first few rows
print(df.head())