-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathapp.py
130 lines (97 loc) · 3.69 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
import textwrap
import pandas as pd
import time
from sentence_transformers import SentenceTransformer, util
from annoy import AnnoyIndex
# HTML attribution line; rendered right after the page title via st.markdown.
footer = "\n<p style='text-align: center; color: gray;'>Made with inspiration by Om A. Soni</p>\n"

# Column names used for a verse record. 'Enlgish Translation' is misspelled,
# but it is the exact key the rest of this file uses for DataFrame lookups,
# so it must not be "corrected" here.
# NOTE(review): shlok_keys is not referenced in the visible part of the file.
shlok_keys = [
    'Title',
    'Chapter',
    'Verse',
    'Hindi Anuvad',
    'Enlgish Translation',
]

# Column width passed to textwrap.fill when wrapping verse text for display.
max_line_length = 80  # Adjust as needed
@st.cache_resource
def load_data():
    """Read the verse spreadsheet ``Gita.xlsx`` into a pandas DataFrame.

    The path is relative, so it is resolved against the process's current
    working directory. The result is cached by ``st.cache_resource``.

    Returns:
        The DataFrame produced by ``pandas.read_excel``.
    """
    return pd.read_excel('Gita.xlsx')
@st.cache_resource
def load_hn_model():
    """Create the SentenceTransformer used to embed verses and queries.

    Cached with ``st.cache_resource``.

    Returns:
        A ``SentenceTransformer`` built from the 'all-mpnet-base-v2' checkpoint.
    """
    model_name = 'all-mpnet-base-v2'
    return SentenceTransformer(model_name)


# Module-level model handle shared by build_embeddings and the query handler.
hn_model = load_hn_model()
@st.cache_resource
def build_embeddings(hn_data):
    """Embed every English translation in ``hn_data`` with ``hn_model``.

    Args:
        hn_data: DataFrame holding an 'Enlgish Translation' column. The
            misspelling is the key this file uses everywhere and must be kept.

    Returns:
        A list with one 1-D NumPy vector per row, in row order.
    """
    translations = hn_data['Enlgish Translation'].tolist()
    # Encode all rows in one batched call and ask for NumPy output directly.
    # The previous per-row `encode(..., convert_to_tensor=True).numpy()` raises
    # when the model sits on a CUDA device, because a GPU tensor cannot be
    # converted to NumPy without first being moved to the CPU.
    embeddings = hn_model.encode(translations, convert_to_numpy=True)
    # Callers index, len() and iterate the result; keep returning a list.
    return list(embeddings)
@st.cache_resource
def build_annoy_index(shloka_embeddings):
    """Build an angular-metric Annoy index over the verse embeddings.

    Each embedding is stored under its position in ``shloka_embeddings``, so
    the ids returned by a query can be used to index that sequence directly.

    Args:
        shloka_embeddings: Non-empty sequence of equal-length vectors; the
            first element determines the index dimensionality.

    Returns:
        The built ``AnnoyIndex``.
    """
    vector_length = len(shloka_embeddings[0])
    index = AnnoyIndex(vector_length, metric='angular')
    for item_id, vector in enumerate(shloka_embeddings):
        index.add_item(item_id, vector)

    # Number of trees in the forest. Per Annoy's documentation, more trees
    # give higher query precision at the cost of a larger index.
    tree_count = 18
    index.build(tree_count)
    return index
def wrap_text(text):
    """Placeholder: performs no wrapping and always returns ``None``.

    NOTE(review): nothing in the visible part of the file calls this; the
    query handler calls ``textwrap.fill`` directly instead.
    """
    return None
# st.write("shree ganeshay namah")
# Load the verse table, then derive the embeddings and the search index from it.
hn_data = load_data()
shloka_embeddings = build_embeddings(hn_data)
annoy_index = build_annoy_index(shloka_embeddings)

# Page header followed by the attribution line.
st.title("TopShlok Bhagavad Gita Assistant")
st.markdown(footer, unsafe_allow_html=True)

# CSS overrides, each injected as its own <style> element.
# NOTE(review): these class names look like legacy Streamlit selectors —
# confirm they still match the rendered DOM.
page_styles = (
    """
<style>
.reportview-container {
width: 90%;
}
</style>
""",
    """
<style>
.streamlit-text-container {
white-space: pre-line;
}
</style>
""",
)
for css in page_styles:
    st.markdown(css, unsafe_allow_html=True)

# Free-text question; read by the 'Ask' button handler.
query = st.text_input("Ask any question related to the Bhagavad Gita: ")
def _type_out(text, delay):
    """Reveal ``text`` one character at a time in a fresh Streamlit placeholder.

    Args:
        text: The (already wrapped) text to display.
        delay: Seconds to pause after each added character.
    """
    slot = st.empty()
    for end in range(1, len(text) + 1):
        slot.text(text[:end])
        time.sleep(delay)


def _answer_query(question, candidate_count=18):
    """Show the verse most similar to ``question`` in English and Hindi.

    Uses the module-level ``hn_model``, ``annoy_index``, ``shloka_embeddings``
    and ``hn_data``.

    Args:
        question: The text typed by the user.
        candidate_count: How many nearest neighbours to fetch from the Annoy
            index before re-ranking them by cosine similarity.
    """
    if not question or not question.strip():
        # Without this guard an empty box would still be embedded and some
        # arbitrary verse would be shown.
        st.warning("Please enter a question first.")
        return

    # Request NumPy output directly: calling .numpy() on the tensor returned
    # with convert_to_tensor=True raises when the model runs on a CUDA device.
    query_embedding = hn_model.encode(question, convert_to_numpy=True)

    # Use Annoy Index for efficient similarity search
    candidate_indices = annoy_index.get_nns_by_vector(query_embedding, candidate_count)
    if not candidate_indices:
        st.warning("No matching shloka was found.")
        return

    # Re-rank the candidates by cosine similarity and keep the best one.
    # .item() turns the 1x1 similarity tensor into a plain float for comparison.
    best_index = max(
        candidate_indices,
        key=lambda idx: util.cos_sim(query_embedding, shloka_embeddings[idx]).item(),
    )
    # Only the winning row is needed, so look it up once.
    verse = {key: hn_data[key][best_index] for key in hn_data}

    # Assumes the 'Verse' cell looks like "<word> <chapter>.<verse>" — TODO
    # confirm against Gita.xlsx. Fall back to the raw cell text otherwise
    # instead of crashing the page.
    verse_label = str(verse['Verse'])
    try:
        shlok_number = verse_label.split(" ")[1].split(".")[1]
    except IndexError:
        shlok_number = verse_label

    st.write("------------------------------")
    st.write(f"{verse['Chapter']} , Shloka : {shlok_number}")

    english_text = textwrap.fill(verse['Enlgish Translation'], width=max_line_length)
    hindi_text = textwrap.fill(verse['Hindi Anuvad'], width=max_line_length)

    _type_out(english_text, 0.01)  # Adjust the delay as needed
    st.write("\n------------------------------")
    _type_out(hindi_text, 0.005)  # Adjust the delay as needed
    st.write("\n------------------------------")


if st.button('Ask'):
    _answer_query(query)