$CONTENT" > "$HTML_FILEPATH"
echo "Сохранено: $OUTPUT_PATH/$TITLE.html"
CHILD_IDS=$(echo "$RESPONSE" | jq -r '.children.page.results[]?.id' 2>/dev/null)
diff --git a/rag/rag.py b/rag/rag.py
index addf8a7..d7c7898 100644
--- a/rag/rag.py
+++ b/rag/rag.py
@@ -1,31 +1,34 @@
-import argparse
import os
-import hashlib
import requests
+import json
+import time
from sentence_transformers import SentenceTransformer
-class LocalRAGSystem:
+class RagSystem:
def __init__(self,
md_folder: str = "input_md",
ollama_url: str = "http://localhost:11434",
qdrant_host: str = "localhost",
qdrant_port: int = 6333,
embed_model: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
- chat_model: str = "qwen2.5:3b"):
+ chat_model: str = "phi4-mini:3.8b"):
self.md_folder = md_folder
self.ollama_url = ollama_url
self.qdrant_host = qdrant_host
self.qdrant_port = qdrant_port
- self.embed_model = embed_model
self.chat_model = chat_model
self.emb_model = SentenceTransformer(embed_model)
self.prompt = ""
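+        # Accumulates {role, content} turns so dialogs can be saved via the "save" command.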
+ self.conversation_history = []
+ self.load_chat_model()
- def get_embedding(self, text: str):
- return self.emb_model.encode(text, show_progress_bar=False).tolist()
+ def load_chat_model(self):
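+        # Posting only the model name (no prompt) asks Ollama to preload the model into memory.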
+ url = f"{self.ollama_url}/api/generate"
+ body = {"model": self.chat_model}
+ requests.post(url, json=body, timeout=600)
def search_qdrant(self, query: str, top_k: int = 6):
- query_vec = self.get_embedding(query)
+ query_vec = self.emb_model.encode(query, show_progress_bar=False).tolist()
url = f"http://{self.qdrant_host}:{self.qdrant_port}/collections/rag_collection/points/search"
payload = {
"vector": query_vec,
@@ -38,69 +41,156 @@ class LocalRAGSystem:
results = resp.json().get("result", [])
return results
- def generate_answer(self, prompt: str):
- url = f"{self.ollama_url}/api/generate"
- body = {
- "model": self.chat_model,
- "prompt": prompt,
- "stream": False
- }
- resp = requests.post(url, json=body, timeout=600)
- if resp.status_code != 200:
- return f"Ошибка генерации ответа: {resp.status_code} {resp.text}"
- return resp.json().get("response", "").strip()
-
def prepare_sources(self, context_docs: list):
sources = ""
for idx, doc in enumerate(context_docs, start=1):
text = doc['payload'].get("text", "").strip()
- sources = f"{sources}\n--- Source [{idx}] ---\n{text}\n"
- return sources.strip()
+ sources = f"{sources}\n\n{text}\n\n"
+ return sources
def prepare_prompt(self, query: str, context_docs: list):
sources = self.prepare_sources(context_docs)
if os.path.exists('sys_prompt.txt'):
with open('sys_prompt.txt', 'r') as fp:
- return fp.read().replace("{{sources}}", sources).replace("{{query}}", query)
+ prompt_template = fp.read()
+ return prompt_template.replace("{{sources}}", sources).replace("{{query}}", query)
else:
- return f"""
- Please provide an answer based solely on the provided sources.
- It is prohibited to generate an answer based on your pretrained data.
- If uncertain, ask the user for clarification.
- Respond in the same language as the user's query.
- If there are no sources in context, clearly state that.
- If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
- When referencing information from a source, cite the appropriate source(s) using their corresponding numbers.
- Every answer should include at least one source citation.
- Only cite a source when you are explicitly referencing it.
+ return f"""### Your role
+You are a helpful assistant that can answer questions based on the provided sources.
- If none of the sources are helpful, you should indicate that.
- For example:
+### Your user
+User is a human who is asking a question related to the provided sources.
- --- Source 1 ---
- The sky is red in the evening and blue in the morning.
+### Your task
+Please provide an answer based solely on the provided sources and the conversation history.
- --- Source 2 ---
- Water is wet when the sky is red.
+### Rules
+- You **MUST** respond in the SAME language as the user's query.
+- If uncertain, you **MUST** ask the user for clarification.
+- If there are no sources in context, you **MUST** clearly state that.
+- If none of the sources are helpful, you **MUST** clearly state that.
+- If you are unsure about the answer, you **MUST** clearly state that.
+- If the context is unreadable or of poor quality, you **MUST** inform the user and provide the best possible answer.
+- When referencing information from a source, you **MUST** cite the appropriate source(s) using their corresponding numbers.
+- **Only include inline citations using [id] (e.g., [1], [2]) when the tag includes an id attribute.**
+- You **MUST NOT** add any XML/HTML tags to your response.
+- You **MUST NOT** cite a source whose tag does not contain an id attribute.
+- Every answer SHOULD include at least one source citation.
+- Only cite a source when you are explicitly referencing it.
+- You may also cite multiple sources if they are all relevant to the question.
+- Ensure citations are concise and directly related to the information provided.
+- You CAN format your responses using Markdown.
- Query: When is water wet?
- Answer: Water will be wet when the sky is red [2], which occurs in the evening [1].
+### Example
- Now it's your turn. Below are several numbered sources of information:
- {context}
+Sources and query:
+```
+[1] The sky is red in the evening and blue in the morning.
+[2] Water is wet when the sky is red.
+Query: When is water wet?
+```
+Response:
+```
+Water will be wet when the sky is red [2], which occurs in the evening [1].
+```
- User query: {query}
- Your answer:
- """
+### Now let's start!
+
+```
+{sources}
+Query: {query}
+```
+
+Respond."""
+
+ def generate_answer(self, prompt: str):
+ url = f"{self.ollama_url}/api/generate"
+ body = {
+ "model": self.chat_model,
+ "prompt": prompt,
+ "messages": self.conversation_history,
+ "stream": False,
+ # "options": {
+ # "temperature": 0.4,
+ # "top_p": 0.1,
+ # },
+ }
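+        # Keep the raw response so the get_*() stat helpers below can read the eval counters.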
+ self.response = requests.post(url, json=body, timeout=600)
+ if self.response.status_code != 200:
+ return f"Ошибка генерации ответа: {self.response.status_code} {self.response.text}"
+ return self.response.json().get("response", "").strip()
+
+ def generate_answer_stream(self, prompt: str):
+ url = f"{self.ollama_url}/api/generate"
+ body = {
+ "model": self.chat_model,
+ "prompt": prompt,
+ "messages": self.conversation_history,
+ "stream": True
+ }
+ resp = requests.post(url, json=body, stream=True, timeout=600)
+ if resp.status_code != 200:
+ raise RuntimeError(f"Ошибка генерации ответа: {resp.status_code} {resp.text}")
+ full_answer = ""
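+        # Ollama streams newline-delimited JSON objects, each carrying a "response" token.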
+ for chunk in resp.iter_lines():
+ if chunk:
+ try:
+ decoded_chunk = chunk.decode('utf-8')
+ data = json.loads(decoded_chunk)
+ if "response" in data:
+ yield data["response"]
+ full_answer += data["response"]
+ elif "error" in data:
+ print(f"Stream error: {data['error']}")
+ break
+ except json.JSONDecodeError:
+ print(f"Could not decode JSON from chunk: {chunk.decode('utf-8')}")
+ except Exception as e:
+ print(f"Error processing chunk: {e}")
+
+ def get_prompt_eval_count(self):
+ return self.response.json().get("prompt_eval_count", 0)
+
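+    # Ollama reports *_duration fields in nanoseconds; divide by 1e9 to get seconds.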
+ def get_prompt_eval_duration(self):
+ return self.response.json().get("prompt_eval_duration", 0) / (10 ** 9)
+
+ def get_eval_count(self):
+ return self.response.json().get("eval_count", 0)
+
+ def get_eval_duration(self):
+ return self.response.json().get("eval_duration", 0) / (10 ** 9)
+
+ def get_total_duration(self):
+ return self.response.json().get("total_duration", 0) / (10 ** 9)
+
+ def get_tps(self):
+ eval_count = self.get_eval_count()
+ eval_duration = self.get_eval_duration()
+ if eval_count == 0 or eval_duration == 0:
+ return 0
+ return eval_count / eval_duration
def print_sources(context_docs: list):
+ print("\n\nИсточники:")
for idx, doc in enumerate(context_docs, start=1):
filename = doc['payload'].get("filename", None)
url = doc['payload'].get("url", None)
title = filename
if url is None:
url = "(нет веб-ссылки)"
- print(f"{idx}. {title}\n {url}")
+ print(f"{idx}. {title}\n {url}\n")
+
+def print_v(text: str, is_verbose: bool):
+ if is_verbose:
+ print(text)
+
+def print_stats(rag: RagSystem):
+    print("Statistics:")
+    print(f"* Total time: {rag.get_total_duration()}s")
+    print(f"* Tokens per second: {rag.get_tps()}")
+    print(f"* Prompt eval count: {rag.get_prompt_eval_count()}")
+    print(f"* Prompt eval duration: {rag.get_prompt_eval_duration()}s")
+    print(f"* Eval count: {rag.get_eval_count()}")
+    print(f"* Eval duration: {rag.get_eval_duration()}s\n")
def main():
import sys
@@ -114,41 +204,43 @@ def main():
parser.add_argument("--qdrant-port", type=int, default=6333, help="Qdrant port")
parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama API URL")
parser.add_argument("--emb-model", default="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", help="Модель эмбеддинга")
- parser.add_argument("--chat-model", default="qwen2.5:3b", help="Модель генерации Ollama")
+ parser.add_argument("--chat-model", default="phi4-mini:3.8b", help="Модель генерации Ollama")
parser.add_argument("--topk", type=int, default=6, help="Количество документов для поиска")
+ parser.add_argument("--verbose", default=False, action=argparse.BooleanOptionalAction, help="Выводить промежуточные служебные сообщения")
+ parser.add_argument("--show-stats", default=False, action=argparse.BooleanOptionalAction, help="Выводить статистику об ответе (не работает с --stream)")
+ parser.add_argument("--stream", default=False, action=argparse.BooleanOptionalAction, help="Выводить статистику об ответе")
args = parser.parse_args()
if not args.query and not args.interactive:
print("Ошибка: укажите запрос (--query) и/или используйте интерактивный режим (--interactive)")
sys.exit(1)
- print(f"Адрес ollama: {args.ollama_url}")
- print(f"Адрес qdrant: {args.qdrant_host}:{args.qdrant_port}")
- print(f"Модель эмбеддинга: {args.emb_model}")
- print(f"Модель чата: {args.chat_model}")
- print(f"Документов для поиска: {args.topk}")
+ print_v(f"Адрес ollama: {args.ollama_url}", args.verbose)
+ print_v(f"Адрес qdrant: {args.qdrant_host}:{args.qdrant_port}", args.verbose)
+ print_v(f"Модель эмбеддинга: {args.emb_model}", args.verbose)
+ print_v(f"Модель чата: {args.chat_model}", args.verbose)
+ print_v(f"Документов для поиска: {args.topk}", args.verbose)
if os.path.exists('sys_prompt.txt'):
- print("Будет использоваться sys_prompt.txt!")
+ print_v("Будет использоваться sys_prompt.txt!", args.verbose)
- print("\nПервая инициализация моделей...")
- rag = LocalRAGSystem(
+ print_v("\nПервая инициализация моделей...", args.verbose)
+ rag = RagSystem(
ollama_url=args.ollama_url,
qdrant_host=args.qdrant_host,
qdrant_port=args.qdrant_port,
embed_model=args.emb_model,
chat_model=args.chat_model
)
- print(f"Модели загружены. Если ответ плохой, переформулируйте запрос, укажите --chat-model или улучшите исходные данные RAG")
+ print_v(f"Модели загружены. Если ответ плохой, переформулируйте запрос, укажите --chat-model или улучшите исходные данные RAG", args.verbose)
+ query = None
if args.interactive:
- print("\nИНТЕРАКТИВНЫЙ РЕЖИМ")
- print("Можете вводить запрос (или 'exit' для выхода)\n")
+ print_v("\nИНТЕРАКТИВНЫЙ РЕЖИМ", args.verbose)
+ print_v("Можете вводить запрос (или 'exit' для выхода)\n", args.verbose)
if args.query:
query = args.query.strip()
print(f">>> {query}")
- else:
- query = input(">>> ").strip()
while True:
try:
@@ -158,34 +250,103 @@ def main():
if not query or query == "":
continue
- if query.lower() == "exit":
- print("\n*** Завершение работы")
+ if query.lower() == "help":
+ print("<<< Команды итерактивного режима:")
+ print("save -- сохранить диалог в файл")
+ print("exit -- выход\n")
+ query = None
+ continue
+
+ if query.strip().lower() == "save":
+ import datetime
+ timestamp = int(time.time())
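+                # NOTE: fromtimestamp() returns local time, so the "Z" (UTC) suffix in the
+                # format string below is only nominal.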
+ dt = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ')
+ filename = f"chats/chat-{timestamp}.md"
+
+ markdown_content = f"# История диалога от {dt}\n\n"
+ markdown_content += f"## Параметры диалога\n"
+ markdown_content += f"```\nargs = {args}\n```\n"
+ markdown_content += f"```\nemb_model = {rag.emb_model}\n```\n"
+
+ for entry in rag.conversation_history:
+ if entry['role'] == 'user':
+ markdown_content += f"## Пользователь\n\n"
+ elif entry['role'] == 'assistant':
+ markdown_content += f"## Модель\n\n"
+ docs = rag.prepare_sources(entry['docs']).replace("```", "")
+ markdown_content += f"```\n{docs}\n```\n\n"
+ markdown_content += f"{entry['content']}\n\n"
+
+ os.makedirs('chats', exist_ok=True)
+ with open(filename, 'w') as fp:
+ fp.write(markdown_content)
+
+ print(f"<<< Диалог сохранён в файл: {filename}\n")
+ query = None
+ continue
+
+ if query.strip().lower() == "exit":
+ print_v("\n*** Завершение работы", args.verbose)
break
- print("\nПоиск релевантных документов...")
+ print_v("\nПоиск релевантных документов...", args.verbose)
context_docs = rag.search_qdrant(query, top_k=args.topk)
if not context_docs:
- print("Релевантные документы не найдены.")
+ print_v("Релевантные документы не найдены.", args.verbose)
if args.interactive:
+ query = None
continue
else:
break
- print(f"Найдено {len(context_docs)} релевантных документов:")
- print_sources(context_docs)
+ print_v(f"Найдено {len(context_docs)} релевантных документов", args.verbose)
+ # print_sources(context_docs)
prompt = rag.prepare_prompt(query=query, context_docs=context_docs)
if args.show_prompt:
- print("\nПолный системный промпт: --------------------------\n")
- print(f"{prompt}\n---------------------------------------------------\n")
+ print("\nПолный системный промпт: --------------------------")
+ print(f"{prompt}\n---------------------------------------------------")
+
+ print_v("\nГенерация ответа...\n", args.verbose)
+
+ if args.stream:
+ answer = "\n<<< "
+ print(answer, end='', flush=True)
+ try:
+ for message_part in rag.generate_answer_stream(prompt):
+ answer += message_part
+ print(message_part, end='', flush=True)
+                except RuntimeError as e:
+                    answer = str(e)
+                    print(f"\n{answer}\n===================================================\n")
+ else:
+ answer = rag.generate_answer(prompt)
+ print(f"<<< {answer}\n")
+
+ print_sources(context_docs)
+ if args.show_stats and not args.stream:
+ print_stats(rag)
+
+ rag.conversation_history.append({
+ "role": "user",
+ "content": query,
+ })
+
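+            # "docs" is stashed with the answer so the "save" command can dump the sources.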
+ rag.conversation_history.append({
+ "role": "assistant",
+ "docs": context_docs,
+ "content": answer,
+ })
+
+ if args.interactive:
+ query = None
+ else:
+ break
- print("Генерация ответа...")
- answer = rag.generate_answer(prompt)
- print(f"\n<<< {answer}\n===================================================\n")
- query = None
except KeyboardInterrupt:
print("\n*** Завершение работы")
break
+
except Exception as e:
print(f"Ошибка: {e}")
break