From 3f2491db277b49ac8f1611bda1cae8fe34a1ee67 Mon Sep 17 00:00:00 2001
From: AnthonyAxenov
Date: Tue, 26 Aug 2025 01:10:18 +0800
Subject: [PATCH] WIP

---
 .gitignore                                     |   1 +
 .idea/.gitignore                               |   3 +
 .../inspectionProfiles/profiles_settings.xml   |   6 +
 .idea/misc.xml                                 |   7 +
 .idea/modules.xml                              |   8 +
 .idea/ollama.iml                               |  17 +
 .idea/vcs.xml                                  |   6 +
 rag/README.md                                  |   5 +-
 rag/TODO.md                                    |   1 +
 rag/chats/.gitkeep                             |   0
 rag/download.sh                                |   2 +-
 rag/rag.py                                     | 307 +++++++++++++-----
 12 files changed, 287 insertions(+), 76 deletions(-)
 create mode 100644 .idea/.gitignore
 create mode 100644 .idea/inspectionProfiles/profiles_settings.xml
 create mode 100644 .idea/misc.xml
 create mode 100644 .idea/modules.xml
 create mode 100644 .idea/ollama.iml
 create mode 100644 .idea/vcs.xml
 create mode 100644 rag/chats/.gitkeep

diff --git a/.gitignore b/.gitignore
index 24fe2f7..96b1cb8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
 /rag/input_html/*
 /rag/input_md/*
 /rag/sys_prompt.txt
+/rag/chats/*.md

 .old/
 .venv/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..0cb5d9c
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..556232f
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/ollama.iml" filepath="$PROJECT_DIR$/.idea/ollama.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/ollama.iml b/.idea/ollama.iml
new file mode 100644
index 0000000..9084195
--- /dev/null
+++ b/.idea/ollama.iml
@@ -0,0 +1,17 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/rag/README.md b/rag/README.md
index 4a9156d..03d78a2 100644
--- a/rag/README.md
+++ b/rag/README.md
@@ -38,8 +38,9 @@ RAG (Retrieval-Augmented Generation) is an architecture that
 rag/
 ├── input_html/   # Input HTML files downloaded from Confluence
 ├── input_md/     # Input (converted) Markdown files
-├── download.sh   # Script for downloading pages from Confluence
-├── convert.py    # Script for converting HTML to Markdown
+├── chats/        # Directory for saved chats
+├── download.sh   # Script for downloading pages from Confluence
+├── convert.py    # Script for converting HTML to Markdown
 ├── vectorize.py  # Markdown vectorization script
 ├── rag.py        # Main RAG system script
 ├── clear.sh      # Cleanup script for html/md files
diff --git a/rag/TODO.md b/rag/TODO.md
index 7603f05..48c2071 100644
--- a/rag/TODO.md
+++ b/rag/TODO.md
@@ -3,6 +3,7 @@
 * [ ] Describe knowledge preparation in Open WebUI
 * [ ] Processing of pdf, json, ...
 * [ ] Result ranking
+* [ ] Convert tables to lists
 * [ ] Dialogue mode (remembering queries and answers)
 * [ ] API
 * [ ] Telegram bot
diff --git a/rag/chats/.gitkeep b/rag/chats/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/rag/download.sh b/rag/download.sh
index d1036df..5d0fc3a 100755
--- a/rag/download.sh
+++ b/rag/download.sh
@@ -55,7 +55,7 @@ for PAGE_ID in "$@"; do
     CONTENT=${CONTENT//src=\"\//src=\"$CONF_URL}
     URL="$CONF_URL/pages/viewpage.action?pageId=$PAGE_ID"

-    echo -e "@@$URL@@\nSource page: $URL\n\n$TITLE\n\n$CONTENT" > "$HTML_FILEPATH"
+    echo -e "@@$URL@@\nSource page: $URL\n\n$TITLE\n\n$CONTENT" > "$HTML_FILEPATH"

     echo "Saved: $OUTPUT_PATH/$TITLE.html"

     CHILD_IDS=$(echo "$RESPONSE" | jq -r '.children.page.results[]?.id' 2>/dev/null)
diff --git a/rag/rag.py b/rag/rag.py
index addf8a7..d7c7898 100644
--- a/rag/rag.py
+++ b/rag/rag.py
@@ -1,31 +1,35 @@
 import argparse
 import os
-import hashlib
 import requests
+import json
+import time
 from sentence_transformers import SentenceTransformer

-class LocalRAGSystem:
+class RagSystem:
     def __init__(self, md_folder: str = "input_md",
                  ollama_url: str = "http://localhost:11434",
                  qdrant_host: str = "localhost",
                  qdrant_port: int = 6333,
                  embed_model: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
-                 chat_model: str = "qwen2.5:3b"):
+                 chat_model: str = "phi4-mini:3.8b"):
         self.md_folder = md_folder
         self.ollama_url = ollama_url
         self.qdrant_host = qdrant_host
         self.qdrant_port = qdrant_port
-        self.embed_model = embed_model
         self.chat_model = chat_model
         self.emb_model = SentenceTransformer(embed_model)
         self.prompt = ""
+        self.conversation_history = []
+        self.load_chat_model()

-    def get_embedding(self, text: str):
-        return self.emb_model.encode(text, show_progress_bar=False).tolist()
+    def load_chat_model(self):
+        # A /api/generate request carrying only a model name asks Ollama to load the model into memory
+        url = f"{self.ollama_url}/api/generate"
+        body = {"model": self.chat_model}
+        requests.post(url, json=body, timeout=600)

     def search_qdrant(self, query: str, top_k: int = 6):
-        query_vec = self.get_embedding(query)
+        query_vec = self.emb_model.encode(query, show_progress_bar=False).tolist()
         url = f"http://{self.qdrant_host}:{self.qdrant_port}/collections/rag_collection/points/search"
         payload = {
             "vector": query_vec,
@@ -38,69 +41,156 @@ class LocalRAGSystem:
         results = resp.json().get("result", [])
         return results

-    def generate_answer(self, prompt: str):
-        url = f"{self.ollama_url}/api/generate"
-        body = {
-            "model": self.chat_model,
-            "prompt": prompt,
-            "stream": False
-        }
-        resp = requests.post(url, json=body, timeout=600)
-        if resp.status_code != 200:
-            return f"Answer generation error: {resp.status_code} {resp.text}"
-        return resp.json().get("response", "").strip()
-
     def prepare_sources(self, context_docs: list):
         sources = ""
         for idx, doc in enumerate(context_docs, start=1):
             text = doc['payload'].get("text", "").strip()
-            sources = f"{sources}\n--- Source [{idx}] ---\n{text}\n"
-        return sources.strip()
+            sources = f"{sources}\n<source id={idx}>\n{text}\n</source>\n"
+        return sources

     def prepare_prompt(self, query: str, context_docs: list):
         sources = self.prepare_sources(context_docs)
         if os.path.exists('sys_prompt.txt'):
             with open('sys_prompt.txt', 'r') as fp:
-                return fp.read().replace("{{sources}}", sources).replace("{{query}}", query)
+                prompt_template = fp.read()
+                return prompt_template.replace("{{sources}}", sources).replace("{{query}}", query)
         else:
-            return f"""
-            Please provide an answer based solely on the provided sources.
-            It is prohibited to generate an answer based on your pretrained data.
-            If uncertain, ask the user for clarification.
-            Respond in the same language as the user's query.
-            If there are no sources in context, clearly state that.
-            If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
-            When referencing information from a source, cite the appropriate source(s) using their corresponding numbers.
-            Every answer should include at least one source citation.
-            Only cite a source when you are explicitly referencing it.
+            return f"""### Your role
+You are a helpful assistant that can answer questions based on the provided sources.
-            If none of the sources are helpful, you should indicate that.
-            For example:
+
+### Your user
+User is a human who is asking a question related to the provided sources.
-            --- Source 1 ---
-            The sky is red in the evening and blue in the morning.
+
+### Your task
+Please provide an answer based solely on the provided sources and the conversation history.
-            --- Source 2 ---
-            Water is wet when the sky is red.
+
+### Rules
+- You **MUST** respond in the SAME language as the user's query.
+- If uncertain, you **MUST** ask the user for clarification.
+- If there are no sources in context, you **MUST** clearly state that.
+- If none of the sources are helpful, you **MUST** clearly state that.
+- If you are unsure about the answer, you **MUST** clearly state that.
+- If the context is unreadable or of poor quality, you **MUST** inform the user and provide the best possible answer.
+- When referencing information from a source, you **MUST** cite the appropriate source(s) using their corresponding numbers.
+- **Only include inline citations using [id] (e.g., [1], [2]) when the tag includes an id attribute.**
+- You MUST NOT add any XML/HTML tags in your response.
+- You MUST NOT cite a source whose tag does not contain an id attribute.
+- Every answer SHOULD include at least one source citation.
+- Only cite a source when you are explicitly referencing it.
+- You may also cite multiple sources if they are all relevant to the question.
+- Ensure citations are concise and directly related to the information provided.
+- You CAN format your responses using Markdown.
-            Query: When is water wet?
-            Answer: Water will be wet when the sky is red [2], which occurs in the evening [1].
+
+### Example of sources list:
-            Now it's your turn. Below are several numbered sources of information:
-            {context}
+
+```
+<source id=1>The sky is red in the evening and blue in the morning.</source>
+<source id=2>Water is wet when the sky is red.</source>
+<query>When is water wet?</query>
+```
+Response:
+```
+Water will be wet when the sky is red [2], which occurs in the evening [1].
+```
-            User query: {query}
-            Your answer:
-            """
+
+### Now let's start!
+
+```
+{sources}
+<query>{query}</query>
+```
+
+Respond."""
+
+    def generate_answer(self, prompt: str):
+        url = f"{self.ollama_url}/api/generate"
+        body = {
+            "model": self.chat_model,
+            "prompt": prompt,
+            # NOTE: /api/generate does not use "messages"; history only matters after moving to /api/chat
+            "messages": self.conversation_history,
+            "stream": False,
+            # "options": {
+            #     "temperature": 0.4,
+            #     "top_p": 0.1,
+            # },
+        }
+        # Keep the raw response on the instance so the stats getters below can read its counters
+        self.response = requests.post(url, json=body, timeout=600)
+        if self.response.status_code != 200:
+            return f"Answer generation error: {self.response.status_code} {self.response.text}"
+        return self.response.json().get("response", "").strip()
+
+    def generate_answer_stream(self, prompt: str):
+        url = f"{self.ollama_url}/api/generate"
+        body = {
+            "model": self.chat_model,
+            "prompt": prompt,
+            "messages": self.conversation_history,
+            "stream": True
+        }
+        resp = requests.post(url, json=body, stream=True, timeout=600)
+        if resp.status_code != 200:
+            raise RuntimeError(f"Answer generation error: {resp.status_code} {resp.text}")
+        full_answer = ""
+        for chunk in resp.iter_lines():
+            if chunk:
+                try:
+                    decoded_chunk = chunk.decode('utf-8')
+                    data = json.loads(decoded_chunk)
+                    if "response" in data:
+                        yield data["response"]
+                        full_answer += data["response"]
+                    elif "error" in data:
+                        print(f"Stream error: {data['error']}")
+                        break
+                except json.JSONDecodeError:
+                    print(f"Could not decode JSON from chunk: {chunk.decode('utf-8')}")
+                except Exception as e:
+                    print(f"Error processing chunk: {e}")
+
+    def get_prompt_eval_count(self):
+        return self.response.json().get("prompt_eval_count", 0)
+
+    def get_prompt_eval_duration(self):
+        # Ollama reports durations in nanoseconds; convert to seconds
+        return self.response.json().get("prompt_eval_duration", 0) / (10 ** 9)
+
+    def get_eval_count(self):
+        return self.response.json().get("eval_count", 0)
+
+    def get_eval_duration(self):
+        return self.response.json().get("eval_duration", 0) / (10 ** 9)
+
+    def get_total_duration(self):
+        return self.response.json().get("total_duration", 0) / (10 ** 9)
+
+    def get_tps(self):
+        # Tokens per second: generated tokens divided by generation time
+        eval_count = self.get_eval_count()
+        eval_duration = self.get_eval_duration()
+        if eval_count == 0 or eval_duration == 0:
+            return 0
+        return eval_count / eval_duration

 def print_sources(context_docs: list):
+    print("\n\nSources:")
     for idx, doc in enumerate(context_docs, start=1):
         filename = doc['payload'].get("filename", None)
         url = doc['payload'].get("url", None)
         title = filename
         if url is None:
             url = "(no web link)"
-        print(f"{idx}. {title}\n   {url}")
+        print(f"{idx}. {title}\n   {url}\n")
+
+def print_v(text: str, is_verbose: bool):
+    if is_verbose:
+        print(text)
+
+def print_stats(rag: RagSystem):
+    print("Statistics:")
+    print(f"* Time: {rag.get_total_duration()}s")
+    print(f"* TPS: {rag.get_tps()}")
+    print(f"* Prompt eval count: {rag.get_prompt_eval_count()}")
+    print(f"* Prompt eval duration: {rag.get_prompt_eval_duration()}s")
+    print(f"* Eval count: {rag.get_eval_count()}")
+    print(f"* Eval duration: {rag.get_eval_duration()}s\n")

 def main():
     import sys
@@ -114,41 +204,43 @@ def main():
     parser.add_argument("--qdrant-port", type=int, default=6333, help="Qdrant port")
     parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama API URL")
     parser.add_argument("--emb-model", default="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", help="Embedding model")
-    parser.add_argument("--chat-model", default="qwen2.5:3b", help="Ollama generation model")
+    parser.add_argument("--chat-model", default="phi4-mini:3.8b", help="Ollama generation model")
     parser.add_argument("--topk", type=int, default=6, help="Number of documents to retrieve")
+    parser.add_argument("--verbose", default=False, action=argparse.BooleanOptionalAction, help="Print intermediate service messages")
+    parser.add_argument("--show-stats", default=False, action=argparse.BooleanOptionalAction, help="Print answer statistics (has no effect with --stream)")
+    parser.add_argument("--stream", default=False, action=argparse.BooleanOptionalAction, help="Stream the answer as it is generated")
     args = parser.parse_args()

     if not args.query and not args.interactive:
         print("Error: provide a query (--query) and/or use interactive mode (--interactive)")
         sys.exit(1)

-    print(f"Ollama address: {args.ollama_url}")
-    print(f"Qdrant address: {args.qdrant_host}:{args.qdrant_port}")
-    print(f"Embedding model: {args.emb_model}")
-    print(f"Chat model: {args.chat_model}")
-    print(f"Documents to retrieve: {args.topk}")
+    print_v(f"Ollama address: {args.ollama_url}", args.verbose)
+    print_v(f"Qdrant address: {args.qdrant_host}:{args.qdrant_port}", args.verbose)
+    print_v(f"Embedding model: {args.emb_model}", args.verbose)
+    print_v(f"Chat model: {args.chat_model}", args.verbose)
+    print_v(f"Documents to retrieve: {args.topk}", args.verbose)

     if os.path.exists('sys_prompt.txt'):
-        print("sys_prompt.txt will be used!")
+        print_v("sys_prompt.txt will be used!", args.verbose)

-    print("\nInitializing models for the first time...")
-    rag = LocalRAGSystem(
+    print_v("\nInitializing models for the first time...", args.verbose)
+    rag = RagSystem(
         ollama_url=args.ollama_url,
         qdrant_host=args.qdrant_host,
         qdrant_port=args.qdrant_port,
         embed_model=args.emb_model,
         chat_model=args.chat_model
     )
-    print(f"Models loaded. If the answer is poor, rephrase the query, pass --chat-model, or improve the RAG source data")
+    print_v(f"Models loaded. If the answer is poor, rephrase the query, pass --chat-model, or improve the RAG source data", args.verbose)
+
+    query = None

     if args.interactive:
-        print("\nINTERACTIVE MODE")
-        print("Enter a query (or 'exit' to quit)\n")
+        print_v("\nINTERACTIVE MODE", args.verbose)
+        print_v("Enter a query (or 'exit' to quit)\n", args.verbose)

     if args.query:
         query = args.query.strip()
         print(f">>> {query}")
-    else:
-        query = input(">>> ").strip()

     while True:
         try:
@@ -158,34 +250,103 @@ def main():
             if not query or query == "":
                 continue

-            if query.lower() == "exit":
-                print("\n*** Shutting down")
+            if query.lower() == "help":
+                print("<<< Interactive mode commands:")
+                print("help -- show this help")
+                print("save -- save the dialogue to a file")
+                print("exit -- quit\n")
+                query = None
+                continue
+
+            if query.strip().lower() == "save":
+                import datetime
+                timestamp = int(time.time())
+                dt = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%S')
+                filename = f"chats/chat-{timestamp}.md"
+
+                markdown_content = f"# Dialogue history from {dt}\n\n"
+                markdown_content += "## Dialogue parameters\n"
+                markdown_content += f"```\nargs = {args}\n```\n"
+                markdown_content += f"```\nemb_model = {rag.emb_model}\n```\n"
+
+                for entry in rag.conversation_history:
+                    if entry['role'] == 'user':
+                        markdown_content += "## User\n\n"
+                    elif entry['role'] == 'assistant':
+                        markdown_content += "## Model\n\n"
+                        docs = rag.prepare_sources(entry['docs']).replace("```", "")
+                        markdown_content += f"```\n{docs}\n```\n\n"
+                    markdown_content += f"{entry['content']}\n\n"
+
+                os.makedirs('chats', exist_ok=True)
+                with open(filename, 'w') as fp:
+                    fp.write(markdown_content)
+
+                print(f"<<< Dialogue saved to: {filename}\n")
+                query = None
+                continue
+
+            if query.strip().lower() == "exit":
+                print_v("\n*** Shutting down", args.verbose)
                 break

-            print("\nSearching for relevant documents...")
+            print_v("\nSearching for relevant documents...", args.verbose)
             context_docs = rag.search_qdrant(query, top_k=args.topk)
             if not context_docs:
-                print("No relevant documents found.")
+                print_v("No relevant documents found.", args.verbose)
                 if args.interactive:
+                    query = None
                     continue
                 else:
                     break

-            print(f"Found {len(context_docs)} relevant documents:")
-            print_sources(context_docs)
+            print_v(f"Found {len(context_docs)} relevant documents", args.verbose)
+            # print_sources(context_docs)

             prompt = rag.prepare_prompt(query=query, context_docs=context_docs)
             if args.show_prompt:
-                print("\nFull system prompt: -------------------------------\n")
-                print(f"{prompt}\n---------------------------------------------------\n")
+                print("\nFull system prompt: -------------------------------")
+                print(f"{prompt}\n---------------------------------------------------")
+
+            print_v("\nGenerating answer...\n", args.verbose)
+
+            if args.stream:
+                # Print the marker separately so the "<<< " prefix does not leak into the saved history
+                print("\n<<< ", end='', flush=True)
+                answer = ""
+                try:
+                    for message_part in rag.generate_answer_stream(prompt):
+                        answer += message_part
+                        print(message_part, end='', flush=True)
+                    print()
+                except RuntimeError as e:
+                    answer = str(e)
+                    print(f"\n{answer}\n")
+            else:
+                answer = rag.generate_answer(prompt)
+                print(f"<<< {answer}\n")
+
+            print_sources(context_docs)
+            if args.show_stats and not args.stream:
+                print_stats(rag)
+
+            rag.conversation_history.append({
+                "role": "user",
+                "content": query,
+            })
+
+            rag.conversation_history.append({
+                "role": "assistant",
+                "docs": context_docs,
+                "content": answer,
+            })
+
+            if args.interactive:
+                query = None
+            else:
+                break

-            print("Generating answer...")
-            answer = rag.generate_answer(prompt)
-            print(f"\n<<< {answer}\n===================================================\n")
-            query = None
         except KeyboardInterrupt:
             print("\n*** Shutting down")
             break
+
         except Exception as e:
             print(f"Error: {e}")
             break
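
A note on the history plumbing added above: Ollama's `/api/generate` endpoint ignores the `"messages"` field, so `conversation_history` is collected but never reaches the model yet. Below is a minimal sketch of what the follow-up could look like once the calls move to Ollama's `/api/chat` endpoint, which does accept prior turns; the `chat_with_history` helper name and its signature are assumptions for illustration, not part of this patch.

```python
import requests

def chat_with_history(ollama_url: str, model: str, history: list, user_text: str) -> str:
    # /api/chat, unlike /api/generate, accepts earlier turns as "messages",
    # so the model actually sees the conversation so far.
    # Keep only the keys Ollama expects; assistant entries above also carry "docs".
    messages = [{"role": m["role"], "content": m["content"]} for m in history]
    messages.append({"role": "user", "content": user_text})
    resp = requests.post(
        f"{ollama_url}/api/chat",
        json={"model": model, "messages": messages, "stream": False},
        timeout=600,
    )
    resp.raise_for_status()
    # /api/chat wraps the reply in a "message" object rather than a "response" string
    return resp.json()["message"]["content"].strip()
```

Until then, the new flags can be exercised as-is, e.g. `python rag.py --interactive --stream --verbose`, assuming Qdrant and Ollama are already running.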