2025-08-26 01:10:18 +08:00
parent 3b15a6a19e
commit 3f2491db27
12 changed files with 287 additions and 76 deletions

.gitignore vendored

@@ -2,6 +2,7 @@
/rag/input_html/*
/rag/input_md/*
/rag/sys_prompt.txt
/rag/chats/*.md
.old/
.venv/

.idea/.gitignore generated vendored Normal file

@@ -0,0 +1,3 @@
# Default ignored files
/shelf/
/workspace.xml


@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

.idea/misc.xml generated Normal file

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Python 3.13" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.13 (ollama)" project-jdk-type="Python SDK" />
</project>

.idea/modules.xml generated Normal file

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/ollama.iml" filepath="$PROJECT_DIR$/.idea/ollama.iml" />
</modules>
</component>
</project>

.idea/ollama.iml generated Normal file

@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.13 (ollama)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="GOOGLE" />
<option name="myDocStringFormat" value="Google" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="py.test" />
</component>
</module>

.idea/vcs.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>


@@ -38,8 +38,9 @@ RAG (Retrieval-Augmented Generation) — это архитектура, кото
rag/
├── input_html/ # Входные файлы HTML, загруженные из Confluence
├── input_md/ # Входные (конвертированные) файлы Markdown
├── download.sh # Скрипт для загрузки страниц из Confluence
├── convert.py # Скрипт конвертации HTML в Markdown
├── chats/ # Директория для сохранения чатов
├── download.sh # Скрипт для загрузки страниц из Confluence
├── convert.py # Скрипт конвертации HTML в Markdown
├── vectorize.py # Скрипт векторизации Markdown
├── rag.py # Основной скрипт RAG системы
├── clear.sh # Скрипт очистки html/md файлов


@@ -3,6 +3,7 @@
* [ ] Описать подготовку знаний в Open WebUI
* [ ] Обработка pdf, json, ...
* [ ] Ранжирование результатов
* [ ] Конвертирование таблиц в списки
* [ ] Режим диалога (запоминание запросов и ответов)
* [ ] API
* [ ] Telegram-бот

rag/chats/.gitkeep Normal file


@@ -55,7 +55,7 @@ for PAGE_ID in "$@"; do
CONTENT=${CONTENT//src=\"\//src=\"$CONF_URL}
URL="$CONF_URL/pages/viewpage.action?pageId=$PAGE_ID"
echo -e "@@$URL@@\n<br><html><body>Исходная страница: <a href=$URL>$URL</a><br><br><h1>$TITLE</h1>$CONTENT</body></html>" > "$HTML_FILEPATH"
echo -e "@@$URL@@\n<br><html><body>Исходная страница: <a href=\"$URL\">$URL</a><br><br><h1>$TITLE</h1>$CONTENT</body></html>" > "$HTML_FILEPATH"
echo "Сохранено: $OUTPUT_PATH/$TITLE.html"
CHILD_IDS=$(echo "$RESPONSE" | jq -r '.children.page.results[]?.id' 2>/dev/null)


@@ -1,31 +1,34 @@
import argparse
import os
import hashlib
import requests
import json
import time
from sentence_transformers import SentenceTransformer
class LocalRAGSystem:
class RagSystem:
def __init__(self,
md_folder: str = "input_md",
ollama_url: str = "http://localhost:11434",
qdrant_host: str = "localhost",
qdrant_port: int = 6333,
embed_model: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
chat_model: str = "qwen2.5:3b"):
chat_model: str = "phi4-mini:3.8b"):
self.md_folder = md_folder
self.ollama_url = ollama_url
self.qdrant_host = qdrant_host
self.qdrant_port = qdrant_port
self.embed_model = embed_model
self.chat_model = chat_model
self.emb_model = SentenceTransformer(embed_model)
self.prompt = ""
self.conversation_history = []
self.load_chat_model()
def get_embedding(self, text: str):
return self.emb_model.encode(text, show_progress_bar=False).tolist()
def load_chat_model(self):
url = f"{self.ollama_url}/api/generate"
body = {"model": self.chat_model}
requests.post(url, json=body, timeout=600)
def search_qdrant(self, query: str, top_k: int = 6):
query_vec = self.get_embedding(query)
query_vec = self.emb_model.encode(query, show_progress_bar=False).tolist()
url = f"http://{self.qdrant_host}:{self.qdrant_port}/collections/rag_collection/points/search"
payload = {
"vector": query_vec,
@@ -38,69 +41,156 @@ class LocalRAGSystem:
results = resp.json().get("result", [])
return results
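
As a reading aid, here is a hedged sketch of what a single hit returned by search_qdrant looks like as the rest of rag.py consumes it. Only the payload keys are taken from the code (prepare_sources reads "text", print_sources reads "filename" and "url"); the id, score and all concrete values are hypothetical.

```python
# Hypothetical Qdrant hit as consumed by prepare_sources()/print_sources().
# Only the payload keys are grounded in the code; id, score and values are examples.
hit = {
    "id": 42,
    "score": 0.83,
    "payload": {
        "text": "Фрагмент Markdown-документа ...",
        "filename": "example-page.md",
        "url": "https://confluence.example.com/pages/viewpage.action?pageId=123456",
    },
}
```
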
def generate_answer(self, prompt: str):
url = f"{self.ollama_url}/api/generate"
body = {
"model": self.chat_model,
"prompt": prompt,
"stream": False
}
resp = requests.post(url, json=body, timeout=600)
if resp.status_code != 200:
return f"Ошибка генерации ответа: {resp.status_code} {resp.text}"
return resp.json().get("response", "").strip()
def prepare_sources(self, context_docs: list):
sources = ""
for idx, doc in enumerate(context_docs, start=1):
text = doc['payload'].get("text", "").strip()
sources = f"{sources}\n--- Source [{idx}] ---\n{text}\n"
return sources.strip()
sources = f"{sources}\n<source id=\"{idx}\">\n{text}\n</source>\n"
return sources
def prepare_prompt(self, query: str, context_docs: list):
sources = self.prepare_sources(context_docs)
if os.path.exists('sys_prompt.txt'):
with open('sys_prompt.txt', 'r') as fp:
return fp.read().replace("{{sources}}", sources).replace("{{query}}", query)
prompt_template = fp.read()
return prompt_template.replace("{{sources}}", sources).replace("{{query}}", query)
else:
return f"""
Please provide an answer based solely on the provided sources.
It is prohibited to generate an answer based on your pretrained data.
If uncertain, ask the user for clarification.
Respond in the same language as the user's query.
If there are no sources in context, clearly state that.
If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
When referencing information from a source, cite the appropriate source(s) using their corresponding numbers.
Every answer should include at least one source citation.
Only cite a source when you are explicitly referencing it.
return f"""### Your role
You are a helpful assistant that can answer questions based on the provided sources.
If none of the sources are helpful, you should indicate that.
For example:
### Your user
User is a human who is asking a question related to the provided sources.
--- Source 1 ---
The sky is red in the evening and blue in the morning.
### Your task
Please provide an answer based solely on the provided sources and the conversation history.
--- Source 2 ---
Water is wet when the sky is red.
### Rules
- You **MUST** respond in the SAME language as the user's query.
- If uncertain, you **MUST** ask the user for clarification.
- If there are no sources in context, you **MUST** clearly state that.
- If none of the sources are helpful, you **MUST** clearly state that.
- If you are unsure about the answer, you **MUST** clearly state that.
- If the context is unreadable or of poor quality, you **MUST** inform the user and provide the best possible answer.
- When referencing information from a source, you **MUST** cite the appropriate source(s) using their corresponding numbers.
- **Only include inline citations using [id] (e.g., [1], [2]) when the <source> tag includes an id attribute.**
- You MUST NEVER add <source> or any other XML/HTML tags in your response.
- You MUST NOT cite a source if its <source> tag does not contain an id attribute.
- Every answer MAY include at least one source citation.
- Only cite a source when you are explicitly referencing it.
- You may also cite multiple sources if they are all relevant to the question.
- Ensure citations are concise and directly related to the information provided.
- You CAN format your responses using Markdown.
Query: When is water wet?
Answer: Water will be wet when the sky is red [2], which occurs in the evening [1].
### Example of sources list:
Now it's your turn. Below are several numbered sources of information:
{context}
```
<source id="1">The sky is red in the evening and blue in the morning.</source>
<source id="2">Water is wet when the sky is red.</source>
<query>When is water wet?</query>
```
Response:
```
Water will be wet when the sky is red [2], which occurs in the evening [1].
```
User query: {query}
Your answer:
"""
### Now let's start!
```
{sources}
<query>{query}</query>
```
Respond."""
def generate_answer(self, prompt: str):
url = f"{self.ollama_url}/api/generate"
body = {
"model": self.chat_model,
"prompt": prompt,
"messages": self.conversation_history,
"stream": False,
# "options": {
# "temperature": 0.4,
# "top_p": 0.1,
# },
}
self.response = requests.post(url, json=body, timeout=600)
if self.response.status_code != 200:
return f"Ошибка генерации ответа: {self.response.status_code} {self.response.text}"
return self.response.json().get("response", "").strip()
def generate_answer_stream(self, prompt: str):
url = f"{self.ollama_url}/api/generate"
body = {
"model": self.chat_model,
"prompt": prompt,
"messages": self.conversation_history,
"stream": True
}
resp = requests.post(url, json=body, stream=True, timeout=600)
if resp.status_code != 200:
raise RuntimeError(f"Ошибка генерации ответа: {resp.status_code} {resp.text}")
full_answer = ""
for chunk in resp.iter_lines():
if chunk:
try:
decoded_chunk = chunk.decode('utf-8')
data = json.loads(decoded_chunk)
if "response" in data:
yield data["response"]
full_answer += data["response"]
elif "error" in data:
print(f"Stream error: {data['error']}")
break
except json.JSONDecodeError:
print(f"Could not decode JSON from chunk: {chunk.decode('utf-8')}")
except Exception as e:
print(f"Error processing chunk: {e}")
def get_prompt_eval_count(self):
return self.response.json().get("prompt_eval_count", 0)
def get_prompt_eval_duration(self):
return self.response.json().get("prompt_eval_duration", 0) / (10 ** 9)
def get_eval_count(self):
return self.response.json().get("eval_count", 0)
def get_eval_duration(self):
return self.response.json().get("eval_duration", 0) / (10 ** 9)
def get_total_duration(self):
return self.response.json().get("total_duration", 0) / (10 ** 9)
def get_tps(self):
eval_count = self.get_eval_count()
eval_duration = self.get_eval_duration()
if eval_count == 0 or eval_duration == 0:
return 0
return eval_count / eval_duration
def print_sources(context_docs: list):
print("\n\nИсточники:")
for idx, doc in enumerate(context_docs, start=1):
filename = doc['payload'].get("filename", None)
url = doc['payload'].get("url", None)
title = filename
if url is None:
url = "(нет веб-ссылки)"
print(f"{idx}. {title}\n {url}")
print(f"{idx}. {title}\n {url}\n")
def print_v(text: str, is_verbose: bool):
if is_verbose:
print(text)
def print_stats(rag: RagSystem):
print("Статистика:")
print(f"* Time: {rag.get_total_duration()}s")
print(f"* TPS: {rag.get_tps()}")
print(f"* PEC: {rag.get_prompt_eval_count()}")
print(f"* PED: {rag.get_prompt_eval_duration()}s")
print(f"* EC: {rag.get_eval_count()}")
print(f"* ED: {rag.get_eval_duration()}s\n")
def main():
import sys
@@ -114,41 +204,43 @@ def main():
parser.add_argument("--qdrant-port", type=int, default=6333, help="Qdrant port")
parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama API URL")
parser.add_argument("--emb-model", default="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", help="Модель эмбеддинга")
parser.add_argument("--chat-model", default="qwen2.5:3b", help="Модель генерации Ollama")
parser.add_argument("--chat-model", default="phi4-mini:3.8b", help="Модель генерации Ollama")
parser.add_argument("--topk", type=int, default=6, help="Количество документов для поиска")
parser.add_argument("--verbose", default=False, action=argparse.BooleanOptionalAction, help="Выводить промежуточные служебные сообщения")
parser.add_argument("--show-stats", default=False, action=argparse.BooleanOptionalAction, help="Выводить статистику об ответе (не работает с --stream)")
parser.add_argument("--stream", default=False, action=argparse.BooleanOptionalAction, help="Выводить статистику об ответе")
args = parser.parse_args()
if not args.query and not args.interactive:
print("Ошибка: укажите запрос (--query) и/или используйте интерактивный режим (--interactive)")
sys.exit(1)
print(f"Адрес ollama: {args.ollama_url}")
print(f"Адрес qdrant: {args.qdrant_host}:{args.qdrant_port}")
print(f"Модель эмбеддинга: {args.emb_model}")
print(f"Модель чата: {args.chat_model}")
print(f"Документов для поиска: {args.topk}")
print_v(f"Адрес ollama: {args.ollama_url}", args.verbose)
print_v(f"Адрес qdrant: {args.qdrant_host}:{args.qdrant_port}", args.verbose)
print_v(f"Модель эмбеддинга: {args.emb_model}", args.verbose)
print_v(f"Модель чата: {args.chat_model}", args.verbose)
print_v(f"Документов для поиска: {args.topk}", args.verbose)
if os.path.exists('sys_prompt.txt'):
print("Будет использоваться sys_prompt.txt!")
print_v("Будет использоваться sys_prompt.txt!", args.verbose)
print("\nПервая инициализация моделей...")
rag = LocalRAGSystem(
print_v("\nПервая инициализация моделей...", args.verbose)
rag = RagSystem(
ollama_url=args.ollama_url,
qdrant_host=args.qdrant_host,
qdrant_port=args.qdrant_port,
embed_model=args.emb_model,
chat_model=args.chat_model
)
print(f"Модели загружены. Если ответ плохой, переформулируйте запрос, укажите --chat-model или улучшите исходные данные RAG")
print_v(f"Модели загружены. Если ответ плохой, переформулируйте запрос, укажите --chat-model или улучшите исходные данные RAG", args.verbose)
query = None
if args.interactive:
print("\nИНТЕРАКТИВНЫЙ РЕЖИМ")
print("Можете вводить запрос (или 'exit' для выхода)\n")
print_v("\nИНТЕРАКТИВНЫЙ РЕЖИМ", args.verbose)
print_v("Можете вводить запрос (или 'exit' для выхода)\n", args.verbose)
if args.query:
query = args.query.strip()
print(f">>> {query}")
else:
query = input(">>> ").strip()
while True:
try:
@@ -158,34 +250,103 @@ def main():
if not query or query == "":
continue
if query.lower() == "exit":
print("\n*** Завершение работы")
if query.lower() == "help":
print("<<< Команды итерактивного режима:")
print("save -- сохранить диалог в файл")
print("exit -- выход\n")
query = None
continue
if query.strip().lower() == "save":
import datetime
timestamp = int(time.time())
dt = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ')
filename = f"chats/chat-{timestamp}.md"
markdown_content = f"# История диалога от {dt}\n\n"
markdown_content += f"## Параметры диалога\n"
markdown_content += f"```\nargs = {args}\n```\n"
markdown_content += f"```\nemb_model = {rag.emb_model}\n```\n"
for entry in rag.conversation_history:
if entry['role'] == 'user':
markdown_content += f"## Пользователь\n\n"
elif entry['role'] == 'assistant':
markdown_content += f"## Модель\n\n"
docs = rag.prepare_sources(entry['docs']).replace("```", "")
markdown_content += f"```\n{docs}\n```\n\n"
markdown_content += f"{entry['content']}\n\n"
os.makedirs('chats', exist_ok=True)
with open(filename, 'w') as fp:
fp.write(markdown_content)
print(f"<<< Диалог сохранён в файл: {filename}\n")
query = None
continue
if query.strip().lower() == "exit":
print_v("\n*** Завершение работы", args.verbose)
break
print("\nПоиск релевантных документов...")
print_v("\nПоиск релевантных документов...", args.verbose)
context_docs = rag.search_qdrant(query, top_k=args.topk)
if not context_docs:
print("Релевантные документы не найдены.")
print_v("Релевантные документы не найдены.", args.verbose)
if args.interactive:
query = None
continue
else:
break
print(f"Найдено {len(context_docs)} релевантных документов:")
print_sources(context_docs)
print_v(f"Найдено {len(context_docs)} релевантных документов", args.verbose)
# print_sources(context_docs)
prompt = rag.prepare_prompt(query=query, context_docs=context_docs)
if args.show_prompt:
print("\nПолный системный промпт: --------------------------\n")
print(f"{prompt}\n---------------------------------------------------\n")
print("\nПолный системный промпт: --------------------------")
print(f"{prompt}\n---------------------------------------------------")
print_v("\nГенерация ответа...\n", args.verbose)
if args.stream:
answer = "\n<<< "
print(answer, end='', flush=True)
try:
for message_part in rag.generate_answer_stream(prompt):
answer += message_part
print(message_part, end='', flush=True)
except RuntimeError as e:
answer = str(e)
print(f"\n{answer}\n===================================================\n")
else:
answer = rag.generate_answer(prompt)
print(f"<<< {answer}\n")
print_sources(context_docs)
if args.show_stats and not args.stream:
print_stats(rag)
rag.conversation_history.append({
"role": "user",
"content": query,
})
rag.conversation_history.append({
"role": "assistant",
"docs": context_docs,
"content": answer,
})
if args.interactive:
query = None
else:
break
print("Генерация ответа...")
answer = rag.generate_answer(prompt)
print(f"\n<<< {answer}\n===================================================\n")
query = None
except KeyboardInterrupt:
print("\n*** Завершение работы")
break
except Exception as e:
print(f"Ошибка: {e}")
break
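
For reference, after one question/answer turn the loop above leaves conversation_history in the following shape (a sketch with example values); the assistant entry also carries the retrieved docs so that the interactive save command can embed them in the chat file.

```python
# Sketch of rag.conversation_history after a single turn; values are examples.
conversation_history = [
    {"role": "user", "content": "Когда вода мокрая?"},
    {
        "role": "assistant",
        "docs": [],  # the Qdrant hits used for this answer, as returned by search_qdrant()
        "content": "Вода мокрая, когда небо красное [2], то есть вечером [1].",
    },
]
```
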