From defc30cad000f30b5c15ec72038b24756ba79de1 Mon Sep 17 00:00:00 2001 From: AnthonyAxenov Date: Sun, 31 Aug 2025 00:51:42 +0800 Subject: [PATCH] =?UTF-8?q?=D0=94=D0=BE=D1=80=D0=B0=D0=B1=D0=BE=D1=82?= =?UTF-8?q?=D0=BA=D0=B0=20rag,=20=D1=83=D0=B4=D0=B0=D0=BB=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D1=81=D0=BA=D1=80=D0=B8=D0=BF=D1=82=D0=BE=D0=B2?= =?UTF-8?q?=20=D0=BC=D0=BE=D0=B4=D0=B5=D0=BB=D0=B5=D0=B9,=20=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D1=83=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20?= =?UTF-8?q?README?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/launch.json | 28 ++ README.md | 8 +- models/codegemma/2b | 4 - models/codegemma/7b | 4 - models/codellama/13b | 4 - models/codellama/34b | 4 - models/codellama/7b | 4 - models/codeqwen1.5/1.5b | 4 - models/codeqwen1.5/7b | 4 - models/codestral/22b | 4 - models/deepcoder/1.5b | 4 - models/deepcoder/14b | 4 - models/deepseek-coder-v2/16b | 4 - models/deepseek-coder/1.3b | 4 - models/deepseek-coder/33b | 4 - models/deepseek-coder/6.7b | 4 - models/deepseek-r1/1.5b | 4 - models/deepseek-r1/14b | 4 - models/deepseek-r1/32b | 4 - models/deepseek-r1/7b | 4 - models/deepseek-r1/8b | 4 - models/devstral/24b | 4 - models/dolphin3/8b | 4 - models/gemma/2b | 4 - models/gemma/7b | 4 - models/gemma3/12b | 4 - models/gemma3/1b | 4 - models/gemma3/270m | 4 - models/gemma3/27b | 4 - models/gemma3/4b | 4 - models/gemma3n/e2b | 4 - models/gemma3n/e4b | 4 - models/gpt-oss/20b | 4 - models/granite-code/20b | 4 - models/granite-code/34b | 4 - models/granite-code/3b | 4 - models/granite-code/8b | 4 - models/llama2/13b | 4 - models/llama2/7b | 4 - models/llama3.1/8b | 4 - models/llama3.2/1b | 4 - models/llama3.2/3b | 4 - models/llama3/8b | 4 - models/llava-llama3/8b | 4 - models/magistral/24b | 4 - models/mistral-nemo/12b | 4 - models/mistral-small/22b | 4 - models/mistral-small/24b | 4 - models/mistral/7b | 4 - models/mixtral/8x7b | 4 - models/mxbai-embed-large/latest | 4 - models/nomic-embed-text/latest | 4 - models/openthinker/32b | 4 - models/openthinker/7b | 4 - models/phi/2.7b | 4 - models/phi3.5/3.8b | 4 - models/phi3/14b | 4 - models/phi3/3.8b | 4 - models/phi3/instruct | 4 - models/phi3/medium | 4 - models/phi3/mini | 4 - models/phi4/14b | 4 - models/phi4/mini-reasoning:3.8b | 4 - models/phi4/mini:3.8b | 4 - models/phi4/reasoning:14b | 4 - models/qwen/0.5b | 4 - models/qwen/1.8b | 4 - models/qwen/14b | 4 - models/qwen/32b | 4 - models/qwen/4b | 4 - models/qwen/7b | 4 - models/qwen2.5-coder/0.5b | 4 - models/qwen2.5-coder/1.5b | 4 - models/qwen2.5-coder/14b | 4 - models/qwen2.5-coder/32b | 4 - models/qwen2.5-coder/3b | 4 - models/qwen2.5-coder/7b | 4 - models/qwen2.5/0.5b | 4 - models/qwen2.5/1.5b | 4 - models/qwen2.5/14b | 4 - models/qwen2.5/32b | 4 - models/qwen2.5/3b | 4 - models/qwen2.5/7b | 4 - models/qwen2/0.5b | 4 - models/qwen2/1.5b | 4 - models/qwen2/7b | 4 - models/qwen3-coder/30b | 4 - models/qwen3/0.6b | 4 - models/qwen3/1.7b | 4 - models/qwen3/14b | 4 - models/qwen3/30b | 4 - models/qwen3/32b | 4 - models/qwen3/4b | 4 - models/qwen3/8b | 4 - models/qwq/32b | 4 - models/smollm2/1.7b | 4 - models/smollm2/135m | 4 - models/smollm2/360m | 4 - models/stable-code/3b | 4 - models/stable-code/instruct | 4 - models/starcoder2/15b | 4 - models/starcoder2/3b | 4 - models/starcoder2/7b | 4 - rag/CHECKLIST.md | 17 + rag/README.md | 174 ++++++-- rag/rag.py | 704 ++++++++++++++++++-------------- rag/vectorize.py | 43 +- up | 2 +- 108 files changed, 635 insertions(+), 745 deletions(-) create mode 
100644 .vscode/launch.json delete mode 100755 models/codegemma/2b delete mode 100755 models/codegemma/7b delete mode 100755 models/codellama/13b delete mode 100755 models/codellama/34b delete mode 100755 models/codellama/7b delete mode 100755 models/codeqwen1.5/1.5b delete mode 100755 models/codeqwen1.5/7b delete mode 100755 models/codestral/22b delete mode 100755 models/deepcoder/1.5b delete mode 100755 models/deepcoder/14b delete mode 100755 models/deepseek-coder-v2/16b delete mode 100755 models/deepseek-coder/1.3b delete mode 100755 models/deepseek-coder/33b delete mode 100755 models/deepseek-coder/6.7b delete mode 100755 models/deepseek-r1/1.5b delete mode 100755 models/deepseek-r1/14b delete mode 100755 models/deepseek-r1/32b delete mode 100755 models/deepseek-r1/7b delete mode 100755 models/deepseek-r1/8b delete mode 100755 models/devstral/24b delete mode 100755 models/dolphin3/8b delete mode 100755 models/gemma/2b delete mode 100755 models/gemma/7b delete mode 100755 models/gemma3/12b delete mode 100755 models/gemma3/1b delete mode 100755 models/gemma3/270m delete mode 100755 models/gemma3/27b delete mode 100755 models/gemma3/4b delete mode 100755 models/gemma3n/e2b delete mode 100755 models/gemma3n/e4b delete mode 100755 models/gpt-oss/20b delete mode 100755 models/granite-code/20b delete mode 100755 models/granite-code/34b delete mode 100755 models/granite-code/3b delete mode 100755 models/granite-code/8b delete mode 100755 models/llama2/13b delete mode 100755 models/llama2/7b delete mode 100755 models/llama3.1/8b delete mode 100755 models/llama3.2/1b delete mode 100755 models/llama3.2/3b delete mode 100755 models/llama3/8b delete mode 100755 models/llava-llama3/8b delete mode 100755 models/magistral/24b delete mode 100755 models/mistral-nemo/12b delete mode 100755 models/mistral-small/22b delete mode 100755 models/mistral-small/24b delete mode 100755 models/mistral/7b delete mode 100755 models/mixtral/8x7b delete mode 100755 models/mxbai-embed-large/latest delete mode 100755 models/nomic-embed-text/latest delete mode 100755 models/openthinker/32b delete mode 100755 models/openthinker/7b delete mode 100755 models/phi/2.7b delete mode 100755 models/phi3.5/3.8b delete mode 100755 models/phi3/14b delete mode 100755 models/phi3/3.8b delete mode 100755 models/phi3/instruct delete mode 100755 models/phi3/medium delete mode 100755 models/phi3/mini delete mode 100755 models/phi4/14b delete mode 100755 models/phi4/mini-reasoning:3.8b delete mode 100755 models/phi4/mini:3.8b delete mode 100755 models/phi4/reasoning:14b delete mode 100755 models/qwen/0.5b delete mode 100755 models/qwen/1.8b delete mode 100755 models/qwen/14b delete mode 100755 models/qwen/32b delete mode 100755 models/qwen/4b delete mode 100755 models/qwen/7b delete mode 100755 models/qwen2.5-coder/0.5b delete mode 100755 models/qwen2.5-coder/1.5b delete mode 100755 models/qwen2.5-coder/14b delete mode 100755 models/qwen2.5-coder/32b delete mode 100755 models/qwen2.5-coder/3b delete mode 100755 models/qwen2.5-coder/7b delete mode 100755 models/qwen2.5/0.5b delete mode 100755 models/qwen2.5/1.5b delete mode 100755 models/qwen2.5/14b delete mode 100755 models/qwen2.5/32b delete mode 100755 models/qwen2.5/3b delete mode 100755 models/qwen2.5/7b delete mode 100755 models/qwen2/0.5b delete mode 100755 models/qwen2/1.5b delete mode 100755 models/qwen2/7b delete mode 100755 models/qwen3-coder/30b delete mode 100755 models/qwen3/0.6b delete mode 100755 models/qwen3/1.7b delete mode 100755 models/qwen3/14b delete mode 100755 
models/qwen3/30b delete mode 100755 models/qwen3/32b delete mode 100755 models/qwen3/4b delete mode 100755 models/qwen3/8b delete mode 100755 models/qwq/32b delete mode 100755 models/smollm2/1.7b delete mode 100755 models/smollm2/135m delete mode 100755 models/smollm2/360m delete mode 100755 models/stable-code/3b delete mode 100755 models/stable-code/instruct delete mode 100755 models/starcoder2/15b delete mode 100755 models/starcoder2/3b delete mode 100755 models/starcoder2/7b create mode 100644 rag/CHECKLIST.md diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..04186f1 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,28 @@ +{ + // Используйте IntelliSense, чтобы узнать о возможных атрибутах. + // Наведите указатель мыши, чтобы просмотреть описания существующих атрибутов. + // Для получения дополнительной информации посетите: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "rag", + "type": "debugpy", + "request": "launch", + "program": "${workspaceFolder}/rag/rag.py", + "args": [ + "--show-stats", + "--interactive", + "--use-rank", + // "--stream", + "--show-prompt", + "--sys-prompt", + "${workspaceFolder}/rag/lis-sp.md", + "--qdrant-collection", + "rag-2000-300", + "--query", + "привет" + ], + "console": "integratedTerminal" + } + ] +} diff --git a/README.md b/README.md index 5d7b151..6d40f76 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,7 @@ Набор скриптов для быстрого запуска локальных LLM. -Модели подбираются вручную, примерно в пределазх 40 млрд параметров (обычно, максимум 32b или 34b). -Такие модели наиболее реально запускать на домашних ПК разных мощностей. +Модели примерно до 40 млрд параметров (обычно 32b) наиболее реально запускать на домашних ПК разных мощностей. Меньше параметров → меньше памяти на диске и в ОЗУ → выше скорость (tps) → ниже качество. 
@@ -16,10 +15,9 @@ ``` ./ -├── models/ # Директория со скриптами установки моделей ollama ├── rag/ # Директория для работы с RAG -├── up # Скрипт для запуска ollama + open-webui -├── down # Скрипт для остановки ollama + open-webui +├── up # Скрипт для запуска docker-стека +├── down # Скрипт для остановки docker-стека ├── ollama # Скрипт для выполнения произвольных команд ollama ├── ollama.code-workspace # Конфигурация VSCode Workspace └── README.md # Этот файл diff --git a/models/codegemma/2b b/models/codegemma/2b deleted file mode 100755 index dca6e32..0000000 --- a/models/codegemma/2b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codegemma/tags - -docker exec -it ai-ollama ollama run codegemma:2b --verbose diff --git a/models/codegemma/7b b/models/codegemma/7b deleted file mode 100755 index c42cd16..0000000 --- a/models/codegemma/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codegemma/tags - -docker exec -it ai-ollama ollama run codegemma:7b --verbose diff --git a/models/codellama/13b b/models/codellama/13b deleted file mode 100755 index 0fde673..0000000 --- a/models/codellama/13b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codellama/tags - -docker exec -it ai-ollama ollama run codellama:13b --verbose diff --git a/models/codellama/34b b/models/codellama/34b deleted file mode 100755 index de83d9c..0000000 --- a/models/codellama/34b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codellama/tags - -docker exec -it ai-ollama ollama run codellama:34b --verbose diff --git a/models/codellama/7b b/models/codellama/7b deleted file mode 100755 index 06f8033..0000000 --- a/models/codellama/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codellama/tags - -docker exec -it ai-ollama ollama run codellama:7b --verbose diff --git a/models/codeqwen1.5/1.5b b/models/codeqwen1.5/1.5b deleted file mode 100755 index 392cd12..0000000 --- a/models/codeqwen1.5/1.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codeqwen/tags - -docker exec -it ai-ollama ollama run codeqwen:1.5b --verbose diff --git a/models/codeqwen1.5/7b b/models/codeqwen1.5/7b deleted file mode 100755 index 7dc4771..0000000 --- a/models/codeqwen1.5/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codeqwen/tags - -docker exec -it ai-ollama ollama run codeqwen:7b --verbose diff --git a/models/codestral/22b b/models/codestral/22b deleted file mode 100755 index d1d5d01..0000000 --- a/models/codestral/22b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/codestral/tags - -docker exec -it ai-ollama ollama run codestral:22b --verbose diff --git a/models/deepcoder/1.5b b/models/deepcoder/1.5b deleted file mode 100755 index 511deb1..0000000 --- a/models/deepcoder/1.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepcoder/tags - -docker exec -it ai-ollama ollama run deepcoder:1.5b --verbose diff --git a/models/deepcoder/14b b/models/deepcoder/14b deleted file mode 100755 index f80e037..0000000 --- a/models/deepcoder/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepcoder/tags - -docker exec -it ai-ollama ollama run deepcoder:14b --verbose diff --git a/models/deepseek-coder-v2/16b b/models/deepseek-coder-v2/16b deleted file mode 100755 index 6c2419c..0000000 --- a/models/deepseek-coder-v2/16b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# 
https://ollama.com/library/deepseek-coder-v2/tags - -docker exec -it ai-ollama ollama run deepseek-coder-v2:16b --verbose diff --git a/models/deepseek-coder/1.3b b/models/deepseek-coder/1.3b deleted file mode 100755 index 310bb06..0000000 --- a/models/deepseek-coder/1.3b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-coder/tags - -docker exec -it ai-ollama ollama run deepseek-coder:1.3b --verbose diff --git a/models/deepseek-coder/33b b/models/deepseek-coder/33b deleted file mode 100755 index 267da11..0000000 --- a/models/deepseek-coder/33b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-coder/tags - -docker exec -it ai-ollama ollama run deepseek-coder:33b --verbose diff --git a/models/deepseek-coder/6.7b b/models/deepseek-coder/6.7b deleted file mode 100755 index a15e389..0000000 --- a/models/deepseek-coder/6.7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-coder/tags - -docker exec -it ai-ollama ollama run deepseek-coder:6.7b --verbose diff --git a/models/deepseek-r1/1.5b b/models/deepseek-r1/1.5b deleted file mode 100755 index d618f0b..0000000 --- a/models/deepseek-r1/1.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-r1/tags - -docker exec -it ai-ollama ollama run deepseek-r1:1.5b --verbose diff --git a/models/deepseek-r1/14b b/models/deepseek-r1/14b deleted file mode 100755 index 1159fad..0000000 --- a/models/deepseek-r1/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-r1/tags - -docker exec -it ai-ollama ollama run deepseek-r1:14b --verbose diff --git a/models/deepseek-r1/32b b/models/deepseek-r1/32b deleted file mode 100755 index 01cb462..0000000 --- a/models/deepseek-r1/32b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-r1/tags - -docker exec -it ai-ollama ollama run deepseek-r1:32b --verbose diff --git a/models/deepseek-r1/7b b/models/deepseek-r1/7b deleted file mode 100755 index 93c24c8..0000000 --- a/models/deepseek-r1/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-r1/tags - -docker exec -it ai-ollama ollama run deepseek-r1:7b --verbose diff --git a/models/deepseek-r1/8b b/models/deepseek-r1/8b deleted file mode 100755 index 31d7727..0000000 --- a/models/deepseek-r1/8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/deepseek-r1/tags - -docker exec -it ai-ollama ollama run deepseek-r1:8b --verbose diff --git a/models/devstral/24b b/models/devstral/24b deleted file mode 100755 index bd91a2c..0000000 --- a/models/devstral/24b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/devstral/tags - -docker exec -it ai-ollama ollama run devstral:24b --verbose diff --git a/models/dolphin3/8b b/models/dolphin3/8b deleted file mode 100755 index 04dd3a2..0000000 --- a/models/dolphin3/8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/dolphin3/tags - -docker exec -it ai-ollama ollama run dolphin3:8b --verbose diff --git a/models/gemma/2b b/models/gemma/2b deleted file mode 100755 index 2660c07..0000000 --- a/models/gemma/2b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma/tags - -docker exec -it ai-ollama ollama run gemma:2b --verbose diff --git a/models/gemma/7b b/models/gemma/7b deleted file mode 100755 index e37c08b..0000000 --- a/models/gemma/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma/tags - -docker exec 
-it ai-ollama ollama run gemma:7b --verbose diff --git a/models/gemma3/12b b/models/gemma3/12b deleted file mode 100755 index 05a44f2..0000000 --- a/models/gemma3/12b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma3/tags - -docker exec -it ai-ollama ollama run gemma3:12b --verbose diff --git a/models/gemma3/1b b/models/gemma3/1b deleted file mode 100755 index 70b2e70..0000000 --- a/models/gemma3/1b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma3/tags - -docker exec -it ai-ollama ollama run gemma3:1b --verbose diff --git a/models/gemma3/270m b/models/gemma3/270m deleted file mode 100755 index a9386ad..0000000 --- a/models/gemma3/270m +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma3/tags - -docker exec -it ai-ollama ollama run gemma3:270m --verbose diff --git a/models/gemma3/27b b/models/gemma3/27b deleted file mode 100755 index 97e2104..0000000 --- a/models/gemma3/27b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma3/tags - -docker exec -it ai-ollama ollama run gemma3:27b --verbose diff --git a/models/gemma3/4b b/models/gemma3/4b deleted file mode 100755 index 6cdf054..0000000 --- a/models/gemma3/4b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma3/tags - -docker exec -it ai-ollama ollama run gemma3:4b --verbose diff --git a/models/gemma3n/e2b b/models/gemma3n/e2b deleted file mode 100755 index f93eb07..0000000 --- a/models/gemma3n/e2b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma3n/tags - -docker exec -it ai-ollama ollama run gemma3n:e2b --verbose diff --git a/models/gemma3n/e4b b/models/gemma3n/e4b deleted file mode 100755 index b411b9f..0000000 --- a/models/gemma3n/e4b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gemma3n/tags - -docker exec -it ai-ollama ollama run gemma3n:e4b --verbose diff --git a/models/gpt-oss/20b b/models/gpt-oss/20b deleted file mode 100755 index 5d88e46..0000000 --- a/models/gpt-oss/20b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/gpt-oss/tags - -docker exec -it ai-ollama ollama run gpt-oss:20b --verbose diff --git a/models/granite-code/20b b/models/granite-code/20b deleted file mode 100755 index 6bbad2d..0000000 --- a/models/granite-code/20b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/granite-code/tags - -docker exec -it ai-ollama ollama run granite-code:20b --verbose diff --git a/models/granite-code/34b b/models/granite-code/34b deleted file mode 100755 index dedaa43..0000000 --- a/models/granite-code/34b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/granite-code/tags - -docker exec -it ai-ollama ollama run granite-code:34b --verbose diff --git a/models/granite-code/3b b/models/granite-code/3b deleted file mode 100755 index 8986221..0000000 --- a/models/granite-code/3b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/granite-code/tags - -docker exec -it ai-ollama ollama run granite-code:3b --verbose diff --git a/models/granite-code/8b b/models/granite-code/8b deleted file mode 100755 index e625654..0000000 --- a/models/granite-code/8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/granite-code/tags - -docker exec -it ai-ollama ollama run granite-code:8b --verbose diff --git a/models/llama2/13b b/models/llama2/13b deleted file mode 100755 index 5d3af58..0000000 --- a/models/llama2/13b +++ /dev/null @@ -1,4 +0,0 @@ 
-#!/bin/bash -# https://ollama.com/library/llama2/tags - -docker exec -it ai-ollama ollama run llama2:13b --verbose diff --git a/models/llama2/7b b/models/llama2/7b deleted file mode 100755 index 01daf61..0000000 --- a/models/llama2/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/llama2/tags - -docker exec -it ai-ollama ollama run llama2:7b --verbose diff --git a/models/llama3.1/8b b/models/llama3.1/8b deleted file mode 100755 index 1959292..0000000 --- a/models/llama3.1/8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/llama3.1/tags - -docker exec -it ai-ollama ollama run llama3.1:8b --verbose diff --git a/models/llama3.2/1b b/models/llama3.2/1b deleted file mode 100755 index 574f741..0000000 --- a/models/llama3.2/1b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/llama3.2/tags - -docker exec -it ai-ollama ollama run llama3.2:1b --verbose diff --git a/models/llama3.2/3b b/models/llama3.2/3b deleted file mode 100755 index d6fdf2b..0000000 --- a/models/llama3.2/3b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/llama3.2/tags - -docker exec -it ai-ollama ollama run llama3.2:3b --verbose diff --git a/models/llama3/8b b/models/llama3/8b deleted file mode 100755 index bbb1541..0000000 --- a/models/llama3/8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/llama3/tags - -docker exec -it ai-ollama ollama run llama3:8b --verbose diff --git a/models/llava-llama3/8b b/models/llava-llama3/8b deleted file mode 100755 index bb9071c..0000000 --- a/models/llava-llama3/8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/llava-llama3/tags - -docker exec -it ai-ollama ollama run llava-llama3:8b --verbose diff --git a/models/magistral/24b b/models/magistral/24b deleted file mode 100755 index b73a23a..0000000 --- a/models/magistral/24b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/magistral/tags - -docker exec -it ai-ollama ollama run magistral:24b --verbose diff --git a/models/mistral-nemo/12b b/models/mistral-nemo/12b deleted file mode 100755 index 43e7675..0000000 --- a/models/mistral-nemo/12b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/mistral-nemo/tags - -docker exec -it ai-ollama ollama run mistral-nemo:12b --verbose diff --git a/models/mistral-small/22b b/models/mistral-small/22b deleted file mode 100755 index 10d7c0d..0000000 --- a/models/mistral-small/22b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/mistral-small/tags - -docker exec -it ai-ollama ollama run mistral-small:22b --verbose diff --git a/models/mistral-small/24b b/models/mistral-small/24b deleted file mode 100755 index 4e45a6d..0000000 --- a/models/mistral-small/24b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/mistral-small/tags - -docker exec -it ai-ollama ollama run mistral-small:24b --verbose diff --git a/models/mistral/7b b/models/mistral/7b deleted file mode 100755 index a34e3ef..0000000 --- a/models/mistral/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/mistral/tags - -docker exec -it ai-ollama ollama run mistral:7b --verbose diff --git a/models/mixtral/8x7b b/models/mixtral/8x7b deleted file mode 100755 index abb1c4d..0000000 --- a/models/mixtral/8x7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/mixtral/tags - -docker exec -it ai-ollama ollama run mixtral:8x7b --verbose diff --git a/models/mxbai-embed-large/latest 
b/models/mxbai-embed-large/latest deleted file mode 100755 index d4731d4..0000000 --- a/models/mxbai-embed-large/latest +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/mxbai-embed-large/tags - -../ollama pull mxbai-embed-large:latest --verbose diff --git a/models/nomic-embed-text/latest b/models/nomic-embed-text/latest deleted file mode 100755 index e5a624a..0000000 --- a/models/nomic-embed-text/latest +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/nomic-embed-text/tags - -../ollama pull nomic-embed-text:latest --verbose diff --git a/models/openthinker/32b b/models/openthinker/32b deleted file mode 100755 index 45bd35b..0000000 --- a/models/openthinker/32b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/openthinker/tags - -docker exec -it ai-ollama ollama run openthinker:32b --verbose diff --git a/models/openthinker/7b b/models/openthinker/7b deleted file mode 100755 index 7db944d..0000000 --- a/models/openthinker/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/openthinker/tags - -docker exec -it ai-ollama ollama run openthinker:7b --verbose diff --git a/models/phi/2.7b b/models/phi/2.7b deleted file mode 100755 index 8fd5797..0000000 --- a/models/phi/2.7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi/tags - -docker exec -it ai-ollama ollama run phi:2.7b --verbose diff --git a/models/phi3.5/3.8b b/models/phi3.5/3.8b deleted file mode 100755 index 5d03682..0000000 --- a/models/phi3.5/3.8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi3.5/tags - -docker exec -it ai-ollama ollama run phi3.5:3.8b --verbose diff --git a/models/phi3/14b b/models/phi3/14b deleted file mode 100755 index 7079fd8..0000000 --- a/models/phi3/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi3/tags - -docker exec -it ai-ollama ollama run phi3:14b --verbose diff --git a/models/phi3/3.8b b/models/phi3/3.8b deleted file mode 100755 index 9ac2711..0000000 --- a/models/phi3/3.8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi3/tags - -docker exec -it ai-ollama ollama run phi3:3.8b --verbose diff --git a/models/phi3/instruct b/models/phi3/instruct deleted file mode 100755 index fcb0b4a..0000000 --- a/models/phi3/instruct +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi3/tags - -docker exec -it ai-ollama ollama run phi3:instruct --verbose diff --git a/models/phi3/medium b/models/phi3/medium deleted file mode 100755 index 38b7c55..0000000 --- a/models/phi3/medium +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi3/tags - -docker exec -it ai-ollama ollama run phi3:medium --verbose diff --git a/models/phi3/mini b/models/phi3/mini deleted file mode 100755 index e20e463..0000000 --- a/models/phi3/mini +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi3/tags - -docker exec -it ai-ollama ollama run phi3:mini --verbose diff --git a/models/phi4/14b b/models/phi4/14b deleted file mode 100755 index 8046766..0000000 --- a/models/phi4/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi4/tags - -docker exec -it ai-ollama ollama run phi4:14b --verbose diff --git a/models/phi4/mini-reasoning:3.8b b/models/phi4/mini-reasoning:3.8b deleted file mode 100755 index c843947..0000000 --- a/models/phi4/mini-reasoning:3.8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi4-mini-reasoning/tags - 
-docker exec -it ai-ollama ollama run phi4-mini-reasoning:3.8b --verbose diff --git a/models/phi4/mini:3.8b b/models/phi4/mini:3.8b deleted file mode 100755 index 5c0936e..0000000 --- a/models/phi4/mini:3.8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi4-mini/tags - -docker exec -it ai-ollama ollama run phi4-mini:3.8b --verbose diff --git a/models/phi4/reasoning:14b b/models/phi4/reasoning:14b deleted file mode 100755 index 8123476..0000000 --- a/models/phi4/reasoning:14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/phi4-reasoning/tags - -docker exec -it ai-ollama ollama run phi4-reasoning:14b --verbose diff --git a/models/qwen/0.5b b/models/qwen/0.5b deleted file mode 100755 index f2f44ad..0000000 --- a/models/qwen/0.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen/tags - -docker exec -it ai-ollama ollama run qwen:0.5b --verbose diff --git a/models/qwen/1.8b b/models/qwen/1.8b deleted file mode 100755 index 558aea2..0000000 --- a/models/qwen/1.8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen/tags - -docker exec -it ai-ollama ollama run qwen:1.8b --verbose diff --git a/models/qwen/14b b/models/qwen/14b deleted file mode 100755 index 9b7aedb..0000000 --- a/models/qwen/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen/tags - -docker exec -it ai-ollama ollama run qwen:14b --verbose diff --git a/models/qwen/32b b/models/qwen/32b deleted file mode 100755 index 467198b..0000000 --- a/models/qwen/32b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen/tags - -docker exec -it ai-ollama ollama run qwen:32b --verbose diff --git a/models/qwen/4b b/models/qwen/4b deleted file mode 100755 index 8ef35b3..0000000 --- a/models/qwen/4b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen/tags - -docker exec -it ai-ollama ollama run qwen:4b --verbose diff --git a/models/qwen/7b b/models/qwen/7b deleted file mode 100755 index 2bb8a8f..0000000 --- a/models/qwen/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen/tags - -docker exec -it ai-ollama ollama run qwen:7b --verbose diff --git a/models/qwen2.5-coder/0.5b b/models/qwen2.5-coder/0.5b deleted file mode 100755 index 47f2acb..0000000 --- a/models/qwen2.5-coder/0.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5-coder/tags - -docker exec -it ai-ollama ollama run qwen2.5-coder:0.5b --verbose diff --git a/models/qwen2.5-coder/1.5b b/models/qwen2.5-coder/1.5b deleted file mode 100755 index fb89bb6..0000000 --- a/models/qwen2.5-coder/1.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5-coder/tags - -docker exec -it ai-ollama ollama run qwen2.5-coder:1.5b --verbose diff --git a/models/qwen2.5-coder/14b b/models/qwen2.5-coder/14b deleted file mode 100755 index 1ff159b..0000000 --- a/models/qwen2.5-coder/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5-coder/tags - -docker exec -it ai-ollama ollama run qwen2.5-coder:14b --verbose diff --git a/models/qwen2.5-coder/32b b/models/qwen2.5-coder/32b deleted file mode 100755 index 42b6ec9..0000000 --- a/models/qwen2.5-coder/32b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5-coder/tags - -docker exec -it ai-ollama ollama run qwen2.5-coder:32b --verbose diff --git a/models/qwen2.5-coder/3b b/models/qwen2.5-coder/3b deleted file mode 100755 index 
48f090d..0000000 --- a/models/qwen2.5-coder/3b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5-coder/tags - -docker exec -it ai-ollama ollama run qwen2.5-coder:3b --verbose diff --git a/models/qwen2.5-coder/7b b/models/qwen2.5-coder/7b deleted file mode 100755 index 452304a..0000000 --- a/models/qwen2.5-coder/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5-coder/tags - -docker exec -it ai-ollama ollama run qwen2.5-coder:7b --verbose diff --git a/models/qwen2.5/0.5b b/models/qwen2.5/0.5b deleted file mode 100755 index ba75713..0000000 --- a/models/qwen2.5/0.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5/tags - -docker exec -it ai-ollama ollama run qwen2.5:0.5b --verbose diff --git a/models/qwen2.5/1.5b b/models/qwen2.5/1.5b deleted file mode 100755 index 6cf4e50..0000000 --- a/models/qwen2.5/1.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5/tags - -docker exec -it ai-ollama ollama run qwen2.5:1.5b --verbose diff --git a/models/qwen2.5/14b b/models/qwen2.5/14b deleted file mode 100755 index d463fad..0000000 --- a/models/qwen2.5/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5/tags - -docker exec -it ai-ollama ollama run qwen2.5:14b --verbose diff --git a/models/qwen2.5/32b b/models/qwen2.5/32b deleted file mode 100755 index e798cac..0000000 --- a/models/qwen2.5/32b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5/tags - -docker exec -it ai-ollama ollama run qwen2.5:32b --verbose diff --git a/models/qwen2.5/3b b/models/qwen2.5/3b deleted file mode 100755 index b99da2f..0000000 --- a/models/qwen2.5/3b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5/tags - -docker exec -it ai-ollama ollama run qwen2.5:3b --verbose diff --git a/models/qwen2.5/7b b/models/qwen2.5/7b deleted file mode 100755 index c26f22a..0000000 --- a/models/qwen2.5/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2.5/tags - -docker exec -it ai-ollama ollama run qwen2.5:7b --verbose diff --git a/models/qwen2/0.5b b/models/qwen2/0.5b deleted file mode 100755 index b6a15e4..0000000 --- a/models/qwen2/0.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2/tags - -docker exec -it ai-ollama ollama run qwen2:0.5b --verbose diff --git a/models/qwen2/1.5b b/models/qwen2/1.5b deleted file mode 100755 index 6f36d3f..0000000 --- a/models/qwen2/1.5b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2/tags - -docker exec -it ai-ollama ollama run qwen2:1.5b --verbose diff --git a/models/qwen2/7b b/models/qwen2/7b deleted file mode 100755 index 938b82a..0000000 --- a/models/qwen2/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen2/tags - -docker exec -it ai-ollama ollama run qwen2:7b --verbose diff --git a/models/qwen3-coder/30b b/models/qwen3-coder/30b deleted file mode 100755 index 6e2ca82..0000000 --- a/models/qwen3-coder/30b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3-coder/tags - -docker exec -it ai-ollama ollama run qwen3-coder:30b --verbose diff --git a/models/qwen3/0.6b b/models/qwen3/0.6b deleted file mode 100755 index 51110d4..0000000 --- a/models/qwen3/0.6b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3/tags - -docker exec -it ai-ollama ollama run qwen3:0.6b --verbose diff --git a/models/qwen3/1.7b 
b/models/qwen3/1.7b deleted file mode 100755 index 52415b7..0000000 --- a/models/qwen3/1.7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3/tags - -docker exec -it ai-ollama ollama run qwen3:1.7b --verbose diff --git a/models/qwen3/14b b/models/qwen3/14b deleted file mode 100755 index 9cab84f..0000000 --- a/models/qwen3/14b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3/tags - -docker exec -it ai-ollama ollama run qwen3:14b --verbose diff --git a/models/qwen3/30b b/models/qwen3/30b deleted file mode 100755 index 104c6df..0000000 --- a/models/qwen3/30b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3/tags - -docker exec -it ai-ollama ollama run qwen3:30b --verbose diff --git a/models/qwen3/32b b/models/qwen3/32b deleted file mode 100755 index 68e5b73..0000000 --- a/models/qwen3/32b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3/tags - -docker exec -it ai-ollama ollama run qwen3:32b --verbose diff --git a/models/qwen3/4b b/models/qwen3/4b deleted file mode 100755 index 22dbe24..0000000 --- a/models/qwen3/4b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3/tags - -docker exec -it ai-ollama ollama run qwen3:4b --verbose diff --git a/models/qwen3/8b b/models/qwen3/8b deleted file mode 100755 index 52353fc..0000000 --- a/models/qwen3/8b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwen3/tags - -docker exec -it ai-ollama ollama run qwen3:8b --verbose diff --git a/models/qwq/32b b/models/qwq/32b deleted file mode 100755 index b50227a..0000000 --- a/models/qwq/32b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/qwq/tags - -docker exec -it ai-ollama ollama run qwq:32b --verbose diff --git a/models/smollm2/1.7b b/models/smollm2/1.7b deleted file mode 100755 index d715cd6..0000000 --- a/models/smollm2/1.7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/smollm2/tags - -docker exec -it ai-ollama ollama run smollm2:1.7m --verbose diff --git a/models/smollm2/135m b/models/smollm2/135m deleted file mode 100755 index 5ca6f9c..0000000 --- a/models/smollm2/135m +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/smollm2/tags - -docker exec -it ai-ollama ollama run smollm2:135m --verbose diff --git a/models/smollm2/360m b/models/smollm2/360m deleted file mode 100755 index 2fc8d1d..0000000 --- a/models/smollm2/360m +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/smollm2/tags - -docker exec -it ai-ollama ollama run smollm2:360m --verbose diff --git a/models/stable-code/3b b/models/stable-code/3b deleted file mode 100755 index c95c9a7..0000000 --- a/models/stable-code/3b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/stable-code/tags - -docker exec -it ai-ollama ollama run stable-code:3b --verbose diff --git a/models/stable-code/instruct b/models/stable-code/instruct deleted file mode 100755 index 8513990..0000000 --- a/models/stable-code/instruct +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/stable-code/tags - -docker exec -it ai-ollama ollama run stable-code:instruct --verbose diff --git a/models/starcoder2/15b b/models/starcoder2/15b deleted file mode 100755 index 0aad5f2..0000000 --- a/models/starcoder2/15b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/starcoder2/tags - -docker exec -it ai-ollama ollama run starcoder2:15b --verbose diff --git 
a/models/starcoder2/3b b/models/starcoder2/3b deleted file mode 100755 index 6a3c341..0000000 --- a/models/starcoder2/3b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/starcoder2/tags - -docker exec -it ai-ollama ollama run starcoder2:3b --verbose diff --git a/models/starcoder2/7b b/models/starcoder2/7b deleted file mode 100755 index fc4a552..0000000 --- a/models/starcoder2/7b +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -# https://ollama.com/library/starcoder2/tags - -docker exec -it ai-ollama ollama run starcoder2:7b --verbose diff --git a/rag/CHECKLIST.md b/rag/CHECKLIST.md new file mode 100644 index 0000000..34a5f11 --- /dev/null +++ b/rag/CHECKLIST.md @@ -0,0 +1,17 @@ +# Чек-лист по построению RAG + +* [ ] Определиться с форматом входных данных +* [ ] Очистить входные данные, обеспечив метаданными +* [ ] Подобрать модель эмбеддинга +* [ ] Подобрать размер чанка и перекрытия для эмбеддинга +* [ ] Подобрать место хранения (векторная СУБД) +* [ ] Подобрать модель ранжирования +* [ ] Подобрать модель генерации +* [ ] Подобрать для неё системный промпт (для встраивания найденных чанков, грамотного их цитирования) +* [ ] Подобрать параметры: + * [ ] top_k (количество чанков для поиска при эмбеддинге) + * [ ] top_n (остаток найденных чанков после ранжирования) + * [ ] temperature (степень фантазии) + * [ ] top_p (???) + * [ ] другие? +* [ ] diff --git a/rag/README.md b/rag/README.md index 9c76a82..d0c6ac3 100644 --- a/rag/README.md +++ b/rag/README.md @@ -7,10 +7,10 @@ ```bash cd ..; ./up; cd - python3 -m venv .venv -source ./venv/bin/activate -pip install beautifulsoup4 markdownify sentence-transformers qdrant-client langchain transformers hashlib +source .venv/bin/activate +pip install beautifulsoup4 markdownify sentence-transformers qdrant-client langchain transformers pip install torch torchvision --index-url https://download.pytorch.org/whl/cpu -./download.sh 123456789 +./download.sh 123456789 # <<== pageId страницы в Confluence python3 convert.py python3 vectorize.py python3 rag.py --interactive @@ -153,10 +153,6 @@ python3 vectorize.py - молниеносный поиск по индексу чанков (частям документов); - корректное насыщение контекста для генеративной модели. -Впоследствии embedding-модель будет встраивать эти данные в диалог с генеративной моделью. -Каждый запрос сначала будет обрабатывать именно она, находя подходящие по векторам документы, и подставлять их в контекст генеративной модели. -Последняя будет всего лишь генерировать ответ, опираясь на предоставленные из документов данные, ссылаясь на них в ответе. - Для получения справки по скрипту выполни команду: ``` @@ -192,41 +188,167 @@ python3 rag.py --help ### Кастомный системный промпт -Если хочется уточнить роль генеративной модели, можно создать файл `sys_prompt.txt` и прописать туда всё необходимое, учитывая следующие правила: +Если хочется уточнить роль генеративной модели, можно создать текстовый файл и прописать туда всё необходимое, учитывая следующие правила: -1. Шаблон `{{sources}}` будет заменён на цитаты документов, найденные в qdrant +1. Шаблон `{{sources}}` будет заменён на цитаты документов, найденных в qdrant 2. Шаблон `{{query}}` будет заменён на запрос пользователя 3. Если этих двух шаблонов не будет в промпте, результаты будут непредсказуемыми 4. Каждая цитата в списке цитат формируется в формате: - ``` - --- Source X --- + ```xml + Lorem ipsum dolor sit amet - <пустая строка> + ``` -5. Если в этой директории нет файла `sys_prompt.txt`, то будет применён промпт по умолчанию (см. 
функцию `generate_answer()`). +5. При вызове `rag.py` указать путь к файлу промпта, используя аргумент `--sys-prompt $путь_к_файлу` +6. Если указанного файла не существует, то будет применён промпт по умолчанию. Посмотреть полный промпт можно, указав аргумент `--show-prompt` при вызове `rag.py`. -### Неплохие лёгкие модели +### Неплохие модели для экспериментов -Для эмбеддинга: +Обозначения: +* ☑️ — по умолчанию +* 🧠 — размышляющая +* 🏋️ — требуются ресурсы -- `sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2` (по умолчанию, хорошо адаптирована под русский язык) -- `nomad-embed-text` (популярная) +#### Эмбеддинг + +- [`sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2`](https://hf.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) ☑️ +- [`nomic-embed-text`](https://ollama.com/library/nomic-embed-text) - ... -Для генерации ответов: +#### Ранжирование -- `qwen2.5:3b` (по умолчанию) -- `qwen3:8b` -- `gemma3n:e2b` -- `phi4-mini:3.8b` -- `qwen2.5:1.5b` +- [`cross-encoder/mmarco-mMiniLMv2-L12-H384-v1`](https://hf.co/cross-encoder/mmarco-mMiniLMv2-L12-H384-v1) ☑️ +- `cross-encoder/ms-marco-MiniLM-L-6-v2` +- `cross-encoder/ms-marco-TinyBERT-L-2-v2` +- ... > [!NOTE] -> Чем меньше млрд параметров (b, billion), тем меньше вероятности получить корректный ответ на не-английском языке. -> Такие модели работают быстро, но качество ответов низкое. -> Чем больше параметров, тем лучше и медленее ответы. +> Другие можно найти здесь: https://github.com/AlexeyMalafeev/ruformers + +#### Генеративные + +Модели перечислены по убыванию качества ответов и размера, по возрастанию скорости ответов на обычном домашнем ПК. + +- [`deepseek-r1:8b`](https://ollama.com/library/deepseek-r1) 🏋️🧠 +- [`qwen3:8b`](https://ollama.com/library/qwen3) 🏋️🧠 +- [`dolphin3:8b`](https://ollama.com/library/dolphin3) 🏋️ +- [`cogito:8b`](https://ollama.com/library/cogito) 🏋️ +- [`openchat:7b`](https://ollama.com/library/openchat) 🏋️☑️ +- [`phi4-mini:3.8b`](https://ollama.com/library/phi4-mini) +- [`gemma3:4b`](https://ollama.com/library/gemma3) +- [`gemma3n:e4b`](https://ollama.com/library/gemma3n) +- [`gemma3n:e2b`](https://ollama.com/library/gemma3n) + +
+ Полный список лёгких и средних моделей, которые можно использовать не только в RAG + + ``` + codegemma:2b + codegemma:7b + codellama:7b + codellama:13b + codellama:34b + codeqwen:1.5b + codeqwen:7b + codestral:22b + deepcoder:1.5b + deepcoder:14b + deepseek-coder:1.3b + deepseek-coder:6.7b + deepseek-coder:33b + deepseek-coder-v2:16b + deepseek-r1:1.5b + deepseek-r1:7b + deepseek-r1:8b + deepseek-r1:14b + deepseek-r1:32b + devstral:24b + dolphin3:8b + gemma:2b + gemma:7b + gemma3:1b + gemma3:4b + gemma3:12b + gemma3:27b + gemma3:270m + gemma3n:e2b + gemma3n:e4b + gpt-oss:20b + granite-code:3b + granite-code:8b + granite-code:20b + granite-code:34b + llama2:7b + llama2:13b + llama3:8b + llama3.1:8b + llama3.2:1b + llama3.2:3b + llava-llama3:8b + magistral:24b + mistral:7b + mistral-nemo:12b + mistral-small:22b + mistral-small:24b + mixtral:8x7b + mxbai-embed-large:latest + nomic-embed-text:latest + openthinker:7b + openthinker:32b + phi:2.7b + phi3:3.8b + phi3:14b + phi3:instruct + phi3:medium + phi3:mini + phi3.5:3.8b + phi4:14b + phi4-mini-reasoning:3.8b + phi4-mini:3.8b + phi4-reasoning:14b + qwen:0.5b + qwen:1.8b + qwen:4b + qwen:7b + qwen:14b + qwen:32b + qwen2:0.5b + qwen2:1.5b + qwen2:7b + qwen2.5:0.5b + qwen2.5:1.5b + qwen2.5:3b + qwen2.5:7b + qwen2.5:14b + qwen2.5:32b + qwen2.5-coder:0.5b + qwen2.5-coder:1.5b + qwen2.5-coder:3b + qwen2.5-coder:7b + qwen2.5-coder:14b + qwen2.5-coder:32b + qwen3:0.6b + qwen3:1.7b + qwen3:4b + qwen3:8b + qwen3:14b + qwen3:30b + qwen3:32b + qwen3-coder:30b + qwq:32b + smollm2:1.7m + smollm2:135m + smollm2:360m + stable-code:3b + stable-code:instruct + starcoder2:3b + starcoder2:7b + starcoder2:15b + ``` + +
## Дисклеймер diff --git a/rag/rag.py index 81a7d39..21367a4 100644 --- a/rag/rag.py +++ b/rag/rag.py @@ -2,24 +2,49 @@ import os import requests import json import time -from sentence_transformers import SentenceTransformer +import sys +from qdrant_client import QdrantClient +from sentence_transformers import SentenceTransformer, CrossEncoder + +DEFAULT_CHAT_MODEL = "phi4-mini:3.8b" +DEFAULT_EMBED_MODEL = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2" +DEFAULT_RANK_MODEL = "cross-encoder/mmarco-mMiniLMv2-L12-H384-v1" +# DEFAULT_RANK_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2" +# DEFAULT_RANK_MODEL = "cross-encoder/ms-marco-TinyBERT-L-2-v2" +DEFAULT_MD_FOLDER = "data" +DEFAULT_OLLAMA_URL = "http://localhost:11434" +DEFAULT_QDRANT_HOST = "localhost" +DEFAULT_QDRANT_PORT = 6333 +DEFAULT_QDRANT_COLLECTION = "rag" +DEFAULT_TOP_K = 30 +DEFAULT_USE_RANK = False +DEFAULT_TOP_N = 8 +DEFAULT_VERBOSE = False +DEFAULT_SHOW_STATS = False +DEFAULT_STREAM = False +DEFAULT_INTERACTIVE = False +DEFAULT_SHOW_PROMPT = False class RagSystem: def __init__(self, - md_folder: str = "data", - ollama_url: str = "http://localhost:11434", - qdrant_host: str = "localhost", - qdrant_port: int = 6333, - embed_model: str = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", - chat_model: str = "phi4-mini:3.8b"): - self.md_folder = md_folder + ollama_url: str = DEFAULT_OLLAMA_URL, + qdrant_host: str = DEFAULT_QDRANT_HOST, + qdrant_port: int = DEFAULT_QDRANT_PORT, + embed_model: str = DEFAULT_EMBED_MODEL, + rank_model: str = DEFAULT_RANK_MODEL, + use_rank: bool = DEFAULT_USE_RANK, + chat_model: str = DEFAULT_CHAT_MODEL): self.ollama_url = ollama_url self.qdrant_host = qdrant_host self.qdrant_port = qdrant_port self.chat_model = chat_model self.emb_model = SentenceTransformer(embed_model) - self.prompt = "" + self.qdrant = QdrantClient(host=qdrant_host, port=qdrant_port) + self.use_rank = use_rank + if self.use_rank: + self.rank_model = CrossEncoder(rank_model) self.conversation_history = [] + self.load_chat_model() def load_chat_model(self): @@ -27,111 +52,73 @@ class RagSystem: body = {"model": self.chat_model} requests.post(url, json=body, timeout=600) - def search_qdrant(self, query: str, top_k: int = 6, qdrant_collection="rag"): + def search_qdrant(self, query: str, doc_count: int = DEFAULT_TOP_K, collection_name = DEFAULT_QDRANT_COLLECTION): query_vec = self.emb_model.encode(query, show_progress_bar=False).tolist() - url = f"http://{self.qdrant_host}:{self.qdrant_port}/collections/{qdrant_collection}/points/search" - payload = { - "vector": query_vec, - "top": top_k, - "with_payload": True, - # "score_threshold": 0.6 - } - resp = requests.post(url, json=payload) - if resp.status_code != 200: - raise RuntimeError(f"> Ошибка qdrant: {resp.status_code} {resp.text}") - results = resp.json().get("result", []) - return results + results = self.qdrant.query_points( + collection_name=collection_name, + query=query_vec, + limit=doc_count, + # score_threshold=0.5, + ) + docs = [] + for point in results.points: + docs.append({ + "payload": point.payload, + "score": point.score, + }) + return docs - def prepare_sources(self, context_docs: list): - sources = "" - for idx, doc in enumerate(context_docs, start=1): - text = doc['payload'].get("text", "").strip() - sources = f"{sources}\n\n{text}\n\n" - return sources + def rank_documents(self, query: str, documents: list, top_n: int = DEFAULT_TOP_N): + if not self.use_rank: + return documents - def prepare_prompt(self, query: str, 
context_docs: list): - sources = self.prepare_sources(context_docs) - if os.path.exists('sys_prompt.txt'): - with open('sys_prompt.txt', 'r') as fp: - prompt_template = fp.read() - return prompt_template.replace("{{sources}}", sources).replace("{{query}}", query) - else: - return f"""### Your role -You are a helpful assistant that can answer questions based on the provided sources. + pairs = [[query, doc["payload"]["text"]] for doc in documents] + scores = self.rank_model.predict(pairs) -### Your user -User is a human who is asking a question related to the provided sources. + for i, doc in enumerate(documents): + doc["rank_score"] = float(scores[i]) -### Your task -Please provide an answer based solely on the provided sources and the conversation history. + documents.sort(key=lambda x: x['rank_score'], reverse=True) + return documents[:top_n] -### Rules -- You **MUST** respond in the SAME language as the user's query. -- If uncertain, you **MUST** the user for clarification. -- If there are no sources in context, you **MUST** clearly state that. -- If none of the sources are helpful, you **MUST** clearly state that. -- If you are unsure about the answer, you **MUST** clearly state that. -- If the context is unreadable or of poor quality, you **MUST** inform the user and provide the best possible answer. -- When referencing information from a source, you **MUST** cite the appropriate source(s) using their corresponding numbers. -- **Only include inline citations using [id] (e.g., [1], [2]) when the tag includes an id attribute.** -- You NEVER MUST NOT add or any XML/HTML tags in your response. -- You NEVER MUST NOT cite if the tag does not contain an id attribute. -- Every answer MAY include at least one source citation. -- Only cite a source when you are explicitly referencing it. -- You may also cite multiple sources if they are all relevant to the question. -- Ensure citations are concise and directly related to the information provided. -- You CAN format your responses using Markdown. - -### Example of sources list: - -``` -The sky is red in the evening and blue in the morning. -Water is wet when the sky is red. -When is water wet? -``` -Response: -``` -Water will be wet when the sky is red [2], which occurs in the evening [1]. -``` - -### Now let's start! 
- -``` -{sources} -{query} -``` - -Respond.""" - - def generate_answer(self, prompt: str): + def generate_answer(self, sys_prompt: str, user_prompt: str): url = f"{self.ollama_url}/api/generate" body = { "model": self.chat_model, - "prompt": prompt, - "messages": self.conversation_history, + "system": sys_prompt, + "prompt": user_prompt, + #"context": self.conversation_history, "stream": False, - # "options": { - # "temperature": 0.4, - # "top_p": 0.1, - # }, + "options": { + "temperature": 0.5, + # "top_p": 0.2, + }, } - self.response = requests.post(url, json=body, timeout=900) - if self.response.status_code != 200: - return f"Ошибка генерации ответа: {self.response.status_code} {self.response.text}" - return self.response.json().get("response", "").strip() - def generate_answer_stream(self, prompt: str): + response = requests.post(url, json=body, timeout=900) + if response.status_code != 200: + return f"Ошибка генерации ответа: {response.status_code} {response.text}" + self.response = response.json() + return self.response["response"] + + def generate_answer_stream(self, sys_prompt: str, user_prompt: str): url = f"{self.ollama_url}/api/generate" body = { "model": self.chat_model, - "prompt": prompt, - "messages": self.conversation_history, - "stream": True + "system": sys_prompt, + "prompt": user_prompt, + #"context": self.conversation_history, + "stream": True, + "options": { + "temperature": 0.1, + "top_p": 0.2, + }, } resp = requests.post(url, json=body, stream=True, timeout=900) if resp.status_code != 200: raise RuntimeError(f"Ошибка генерации ответа: {resp.status_code} {resp.text}") - full_answer = "" + + answer = "" for chunk in resp.iter_lines(): if chunk: try: @@ -139,39 +126,42 @@ Respond.""" data = json.loads(decoded_chunk) if "response" in data: yield data["response"] - full_answer += data["response"] - elif "error" in data: - print(f"Stream error: {data['error']}") + answer += data["response"] + if "done" in data and data["done"] is True: + self.response = data break - except json.JSONDecodeError: - print(f"Could not decode JSON from chunk: {chunk.decode('utf-8')}") + elif "error" in data: + answer += f" | Ошибка стриминга ответа: {data['error']}" + break + except json.JSONDecodeError as e: + answer += f" | Ошибка конвертации чанка: {chunk.decode('utf-8')} - {e}" except Exception as e: - print(f"Error processing chunk: {e}") + answer += f" | Ошибка обработки чанка: {e}" def get_prompt_eval_count(self): - if not self.response: + if not self.response["prompt_eval_count"]: return 0 - return self.response.json().get("prompt_eval_count", 0) + return self.response["prompt_eval_count"] def get_prompt_eval_duration(self): - if not self.response: + if not self.response["prompt_eval_duration"]: return 0 - return self.response.json().get("prompt_eval_duration", 0) / (10 ** 9) + return self.response["prompt_eval_duration"] / (10 ** 9) def get_eval_count(self): - if not self.response: + if not self.response["eval_count"]: return 0 - return self.response.json().get("eval_count", 0) + return self.response["eval_count"] def get_eval_duration(self): - if not self.response: + if not self.response["eval_duration"]: return 0 - return self.response.json().get("eval_duration", 0) / (10 ** 9) + return self.response["eval_duration"] / (10 ** 9) def get_total_duration(self): - if not self.response: + if not self.response["total_duration"]: return 0 - return self.response.json().get("total_duration", 0) / (10 ** 9) + return self.response["total_duration"] / (10 ** 9) def get_tps(self): eval_count = 
self.get_eval_count() @@ -180,202 +170,318 @@ Respond.""" return 0 return eval_count / eval_duration -def print_sources(context_docs: list): - print("\n\nИсточники:") - for idx, doc in enumerate(context_docs, start=1): - title = doc['payload'].get("filename", None) - url = doc['payload'].get("url", None) - date = doc['payload'].get("date", None) - version = doc['payload'].get("version", None) - author = doc['payload'].get("author", None) +class App: + def __init__( + self, + args: list = [] + ): + if not args.query and not args.interactive: + print("Ошибка: укажите запрос (--query) и/или используйте интерактивный режим (--interactive)") + sys.exit(1) - if url is None: - url = "(нет веб-ссылки)" - if date is None: - date = "(неизвестно)" - if version is None: - version = "0" - if author is None: - author = "(неизвестен)" + self.args = args + self.print_v(text=f"Включить интерактивный режим диалога: {args.interactive}") + self.print_v(text=f"Включить потоковый вывод: {args.stream}") + if self.is_custom_sys_prompt(): + self.print_v(text=f"Системный промпт: {args.sys_prompt}") + else: + self.print_v(text=f"Системный промпт: по умолчанию") + self.print_v(text=f"Показать сист. промпт перед запросом: {args.show_prompt}") + self.print_v(text=f"Выводить служебные сообщения: {args.verbose}") + self.print_v(text=f"Выводить статистику об ответе: {args.show_stats}") + self.print_v(text=f"Адрес хоста Qdrant: {args.qdrant_host}") + self.print_v(text=f"Номер порта Qdrant: {args.qdrant_port}") + self.print_v(text=f"Название коллекции для поиска документов: {args.qdrant_collection}") + self.print_v(text=f"Ollama API URL: {args.ollama_url}") + self.print_v(text=f"Модель генерации Ollama: {args.chat_model}") + self.print_v(text=f"Модель эмбеддинга: {args.emb_model}") + self.print_v(text=f"Количество документов для поиска: {args.topk}") + self.print_v(text=f"Включить ранжирование: {args.use_rank}") + self.print_v(text=f"Модель ранжирования: {args.rank_model}") + self.print_v(text=f"Количество документов после ранжирования: {args.topn}") + self.init_rag() - print(f"{idx}. {title}") - print(f" {url} (v{version} {author})") - print(f" актуальность на {date}") + def print_v(self, text: str = "\n"): + if self.args.verbose: + print(f"{text}") -def print_v(text: str, is_verbose: bool): - if is_verbose: - print(text) + def init_rag(self): + self.print_v(text="\nИнициализация моделей...") + self.rag = RagSystem( + ollama_url = self.args.ollama_url, + qdrant_host = self.args.qdrant_host, + qdrant_port = self.args.qdrant_port, + embed_model = self.args.emb_model, + rank_model = self.args.rank_model, + use_rank = self.args.use_rank, + chat_model = self.args.chat_model + ) + self.print_v(text=f"Модели загружены. 
Если ответ плохой, переформулируйте запрос, укажите --chat-model или улучшите исходные данные RAG") -def print_stats(rag: RagSystem): - print("\n\nСтатистика:") - print(f"* Time: {rag.get_total_duration()}s") - print(f"* TPS: {rag.get_tps()}") - print(f"* PEC: {rag.get_prompt_eval_count()}") - print(f"* PED: {rag.get_prompt_eval_duration()}s") - print(f"* EC: {rag.get_eval_count()}") - print(f"* ED: {rag.get_eval_duration()}s\n") + def init_query(self): + self.query = None + if self.args.interactive: + self.print_v(text="\nИНТЕРАКТИВНЫЙ РЕЖИМ") + self.print_v(text="Можете вводить запрос (или 'exit' для выхода)\n") -def main(): - import sys + if self.args.query: + self.query = self.args.query.strip() + print(f">>> {self.query}") + elif self.args.interactive: + self.query = input(">>> ").strip() + + def process_help(self): + print("<<< Команды интерактивного режима:") + print("save -- сохранить диалог в файл") + print("exit -- выход\n") + self.query = None + self.args.query = None + + def process_save(self): + import datetime + timestamp = int(time.time()) + dt = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ') + filename = f"chats/chat-{timestamp}-{self.args.chat_model}.md" + + markdown_content = f"# История диалога от {dt}\n\n" + markdown_content += f"## Параметры диалога\n" + markdown_content += f"```\nargs = {self.args}\n```\n" + markdown_content += f"```\nemb_model = {self.rag.emb_model}\n```\n" + markdown_content += f"```\nrank_model = {getattr(self.rag, 'rank_model', None)}\n```\n" + + for entry in self.rag.conversation_history: + if entry['role'] == 'user': + markdown_content += f"## Пользователь\n\n" + elif entry['role'] == 'assistant': + markdown_content += f"## Модель\n\n" + docs = self.prepare_ctx_sources(entry['docs']).replace("```", "") + markdown_content += f"```\n{docs}\n```\n\n" + markdown_content += f"{entry['content']}\n\n" + + os.makedirs('chats', exist_ok=True) + with open(filename, 'w') as fp: + fp.write(markdown_content) + + print(f"<<< Диалог сохранён в файл: {filename}\n") + self.query = None + + def find_docs(self, query: str, top_k: int, collection_name: str): + self.print_v(text="\nПоиск документов...") + context_docs = self.rag.search_qdrant(query, top_k, collection_name) + self.print_v(text=f"Найдено {len(context_docs)} документов") + return context_docs + + def rank_docs(self, docs: list = [], top_n = DEFAULT_TOP_N): + self.print_v(text="\nРанжирование документов...") + ranked_docs = self.rag.rank_documents(self.query, docs, top_n) + self.print_v(text=f"После ранжирования осталось {len(ranked_docs)} документов") + return ranked_docs + + def prepare_ctx_sources(self, docs: list): + sources = "" + for idx, doc in enumerate(docs, start=1): + text = doc['payload'].get("text", "").strip() + sources = f"{sources}\n\n{text}\n\n" + return sources + + def prepare_cli_sources(self, docs: list): + sources = "\nИсточники:\n" + for idx, doc in enumerate(docs, start=1): + title = doc['payload'].get("filename", None) + url = doc['payload'].get("url", None) + date = doc['payload'].get("date", None) + version = doc['payload'].get("version", None) + author = doc['payload'].get("author", None) + + if url is None: + url = "(нет веб-ссылки)" + if date is None: + date = "(неизвестно)" + if version is None: + version = "0" + if author is None: + author = "(неизвестен)" + + sources += f"{idx}. 
{title}\n" + sources += f" {url}\n" + sources += f" Версия {version} от {author}, актуальная на {date}\n" + if doc['rank_score']: + sources += f" score = {doc['score']} | rank_score = {doc['rank_score']}\n" + else: + sources += f" score = {doc['score']}\n" + return sources + + def prepare_sys_prompt(self, query: str, docs: list): + if self.is_custom_sys_prompt(): + with open(self.args.sys_prompt, 'r') as fp: + prompt_tpl = fp.read() + else: + prompt_tpl = """You are a helpful assistant that can answer questions based on the provided context. +Your user is the person asking the source-related question. +Your job is to answer the question based on the context alone. +If the context doesn't provide much information, answer "I don't know." +Adhere to this in all languages. + +Context: + +----------------------------------------- +{{sources}} +----------------------------------------- +""" + + sources = self.prepare_ctx_sources(docs) + return prompt_tpl.replace("{{sources}}", sources).replace("{{query}}", query) + + def show_prompt(self, sys_prompt: str): + print("\n================ Системный промпт ==================") + print(f"{sys_prompt}\n============ Конец системного промпта ==============\n") + + def process_query(self, sys_prompt: str, user_prompt: str, streaming: bool = DEFAULT_STREAM): + answer = "" + # try: + if streaming: + self.print_v(text="\nГенерация потокового ответа (^C для остановки)...\n") + print(f"<<< ", end='', flush=True) + for token in self.rag.generate_answer_stream(sys_prompt, user_prompt): + answer += token + print(token, end='', flush=True) + else: + self.print_v(text="\nГенерация ответа (^C для остановки)...\n") + answer = self.rag.generate_answer(sys_prompt, user_prompt) + print(f"<<< {answer}\n") + # except RuntimeError as e: + # answer = str(e) + + print(f"\n===================================================") + return answer + + def is_custom_sys_prompt(self): + return self.args.sys_prompt and os.path.exists(self.args.sys_prompt) + + def print_stats(self): + print(f"* Time: {self.rag.get_total_duration()}s") + print(f"* TPS: {self.rag.get_tps()}") + print(f"* PEC: {self.rag.get_prompt_eval_count()}") + print(f"* PED: {self.rag.get_prompt_eval_duration()}s") + print(f"* EC: {self.rag.get_eval_count()}") + print(f"* ED: {self.rag.get_eval_duration()}s\n") + self.query = None + self.args.query = None + + def process(self): + while True: + try: + self.init_query() + + if not self.query or self.query == "": + continue + + if self.query.lower() == "help": + self.process_help() + continue + + if self.query.strip().lower() == "save": + self.process_save() + continue + + if self.query.strip().lower() == "stats": + print("\n<<< Статистика:") + self.print_stats() + continue + + if self.query.strip().lower() == "exit": + self.print_v(text="\n*** Завершение работы") + sys.exit(0) + + context_docs = self.find_docs(self.query, self.args.topk, self.args.qdrant_collection) + if not context_docs: + if args.interactive: + print("<<< Релевантные документы не найдены") + self.query = None + self.args.query = None + continue + else: + break + + ranked_docs = self.rank_docs(context_docs, self.args.topn) + if not ranked_docs: + if args.interactive: + print("<<< Релевантные документы были отсеяны полностью") + self.query = None + self.args.query = None + continue + else: + break + + sys_prompt = self.prepare_sys_prompt(self.query, ranked_docs) + if self.args.show_prompt: + self.show_prompt(sys_prompt) + + try: + answer = self.process_query(sys_prompt, self.query, self.args.stream) + 
except KeyboardInterrupt: + print("\n*** Генерация ответа прервана") + self.query = None + self.args.query = None + print(self.prepare_cli_sources(ranked_docs)) + if self.args.show_stats: + print("\nСтатистика:") + self.print_stats() + continue + + print(self.prepare_cli_sources(ranked_docs)) + + if self.args.show_stats: + print("\nСтатистика:") + self.print_stats() + + self.rag.conversation_history.append({ + "role": "user", + "content": self.query, + }) + + self.rag.conversation_history.append({ + "role": "assistant", + "docs": ranked_docs, + "content": answer, + }) + + if args.interactive: + self.query = None + self.args.query = None + else: + break + + except KeyboardInterrupt: + print("\n*** Завершение работы") + break + + except Exception as e: + print(f"Ошибка: {e}") + break + +if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="RAG-система с использованием Ollama и Qdrant") parser.add_argument("--query", type=str, help="Запрос к RAG") - parser.add_argument("--interactive", default=False, action=argparse.BooleanOptionalAction, help="Перейти в интерактивный режим диалога") - parser.add_argument("--show-prompt", default=False, action=argparse.BooleanOptionalAction, help="Показать полный промпт перед обработкой запроса") - parser.add_argument("--qdrant-host", default="localhost", help="Qdrant host") - parser.add_argument("--qdrant-port", type=int, default=6333, help="Qdrant port") - parser.add_argument("--qdrant-collection", type=str, default="rag", help="Название коллекции для поиска документов") - parser.add_argument("--ollama-url", default="http://localhost:11434", help="Ollama API URL") - parser.add_argument("--emb-model", default="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2", help="Модель эмбеддинга") - parser.add_argument("--chat-model", default="phi4-mini:3.8b", help="Модель генерации Ollama") - parser.add_argument("--topk", type=int, default=6, help="Количество документов для поиска") - parser.add_argument("--verbose", default=False, action=argparse.BooleanOptionalAction, help="Выводить промежуточные служебные сообщения") - parser.add_argument("--show-stats", default=False, action=argparse.BooleanOptionalAction, help="Выводить статистику об ответе (не работает с --stream)") - parser.add_argument("--stream", default=False, action=argparse.BooleanOptionalAction, help="Выводить статистику об ответе") + parser.add_argument("--interactive", default=DEFAULT_INTERACTIVE, action=argparse.BooleanOptionalAction, help="Включить интерактивный режим диалога") + parser.add_argument("--stream", default=DEFAULT_STREAM, action=argparse.BooleanOptionalAction, help="Включить потоковый вывод") + parser.add_argument("--sys-prompt", type=str, help="Путь к файлу шаблона системного промпта") + parser.add_argument("--show-prompt", default=DEFAULT_SHOW_PROMPT, action=argparse.BooleanOptionalAction, help="Показать сист. 
промпт перед запросом") + parser.add_argument("--verbose", default=DEFAULT_VERBOSE, action=argparse.BooleanOptionalAction, help="Выводить служебные сообщения") + parser.add_argument("--show-stats", default=DEFAULT_SHOW_STATS, action=argparse.BooleanOptionalAction, help="Выводить статистику об ответе (не работает с --stream)") + parser.add_argument("--qdrant-host", default=DEFAULT_QDRANT_HOST, help="Адрес хоста Qdrant") + parser.add_argument("--qdrant-port", type=int, default=DEFAULT_QDRANT_PORT, help="Номер порта Qdrant") + parser.add_argument("--qdrant-collection", type=str, default=DEFAULT_QDRANT_COLLECTION, help="Название коллекции для поиска документов") + parser.add_argument("--ollama-url", default=DEFAULT_OLLAMA_URL, help="Ollama API URL") + parser.add_argument("--chat-model", default=DEFAULT_CHAT_MODEL, help="Модель генерации Ollama") + parser.add_argument("--emb-model", default=DEFAULT_EMBED_MODEL, help="Модель эмбеддинга") + parser.add_argument("--topk", type=int, default=DEFAULT_TOP_K, help="Количество документов для поиска") + parser.add_argument("--use-rank", default=DEFAULT_USE_RANK, action=argparse.BooleanOptionalAction, help="Включить ранжирование") + parser.add_argument("--rank-model", type=str, default=DEFAULT_RANK_MODEL, help="Модель ранжирования") + parser.add_argument("--topn", type=int, default=DEFAULT_TOP_N, help="Количество документов после ранжирования") args = parser.parse_args() - if not args.query and not args.interactive: - print("Ошибка: укажите запрос (--query) и/или используйте интерактивный режим (--interactive)") - sys.exit(1) - - print_v(f"Адрес ollama: {args.ollama_url}", args.verbose) - print_v(f"Адрес qdrant: {args.qdrant_host}:{args.qdrant_port}", args.verbose) - print_v(f"Модель эмбеддинга: {args.emb_model}", args.verbose) - print_v(f"Модель чата: {args.chat_model}", args.verbose) - print_v(f"Документов для поиска: {args.topk}", args.verbose) - print_v(f"Коллекция для поиска: {args.qdrant_collection}", args.verbose) - if os.path.exists('sys_prompt.txt'): - print_v("Будет использоваться sys_prompt.txt!", args.verbose) - - print_v("\nПервая инициализация моделей...", args.verbose) - rag = RagSystem( - ollama_url=args.ollama_url, - qdrant_host=args.qdrant_host, - qdrant_port=args.qdrant_port, - embed_model=args.emb_model, - chat_model=args.chat_model - ) - print_v(f"Модели загружены. 
Если ответ плохой, переформулируйте запрос, укажите --chat-model или улучшите исходные данные RAG", args.verbose) - - query = None - if args.interactive: - print_v("\nИНТЕРАКТИВНЫЙ РЕЖИМ", args.verbose) - print_v("Можете вводить запрос (или 'exit' для выхода)\n", args.verbose) - - if args.query: - query = args.query.strip() - print(f">>> {query}") - - while True: - try: - if not query or query == "": - query = input(">>> ").strip() - - if not query or query == "": - continue - - if query.lower() == "help": - print("<<< Команды итерактивного режима:") - print("save -- сохранить диалог в файл") - print("stats -- вывести статистику последнего ответа") - print("exit -- выход\n") - query = None - continue - - if query.strip().lower() == "save": - import datetime - timestamp = int(time.time()) - dt = datetime.datetime.fromtimestamp(timestamp).strftime('%Y-%m-%dT%H:%M:%SZ') - filename = f"chats/chat-{timestamp}.md" - - markdown_content = f"# История диалога от {dt}\n\n" - markdown_content += f"## Параметры диалога\n" - markdown_content += f"```\nargs = {args}\n```\n" - markdown_content += f"```\nemb_model = {rag.emb_model}\n```\n" - - for entry in rag.conversation_history: - if entry['role'] == 'user': - markdown_content += f"## Пользователь\n\n" - elif entry['role'] == 'assistant': - markdown_content += f"## Модель\n\n" - docs = rag.prepare_sources(entry['docs']).replace("```", "") - markdown_content += f"```\n{docs}\n```\n\n" - markdown_content += f"{entry['content']}\n\n" - - os.makedirs('chats', exist_ok=True) - with open(filename, 'w') as fp: - fp.write(markdown_content) - - print(f"<<< Диалог сохранён в файл: {filename}\n") - query = None - continue - - if query.strip().lower() == "exit": - print_v("\n*** Завершение работы", args.verbose) - break - - print_v("\nПоиск релевантных документов...", args.verbose) - context_docs = rag.search_qdrant(query, top_k=args.topk, qdrant_collection=args.qdrant_collection) - if not context_docs: - print("<<< Релевантные документы не найдены") - if args.interactive: - query = None - continue - else: - break - - print_v(f"Найдено {len(context_docs)} релевантных документов", args.verbose) - # print_sources(context_docs) - - prompt = rag.prepare_prompt(query=query, context_docs=context_docs) - if args.show_prompt: - print("\nПолный системный промпт: --------------------------") - print(f"{prompt}\n---------------------------------------------------") - - print_v("\nГенерация ответа...\n", args.verbose) - - if args.stream: - answer = "\n<<< " - print(answer, end='', flush=True) - try: - for message_part in rag.generate_answer_stream(prompt): - answer += message_part - print(message_part, end='', flush=True) - except RuntimeError as e: - answer = str(e) - print(f"\n{answer}\n===================================================\n") - else: - answer = rag.generate_answer(prompt) - print(f"<<< {answer}\n") - - print_sources(context_docs) - if args.show_stats and not args.stream: - print_stats(rag) - - rag.conversation_history.append({ - "role": "user", - "content": query, - }) - - rag.conversation_history.append({ - "role": "assistant", - "docs": context_docs, - "content": answer, - }) - - if args.interactive: - query = None - else: - break - - except KeyboardInterrupt: - print("\n*** Завершение работы") - break - - except Exception as e: - print(f"Ошибка: {e}") - break - -if __name__ == "__main__": - main() + app = App(args) + app.process() diff --git a/rag/vectorize.py b/rag/vectorize.py index 98975fe..2e497a7 100644 --- a/rag/vectorize.py +++ b/rag/vectorize.py 
@@ -4,6 +4,7 @@ from sentence_transformers import SentenceTransformer from qdrant_client import QdrantClient from qdrant_client.http import models from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.text_splitter import MarkdownHeaderTextSplitter DEFAULT_INPUT_DIR="data" DEFAULT_CHUNK_SIZE=500 @@ -59,24 +60,45 @@ def load_markdown_files(input_dir): return documents def chunk_text(texts, chunk_size, chunk_overlap): - splitter = RecursiveCharacterTextSplitter( + markdown_splitter = MarkdownHeaderTextSplitter( + headers_to_split_on=[ + ("#", "Header 1"), + ("##", "Header 2"), + ("###", "Header 3"), + ], + strip_headers=False, + return_each_line=False, + ) + text_splitter = RecursiveCharacterTextSplitter( chunk_size=chunk_size, chunk_overlap=chunk_overlap, + add_start_index=True, length_function=len, separators=["\n\n", "\n", " ", ""] ) + chunks = [] for doc in texts: - doc_chunks = splitter.split_text(doc["text"]) - for i, chunk in enumerate(doc_chunks): - chunk_id = f"{doc['id']}_chunk{i}" - chunk_dict = {"id": chunk_id, "text": chunk} + md_header_splits = markdown_splitter.split_text(doc["text"]) - # Перенос всех доступных метаданных - for key in ["url", "version", "author", "date"]: - if key in doc and doc[key] is not None: - chunk_dict[key] = doc[key] - chunks.append(chunk_dict) + for md_split in md_header_splits: + # RecursiveCharacterTextSplitter for each markdown split + split_docs = text_splitter.split_documents([md_split]) + + for i, chunk in enumerate(split_docs): + chunk_id = f"{doc['id']}_chunk{i}" + chunk_dict = {"id": chunk_id, "text": chunk.page_content} + + # Перенос всех доступных метаданных, включая метаданные из MarkdownHeaderTextSplitter + for key in ["url", "version", "author", "date"]: + if key in doc and doc[key] is not None: + chunk_dict[key] = doc[key] + + # Добавление метаданных из MarkdownHeaderTextSplitter + for key, value in chunk.metadata.items(): + chunk_dict[key] = value + + chunks.append(chunk_dict) return chunks def embed_and_upload(chunks, embedding_model_name, qdrant_host="localhost", qdrant_port=6333, qdrant_collection="rag"): @@ -149,3 +171,4 @@ if __name__ == "__main__": args.qdrant_port, args.qdrant_collection ) + diff --git a/up b/up index cb2afd8..8f6e0e4 100755 --- a/up +++ b/up @@ -7,5 +7,5 @@ docker compose up -d --build --remove-orphans echo "* Ollama доступен по адресу: localhost:$OLLAMA_PORT" echo "* Open WebUI доступен по адресу: http://localhost:$QDRANT_PORT/" echo "* Qdrant доступен по адресу: localhost:$OWEBUI_PORT" -echo "* Qdrant UI доступен по адресу: http://localhost:$OWEBUI_PORT/dashboard" +echo "* Qdrant UI доступен по адресу: http://localhost:$QDRANT_PORT/dashboard" echo "Для остановки контейнеров выполните ./down"
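
The new --sys-prompt option lets a template file replace the built-in English system prompt: prepare_sys_prompt() reads the file as-is and fills the {{sources}} placeholder (and {{query}}, if the template contains it) with plain str.replace. A small sketch of an equivalent substitution is shown below; the template text itself is invented for the example, only the placeholder markers are what the code actually looks for.

```python
# Sketch of the placeholder substitution done by prepare_sys_prompt() for a
# custom --sys-prompt template. The template wording here is an example only;
# the {{sources}} / {{query}} markers are the parts the code recognises.
template = (
    "You are a support assistant. Answer strictly from the context below.\n\n"
    "Context:\n{{sources}}\n\nQuestion: {{query}}\n"
)

sources = "\n\nOllama listens on port 11434 inside the compose stack.\n\n"
query = "On which port does Ollama listen?"

# Same mechanism as in rag.py: no templating engine, just string replacement.
sys_prompt = template.replace("{{sources}}", sources).replace("{{query}}", query)
print(sys_prompt)
```

Note that the user query is also passed to the model separately as the user message (process_query receives sys_prompt and user_prompt), so {{query}} in a template is optional; the default built-in prompt does not use it.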
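The --use-rank, --rank-model and --topn options route the top-k Qdrant hits through RagSystem.rank_documents(), whose implementation is not part of this hunk. As a rough illustration only, here is what a cross-encoder reranking pass of that shape typically looks like; the model name and the assumption that rank_documents() works this way are the editor's, not the patch's.

```python
# Hedged sketch of a cross-encoder reranking pass. rank_documents() itself is not
# shown in this part of the patch, so treat this as an assumption about its shape.
from sentence_transformers import CrossEncoder

def rerank(query: str, docs: list, top_n: int,
           model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"):  # placeholder model name
    model = CrossEncoder(model_name)
    # Score each (query, passage) pair; documents carry their text in payload["text"],
    # the same field prepare_ctx_sources() reads.
    scores = model.predict([(query, d["payload"].get("text", "")) for d in docs])
    for d, s in zip(docs, scores):
        d["rank_score"] = float(s)  # prepare_cli_sources() prints this field when present
    # Keep only the top_n best-scoring documents, matching what rank_docs() reports afterwards.
    return sorted(docs, key=lambda d: d["rank_score"], reverse=True)[:top_n]
```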
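For readers following the new chunk_text() in rag/vectorize.py: the patch switches to a two-stage split, first by Markdown headers (kept as chunk metadata), then by size with overlap. A minimal standalone sketch of that pipeline follows; the sample document and the overlap value are illustrative, the real values come from the loaded files and the script's defaults.

```python
# Minimal sketch of the two-stage chunking now used in chunk_text():
# MarkdownHeaderTextSplitter keeps header titles as metadata, then
# RecursiveCharacterTextSplitter bounds each section by size with overlap.
# The sample text and the overlap below are illustrative only.
from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter

md = "# Install\nRun ./up to start the stack.\n\n## Ports\nOllama listens on 11434."

header_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=[("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")],
    strip_headers=False,
)
char_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    add_start_index=True,
    length_function=len,
    separators=["\n\n", "\n", " ", ""],
)

for section in header_splitter.split_text(md):              # one Document per header section
    for chunk in char_splitter.split_documents([section]):  # size-bounded sub-chunks
        # chunk.metadata now carries "Header 1"/"Header 2"/... plus start_index;
        # chunk_text() copies it into the payload that is uploaded to Qdrant.
        print(chunk.metadata, chunk.page_content[:40])
```

One detail worth noting: in the patched chunk_text() the counter i restarts for every header section, so IDs of the form f"{doc['id']}_chunk{i}" can repeat within one file; whether that matters depends on how embed_and_upload() uses those IDs.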