Скрипты генерации rag
This commit is contained in:
49
@rag/1_download_page.sh
Executable file
49
@rag/1_download_page.sh
Executable file
@@ -0,0 +1,49 @@
|
||||
#!/bin/bash
#
# Download a Confluence page and, recursively, its child pages as HTML files.
#
# Setup:    fill in USERNAME, PASSWORD and CONFLUENCE_URL below.
# Usage:    ./1_download_page.sh <pageId>
# Output:   ./input_html/<page title>.html for the page and each child page.
# Requires: curl, jq.
# Exit:     0 when the requested page was saved, 1 otherwise. A failing child
#           page is reported as a warning and does not fail the parent.

# Seconds to wait before requesting each child page.
DELAY=1

# 1. Set the Confluence access credentials
USERNAME=""
PASSWORD=""
CONFLUENCE_URL=""

# 2. Run: ./1_download_page.sh <pageId>

##################################################################

if [ $# -lt 1 ]; then
  echo "Usage: $0 <pageId>"
  exit 1
fi

command -v curl >/dev/null 2>&1 || { echo >&2 "Error: curl is required but not installed."; exit 1; }
command -v jq >/dev/null 2>&1 || { echo >&2 "Error: jq is required but not installed."; exit 1; }

# Without a base URL the request below would go to a meaningless relative URL.
if [ -z "$CONFLUENCE_URL" ]; then
  echo >&2 "Error: CONFLUENCE_URL is not set; edit the settings at the top of $0."
  exit 1
fi

PAGE_ID="$1"
API_ENDPOINT="${CONFLUENCE_URL}/rest/api/content/${PAGE_ID}?expand=body.storage,children.page"

echo
echo "Downloading: $API_ENDPOINT"

# NOTE(review): -u places the password on curl's command line, where other
# local users may see it in the process list; consider --netrc or --config.
if ! response=$(curl -s -u "$USERNAME:$PASSWORD" -H "Accept: application/json" "${API_ENDPOINT}"); then
  echo >&2 "Error: Failed to retrieve article"
  exit 1
fi

# An error reply carries a top-level "message" field; jq's stderr is silenced
# because the body may not be JSON at all (that case is caught by the title
# check below).
error_message=$(printf '%s' "$response" | jq -r '.message // empty' 2>/dev/null)
if [ -n "$error_message" ]; then
  echo >&2 "API Error: $error_message"
  exit 1
fi

# A missing title means the body was not the expected page JSON.
title=$(printf '%s' "$response" | jq -r '.title // empty' 2>/dev/null)
if [ -z "$title" ]; then
  echo >&2 "Error: Unexpected response (no page title) for page ID: $PAGE_ID"
  exit 1
fi

content=$(printf '%s' "$response" | jq -r '.body.storage.value // empty' 2>/dev/null)

output_path="./input_html/"
# A "/" inside the title would be treated as a directory separator.
safe_title=${title//\//_}
output_file="$output_path/$safe_title.html"

if ! mkdir -p "$output_path"; then
  echo >&2 "Error: Cannot create directory: $output_path"
  exit 1
fi

if ! printf '%s\n' "<html><body>$content</body></html>" > "$output_file"; then
  echo >&2 "Error: Cannot write: $output_file"
  exit 1
fi
echo "Saved as: $output_file"

# Recurse into child pages by re-running this same script, one ID per line.
# NOTE(review): only the children present in this single response are
# followed; if the API pages the child list, later pages are missed — confirm.
while IFS= read -r child_id; do
  [ -n "$child_id" ] || continue
  echo "Downloading child page ID: $child_id"
  sleep "$DELAY"
  # stdin is detached so the child cannot consume the list being read here.
  "$BASH" "$0" "$child_id" </dev/null \
    || echo >&2 "Warning: Failed to download child page ID: $child_id"
done < <(printf '%s' "$response" | jq -r '.children.page.results[]?.id' 2>/dev/null)
|
||||
Reference in New Issue
Block a user