1
0
Files
ollama/@rag/1_download_page.sh

50 lines
1.5 KiB
Bash
Executable File

#!/bin/bash
#
# Download a Confluence page and, recursively, its child pages as HTML files.
#
# Setup:
#   1. Specify the Confluence access credentials below.
#   2. Run: ./1_download_page.sh <pageId>
#
# Each page is requested from the Confluence REST API
# (/rest/api/content/<pageId>?expand=body.storage,children.page). The
# storage-format body is wrapped in <html><body>...</body></html> and written
# to ./input_html/<title>.html. Every child page ID listed in the response is
# then downloaded the same way, pausing DELAY seconds before each request.
#
# Requires: curl, jq.
# Exit status: 0 when every page was saved, 1 otherwise.

set -u

DELAY=1 # seconds to wait before each child-page request
# 1. Specify the Confluence access credentials
# NOTE(review): the password is passed to curl on its command line and may be
# visible to other local users in the process list.
USERNAME=""
PASSWORD=""
CONFLUENCE_URL=""
# 2. Run: ./1_download_page.sh <pageId>
##################################################################
OUTPUT_DIR="./input_html"

# Print a diagnostic message to stderr.
err() {
  printf '%s\n' "$*" >&2
}

#######################################
# Download one page, save it as HTML, then recurse into its children.
# Globals:   CONFLUENCE_URL, USERNAME, PASSWORD, OUTPUT_DIR, DELAY (read)
# Arguments: $1 - Confluence page ID
# Outputs:   progress messages on stdout, errors on stderr
# Returns:   0 if this page and all of its descendants were saved, 1 otherwise
#######################################
download_page() {
  local page_id=$1
  # Drop a trailing slash from the base URL so the endpoint has no "//".
  local api_endpoint="${CONFLUENCE_URL%/}/rest/api/content/${page_id}?expand=body.storage,children.page"
  local response error_message title content target child_ids child_id
  local status=0

  echo
  echo "Downloading: $api_endpoint"

  # Without this early return a transport failure would fall through and an
  # empty response would be parsed and written to disk.
  if ! response=$(curl -s -u "$USERNAME:$PASSWORD" -H "Accept: application/json" "$api_endpoint"); then
    err "Error: Failed to retrieve article"
    return 1
  fi

  # A "message" field in the response is treated as an API error report.
  if ! error_message=$(jq -r '.message // empty' <<<"$response" 2>/dev/null); then
    err "Error: unexpected (non-JSON or malformed) response for page $page_id"
    return 1
  fi
  if [[ -n "$error_message" ]]; then
    err "API Error: $error_message"
    return 1
  fi

  # -e makes jq fail on a missing/null title instead of printing "null".
  if ! title=$(jq -er '.title' <<<"$response" 2>/dev/null) || [[ -z "$title" ]]; then
    err "Error: response for page $page_id has no title"
    return 1
  fi
  content=$(jq -r '.body.storage.value // empty' <<<"$response")

  # A "/" in the title would otherwise be taken as a directory separator.
  title=${title//\//_}
  target="$OUTPUT_DIR/$title.html"

  if ! mkdir -p "$OUTPUT_DIR"; then
    err "Error: cannot create directory $OUTPUT_DIR"
    return 1
  fi
  if ! printf '%s\n' "<html><body>${content}</body></html>" >"$target"; then
    err "Error: cannot write $target"
    return 1
  fi
  echo "Saved as: $target"

  # NOTE(review): only the child IDs present in this response are followed;
  # if the server paginates the expanded children list, later pages of
  # children are not fetched - confirm against the API limits.
  child_ids=$(jq -r '.children.page.results[]?.id' <<<"$response" 2>/dev/null)

  # Read the IDs on fd 3 so commands in the loop body cannot consume the list
  # through stdin.
  while IFS= read -r -u 3 child_id; do
    [[ -n "$child_id" ]] || continue
    echo "Downloading child page ID: $child_id"
    sleep "$DELAY"
    # Keep going with the remaining siblings even if one subtree fails.
    download_page "$child_id" || status=1
  done 3<<<"$child_ids"

  return "$status"
}

#######################################
# Validate arguments, configuration and required tools, then start the download.
# Arguments: $1 - Confluence page ID of the root page
#######################################
main() {
  if [[ $# -lt 1 ]]; then
    err "Usage: $0 <pageId>"
    exit 1
  fi
  command -v curl >/dev/null 2>&1 || { err "Error: curl is required but not installed."; exit 1; }
  command -v jq >/dev/null 2>&1 || { err "Error: jq is required but not installed."; exit 1; }
  if [[ -z "$CONFLUENCE_URL" ]]; then
    err "Error: CONFLUENCE_URL is not set; fill in the access details at the top of $0."
    exit 1
  fi

  download_page "$1"
}

main "$@"