diff --git a/rag/1_download_html.sh b/rag/1_download_html.sh index 9147d0e..09a7e4f 100755 --- a/rag/1_download_html.sh +++ b/rag/1_download_html.sh @@ -49,10 +49,8 @@ if [ -z "$content" ]; then fi path="$output_path/${title//\//_}.html" -content=$(echo "$content" | sed "s#href=\"/#href=\"$CONF_URL/#g") -content=$(echo "$content" | sed "s#src=\"/#src=\"$CONF_URL/#g") -# content=${content//href=\"\//href=\"$CONF_URL} -# content=${content//src=\"\//src=\"$CONF_URL} +content=${content//href=\"\//href=\"$CONF_URL/} +content=${content//src=\"\//src=\"$CONF_URL/} echo "Страница: $CONF_URL/pages/viewpage.action?pageId=$PAGE_ID

$title

$content" > "$path" echo "Сохранено: $output_path/$title.html" diff --git a/rag/clear.sh b/rag/clear.sh new file mode 100755 index 0000000..4618517 --- /dev/null +++ b/rag/clear.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +rm -rf ./input_html/*.html +rm -rf ./input_pdf/*.pdf +rm -rf ./output_md/*.md +rm -rf ./ready_rag/* +touch ./ready_rag/.gitkeep