Update mkbsd.py

Refactor the image download script to track and skip previously downloaded files, using each entry's unique key and a consistent naming scheme.
- Added logic that uses the unique key from the JSON data for consistent filenames and tracking.
- Improved the file-existence check so files already present in the download directory are skipped.
- Updated the JSON list to store the keys of downloaded files, so tracking persists across runs.
- Incorporated a delay between downloads for smoother processing.
- Cleaned up old, redundant code and ensured consistency in file naming and tracking.
2024-12-23 00:55:20 +00:00 · 2024-09-29 02:18:54 +05:30 · 2024-09-29 02:18:54 +05:30 · 9b1264c54a
commit 9b1264c54a
parent b87195fa08
1 changed files with 62 additions and 12 deletions
--- a/mkbsd.py
+++ b/mkbsd.py
@ -1,11 +1,15 @@
 # Licensed under the WTFPL License

 import os
+import json
 import time
 import aiohttp
 import asyncio
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlsplit
+import hashlib
+
 url = 'https://storage.googleapis.com/panels-api/data/20240916/media-1a-i-p~s'
+downloaded_list_path = 'downloadedList.json'

 async def delay(ms):
    await asyncio.sleep(ms / 1000)
@ -21,15 +25,40 @@ async def download_image(session, image_url, file_path):
    except Exception as e:
        print(f"Error downloading image: {str(e)}")

+def extract_name_from_url(url):
+    """Return '<prefix>-<base>' built from the URL path, or the URL's MD5 hex digest if parsing fails."""
+    try:
+        path = urlsplit(url).path
+        # urlsplit() keeps the query string out of .path, so no '?' stripping is needed.
+        name_with_extension = os.path.basename(path)
+        # Prefix: taken from the first path segment starting with 'a~' (e.g., 'hytha', 'outrunyouth').
+        prefix_part = next((part for part in path.split('/') if part.startswith('a~')), None)
+        prefix = prefix_part.split('~')[1].split('_')[0].lower() if prefix_part else 'unknown'
+        # Base name: text before the first '.' and '~', reduced to ASCII letters and digits.
+        # str.replace() matches literally, so the former regex-style pattern never removed anything.
+        stem = name_with_extension.split('.')[0].split('~')[0]
+        base_name = ''.join(ch for ch in stem if ch.isascii() and ch.isalnum()).lower()
+        return f"{prefix}-{base_name}"
+    except Exception as e:
+        print(f"Error extracting name from URL: {str(e)}")
+        return hashlib.md5(url.encode()).hexdigest()
+
 async def main():
    try:
+        # Load existing downloaded list
+        if os.path.exists(downloaded_list_path):
+            with open(downloaded_list_path, 'r') as f:
+                downloaded_list = json.load(f)
+        else:
+            downloaded_list = []
+
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as response:
                if response.status != 200:
                    raise Exception(f"⛔ Failed to fetch JSON file: {response.status}")
                json_data = await response.json()
                data = json_data.get('data')
-                
+
                if not data:
                    raise Exception('⛔ JSON does not have a "data" property at its root.')

@ -38,21 +67,42 @@ async def main():
                    os.makedirs(download_dir)
                    print(f"📁 Created directory: {download_dir}")

-                file_index = 1
+                downloaded_count = 0
+                skipped_count = 0
+
                for key, subproperty in data.items():
                    if subproperty and subproperty.get('dhd'):
                        image_url = subproperty['dhd']
-                        print(f"🔍 Found image URL!")
-                        parsed_url = urlparse(image_url)
-                        ext = os.path.splitext(parsed_url.path)[-1] or '.jpg'
-                        filename = f"{file_index}{ext}"
-                        file_path = os.path.join(download_dir, filename)
+                        image_name = f"{extract_name_from_url(image_url)}-{key}"
+                        ext = os.path.splitext(urlparse(image_url).path)[-1] or '.jpg'
+                        file_path = os.path.join(download_dir, f"{image_name}{ext}")

-                        await download_image(session, image_url, file_path)
-                        print(f"🖼️ Saved image to {file_path}")
+                        # Check if file already exists
+                        if os.path.exists(file_path):
+                            if key not in downloaded_list:
+                                downloaded_list.append(key)
+                                print(f"✅ Found existing file, added key to list: {file_path}")
+                                with open(downloaded_list_path, 'w') as f:
+                                    json.dump(downloaded_list, f, indent=2)
+                            skipped_count += 1
+                        else:
+                            # Download the image if it doesn't exist
+                            downloaded_count += 1
+                            print(f"🔍 Found new image URL: {image_url}")

-                        file_index += 1
-                        await delay(250)
+                            await download_image(session, image_url, file_path)
+                            print(f"🖼️ Saved image to {file_path}")
+
+                            # Add key to downloaded list
+                            downloaded_list.append(key)
+                            with open(downloaded_list_path, 'w') as f:
+                                json.dump(downloaded_list, f, indent=2)
+                            print(f"📄 Updated downloaded list with key: {key}")
+
+                            await delay(250)
+
+                print(f"🚀 Downloaded {downloaded_count} new images")
+                print(f"✅ Skipped {skipped_count} images that already exist")

    except Exception as e:
        print(f"Error: {str(e)}")