Skip to content

Instantly share code, notes, and snippets.

@micro-tiger
Last active January 1, 2026 15:49
Show Gist options
  • Select an option

  • Save micro-tiger/cf67e7458730f03f881040e39d3ba1d3 to your computer and use it in GitHub Desktop.

Select an option

Save micro-tiger/cf67e7458730f03f881040e39d3ba1d3 to your computer and use it in GitHub Desktop.
import os
import re
import random
import string
import urllib.request
import time
import logging
# Creates a folder in "location" to store the pictures and the modified files (local link to imgs)
class FolderCreator:
    """Create the output folder for downloaded images and edited markdown files.

    Attributes:
        location: Parent directory in which folders are created.
        name: Name of the most recently requested folder (set by create_folder).
        folder: Path of the most recently requested folder (set by create_folder).
    """

    def __init__(self, location="."):
        """Remember ``location`` as the parent directory for new folders."""
        self.location = location

    def create_folder(self, name):
        """Create the folder ``name`` inside ``self.location``.

        Missing intermediate directories are created too, and a folder that
        already exists is left untouched.

        Raises:
            OSError: If the folder cannot be created (e.g. permission denied,
                or the path exists but is not a directory).
        """
        self.name = name
        self.folder = os.path.join(self.location, self.name)
        # makedirs(exist_ok=True) replaces the exists()/mkdir() pair: no race
        # between the check and the creation, and parent folders are handled.
        os.makedirs(self.folder, exist_ok=True)
# Write the content ("filedata") of each new modified file with "filename" as name, on the "folder_path"
class FileWritter:
    """Write the edited markdown content to the output folder."""

    def write_file(self, folder_path, filename, filedata):
        """Write ``filedata`` as UTF-8 text to ``filename`` inside ``folder_path``.

        The three arguments are also stored on the instance under the same
        names. An existing file with the same name is overwritten.

        Raises:
            OSError: If the target file cannot be opened for writing.
        """
        self.folder_path = folder_path
        self.filename = filename
        self.filedata = filedata
        # os.path.join picks the platform separator; the previous hard-coded
        # "\\" only produced a valid path on Windows.
        target_path = os.path.join(self.folder_path, self.filename)
        with open(target_path, "w", encoding="utf-8") as file:
            file.write(self.filedata)
# Download the images from the links obtained from the markdown files to the destination folder ("folder_path").
# The user-agent can be specified in order to circumvent simple connection blocks imposed by the
# sources of the images
class ImgDownloader:
    """Download images to a local folder using a custom User-Agent header."""

    def download_images(self, url_dict, folder_path, user_agent, max_delay=2):
        """Download every URL in ``url_dict`` into ``folder_path``.

        Args:
            url_dict: Mapping of image URL -> local file name to save it as.
            folder_path: Existing directory that receives the files.
            user_agent: Value sent in the ``User-agent`` request header.
            max_delay: Upper bound (whole seconds, inclusive) of the random
                pause taken after each download attempt.

        A failed download is logged with its traceback and does not stop the
        remaining downloads.
        """
        self.url_dict = url_dict
        self.folder_path = folder_path
        self.user_agent = user_agent
        if not self.url_dict:
            return

        # urlretrieve() goes through the globally installed opener, so the
        # opener carrying the User-agent header is built and installed once
        # instead of once per URL.
        opener = urllib.request.build_opener()
        opener.addheaders = [("User-agent", self.user_agent)]
        urllib.request.install_opener(opener)

        for url, name in self.url_dict.items():
            # os.path.join instead of a hard-coded "\\" so the file lands
            # inside folder_path on every platform.
            save_name = os.path.join(self.folder_path, name)
            try:
                urllib.request.urlretrieve(url, save_name)
            except Exception:
                # Best effort: one unreachable image must not abort the run.
                logging.exception(f"Error when downloading {url}")
            # Random pause so the remote hosts are not hit in a tight loop.
            time.sleep(random.randint(0, max_delay))
# Opens and reads the received file and returns its content
class FileOpener:
    """Read text files located relative to the current working directory."""

    def open_and_read(self, filename):
        """Return the UTF-8 content of ``filename``, or ``None`` on failure.

        ``filename`` is joined onto the current working directory. Any error
        while opening or reading is logged with its traceback and results in
        ``None`` being returned instead of an exception.
        """
        self.url_dict = {}
        self.filename = filename
        try:
            source_path = os.path.join(os.getcwd(), filename)
            handle = open(source_path, "r", encoding="utf-8")
            # The handle stays reachable on the instance after it is closed.
            self.current_opened_file = handle
            with handle:
                print(f"\nOpened file: {self.filename}")
                logging.info(f"Opened file: {self.filename}\n")
                content = handle.read()
        except Exception:
            logging.exception(f"Error when opening file {self.filename}")
            return None
        return content
# Find (via regex) the image URLs in the received "file_data" and create a dictionary with the URLs (for later download) as keys
# and, as values, local file names built from the markdown file name, a running index and the image extension
# in order to save the files later and prevent name collisions
class UrlDictCreator:
    """Map image URLs found in markdown text to local file names."""

    def create(self, regex, file_data, file_name):
        """Build a ``{url: local_file_name}`` dict for the images in ``file_data``.

        Args:
            regex: Pattern whose full match is the image URL and whose first
                group is the file extension (without the dot).
            file_data: Markdown text to scan; ``None`` or empty text yields an
                empty dict.
            file_name: Name of the markdown file; its base name (spaces turned
                into underscores, extension removed) prefixes every local
                image name.

        Returns:
            Dict mapping each distinct URL, in order of first appearance, to
            ``"<base>_<index>.<extension>"``. Errors raised while matching are
            logged and the entries collected so far are returned.
        """
        self.file_name = file_name
        self.url_dict = {}
        self.regex = regex
        self.file_data = file_data

        if not self.file_data:
            # Nothing to scan (empty file, or the read failed upstream);
            # previously this case was only handled by catching a TypeError.
            print(self.url_dict)
            return self.url_dict

        # Loop-invariant: computed once. splitext removes the real extension
        # instead of blindly cutting the last three characters.
        base_name = os.path.splitext(file_name.replace(" ", "_"))[0]

        try:
            for match in re.finditer(self.regex, self.file_data):
                url = match.group(0)  # full URL
                file_extension = match.group(1)  # file extension
                # Remote file name up to its last dot (debug output only).
                file_name_without_ext = url.split('/')[-1].rsplit('.', 1)[0]
                print(f"URL: {url}, Имя файла: {file_name_without_ext}, Расширение: .{file_extension}")  # debug output
                if url in self.url_dict:
                    # Repeated URL: keep the name assigned on first sight.
                    continue
                # The current dict size is the running index, so names are
                # unique within one markdown file.
                self.url_dict[url] = f"{base_name}_{len(self.url_dict)}.{file_extension}"
        except Exception as e:
            # Best effort: keep whatever was collected before the failure.
            logging.exception("Ошибка при попытке найти URL и добавить их в словарь: %s", e)

        print(self.url_dict)
        return self.url_dict
# Edit the markdown files, changing the url's links for a new name corresponding to the name of the local file
# images that will be downloaded later
class FileDataEditor:
    """Rewrite markdown text so image links point at local file names."""

    def edit(self, file_data, url_dict, file_name):
        """Return ``file_data`` with every URL in ``url_dict`` replaced.

        Args:
            file_data: Markdown text to edit (``None`` is returned unchanged).
            url_dict: Mapping of image URL -> local file name.
            file_name: Name of the markdown file, used only in the messages.

        Returns:
            The edited text. All replacements happen in a single pass, with
            longer URLs tried first, so a URL that is a prefix of another URL
            (``.../img.png`` vs ``.../img.png.png``) cannot corrupt the longer
            one and already-inserted local names are never rewritten again.
        """
        self.file_name = file_name
        self.url_dict = url_dict
        self.file_data = file_data
        if file_data is None or not url_dict:
            return self.file_data

        # Longest alternative first: regex alternation takes the first branch
        # that matches, so the most specific URL wins at each position.
        longest_first = sorted(url_dict, key=len, reverse=True)
        url_pattern = re.compile("|".join(re.escape(url) for url in longest_first))
        self.file_data = url_pattern.sub(
            lambda found: url_dict[found.group(0)], self.file_data
        )

        for key, value in url_dict.items():
            print(f"\nreplaced: {key}\nwith {value}\n on file {self.file_name}\n")
            logging.info(f"replaced: {key}\nwith: {value}\non file: {self.file_name}\n")
        return self.file_data
# Program start:
# For every markdown file in the current working directory: find the remote
# image links, download the images into "md_images", and write a copy of the
# markdown file there with the links rewritten to the local image names.
print("\nStarting..")

# Create new log file (overwritten on every run)
log_filename = 'Img_To_Local_Python.log'
logging.basicConfig(filename=log_filename, encoding='utf-8', filemode="w", level=logging.DEBUG)

# Defines the folder to write the new markdown files and the downloaded images.
# os.path.join replaces the former "\/" string building, which relied on an
# invalid escape sequence and only resolved correctly on Windows.
folder_name = "md_images"
folder_path = os.path.join(os.getcwd(), folder_name)

# Create new folder to receive the downloaded imgs and edited MD files
folder_creator = FolderCreator()
folder_creator.create_folder(folder_name)
logging.info(f"New folder created: {folder_path}\n")
print(f"New folder created: {folder_path}")
logging.info("to receive the imgs and edited markdown files\n")
print("to receive the imgs and edited markdown files\n")

# Regex that will be used to look for url's of images.
# Can filter
# ![](https://www.ip.com/img.png)
# ![text](https://www.ip.com/img.png "text")
# ![](https://www.ip.com/img.png.png.png)
regex = r'(?<=\]\()https?:\/\/[^\s"()]+\.(png|jpg|jpeg|gif|bmp|svg)(?=\s*["\)]|$)'

# Sent with every image request to get past simple user-agent based blocking
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.3"

# The helpers keep no state between files, so one instance of each is enough
file_opener = FileOpener()
url_dict_creator = UrlDictCreator()
file_data_editor = FileDataEditor()
images_downloader = ImgDownloader()
file_name_writter = FileWritter()

# Loop through every markdown file in the current working directory
for filename in os.listdir(os.getcwd()):
    print("\n")
    if not filename.endswith(".md"):
        logging.info(f"Skipped file: {filename}\n")
        print(f"Skipped file: {filename}")
        continue

    # Open and read each file
    file_data = file_opener.open_and_read(filename)
    if file_data is None:
        # The read failed and FileOpener already logged the error;
        # there is nothing to scan, download or write for this file.
        continue

    # Create a dictionary of images URLs for each file
    url_dict = url_dict_creator.create(regex, file_data, filename)

    # Edit the read content of each file, replacing the found imgs urls with local file names instead
    edited_file_data = file_data_editor.edit(file_data, url_dict, filename)

    # Download the images listed on the dictionary of found urls for each file
    images_downloader.download_images(url_dict, folder_path, user_agent)

    # Write the modified markdown file only when at least one link was replaced
    if url_dict:
        file_name_writter.write_file(folder_path, filename, edited_file_data)

    print(f"Closed file: {filename}")
    logging.info(f"Closed file: {filename}\n")

print("\n\n\nIf everything went OK, you can check your modified markdown")
print("files and the downloaded images on the folder:")
print(f"{folder_path}")
# Point at the log file that was actually configured above
print(f"\nFor more info check the log file on \n{os.path.join(os.getcwd(), log_filename)}")
print("\nPress enter to close")
input()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment