-
-
Save lowell80/ade1b19dbd1322dc8eacd5fa332b2ba5 to your computer and use it in GitHub Desktop.
Monitors a Syncthing-synced directory and tries to merge conflicting files (based on https://www.rafa.ee/articles/resolve-syncthing-conflicts-using-three-way-merge/). Probably adaptable for other directory types, but only tested with Logseq (works for me™️).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This script automatically handles Syncthing conflicts on text files by applying a | |
| # git three-way merge between the previously synced version and each divergent version. | |
| # It depends on the watchdog package and git. | |
| # For automatic dependency installation when running with `uv run --script deconflicter.py`: | |
| # /// script | |
| # requires-python = ">=3.10" | |
| # dependencies = [ | |
| # "watchdog", | |
| # ] | |
| # /// | |
| # This code is MIT Licensed: | |
| # Copyright 2024 solarkraft | |
| # Permission is hereby granted, free of charge, to any person obtaining a copy | |
| # of this software and associated documentation files (the "Software"), to deal | |
| # in the Software without restriction, including without limitation the rights | |
| # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
| # copies of the Software, and to permit persons to whom the Software is | |
| # furnished to do so, subject to the following conditions: | |
| # The above copyright notice and this permission notice shall be included in all | |
| # copies or substantial portions of the Software. | |
| # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
| # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
| # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
| # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
| # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
| # SOFTWARE. | |
| import os | |
| import time | |
| import re | |
| import hashlib | |
| import subprocess | |
| from pathlib import Path | |
| from watchdog.observers import Observer as FileSystemObserver | |
| from watchdog.events import FileSystemEventHandler | |
| # Starting here | |
| # TODO: Add support for blocking certain portions files (regex match). Example Obsidian (plugin) updates the 'updated' field in the frontmatter (yaml); then just ignore that. (Right now I get two 'updated: ' lines, with different dates, but that causes a YAML syntax error) | |
| # TODO: Add a global file pattern include/exclude lists. For example, don't try to automatically merge .json files. Or don't try to merge anything in the `.obsidian` folder, for example. | |
| # TODO: Fix up ancestor finding support (pick latest date, NOT after the timestamp embedded in the sync conflict file name) | |
| # TODO: Do we need any special handling for the fact that my ST somehow spits out 2 conflicts at once (Is that just me? ST bug/glitch between versions I'm running??? One file has local time, one has UTC time); but the hash of the 2 files are identical; no need to process *both* of these. Maybe a worker thread and collector thread? Maybe this isn't an issue for the current design at all? Maybe there are too many words in this todo!? | |
| # Improve design | |
| # TODO: Oh yeah, add a CLI. | |
| # TODO: Check for the .stversions directory at the startup, and for the presence of the git command, as there's no point to running without this. | |
| # TODO: Allow this script to run in real-time 'watchdog' mode; or via a scheduled job (run-once) | |
| # TODO: Use pathlib; and make this more OS agnostic. (Not sure how much I care about making it work on Windows; but I generally try) | |
| # TODO: use logging, not print() | |
| # Bigger things... | |
| # TODO: Allow it use "GIT" history as the actual basis for the common ancestor (if the file in question lives in a GIT repo, of course.) Even still, there could be some other smarter "merge" options we could consider. | |
| # TODO: Allow pulling history from .zfs/snapshots; (Won't work inside of a container) | |
| # TODO: Add better auditing and "undo(ish)" support. For example, we should store a copy of the unmerged file BEFORE overwriting it (in case the merge goes wrong. | |
| # I've seen examples where entire blocks of text have been repeated unexpectedly; possibly because of a poorly chosen ancestor.) There should be CLI options for this. | |
| # Is there any value in checking for the `.stfolder` at startup to make sure we're running from the correct location? (Does it hurt if we don't? Maybe just a warning to the user...) | |
# Because you can change this in the UI/config:
STVERSIONS_DIR = ".stversions"
# Cap on the number of remembered (path, hash) pairs so memory stays bounded.
MAX_HASH_HISTORY = 100
# Most-recent-first list of (original_file_path, conflict_file_hash) tuples for
# conflicts already handled; used to skip duplicate conflict emissions.
hash_history = []
def get_hash(path: "str | Path") -> str:
    """Return the SHA-256 hex digest of the file at *path*.

    Accepts either a string path or a Path (callers pass both), so the
    annotation is widened accordingly and the redundant manual
    ``hashlib.new``/``update`` dance is replaced with the direct constructor.
    """
    return hashlib.sha256(Path(path).read_bytes()).hexdigest()
def get_relative_path(path):
    """Return *path* rewritten relative to the current working directory."""
    relative = os.path.relpath(path)
    return relative
def merge_files(original, backup, conflict):
    """Union-merge *conflict* into *original*, using *backup* as the ancestor.

    Invokes ``git merge-file --union`` so that *original* is rewritten in
    place with the merged content.  Raises RuntimeError if git exits nonzero.
    """
    command = ["git", "merge-file", "--union", original, backup, conflict]
    print("Performing three way merge with git command:")
    print(" ".join(command))
    result = subprocess.run(command, cwd=os.getcwd())
    if result.returncode != 0:
        # Should we abort or just try again on the next change? If this is running as a background process, this could be quite unexpected. Caller should check return status.
        raise RuntimeError("Git command failed!")
def merge_if_applicable(src_path):
    """Perform a three way merge on and remove a given possible syncthing conflict file if:

    - It is an actual conflict file (determined by naming scheme)
    - The associated canonical file exists ("real" file path)
    - A backup file in .stversions exists

    Side effects: rewrites the original file with the merged content, deletes
    the conflict file, and records (original path, conflict hash) in the
    module-level hash_history so duplicate conflict emissions are skipped.
    Returns None in all cases; non-applicable paths are ignored silently.
    """
    global hash_history
    if not os.path.isfile(src_path):
        # print(src_path, "is not a file")
        return
    if f"/{STVERSIONS_DIR}/" in src_path:
        # Ignore any activity in the .stversion folder.
        # I've rarely found sync-conflict in my .stversion folder
        #
        # find ../.stversions \( -name '*.sync-conflict*' -o -name '.syncthing.*' \) | wc -l
        #        5
        return
    candidate_file_path = get_relative_path(src_path)
    match = re.search(
        # Captures: (1) base name, (2) date, (3) time, (4) device id, (5) extension.
        # . is converted to %2F when a conflict file is opened in Logseq
        r"^(.*?)(?:\.|%2F)sync-conflict-([0-9]{8})-([0-9]{6})-(.{7})\.?([^.]*)$",
        candidate_file_path,
    )
    if match is None:
        # The file is not a syncthing conflict file
        # print(candidate_file_path, "is not a conflict file")
        return
    if not src_path.endswith(".md"):
        print("Refusing to attempt to merge NON *.md files....")
        # Specifically .json files don't merge well...
        return
    conflict_file_hash = get_hash(candidate_file_path)
    conflict_file_path = candidate_file_path
    print()  # Make run easier to recognize
    print(f"Conflict file found: {conflict_file_path} {conflict_file_hash[:12]}")
    # print(x.groups())
    conflict_file_name = match.group(1)
    conflict_file_date = match.group(2)
    conflict_file_time = match.group(3)
    conflict_file_id = match.group(4)
    conflict_file_extension = match.group(5)
    # print(conflict_file_path, conflict_file_date, conflict_file_time, conflict_file_id, conflict_file_extension)
    original_file_path = conflict_file_name + "." + conflict_file_extension
    # Check fix history. Sometimes syncthing emits the same conflict with 2
    # files one for local, one with UTC time. IDK why, but let's just clean
    # it up. Avoid doubling up change (dup content)
    hash_key = (original_file_path, conflict_file_hash)
    if hash_key in hash_history:
        print(f"Deleting conflict file due to fix history. {hash_key}")
        os.remove(os.path.join(os.getcwd(), conflict_file_path))
        return
    # HACK: Give Syncthing some time to move the tmpfile (.syncthing.MyFileName) to its real location
    time.sleep(0.1)
    if not os.path.isfile(original_file_path):
        print("... but original file", original_file_path, "doesn't exist")
        # Here we may be too early to leave before Syncthing has moved its tempfile to the real location
        # .syncthing.Testseite.md.tmp
        # print("... what about the Syncthing tempfile?")
        # p = list(os.path.split(original_file_path))
        # tmpfile_name = ".syncthing." + p.pop() + ".tmp"
        # print("name:", tmpfile_name, "path:", p)
        return
    print("For original file:", original_file_path)
    # Syncthing "simple" versioning names backups <name>~YYYYMMDD-HHMMSS.<ext>
    backup_file_regex_string = (
        STVERSIONS_DIR + "/"
        + conflict_file_name
        + r"~([0-9]{8})-([0-9]{6})\."
        + conflict_file_extension
    )
    backup_file_regex = re.compile(backup_file_regex_string)
    backup_files = []
    for dirpath, _, files in os.walk(os.path.join(os.getcwd(), STVERSIONS_DIR)):
        for file in files:
            candidate_path = str(os.path.join(get_relative_path(dirpath), file))
            # print("Test:", candidate)
            match = backup_file_regex.match(candidate_path)
            if match:
                backup_file_date = match.group(1)
                backup_file_time = match.group(2)
                # print("Matched:", candidate_path, backup_file_date, backup_file_time)
                backup_files.append(candidate_path)
    # Hmmm. No checking to see which files is most recent or closest to (but not _older_ than) the sync conflict???
    # Apparently this just relies on os.listdir() sort order, which is OS, and FS specific....? whoops
    # Hacky. Good enough for now. It should work based on text sort order
    # (the embedded YYYYMMDD-HHMMSS timestamps sort lexicographically).
    backup_files.sort(reverse=True)
    if len(backup_files) == 0:
        print(
            f"No backup file candidates were found by pattern {backup_file_regex_string}. There isn't enough data for a three way merge."
        )
        print("This may be due to custom versioning settings - try simple versioning.")
        # (TODO): We can still merge the 2 files here. This will increase compatibility with other versioning schemes
        return
    # print("Backup files:", backup_files)
    # We want the latest backup file, which is the first in the list (??? maybe they are sorted differently)
    backup_file = backup_files[0]
    print("Latest backup file:", backup_file)
    merge_files(original_file_path, backup_file, conflict_file_path)
    # TODO: This really _should_ be done on a temp file for file safety and to reduce race conditions
    fixup_obsidian_frontmatter(original_file_path)
    print("Deleting conflict file")
    os.remove(os.path.join(os.getcwd(), conflict_file_path))
    # Remember this fix so the duplicate local/UTC conflict emission is skipped.
    hash_history.insert(0, hash_key)
    if len(hash_history) > MAX_HASH_HISTORY:
        hash_history = hash_history[:MAX_HASH_HISTORY]
    print("Deconfliction done!")
    print()
class FileChangeHandler(FileSystemEventHandler):
    """Watchdog event handler that feeds filesystem events to merge_if_applicable()."""

    # To support manually "touch"ing a file to get the script to handle it
    @staticmethod
    def on_modified(event):
        # print(f"A file was modified: {event}")
        merge_if_applicable(event.src_path)

    # This is how Syncthing creates the conflict files
    @staticmethod
    def on_moved(event):
        # print("A file was moved, may have been syncthing")
        # print(event)  # Syncthing does some moving-around business
        merge_if_applicable(event.dest_path)
def read_obsidian_md(path, divider="---"):
    """Split the markdown file at *path* into (frontmatter, body) line lists.

    Frontmatter is the block delimited by *divider* lines at the very top of
    the file; both divider lines are included in the returned frontmatter
    list.  All remaining lines form the body.  Every line is right-stripped.

    Fixes over the previous streaming implementation:
    - An empty file returns ([], []) instead of raising StopIteration.
    - A file without frontmatter keeps its first line in the body
      (previously the first line was consumed by next() and dropped).
    - An unterminated frontmatter block is treated as plain body instead of
      raising StopIteration from the scan loop.
    """
    with open(path, "r") as f:
        lines = [line.rstrip() for line in f]
    if not lines or lines[0] != divider:
        # No frontmatter: everything (including the first line) is body.
        return [], lines
    print("Looking for frontmatter")
    for i in range(1, len(lines)):
        if lines[i] == divider:
            # Both divider lines belong to the frontmatter.
            return lines[: i + 1], lines[i + 1 :]
    # Opening divider never closed: safest to treat the whole file as body.
    return [], lines
def fix_frontmatter(frontmatter):
    """
    Identify duplicate keys. Use a simple sort order to keep the "highest" value and discard all other values.
    A general attempt is made to NOT change anything if there's nothing to update, but trailing spaces and such may be removed.
    NOTE: This does NOT work for anything other than very simple (single line) YAML keys.

    Takes and returns a list of frontmatter lines (divider lines included).
    The surviving key keeps the position of its first occurrence.
    """
    simple_yaml_value_re = re.compile(r'^([a-zA-Z_][a-zA-Z0-9]*)\s*:\s*(.*)$')
    temp_output = []
    known_kv: dict[str, str] = {}
    replace_kv: dict[str, str] = {}
    for line in frontmatter:
        match = simple_yaml_value_re.match(line)
        if match:
            key, value = match.groups()
            if key in known_kv:
                prev_value = known_kv[key]
                new_value = max(prev_value, value)
                if new_value != value:
                    # TODO: Don't log for unit tests...
                    print(f"Replacing frontmatter key: {key} = '{new_value}' (was '{prev_value}')")
                else:
                    print(f"Replacing frontmatter key: {key} = '{new_value}' (alternate '{value}')")
                # Drop the duplicate line; the first occurrence is rewritten below.
                line = None
                # BUG FIX: record the winning (max) value. The previous code
                # stored `value`, so the *later* occurrence always won even
                # when it sorted lower, contradicting the docstring.
                replace_kv[key] = new_value
                value = new_value
            known_kv[key] = value
        if line is not None:
            temp_output.append(line)
    output = []
    for line in temp_output:
        match = simple_yaml_value_re.match(line)
        if match:
            key, value = match.groups()
            if key in replace_kv:
                line = f"{key}: {replace_kv[key]}"
        output.append(line)
    return output
# Lazy unit test (runs at import time): duplicate-key resolution keeps the
# highest value and preserves the first occurrence's position.
# NOTE: the previous version passed a single backslash-continued string
# (which splitlines() could not break apart) and compared with !=, so it
# could never fail regardless of fix_frontmatter's behavior.
assert fix_frontmatter([
    "---",
    "updated: 2025-05-04T18:00:14-04:00",
    "updated: 2025-05-04T18:01:43-04:00",
    "created: 2025-05-04T17:46:38-04:00",
    "---",
]) == [
    "---",
    "updated: 2025-05-04T18:01:43-04:00",
    "created: 2025-05-04T17:46:38-04:00",
    "---",
]
def fixup_obsidian_frontmatter(path: str):
    """Deduplicate frontmatter keys of the markdown file at *path*, in place.

    No-op for non-markdown files, files without frontmatter, and files whose
    frontmatter is already clean; otherwise the whole file is rewritten with
    the fixed frontmatter followed by the untouched body.
    """
    if not path.endswith(".md"):
        return
    frontmatter, body = read_obsidian_md(path)
    if not frontmatter:
        # Nothing to fix. We're done
        return
    fixed = fix_frontmatter(frontmatter)
    if fixed == frontmatter:
        # No fixes needed. Exit now
        return
    print(f"Front matter changes for {path}. Updating")
    with open(path, "w") as f:
        f.write("\n".join(fixed + body) + "\n")
    print(f"Successfully overwrote {path}")
| if __name__ == "__main__": | |
| print("Running Syncthing deconflicter") | |
| # timeout=10 prevents events being lost on macOS | |
| observer = FileSystemObserver(timeout=10) | |
| event_handler = FileChangeHandler() | |
| path = "." | |
| # From quickstart | |
| observer.schedule(event_handler, path, recursive=True) | |
| observer.start() | |
| try: | |
| while observer.is_alive(): | |
| observer.join(1) | |
| finally: | |
| observer.stop() | |
| observer.join() | |
| print("Stopped Syncthing deconflicter") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment