#!/usr/bin/env python3
"""
GitLab to Forgejo Attachment Migration Script

This script downloads attachments from GitLab issues and uploads them to
corresponding Forgejo issues, updating the URLs in the process.

Dependencies:
    pip install python-gitlab pyforgejo requests
"""
|
|
|
import os |
|
import re |
|
import sys |
|
import logging |
|
import requests |
|
from pathlib import Path |
|
from urllib.parse import urlparse, urljoin |
|
from typing import Dict, List, Tuple, Optional |
|
import tempfile |
|
import mimetypes |
|
|
|
import gitlab |
|
from pyforgejo import PyforgejoApi |
|
|
|
# Module-wide logging setup: timestamped INFO-level records on the root
# logger, plus a logger named after this module for all messages below.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
|
|
|
|
|
class GitLabToForgejoAttachmentMigrator:
    """Migrates attachments from GitLab issues to Forgejo issues.

    For every GitLab issue the migrator collects upload links from the issue
    description and its comments, downloads each file, uploads it as an asset
    of the corresponding Forgejo issue, and rewrites the links in the issue
    description.

    Note:
        Only the issue *description* is rewritten on the Forgejo side.
        Attachments referenced in comments are uploaded to the issue, but
        the comment bodies themselves are left untouched.
    """

    # (connect, read) timeout in seconds passed to every HTTP request, so a
    # stalled server cannot hang the migration indefinitely.
    REQUEST_TIMEOUT = (10, 120)

    # Matches GitLab upload paths such as '/uploads/<32 hex chars>/file.png'.
    # The path ends at whitespace or ')' (the end of a Markdown link target).
    upload_pattern = re.compile(r'(/uploads/[a-f0-9]{32}/[^)\s]+)')

    # Header names whose values must never be written to the log.
    _SENSITIVE_HEADERS = frozenset(
        {'authorization', 'private-token', 'cookie', 'set-cookie'}
    )

    def __init__(self, gitlab_url: str, gitlab_token: str,
                 forgejo_url: str, forgejo_token: str):
        """
        Initialize the migrator.

        Args:
            gitlab_url: GitLab instance URL (e.g., 'https://gitlab.com')
            gitlab_token: GitLab API token
            forgejo_url: Forgejo instance URL (e.g., 'https://codeberg.org')
            forgejo_token: Forgejo API token
        """
        self.gitlab_url = gitlab_url.rstrip('/')
        self.gitlab_token = gitlab_token
        self.forgejo_url = forgejo_url.rstrip('/')
        self.forgejo_token = forgejo_token

        # Initialize GitLab client (with the normalized URL, so the client
        # and the manually built download URLs agree)
        self.gitlab = gitlab.Gitlab(self.gitlab_url, private_token=gitlab_token)

        # Forgejo is driven through plain HTTP calls; this session carries
        # the JSON headers used for issue updates.
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'token {forgejo_token}',
            'Content-Type': 'application/json'
        })

    def get_gitlab_project(self, project_id: str) -> "gitlab.v4.objects.Project":
        """Get GitLab project by ID or path.

        Raises:
            gitlab.exceptions.GitlabGetError: If the project cannot be
                fetched (logged, then re-raised).
        """
        try:
            return self.gitlab.projects.get(project_id)
        except gitlab.exceptions.GitlabGetError:
            logger.error(f"Could not find GitLab project: {project_id}")
            raise

    def get_project_issues(self, project: "gitlab.v4.objects.Project") -> List:
        """Get all issues (any state) from a GitLab project.

        Returns:
            The list of issues, or an empty list if the GitLab API call fails.
        """
        try:
            issues = project.issues.list(all=True, state='all')
            logger.info(f"Found {len(issues)} issues in project {project.name}")
            return issues
        except gitlab.exceptions.GitlabError as e:
            # list() reports failures as GitlabListError, not GitlabGetError;
            # catching the common base class covers both.
            logger.error(f"Error fetching issues: {e}")
            return []

    def find_attachments_in_text(self, text: str) -> List[str]:
        """
        Find all GitLab upload URLs in text content.

        Args:
            text: Issue description or comment text (may be None or empty)

        Returns:
            List of upload paths found in the text, in order of appearance
            (duplicates are kept)
        """
        if not text:
            return []
        return self.upload_pattern.findall(text)

    def download_attachment(self, project: "gitlab.v4.objects.Project",
                            upload_path: str) -> Tuple[Optional[bytes], Optional[str]]:
        """
        Download an attachment from GitLab.

        Args:
            project: GitLab project object
            upload_path: Upload path (e.g., '/uploads/abc123.../file.png')

        Returns:
            Tuple of (file_content, filename) or (None, None) if failed
        """
        try:
            # Construct full URL using GitLab's project ID format
            full_url = f"{self.gitlab_url}/-/project/{project.id}{upload_path}"

            logger.info(f"Attempting to download: {full_url}")

            # Download with authentication. The whole body is needed in
            # memory anyway, so the response is not streamed; this also
            # releases the connection on error responses.
            headers = {'Private-Token': self.gitlab_token}
            response = requests.get(full_url, headers=headers,
                                    timeout=self.REQUEST_TIMEOUT)

            if response.status_code == 200:
                filename = os.path.basename(upload_path)
                logger.info(f"✅ Downloaded attachment: {filename}")
                return response.content, filename

            logger.warning(f"❌ Failed to download {upload_path}: HTTP {response.status_code}")
            logger.warning(f"   Full URL attempted: {full_url}")
            return None, None

        except Exception as e:
            # Best effort: one failed download must not abort the migration.
            logger.error(f"Error downloading {upload_path}: {e}")
            return None, None

    def upload_to_forgejo(self, forgejo_owner: str, forgejo_repo: str,
                          file_content: bytes, filename: str, issue_number: int) -> Optional[str]:
        """
        Upload a file to Forgejo and return the new URL.

        Args:
            forgejo_owner: Forgejo repository owner
            forgejo_repo: Forgejo repository name
            file_content: File content as bytes
            filename: Original filename
            issue_number: Forgejo issue number to attach to

        Returns:
            New Forgejo URL or None if failed
        """
        try:
            content_type = self._get_content_type(filename)
            files = {
                'attachment': (filename, file_content, content_type)
            }

            # Upload to Forgejo (using the issue assets API endpoint)
            upload_url = (
                f"{self.forgejo_url}/api/v1/repos/{forgejo_owner}/{forgejo_repo}"
                f"/issues/{issue_number}/assets"
            )

            logger.info(f"Attempting to upload to: {upload_url}")

            # A separate session is used because self.session forces a JSON
            # Content-Type, which would override the multipart one.
            with requests.Session() as upload_session:
                upload_session.headers.update({
                    'Authorization': f'token {self.forgejo_token}'
                })

                # Log detailed request info (credentials masked)
                logger.info("Request details:")
                logger.info("  - Method: POST")
                logger.info(f"  - URL: {upload_url}")
                logger.info(f"  - Headers: {self._redact_headers(upload_session.headers)}")
                logger.info(f"  - File size: {len(file_content)} bytes")
                logger.info(f"  - Content-Type: {content_type}")

                response = upload_session.post(upload_url, files=files,
                                               timeout=self.REQUEST_TIMEOUT)

            response_headers = self._redact_headers(response.headers)

            # Log detailed response info
            logger.info("Response details:")
            logger.info(f"  - Status: {response.status_code}")
            logger.info(f"  - Headers: {response_headers}")
            logger.info(f"  - Body: {response.text}")

            if response.status_code != 201:
                logger.error(f"❌ Upload failed for {filename}:")
                logger.error(f"   Status: {response.status_code}")
                logger.error(f"   Response: {response.text}")
                logger.error(f"   Headers: {response_headers}")
                return None

            upload_data = response.json()
            new_url = upload_data.get('browser_download_url') or upload_data.get('download_url')
            if not new_url:
                # Without a URL there is nothing to rewrite links to, so
                # report this as a failure rather than a success.
                logger.error(f"❌ Upload of {filename} succeeded but the response contained no download URL")
                return None

            logger.info(f"✅ Uploaded {filename} to Forgejo: {new_url}")
            return new_url

        except Exception as e:
            # Best effort: one failed upload must not abort the migration.
            logger.error(f"Error uploading {filename} to Forgejo: {e}")
            return None

    def _get_content_type(self, filename: str) -> str:
        """Get MIME type for a filename ('application/octet-stream' if unknown)."""
        content_type, _ = mimetypes.guess_type(filename)
        return content_type or 'application/octet-stream'

    @classmethod
    def _redact_headers(cls, headers) -> Dict[str, str]:
        """Return a plain-dict copy of *headers* with credential values masked."""
        return {
            name: ('<redacted>' if name.lower() in cls._SENSITIVE_HEADERS else value)
            for name, value in dict(headers).items()
        }

    def _rewrite_upload_urls(self, text: Optional[str],
                             url_mapping: Dict[str, str]) -> str:
        """Replace every mapped GitLab upload path in *text* with its new URL.

        The substitution is a single regex pass over whole upload paths, so a
        path that is a prefix of another path cannot corrupt the longer one.
        Paths without a mapping entry are left unchanged.

        Args:
            text: Original text; None is treated as an empty string
            url_mapping: Mapping of GitLab upload path to new Forgejo URL

        Returns:
            The rewritten text ('' when *text* is None or empty)
        """
        if not text:
            return ''
        return self.upload_pattern.sub(
            lambda match: url_mapping.get(match.group(0), match.group(0)),
            text,
        )

    def _collect_issue_attachments(self, issue) -> List[str]:
        """Return the unique upload paths of an issue's description and comments."""
        attachments = self.find_attachments_in_text(issue.description)

        # Also check comments for attachments
        try:
            for note in issue.notes.list(all=True):
                attachments.extend(self.find_attachments_in_text(note.body))
        except Exception as e:
            logger.warning(f"Could not fetch comments for issue #{issue.iid}: {e}")

        # De-duplicate while keeping first-seen order, so a file referenced
        # several times is only transferred once.
        return list(dict.fromkeys(attachments))

    def update_forgejo_issue(self, forgejo_owner: str, forgejo_repo: str,
                             issue_number: int, new_content: str) -> bool:
        """
        Update a Forgejo issue with new content.

        Args:
            forgejo_owner: Forgejo repository owner
            forgejo_repo: Forgejo repository name
            issue_number: Issue number
            new_content: Updated content with new URLs

        Returns:
            True if successful, False otherwise
        """
        try:
            update_url = (
                f"{self.forgejo_url}/api/v1/repos/{forgejo_owner}/{forgejo_repo}"
                f"/issues/{issue_number}"
            )

            data = {'body': new_content}
            response = self.session.patch(update_url, json=data,
                                          timeout=self.REQUEST_TIMEOUT)

            # The original code expected 201 here; 200 is accepted as well so
            # a server answering a successful edit with "OK" is not reported
            # as a failure.
            if response.status_code in (200, 201):
                logger.info(f"Updated Forgejo issue #{issue_number}")
                return True

            logger.error(f"Failed to update issue #{issue_number}: {response.status_code} - {response.text}")
            return False

        except Exception as e:
            logger.error(f"Error updating Forgejo issue #{issue_number}: {e}")
            return False

    def migrate_issue_attachments(self, gitlab_project_id: str,
                                  forgejo_owner: str, forgejo_repo: str,
                                  issue_mapping: Optional[Dict[int, int]] = None) -> Dict[str, int]:
        """
        Migrate attachments for all issues from GitLab to Forgejo.

        Args:
            gitlab_project_id: GitLab project ID or path
            forgejo_owner: Forgejo repository owner
            forgejo_repo: Forgejo repository name
            issue_mapping: Optional mapping of GitLab issue IID to Forgejo issue number
                          If not provided, assumes 1:1 mapping

        Returns:
            Dictionary with migration statistics (keys 'issues_processed',
            'attachments_migrated', 'attachments_failed', 'issues_updated').
            Attachment counts are per unique upload path within an issue.
        """
        stats = {
            'issues_processed': 0,
            'attachments_migrated': 0,
            'attachments_failed': 0,
            'issues_updated': 0
        }

        # Get GitLab project
        try:
            project = self.get_gitlab_project(gitlab_project_id)
        except Exception as e:
            logger.error(f"Aborting migration, could not load GitLab project {gitlab_project_id}: {e}")
            return stats

        for issue in self.get_project_issues(project):
            stats['issues_processed'] += 1
            logger.info(f"Processing GitLab issue #{issue.iid}: {issue.title}")

            attachments = self._collect_issue_attachments(issue)
            if not attachments:
                logger.info(f"No attachments found in issue #{issue.iid}")
                continue

            logger.info(f"Found {len(attachments)} attachments in issue #{issue.iid}")

            # Determine Forgejo issue number upfront
            forgejo_issue_num = issue_mapping.get(issue.iid, issue.iid) if issue_mapping else issue.iid

            # Process each attachment
            url_mapping: Dict[str, str] = {}
            for attachment_path in attachments:
                file_content, filename = self.download_attachment(project, attachment_path)
                if not (file_content and filename):
                    stats['attachments_failed'] += 1
                    continue

                new_url = self.upload_to_forgejo(forgejo_owner, forgejo_repo,
                                                 file_content, filename, forgejo_issue_num)
                if new_url:
                    url_mapping[attachment_path] = new_url
                    stats['attachments_migrated'] += 1
                else:
                    stats['attachments_failed'] += 1

            if not url_mapping:
                continue

            # The description is None/empty when the attachments came only
            # from comments; in that case there is nothing to rewrite, and
            # sending the body anyway would overwrite the Forgejo issue text.
            old_description = issue.description or ''
            new_description = self._rewrite_upload_urls(old_description, url_mapping)
            if new_description == old_description:
                logger.info(f"No attachment links to rewrite in the description of issue #{issue.iid}")
                continue

            # Update the Forgejo issue
            if self.update_forgejo_issue(forgejo_owner, forgejo_repo,
                                         forgejo_issue_num, new_description):
                stats['issues_updated'] += 1

        return stats
|
|
|
|
|
def main():
    """Run the attachment migration using settings from the environment.

    Required variables: GITLAB_TOKEN, FORGEJO_TOKEN, GITLAB_PROJECT_ID,
    FORGEJO_OWNER, FORGEJO_REPO. Optional: GITLAB_URL and FORGEJO_URL, which
    default to 'https://gitlab.com' and 'https://codeberg.org'.

    Exits with status 1 when a required variable is missing, when the run is
    interrupted by the user, or when the migration raises an exception.
    """
    read_env = os.environ.get

    # Instance URLs have defaults; every other setting is mandatory.
    gitlab_url = read_env('GITLAB_URL', 'https://gitlab.com')
    forgejo_url = read_env('FORGEJO_URL', 'https://codeberg.org')

    # Insertion order determines the order names are reported in.
    required = {
        'GITLAB_TOKEN': read_env('GITLAB_TOKEN'),
        'FORGEJO_TOKEN': read_env('FORGEJO_TOKEN'),
        'GITLAB_PROJECT_ID': read_env('GITLAB_PROJECT_ID'),  # e.g., 'group/project' or '12345'
        'FORGEJO_OWNER': read_env('FORGEJO_OWNER'),  # e.g., 'username'
        'FORGEJO_REPO': read_env('FORGEJO_REPO'),  # e.g., 'repo-name'
    }

    absent = [name for name, value in required.items() if not value]
    if absent:
        logger.error(f"Missing required environment variables: {', '.join(absent)}")
        usage_lines = (
            "Set the following environment variables:",
            "GITLAB_TOKEN - GitLab API token",
            "FORGEJO_TOKEN - Forgejo API token",
            "GITLAB_PROJECT_ID - GitLab project ID or path (e.g., 'group/project')",
            "FORGEJO_OWNER - Forgejo repository owner",
            "FORGEJO_REPO - Forgejo repository name",
            "Optional: GITLAB_URL, FORGEJO_URL",
        )
        for usage_line in usage_lines:
            logger.info(usage_line)
        sys.exit(1)

    project_id = required['GITLAB_PROJECT_ID']
    owner = required['FORGEJO_OWNER']
    repo = required['FORGEJO_REPO']

    # Create migrator
    migrator = GitLabToForgejoAttachmentMigrator(
        gitlab_url=gitlab_url,
        gitlab_token=required['GITLAB_TOKEN'],
        forgejo_url=forgejo_url,
        forgejo_token=required['FORGEJO_TOKEN'],
    )

    logger.info("Starting GitLab to Forgejo attachment migration...")
    logger.info(f"GitLab: {gitlab_url}/{project_id}")
    logger.info(f"Forgejo: {forgejo_url}/{owner}/{repo}")

    # Run migration and report the statistics it returns
    try:
        stats = migrator.migrate_issue_attachments(
            gitlab_project_id=project_id,
            forgejo_owner=owner,
            forgejo_repo=repo,
        )

        logger.info("Migration completed!")
        report = (
            ("Issues processed", 'issues_processed'),
            ("Attachments migrated", 'attachments_migrated'),
            ("Attachments failed", 'attachments_failed'),
            ("Issues updated", 'issues_updated'),
        )
        for label, key in report:
            logger.info(f"{label}: {stats[key]}")

    except KeyboardInterrupt:
        logger.info("Migration interrupted by user")
        sys.exit(1)
    except Exception as exc:
        logger.error(f"Migration failed: {exc}")
        sys.exit(1)
|
|
|
|
|
# Run the migration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()