Last active
September 19, 2025 10:41
-
-
Save powmod/b6a2d9640ee98265d92c4b8206f2451e to your computer and use it in GitHub Desktop.
reMarkable to Zotero Sync
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ---------------------------------------------------------------------------
// Script configuration. Fill in the empty values before the first run.
// ---------------------------------------------------------------------------

// Credentials sent as the `Zotero-API-Key` header and used in the
// `/users/<id>/` part of every Zotero API URL built by this script.
const ZOTERO_API_KEY = '';
const ZOTERO_USER_ID = '';

// Threads already carrying this label are excluded from the Gmail search.
const GMAIL_LABEL = 'reMarkable/Processed';

// Gmail query that selects the mails sent from the reMarkable tablet.
const SEARCH_QUERY = 'to:mygmail+zotero@gmail.com from:my@remarkable.com has:attachment';

// WebDAV storage settings.
// NOTE(review): not referenced in the part of the script reviewed here;
// presumably consumed by the attachment upload helpers. Confirm.
const WEBDAV_URL = '';
const WEBDAV_USERNAME = '';
const WEBDAV_PASSWORD = '';

// Set to false to disable email notifications.
const SEND_FAILURE_NOTIFICATIONS = true;

// Tags added to the parent Zotero item once an annotated PDF was uploaded.
// Extend the list with your own tags, e.g. 'to-review', '2025', 'important'.
const TAGS_TO_ADD = [
  'annotated',  // the item has been annotated
  'remarkable', // it was processed through reMarkable
  'read',       // the item has been read
];
/**
 * Entry point: finds unprocessed reMarkable emails and pushes every PDF
 * attachment to the matching Zotero item.
 *
 * Threads are selected with SEARCH_QUERY minus the threads that already carry
 * GMAIL_LABEL. An error while handling one thread is logged and does not stop
 * the remaining threads from being processed.
 */
function processRemarkableEmails() {
  // Gmail's search syntax spells a label with "-" in place of every "/" and
  // space, so all of them must be rewritten, not only the first slash.
  const labelSearchName = GMAIL_LABEL.replace(/[\/\s]/g, '-');

  let threads;
  try {
    threads = GmailApp.search(`${SEARCH_QUERY} -label:${labelSearchName}`);
  } catch (error) {
    Logger.log(`Error processing emails: ${error.toString()}`);
    return;
  }

  if (threads.length > 0) {
    Logger.log(`Found ${threads.length} unprocessed emails.`);
  }

  threads.forEach(thread => {
    try {
      thread.getMessages().forEach(message => processRemarkableMessage_(thread, message));
    } catch (error) {
      // Keep going: the thread stays unlabelled and is retried on the next run.
      Logger.log(`Error processing emails: ${error.toString()}`);
    }
  });
}

/**
 * Handles every PDF attachment of a single message.
 *
 * The Zotero item key is resolved in priority order: key typed in the email
 * body, key embedded in the filename, then a metadata search. The thread is
 * marked as processed in every outcome so a failure is reported only once.
 *
 * @param {GmailThread} thread - Thread the message belongs to
 * @param {GmailMessage} message - Message whose attachments are processed
 */
function processRemarkableMessage_(thread, message) {
  const emailBody = message.getPlainBody() || message.getBody();
  const emailItemKey = extractItemKeyFromEmailBody(emailBody);

  message.getAttachments().forEach(attachment => {
    if (attachment.getContentType() !== 'application/pdf') return;

    const fileName = attachment.getName();
    const pdfBlob = attachment.copyBlob();
    Logger.log(`Processing PDF: ${fileName}`);

    let itemKey = null;
    let metadata = null;

    if (emailItemKey) {
      // Priority 1: key found in the email body
      Logger.log(`Using item key from email body: ${emailItemKey}`);
      itemKey = emailItemKey.toUpperCase();
    } else {
      // Priority 2: key embedded in the filename
      itemKey = extractItemKey(fileName);
      if (itemKey) {
        Logger.log(`Using item key from filename: ${itemKey}`);
      }
    }

    if (!itemKey) {
      // Priority 3: search Zotero using metadata parsed from the filename
      metadata = extractMetadata(fileName);
      Logger.log(`No item key found in email or filename. Searching by metadata: ${JSON.stringify(metadata)}`);
      itemKey = findZoteroItem(metadata, fileName);
      if (itemKey) {
        Logger.log(`Found item by metadata search: ${itemKey}`);
      }
    }

    if (!itemKey) {
      Logger.log(`Could not find matching item in Zotero for ${fileName}. Sending failure notification.`);
    } else if (uploadToZotero(itemKey, pdfBlob, fileName)) {
      Logger.log(`Successfully uploaded ${fileName} to Zotero item ${itemKey}.`);
      markAsProcessed(thread);
      return;
    } else {
      Logger.log(`Failed to upload ${fileName} to item ${itemKey}.`);
    }

    // Detection or upload failed: notify once, then label the thread so the
    // notification is not repeated on the next run.
    sendFailureNotification(fileName, metadata || extractMetadata(fileName), message);
    markAsProcessed(thread);
  });
}
/**
 * Extracts a Zotero item key from the email body.
 *
 * Candidates are tried in this order:
 *   1. an 8-character code at the very start of the body (the user typed only
 *      the key; it may be followed directly by "--"),
 *   2. a code introduced by a label ("zotero:", "key:", "id:", "item:"),
 *   3. any standalone 8-character alphanumeric code.
 * Labelled codes are tried before generic ones so that an ordinary
 * 8-letter word earlier in the text cannot shadow an explicitly labelled key.
 * Everything after the first "remarkable.com" (the reMarkable signature) is
 * ignored for steps 2 and 3.
 *
 * @param {string} emailBody - The plain text or HTML body of the email
 * @returns {string|null} The key in its original case, or null if none found
 */
function extractItemKeyFromEmailBody(emailBody) {
  if (!emailBody) return null;

  const startMatch = emailBody.match(/^\s*([A-Za-z0-9]{8})(?![A-Za-z0-9])/);
  if (startMatch && isValidZoteroKey(startMatch[1])) {
    Logger.log(`Found Zotero key at start of email body: ${startMatch[1]}`);
    return startMatch[1];
  }

  // Lookarounds (instead of consuming the neighbouring character) let two
  // candidates separated by a single character both be examined.
  const patterns = [
    /\bzotero[:\s]+([A-Za-z0-9]{8})(?![A-Za-z0-9])/gi,
    /\bkey[:\s]+([A-Za-z0-9]{8})(?![A-Za-z0-9])/gi,
    /\bid[:\s]+([A-Za-z0-9]{8})(?![A-Za-z0-9])/gi,
    /\bitem[:\s]+([A-Za-z0-9]{8})(?![A-Za-z0-9])/gi,
    /(?<![A-Za-z0-9])([A-Za-z0-9]{8})(?![A-Za-z0-9])/g,
  ];

  const signatureIndex = emailBody.indexOf('remarkable.com');

  for (const pattern of patterns) {
    for (const match of emailBody.matchAll(pattern)) {
      // Matches arrive in text order, so once we are inside the signature
      // every remaining match of this pattern is inside it too.
      if (signatureIndex !== -1 && match.index > signatureIndex) break;

      const potentialKey = match[1];
      if (isValidZoteroKey(potentialKey)) {
        Logger.log(`Found Zotero key in email body: ${potentialKey}`);
        return potentialKey;
      }
    }
  }
  return null;
}
/**
 * Checks whether a value looks like a Zotero item key.
 *
 * A candidate is accepted when it is a string of exactly 8 ASCII letters
 * and/or digits that is not made of digits only (which rules out dates such
 * as 20250919). Mixed case is accepted; callers convert to upper case.
 *
 * @param {string} key - The potential key to validate
 * @returns {boolean} True if it appears to be a valid Zotero key
 */
function isValidZoteroKey(key) {
  // Non-strings (null, undefined, numbers) are simply not keys.
  if (typeof key !== 'string') return false;
  if (!/^[A-Za-z0-9]{8}$/.test(key)) return false;
  // All-digit codes are rejected; this also covers 20YYMMDD-style dates.
  return !/^[0-9]{8}$/.test(key);
}
/**
 * Extracts a Zotero item key directly from a filename.
 *
 * Recognised forms: "[ABCD1234]", "_ABCD1234.pdf" at the end of the name,
 * "zotero_ABCD1234" and "key_ABCD1234" (the last two in any letter case).
 *
 * @param {string} fileName - Attachment filename
 * @returns {string|null} The key in upper case, or null if none is embedded
 */
function extractItemKey(fileName) {
  if (!fileName) return null;

  const patterns = [
    /\[([A-Z0-9]{8})\]/,                  // [ABCD1234] format
    /_([A-Z0-9]{8})\.[pP][dD][fF]$/,      // _ABCD1234.pdf format
    /zotero_([A-Z0-9]{8})/i,              // zotero_ABCD1234 format
    /key_([A-Z0-9]{8})/i,                 // key_ABCD1234 format
  ];

  for (const pattern of patterns) {
    const match = fileName.match(pattern);
    if (match) {
      // The case-insensitive patterns may capture lower-case text; the key is
      // normalised to upper case here because the caller uses it as-is.
      return match[1].toUpperCase();
    }
  }
  return null;
}
const NOTIFICATION_EMAIL = null; // Set to specific email address to override

/**
 * Escapes the characters that are significant in HTML so that text taken from
 * an email (filename, parsed metadata) is shown literally in the HTML body.
 * @param {*} text - Value to escape; converted to a string first
 * @returns {string} HTML-safe text
 */
function escapeHtml_(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Sends an email notification when a PDF could not be attached to a Zotero item.
 *
 * Recipient, in priority order: NOTIFICATION_EMAIL, the first address in the
 * original message's "To" field, the active user's address. Does nothing
 * (apart from logging) when SEND_FAILURE_NOTIFICATIONS is false. Errors while
 * sending are logged and never propagated.
 *
 * @param {string} pdfFileName - Name of the PDF that failed
 * @param {Object} metadata - The extracted metadata that was used in the search
 *     (authors, title, year); may be null
 * @param {GmailMessage} originalMessage - The original message; may be null
 */
function sendFailureNotification(pdfFileName, metadata, originalMessage) {
  if (!SEND_FAILURE_NOTIFICATIONS) {
    Logger.log(`Failure notifications disabled. Would have sent notification for: ${pdfFileName}`);
    return;
  }

  try {
    let recipientEmail = null;
    if (NOTIFICATION_EMAIL) {
      recipientEmail = NOTIFICATION_EMAIL;
      Logger.log(`Using configured notification email: ${recipientEmail}`);
    } else if (originalMessage) {
      // "To" may be "Name <email@domain.com>" or just "email@domain.com".
      const toField = originalMessage.getTo();
      const emailMatch = toField
        ? toField.match(/([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})/)
        : null;
      if (emailMatch) {
        recipientEmail = emailMatch[1];
      }
    }
    if (!recipientEmail) {
      recipientEmail = Session.getActiveUser().getEmail();
    }
    Logger.log(`Sending failure notification to: ${recipientEmail}`);

    // Human-readable summary of whatever metadata is available.
    const info = metadata || {};
    const authors = Array.isArray(info.authors) ? info.authors : [];
    let metadataInfo = '';
    if (authors.length > 0) {
      metadataInfo += ` • Authors: ${authors.join(', ')}\n`;
    }
    if (info.title) {
      metadataInfo += ` • Title: ${info.title}\n`;
    }
    if (info.year) {
      metadataInfo += ` • Year: ${info.year}\n`;
    }
    if (!metadataInfo) {
      metadataInfo = ' • No metadata could be extracted from the filename\n';
    }

    let subject = `Zotero Upload Failed: ${pdfFileName}`;
    if (originalMessage) {
      subject = `Re: ${originalMessage.getSubject()} - Zotero Upload Failed`;
    }

    const body = [
      'Hello,',
      'The automatic detection failed to find a matching Zotero item for your annotated PDF:',
      `**File:** ${pdfFileName}`,
      '**Extracted metadata:**',
      metadataInfo,
      '**How to fix this:**',
      '1. Find the correct item in your Zotero library',
      '2. Copy its 8-character item key (you can use Zutilo plugin)',
      '3. From your reMarkable, resend the annotated PDF email and include the Zotero item key in the email body',
      'The script will automatically detect the key and upload the PDF to the correct item.',
      '**Alternative solutions:**',
      '- Make sure the PDF filename contains identifiable information (author names, year, title)',
      '- Rename the PDF on reMarkable to include the Zotero key in brackets, like: [ABCD1234] Document Title.pdf',
      'This is an automated message from your Zotero-reMarkable sync script.',
    ].join('\n');

    // The filename and metadata come from the incoming email, so they are
    // escaped before being placed into markup.
    const htmlBody = [
      '<div style="font-family: Arial, sans-serif; max-width: 600px; margin: 0 auto;">',
      '<h2 style="color: #d32f2f;">Zotero Upload Failed</h2>',
      '<p>The automatic detection failed to find a matching Zotero item for your annotated PDF:</p>',
      '<div style="background-color: #f5f5f5; padding: 15px; border-radius: 5px; margin: 20px 0;">',
      `<p style="margin: 0;"><strong>File:</strong> ${escapeHtml_(pdfFileName)}</p>`,
      '</div>',
      '<div style="background-color: #fff3e0; padding: 15px; border-radius: 5px; margin: 20px 0;">',
      '<p style="margin-top: 0;"><strong>Extracted metadata:</strong></p>',
      `<pre style="margin: 0; font-size: 14px;">${escapeHtml_(metadataInfo)}</pre>`,
      '</div>',
      '<h3 style="color: #1976d2;">How to fix this:</h3>',
      '<ol>',
      '<li>Find the correct item in your Zotero library</li>',
      '<li>Copy its 8-character item key (you can use Zutilo plugin)</li>',
      '<li>From your reMarkable, resend the annotated PDF email and include the Zotero item key in the email body</li>',
      '</ol>',
      '<div style="background-color: #f3e5f5; padding: 15px; border-radius: 5px; margin: 20px 0;">',
      '<p style="margin-top: 0;"><strong>Alternative solutions:</strong></p>',
      '<ul style="margin-bottom: 0;">',
      '<li>Make sure the PDF filename contains identifiable information (author names, year, title)</li>',
      '<li>Rename the PDF on reMarkable to include the Zotero key in brackets, like: <code>[ABCD1234] Document Title.pdf</code></li>',
      '</ul>',
      '</div>',
      '<hr style="border: none; border-top: 1px solid #e0e0e0; margin: 30px 0;">',
      '<p style="color: #666; font-size: 12px;">This is an automated message from your Zotero-reMarkable sync script.</p>',
      '</div>',
    ].join('\n');

    const emailOptions = {
      htmlBody: htmlBody,
      name: 'Zotero Sync Script'
    };
    if (originalMessage) {
      // Replies are pointed at a placeholder address rather than the sender.
      emailOptions.replyTo = 'noreply@zotero-sync.local';
    }

    GmailApp.sendEmail(recipientEmail, subject, body, emailOptions);
    Logger.log(`Sent failure notification to ${recipientEmail} for: ${pdfFileName}`);
  } catch (error) {
    Logger.log(`Error sending failure notification: ${error.toString()}`);
  }
}
/**
 * Derives bibliographic metadata (authors, year, title) from a PDF filename.
 *
 * The "<authors> et al. - [<more authors> -] <title>" layout is tried first.
 * Otherwise the name is matched against a list of layouts that carry a year,
 * and finally against a plain "<authors> - <title>" layout. Missing pieces
 * are filled in afterwards: the year from any 1950-2029 number in the name,
 * the title from the whole cleaned name.
 *
 * @param {string} fileName - Attachment filename
 * @returns {{title: ?string, authors: string[], year: ?string,
 *            firstAuthor: ?string, author: (string|undefined)}}
 *          `author` is only present when at least one author was found.
 */
function extractMetadata(fileName) {
  const metadata = { title: null, authors: [], year: null, firstAuthor: null };

  // Drop the extension and quotes, collapse whitespace.
  const cleanName = fileName
    .replace(/\.pdf$/i, '')
    .replace(/["""]/g, '')
    .replace(/\s+/g, ' ')
    .trim();

  const etAlMatch = cleanName.match(/^(.+?)\s+et\s+al\.\s*[-–]\s*(.+?)$/i);

  if (etAlMatch) {
    const leadAuthors = etAlMatch[1].trim();
    const remainder = etAlMatch[2].trim();
    metadata.authors = metadata.authors.concat(extractAuthorsFromString(leadAuthors));

    // The remainder may itself be "<more authors> - <title>".
    const split = remainder.match(/^(.+?)[,.]?\s*[-–]\s*(.+)$/);
    const extraAuthors = split ? split[1].trim() : null;
    if (split && looksLikeAuthorNames(extraAuthors)) {
      metadata.authors = metadata.authors.concat(extractAuthorsFromString(extraAuthors));
      metadata.title = cleanString(split[2].trim());
    } else {
      metadata.title = cleanString(remainder);
    }
  } else {
    // Layouts containing a year; the numbers are capture-group indexes.
    const datedLayouts = [
      { re: /^(.+?)\s*[-–]\s*(\d{4})\s*[-–]\s*(.+)$/, authors: 1, year: 2, title: 3 },   // Author(s) - Year - Title
      { re: /^(.+?)\s*\((\d{4})\)\s*[-–]?\s*(.+)$/, authors: 1, year: 2, title: 3 },     // Author(s) (Year) Title
      { re: /^(.+?)\s+(\d{4})\s*[-–]\s*(.+)$/, authors: 1, year: 2, title: 3 },          // Author(s) Year - Title
      { re: /^(\d{4})\s*[-–]\s*(.+?)\s*[-–]\s*(.+)$/, authors: 2, year: 1, title: 3 },   // Year - Author(s) - Title
    ];

    let handled = false;
    for (const layout of datedLayouts) {
      const hit = cleanName.match(layout.re);
      if (!hit) continue;
      metadata.authors = extractAuthorsFromString(hit[layout.authors]);
      metadata.year = hit[layout.year];
      metadata.title = cleanString(hit[layout.title]);
      handled = true;
      break;
    }

    if (!handled) {
      // Author(s) - Title (a year may still appear inside the title)
      const hit = cleanName.match(/^(.+?)\s*[-–]\s*(.+)$/);
      if (hit) {
        const candidateAuthors = hit[1].trim();
        if (looksLikeAuthorNames(candidateAuthors)) {
          metadata.authors = extractAuthorsFromString(candidateAuthors);
          metadata.title = cleanString(hit[2].trim());
        } else {
          // The part before the dash is not a name list: treat all as title.
          metadata.title = cleanString(cleanName);
        }
      }
    }
  }

  if (!metadata.year) {
    const yearHit = cleanName.match(/\b(19[5-9]\d|20[0-2]\d)\b/);
    if (yearHit) {
      metadata.year = yearHit[1];
    }
  }

  if (!metadata.title) {
    metadata.title = cleanString(cleanName);
  }

  if (metadata.authors.length > 0) {
    const [lead] = metadata.authors;
    metadata.firstAuthor = lead;
    metadata.author = lead; // For backward compatibility
  }

  return metadata;
}
/**
 * Extracts author last names from a free-form author list.
 *
 * Commas, "&" and the word "and" are all treated as separators at the same
 * time, so mixed lists such as "Smith, Jones and Lee" yield every author.
 * For each entry a leading honorific (Dr., Prof., Mr., Ms., Mrs.) is dropped
 * and the last whitespace-separated word is taken as the last name.
 *
 * @param {string} authorsString - e.g. "John Smith & Jane Doe"
 * @returns {string[]} Last names in input order; empty for empty/missing input
 */
function extractAuthorsFromString(authorsString) {
  if (!authorsString) return [];

  // A dangling "and" at the end carries no name.
  const cleaned = String(authorsString).trim().replace(/\s+and\s*$/i, '');

  return cleaned
    .split(/\s*[,&]\s*|\s+and\s+/i)
    // "A, B, and C": the comma split leaves "and C" behind.
    .map(part => part.trim().replace(/^and\s+/i, ''))
    // Single characters are not usable names.
    .filter(part => part.length > 1)
    .map(part => part.replace(/^(Dr\.|Prof\.|Mr\.|Ms\.|Mrs\.)\s+/i, ''))
    .map(part => {
      const words = part.split(/\s+/);
      return words[words.length - 1];
    });
}
/**
 * Heuristic: does this text look like a list of author names?
 *
 * True when the text matches one of the name-shaped patterns below, or when
 * it has at most six whitespace-separated words of which at least 60% start
 * with an upper-case ASCII letter.
 *
 * @param {string} str - Candidate text (e.g. the part of a filename before " - ")
 * @returns {boolean}
 */
function looksLikeAuthorNames(str) {
  const nameShapes = [
    /^[A-Z][a-z]+(\s+[A-Z]\.?)?(\s+[A-Z][a-z]+)?$/, // John Smith, J. Smith, John D. Smith
    /^[A-Z][a-z]+\s*,\s*[A-Z]\.?$/,                 // Smith, J.
    /et\s+al\.?/i,                                  // et al.
    /\band\b/i,                                     // and
    /^[A-Z]\.\s*[A-Z][a-z]+$/,                      // J. Smith
  ];
  if (nameShapes.some(shape => shape.test(str))) {
    return true;
  }

  // Fallback: a short run of mostly capitalised words.
  const tokens = str.split(/\s+/);
  const capitalised = tokens.reduce(
    (count, token) => count + (/^[A-Z]/.test(token) ? 1 : 0),
    0
  );
  return capitalised >= tokens.length * 0.6 && tokens.length <= 6;
}
/**
 * Normalises a string for matching: underscores become spaces, whitespace
 * runs collapse to one space, square brackets and double quotes are removed,
 * and the result is trimmed. The steps run in exactly that order.
 *
 * @param {?string} str - Text to clean
 * @returns {?string} Cleaned text, or null for empty/missing input
 */
function cleanString(str) {
  if (!str) return null;

  const substitutions = [
    [/_/g, ' '],
    [/\s+/g, ' '],
    [/[\[\]]/g, ''],
    [/["""]/g, ''],
  ];
  return substitutions
    .reduce((text, [pattern, replacement]) => text.replace(pattern, replacement), str)
    .trim();
}
/**
 * Looks up a Zotero item for the given metadata, trying progressively looser
 * strategies and returning the first key found:
 *   1. author + year, for each author in order (needs authors and a year),
 *   2. title (only for titles longer than 10 characters),
 *   3. combined fuzzy search over all metadata.
 *
 * @param {Object} metadata - Result of extractMetadata (authors, year, title)
 * @returns {?string} Item key, or whatever the fuzzy search returns
 */
function findZoteroItem(metadata) {
  const canSearchByAuthorYear = metadata.authors.length > 0 && metadata.year;
  if (canSearchByAuthorYear) {
    let hit = null;
    // `some` stops at the first author that produces a key.
    metadata.authors.some(name => {
      hit = searchZoteroByAuthorYear(name, metadata.year);
      return Boolean(hit);
    });
    if (hit) return hit;
  }

  const hasUsableTitle = metadata.title && metadata.title.length > 10;
  if (hasUsableTitle) {
    const hit = searchZoteroByField('title', metadata.title);
    if (hit) return hit;
  }

  return fuzzySearchZotero(metadata);
}
/**
 * Searches the Zotero library for an item by one author and a year.
 *
 * Up to three query spellings are sent in turn. For each 200 response the
 * non-attachment items are scored: 0.5 when the first 4-digit number in the
 * item's date equals `year`, plus 0.5 when any creator's lastName (or name)
 * contains `author`, case-insensitively. The best-scoring item is returned
 * if it reaches 0.8, which in practice means both criteria matched.
 * Any exception is logged and results in null.
 *
 * @param {string} author - Author last name
 * @param {string} year - Four-digit year as a string
 * @returns {?string} Zotero item key or null
 */
function searchZoteroByAuthorYear(author, year) {
  try {
    const requestOptions = {
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3'
      },
      muteHttpExceptions: true
    };

    const scoreItem = (item) => {
      const yearHit = item.data.date ? item.data.date.match(/\b\d{4}\b/) : null;
      const yearPoints = yearHit && yearHit[0] === year ? 0.5 : 0;

      const authorFound = (item.data.creators || []).some(creator => {
        const lastName = (creator.lastName || creator.name || '').toLowerCase();
        return Boolean(lastName) && lastName.includes(author.toLowerCase());
      });

      return yearPoints + (authorFound ? 0.5 : 0);
    };

    const searchTerms = [
      `${author} ${year}`,
      `creator:"${author}" date:${year}`,
      `"${author}" ${year}`
    ];

    for (const term of searchTerms) {
      const url = `https://api.zotero.org/users/${ZOTERO_USER_ID}/items?q=${encodeURIComponent(term)}&limit=20`;
      const response = UrlFetchApp.fetch(url, requestOptions);
      if (response.getResponseCode() !== 200) continue;

      const ranked = JSON.parse(response.getContentText())
        .filter(item => item.data.itemType !== 'attachment')
        .map(item => ({ item, score: scoreItem(item) }))
        .filter(entry => entry.score > 0)
        .sort((left, right) => right.score - left.score);

      const best = ranked[0];
      if (best && best.score >= 0.8) {
        return best.item.key;
      }
    }
  } catch (e) {
    Logger.log(`Error in searchZoteroByAuthorYear: ${e.toString()}`);
  }
  return null;
}
/**
 * Searches Zotero for an item whose `field` resembles `value`.
 *
 * The value is sent as the API's quick-search term (`q`, title/creator/year
 * mode); the Zotero API has no per-field query parameter such as `title=`.
 * Every non-attachment result is then scored with calculateImprovedSimilarity
 * against its own `field` value (falling back to its title), and the best one
 * is returned only if it scores at least 0.6, including when it is the only
 * candidate.
 * Any exception is logged and results in null.
 *
 * @param {string} field - Item data field to compare against, e.g. 'title'
 * @param {string} value - Text to look for
 * @returns {?string} Zotero item key or null
 */
function searchZoteroByField(field, value) {
  try {
    const searchValue = value.replace(/["""]/g, '').trim();
    // An empty search term would match the whole library.
    if (!searchValue) return null;

    const url = `https://api.zotero.org/users/${ZOTERO_USER_ID}/items?q=${encodeURIComponent(searchValue)}&qmode=titleCreatorYear&limit=20`;
    const response = UrlFetchApp.fetch(url, {
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3'
      },
      muteHttpExceptions: true
    });
    if (response.getResponseCode() !== 200) return null;

    const items = JSON.parse(response.getContentText());
    const parentItems = items.filter(item => item.data.itemType !== 'attachment');
    if (parentItems.length === 0) return null;

    const wanted = searchValue.toLowerCase();
    const scoredItems = parentItems.map(item => {
      const fieldText = typeof item.data[field] === 'string'
        ? item.data[field]
        : (item.data.title || '');
      return { item, score: calculateImprovedSimilarity(wanted, fieldText.toLowerCase()) };
    });
    scoredItems.sort((a, b) => b.score - a.score);

    // A single candidate must still clear the threshold.
    if (scoredItems[0].score >= 0.6) {
      return scoredItems[0].item.key;
    }
  } catch (e) {
    Logger.log(`Error in searchZoteroByField: ${e.toString()}`);
  }
  return null;
}
/**
 * Last-resort search that combines all available metadata.
 *
 * The query is built from the author names, up to five title words longer
 * than three characters, and the year. Each non-attachment result is scored:
 *   - title similarity * 0.5 (counts as a match above 0.3),
 *   - +0.25 when the first 4-digit number in the item's date equals the year,
 *   - +0.25 when an author name and a creator's lastName/name contain one
 *     another (creators without a name are ignored).
 * Items with fewer than two matching components score 0. The best item is
 * returned when it has a score of at least 0.4 and at least two matches.
 * Any exception is logged and results in null.
 *
 * @param {Object} metadata - Result of extractMetadata (authors, year, title)
 * @returns {?string} Zotero item key or null
 */
function fuzzySearchZotero(metadata) {
  try {
    const authors = metadata.authors || [];

    const searchParts = [];
    if (authors.length > 0) {
      searchParts.push(authors.join(' '));
    }
    if (metadata.title) {
      const titleWords = metadata.title.split(/\s+/)
        .filter(word => word.length > 3)
        .slice(0, 5);
      searchParts.push(titleWords.join(' '));
    }
    if (metadata.year) {
      searchParts.push(metadata.year);
    }
    if (searchParts.length === 0) return null;

    const query = searchParts.join(' ');
    const url = `https://api.zotero.org/users/${ZOTERO_USER_ID}/items?q=${encodeURIComponent(query)}&limit=30`;
    const response = UrlFetchApp.fetch(url, {
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3'
      },
      muteHttpExceptions: true
    });
    if (response.getResponseCode() !== 200) return null;

    const items = JSON.parse(response.getContentText());
    const validItems = items.filter(item => item.data.itemType !== 'attachment');
    if (validItems.length === 0) return null;

    const scoredItems = validItems.map(item => {
      let score = 0;
      let matches = 0;

      // Title matching (most important)
      if (metadata.title && item.data.title) {
        const titleScore = calculateImprovedSimilarity(
          metadata.title.toLowerCase(),
          item.data.title.toLowerCase()
        );
        score += titleScore * 0.5;
        if (titleScore > 0.3) matches++;
      }

      // Year matching
      const itemYear = item.data.date ? item.data.date.match(/\b\d{4}\b/) : null;
      if (metadata.year && itemYear && itemYear[0] === metadata.year) {
        score += 0.25;
        matches++;
      }

      // Author matching
      if (authors.length > 0 && item.data.creators) {
        const authorMatch = authors.some(searchAuthor => {
          const wanted = searchAuthor.toLowerCase();
          return item.data.creators.some(creator => {
            const creatorName = (creator.lastName || creator.name || '').toLowerCase();
            // Every string "includes" the empty string, so a creator without
            // a name would otherwise match any author.
            if (!creatorName || !wanted) return false;
            return creatorName.includes(wanted) || wanted.includes(creatorName);
          });
        });
        if (authorMatch) {
          score += 0.25;
          matches++;
        }
      }

      // Require at least 2 matching components for consideration
      if (matches < 2) score = 0;
      return { item, score, matches };
    });

    scoredItems.sort((a, b) => b.score - a.score);

    // Log top results for debugging
    Logger.log(`Top 3 fuzzy search results:`);
    for (let i = 0; i < Math.min(3, scoredItems.length); i++) {
      const result = scoredItems[i];
      Logger.log(` ${i+1}. Score: ${result.score.toFixed(2)}, Matches: ${result.matches}, Title: ${result.item.data.title}`);
    }

    if (scoredItems[0].score >= 0.4 && scoredItems[0].matches >= 2) {
      return scoredItems[0].item.key;
    }
  } catch (e) {
    Logger.log(`Error in fuzzySearchZotero: ${e.toString()}`);
  }
  return null;
}
/**
 * Scores how similar two strings are, from 0 (unrelated) to 1 (identical).
 *
 * Both inputs are lower-cased, stripped of everything except word characters
 * and whitespace, and whitespace-collapsed. Then:
 *   - if either normalised string is empty the score is 0 (an empty text is
 *     no evidence of a match, even though every string "contains" it),
 *   - identical strings score 1.0,
 *   - one string containing the other scores 0.8,
 *   - otherwise the score is the share of words (longer than 2 characters)
 *     of `str1` that have a counterpart in `str2`, relative to the longer
 *     word list; when both strings are shorter than 50 characters a
 *     Levenshtein-based score is blended in (70/30) if that raises the result.
 *
 * @param {string} str1 - First text
 * @param {string} str2 - Second text
 * @returns {number} Similarity in [0, 1]
 */
function calculateImprovedSimilarity(str1, str2) {
  const normalize = (text) => String(text ?? '')
    .toLowerCase()
    .replace(/[^\w\s]/g, '')
    .replace(/\s+/g, ' ')
    .trim();
  const norm1 = normalize(str1);
  const norm2 = normalize(str2);

  // Guard first: ''.includes / includes('') would otherwise report 0.8 or 1.0.
  if (norm1 === '' || norm2 === '') return 0;

  // Method 1: Exact match
  if (norm1 === norm2) return 1.0;

  // Method 2: Substring match
  if (norm1.includes(norm2) || norm2.includes(norm1)) return 0.8;

  // Method 3: Word overlap
  const words1 = norm1.split(' ').filter(w => w.length > 2);
  const words2 = norm2.split(' ').filter(w => w.length > 2);
  if (words1.length === 0 || words2.length === 0) return 0;

  const wordsMatch = (word1, word2) =>
    word1 === word2 ||
    (word1.length > 4 && word2.length > 4 &&
      (word1.includes(word2) || word2.includes(word1)));
  const matchedWords = words1.filter(word1 => words2.some(word2 => wordsMatch(word1, word2))).length;
  const wordOverlapScore = matchedWords / Math.max(words1.length, words2.length);

  // Method 4: Levenshtein distance for short strings
  let levenshteinScore = 0;
  if (norm1.length < 50 && norm2.length < 50) {
    const distance = levenshteinDistance(norm1, norm2);
    const maxLength = Math.max(norm1.length, norm2.length);
    levenshteinScore = 1 - (distance / maxLength);
  }

  // The blend is only used when it improves on the plain word overlap.
  return Math.max(wordOverlapScore * 0.7 + levenshteinScore * 0.3, wordOverlapScore);
}
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one into the other.
 *
 * Only the previous and the current row of the distance table are kept.
 *
 * @param {string} str1 - First string
 * @param {string} str2 - Second string
 * @returns {number} Edit distance
 */
function levenshteinDistance(str1, str2) {
  const columns = str1.length;
  const rows = str2.length;

  // Row 0: distance from the empty prefix of str2 to each prefix of str1.
  let previousRow = Array.from({ length: columns + 1 }, (_, j) => j);

  for (let i = 1; i <= rows; i++) {
    const currentRow = [i];
    const rowChar = str2.charAt(i - 1);

    for (let j = 1; j <= columns; j++) {
      if (rowChar === str1.charAt(j - 1)) {
        // Same character: no extra edit on top of the diagonal.
        currentRow[j] = previousRow[j - 1];
      } else {
        currentRow[j] = Math.min(
          previousRow[j - 1] + 1, // substitution
          currentRow[j - 1] + 1,  // insertion
          previousRow[j] + 1      // deletion
        );
      }
    }
    previousRow = currentRow;
  }

  return previousRow[columns];
}
/**
 * Uploads a PDF to a Zotero item as its annotated attachment.
 *
 * Steps:
 *   1. verify the parent item exists,
 *   2. list its children,
 *   3. if a child attachment whose title contains "annotated" exists, update
 *      it; otherwise mark every PDF attachment not yet titled "annotated" or
 *      "original" as original and create a new attachment,
 *   4. on success, add the configured tags to the parent item.
 * Both lookups use muteHttpExceptions and an explicit status check so a
 * failed request is reported with its status code instead of a parse error.
 * Any exception is logged and results in false.
 *
 * @param {string} itemKey - Key of the parent Zotero item
 * @param {Blob} pdfBlob - PDF content to upload
 * @param {string} fileName - Name of the PDF
 * @returns {boolean} True if the attachment was created or updated
 */
function uploadToZotero(itemKey, pdfBlob, fileName) {
  try {
    const requestOptions = {
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3'
      },
      muteHttpExceptions: true
    };

    const itemUrl = `https://api.zotero.org/users/${ZOTERO_USER_ID}/items/${itemKey}`;
    const itemResponse = UrlFetchApp.fetch(itemUrl, requestOptions);
    if (itemResponse.getResponseCode() !== 200) {
      Logger.log(`Item ${itemKey} not found in Zotero.`);
      return false;
    }

    const childrenResponse = UrlFetchApp.fetch(`${itemUrl}/children`, requestOptions);
    if (childrenResponse.getResponseCode() !== 200) {
      Logger.log(`Failed to list attachments of item ${itemKey}: ${childrenResponse.getResponseCode()}`);
      return false;
    }
    const children = JSON.parse(childrenResponse.getContentText());

    const titleContains = (child, word) => (child.data.title || '').toLowerCase().includes(word);
    const attachments = children.filter(child => child.data.itemType === 'attachment');

    // Find existing annotated attachment
    const existingAnnotated = attachments.find(att => titleContains(att, 'annotated'));

    // Find original PDF attachments (to rename them)
    const originalPDFs = attachments.filter(att =>
      att.data.contentType === 'application/pdf' &&
      !titleContains(att, 'annotated') &&
      !titleContains(att, 'original')
    );

    let success = false;
    if (existingAnnotated) {
      Logger.log(`Updating existing annotated attachment: ${existingAnnotated.key}`);
      success = updateAttachment(existingAnnotated.key, pdfBlob);
    } else {
      // Rename original PDFs to mark them as non-primary
      if (originalPDFs.length > 0) {
        Logger.log(`Found ${originalPDFs.length} original PDF(s), marking as non-primary`);
        for (const originalPDF of originalPDFs) {
          markAttachmentAsOriginal(originalPDF.key, originalPDF.data.title, originalPDF.version);
        }
      }
      Logger.log(`Creating new annotated attachment for item: ${itemKey}`);
      success = createNewAttachment(itemKey, pdfBlob, fileName, true);
    }

    // If upload was successful, add tags to the parent item
    if (success) {
      addTagsToItem(itemKey);
    }
    return success;
  } catch (e) {
    Logger.log(`Error in uploadToZotero: ${e.toString()}`);
    return false;
  }
}
/**
 * Adds the tags configured in TAGS_TO_ADD to a Zotero item.
 *
 * Tags are compared case-insensitively against the tags already on the item
 * and against each other, so a tag is added at most once even if the
 * configuration repeats it with different casing or surrounding whitespace.
 *
 * @param {string} itemKey - Key of the Zotero item to tag.
 * @returns {boolean} true when the item ends up with every configured tag
 *     (including when nothing had to be changed), false on any failure.
 */
function addTagsToItem(itemKey) {
  try {
    // Skip if no tags are configured
    if (!TAGS_TO_ADD || TAGS_TO_ADD.length === 0) {
      Logger.log('No tags configured to add');
      return true;
    }

    const itemUrl = `https://api.zotero.org/users/${ZOTERO_USER_ID}/items/${itemKey}`;

    // First, get the current item data
    const getResponse = UrlFetchApp.fetch(itemUrl, {
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3'
      },
      muteHttpExceptions: true
    });
    if (getResponse.getResponseCode() !== 200) {
      Logger.log(`Failed to get item for tagging: ${getResponse.getResponseCode()}`);
      return false;
    }

    const item = JSON.parse(getResponse.getContentText());
    const currentTags = item.data.tags || [];

    // Lower-cased names of every tag that is present or already queued. It is
    // updated as tags are queued so duplicates in TAGS_TO_ADD are skipped too.
    const knownTags = new Set(
      currentTags.map(t => String((t && t.tag) || '').toLowerCase())
    );

    const addedTags = [];
    for (const tagToAdd of TAGS_TO_ADD) {
      // Skip empty strings or invalid tags
      if (!tagToAdd || typeof tagToAdd !== 'string') continue;
      const normalizedTag = tagToAdd.trim();
      const lookupKey = normalizedTag.toLowerCase();
      if (!normalizedTag || knownTags.has(lookupKey)) continue;
      knownTags.add(lookupKey);
      addedTags.push(normalizedTag);
    }

    // Only update if we actually have new tags to add
    if (addedTags.length === 0) {
      Logger.log(`All configured tags already present on item ${itemKey}`);
      return true;
    }

    const updateData = {
      // type: 0 marks the tag as a manual tag
      tags: currentTags.concat(addedTags.map(tag => ({ tag: tag, type: 0 })))
    };
    const updateResponse = UrlFetchApp.fetch(itemUrl, {
      method: 'PATCH',
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3',
        'Content-Type': 'application/json',
        // Makes the write fail instead of overwriting a concurrent edit.
        'If-Unmodified-Since-Version': item.version.toString()
      },
      payload: JSON.stringify(updateData),
      muteHttpExceptions: true
    });

    if (updateResponse.getResponseCode() === 204) {
      // Report only the tags that were really added by this call.
      Logger.log(`Successfully added tags to item ${itemKey}: ${addedTags.join(', ')}`);
      return true;
    }
    Logger.log(`Failed to add tags: ${updateResponse.getResponseCode()}`);
    return false;
  } catch (e) {
    Logger.log(`Error adding tags to item: ${e.toString()}`);
    return false;
  }
}
/**
 * Renames an attachment so its title carries an " (original)" suffix,
 * flagging it as the non-primary version.
 *
 * Attachments with an empty title, or whose title already contains
 * "original" (any casing), are left alone and reported as success.
 *
 * @param {string} attachmentKey - Key of the attachment item to rename.
 * @param {string} currentTitle - The attachment's present title.
 * @param {number} version - Item version sent as If-Unmodified-Since-Version.
 * @returns {boolean} true if no rename was needed or the rename returned 204,
 *     false otherwise.
 */
function markAttachmentAsOriginal(attachmentKey, currentTitle, version) {
  try {
    // Nothing to do when there is no title or it is already marked.
    if (!currentTitle || currentTitle.toLowerCase().includes('original')) {
      return true;
    }

    const endpoint = `https://api.zotero.org/users/${ZOTERO_USER_ID}/items/${attachmentKey}`;
    const renamePayload = JSON.stringify({ title: currentTitle + ' (original)' });
    const requestHeaders = {
      'Zotero-API-Key': ZOTERO_API_KEY,
      'Zotero-API-Version': '3',
      'Content-Type': 'application/json',
      'If-Unmodified-Since-Version': version.toString()
    };

    const response = UrlFetchApp.fetch(endpoint, {
      method: 'PATCH',
      headers: requestHeaders,
      payload: renamePayload,
      muteHttpExceptions: true
    });

    const renamed = response.getResponseCode() === 204;
    if (renamed) {
      Logger.log(`Successfully marked attachment ${attachmentKey} as original`);
    } else {
      Logger.log(`Failed to mark attachment as original: ${response.getResponseCode()}`);
    }
    return renamed;
  } catch (e) {
    Logger.log(`Error marking attachment as original: ${e.toString()}`);
    return false;
  }
}
/**
 * Creates a child attachment item in Zotero and then uploads the PDF for it
 * to WebDAV.
 *
 * @param {string} parentKey - Key of the Zotero item that owns the attachment.
 * @param {Blob} pdfBlob - PDF contents handed on to uploadToWebDAV.
 * @param {string} fileName - Attachment file name; a trailing ".pdf" is
 *     dropped (case-insensitively) to build the title.
 * @param {boolean} [isPrimary=false] - When true the title gets a "! " prefix
 *     so that it sorts first.
 * @returns {boolean} Result of the WebDAV upload, or false if the item could
 *     not be created.
 */
function createNewAttachment(parentKey, pdfBlob, fileName, isPrimary = false) {
  try {
    const baseTitle = fileName.replace(/\.pdf$/i, '');

    let attachmentTitle;
    if (isPrimary) {
      // '!' prefix makes it sort first
      attachmentTitle = `! ${baseTitle} (annotated from reMarkable)`;
    } else {
      attachmentTitle = `${baseTitle} (annotated from reMarkable)`;
    }

    const newItem = {
      itemType: 'attachment',
      parentItem: parentKey,
      linkMode: 'imported_file',
      title: attachmentTitle,
      contentType: 'application/pdf',
      filename: fileName
    };

    // The items endpoint takes an array of objects, hence the one-element list.
    const requestOptions = {
      method: 'POST',
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3',
        'Content-Type': 'application/json'
      },
      payload: JSON.stringify([newItem]),
      muteHttpExceptions: true
    };
    const response = UrlFetchApp.fetch(
      `https://api.zotero.org/users/${ZOTERO_USER_ID}/items`,
      requestOptions
    );

    if (response.getResponseCode() !== 200) {
      Logger.log(`Failed to create attachment item: ${response.getContentText()}`);
      return false;
    }

    // The response maps the index of each submitted object to its new key.
    const outcome = JSON.parse(response.getContentText());
    const newKey = outcome.success['0'];
    if (newKey) {
      return uploadToWebDAV(newKey, pdfBlob);
    }

    Logger.log(`Creation failed: ${JSON.stringify(outcome.failed)}`);
    return false;
  } catch (e) {
    Logger.log(`Error in createNewAttachment: ${e.toString()}`);
    return false;
  }
}
/**
 * Updates an existing attachment by re-uploading the file to WebDAV and then
 * recording the new MD5 hash and modification time on the Zotero item.
 *
 * Every failure path is logged so that a false return can be diagnosed.
 *
 * @param {string} attachmentKey - Key of the attachment item to refresh.
 * @param {Blob} pdfBlob - New PDF contents.
 * @returns {boolean} true when both the WebDAV upload and the metadata update
 *     succeeded, false otherwise.
 */
function updateAttachment(attachmentKey, pdfBlob) {
  try {
    // The file goes up first; metadata is only touched once that worked.
    if (!uploadToWebDAV(attachmentKey, pdfBlob)) {
      return false;
    }

    const updateUrl = `https://api.zotero.org/users/${ZOTERO_USER_ID}/items/${attachmentKey}`;

    // Fetch the item to learn its current version for the conditional PATCH.
    const getResponse = UrlFetchApp.fetch(updateUrl, {
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3'
      },
      muteHttpExceptions: true
    });
    if (getResponse.getResponseCode() !== 200) {
      Logger.log(`Failed to get attachment ${attachmentKey} for update: ${getResponse.getResponseCode()}`);
      return false;
    }
    const attachmentItem = JSON.parse(getResponse.getContentText());

    const pdfBytes = pdfBlob.getBytes();
    const md5 = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5, pdfBytes);
    // Digest bytes may be negative; mask to 0-255 before hex-encoding.
    const md5Hex = md5.map(byte => ('0' + (byte & 0xFF).toString(16)).slice(-2)).join('');

    const patchData = { md5: md5Hex, mtime: Date.now() };
    const updateResponse = UrlFetchApp.fetch(updateUrl, {
      method: 'PATCH',
      headers: {
        'Zotero-API-Key': ZOTERO_API_KEY,
        'Zotero-API-Version': '3',
        'Content-Type': 'application/json',
        'If-Unmodified-Since-Version': attachmentItem.version.toString()
      },
      payload: JSON.stringify(patchData),
      muteHttpExceptions: true
    });

    const updateStatus = updateResponse.getResponseCode();
    if (updateStatus !== 204) {
      Logger.log(`Failed to update metadata for attachment ${attachmentKey}: ${updateStatus}`);
      return false;
    }
    return true;
  } catch (e) {
    Logger.log(`Error in updateAttachment: ${e.toString()}`);
    return false;
  }
}
/**
 * Uploads a file to a WebDAV server using Zotero's storage structure: a
 * `<key>.zip` containing the PDF plus a `<key>.prop` properties file.
 *
 * Both uploads must succeed; a failure of either one is logged and reported
 * as false so the caller does not treat a half-written attachment as stored.
 *
 * @param {string} attachmentKey - Zotero attachment key used for file names.
 * @param {Blob} pdfBlob - PDF contents to store.
 * @returns {boolean} true when the ZIP and the .prop file were both accepted.
 */
function uploadToWebDAV(attachmentKey, pdfBlob) {
  try {
    // Status codes treated as a successful PUT.
    const successCodes = [200, 201, 204];

    const pdfBytes = pdfBlob.getBytes();
    const namedPdfBlob = Utilities.newBlob(pdfBytes, 'application/pdf', 'document.pdf');
    const zipBlob = Utilities.zip([namedPdfBlob], `${attachmentKey}.zip`);

    const webdavPath = `${WEBDAV_URL}${attachmentKey}.zip`;
    const authHeader = 'Basic ' + Utilities.base64Encode(`${WEBDAV_USERNAME}:${WEBDAV_PASSWORD}`);

    const zipResponse = UrlFetchApp.fetch(webdavPath, {
      method: 'PUT',
      headers: { 'Authorization': authHeader, 'Content-Type': 'application/zip' },
      payload: zipBlob.getBytes(),
      muteHttpExceptions: true
    });
    if (!successCodes.includes(zipResponse.getResponseCode())) {
      Logger.log(`WebDAV ZIP upload failed with code ${zipResponse.getResponseCode()}: ${zipResponse.getContentText()}`);
      return false;
    }

    const propContent = createPropFile(pdfBlob);
    const propPath = `${WEBDAV_URL}${attachmentKey}.prop`;
    const propResponse = UrlFetchApp.fetch(propPath, {
      method: 'PUT',
      headers: { 'Authorization': authHeader, 'Content-Type': 'text/xml' },
      payload: propContent,
      muteHttpExceptions: true
    });
    // The .prop file is part of the stored attachment, so its upload result
    // has to be checked just like the ZIP's.
    if (!successCodes.includes(propResponse.getResponseCode())) {
      Logger.log(`WebDAV .prop upload failed with code ${propResponse.getResponseCode()}: ${propResponse.getContentText()}`);
      return false;
    }

    return true;
  } catch (e) {
    Logger.log(`Error in uploadToWebDAV: ${e.toString()}`);
    return false;
  }
}
/**
 * Builds the XML text of the .prop file stored next to an attachment ZIP.
 *
 * @param {Blob} pdfBlob - PDF whose bytes are hashed.
 * @returns {string} A `<properties version="1">` element holding the current
 *     time in milliseconds as `mtime` and the PDF's MD5 as lowercase hex in
 *     `hash`.
 */
function createPropFile(pdfBlob) {
  const digest = Utilities.computeDigest(Utilities.DigestAlgorithm.MD5, pdfBlob.getBytes());

  let hashHex = '';
  for (const signedByte of digest) {
    // Mask to 0-255 first: digest bytes can be negative.
    hashHex += (signedByte & 0xFF).toString(16).padStart(2, '0');
  }

  return `<properties version="1"><mtime>${Date.now()}</mtime><hash>${hashHex}</hash></properties>`;
}
/**
 * Flags an email thread as handled: applies the GMAIL_LABEL label (creating
 * the label first if it does not exist yet) and marks the thread as read.
 *
 * @param {GmailThread} thread - The thread to label and mark as read.
 */
function markAsProcessed(thread) {
  // Reuse the label when it exists; otherwise create it on the fly.
  const processedLabel =
    GmailApp.getUserLabelByName(GMAIL_LABEL) || GmailApp.createLabel(GMAIL_LABEL);

  thread.addLabel(processedLabel);
  thread.markRead();
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment