Created
November 19, 2025 19:39
-
-
Save ejfox/f708c260687de1b3b060a0072d6be90c to your computer and use it in GitHub Desktop.
Newspaper Content Linter for Metro Maker 4 - Validates newspaper templates for common issues
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env node | |
| /** | |
| * Newspaper Content Linter | |
| * Validates newspaper templates for common issues: | |
| * - JSON parsing errors | |
| * - Missing required fields | |
| * - Malformed quotes or special characters | |
| * - Missing paragraph breaks (walls of text) | |
| * - Suspicious content patterns | |
| * | |
| * Usage: | |
| * node scripts/lint-newspapers.js # Lint all newspapers | |
| * node scripts/lint-newspapers.js --fix # Accepted, but no auto-fix is implemented yet | |
| * node scripts/lint-newspapers.js --strict # Treat most warnings as failures (warnings are always printed) | |
| */ | |
| import fs from 'fs'; | |
| import path from 'path'; | |
| import { fileURLToPath } from 'url'; | |
// Derive this script's own file and directory paths from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// The JSONL template file, located relative to this script's directory.
const templatesPath = path.join(__dirname, '..', 'public', 'templates', 'newspapers.jsonl');

// Command-line arguments: everything after the first two entries of process.argv.
const [, , ...args] = process.argv;

// NOTE(review): FIX_MODE is parsed here but not read anywhere else in the visible code.
const FIX_MODE = args.some((arg) => arg === '--fix');
const STRICT_MODE = args.some((arg) => arg === '--strict');
// ANSI escape sequences used to colorize terminal output.
// `reset` ends whatever styling a preceding sequence started.
const colors = {
  // Foreground colors
  red: '\x1b[31m',
  yellow: '\x1b[33m',
  green: '\x1b[32m',
  cyan: '\x1b[36m',
  gray: '\x1b[90m',

  // Text attributes
  reset: '\x1b[0m',
  bold: '\x1b[1m',
};
/**
 * Print an error line to stdout: a red "✗" marker followed by the message.
 * @param {string} msg - Text to print after the marker.
 */
function error(msg) {
  const marker = `${colors.red}✗${colors.reset}`;
  console.log(`${marker} ${msg}`);
}
/**
 * Print a warning line to stdout: a yellow "⚠" marker followed by the message.
 * @param {string} msg - Text to print after the marker.
 */
function warning(msg) {
  const marker = `${colors.yellow}⚠${colors.reset}`;
  console.log(`${marker} ${msg}`);
}
/**
 * Print a success line to stdout: a green "✓" marker followed by the message.
 * @param {string} msg - Text to print after the marker.
 */
function success(msg) {
  const marker = `${colors.green}✓${colors.reset}`;
  console.log(`${marker} ${msg}`);
}
/**
 * Print an informational line to stdout: a cyan "ℹ" marker followed by the message.
 * @param {string} msg - Text to print after the marker.
 */
function info(msg) {
  const marker = `${colors.cyan}ℹ${colors.reset}`;
  console.log(`${marker} ${msg}`);
}
// Validation functions

/**
 * Join an article's headline and content into one string for text scans.
 * Non-string values are treated as empty so that a missing field does not put
 * the literal text "undefined" into the scanned text.
 * Kept outside `validators` so it is not mistaken for a validator itself.
 * @param {object} article - Parsed article record.
 * @returns {string} "<headline> <content>".
 */
function articleText(article) {
  return [article.headline, article.content]
    .map((part) => (typeof part === 'string' ? part : ''))
    .join(' ');
}

/**
 * Count the matches of a global regular expression in a string.
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Pattern with the `g` flag.
 * @returns {number} Number of matches (0 when there are none).
 */
function countMatches(text, pattern) {
  return (text.match(pattern) || []).length;
}

/**
 * Article validators. Each takes the parsed article and the line number used
 * in messages, reports problems through error()/warning()/info(), and returns
 * false when the article should be treated as failing that check.
 */
const validators = {
  /** Report any of headline/content/metadata that is absent or falsy. */
  requiredFields(article, lineNum) {
    const required = ['headline', 'content', 'metadata'];
    const missing = required.filter((field) => !article[field]);
    if (missing.length > 0) {
      error(`Line ${lineNum}: Missing required fields: ${missing.join(', ')}`);
      return false;
    }
    return true;
  },

  /** Check the headline is a non-blank string; warn when it exceeds 200 chars. */
  headlineValid(article, lineNum) {
    const { headline } = article;
    // A present-but-non-string headline would otherwise throw on .trim().
    if (headline != null && typeof headline !== 'string') {
      error(`Line ${lineNum}: Headline must be a string`);
      return false;
    }
    if (!headline || headline.trim().length === 0) {
      error(`Line ${lineNum}: Headline is empty`);
      return false;
    }
    if (headline.length > 200) {
      warning(`Line ${lineNum}: Headline is very long (${headline.length} chars)`);
      if (STRICT_MODE) return false;
    }
    return true;
  },

  /** Check content has paragraph breaks (not a wall of text). */
  contentParagraphs(article, lineNum) {
    const { content } = article;
    // A present-but-non-string content would otherwise throw on .split().
    if (content != null && typeof content !== 'string') {
      error(`Line ${lineNum}: Content must be a string`);
      return false;
    }
    const paragraphs = (content || '').split('\n').filter((p) => p.trim());
    if (paragraphs.length === 0) {
      error(`Line ${lineNum}: Content is empty`);
      return false;
    }
    if (paragraphs.length === 1 && content.length > 500) {
      warning(`Line ${lineNum}: Content is one long paragraph (${content.length} chars, no \\n breaks)`);
      if (STRICT_MODE) return false;
    }
    // Over-long individual paragraphs only warn; they never fail the article.
    paragraphs.forEach((paragraph, i) => {
      if (paragraph.length > 1000) {
        warning(`Line ${lineNum}: Paragraph ${i + 1} is very long (${paragraph.length} chars)`);
      }
    });
    return true;
  },

  /** Check that typographic ("smart") quotes are balanced. */
  quotesValid(article, lineNum) {
    const text = articleText(article);
    // Explicit escapes keep the four quote characters distinct even if the
    // file is re-encoded or its quotes are normalized to ASCII.
    const smartDoubleLeft = countMatches(text, /\u201C/g);
    const smartDoubleRight = countMatches(text, /\u201D/g);
    const smartSingleLeft = countMatches(text, /\u2018/g);
    // U+2019 is also the typographic apostrophe, so one sitting between two
    // letters (as in "it’s") is not counted as a closing quote.
    const smartSingleRight = countMatches(text, /(?<!\p{L})\u2019|\u2019(?!\p{L})/gu);

    if (smartDoubleLeft !== smartDoubleRight) {
      warning(`Line ${lineNum}: Unbalanced smart double quotes (${smartDoubleLeft} left, ${smartDoubleRight} right)`);
      if (STRICT_MODE) return false;
    }
    if (smartSingleLeft !== smartSingleRight) {
      warning(`Line ${lineNum}: Unbalanced smart single quotes (${smartSingleLeft} left, ${smartSingleRight} right)`);
      if (STRICT_MODE) return false;
    }
    return true;
  },

  /** Warn about HTML entities (named, decimal or hex); never fails the article. */
  htmlEntities(article, lineNum) {
    const text = articleText(article);
    const entities = text.match(/&(?:[a-z][a-z0-9]*|#[0-9]+|#x[0-9a-f]+);/gi) || [];
    if (entities.length > 0) {
      const unique = [...new Set(entities)];
      warning(`Line ${lineNum}: Contains HTML entities: ${unique.join(', ')}`);
    }
    return true;
  },

  /** Reject control characters other than tab, line feed and carriage return. */
  invalidCharacters(article, lineNum) {
    const text = articleText(article);
    const controlChars = text.match(/[\x00-\x08\x0B-\x0C\x0E-\x1F\x7F]/g);
    if (controlChars) {
      const codes = controlChars.map((c) => `0x${c.charCodeAt(0).toString(16)}`);
      error(`Line ${lineNum}: Contains control characters: ${codes.join(', ')}`);
      return false;
    }
    return true;
  },

  /** Check metadata is an object; warn about unknown category or tone values. */
  metadataValid(article, lineNum) {
    const { metadata } = article;
    if (!metadata || typeof metadata !== 'object') {
      error(`Line ${lineNum}: Metadata is missing or not an object`);
      return false;
    }
    const { category, tone } = metadata;

    const validCategories = ['milestone', 'crisis', 'comedy', 'opinion', 'investigation', 'political', 'financial', 'expansion', 'delay'];
    if (category && !validCategories.includes(category)) {
      warning(`Line ${lineNum}: Unknown category: ${category}`);
    }

    // Tone is a '|'-separated list, e.g. "humorous|satirical".
    const validTones = ['humorous', 'satirical', 'critical', 'celebratory', 'urgent', 'neutral', 'concerned'];
    if (tone) {
      if (typeof tone !== 'string') {
        // A non-string tone would otherwise throw on .split().
        warning(`Line ${lineNum}: Tone should be a '|'-separated string`);
      } else {
        const invalid = tone.split('|').filter((t) => !validTones.includes(t));
        if (invalid.length > 0) {
          warning(`Line ${lineNum}: Unknown tones: ${invalid.join(', ')}`);
        }
      }
    }
    return true;
  },

  /** List the {{UPPER_CASE}} template variables an article uses; informational only. */
  templateVariables(article, lineNum) {
    const text = articleText(article);
    const variables = text.match(/\{\{[A-Z_]+\}\}/g) || [];
    if (variables.length > 0) {
      const unique = [...new Set(variables)];
      info(`Line ${lineNum}: Uses template variables: ${unique.join(', ')}`);
    }
    return true;
  },
};
// Main linting function
/**
 * Lint every article in the JSONL template file and print a report.
 *
 * Each non-blank line is parsed as JSON and passed through every function in
 * `validators`. Line numbers in messages are positions in the file itself, so
 * blank lines do not shift them.
 *
 * NOTE: a warning is only tallied when a validator returns false. Warnings a
 * validator prints while still returning true are not counted in the summary.
 *
 * @returns {number} Exit code: 0 when no errors were counted, 1 otherwise
 *   (including when the template file does not exist).
 */
function lintNewspapers() {
  console.log(`\n${colors.bold}Newspaper Content Linter${colors.reset}`);
  console.log(`${colors.gray}Reading: ${templatesPath}${colors.reset}\n`);

  if (!fs.existsSync(templatesPath)) {
    error(`File not found: ${templatesPath}`);
    // Return the failure code instead of exiting here so the caller decides
    // how the process ends.
    return 1;
  }

  const fileContent = fs.readFileSync(templatesPath, 'utf8');
  // Record each line's position in the file BEFORE dropping blank lines, so
  // reported line numbers stay accurate. /\r?\n/ also accepts CRLF files.
  const entries = fileContent
    .split(/\r?\n/)
    .map((text, index) => ({ text, lineNum: index + 1 }))
    .filter(({ text }) => text.trim());

  let errorCount = 0;
  let warningCount = 0;
  const validArticles = [];

  console.log(`Linting ${entries.length} articles...\n`);

  for (const { text, lineNum } of entries) {
    let article;
    try {
      article = JSON.parse(text);
    } catch (parseError) {
      error(`Line ${lineNum}: JSON parse error - ${parseError.message}`);
      errorCount++;
      continue;
    }

    // Valid JSON that is not an object (null, number, string, array) cannot
    // be an article; report it once rather than letting every validator fail.
    if (article === null || typeof article !== 'object' || Array.isArray(article)) {
      error(`Line ${lineNum}: Expected a JSON object`);
      errorCount++;
      continue;
    }

    // Run all validators; a thrown exception counts as an error for the line.
    let lineValid = true;
    for (const [name, validator] of Object.entries(validators)) {
      try {
        if (!validator(article, lineNum)) {
          lineValid = false;
          if (name !== 'templateVariables' && name !== 'htmlEntities') {
            errorCount++;
          } else {
            warningCount++;
          }
        }
      } catch (err) {
        error(`Line ${lineNum}: Validator '${name}' crashed - ${err.message}`);
        errorCount++;
        lineValid = false;
      }
    }

    if (lineValid) {
      validArticles.push(article);
    }
  }

  // Summary
  const errorColor = errorCount > 0 ? colors.red : colors.green;
  const warningColor = warningCount > 0 ? colors.yellow : colors.green;
  console.log(`\n${colors.bold}Summary${colors.reset}`);
  console.log(`${colors.gray}─────────────────────${colors.reset}`);
  console.log(`Total articles: ${entries.length}`);
  console.log(`Valid articles: ${colors.green}${validArticles.length}${colors.reset}`);
  console.log(`Errors: ${errorColor}${errorCount}${colors.reset}`);
  console.log(`Warnings: ${warningColor}${warningCount}${colors.reset}`);
  console.log();

  if (errorCount > 0) {
    error(`Found ${errorCount} error(s) that need fixing.\n`);
    return 1;
  }
  if (warningCount === 0) {
    success('All newspapers passed linting! 🎉\n');
  } else {
    success('No critical errors, but some warnings were found.\n');
  }
  return 0;
}
// Run the linter.
// The result is assigned to process.exitCode instead of being passed to
// process.exit(), so the process ends on its own once pending output has been
// written rather than being cut off mid-write.
const exitCode = lintNewspapers();
process.exitCode = exitCode;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment