Instantly share code, notes, and snippets.
Last active
March 7, 2025 11:48
-
Star
0
(0)
You must be signed in to star a gist -
Fork
0
(0)
You must be signed in to fork a gist
-
-
Save arumsey/a66e25a5292afcc0f34be48a84c8c548 to your computer and use it in GitHub Desktop.
Templates used to build import scripts for AEM edge delivery services sites.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* global WebImporter */ | |
/**
 * Determine whether a value is a string that parses to a valid date.
 *
 * NOTE: relies on `new Date(str)` parsing, which accepts some non-ISO
 * strings in an engine-dependent way.
 *
 * @param {*} str Candidate value; only strings are considered.
 * @returns {boolean} True when the string yields a valid Date.
 */
function isDate(str) {
  if (typeof str !== 'string') {
    return false;
  }
  // an invalid date has a NaN timestamp
  return !Number.isNaN(new Date(str).getTime());
}
| /** | |
| * Parse the document for metadata cell values. | |
| * | |
| * Merges metadata detected in the document with the custom metadata | |
| * injected at template-generation time, then normalizes image and date | |
| * values. | |
| * | |
| * @param {HTMLElement} element The root query element. | |
| * @param {Object} props Additional parse function props. | |
| * @returns {Object} Name/value metadata pairs for the Metadata block. | |
| */ | |
| export default function parse(element, props) { | |
| const { document } = props; | |
| const baseMetadata = WebImporter.Blocks.getMetadata(document) || {}; | |
| // {{configs}} is a mustache placeholder replaced with an object literal | |
| // when the import script is generated from this template | |
| const customMetadata = {{configs}}; | |
| // custom metadata wins over values detected in the document | |
| const meta = { ...baseMetadata, ...customMetadata }; | |
| Object.entries(meta).forEach(([key, value]) => { | |
| // use first image | |
| // NOTE(review): assumes the Image value is an element whose `src` may be | |
| // a comma-separated list — confirm against getMetadata output | |
| if (key === 'Image') { | |
| const [img1] = value.src.split(','); | |
| value.src = img1; | |
| } | |
| // convert dates to ISO YYYY-MM-DD | |
| if (isDate(value)) { | |
| meta[key] = new Date(value).toISOString().slice(0, 10); | |
| } | |
| }); | |
| return meta; | |
| } | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * The import rules object defines elements that can be removed (cleanup) from the source | |
| * document and elements that should be transformed into blocks. | |
| */ | |
| // {{{rules}}} is an unescaped (triple-mustache) placeholder replaced with | |
| // the generated rules object literal when this template is rendered | |
| const importRules = {{{rules}}}; | |
| export default importRules; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* global WebImporter */ | |
| import importRules from './import-rules.js'; | |
| {{#parsers}} | |
| import {{block}}Parser from '{{{path}}}'; | |
| {{/parsers}} | |
| {{#transformers}} | |
| import {{name}}Transformer from '{{{path}}}'; | |
| {{/transformers}} | |
| /** | |
| * Custom parser functions for each block type. | |
| * | |
| * Each parser function will be passed a root HTML element and | |
| * is expected to return a 2-dimensional array or an object of name/value | |
| * pairs that will be used to construct each block. | |
| */ | |
| const parsers = { | |
| // mustache section: one entry per configured block, keyed by block id, | |
| // mapping to the parser module imported above | |
| {{#parsers}} | |
| {{block}}: {{block}}Parser, | |
| {{/parsers}} | |
| }; | |
| /** | |
| * Transformation functions against main content. | |
| * | |
| * Every function listed here is invoked against the main element after | |
| * block parsing completes. | |
| */ | |
| const transformers = { | |
| // mustache section: one entry per configured transformer module | |
| {{#transformers}} | |
| {{name}}: {{name}}Transformer, | |
| {{/transformers}} | |
| }; | |
/**
 * Determine whether parsed block cells contain no usable content.
 *
 * Mirrors WebImporter.Import.isEmpty in the inventory-based importer so both
 * code paths agree: empty arrays/objects AND falsy values (null, undefined,
 * '') are all "empty". The falsy fallback prevents createBlock from being
 * invoked with null/undefined cells when a parser returns nothing.
 *
 * @param {*} cells Parser output: a 2-D array, a name/value object, or falsy.
 * @returns {boolean} True when there is no cell content.
 */
function isEmpty(cells) {
  if (Array.isArray(cells)) {
    return cells.length === 0;
  }
  if (typeof cells === 'object' && cells !== null) {
    return Object.keys(cells).length === 0;
  }
  // treat null/undefined/'' (parser produced nothing) as empty
  return !cells;
}
/**
 * Return a path that describes the document being transformed (file name,
 * nesting...). The path is then used to create the corresponding Word
 * document.
 *
 * @param {Object} source The transformation source.
 * @param {String} source.url The url of the document being transformed.
 * @returns {String} A sanitized, lower-case document path.
 */
function generateDocumentPath({ url }) {
  const { pathname } = new URL(url);
  // directory-style URLs map to an index document
  const withIndex = pathname.endsWith('/') ? `${pathname}index` : pathname;
  const normalized = decodeURIComponent(withIndex)
    .toLowerCase()
    .replace(/\.html$/, '')
    .replace(/[^a-z0-9/]/gm, '-');
  return WebImporter.FileUtils.sanitizePath(normalized);
}
export default {
  /**
   * Apply DOM operations to the provided document and return
   * the root element to be then transformed to Markdown.
   * @param {Object} source The transformation source object
   * @param {HTMLDocument} source.document The document
   * @param {string} source.url The url of the page imported
   * @param {string} source.html The raw html (the document is cleaned up during preprocessing)
   * @param {Object} source.params Object containing some parameters given by the import process.
   * @returns {Array<{element: HTMLElement, path: string}>} The element/path pairs to transform
   */
  transform: (source) => {
    // eslint-disable-next-line no-unused-vars
    const { document, url, html, params } = source;
    const {
      root = 'main',
      // default to {} so rules without a `cleanup` section do not throw
      // on destructuring
      cleanup: {
        start: removeStart = [],
        end: removeEnd = [],
      } = {},
      blocks = [],
    } = importRules;
    // define the main element: the one that will be transformed to Markdown
    const main = document.querySelector(root) || document.body;
    // attempt to remove non-content elements
    WebImporter.DOMUtils.remove(main, removeStart);
    // transform all blocks using cell parsers
    blocks.forEach((blockCfg) => {
      // renamed `params` -> `blockParams` to avoid shadowing the source params
      const {
        type, variants, selectors = [], insertMode = 'replace', params: blockParams = {},
      } = blockCfg;
      const parserFn = parsers[blockParams.id || type];
      // collect the elements this block applies to; default to main itself
      const elements = selectors.length
        ? selectors.reduce((acc, selector) => [...acc, ...main.querySelectorAll(selector)], [])
        : [main];
      // process every element for this block
      elements.forEach((element) => {
        // parse the element into block items
        let items = parserFn ? parserFn.call(this, element, { ...source }) : [];
        if (Array.isArray(items)) {
          // drop empty rows
          items = items.filter((item) => item);
        }
        if (!isEmpty(items)) {
          // create the block
          const block = WebImporter.Blocks.createBlock(document, {
            name: type,
            variants,
            cells: items,
          });
          if (block) {
            // add block to DOM
            if (insertMode === 'append') {
              main.append(block);
            } else if (insertMode === 'prepend') {
              main.prepend(block);
            } else if (element !== main) {
              element.replaceWith(block);
            }
          }
        }
      });
    });
    // perform any additional transformations
    Object.values(transformers).forEach((transformerFn) => transformerFn.call(this, main, { ...source }));
    WebImporter.rules.transformBackgroundImages(main, document);
    WebImporter.rules.adjustImageUrls(main, url, params.originalURL);
    // attempt to remove non-content elements
    WebImporter.DOMUtils.remove(main, removeEnd);
    return [{
      element: main,
      path: generateDocumentPath(source),
    }];
  },
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* global WebImporter */ | |
| import importRules from './import-rules.js'; | |
| {{#parsers}} | |
| import {{block}}Parser from '{{{path}}}'; | |
| {{/parsers}} | |
| {{#transformers}} | |
| import {{name}}Transformer from '{{{path}}}'; | |
| {{/transformers}} | |
/**
 * Import utility functions shared by the generated transformation code.
 * Attached to the global WebImporter object provided by the import runtime.
 */
WebImporter.Import = {
  // true for empty arrays, empty objects, and any falsy value
  isEmpty: (cells) => {
    if (Array.isArray(cells)) {
      return cells.length === 0;
    }
    if (typeof cells === 'object' && cells !== null) {
      return Object.keys(cells).length === 0;
    }
    return !cells;
  },
  // resolve a single element from an XPath expression (null when absent)
  getElementByXPath: (document, xpath) => document.evaluate(
    xpath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  ).singleNodeValue,
  // xpath expressions of all fragment instances recorded for the given url
  getFragmentXPaths: (instances, url) => instances
    .filter((instance) => instance.url === url)
    .map(({ xpath }) => xpath),
};
/**
 * Return a path that describes the document being transformed (file name,
 * nesting...). The path is then used to create the corresponding Word
 * document.
 *
 * @param {Object} source The transformation source.
 * @param {String} source.url The url of the document being transformed.
 * @returns {String} A sanitized, lower-case document path.
 */
function generateDocumentPath({ url }) {
  let docPath = new URL(url).pathname;
  if (docPath.endsWith('/')) {
    // directory-style URLs map to an index document
    docPath += 'index';
  }
  docPath = decodeURIComponent(docPath).toLowerCase();
  docPath = docPath.replace(/\.html$/, '').replace(/[^a-z0-9/]/gm, '-');
  return WebImporter.FileUtils.sanitizePath(docPath);
}
/**
 * Page transformation function.
 *
 * Cleans up non-content and fragment elements, converts configured blocks
 * via their parser functions, then applies the additional transformers.
 *
 * @param {HTMLElement} main The root element, transformed in place.
 * @param {Object} options The source object plus the site `inventory`
 *   (fragments/blocks discovered during the inventory phase).
 */
function transformPage(main, { inventory = { fragments: [], blocks: [] }, ...source }) {
  const { document, url, params: { originalURL } } = source;
  const {
    // default to {} so rules without a `cleanup` section do not throw
    // on destructuring
    cleanup: {
      start: removeStart = [],
      end: removeEnd = [],
    } = {},
    blocks = [],
  } = importRules;
  // remove non-content elements
  WebImporter.DOMUtils.remove(main, removeStart);
  // remove fragment elements; skip xpaths that no longer resolve to an element
  inventory.fragments
    .reduce((xpathList, fragment) => (
      [...xpathList, ...WebImporter.Import.getFragmentXPaths(fragment.instances, originalURL)]
    ), [])
    .map((xpath) => WebImporter.Import.getElementByXPath(document, xpath))
    .filter((element) => element)
    .forEach((element) => {
      element.remove();
    });
  // transform all blocks using parsers
  blocks.forEach((blockCfg) => {
    const {
      type, variants, selectors = [], insertMode = 'replace', params = {},
    } = blockCfg;
    const parserFn = parsers[params.id || type];
    // inventory instances recorded for this block
    const { instances = [] } = inventory.blocks.find(({ name, cluster }) => (
      name === params.name && cluster === params.cluster
    )) || {};
    const instancesForUrl = instances.filter((instance) => instance.url === originalURL);
    let elements = [];
    if (selectors.length > 0) {
      elements = selectors.reduce((acc, selector) => [...acc, ...main.querySelectorAll(selector)], []);
    }
    // inventory xpaths, when present, take precedence over selectors
    if (instancesForUrl.length > 0) {
      elements = instancesForUrl.map(({ xpath }) => WebImporter.Import.getElementByXPath(document, xpath));
    }
    elements = elements.filter((el) => el);
    // process every element for this block
    elements.forEach((element) => {
      // parse the element
      let items = null;
      try {
        items = parserFn ? parserFn.call(this, element, { ...source }) : null;
      } catch (e) {
        console.warn(`Failed to parse block ${type}`, e);
      }
      // remove empty items
      if (Array.isArray(items)) {
        items = items.filter((item) => item);
      }
      if (!WebImporter.Import.isEmpty(items)) {
        // create the block
        const block = WebImporter.Blocks.createBlock(document, {
          name: params.block || type,
          variants,
          cells: items,
        });
        if (block) {
          // add block to DOM
          if (insertMode === 'append') {
            main.append(block);
          } else if (insertMode === 'prepend') {
            main.prepend(block);
          } else if (element !== main) {
            element.replaceWith(block);
          }
        }
      }
    });
  });
  // perform any additional transformations (underscore entries are internal)
  Object.entries(transformers)
    .filter(([name]) => !name.startsWith('_'))
    .forEach(([, transformerFn]) => transformerFn.call(this, main, source));
  WebImporter.rules.transformBackgroundImages(main, document);
  WebImporter.rules.adjustImageUrls(main, url, originalURL);
  // attempt to remove non-content elements
  WebImporter.DOMUtils.remove(main, removeEnd);
}
| /** | |
| * Fragment transformation function | |
| * | |
| * Moves each fragment instance element into `main`, running the matching | |
| * block parser (when one exists) against it first. | |
| * | |
| * @param {HTMLElement} main The (detached) root element to append into. | |
| * @param {Object} options Source object plus `fragment` and `inventory`. | |
| */ | |
| function transformFragment(main, { fragment, inventory, ...source }) { | |
| const { document } = source; | |
| for (const { xpath } of (fragment.instances || [])) { | |
| const element = WebImporter.Import.getElementByXPath(document, xpath); | |
| // skip instances whose xpath no longer resolves in this document | |
| if (!element) { | |
| continue; | |
| } | |
| // find block with matching xpath | |
| let fragmentBlock = null; | |
| for (const block of inventory.blocks) { | |
| const { instances } = block; | |
| const instance = instances.find(({ xpath: blockXpath }) => blockXpath === xpath); | |
| if (instance) { | |
| fragmentBlock = block; | |
| break; | |
| } | |
| } | |
| // map the inventory block back to its import rule to locate the parser | |
| const blockRule = importRules.blocks.find(({ type }) => fragmentBlock && fragmentBlock.name === type); | |
| const parserFn = parsers[blockRule?.params?.id]; | |
| if (parserFn) { | |
| // parser is run for its DOM side effects; its return value is ignored here | |
| try { | |
| parserFn.call(this, element, source); | |
| } catch (e) { | |
| console.warn(`Failed to parse block ${blockRule.type}`, e); | |
| } | |
| } | |
| main.append(element); | |
| } | |
| } | |
| /** | |
| * Custom parser functions for each block type. | |
| */ | |
| const parsers = { | |
| // mustache section: one entry per configured block, keyed by block id, | |
| // mapping to the parser module imported above | |
| {{#parsers}} | |
| {{block}}: {{block}}Parser, | |
| {{/parsers}} | |
| }; | |
| /** | |
| * Transformation functions against main content. | |
| * | |
| * Underscore-prefixed entries are internal entry points and are excluded | |
| * from the public transformer loop in transformPage. | |
| */ | |
| const transformers = { | |
| _transformPage: transformPage, | |
| _transformFragment: transformFragment, | |
| // mustache section: one entry per configured transformer module | |
| {{#transformers}} | |
| {{name}}: {{name}}Transformer, | |
| {{/transformers}} | |
| }; | |
export default {
  /**
   * Apply DOM operations to the provided document and return
   * the root element to be then transformed to Markdown.
   * @param {object} source The transformation source object
   * @param {HTMLDocument} source.document The document
   * @param {string} source.url The url of the page imported
   * @param {string} source.html The raw html (the document is cleaned up during preprocessing)
   * @param {object} source.params Object containing some parameters given by the import process.
   * @returns {Promise<Array<{element: HTMLElement, path: string}>>} The element/path pairs to transform
   */
  transform: async (source) => {
    const { document, params: { originalURL } } = source;
    // fetch the inventory of fragments/blocks discovered on the site;
    // '{{publishUrl}}' is replaced with the site origin at generation time
    const inventoryUrl = new URL('/tools/importer/inventory.json', '{{publishUrl}}');
    let inventory = null;
    try {
      const inventoryResp = await fetch(inventoryUrl.href);
      inventory = await inventoryResp.json();
    } catch (e) {
      // try localhost proxy; guard it so a second failure falls through to
      // the `!inventory` error path instead of rejecting the transform
      try {
        const inventoryResp = await fetch(`http://localhost:3001${inventoryUrl.pathname}?host=${encodeURIComponent(inventoryUrl.origin)}`);
        inventory = await inventoryResp.json();
      } catch (proxyError) {
        console.warn('Inventory fetch via localhost proxy failed', proxyError);
      }
    }
    if (!inventory) {
      console.error('Failed to fetch inventory');
      return [];
    }
    // perform the transformation
    const sourceUrl = new URL(originalURL);
    const sourceParams = new URLSearchParams(sourceUrl.search);
    if (sourceParams.has('frag')) {
      // fragment transformation: build the fragment into a detached container
      const fragName = sourceParams.get('frag');
      const fragment = inventory.fragments.find(({ name }) => name === fragName);
      if (!fragment) {
        return [];
      }
      const main = document.createElement('div');
      transformers._transformFragment(main, { ...source, fragment, inventory });
      return [{
        element: main,
        path: fragment.path,
      }];
    }
    // page transformation
    const main = document.querySelector(importRules.root) || document.body;
    transformers._transformPage(main, { ...source, inventory });
    return [{
      element: main,
      path: generateDocumentPath(source),
    }];
  },
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment