Skip to content

Instantly share code, notes, and snippets.

@arumsey
Last active March 7, 2025 11:48
Show Gist options
  • Select an option

  • Save arumsey/a66e25a5292afcc0f34be48a84c8c548 to your computer and use it in GitHub Desktop.

Select an option

Save arumsey/a66e25a5292afcc0f34be48a84c8c548 to your computer and use it in GitHub Desktop.
Templates used to build import scripts for AEM edge delivery services sites.
/* global WebImporter */
/**
 * Determine whether a value is a string that parses to a valid Date.
 *
 * @param {*} str The value to test.
 * @returns {boolean} True when str is a string and `new Date(str)` is valid.
 */
function isDate(str) {
  if (typeof str !== 'string') return false;
  const date = new Date(str);
  // an invalid Date has a NaN time value
  return !Number.isNaN(date.getTime());
}
/**
* Parse the document for metadata cell values.
*
* @param {HTMLElement} element The root query element.
* @param {Object} props Additional parse function props.
*/
export default function parse(element, props) {
const { document } = props;
const baseMetadata = WebImporter.Blocks.getMetadata(document) || {};
const customMetadata = {{configs}};
const meta = { ...baseMetadata, ...customMetadata };
Object.entries(meta).forEach(([key, value]) => {
// use first image
if (key === 'Image') {
const [img1] = value.src.split(',');
value.src = img1;
}
// convert dates
if (isDate(value)) {
meta[key] = new Date(value).toISOString().slice(0, 10);
}
});
return meta;
}
/**
 * The import rules object defines elements that can be removed (cleanup) from the source
 * document and elements that should be transformed into blocks.
 *
 * NOTE: {{{rules}}} is a mustache placeholder that is replaced with the
 * generated rules object when this import script is built.
 */
const importRules = {{{rules}}};
export default importRules;
/* global WebImporter */
import importRules from './import-rules.js';
{{#parsers}}
import {{block}}Parser from '{{{path}}}';
{{/parsers}}
{{#transformers}}
import {{name}}Transformer from '{{{path}}}';
{{/transformers}}
/**
 * Custom parser functions for each block type.
 *
 * Each parser function will be passed a root HTML element and
 * is expected to return a 2-dimensional array or an object of name/value
 * pairs that will be used to construct each block.
 *
 * NOTE: the {{#parsers}} section is a mustache placeholder expanded at
 * build time; keys are block ids, values are the imported parser functions.
 */
const parsers = {
{{#parsers}}
{{block}}: {{block}}Parser,
{{/parsers}}
};
/**
 * Transformation functions against main content.
 *
 * NOTE: the {{#transformers}} section is a mustache placeholder expanded
 * at build time with the configured transformer modules.
 */
const transformers = {
{{#transformers}}
{{name}}: {{name}}Transformer,
{{/transformers}}
};
/**
 * Check whether parsed block cells are empty.
 * Arrays are empty when they have no entries; plain objects are empty
 * when they have no own keys. Any other value is considered non-empty.
 */
function isEmpty(cells) {
  if (Array.isArray(cells)) {
    return cells.length === 0;
  }
  const isPlainObject = cells !== null && typeof cells === 'object';
  return isPlainObject ? Object.keys(cells).length === 0 : false;
}
/**
 * Return a path that describes the document being transformed (file name, nesting...).
 * The path is then used to create the corresponding Word document.
 * @param {String} url The url of the document being transformed.
 * @param {HTMLDocument} document The document
 */
function generateDocumentPath({ url }) {
  const { pathname } = new URL(url);
  // directory urls map to an index document
  const withIndex = pathname.endsWith('/') ? `${pathname}index` : pathname;
  const normalized = decodeURIComponent(withIndex)
    .toLowerCase()
    .replace(/\.html$/, '')
    .replace(/[^a-z0-9/]/gm, '-');
  return WebImporter.FileUtils.sanitizePath(normalized);
}
export default {
  /**
   * Apply DOM operations to the provided document and return
   * the root element to be then transformed to Markdown.
   * @param {HTMLDocument} document The document
   * @param {string} url The url of the page imported
   * @param {string} html The raw html (the document is cleaned up during preprocessing)
   * @param {object} params Object containing some parameters given by the import process.
   * @returns {HTMLElement} The root element to be transformed
   */
  transform: (source) => {
    // eslint-disable-next-line no-unused-vars
    const { document, url, html, params } = source;
    const {
      root = 'main',
      // default to an empty object so destructuring cannot throw when the
      // generated rules omit the cleanup section
      cleanup: {
        start: removeStart = [],
        end: removeEnd = [],
      } = {},
      blocks = [],
    } = importRules;
    // define the main element: the one that will be transformed to Markdown
    const main = document.querySelector(root) || document.body;
    // attempt to remove non-content elements
    WebImporter.DOMUtils.remove(main, removeStart);
    // transform all blocks using cell parsers
    blocks.forEach((blockCfg) => {
      // renamed to blockParams to avoid shadowing the outer params
      const {
        type, variants, selectors = [], insertMode = 'replace', params: blockParams = {},
      } = blockCfg;
      const parserFn = parsers[blockParams.id || type];
      // target elements: every selector match, or the main element itself
      const elements = selectors.length
        ? selectors.reduce((acc, selector) => [...acc, ...main.querySelectorAll(selector)], [])
        : [main];
      // process every element for this block
      elements.forEach((element) => {
        // parse the element into block items
        let items = parserFn ? parserFn.call(this, element, { ...source }) : [];
        if (Array.isArray(items)) {
          items = items.filter((item) => item);
        }
        if (!isEmpty(items)) {
          // create the block
          const block = WebImporter.Blocks.createBlock(document, {
            name: type,
            variants,
            cells: items,
          });
          if (block) {
            // add block to DOM
            if (insertMode === 'append') {
              main.append(block);
            } else if (insertMode === 'prepend') {
              main.prepend(block);
            } else if (element !== main) {
              element.replaceWith(block);
            }
          }
        }
      });
    });
    // perform any additional transformations
    Object.values(transformers).forEach((transformerFn) => transformerFn.call(this, main, { ...source }));
    WebImporter.rules.transformBackgroundImages(main, document);
    WebImporter.rules.adjustImageUrls(main, url, params.originalURL);
    // attempt to remove non-content elements
    WebImporter.DOMUtils.remove(main, removeEnd);
    return [{
      element: main,
      path: generateDocumentPath(source),
    }];
  },
};
/* global WebImporter */
import importRules from './import-rules.js';
{{#parsers}}
import {{block}}Parser from '{{{path}}}';
{{/parsers}}
{{#transformers}}
import {{name}}Transformer from '{{{path}}}';
{{/transformers}}
/**
 * Import utility functions shared by the transformation helpers.
 */
WebImporter.Import = {
  // true for an empty array, an empty plain object, or any falsy value
  isEmpty: (cells) => {
    if (Array.isArray(cells)) {
      return cells.length === 0;
    }
    if (typeof cells === 'object' && cells !== null) {
      return Object.keys(cells).length === 0;
    }
    return !cells;
  },
  // resolve a single element via an XPath expression (null when no match)
  getElementByXPath: (document, xpath) => document.evaluate(
    xpath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  ).singleNodeValue,
  // xpaths of all fragment instances recorded for the given url
  getFragmentXPaths: (instances, url) => instances
    .filter((instance) => instance.url === url)
    .map(({ xpath }) => xpath),
};
/**
 * Return a path that describes the document being transformed (file name, nesting...).
 * The path is then used to create the corresponding Word document.
 * @param {String} url The url of the document being transformed.
 * @param {HTMLDocument} document The document
 */
function generateDocumentPath({ url }) {
  let docPath = new URL(url).pathname;
  // a trailing slash means a directory: use its index document
  if (docPath.endsWith('/')) {
    docPath += 'index';
  }
  docPath = decodeURIComponent(docPath)
    .toLowerCase()
    .replace(/\.html$/, '')
    .replace(/[^a-z0-9/]/gm, '-');
  return WebImporter.FileUtils.sanitizePath(docPath);
}
/**
 * Page transformation function.
 *
 * Removes configured non-content and fragment elements, converts every
 * configured block via its parser, then runs the additional transformers.
 *
 * @param {HTMLElement} main The root element being transformed.
 * @param {object} source The transformation source object, including the
 *   site inventory of fragments and blocks.
 */
function transformPage(main, { inventory = { fragments: [], blocks: [] }, ...source }) {
  const { document, url, params: { originalURL } } = source;
  const {
    // default to an empty object so destructuring cannot throw when the
    // generated rules omit the cleanup section
    cleanup: {
      start: removeStart = [],
      end: removeEnd = [],
    } = {},
    blocks = [],
  } = importRules;
  // remove non-content elements
  WebImporter.DOMUtils.remove(main, removeStart);
  // remove fragment elements (skip xpaths that no longer resolve to a node)
  inventory.fragments
    .reduce((xpathList, fragment) => {
      return [...xpathList, ...WebImporter.Import.getFragmentXPaths(fragment.instances, originalURL)];
    }, [])
    .map((xpath) => WebImporter.Import.getElementByXPath(document, xpath))
    .filter((element) => element)
    .forEach((element) => {
      element.remove();
    });
  // transform all blocks using parsers
  blocks.forEach((blockCfg) => {
    const {
      type, variants, selectors = [], insertMode = 'replace', params = {},
    } = blockCfg;
    const parserFn = parsers[params.id || type];
    // find the inventory instances of this block on the current page
    const { instances = [] } = inventory.blocks.find(({ name, cluster }) => (
      name === params.name && cluster === params.cluster
    )) || {};
    const instancesForUrl = instances.filter((instance) => instance.url === originalURL);
    let elements = [];
    if (selectors.length > 0) {
      elements = selectors.reduce((acc, selector) => [...acc, ...main.querySelectorAll(selector)], []);
    }
    if (instancesForUrl.length > 0) {
      // inventory xpaths take precedence over selector matches
      elements = instancesForUrl.map(({ xpath }) => WebImporter.Import.getElementByXPath(document, xpath));
    }
    elements = elements.filter((el) => el);
    // process every element for this block
    elements.forEach((element) => {
      // parse the element
      let items = null;
      try {
        items = parserFn ? parserFn.call(this, element, { ...source }) : null;
      } catch (e) {
        console.warn(`Failed to parse block ${type}`, e);
      }
      // remove empty items
      if (Array.isArray(items)) {
        items = items.filter((item) => item);
      }
      if (!WebImporter.Import.isEmpty(items)) {
        // create the block
        const block = WebImporter.Blocks.createBlock(document, {
          name: params.block || type,
          variants,
          cells: items,
        });
        if (block) {
          // add block to DOM
          if (insertMode === 'append') {
            main.append(block);
          } else if (insertMode === 'prepend') {
            main.prepend(block);
          } else if (element !== main) {
            element.replaceWith(block);
          }
        }
      }
    });
  });
  // perform any additional transformations (skip internal _ helpers)
  Object.entries(transformers)
    .filter(([name]) => !name.startsWith('_'))
    .forEach(([, transformerFn]) => transformerFn.call(this, main, source));
  WebImporter.rules.transformBackgroundImages(main, document);
  WebImporter.rules.adjustImageUrls(main, url, originalURL);
  // attempt to remove non-content elements
  WebImporter.DOMUtils.remove(main, removeEnd);
}
/**
 * Fragment transformation function.
 *
 * Moves every instance element of the fragment into main, first running
 * the matching block parser (when one exists) on the element.
 *
 * @param {HTMLElement} main The root element collecting the fragment content.
 * @param {object} source The transformation source, including the fragment
 *   being built and the site inventory.
 */
function transformFragment(main, { fragment, inventory, ...source }) {
  const { document } = source;
  for (const { xpath } of (fragment.instances || [])) {
    const element = WebImporter.Import.getElementByXPath(document, xpath);
    if (!element) {
      continue;
    }
    // find inventory block with matching xpath
    let fragmentBlock = null;
    for (const block of inventory.blocks) {
      const { instances } = block;
      const instance = instances.find(({ xpath: blockXpath }) => blockXpath === xpath);
      if (instance) {
        fragmentBlock = block;
        break;
      }
    }
    // guard against generated rules without a blocks section
    const blockRule = (importRules.blocks || []).find(({ type }) => fragmentBlock && fragmentBlock.name === type);
    const parserFn = parsers[blockRule?.params?.id];
    if (parserFn) {
      try {
        // the parser mutates the element in place; the return value is unused here
        parserFn.call(this, element, source);
      } catch (e) {
        console.warn(`Failed to parse block ${blockRule.type}`, e);
      }
    }
    main.append(element);
  }
}
/**
 * Custom parser functions for each block type.
 *
 * NOTE: the {{#parsers}} section is a mustache placeholder expanded at
 * build time; keys are block ids, values are the imported parser functions.
 */
const parsers = {
{{#parsers}}
{{block}}: {{block}}Parser,
{{/parsers}}
};
/**
 * Transformation functions against main content.
 *
 * Names prefixed with an underscore are internal helpers invoked directly
 * by the transform entry point; they are skipped by the generic transformer
 * loop. The {{#transformers}} section is a mustache placeholder expanded
 * at build time.
 */
const transformers = {
_transformPage: transformPage,
_transformFragment: transformFragment,
{{#transformers}}
{{name}}: {{name}}Transformer,
{{/transformers}}
};
export default {
  /**
   * Apply DOM operations to the provided document and return
   * the root element to be then transformed to Markdown.
   * @param {object} source The transformation source object
   * @param {HTMLDocument} source.document The document
   * @param {string} source.url The url of the page imported
   * @param {string} source.html The raw html (the document is cleaned up during preprocessing)
   * @param {object} source.params Object containing some parameters given by the import process.
   * @returns {HTMLElement} The root element to be transformed
   */
  transform: async (source) => {
    const { document, params: { originalURL } } = source;
    // fetch the inventory
    const inventoryUrl = new URL('/tools/importer/inventory.json', '{{publishUrl}}');
    let inventory = null;
    try {
      const inventoryResp = await fetch(inventoryUrl.href);
      inventory = await inventoryResp.json();
    } catch (e) {
      // try localhost proxy; a failure here must not reject the whole
      // transform — fall through to the !inventory guard below instead
      try {
        const inventoryResp = await fetch(`http://localhost:3001${inventoryUrl.pathname}?host=${encodeURIComponent(inventoryUrl.origin)}`);
        inventory = await inventoryResp.json();
      } catch (proxyError) {
        console.warn('Inventory fetch failed via proxy', proxyError);
      }
    }
    if (!inventory) {
      console.error('Failed to fetch inventory');
      return [];
    }
    // perform the transformation
    const sourceUrl = new URL(originalURL);
    const sourceParams = new URLSearchParams(sourceUrl.search);
    if (sourceParams.has('frag')) {
      // fragment transformation
      const fragName = sourceParams.get('frag');
      const fragment = inventory.fragments.find(({ name }) => name === fragName);
      if (!fragment) {
        return [];
      }
      const main = document.createElement('div');
      transformers._transformFragment(main, { ...source, fragment, inventory });
      return [{
        element: main,
        path: fragment.path,
      }];
    }
    // page transformation
    const main = document.querySelector(importRules.root) || document.body;
    transformers._transformPage(main, { ...source, inventory });
    return [{
      element: main,
      path: generateDocumentPath(source),
    }];
  },
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment