Skip to content

Instantly share code, notes, and snippets.

@arumsey
Last active March 7, 2025 11:48
Show Gist options
  • Select an option

  • Save arumsey/a66e25a5292afcc0f34be48a84c8c548 to your computer and use it in GitHub Desktop.

Select an option

Save arumsey/a66e25a5292afcc0f34be48a84c8c548 to your computer and use it in GitHub Desktop.
Templates used to build import scripts for AEM edge delivery services sites.
/* global WebImporter */
/**
 * Determine whether a value is a string that parses to a valid Date.
 *
 * @param {*} str The value to test.
 * @returns {boolean} True when str is a string and `new Date(str)` is valid.
 */
function isDate(str) {
  if (typeof str !== 'string') return false;
  const date = new Date(str);
  // an invalid Date has a NaN time value
  return !Number.isNaN(date.getTime());
}
/**
* Parse the document for metadata cell values.
*
* @param {HTMLElement} element The root query element.
* @param {Object} props Additional parse function props.
*/
export default function parse(element, props) {
const { document } = props;
const baseMetadata = WebImporter.Blocks.getMetadata(document) || {};
const customMetadata = {{configs}};
const meta = { ...baseMetadata, ...customMetadata };
Object.entries(meta).forEach(([key, value]) => {
// use first image
if (key === 'Image') {
const [img1] = value.src.split(',');
value.src = img1;
}
// convert dates
if (isDate(value)) {
meta[key] = new Date(value).toISOString().slice(0, 10);
}
});
return meta;
}
/**
 * The import rules object defines elements that can be removed (cleanup) from the source
 * document and elements that should be transformed into blocks.
 *
 * NOTE: {{{rules}}} is a mustache placeholder that is replaced with the
 * generated rules object when this import script is built.
 */
const importRules = {{{rules}}};
export default importRules;
/* global WebImporter */
import importRules from './import-rules.js';
{{#parsers}}
import {{block}}Parser from '{{{path}}}';
{{/parsers}}
{{#transformers}}
import {{name}}Transformer from '{{{path}}}';
{{/transformers}}
/**
 * Custom parser functions for each block type.
 *
 * Each parser function will be passed a root HTML element and
 * is expected to return a 2-dimensional array or an object of name/value
 * pairs that will be used to construct each block.
 *
 * NOTE: the {{#parsers}} section is a mustache placeholder expanded at
 * build time; keys are block ids, values are the imported parser functions.
 */
const parsers = {
{{#parsers}}
{{block}}: {{block}}Parser,
{{/parsers}}
};
/**
 * Transformation functions against main content.
 *
 * NOTE: the {{#transformers}} section is a mustache placeholder expanded
 * at build time with the configured transformer modules.
 */
const transformers = {
{{#transformers}}
{{name}}: {{name}}Transformer,
{{/transformers}}
};
/**
 * Check whether parsed block cells are empty.
 * Arrays are empty when they have no entries; plain objects are empty
 * when they have no own keys. Any other value is considered non-empty.
 */
function isEmpty(cells) {
  if (Array.isArray(cells)) {
    return cells.length === 0;
  }
  const isPlainObject = cells !== null && typeof cells === 'object';
  return isPlainObject ? Object.keys(cells).length === 0 : false;
}
/**
 * Return a path that describes the document being transformed (file name, nesting...).
 * The path is then used to create the corresponding Word document.
 * @param {String} url The url of the document being transformed.
 * @param {HTMLDocument} document The document
 */
function generateDocumentPath({ url }) {
  const { pathname } = new URL(url);
  // directory urls map to an index document
  const withIndex = pathname.endsWith('/') ? `${pathname}index` : pathname;
  const normalized = decodeURIComponent(withIndex)
    .toLowerCase()
    .replace(/\.html$/, '')
    .replace(/[^a-z0-9/]/gm, '-');
  return WebImporter.FileUtils.sanitizePath(normalized);
}
export default {
  /**
   * Apply DOM operations to the provided document and return
   * the root element to be then transformed to Markdown.
   * @param {HTMLDocument} document The document
   * @param {string} url The url of the page imported
   * @param {string} html The raw html (the document is cleaned up during preprocessing)
   * @param {object} params Object containing some parameters given by the import process.
   * @returns {HTMLElement} The root element to be transformed
   */
  transform: (source) => {
    // eslint-disable-next-line no-unused-vars
    const { document, url, html, params } = source;
    const {
      root = 'main',
      // default to an empty object so destructuring cannot throw when the
      // generated rules omit the cleanup section
      cleanup: {
        start: removeStart = [],
        end: removeEnd = [],
      } = {},
      blocks = [],
    } = importRules;
    // define the main element: the one that will be transformed to Markdown
    const main = document.querySelector(root) || document.body;
    // attempt to remove non-content elements
    WebImporter.DOMUtils.remove(main, removeStart);
    // transform all blocks using cell parsers
    blocks.forEach((blockCfg) => {
      // renamed to blockParams to avoid shadowing the outer params
      const {
        type, variants, selectors = [], insertMode = 'replace', params: blockParams = {},
      } = blockCfg;
      const parserFn = parsers[blockParams.id || type];
      // target elements: every selector match, or the main element itself
      const elements = selectors.length
        ? selectors.reduce((acc, selector) => [...acc, ...main.querySelectorAll(selector)], [])
        : [main];
      // process every element for this block
      elements.forEach((element) => {
        // parse the element into block items
        let items = parserFn ? parserFn.call(this, element, { ...source }) : [];
        if (Array.isArray(items)) {
          items = items.filter((item) => item);
        }
        if (!isEmpty(items)) {
          // create the block
          const block = WebImporter.Blocks.createBlock(document, {
            name: type,
            variants,
            cells: items,
          });
          if (block) {
            // add block to DOM
            if (insertMode === 'append') {
              main.append(block);
            } else if (insertMode === 'prepend') {
              main.prepend(block);
            } else if (element !== main) {
              element.replaceWith(block);
            }
          }
        }
      });
    });
    // perform any additional transformations
    Object.values(transformers).forEach((transformerFn) => transformerFn.call(this, main, { ...source }));
    WebImporter.rules.transformBackgroundImages(main, document);
    WebImporter.rules.adjustImageUrls(main, url, params.originalURL);
    // attempt to remove non-content elements
    WebImporter.DOMUtils.remove(main, removeEnd);
    return [{
      element: main,
      path: generateDocumentPath(source),
    }];
  },
};
/* global WebImporter */
import importRules from './import-rules.js';
{{#parsers}}
import {{block}}Parser from '{{{path}}}';
{{/parsers}}
{{#transformers}}
import {{name}}Transformer from '{{{path}}}';
{{/transformers}}
/**
 * Import utility functions shared by the transformation helpers.
 */
WebImporter.Import = {
  // true for an empty array, an empty plain object, or any falsy value
  isEmpty: (cells) => {
    if (Array.isArray(cells)) {
      return cells.length === 0;
    }
    if (typeof cells === 'object' && cells !== null) {
      return Object.keys(cells).length === 0;
    }
    return !cells;
  },
  // resolve a single element via an XPath expression (null when no match)
  getElementByXPath: (document, xpath) => document.evaluate(
    xpath,
    document,
    null,
    XPathResult.FIRST_ORDERED_NODE_TYPE,
    null,
  ).singleNodeValue,
  // xpaths of all fragment instances recorded for the given url
  getFragmentXPaths: (instances, url) => instances
    .filter((instance) => instance.url === url)
    .map(({ xpath }) => xpath),
};
/**
 * Return a path that describes the document being transformed (file name, nesting...).
 * The path is then used to create the corresponding Word document.
 * @param {String} url The url of the document being transformed.
 * @param {HTMLDocument} document The document
 */
function generateDocumentPath({ url }) {
  let docPath = new URL(url).pathname;
  // a trailing slash means a directory: use its index document
  if (docPath.endsWith('/')) {
    docPath += 'index';
  }
  docPath = decodeURIComponent(docPath)
    .toLowerCase()
    .replace(/\.html$/, '')
    .replace(/[^a-z0-9/]/gm, '-');
  return WebImporter.FileUtils.sanitizePath(docPath);
}
/**
 * Page transformation function.
 *
 * Removes configured non-content and fragment elements, converts every
 * configured block via its parser, then runs the additional transformers.
 *
 * @param {HTMLElement} main The root element being transformed.
 * @param {object} source The transformation source object, including the
 *   site inventory of fragments and blocks.
 */
function transformPage(main, { inventory = { fragments: [], blocks: [] }, ...source }) {
  const { document, url, params: { originalURL } } = source;
  const {
    // default to an empty object so destructuring cannot throw when the
    // generated rules omit the cleanup section
    cleanup: {
      start: removeStart = [],
      end: removeEnd = [],
    } = {},
    blocks = [],
  } = importRules;
  // remove non-content elements
  WebImporter.DOMUtils.remove(main, removeStart);
  // remove fragment elements (skip xpaths that no longer resolve to a node)
  inventory.fragments
    .reduce((xpathList, fragment) => {
      return [...xpathList, ...WebImporter.Import.getFragmentXPaths(fragment.instances, originalURL)];
    }, [])
    .map((xpath) => WebImporter.Import.getElementByXPath(document, xpath))
    .filter((element) => element)
    .forEach((element) => {
      element.remove();
    });
  // transform all blocks using parsers
  blocks.forEach((blockCfg) => {
    const {
      type, variants, selectors = [], insertMode = 'replace', params = {},
    } = blockCfg;
    const parserFn = parsers[params.id || type];
    // find the inventory instances of this block on the current page
    const { instances = [] } = inventory.blocks.find(({ name, cluster }) => (
      name === params.name && cluster === params.cluster
    )) || {};
    const instancesForUrl = instances.filter((instance) => instance.url === originalURL);
    let elements = [];
    if (selectors.length > 0) {
      elements = selectors.reduce((acc, selector) => [...acc, ...main.querySelectorAll(selector)], []);
    }
    if (instancesForUrl.length > 0) {
      // inventory xpaths take precedence over selector matches
      elements = instancesForUrl.map(({ xpath }) => WebImporter.Import.getElementByXPath(document, xpath));
    }
    elements = elements.filter((el) => el);
    // process every element for this block
    elements.forEach((element) => {
      // parse the element
      let items = null;
      try {
        items = parserFn ? parserFn.call(this, element, { ...source }) : null;
      } catch (e) {
        console.warn(`Failed to parse block ${type}`, e);
      }
      // remove empty items
      if (Array.isArray(items)) {
        items = items.filter((item) => item);
      }
      if (!WebImporter.Import.isEmpty(items)) {
        // create the block
        const block = WebImporter.Blocks.createBlock(document, {
          name: params.block || type,
          variants,
          cells: items,
        });
        if (block) {
          // add block to DOM
          if (insertMode === 'append') {
            main.append(block);
          } else if (insertMode === 'prepend') {
            main.prepend(block);
          } else if (element !== main) {
            element.replaceWith(block);
          }
        }
      }
    });
  });
  // perform any additional transformations (skip internal _ helpers)
  Object.entries(transformers)
    .filter(([name]) => !name.startsWith('_'))
    .forEach(([, transformerFn]) => transformerFn.call(this, main, source));
  WebImporter.rules.transformBackgroundImages(main, document);
  WebImporter.rules.adjustImageUrls(main, url, originalURL);
  // attempt to remove non-content elements
  WebImporter.DOMUtils.remove(main, removeEnd);
}
/**
 * Fragment transformation function.
 *
 * Moves every instance element of the fragment into main, first running
 * the matching block parser (when one exists) on the element.
 *
 * @param {HTMLElement} main The root element collecting the fragment content.
 * @param {object} source The transformation source, including the fragment
 *   being built and the site inventory.
 */
function transformFragment(main, { fragment, inventory, ...source }) {
  const { document } = source;
  for (const { xpath } of (fragment.instances || [])) {
    const element = WebImporter.Import.getElementByXPath(document, xpath);
    if (!element) {
      continue;
    }
    // find inventory block with matching xpath
    let fragmentBlock = null;
    for (const block of inventory.blocks) {
      const { instances } = block;
      const instance = instances.find(({ xpath: blockXpath }) => blockXpath === xpath);
      if (instance) {
        fragmentBlock = block;
        break;
      }
    }
    // guard against generated rules without a blocks section
    const blockRule = (importRules.blocks || []).find(({ type }) => fragmentBlock && fragmentBlock.name === type);
    const parserFn = parsers[blockRule?.params?.id];
    if (parserFn) {
      try {
        // the parser mutates the element in place; the return value is unused here
        parserFn.call(this, element, source);
      } catch (e) {
        console.warn(`Failed to parse block ${blockRule.type}`, e);
      }
    }
    main.append(element);
  }
}
/**
 * Custom parser functions for each block type.
 *
 * NOTE: the {{#parsers}} section is a mustache placeholder expanded at
 * build time; keys are block ids, values are the imported parser functions.
 */
const parsers = {
{{#parsers}}
{{block}}: {{block}}Parser,
{{/parsers}}
};
/**
 * Transformation functions against main content.
 *
 * Names prefixed with an underscore are internal helpers invoked directly
 * by the transform entry point; they are skipped by the generic transformer
 * loop. The {{#transformers}} section is a mustache placeholder expanded
 * at build time.
 */
const transformers = {
_transformPage: transformPage,
_transformFragment: transformFragment,
{{#transformers}}
{{name}}: {{name}}Transformer,
{{/transformers}}
};
export default {
  /**
   * Apply DOM operations to the provided document and return
   * the root element to be then transformed to Markdown.
   * @param {object} source The transformation source object
   * @param {HTMLDocument} source.document The document
   * @param {string} source.url The url of the page imported
   * @param {string} source.html The raw html (the document is cleaned up during preprocessing)
   * @param {object} source.params Object containing some parameters given by the import process.
   * @returns {HTMLElement} The root element to be transformed
   */
  transform: async (source) => {
    const { document, params: { originalURL } } = source;
    // fetch the inventory
    const inventoryUrl = new URL('/tools/importer/inventory.json', '{{publishUrl}}');
    let inventory = null;
    try {
      const inventoryResp = await fetch(inventoryUrl.href);
      inventory = await inventoryResp.json();
    } catch (e) {
      // try localhost proxy; a failure here must not reject the whole
      // transform — fall through to the !inventory guard below instead
      try {
        const inventoryResp = await fetch(`http://localhost:3001${inventoryUrl.pathname}?host=${encodeURIComponent(inventoryUrl.origin)}`);
        inventory = await inventoryResp.json();
      } catch (proxyError) {
        console.warn('Inventory fetch failed via proxy', proxyError);
      }
    }
    if (!inventory) {
      console.error('Failed to fetch inventory');
      return [];
    }
    // perform the transformation
    const sourceUrl = new URL(originalURL);
    const sourceParams = new URLSearchParams(sourceUrl.search);
    if (sourceParams.has('frag')) {
      // fragment transformation
      const fragName = sourceParams.get('frag');
      const fragment = inventory.fragments.find(({ name }) => name === fragName);
      if (!fragment) {
        return [];
      }
      const main = document.createElement('div');
      transformers._transformFragment(main, { ...source, fragment, inventory });
      return [{
        element: main,
        path: fragment.path,
      }];
    }
    // page transformation
    const main = document.querySelector(importRules.root) || document.body;
    transformers._transformPage(main, { ...source, inventory });
    return [{
      element: main,
      path: generateDocumentPath(source),
    }];
  },
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment