Last active
December 5, 2025 08:54
-
-
Save lucaslouca/0f9971cdd6845d224f76 to your computer and use it in GitHub Desktop.
Convert a XLSX file to List<Map<String, String>>
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package com.tool.main; | |
| import java.util.List; | |
| import java.util.Map; | |
| public interface TTSheet2HashMap { | |
| /****************************************** PUBLIC ********************************************/ | |
| /** | |
| * Initiates the processing of the sheet to List<Map<String, | |
| * String>>. Only rows for which all values found in the | |
| * columnName2SeachValue map, will be included in the output. | |
| * | |
| * Considers rows within range [start, end]. | |
| * | |
| * @param columnName2SeachValue | |
| * Map = ColumnName -> SearchValue | |
| * @param start | |
| * @param end | |
| * @return | |
| */ | |
| List<Map<String, String>> load(Map<String, String> columnName2SeachValue, int start, int end); | |
| /** | |
| * Initiates the processing of the sheet to List<Map<String, | |
| * String>>. Only rows for which all value found in the | |
| * columnName2SeachValue map, will be included in the output. | |
| * | |
| * Considers all rows. | |
| * | |
| * @param columnName2SeachValue | |
| * Map = ColumnName -> SearchValue | |
| * @return | |
| */ | |
| List<Map<String, String>> load(Map<String, String> columnName2SeachValue); | |
| /** | |
| * Initiates the processing of the sheet to List<Map<String, | |
| * String>>. | |
| * | |
| * Loads all rows. | |
| * | |
| * @return | |
| */ | |
| List<Map<String, String>> load(); | |
| /** | |
| * Loads rows within range [start, end]. | |
| * | |
| * @param start | |
| * @param end | |
| * @return | |
| */ | |
| List<Map<String, String>> load(int start, int end); | |
| /** | |
| * Returns total number of rows in the sheet. | |
| * | |
| * @return number of rows in the sheet | |
| */ | |
| int numberOfRows(); | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package com.tool.main; | |
| import java.io.File; | |
| import java.io.IOException; | |
| import java.io.InputStream; | |
| import java.util.ArrayList; | |
| import java.util.HashMap; | |
| import java.util.LinkedHashMap; | |
| import java.util.List; | |
| import java.util.Map; | |
| import javax.xml.parsers.ParserConfigurationException; | |
| import org.apache.commons.lang3.StringUtils; | |
| import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |
| import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |
| import org.apache.poi.openxml4j.opc.OPCPackage; | |
| import org.apache.poi.openxml4j.opc.PackageAccess; | |
| import org.apache.poi.ss.usermodel.DataFormatter; | |
| import org.apache.poi.ss.util.CellReference; | |
| import org.apache.poi.util.SAXHelper; | |
| import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; | |
| import org.apache.poi.xssf.eventusermodel.XSSFReader; | |
| import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; | |
| import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; | |
| import org.apache.poi.xssf.model.StylesTable; | |
| import org.apache.poi.xssf.usermodel.XSSFComment; | |
| import org.xml.sax.ContentHandler; | |
| import org.xml.sax.InputSource; | |
| import org.xml.sax.SAXException; | |
| import org.xml.sax.XMLReader; | |
| public class TTXLSX2HashMap implements TTSheet2HashMap { | |
| private OPCPackage xlsxPackage; | |
| private List<Map<String, String>> output; | |
| private Map<String, String> columnName2SeachValue; | |
| private int startRow; | |
| private int endRow; | |
| private int rowCount; | |
| /** | |
| * Constructor | |
| * | |
| * @param path | |
| * to Excel file | |
| */ | |
| public TTXLSX2HashMap(String path) { | |
| try { | |
| this.xlsxPackage = OPCPackage.open(path, PackageAccess.READ); | |
| } catch (InvalidFormatException e) { | |
| e.printStackTrace(); | |
| } | |
| } | |
| /** | |
| * Parses and shows the content of one sheet using the specified styles and | |
| * shared-strings tables. | |
| * | |
| * @param styles | |
| * @param strings | |
| * @param sheetInputStream | |
| */ | |
| private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream) throws IOException, | |
| ParserConfigurationException, SAXException { | |
| DataFormatter formatter = new DataFormatter(); | |
| InputSource sheetSource = new InputSource(sheetInputStream); | |
| try { | |
| XMLReader sheetParser = SAXHelper.newXMLReader(); | |
| ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false); | |
| sheetParser.setContentHandler(handler); | |
| sheetParser.parse(sheetSource); | |
| } catch (ParserConfigurationException e) { | |
| throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); | |
| } | |
| } | |
| /** | |
| * Initiates the processing of the XLS workbook file to List<Map<String, | |
| * String>>. | |
| * | |
| * @throws IOException | |
| * @throws OpenXML4JException | |
| * @throws ParserConfigurationException | |
| * @throws SAXException | |
| */ | |
| private void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { | |
| ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage); | |
| XSSFReader xssfReader = new XSSFReader(this.xlsxPackage); | |
| StylesTable styles = xssfReader.getStylesTable(); | |
| XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); | |
| while (iter.hasNext()) { | |
| InputStream stream = iter.next(); | |
| processSheet(styles, strings, new SheetToHashMap(), stream); | |
| stream.close(); | |
| } | |
| } | |
| /** | |
| * Initiates the processing of the XLS workbook file to List<Map<String, | |
| * String>>. Only rows for which all value found in the | |
| * columnName2SeachValue map, will be included in the output. | |
| * | |
| * @param columnName2SeachValue | |
| * Map = Column Name ==> Search Value. | |
| * @throws IOException | |
| * @throws OpenXML4JException | |
| * @throws ParserConfigurationException | |
| * @throws SAXException | |
| */ | |
| private void process(Map<String, String> columnName2SeachValue) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException { | |
| this.columnName2SeachValue = columnName2SeachValue; | |
| process(); | |
| } | |
| /** | |
| * Returns the index from this cell reference. | |
| * Example: Given 'A' returns '1' as a String | |
| */ | |
| private String getColumnReference(String cellReference) { | |
| if (StringUtils.isBlank(cellReference)) { | |
| return ""; | |
| } | |
| return ""+CellReference.convertColStringToIndex(cellReference.split("[0-9]*$")[0]); | |
| } | |
| /****************************************** PUBLIC ********************************************/ | |
| /** | |
| * Initiates the processing of the XLS workbook file to List<Map<String, | |
| * String>>. Only rows for which all values found in the | |
| * columnName2SeachValue map, will be included in the output. | |
| * | |
| * Loads rows within range [start, end]. | |
| * | |
| * @param columnName2SeachValue | |
| * Map = ColumnName -> SearchValue | |
| * @param start | |
| * @param end | |
| * @return | |
| */ | |
| public List<Map<String, String>> load(Map<String, String> columnName2SeachValue, int start, int end) { | |
| try { | |
| this.rowCount = 0; | |
| this.startRow = start; | |
| this.endRow = end; | |
| this.output = new ArrayList<Map<String, String>>(); | |
| this.process(columnName2SeachValue); | |
| } catch (IOException | OpenXML4JException | ParserConfigurationException | SAXException e) { | |
| e.printStackTrace(); | |
| } | |
| return this.output; | |
| } | |
| /** | |
| * Initiates the processing of the XLS workbook file to List<Map<String, | |
| * String>>. Only rows for which all value found in the | |
| * columnName2SeachValue map, will be included in the output. | |
| * | |
| * Loads all rows. | |
| * | |
| * @param columnName2SeachValue | |
| * Map = ColumnName -> SearchValue | |
| * @return | |
| */ | |
| public List<Map<String, String>> load(Map<String, String> columnName2SeachValue) { | |
| return this.load(columnName2SeachValue, -1, -1); | |
| } | |
| /** | |
| * Initiates the processing of the XLS workbook file to List<Map<String, | |
| * String>>. Only rows for which all value found in the | |
| * columnName2SeachValue map, will be included in the output. | |
| * | |
| * Loads all rows. | |
| * | |
| * @return | |
| */ | |
| public List<Map<String, String>> load() { | |
| return this.load(new HashMap<String, String>(), -1, -1); | |
| } | |
| /** | |
| * Loads rows within range [start, end]. | |
| * | |
| * @param start | |
| * @param end | |
| * @return | |
| */ | |
| public List<Map<String, String>> load(int start, int end) { | |
| return this.load(new HashMap<String, String>(), start, end); | |
| } | |
| public int numberOfRows() { | |
| this.load(-1, -1); | |
| return rowCount; | |
| } | |
| /****************************************** MAIN ********************************************/ | |
| public static void main(String[] args) throws Exception { | |
| if (args.length < 1) { | |
| System.err.println("Use:"); | |
| System.err.println("TTXLSX2HashMap <xlsx file> [min columns]"); | |
| return; | |
| } | |
| File xlsxFile = new File(args[0]); | |
| if (!xlsxFile.exists()) { | |
| System.err.println("Not found or not a file: " + xlsxFile.getPath()); | |
| return; | |
| } | |
| TTXLSX2HashMap xlsx2map = new TTXLSX2HashMap(xlsxFile.getPath()); | |
| xlsx2map.load(1, 2); | |
| } | |
| /****************************************** HELPER CLASSES ********************************************/ | |
| private class SheetToHashMap implements SheetContentsHandler { | |
| private static final int HEADER_ROW = 0; | |
| private LinkedHashMap<String, String> currentRowMap; // map of column headers => row values | |
| private LinkedHashMap<String, String> columnHeaders; // map of column references => column headers | |
| private int currentRow; | |
| /** | |
| * AND Condition | |
| * | |
| * @param row | |
| * @param col2val | |
| * @return | |
| */ | |
| private boolean rowContainsAllValues(LinkedHashMap<String, String> row, Map<String, String> col2val) { | |
| String searchValue; | |
| for (String searchCol : col2val.keySet()) { | |
| if (row.containsKey(searchCol)) { | |
| searchValue = col2val.get(searchCol); | |
| if (!searchValue.equalsIgnoreCase(row.get(searchCol))) { | |
| return false; | |
| } | |
| } else { | |
| throw new IllegalArgumentException("Row does not contain search column '" + searchCol + "'"); | |
| } | |
| } | |
| return true; | |
| } | |
| private boolean isRowInRange(int rowNum, int start, int end) { | |
| // Only start limit | |
| if (start == -1 && end != -1) { | |
| return rowNum <= end; | |
| } else if (end == -1 && start != -1) { | |
| return rowNum >= start; | |
| } else if (start == -1 && end == -1) { | |
| return true; | |
| } else { | |
| return (rowNum >= start && rowNum <= end); | |
| } | |
| } | |
| public void startRow(int rowNum) { | |
| rowCount++; | |
| this.currentRow = rowNum; | |
| if (this.currentRow == HEADER_ROW) { | |
| this.columnHeaders = new LinkedHashMap<String, String>(); | |
| } else { | |
| if (isRowInRange(rowNum, startRow, endRow)) { | |
| this.currentRowMap = new LinkedHashMap<String, String>(); | |
| for (String columnHeader : this.columnHeaders.values()) { | |
| this.currentRowMap.put(columnHeader, ""); | |
| } | |
| } | |
| } | |
| } | |
| public void endRow(int rowNum) { | |
| if (isRowInRange(rowNum, startRow, endRow)) { | |
| if (rowNum > HEADER_ROW) { | |
| if ((columnName2SeachValue == null || columnName2SeachValue.isEmpty()) || (this.rowContainsAllValues(currentRowMap, columnName2SeachValue))) { | |
| output.add(currentRowMap); | |
| } | |
| } | |
| } | |
| if (rowNum == HEADER_ROW) { | |
| output.add(columnHeaders); | |
| } | |
| } | |
| public void cell(String cellReference, String formattedValue, XSSFComment comment) { | |
| if (this.currentRow == HEADER_ROW) { | |
| this.columnHeaders.put(getColumnReference(cellReference), formattedValue); | |
| } else { | |
| if (isRowInRange(currentRow, startRow, endRow) && formattedValue != null) { | |
| String columnHeader = this.columnHeaders.get(getColumnReference(cellReference)); | |
| this.currentRowMap.put(columnHeader, formattedValue); | |
| } | |
| } | |
| } | |
| public void headerFooter(String text, boolean isHeader, String tagName) { | |
| // Skip, no headers or footers in CSV | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment