Skip to content

Instantly share code, notes, and snippets.

@lucaslouca
Last active December 5, 2025 08:54
Show Gist options
  • Select an option

  • Save lucaslouca/0f9971cdd6845d224f76 to your computer and use it in GitHub Desktop.

Select an option

Save lucaslouca/0f9971cdd6845d224f76 to your computer and use it in GitHub Desktop.
Convert a XLSX file to List<Map<String, String>>
package com.tool.main;
import java.util.List;
import java.util.Map;
public interface TTSheet2HashMap {
/****************************************** PUBLIC ********************************************/
/**
* Initiates the processing of the sheet to List<Map<String,
* String>>. Only rows for which all values found in the
* columnName2SeachValue map, will be included in the output.
*
* Considers rows within range [start, end].
*
* @param columnName2SeachValue
* Map = ColumnName -> SearchValue
* @param start
* @param end
* @return
*/
List<Map<String, String>> load(Map<String, String> columnName2SeachValue, int start, int end);
/**
* Initiates the processing of the sheet to List<Map<String,
* String>>. Only rows for which all value found in the
* columnName2SeachValue map, will be included in the output.
*
* Considers all rows.
*
* @param columnName2SeachValue
* Map = ColumnName -> SearchValue
* @return
*/
List<Map<String, String>> load(Map<String, String> columnName2SeachValue);
/**
* Initiates the processing of the sheet to List<Map<String,
* String>>.
*
* Loads all rows.
*
* @return
*/
List<Map<String, String>> load();
/**
* Loads rows within range [start, end].
*
* @param start
* @param end
* @return
*/
List<Map<String, String>> load(int start, int end);
/**
* Returns total number of rows in the sheet.
*
* @return number of rows in the sheet
*/
int numberOfRows();
}
package com.tool.main;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
public class TTXLSX2HashMap implements TTSheet2HashMap {
private OPCPackage xlsxPackage;
private List<Map<String, String>> output;
private Map<String, String> columnName2SeachValue;
private int startRow;
private int endRow;
private int rowCount;
/**
* Constructor
*
* @param path
* to Excel file
*/
public TTXLSX2HashMap(String path) {
try {
this.xlsxPackage = OPCPackage.open(path, PackageAccess.READ);
} catch (InvalidFormatException e) {
e.printStackTrace();
}
}
/**
* Parses and shows the content of one sheet using the specified styles and
* shared-strings tables.
*
* @param styles
* @param strings
* @param sheetInputStream
*/
private void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream) throws IOException,
ParserConfigurationException, SAXException {
DataFormatter formatter = new DataFormatter();
InputSource sheetSource = new InputSource(sheetInputStream);
try {
XMLReader sheetParser = SAXHelper.newXMLReader();
ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
sheetParser.setContentHandler(handler);
sheetParser.parse(sheetSource);
} catch (ParserConfigurationException e) {
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
}
}
/**
* Initiates the processing of the XLS workbook file to List<Map<String,
* String>>.
*
* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/
private void process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
while (iter.hasNext()) {
InputStream stream = iter.next();
processSheet(styles, strings, new SheetToHashMap(), stream);
stream.close();
}
}
/**
* Initiates the processing of the XLS workbook file to List<Map<String,
* String>>. Only rows for which all value found in the
* columnName2SeachValue map, will be included in the output.
*
* @param columnName2SeachValue
* Map = Column Name ==> Search Value.
* @throws IOException
* @throws OpenXML4JException
* @throws ParserConfigurationException
* @throws SAXException
*/
private void process(Map<String, String> columnName2SeachValue) throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
this.columnName2SeachValue = columnName2SeachValue;
process();
}
/**
* Returns the index from this cell reference.
* Example: Given 'A' returns '1' as a String
*/
private String getColumnReference(String cellReference) {
if (StringUtils.isBlank(cellReference)) {
return "";
}
return ""+CellReference.convertColStringToIndex(cellReference.split("[0-9]*$")[0]);
}
/****************************************** PUBLIC ********************************************/
/**
* Initiates the processing of the XLS workbook file to List<Map<String,
* String>>. Only rows for which all values found in the
* columnName2SeachValue map, will be included in the output.
*
* Loads rows within range [start, end].
*
* @param columnName2SeachValue
* Map = ColumnName -> SearchValue
* @param start
* @param end
* @return
*/
public List<Map<String, String>> load(Map<String, String> columnName2SeachValue, int start, int end) {
try {
this.rowCount = 0;
this.startRow = start;
this.endRow = end;
this.output = new ArrayList<Map<String, String>>();
this.process(columnName2SeachValue);
} catch (IOException | OpenXML4JException | ParserConfigurationException | SAXException e) {
e.printStackTrace();
}
return this.output;
}
/**
* Initiates the processing of the XLS workbook file to List<Map<String,
* String>>. Only rows for which all value found in the
* columnName2SeachValue map, will be included in the output.
*
* Loads all rows.
*
* @param columnName2SeachValue
* Map = ColumnName -> SearchValue
* @return
*/
public List<Map<String, String>> load(Map<String, String> columnName2SeachValue) {
return this.load(columnName2SeachValue, -1, -1);
}
/**
* Initiates the processing of the XLS workbook file to List<Map<String,
* String>>. Only rows for which all value found in the
* columnName2SeachValue map, will be included in the output.
*
* Loads all rows.
*
* @return
*/
public List<Map<String, String>> load() {
return this.load(new HashMap<String, String>(), -1, -1);
}
/**
* Loads rows within range [start, end].
*
* @param start
* @param end
* @return
*/
public List<Map<String, String>> load(int start, int end) {
return this.load(new HashMap<String, String>(), start, end);
}
public int numberOfRows() {
this.load(-1, -1);
return rowCount;
}
/****************************************** MAIN ********************************************/
public static void main(String[] args) throws Exception {
if (args.length < 1) {
System.err.println("Use:");
System.err.println("TTXLSX2HashMap <xlsx file> [min columns]");
return;
}
File xlsxFile = new File(args[0]);
if (!xlsxFile.exists()) {
System.err.println("Not found or not a file: " + xlsxFile.getPath());
return;
}
TTXLSX2HashMap xlsx2map = new TTXLSX2HashMap(xlsxFile.getPath());
xlsx2map.load(1, 2);
}
/****************************************** HELPER CLASSES ********************************************/
private class SheetToHashMap implements SheetContentsHandler {
private static final int HEADER_ROW = 0;
private LinkedHashMap<String, String> currentRowMap; // map of column headers => row values
private LinkedHashMap<String, String> columnHeaders; // map of column references => column headers
private int currentRow;
/**
* AND Condition
*
* @param row
* @param col2val
* @return
*/
private boolean rowContainsAllValues(LinkedHashMap<String, String> row, Map<String, String> col2val) {
String searchValue;
for (String searchCol : col2val.keySet()) {
if (row.containsKey(searchCol)) {
searchValue = col2val.get(searchCol);
if (!searchValue.equalsIgnoreCase(row.get(searchCol))) {
return false;
}
} else {
throw new IllegalArgumentException("Row does not contain search column '" + searchCol + "'");
}
}
return true;
}
private boolean isRowInRange(int rowNum, int start, int end) {
// Only start limit
if (start == -1 && end != -1) {
return rowNum <= end;
} else if (end == -1 && start != -1) {
return rowNum >= start;
} else if (start == -1 && end == -1) {
return true;
} else {
return (rowNum >= start && rowNum <= end);
}
}
public void startRow(int rowNum) {
rowCount++;
this.currentRow = rowNum;
if (this.currentRow == HEADER_ROW) {
this.columnHeaders = new LinkedHashMap<String, String>();
} else {
if (isRowInRange(rowNum, startRow, endRow)) {
this.currentRowMap = new LinkedHashMap<String, String>();
for (String columnHeader : this.columnHeaders.values()) {
this.currentRowMap.put(columnHeader, "");
}
}
}
}
public void endRow(int rowNum) {
if (isRowInRange(rowNum, startRow, endRow)) {
if (rowNum > HEADER_ROW) {
if ((columnName2SeachValue == null || columnName2SeachValue.isEmpty()) || (this.rowContainsAllValues(currentRowMap, columnName2SeachValue))) {
output.add(currentRowMap);
}
}
}
if (rowNum == HEADER_ROW) {
output.add(columnHeaders);
}
}
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
if (this.currentRow == HEADER_ROW) {
this.columnHeaders.put(getColumnReference(cellReference), formattedValue);
} else {
if (isRowInRange(currentRow, startRow, endRow) && formattedValue != null) {
String columnHeader = this.columnHeaders.get(getColumnReference(cellReference));
this.currentRowMap.put(columnHeader, formattedValue);
}
}
}
public void headerFooter(String text, boolean isHeader, String tagName) {
// Skip, no headers or footers in CSV
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment