Created
January 14, 2026 02:51
-
-
Save robsonkades/02d40889d828caa999fa0682e0922c15 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import org.springframework.stereotype.Component; | |
| import javax.xml.stream.XMLInputFactory; | |
| import javax.xml.stream.XMLStreamConstants; | |
| import javax.xml.stream.XMLStreamException; | |
| import javax.xml.stream.XMLStreamReader; | |
| import java.io.Serial; | |
| import java.io.StringReader; | |
| import java.util.ArrayDeque; | |
| import java.util.ArrayList; | |
| import java.util.Collections; | |
| import java.util.Deque; | |
| import java.util.HashMap; | |
| import java.util.Iterator; | |
| import java.util.List; | |
| import java.util.Map; | |
| import java.util.Optional; | |
| /** | |
| * Parser XML de alta performance usando StAX (Streaming API for XML). | |
| * Refatorado para maior legibilidade e robustez. | |
| */ | |
| @Component | |
public class XmlTagReaderOptimized {

    /** Upper bound, in characters, on the text accumulated for a single element. */
    private static final int MAX_TEXT_LENGTH = 1_000_000;

    /** Upper bound on the number of items returned by the list extraction methods. */
    private static final int MAX_LIST_SIZE = 10_000;

    /**
     * One StAX input factory per thread, so a factory instance is never shared between threads.
     * DTD processing and external entities are switched off because the XML handed to this class
     * may come from outside the application (XXE hardening).
     */
    private static final ThreadLocal<XMLInputFactory> INPUT_FACTORY =
            ThreadLocal.withInitial(() -> {
                XMLInputFactory factory = XMLInputFactory.newInstance();
                factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                factory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
                factory.setProperty(XMLInputFactory.IS_COALESCING, false);
                factory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
                return factory;
            });

    /**
     * Returns the trimmed text of the first element reached by {@code path}.
     *
     * <p>Path semantics (shared by every method of this class except {@link #extractBatch}):
     * the first segment may appear at any depth, and every following segment must be a
     * descendant (not necessarily a direct child) of the element matched by the previous one.
     * Only local names are compared.
     *
     * @param xml  the XML document
     * @param path element local names, outermost first; at least one
     * @return the element text, or empty when no element matches the path
     * @throws IllegalArgumentException if {@code path} is null or empty
     * @throws XmlProcessingException   if {@code xml} is null or cannot be parsed
     */
    public Optional<String> extractValue(String xml, String... path) {
        return extractWithNamespace(xml, null, path);
    }

    /**
     * Converts every element reached by {@code listPath} into a map.
     *
     * <p>Elements with children become a map keyed by child local name (repeated children are
     * grouped into a {@code List}, mixed text is stored under {@code "_text"}). Elements that
     * hold only text become a single-entry map under the key {@code "_value"}. At most
     * {@code MAX_LIST_SIZE} items are returned; further matches are silently ignored.
     *
     * @param xml      the XML document
     * @param listPath path of the repeated element (see {@link #extractValue} for semantics)
     * @return the extracted items in document order, possibly empty
     * @throws IllegalArgumentException if {@code listPath} is null or empty
     * @throws XmlProcessingException   if {@code xml} is null or cannot be parsed
     */
    public List<Map<String, Object>> extractList(String xml, String... listPath) {
        requirePath(listPath, "listPath");
        List<Map<String, Object>> items = new ArrayList<>();
        withReader(xml, reader -> {
            PathCursor cursor = new PathCursor(reader, null, listPath);
            while (items.size() < MAX_LIST_SIZE && cursor.advance()) {
                Object element = extractElementAsMap(reader);
                // extractElementAsMap has read through the element's END_ELEMENT.
                cursor.elementConsumed();
                if (element instanceof Map) {
                    @SuppressWarnings("unchecked") // extractElementAsMap only builds Map<String, Object>
                    Map<String, Object> structured = (Map<String, Object>) element;
                    items.add(structured);
                } else {
                    items.add(Map.of("_value", element));
                }
            }
            return null;
        });
        return items;
    }

    /**
     * Extracts selected fields from every element reached by {@code listPath}.
     *
     * <p>Each field path is relative to the list item and must match exactly (direct children
     * only, segment by segment). The first occurrence of a field inside an item wins. At most
     * {@code MAX_LIST_SIZE} items are returned.
     *
     * @param xml        the XML document
     * @param listPath   path of the repeated element (see {@link #extractValue} for semantics)
     * @param fieldPaths result key to relative element path; null is treated as empty
     * @return one map per item, containing only the fields that were found
     * @throws IllegalArgumentException if {@code listPath} is null or empty
     * @throws XmlProcessingException   if {@code xml} is null or cannot be parsed
     */
    public List<Map<String, String>> extractListWithFields(String xml, String[] listPath, Map<String, String[]> fieldPaths) {
        requirePath(listPath, "listPath");
        Map<String, String[]> wanted = fieldPaths == null ? Collections.emptyMap() : fieldPaths;
        List<Map<String, String>> items = new ArrayList<>();
        withReader(xml, reader -> {
            PathCursor cursor = new PathCursor(reader, null, listPath);
            while (items.size() < MAX_LIST_SIZE && cursor.advance()) {
                items.add(extractSpecificFields(reader, wanted));
                // extractSpecificFields has read through the item's END_ELEMENT.
                cursor.elementConsumed();
            }
            return null;
        });
        return items;
    }

    /**
     * Resolves several values in a single pass over the document.
     *
     * <p>Unlike the other methods, each query path is absolute and exact: it starts at the
     * document root element and lists every element down to the target. The first occurrence
     * of each path wins. Once an element matches, its text is read and its child elements are
     * skipped, so queries that point inside an already matched element are not resolved.
     *
     * @param xml     the XML document
     * @param queries result key to absolute element path; null or empty yields an empty map
     * @return the values found, keyed like {@code queries}; missing paths are absent
     * @throws XmlProcessingException if {@code xml} is null or cannot be parsed
     */
    public Map<String, String> extractBatch(String xml, Map<String, String[]> queries) {
        if (queries == null || queries.isEmpty()) {
            return Collections.emptyMap();
        }
        return withReader(xml, reader -> {
            Map<String, String> results = new HashMap<>(queries.size());
            Deque<String> currentPath = new ArrayDeque<>();
            // Stop as soon as every query has been answered.
            while (results.size() < queries.size() && reader.hasNext()) {
                int event = reader.next();
                if (event == XMLStreamConstants.START_ELEMENT) {
                    currentPath.addLast(reader.getLocalName());
                    if (captureMatchingFields(reader, currentPath, queries, results)) {
                        // The element was read through its END_ELEMENT, so it is closed already.
                        currentPath.removeLast();
                    }
                } else if (event == XMLStreamConstants.END_ELEMENT && !currentPath.isEmpty()) {
                    currentPath.removeLast();
                }
            }
            return results;
        });
    }

    /**
     * Same as {@link #extractValue}, but every path segment must also belong to
     * {@code namespace}.
     *
     * @param xml       the XML document
     * @param namespace required namespace URI, or null to accept any namespace
     * @param path      element local names, outermost first; at least one
     * @return the element text, or empty when no element matches
     * @throws IllegalArgumentException if {@code path} is null or empty
     * @throws XmlProcessingException   if {@code xml} is null or cannot be parsed
     */
    public Optional<String> extractWithNamespace(String xml, String namespace, String... path) {
        requirePath(path, "path");
        return withReader(xml, reader -> {
            PathCursor cursor = new PathCursor(reader, namespace, path);
            if (cursor.advance()) {
                return Optional.of(extractText(reader));
            }
            return Optional.empty();
        });
    }

    /**
     * Returns an attribute of the first element reached by {@code elementPath}.
     *
     * <p>Only the first matching element is inspected: if it lacks the attribute the result
     * is empty even when a later matching element carries it.
     *
     * @param xml           the XML document
     * @param attributeName attribute local name (any namespace)
     * @param elementPath   path of the element (see {@link #extractValue} for semantics)
     * @return the attribute value, or empty when the element or the attribute is missing
     * @throws IllegalArgumentException if {@code elementPath} is null or empty
     * @throws XmlProcessingException   if {@code xml} is null or cannot be parsed
     */
    public Optional<String> extractAttribute(String xml, String attributeName, String... elementPath) {
        requirePath(elementPath, "elementPath");
        return withReader(xml, reader -> {
            PathCursor cursor = new PathCursor(reader, null, elementPath);
            if (!cursor.advance()) {
                return Optional.empty();
            }
            return Optional.ofNullable(reader.getAttributeValue(null, attributeName));
        });
    }

    /**
     * Extracts the NF-e access key, trying in order: the {@code Id} attribute of
     * {@code infNFe} (with a leading {@code "NFe"} removed), then the {@code chNFe} element
     * under {@code protNFe/infProt}, then under {@code NFe/infNFeSupl}; each location is tried
     * with and without the {@code nfeProc} wrapper. Every attempt re-parses the document.
     *
     * @param xml the NF-e XML document
     * @return the access key, or empty when none of the known locations is present
     * @throws XmlProcessingException if {@code xml} is null or cannot be parsed
     */
    public Optional<String> extractChaveAcesso(String xml) {
        return extractAttribute(xml, "Id", "nfeProc", "NFe", "infNFe")
                .or(() -> extractAttribute(xml, "Id", "NFe", "infNFe"))
                .map(id -> id.startsWith("NFe") ? id.substring(3) : id)
                .or(() -> extractValue(xml, "nfeProc", "protNFe", "infProt", "chNFe"))
                .or(() -> extractValue(xml, "protNFe", "infProt", "chNFe"))
                .or(() -> extractValue(xml, "nfeProc", "NFe", "infNFeSupl", "chNFe"))
                .or(() -> extractValue(xml, "NFe", "infNFeSupl", "chNFe"));
    }

    /**
     * Reads several attributes from the first element reached by {@code elementPath}.
     *
     * @param xml            the XML document
     * @param elementPath    path of the element (see {@link #extractValue} for semantics)
     * @param attributeNames attribute local names to read; null is treated as none
     * @return the attributes that are present, keyed by name; empty when the element is missing
     * @throws IllegalArgumentException if {@code elementPath} is null or empty
     * @throws XmlProcessingException   if {@code xml} is null or cannot be parsed
     */
    public Map<String, String> extractAttributes(String xml, String[] elementPath, String... attributeNames) {
        requirePath(elementPath, "elementPath");
        String[] names = attributeNames == null ? new String[0] : attributeNames;
        return withReader(xml, reader -> {
            if (!new PathCursor(reader, null, elementPath).advance()) {
                return Collections.emptyMap();
            }
            Map<String, String> attributes = new HashMap<>();
            for (String name : names) {
                String value = reader.getAttributeValue(null, name);
                if (value != null) {
                    attributes.put(name, value);
                }
            }
            return attributes;
        });
    }

    // ========== HELPERS ==========

    /**
     * Opens a stream reader over {@code xml}, runs {@code action} and always closes the reader.
     * StAX failures are rethrown as {@link XmlProcessingException}.
     */
    private <T> T withReader(String xml, ReaderAction<T> action) {
        if (xml == null) {
            // Fail with the same exception type as any other unusable input.
            throw new XmlProcessingException("Erro ao processar XML: conteúdo nulo");
        }
        try (StringReader source = new StringReader(xml)) {
            XMLStreamReader reader = INPUT_FACTORY.get().createXMLStreamReader(source);
            try {
                return action.execute(reader);
            } finally {
                reader.close();
            }
        } catch (XMLStreamException e) {
            throw new XmlProcessingException("Erro ao processar XML", e);
        }
    }

    /** A unit of work over an open stream reader that is allowed to throw StAX exceptions. */
    @FunctionalInterface
    private interface ReaderAction<T> {
        T execute(XMLStreamReader reader) throws XMLStreamException;
    }

    /** Rejects null or empty element paths before any parsing starts. */
    private static void requirePath(String[] path, String parameterName) {
        if (path == null || path.length == 0) {
            throw new IllegalArgumentException(
                    "O parâmetro '" + parameterName + "' deve conter ao menos um nome de elemento");
        }
    }

    /**
     * Walks a stream reader looking for elements that match a path, keeping track of nesting
     * so that each path segment is only matched inside the element matched by the previous
     * segment. Matching is greedy: the outermost candidate for each segment is chosen, and a
     * segment is released again when its element closes.
     */
    private static final class PathCursor {
        private final XMLStreamReader reader;
        private final String namespace;
        private final String[] path;
        /** Nesting depth at which each already matched (non-final) segment was found. */
        private final int[] anchorDepths;
        /** Number of currently open elements, including the one just reported by advance(). */
        private int depth;
        /** Number of leading path segments matched by currently open elements. */
        private int matched;

        PathCursor(XMLStreamReader reader, String namespace, String[] path) {
            this.reader = reader;
            this.namespace = namespace;
            this.path = path;
            this.anchorDepths = new int[path.length];
        }

        /**
         * Moves to the next element matching the whole path.
         *
         * @return true with the reader positioned on that element's START_ELEMENT, or false
         *         when the document is exhausted
         */
        boolean advance() throws XMLStreamException {
            while (reader.hasNext()) {
                int event = reader.next();
                if (event == XMLStreamConstants.START_ELEMENT) {
                    depth++;
                    if (matchesCurrentSegment()) {
                        if (matched == path.length - 1) {
                            return true;
                        }
                        anchorDepths[matched++] = depth;
                    }
                } else if (event == XMLStreamConstants.END_ELEMENT) {
                    // Leaving the element that anchored the last matched segment releases it.
                    if (matched > 0 && anchorDepths[matched - 1] == depth) {
                        matched--;
                    }
                    depth--;
                }
            }
            return false;
        }

        /**
         * Must be called after the caller has read through the END_ELEMENT of the element
         * reported by {@link #advance()}, before calling {@link #advance()} again.
         */
        void elementConsumed() {
            depth--;
        }

        private boolean matchesCurrentSegment() {
            boolean namespaceOk = namespace == null || namespace.equals(reader.getNamespaceURI());
            return namespaceOk && reader.getLocalName().equals(path[matched]);
        }
    }

    /**
     * Converts the current element into a value, reading through its END_ELEMENT.
     *
     * @return the trimmed text when the element has no child elements; otherwise a map of
     *         child local name to value (a {@code List} for repeated names), with any
     *         non-blank own text stored under {@code "_text"}
     */
    private Object extractElementAsMap(XMLStreamReader reader) throws XMLStreamException {
        Map<String, Object> children = new HashMap<>();
        StringBuilder text = new StringBuilder();
        boolean open = true;
        while (open && reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamConstants.START_ELEMENT) {
                String tag = reader.getLocalName();
                // The recursive call consumes the child up to and including its END_ELEMENT.
                Object value = extractElementAsMap(reader);
                children.merge(tag, value, XmlTagReaderOptimized::mergeRepeated);
            } else if (event == XMLStreamConstants.CHARACTERS || event == XMLStreamConstants.CDATA) {
                appendText(text, reader.getText());
            } else if (event == XMLStreamConstants.END_ELEMENT) {
                open = false;
            }
        }
        String trimmed = text.toString().trim();
        if (children.isEmpty()) {
            return trimmed;
        }
        if (!trimmed.isEmpty()) {
            children.put("_text", trimmed);
        }
        return children;
    }

    /** Groups values of a repeated child name into a single list, preserving document order. */
    @SuppressWarnings("unchecked") // the only List ever stored is the List<Object> created here
    private static Object mergeRepeated(Object existing, Object added) {
        if (existing instanceof List) {
            ((List<Object>) existing).add(added);
            return existing;
        }
        List<Object> values = new ArrayList<>();
        values.add(existing);
        values.add(added);
        return values;
    }

    /**
     * Collects the requested fields from the current list item, reading through the item's
     * END_ELEMENT. Field paths are relative to the item and must match exactly.
     */
    private Map<String, String> extractSpecificFields(XMLStreamReader reader, Map<String, String[]> fieldPaths) throws XMLStreamException {
        Map<String, String> fields = new HashMap<>();
        Deque<String> currentPath = new ArrayDeque<>();
        int depth = 1; // the item element itself is already open
        while (depth > 0 && reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamConstants.START_ELEMENT) {
                currentPath.addLast(reader.getLocalName());
                if (captureMatchingFields(reader, currentPath, fieldPaths, fields)) {
                    // extractText already consumed this element's END_ELEMENT.
                    currentPath.removeLast();
                } else {
                    depth++;
                }
            } else if (event == XMLStreamConstants.END_ELEMENT) {
                depth--;
                if (!currentPath.isEmpty()) {
                    currentPath.removeLast();
                }
            }
        }
        return fields;
    }

    /**
     * If the current element is the target of one or more still unanswered entries of
     * {@code wanted}, reads its text once (through its END_ELEMENT) and stores it under every
     * such key.
     *
     * @return true when the element was consumed, false when the reader was left untouched
     */
    private boolean captureMatchingFields(XMLStreamReader reader, Deque<String> currentPath,
            Map<String, String[]> wanted, Map<String, String> collected) throws XMLStreamException {
        List<String> keys = null;
        for (Map.Entry<String, String[]> entry : wanted.entrySet()) {
            if (!collected.containsKey(entry.getKey()) && matchesPathFromCurrent(currentPath, entry.getValue())) {
                if (keys == null) {
                    keys = new ArrayList<>();
                }
                keys.add(entry.getKey());
            }
        }
        if (keys == null) {
            return false;
        }
        String text = extractText(reader);
        for (String key : keys) {
            collected.put(key, text);
        }
        return true;
    }

    /**
     * Returns the trimmed text of the current element, reading through its END_ELEMENT.
     * Child elements are skipped; text before and after them is concatenated.
     *
     * @throws XmlProcessingException if the text exceeds {@code MAX_TEXT_LENGTH}
     */
    private String extractText(XMLStreamReader reader) throws XMLStreamException {
        StringBuilder text = new StringBuilder();
        while (reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamConstants.CHARACTERS || event == XMLStreamConstants.CDATA) {
                appendText(text, reader.getText());
            } else if (event == XMLStreamConstants.END_ELEMENT) {
                break;
            } else if (event == XMLStreamConstants.START_ELEMENT) {
                skipElement(reader);
            }
        }
        return text.toString().trim();
    }

    /** Appends {@code content} to {@code buffer}, enforcing the {@code MAX_TEXT_LENGTH} limit. */
    private static void appendText(StringBuilder buffer, String content) {
        if (buffer.length() + content.length() > MAX_TEXT_LENGTH) {
            throw new XmlProcessingException("Texto excede tamanho máximo permitido");
        }
        buffer.append(content);
    }

    /** Discards the current element and everything inside it, through its END_ELEMENT. */
    private void skipElement(XMLStreamReader reader) throws XMLStreamException {
        int depth = 1;
        while (depth > 0 && reader.hasNext()) {
            int event = reader.next();
            if (event == XMLStreamConstants.START_ELEMENT) {
                depth++;
            } else if (event == XMLStreamConstants.END_ELEMENT) {
                depth--;
            }
        }
    }

    /** Tells whether the open-element stack equals {@code expectedPath}, segment by segment. */
    private boolean matchesPathFromCurrent(Deque<String> currentPath, String[] expectedPath) {
        if (expectedPath == null || currentPath.size() != expectedPath.length) {
            return false;
        }
        Iterator<String> open = currentPath.iterator();
        for (String expected : expectedPath) {
            if (!open.hasNext() || !open.next().equals(expected)) {
                return false;
            }
        }
        return true;
    }

    /** Unchecked exception raised for unparsable XML and for content that exceeds the limits. */
    public static class XmlProcessingException extends RuntimeException {
        @Serial
        private static final long serialVersionUID = 8311458058556201016L;

        public XmlProcessingException(String message, Throwable cause) {
            super(message, cause);
        }

        public XmlProcessingException(String message) {
            super(message);
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment