How to read '.xls' file using Mule 4
- Aminul Haque
- Apr 28, 2023
- 2 min read
Support for older excel sheet file format has been discontinued since Dataweave 2.0 (default for Mule 4). The only format Mule 4 support is the .xlsx (which started in 2007). An excel file which is in 2003 format (.xls) can not be processed by Mule 4 anymore without using a custom mechanism. The good news is most of the company uses the latest .xlsx file, so there is little chance of this limitation will cause any issues.
But sometime we see some older integrations still use the old format which we need to handle in Mule 4. The most simple solution to handle older excel sheet format is to use a custom java code to read the file. We can also convert the java code into a Mule 4 connector but it will cause maintenance overhead. Supporting simple java code requires far less skill than Mule 4 connector. Moreover, I do not expect the solution will be used widely, there will be very few instances when this solution will be useful.
Dependency
The java solution which I am going to explain here going to use the most popular Apache POI library for excel file management. The code will support reading both the latest and older format of the excel sheet. Before start coding java, you will need to import the Apache POI dependency in the maven pom.xml file:
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>5.2.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.0</version>
</dependency> |
At the time of writing this post, the latest version of the apache-poi is 5.2.3 but it has some library version conflicts with the Mule EE 4.4 runtime (Spacially the log4j library). This is why I am using a little older stable version which worked with Mule 4.4 runtime
Java code
package org.neointegration.transformer;
import java.io.InputStream;
import java.util.List;
import java.util.Map;
public interface Transformer {
public Object transform(InputStream data, String inputMimeType, boolean headerExists) throws Exception;
} |
package org.neointegration.transformer.xls;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.neointegration.transformer.Transformer;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
public class XLSTransformer implements Transformer {
private static final Logger _logger = LoggerFactory.getLogger(XLSTransformer.class);
@Override
public Object transform(InputStream stream, String inputMimeType, boolean headerExists) throws Exception {
if (stream == null) {
_logger.error("The stream is empty");
return null;
}
if (inputMimeType == null || (inputMimeType.equalsIgnoreCase("xls") && inputMimeType.equalsIgnoreCase("xlsx"))) {
_logger.error("It supports only xls or xlsx file type");
throw new NullPointerException("It supports only xls or xlsx file type, please set inputMimeType.");
}
return transformToObject(stream, inputMimeType, headerExists);
}
private List<Map<String, Object>> transformToObject(InputStream stream, String inputMimeType, boolean headerExists) throws IOException {
Workbook workbook = null;
final List<Map<String, Object>> data = new ArrayList<Map<String, Object>>();
try {
if (inputMimeType.equalsIgnoreCase("xls")) {
workbook = new HSSFWorkbook(stream);
} else {
workbook = new XSSFWorkbook(stream);
}
final Sheet sheet = workbook.getSheetAt(0);
final List<String> headers = new ArrayList<>();
final Row tempRow = sheet.getRow(0);
int cellCount = 1;
for (final Cell cell : tempRow) {
if (headerExists == true) {
headers.add(cell.getRichStringCellValue().getString().trim());
} else {
headers.add("attribute-" + cellCount);
}
cellCount = cellCount + 1;
}
int rowNumber = 0;
for (final Row row : sheet) {
rowNumber = rowNumber + 1;
if (rowNumber == 1 && headerExists == true) {
continue;
}
int cellNumber = 0;
final Map<String, Object> rowlist = new HashMap<>();
data.add(rowlist);
for (final Cell cell : row) {
Object v = "";
switch (cell.getCellType()) {
case STRING:
v = cell.getRichStringCellValue().getString();
break;
case NUMERIC:
v = cell.getNumericCellValue();
break;
case BOOLEAN:
v = cell.getBooleanCellValue();
break;
case FORMULA:
v = cell.getCellFormula();
break;
default:
v = "";
}
if (v == null) v = "";
rowlist.put(headers.get(cellNumber), v);
cellNumber = cellNumber + 1;
}
}
} finally {
if (workbook != null) {
try { workbook.close();} catch (Exception ignored) { }
}
}
return data;
}
} |
Mule Sample Code
Flow

Code
<?xml version="1.0" encoding="UTF-8"?>
<mule xmlns:java="http://www.mulesoft.org/schema/mule/java" xmlns:http="http://www.mulesoft.org/schema/mule/http"
xmlns:xml-module="http://www.mulesoft.org/schema/mule/xml-module"
xmlns:ee="http://www.mulesoft.org/schema/mule/ee/core" xmlns="http://www.mulesoft.org/schema/mule/core" xmlns:doc="http://www.mulesoft.org/schema/mule/documentation" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mulesoft.org/schema/mule/core http://www.mulesoft.org/schema/mule/core/current/mule.xsd
http://www.mulesoft.org/schema/mule/ee/core http://www.mulesoft.org/schema/mule/ee/core/current/mule-ee.xsd
http://www.mulesoft.org/schema/mule/xml-module http://www.mulesoft.org/schema/mule/xml-module/current/mule-xml-module.xsd
http://www.mulesoft.org/schema/mule/http http://www.mulesoft.org/schema/mule/http/current/mule-http.xsd
http://www.mulesoft.org/schema/mule/java http://www.mulesoft.org/schema/mule/java/current/mule-java.xsd">
<http:request-config name="HTTP_Request_configuration" doc:name="HTTP Request configuration" doc:id="dda707af-76e1-4402-bc53-60f27fa4e104" >
<http:request-connection host="localhost" port="8091" />
</http:request-config>
<flow name="testFlow" doc:id="cc5275e3-bd40-4760-a31f-bc8467892372" >
<java:new doc:name="Create Instance" doc:id="7217050f-3217-4d9a-9337-fd43908a3039" class="org.neointegration.transformer.xls.XLSTransformer" constructor="XLSTransformer()" target="instance"/>
<http:request method="GET" doc:name="Request" doc:id="37fbca38-a7f7-43e2-9672-edc962e1f0aa" config-ref="HTTP_Request_configuration" path="/api/sftp">
<http:headers ><![CDATA[#[output application/java
---
{
"Content-Type" : "application/octet-stream",
"country-code": "US",
"client_id" : "23t7878427357y2375",
"client_secret": "6t2346t385823587237"
}]]]></http:headers>
<http:query-params ><![CDATA[#[output application/java
---
{
"fileName" : "Central1.xls",
"filePath": "/us/mulesoft/gains/in/"
}]]]></http:query-params>
</http:request>
<java:invoke doc:name="Transform data" doc:id="8b439594-2c00-4d0c-b0ed-7e53c38d5227" instance="#[vars.instance]" class="org.neointegration.transformer.xls.XLSTransformer" method="transform(java.io.InputStream,java.lang.String,boolean)">
<java:args ><![CDATA[#[{
stream: payload,
inputMimeType: "xls",
headerExists: true
}]]]></java:args>
</java:invoke>
<foreach doc:name="For Each" doc:id="5bf7d211-e2be-4bd6-982b-15cae287da4c" >
<logger level="INFO" doc:name="Logger" doc:id="dd71ba22-ae70-400c-a28f-997e71f3e010" message="#[payload]"/>
</foreach>
</flow>
</mule> |
Comments