diff --git a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/FormatToBytesWritableMapper.java b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/FormatToBytesWritableMapper.java index d99f76371b1..016022e3776 100644 --- a/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/FormatToBytesWritableMapper.java +++ b/phoenix-core-server/src/main/java/org/apache/phoenix/mapreduce/FormatToBytesWritableMapper.java @@ -163,7 +163,7 @@ protected void map(LongWritable key, Text value, Context context) RECORD record = null; try { record = getLineParser().parse(value.toString()); - } catch (IOException e) { + } catch (IOException | RuntimeException e) { context.getCounter(COUNTER_GROUP_NAME, "Parser errors").increment(1L); return; } diff --git a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapperTest.java b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapperTest.java index 242166ceb16..ade8096853a 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapperTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/mapreduce/CsvToKeyValueMapperTest.java @@ -18,7 +18,10 @@ package org.apache.phoenix.mapreduce; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; import java.io.IOException; import org.apache.commons.csv.CSVRecord; @@ -49,4 +52,87 @@ public void testCsvLineParserWithQuoting() throws IOException { assertTrue(parsed.isConsistent()); assertEquals(1, parsed.getRecordNumber()); } + + /** + * Test that a normal CSV line parses successfully. + */ + @Test + public void testParseValidLine() throws IOException { + CsvToKeyValueMapper.CsvLineParser parser = + new CsvToKeyValueMapper.CsvLineParser(',', '"', '\\'); + CSVRecord record = parser.parse("value1,value2,value3"); + assertNotNull("Valid CSV line should parse to a non-null record", record); + } + + /** + * Test that an empty line returns null (due to ignoreEmptyLines). + */ + @Test + public void testParseEmptyLine() throws IOException { + CsvToKeyValueMapper.CsvLineParser parser = + new CsvToKeyValueMapper.CsvLineParser(',', '"', '\\'); + CSVRecord record = parser.parse(""); + assertNull("Empty line should return null", record); + } + + /** + * PHOENIX-7267: Verifies that commons-csv throws RuntimeException (not IOException) + * for malformed CSV input with unclosed quotes. + *

+ * This is the root cause of the bug: the CSVParser iterator wraps IOException in a + * RuntimeException, which was not caught by FormatToBytesWritableMapper.map(). + * The fix adds RuntimeException to the catch block. + */ + @Test + public void testParseMalformedLineThrowsRuntimeException() { + CsvToKeyValueMapper.CsvLineParser parser = + new CsvToKeyValueMapper.CsvLineParser(',', '"', null); + + // This input has an opening quote that is never closed — triggers + // "EOF reached before encapsulated token finished" in commons-csv + String malformedInput = "value1,\"unclosed value"; + + try { + parser.parse(malformedInput); + fail("Expected an exception for malformed CSV input with unclosed quote"); + } catch (IOException e) { + fail("Got IOException directly — commons-csv wraps this in RuntimeException"); + } catch (RuntimeException e) { + // commons-csv's CSVParser iterator wraps IOException in RuntimeException. + // The fix in FormatToBytesWritableMapper.map() now catches this. + assertTrue("RuntimeException should wrap an IOException", + e.getCause() instanceof IOException); + assertTrue("Should contain EOF error message", + e.getMessage().contains("EOF reached before encapsulated token finished")); + } + } + + /** + * PHOENIX-7267: Verifies that the mapper's error handling pattern works correctly. + *

+ * Simulates the catch block in FormatToBytesWritableMapper.map() to confirm that + * both IOException and RuntimeException from malformed CSV are properly caught + * after the fix. + */ + @Test + public void testMapperCatchBlockHandlesMalformedCsv() { + CsvToKeyValueMapper.CsvLineParser parser = + new CsvToKeyValueMapper.CsvLineParser(',', '"', null); + + String malformedInput = "val1,\"val2,val3"; + boolean errorHandled = false; + + // This simulates the fixed catch block in FormatToBytesWritableMapper.map(): + // catch (IOException | RuntimeException e) { ... } + try { + parser.parse(malformedInput); + } catch (IOException | RuntimeException e) { + // With the fix, both IOException and RuntimeException are caught here. + // The mapper increments the "Parser errors" counter and continues + // processing the next record instead of failing the entire job. + errorHandled = true; + } + + assertTrue("Malformed CSV should be caught by the fixed catch block", errorHandled); + } }