From 3b189fc700c9c7c210f4b78fc66b01f4c13ae5d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Tiercelin?=
Date: Tue, 22 Oct 2024 14:24:36 +0100
Subject: [PATCH] PDF417: Check that input is made of 0...127 chars when using Compaction.TEXT, throw an explicit exception if not the case (#1878)

* throw a more explicit exception when trying to PDF417/TEXT encode something outside of 0...255

* refactor PDF417HighLevelEncoder to avoid code duplication
extend UT to new method PDF417HighLevelEncoder#checkCharset

* fix javadoc typo
make UT more stringent on PDF417HighLevelEncoder#checkCharset

* restrict TEXT to 0...127
test with CP437 and Greek chars

* reinstate testEncodeAuto UT

* refactor testEncodeAuto UT

* address codacy findings

* formatting
---
Review notes (free text placed before the diffstat; not part of the commit message):

  * This patch is amended relative to the commit named in the first line, so
    the post-image blob ids on the "index" lines no longer describe the
    resulting files.
  * The new error message now names EncodeHintType.CHARACTER_SET, matching the
    enum name used in the same message and in the test code (the added line
    previously said "EncodeTypeHint").
  * Each negative test now calls fail() right after the statement that is
    expected to throw, so a missing WriterException can no longer pass silently.
  * The hints map in the test is declared with type arguments instead of as a
    raw Map.
  * NOTE(review): leading indentation (presumed two-space) and blank-line
    placement were re-derived from the hunk line counts; confirm against the
    target tree, or apply with whitespace tolerance.

 .../encoder/PDF417HighLevelEncoder.java       |  29 +++--
 .../pdf417/encoder/PDF417EncoderTestCase.java | 104 ++++++++++++++++--
 2 files changed, 120 insertions(+), 13 deletions(-)

diff --git a/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java b/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
index 564d7c968..8ac202b9d 100644
--- a/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
+++ b/core/src/main/java/com/google/zxing/pdf417/encoder/PDF417HighLevelEncoder.java
@@ -174,16 +174,15 @@ final class PDF417HighLevelEncoder {
     if (msg.isEmpty()) {
       throw new WriterException("Empty message not allowed");
     }
+
+    if (Compaction.TEXT == compaction) {
+      checkCharset(msg, 127, "Consider specifying Compaction.AUTO instead of Compaction.TEXT");
+    }

     if (encoding == null && !autoECI) {
-      for (int i = 0; i < msg.length(); i++) {
-        if (msg.charAt(i) > 255) {
-          throw new WriterException("Non-encodable character detected: " + msg.charAt(i) + " (Unicode: " +
-              (int) msg.charAt(i) +
-              "). Consider specifying EncodeHintType.PDF417_AUTO_ECI and/or EncodeTypeHint.CHARACTER_SET.");
-        }
-      }
+      checkCharset(msg, 255, "Consider specifying EncodeHintType.PDF417_AUTO_ECI and/or EncodeHintType.CHARACTER_SET");
     }
+

     //the codewords 0..928 are encoded as Unicode characters
     StringBuilder sb = new StringBuilder(msg.length());
@@ -283,6 +282,22 @@ final class PDF417HighLevelEncoder {

     return sb.toString();
   }
+
+  /**
+   * Check if input is only made of characters between 0 and the upper limit
+   * @param input the input
+   * @param max the upper limit for charset
+   * @param errorMessage the message to explain the error
+   * @throws WriterException exception highlighting the offending character and a suggestion to avoid the error
+   */
+  protected static void checkCharset(String input, int max, String errorMessage) throws WriterException {
+    for (int i = 0; i < input.length(); i++) {
+      if (input.charAt(i) > max) {
+        throw new WriterException("Non-encodable character detected: " + input.charAt(i) + " (Unicode: " +
+            (int) input.charAt(i) + ") at position #" + i + " - " + errorMessage);
+      }
+    }
+  }

   /**
    * Encode parts of the message using Text Compaction as described in ISO/IEC 15438:2001(E),
diff --git a/core/src/test/java/com/google/zxing/pdf417/encoder/PDF417EncoderTestCase.java b/core/src/test/java/com/google/zxing/pdf417/encoder/PDF417EncoderTestCase.java
index f0c9aa5ab..2f582b901 100644
--- a/core/src/test/java/com/google/zxing/pdf417/encoder/PDF417EncoderTestCase.java
+++ b/core/src/test/java/com/google/zxing/pdf417/encoder/PDF417EncoderTestCase.java
@@ -16,8 +16,16 @@

 package com.google.zxing.pdf417.encoder;

+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+
+import com.google.zxing.BarcodeFormat;
+import com.google.zxing.EncodeHintType;
+import com.google.zxing.MultiFormatWriter;
 import com.google.zxing.WriterException;

+import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;

 import org.junit.Assert;
@@ -28,18 +36,102 @@ import org.junit.Test;
  */
 public final class PDF417EncoderTestCase extends Assert {

+  private static final String PDF417PFX = "\u039f\u001A\u0385";
+
   @Test
   public void testEncodeAuto() throws Exception {
-    String encoded = PDF417HighLevelEncoder.encodeHighLevel(
-        "ABCD", Compaction.AUTO, StandardCharsets.UTF_8, false);
-    assertEquals("\u039f\u001A\u0385ABCD", encoded);
+    String input = "ABCD";
+    assertEquals(PDF417PFX + input, checkEncodeAutoWithSpecialChars(input, Compaction.AUTO));
   }
-
+
   @Test
   public void testEncodeAutoWithSpecialChars() throws Exception {
     // Just check if this does not throw an exception
-    PDF417HighLevelEncoder.encodeHighLevel(
-        "1%§s ?aG$", Compaction.AUTO, StandardCharsets.UTF_8, false);
+    checkEncodeAutoWithSpecialChars("1%§s ?aG$", Compaction.AUTO);
+    checkEncodeAutoWithSpecialChars("日本語", Compaction.AUTO);
+    checkEncodeAutoWithSpecialChars("₸ 5555", Compaction.AUTO);
+    checkEncodeAutoWithSpecialChars("€ 123,45", Compaction.AUTO);
+    checkEncodeAutoWithSpecialChars("€ 123,45", Compaction.BYTE);
+    checkEncodeAutoWithSpecialChars("123,45", Compaction.TEXT);
+
+    // Greek alphabet
+    Charset cp437 = Charset.forName("IBM437");
+    assertNotNull(cp437);
+    byte[] cp437Array = {(byte) 224,(byte) 225,(byte) 226,(byte) 227,(byte) 228}; //αßΓπΣ
+    String greek = new String(cp437Array, cp437);
+    assertEquals("αßΓπΣ", greek);
+    checkEncodeAutoWithSpecialChars(greek, Compaction.AUTO);
+    checkEncodeAutoWithSpecialChars(greek, Compaction.BYTE);
+    PDF417HighLevelEncoder.encodeHighLevel(greek, Compaction.AUTO, cp437, true);
+    PDF417HighLevelEncoder.encodeHighLevel(greek, Compaction.AUTO, cp437, false);
+
+    try {
+      // detect when a TEXT Compaction is applied to a non text input
+      checkEncodeAutoWithSpecialChars("€ 123,45", Compaction.TEXT);
+      fail("Expected WriterException: Compaction.TEXT must reject characters above 127");
+    } catch (WriterException e) {
+      assertNotNull(e.getMessage());
+      assertTrue(e.getMessage().contains("8364"));
+      assertTrue(e.getMessage().contains("Compaction.TEXT"));
+      assertTrue(e.getMessage().contains("Compaction.AUTO"));
+    }
+
+    try {
+      // detect when a TEXT Compaction is applied to a non text input
+      String input = "Hello! " + (char) 128;
+      checkEncodeAutoWithSpecialChars(input, Compaction.TEXT);
+      fail("Expected WriterException: Compaction.TEXT must reject character 128");
+    } catch (WriterException e) {
+      assertNotNull(e.getMessage());
+      assertTrue(e.getMessage().contains("128"));
+      assertTrue(e.getMessage().contains("Compaction.TEXT"));
+      assertTrue(e.getMessage().contains("Compaction.AUTO"));
+    }
+
+    try {
+      // detect when a TEXT Compaction is applied to a non text input
+      // https://github.com/zxing/zxing/issues/1761
+      String content = "€ 123,45";
+      Map<EncodeHintType, Object> hints = new HashMap<>();
+      hints.put(EncodeHintType.ERROR_CORRECTION, 4);
+      hints.put(EncodeHintType.PDF417_DIMENSIONS, new Dimensions(7, 7, 1, 300));
+      hints.put(EncodeHintType.MARGIN, 0);
+      hints.put(EncodeHintType.CHARACTER_SET, "ISO-8859-15");
+      hints.put(EncodeHintType.PDF417_COMPACTION, Compaction.TEXT);
+
+      (new MultiFormatWriter()).encode(content, BarcodeFormat.PDF_417, 200, 100, hints);
+      fail("Expected WriterException: Compaction.TEXT must reject characters above 127");
+    } catch (WriterException e) {
+      assertNotNull(e.getMessage());
+      assertTrue(e.getMessage().contains("8364"));
+      assertTrue(e.getMessage().contains("Compaction.TEXT"));
+      assertTrue(e.getMessage().contains("Compaction.AUTO"));
+    }
+  }
+
+  public String checkEncodeAutoWithSpecialChars(String input, Compaction compaction) throws Exception {
+    return PDF417HighLevelEncoder.encodeHighLevel(input, compaction, StandardCharsets.UTF_8, false);
+  }
+
+  @Test
+  public void testCheckCharset() throws Exception {
+    String input = "Hello!";
+    String errorMessage = UUID.randomUUID().toString();
+
+    // no exception
+    PDF417HighLevelEncoder.checkCharset(input,255,errorMessage);
+    PDF417HighLevelEncoder.checkCharset(input,1255,errorMessage);
+    PDF417HighLevelEncoder.checkCharset(input,111,errorMessage);
+
+    try {
+      // should throw an exception for character 'o' because it exceeds upper limit 110
+      PDF417HighLevelEncoder.checkCharset(input,110,errorMessage);
+      fail("Expected WriterException: 'o' (111) exceeds the upper limit 110");
+    } catch (WriterException e) {
+      assertNotNull(e.getMessage());
+      assertTrue(e.getMessage().contains("111"));
+      assertTrue(e.getMessage().contains(errorMessage));
+    }
   }

   @Test